def main():
    """
    Create an HTML report of exponentially weighted average graphs.

    For each input file: read the data, smooth the ordinate with an
    exponentially weighted moving average, plot raw versus smoothed
    values, save the graph as SVG, and embed it in the report.
    """
    start_time = time.time()
    # figsize and date_time_parser are module-level names reassigned below
    global figsize, date_time_parser
    # NOTE(review): `function` and `parser` are unpacked but never used in
    # this function; they are kept only to match the arity of parameters().
    file_names, graph_file_names, abscissa_names, ordinate_names,\
        ordinate_predicted_names, x_axis_label, y_axis_label, axis_title,\
        figsize, column_names_sort, date_time_parser,\
        date_formatter, alpha_value, function, output_url,\
        header_title, header_id, parser = parameters()
    # Redirect stdout into the HTML report file
    original_stdout = ds.html_begin(output_url=output_url,
                                    header_title=header_title,
                                    header_id=header_id)
    print('<pre style="white-space: pre-wrap;">')
    # NOTE(review): the loop targets shadow the lists of the same names
    # (date_time_parser, column_names_sort, date_formatter); zip() captures
    # the lists before the body rebinds the names, so this works, but it is
    # fragile — consider renaming the loop variables.
    for (file_name, abscissa_name, ordinate_name, ordinate_predicted_name,
         date_time_parser, column_names_sort, date_formatter,
         graph_file_name) in zip(file_names, abscissa_names, ordinate_names,
                                 ordinate_predicted_names, date_time_parser,
                                 column_names_sort, date_formatter,
                                 graph_file_names):
        # 'None' (a literal string from the parameter workbook) marks a
        # non-datetime abscissa
        if date_time_parser == 'None':
            data = ds.read_file(file_name=file_name,
                                sort_columns=column_names_sort,
                                sort_columns_bool=True)
        else:
            data = ds.read_file(file_name=file_name,
                                parse_dates=[abscissa_name],
                                sort_columns=column_names_sort,
                                sort_columns_bool=True)
        # Exponentially weighted moving average of the ordinate
        data[ordinate_predicted_name] = data[ordinate_name]\
            .ewm(alpha=alpha_value).mean()
        fig, ax = ds.plot_scatter_line_x_y1_y2(
            X=data[abscissa_name],
            y1=data[ordinate_name],
            y2=data[ordinate_predicted_name],
            figsize=figsize)
        ax.set_title(label=axis_title, fontweight='bold')
        ax.set_xlabel(xlabel=x_axis_label, fontweight='bold')
        ax.set_ylabel(ylabel=y_axis_label, fontweight='bold')
        ds.despine(ax=ax)
        fig.savefig(fname=f'{graph_file_name}.svg', format='svg')
        ds.html_figure(file_name=f'{graph_file_name}.svg')
        # NOTE(review): indentation reconstructed from collapsed source —
        # the page break is assumed to separate each file's graph; confirm.
        ds.page_break()
    stop_time = time.time()
    ds.report_summary(start_time=start_time, stop_time=stop_time,
                      read_file_names=file_names, targets=ordinate_names,
                      features=abscissa_names)
    print('</pre>')
    ds.html_end(original_stdout=original_stdout, output_url=output_url)
def parameters() -> Tuple[
    List[str], List[str], List[str], List[int], str, Tuple[int, int],
    str, str, str, str, str, str, str
]:
    """
    Read the analysis parameters from the parameter workbook.

    Bug fixes versus the original:
    - The return annotation was a bare tuple of types, which is not a
      valid type hint; it is now a single Tuple[...].
    - eval() on spreadsheet content is unsafe; ast.literal_eval evaluates
      only Python literals such as the "(width, height)" tuple.

    Returns
    -------
    file_names : List[str]
        Data file names.
    targets : List[str]
        Target (ordinate) column names.
    features : List[str]
        Feature (abscissa) column names.
    number_knots : List[int]
        Number of knots for each spline fit.
    graphicsdirectory : str
        Directory in which graphs are saved.
    figurewidthheight : Tuple[int, int]
        Figure (width, height).
    xaxislabel, yaxislabel, axistitle : str
        Graph labels and title.
    datetimeparser : str
        Date-time parsing directive.
    output_url, header_title, header_id : str
        HTML report parameters.
    """
    import ast
    parameters = ds.read_file(
        file_name='piecewise_natural_cubic_spline_parameters.ods')
    # Spreadsheet columns are NaN-padded; keep only real entries
    file_names = [x for x in parameters['File names'] if str(x) != 'nan']
    targets = [x for x in parameters['Targets'] if str(x) != 'nan']
    features = [x for x in parameters['Features'] if str(x) != 'nan']
    number_knots = [
        int(x) for x in parameters['Number of knots'] if str(x) != 'nan'
    ]
    other = parameters['Other parameter values']
    datetimeparser = other[0]
    graphicsdirectory = other[1]
    # was: eval(...) — unsafe on spreadsheet-supplied text
    figurewidthheight = ast.literal_eval(other[2])
    xaxislabel = other[3]
    yaxislabel = other[4]
    axistitle = other[5]
    output_url = other[6]
    header_title = other[7]
    header_id = other[8]
    return (file_names, targets, features, number_knots, graphicsdirectory,
            figurewidthheight, xaxislabel, yaxislabel, axistitle,
            datetimeparser, output_url, header_title, header_id)
def parameters() -> Tuple[
    List[str], List[str], List[str], List[str], List[str],
    str, str, str, Tuple[float, float], List[bool], List[str], List[str],
    float, str, str, str, str, str
]:
    """
    Read the analysis parameters from the parameter workbook.

    Bug fixes versus the original:
    - The return annotation was a bare tuple of types (not a valid type
      hint) and listed 19 types for 18 returned values; it is now a single
      Tuple[...] matching the return statement.
    - eval() on spreadsheet content is unsafe; ast.literal_eval evaluates
      only Python literals such as the "(width, height)" tuple.

    Returns
    -------
    file_names, graph_file_names, abscissa_names, ordinate_names,
    ordinate_predicted_names : List[str]
        Per-file names read from the NaN-padded workbook columns.
    xaxislabel, yaxislabel, axistitle : str
        Graph labels and title.
    figurewidthheight : Tuple[float, float]
        Figure (width, height).
    column_names_sort : List[bool]
        Sort flags per file.
    date_time_parser : List[str]
        Date-time parsing directives per file.
    date_formatter : List[str]
        Date formatter per file ('None' entries become None).
    alphavalue : float
        Smoothing factor for the exponentially weighted average.
    function, output_url, header_title, header_id, parser : str
        Remaining scalar parameters.
    """
    import ast
    parameters = ds.read_file(
        file_name='exponentially_weighted_average_parameters.ods')
    file_names = [x for x in parameters['File names'] if str(x) != 'nan']
    graph_file_names = [
        x for x in parameters['Graph file names'] if str(x) != 'nan'
    ]
    abscissa_names = [
        x for x in parameters['Abscissa names'] if str(x) != 'nan'
    ]
    ordinate_names = [
        x for x in parameters['Ordinate names'] if str(x) != 'nan'
    ]
    ordinate_predicted_names = [
        x for x in parameters['Ordinate predicted names'] if str(x) != 'nan'
    ]
    other = parameters['Other parameter values']
    xaxislabel = other[0]
    yaxislabel = other[1]
    axistitle = other[2]
    # was: eval(...) — unsafe on spreadsheet-supplied text
    figurewidthheight = ast.literal_eval(other[3])
    column_names_sort = [
        x for x in parameters['Column names sort'] if str(x) != 'nan'
    ]
    date_time_parser = [
        x for x in parameters['Date time parser'] if str(x) != 'nan'
    ]
    parser = other[4]
    # NOTE(review): index 5 of 'Other parameter values' is skipped here —
    # confirm against the workbook layout.
    # Split comma-separated formatter cells; a literal 'None' becomes None
    date_formatter = [
        None if split.strip() == 'None' else split.strip()
        for unsplit in parameters['Date formatter'] if str(unsplit) != 'nan'
        for split in unsplit.split(',')
    ]
    alphavalue = other[6]
    function = other[7]
    output_url = other[8]
    header_title = other[9]
    header_id = other[10]
    return (file_names, graph_file_names, abscissa_names, ordinate_names,
            ordinate_predicted_names, xaxislabel, yaxislabel, axistitle,
            figurewidthheight, column_names_sort, date_time_parser,
            date_formatter, alphavalue, function, output_url, header_title,
            header_id, parser)
def main():
    """
    Demonstrate cubic-spline fits on integer and datetime abscissae.

    Fits a cubic spline to each sample file and plots observed versus
    predicted values with plot_graph().

    Bug fixes versus the original:
    - `parser` was referenced in `date_time_parser = [None, parser]`
      before it was assigned (NameError); the assignment is moved up.
    - `if datetimeparser is True:` was never satisfied (the values are
      None or a format string), so the datetime-conversion branch was
      unreachable; a non-None parser now selects that branch.
    """
    file_names = ['raw_data_integer_float.csv', 'raw_data_datetime_float.csv']
    ordinate_predicted_name = ['ordinate_predicted', 'ordinate_predicted']
    graph_file_name = [
        'cubic_spline_integer_float', 'cubic_spline_datetime_float'
    ]
    abscissa_name = ['abscissa', 'abscissa']
    ordinate_name = ['ordinate', 'observed']
    column_names_sort = [False, False]
    # defined before first use (was a NameError in the original)
    parser = '%Y-%m-%d %H:%M:%S'
    date_time_parser = [None, parser]
    date_formatter = [None, '%m-%d']
    figure_width_height = (8, 6)
    axis_title = 'Cubic Spline'
    x_axis_label = 'Abscissa'
    y_axis_label = 'Ordinate'
    ds.style_graph()
    for (file_name, abscissaname, ordinatename, ordinatepredictedname,
         datetimeparser, columnnamessort, dateformatter,
         graphfile_name) in zip(file_names, abscissa_name, ordinate_name,
                                ordinate_predicted_name, date_time_parser,
                                column_names_sort, date_formatter,
                                graph_file_name):
        data = ds.read_file(file_name=file_name, parse_dates=[abscissaname])
        if datetimeparser is not None:
            # Splines need numeric x: convert datetimes to nanosecond
            # integers, fit, then restore the datetime dtype for plotting.
            data[abscissaname] = pd.to_numeric(data[abscissaname])
            spline = ds.cubic_spline(df=data, abscissa=abscissaname,
                                     ordinate=ordinatename)
            data[ordinatepredictedname] = spline(data[abscissaname])
            data[abscissaname] = data[abscissaname]\
                .astype(dtype='datetime64[ns]')
        else:
            spline = ds.cubic_spline(df=data, abscissa=abscissaname,
                                     ordinate=ordinatename)
            data[ordinatepredictedname] = spline(data[abscissaname])
        plot_graph(df=data, columnx=abscissaname, columny=ordinatename,
                   columnz=ordinatepredictedname,
                   figsize=figure_width_height, dateformat=dateformatter,
                   graphname=graphfile_name, graphtitle=axis_title,
                   xaxislabel=x_axis_label, yaxislabel=y_axis_label)
def main():
    """
    Fit piecewise natural cubic splines for every file/target/feature
    combination and embed the graphs in an HTML report.
    """
    start_time = time.time()
    # Module-level names read by helper functions (e.g. plot_scatter_line)
    global figsize, axis_title, x_axis_label, y_axis_label,\
        graphics_directory
    # NOTE(review): date_parser is unpacked but not used in this function
    file_names, targets, features, number_knots, graphics_directory,\
        figsize, x_axis_label, y_axis_label, axis_title,\
        date_parser, output_url, header_title, header_id = parameters()
    ds.create_directory(directories=graphics_directory)
    # Redirect stdout into the HTML report file
    original_stdout = ds.html_begin(output_url=output_url,
                                    header_title=header_title,
                                    header_id=header_id)
    ds.page_break()
    for file_name, target, feature in zip(file_names, targets, features):
        data = ds.read_file(file_name=file_name, parse_dates=features)
        # Replace missing target values with the column mean
        data[target] = data[target].fillna(data[target].mean())
        dates = True
        # Splines need numeric x; datetimes become nanosecond integers
        X = pd.to_numeric(data[feature])
        y = data[target]
        # One task per knot count, fitted in parallel worker processes
        t = ((X, y, file_name, target, feature, knot, dates)
             for knot in number_knots)
        with Pool() as pool:
            for _ in pool.imap_unordered(plot_scatter_line, t):
                pass
        for knot in number_knots:
            # NOTE(review): str.strip(".csv") strips any of the characters
            # {., c, s, v} from BOTH ends — it is not suffix removal and
            # would mangle a name like "vc_data.csv". It must also match
            # the file naming inside plot_scatter_line (defined elsewhere),
            # so confirm both sides before changing it.
            ds.html_figure(file_name=f'{graphics_directory}/'
                           f'spline_{file_name.strip(".csv")}_'
                           f'{target}_{feature}_{knot}.svg')
    stop_time = time.time()
    ds.page_break()
    ds.report_summary(start_time=start_time, stop_time=stop_time,
                      read_file_names=file_names, targets=targets,
                      features=features, number_knots=number_knots)
    ds.html_end(original_stdout=original_stdout, output_url=output_url)
fig.savefig(fname=f'time_series_{feature}.svg', format='svg') ds.html_figure(file_name=f'time_series_{feature}.svg', caption=f'time_series_{feature}.svg') start_time = time.time() original_stdout = ds.html_begin(output_url=output_url, header_title=header_title, header_id=header_id) ds.page_break() print('<pre style="white-space: pre-wrap;">') # Cleaning the data # Data should be cleaned before fitting a model. A simple example of graphing # each feature in sample order and replacing outliers with NaN is shown. # Read the data file into a pandas DataFrame data = ds.read_file(file_name=file_name, nrows=nrows) # Plot target versus features # With multiprocessing t = ((data[feature], feature) for feature in features) with Pool() as pool: for _ in pool.imap_unordered(plot_scatter_y, t): pass # Without multiprocessing # for feature in features: # fig, ax = ds.plot_scatter_y( # y=data[feature], # figsize=figsize # ) # ax.set_ylabel(ylabel=feature) # ax.set_title(label='Time Series') # ds.despine(ax)
def main():
    """
    Exercise the datasense dataframe utility functions (info, column
    finders, process_columns/rows, save_file, byte_size, read_file) and
    write all output into an HTML report.
    """
    start_time = time.time()
    pd.options.display.width = 120
    pd.options.display.max_columns = 100
    pd.options.display.max_rows = 100
    # Redirect stdout into the HTML report file
    original_stdout = ds.html_begin(output_url=output_url,
                                    header_title=header_title,
                                    header_id=header_id)
    print('--------------------------')
    print('test dataframe_info')
    print('test example 1')
    my_file = 'myfile.csv'
    # NOTE(review): assumes myfile.csv already exists on disk — confirm a
    # prior run or fixture creates it before this example.
    df = ds.read_file(my_file)
    df = ds.dataframe_info(df=df, file_in=my_file)
    print('--------------------------')
    print('test dataframe_info')
    print('test example 2')
    df = ds.create_dataframe()
    df = ds.dataframe_info(df=df, file_in='df')
    print('--------------------------')
    print('test find_bool_columns')
    print('test example')
    df = ds.create_dataframe()
    columns_bool = ds.find_bool_columns(df=df)
    print(columns_bool)
    print('--------------------------')
    print('test find_category_columns')
    print('test example')
    df = ds.create_dataframe()
    columns_category = ds.find_category_columns(df=df)
    print(columns_category)
    print('--------------------------')
    print('test find_datetime_columns')
    print('test example')
    df = ds.create_dataframe()
    columns_datetime = ds.find_datetime_columns(df=df)
    print(columns_datetime)
    print('--------------------------')
    print('test find_float_columns')
    print('test example')
    df = ds.create_dataframe()
    columns_float = ds.find_float_columns(df=df)
    print(columns_float)
    print('--------------------------')
    print('test find_int_columns')
    print('test example')
    df = ds.create_dataframe()
    columns_int = ds.find_int_columns(df=df)
    print(columns_int)
    print('--------------------------')
    print('test find_int_float_columns')
    print('test example')
    df = ds.create_dataframe()
    columns_int_float = ds.find_int_float_columns(df=df)
    print(columns_int_float)
    print('--------------------------')
    print('test find_object_columns')
    print('test example')
    df = ds.create_dataframe()
    columns_object = ds.find_object_columns(df=df)
    print(columns_object)
    print('--------------------------')
    print('test find_timedelta_columns')
    print('test example')
    df = ds.create_dataframe()
    columns_timedelta = ds.find_timedelta_columns(df=df)
    print(columns_timedelta)
    print('--------------------------')
    print('test number_empty_cells_in_columns')
    print('test example')
    # Hand-built frame with known NaN counts per column
    df = pd.DataFrame({
        'X': [25.0, 24.0, 35.5, np.nan, 23.1],
        'Y': [27, 24, np.nan, 23, np.nan],
        'Z': ['a', 'b', np.nan, 'd', 'e']
    })
    empty_cells = ds.number_empty_cells_in_columns(df=df)
    print(empty_cells)
    print('--------------------------')
    print('test process_columns')
    print('test example')
    df = ds.create_dataframe()
    df, columns_in_count, columns_non_empty_count, columns_empty_count,\
        columns_empty_list, columns_non_empty_list, columns_bool_list,\
        columns_bool_count, columns_float_list, columns_float_count,\
        columns_integer_list, columns_integer_count,\
        columns_datetime_list, columns_datetime_count,\
        columns_object_list, columns_object_count, columns_category_list,\
        columns_category_count, columns_timedelta_list,\
        columns_timedelta_count = ds.process_columns(df=df)
    print('columns_in_count :', columns_in_count)
    print('columns_non_empty_count:', columns_non_empty_count)
    print('columns_empty_count :', columns_empty_count)
    print('columns_empty_list :', columns_empty_list)
    print('columns_non_empty_list :', columns_non_empty_list)
    print('columns_bool_list :', columns_bool_list)
    print('columns_bool_count :', columns_bool_count)
    print('columns_float_list :', columns_float_list)
    print('columns_float_count :', columns_float_count)
    print('columns_integer_list :', columns_integer_list)
    print('columns_integer_count :', columns_integer_count)
    print('columns_datetime_list :', columns_datetime_list)
    print('columns_datetime_count :', columns_datetime_count)
    print('columns_object_list :', columns_object_list)
    print('columns_object_count :', columns_object_count)
    print('columns_category_list :', columns_category_list)
    print('columns_category_count :', columns_category_count)
    print('columns_timedelta_list :', columns_timedelta_list)
    print('columns_timedelta_count:', columns_timedelta_count)
    print('--------------------------')
    print('test process_rows')
    print('test example')
    df = ds.create_dataframe()
    df, rows_in_count, rows_out_count, rows_empty_count = ds.process_rows(df)
    print('rows_in_count :', rows_in_count)
    print('rows_out_count :', rows_out_count)
    print('rows_empty_count:', rows_empty_count)
    print('--------------------------')
    print('test save_file example 1')
    print('test example')
    df = ds.create_dataframe()
    ds.save_file(df=df, file_name='x_y.csv')
    print('--------------------------')
    print('test save_file example 2')
    print('test example')
    df = ds.create_dataframe()
    ds.save_file(df=df, file_name='x_y.csv', index=True, index_label='myindex')
    print('--------------------------')
    print('test save_file example 3')
    print('test example')
    df = ds.create_dataframe()
    ds.save_file(df=df, file_name='x_y.xlsx')
    print('--------------------------')
    print('test save_file example 4')
    print('test example')
    df = ds.create_dataframe()
    ds.save_file(df=df, file_name='x_y.xlsx', index=True,
                 index_label='myindex')
    print('--------------------------')
    print('test byte_size')
    print('test example')
    df = ds.create_dataframe()
    print(ds.byte_size(num=df.memory_usage(index=True).sum()))
    print('--------------------------')
    print('test read_file')
    print('test example 1')
    my_file = 'myfile.csv'
    df = ds.create_dataframe()
    ds.save_file(df=df, file_name=my_file)
    df = ds.read_file(file_name=my_file)
    ds.dataframe_info(df=df, file_in=my_file)
    # NOTE(review): this stop_time is overwritten near the end of the
    # function before it is used.
    stop_time = time.time()
    print('--------------------------')
    print('test read_file')
    print('test example 2')
    file_name = 'myfile.csv'
    df = ds.create_dataframe()
    ds.save_file(df=df, file_name=file_name)
    parse_dates = ['t', 'u']
    df = ds.read_file(file_name=file_name, parse_dates=parse_dates)
    # NOTE(review): file_in=my_file — same value as file_name here, but
    # file_name would be the consistent choice.
    ds.dataframe_info(df=df, file_in=my_file)
    print('--------------------------')
    print('test read_file')
    print('test example 3')
    file_name = 'myfile.csv'
    df = ds.create_dataframe()
    ds.save_file(df=df, file_name=file_name)
    column_names_dict = {
        'a': 'A',
        'b': 'B',
        'c': 'C',
        'd': 'D',
        'i': 'I',
        'r': 'R',
        's': 'S',
        't': 'T',
        'u': 'U',
        'y': 'Y',
        'x': 'X',
        'z': 'Z'
    }
    index_columns = ['Y']
    parse_dates = ['t', 'u']
    time_delta_columns = ['D']
    category_columns = ['C']
    integer_columns = ['A', 'I']
    # NOTE(review): float_columns, boolean_columns, object_columns are
    # defined but not passed to ds.read_file below — confirm whether the
    # omission is intentional.
    float_columns = ['X']
    boolean_columns = ['R']
    object_columns = ['Z']
    # NOTE(review): date_parser() is defined elsewhere in the file;
    # presumably it returns a parsing callable or format — confirm.
    df = ds.read_file(file_name=file_name,
                      column_names_dict=column_names_dict,
                      index_columns=index_columns,
                      date_parser=date_parser(),
                      parse_dates=parse_dates,
                      time_delta_columns=time_delta_columns,
                      category_columns=category_columns,
                      integer_columns=integer_columns)
    ds.dataframe_info(df=df, file_in=file_name)
    print('--------------------------')
    print('test read_file')
    print('test example 4')
    file_name = 'myfile.ods'
    df = ds.create_dataframe()
    ds.save_file(df=df, file_name=file_name)
    parse_dates = ['t', 'u']
    df = ds.read_file(file_name=file_name, parse_dates=parse_dates)
    ds.dataframe_info(df=df, file_in=file_name)
    stop_time = time.time()
    ds.page_break()
    ds.report_summary(start_time=start_time, stop_time=stop_time,
                      read_file_names=file_name, save_file_names=file_name)
    ds.html_end(original_stdout=original_stdout, output_url=output_url)
train_test_split from sklearn.pipeline import make_pipeline from sklearn.impute import SimpleImputer import datasense as ds import pandas as pd import numpy as np pd.options.display.max_columns = None pd.options.display.max_rows = None file_name = 'lunch_and_learn_clean.csv' target = 'Y' features = [ 'X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9', 'X10', 'X11', 'X12', 'X13' ] data = ds.read_file(file_name=file_name) print('Determine the number of rows and columns\n') print(data.shape) print('\nCheck for missing values in the data set\n') print(data.isna().sum()) mask_values = [('X1', -20, 20), ('X2', -25, 25), ('X3', -5, 5), ('X4', -10, 10), ('X5', -3, 3), ('X6', -5, 5), ('X7', -13, 13), ('X8', -9, 15), ('X9', -17, 15), ('X10', -16, 15), ('X11', -16, 17), ('X12', -16, 17), ('X13', -20, 23)] for column, lowvalue, highvalue in mask_values: data[column] = data[column].mask((data[column] <= lowvalue) | (data[column] >= highvalue)) # Describe the feature columns # for column in features: # print(column) # result = ds.nonparametric_summary(data[column])
def main():
    """
    Demonstrate pandas pivot tables, from implicit defaults through
    multi-level indexes, aggfunc dictionaries, sorting, and query
    filtering, writing all output into an HTML file.
    """
    pd.options.display.width = 220
    pd.options.display.max_columns = 220
    pd.options.display.max_rows = 220
    output_url = 'pivot_tables.html'
    header_title = 'Pivot tables'
    header_id = 'pivot-tables'
    # Redirect stdout into the HTML report file
    original_stdout = ds.html_begin(output_url=output_url,
                                    header_title=header_title,
                                    header_id=header_id)
    df = ds.read_file(file_name='sales_funnel.xlsx')
    ds.dataframe_info(df=df, file_in='sales_funnel.xlsx')
    print(df.head())
    print()
    print('Pivot table, implicit parameters')
    print(
        pd.pivot_table(data=df, values=['Price'], index=['Manager']).round(2))
    print()
    print('Pivot table, explicit parameters')
    print(
        pd.pivot_table(data=df, values=['Price'], index=['Manager'],
                       aggfunc='mean').round(2))
    print()
    print('Pivot table, multiple-level index')
    print(
        pd.pivot_table(data=df, values=['Price'], index=['Manager', 'Rep'],
                       aggfunc='mean').round(2))
    print()
    print('Pivot table, multi-parameter aggfunc')
    # NOTE(review): passing numpy callables (np.mean, np.sum) to aggfunc
    # is deprecated in recent pandas; string names ('mean', 'sum') are
    # preferred — confirm the pandas version pinned for this project.
    print(
        pd.pivot_table(data=df, values=['Price'], index=['Manager', 'Rep'],
                       aggfunc={
                           'Price': [np.mean, np.sum, len]
                       }).round(2))
    print()
    print('Pivot table, columns parameter is optional')
    print(
        pd.pivot_table(data=df, values=['Price'], index=['Manager', 'Rep'],
                       columns=['Product'], aggfunc=[np.sum]).round(2))
    print()
    print('Pivot table, replace NaN with 0')
    print(
        pd.pivot_table(data=df, values=['Price'], index=['Manager', 'Rep'],
                       columns=['Product'], aggfunc=[np.sum],
                       fill_value=0).round(2))
    print()
    # NOTE(review): "colume" typo in the banner below; left unchanged
    # because it is runtime output text.
    print('Pivot table, add second colume to values parameter')
    print(
        pd.pivot_table(data=df, values=['Price', 'Quantity'],
                       index=['Manager', 'Rep'], columns=['Product'],
                       aggfunc=[np.sum], fill_value=0).round(2))
    print()
    print('Pivot table, product column moved to the index')
    print(
        pd.pivot_table(data=df, values=['Price', 'Quantity'],
                       index=['Manager', 'Rep', 'Product'],
                       aggfunc=[np.sum], fill_value=0).round(2))
    print()
    print('Pivot table, show totals')
    print(
        pd.pivot_table(data=df, values=['Price', 'Quantity'],
                       index=['Manager', 'Rep', 'Product'],
                       aggfunc=[np.sum], fill_value=0,
                       margins=True).round(2))
    print()
    print('Pivot table, change categories')
    print(
        pd.pivot_table(data=df, values=['Price', 'Quantity'],
                       index=['Manager', 'Status'], aggfunc=[np.sum],
                       fill_value=0, margins=True).round(2))
    print()
    print('Pivot table, pass dictionary to aggfunc')
    print(
        pd.pivot_table(data=df, values=['Price', 'Quantity'],
                       index=['Manager', 'Status'], columns=['Product'],
                       aggfunc={
                           'Quantity': len,
                           'Price': np.sum
                       }, fill_value=0, margins=True).round(2))
    print()
    print('Pivot table, pass dictionary to aggfunc')
    print(
        pd.pivot_table(data=df, values=['Price', 'Quantity'],
                       index=['Manager', 'Status'], columns=['Product'],
                       aggfunc={
                           'Quantity': len,
                           'Price': [np.sum, np.mean]
                       }, fill_value=0).round(2))
    print()
    print('Pivot table, save to variable')
    table = pd.pivot_table(data=df, values=['Price', 'Quantity'],
                           index=['Manager', 'Status'], columns=['Product'],
                           aggfunc={
                               'Quantity': len,
                               'Price': [np.sum, np.mean]
                           }, fill_value=0).round(2)
    print(table)
    print()
    print('Pivot table, sort on price, mean CPU')
    table = table.sort_values(by=('Price', 'mean', 'CPU'), ascending=False)
    print(table)
    print()
    print('Pivot table, filter for one manager')
    table = table.query('Manager == ["Debra Henley"]')
    print(table)
    print()
    print('Pivot table, sort and filter with multiple "dots"')
    table = pd.pivot_table(
        data=df,
        values=['Price', 'Quantity'],
        index=['Manager', 'Status'],
        columns=['Product'],
        aggfunc={'Quantity': len, 'Price': [np.sum, np.mean]},
        fill_value=0
    )\
        .sort_values(by=('Price', 'mean', 'CPU'), ascending=False)\
        .query('Manager == ["Debra Henley"]')\
        .round(2)
    print(table)
    print()
    print('Pivot table, another query')
    table = pd.pivot_table(
        data=df,
        values=['Price', 'Quantity'],
        index=['Manager', 'Status'],
        columns=['Product'],
        aggfunc={'Quantity': len, 'Price': [np.sum, np.mean]},
        fill_value=0
    )\
        .query('Status == ["pending", "won"]')\
        .round(2)
    print(table)
    print()
    print('Pivot table, another query')
    table = pd.pivot_table(
        data=df,
        values=['Price', 'Quantity'],
        index=['Manager', 'Status'],
        columns=['Product'],
        aggfunc={'Quantity': len, 'Price': [np.sum, np.mean]},
        fill_value=0
    )\
        .query('Status == ["pending", "won"]')\
        .query('Manager == ["Debra Henley"]')\
        .round(2)
    print(table)
    print()
    print('Pivot table, another query')
    table = pd.pivot_table(
        data=df,
        values=['Price', 'Quantity'],
        index=['Manager', 'Status'],
        columns=['Product'],
        aggfunc={'Quantity': len, 'Price': [np.sum, np.mean]},
        fill_value=0
    )\
        .query('Status == ["pending", "won"] & Manager == ["Debra Henley"]')\
        .round(2)
    print(table)
    ds.html_end(original_stdout=original_stdout, output_url=output_url)
def main():
    """
    Demonstrate ds.read_file across csv, ods, and xlsx files, with and
    without dtype control, renaming, and sorting, writing all output into
    an HTML report.

    Bug fixes versus the original:
    - column_names_dict contained a duplicate 'r': 'R' entry (harmless but
      misleading); the duplicate is removed.
    - Example 5 printed `data` (the Example 4 result) instead of the
      dataframe just read from the ods file; it now prints `df`.
    """
    start_time = time.time()
    pd.options.display.max_columns = 100
    pd.options.display.max_rows = 100
    pd.options.display.width = 120
    file_name = 'myfile.csv'
    # Redirect stdout into the HTML report file
    original_stdout = ds.html_begin(
        output_url=output_url,
        header_title=header_title,
        header_id=header_id
    )
    help(ds.read_file)
    print()
    print('Create dataframe')
    df = ds.create_dataframe()
    print(df.head())
    print(df.columns)
    print(df.dtypes)
    ds.dataframe_info(
        df=df,
        file_in=file_name
    )
    help(ds.save_file)
    print()
    ds.save_file(
        df=df,
        file_name=file_name
    )
    # Example 1
    # Read a csv file. There is no guarantee the column dtypes are correct.
    print('Example 1. Only [a, i, s, x, z] have the correct dtypes.')
    df = ds.read_file(file_name=file_name)
    print(df.dtypes)
    print()
    # Example 2
    # Read a csv file. Ensure the dtypes of datetime columns.
    print('Example 2. Ensure the dtypes of datetime columns.')
    parse_dates = ['t', 'u']
    df = ds.read_file(
        file_name=file_name,
        parse_dates=parse_dates
    )
    print(df.dtypes)
    print()
    # Example 3
    # Read a csv file. Ensure the dtypes of columns; not timedelta, datetime.
    print('Example 3. Ensure the dtypes of cols; not timedelta, datetime.')
    convert_dict = {
        'a': 'float64',
        'b': 'boolean',
        'c': 'category',
        'i': 'float64',
        'r': 'str',
        's': 'str',
        'x': 'float64',
        'y': 'Int64',
        'z': 'float64'
    }
    df = ds.read_file(
        file_name=file_name,
        dtype=convert_dict
    )
    print(df.dtypes)
    print()
    # Example 4
    # Read a csv file. Ensure the dtypes of columns. Rename the columns.
    print(
        'Example 4. Ensure the column dtypes are correct. Rename the columns.'
    )
    # bug fix: duplicate 'r': 'R' entry removed
    column_names_dict = {
        'a': 'A',
        'b': 'B',
        'c': 'C',
        'd': 'D',
        'i': 'I',
        'r': 'R',
        's': 'S',
        't': 'T',
        'u': 'U',
        'x': 'X',
        'y': 'Y',
        'z': 'Z'
    }
    index_columns = ['Y']
    parse_dates = ['t', 'u']
    time_delta_columns = ['D']
    category_columns = ['C']
    integer_columns = ['A', 'I']
    float_columns = ['X']
    boolean_columns = ['R']
    object_columns = ['Z']
    sort_columns = ['I', 'A']
    sort_columns_bool = [True, False]
    # NOTE(review): date_parser() is defined elsewhere in the file;
    # presumably it returns a parsing callable or format — confirm.
    data = ds.read_file(
        file_name=file_name,
        column_names_dict=column_names_dict,
        index_columns=index_columns,
        parse_dates=parse_dates,
        date_parser=date_parser(),
        time_delta_columns=time_delta_columns,
        category_columns=category_columns,
        integer_columns=integer_columns,
        float_columns=float_columns,
        boolean_columns=boolean_columns,
        object_columns=object_columns,
        sort_columns=sort_columns,
        sort_columns_bool=sort_columns_bool
    )
    print(data.head(10))
    print()
    print('column dtypes')
    print(data.dtypes)
    print(data.info(verbose=True))
    print()
    print('index', data.index.name, 'dtype:', data.index.dtype)
    print()
    ds.dataframe_info(
        df=data,
        file_in=file_name
    )
    # Example 5
    # Read an ods file.
    file_name = 'myfile.ods'
    df = ds.create_dataframe()
    ds.save_file(
        df=df,
        file_name=file_name
    )
    parse_dates = ['t', 'u']
    df = ds.read_file(
        file_name=file_name,
        parse_dates=parse_dates
    )
    print(
        'Example 5. Read an ods file.'
    )
    # bug fix: these prints showed `data` (the Example 4 result) instead
    # of the dataframe just read from the ods file
    print(df.head(10))
    print()
    print('column dtypes')
    print(df.dtypes)
    print(df.info(verbose=True))
    print()
    ds.dataframe_info(
        df=df,
        file_in=file_name
    )
    # Example 6
    # Read an xlsx file.
    df = ds.read_file(file_name=file_name)
    file_name = 'myfile.xlsx'
    sheet_name = 'raw_data'
    ds.save_file(
        df=df,
        file_name=file_name,
        sheet_name=sheet_name
    )
    df = ds.read_file(
        file_name=file_name,
        sheet_name=sheet_name
    )
    ds.dataframe_info(
        df=df,
        file_in=file_name
    )
    stop_time = time.time()
    ds.page_break()
    ds.report_summary(
        start_time=start_time,
        stop_time=stop_time,
        read_file_names=file_name,
        save_file_names=file_name
    )
    ds.html_end(
        original_stdout=original_stdout,
        output_url=output_url
    )