def benchmark_Dense_structure(benchmark_data_folder, layer_range, neuron_range, neuron_steps, run_count=3, verbose=0):
    """Benchmark a grid of Dense models of varying depth and width.

    One model is built per (layer count, neurons per layer) pair, every
    hidden layer of a model sharing the same neuron count. Each model is
    then trained and evaluated ``run_count`` times, and every prediction
    is stored under the "Dense_structure_benchmark" flag.

    Args:
        benchmark_data_folder: Dataset passed to ``Load_Forecaster.load_data``.
        layer_range: String ``"start-end"`` giving the inclusive range of
            layer counts to test.
        neuron_range: String ``"start-end"``. Each bound is either a plain
            integer or a number suffixed with ``X``, in which case it is
            multiplied by ``input_shape[1]`` (presumably the feature
            count -- confirm against ``get_train_data_shape``).
        neuron_steps: Number of evenly spaced neuron counts generated
            between the two bounds.
        run_count: Number of train/predict repetitions per model.
        verbose: Verbosity level assigned to the forecaster.
    """
    forecaster = lf.Load_Forecaster()
    forecaster.load_data(benchmark_data_folder)
    forecaster._override_training_settings(epochs=benchmarking_max_epochs, lossplot=False)
    forecaster.verbose = verbose
    forecaster.db["flag"] = "Dense_structure_benchmark"
    input_shape = forecaster.get_train_data_shape(RNN=False)

    first_depth, last_depth = (int(bound) for bound in layer_range.split('-'))

    def to_neurons(bound):
        # A trailing 'X' marks the bound as a multiple of the input width.
        if bound.endswith('X'):
            return int(float(bound[:-1]) * input_shape[1])
        return int(bound)

    first_width, last_width = (to_neurons(bound) for bound in neuron_range.split('-'))
    widths = [int(width) for width in np.linspace(first_width, last_width, neuron_steps)]

    # Depth is the outer dimension of the grid, width the inner one.
    models = [
        mlu.build_Dense_model([str(width)] * depth, input_shape=input_shape)
        for depth in range(first_depth, last_depth + 1)
        for width in widths
    ]

    for model in tqdm(models):
        for _ in tqdm(range(run_count)):
            forecaster.train_model(model=model, RNN=False)
            forecaster.predict_load(graph=False, store=True)
def benchmark_CNN_structure(benchmark_data_folder, layer_range, filters_range, filters_steps, last_layer_type, kernel_size, run_count=3, verbose=0):
    """Benchmark a grid of CNN models of varying depth and filter count.

    One model is built per (layer count, filters per layer) pair; all
    layers of a model share the same filter count and kernel size. Each
    model is trained and evaluated ``run_count`` times, and every
    prediction is stored under the "CNN_structure_benchmark" flag.

    Args:
        benchmark_data_folder: Dataset passed to ``Load_Forecaster.load_data``.
        layer_range: String ``"start-end"`` giving the inclusive range of
            layer counts to test.
        filters_range: String ``"start-end"``. Each bound is either a plain
            integer or a number suffixed with ``X``, in which case it is
            multiplied by ``input_shape[2]`` (presumably the feature
            count of the RNN-shaped input -- confirm).
        filters_steps: Number of evenly spaced filter counts generated
            between the two bounds.
        last_layer_type: Forwarded unchanged to ``mlu.build_CNN_model``.
        kernel_size: Kernel size used for every layer.
        run_count: Number of train/predict repetitions per model.
        verbose: Verbosity level assigned to the forecaster.
    """
    forecaster = lf.Load_Forecaster()
    forecaster.load_data(benchmark_data_folder)
    forecaster._override_training_settings(epochs=benchmarking_max_epochs, lossplot=False)
    forecaster.verbose = verbose
    forecaster.db["flag"] = "CNN_structure_benchmark"
    input_shape = forecaster.get_train_data_shape(RNN=True)

    first_depth, last_depth = (int(bound) for bound in layer_range.split('-'))

    def to_filters(bound):
        # A trailing 'X' marks the bound as a multiple of the input width.
        if bound.endswith('X'):
            return int(float(bound[:-1]) * input_shape[2])
        return int(bound)

    first_filters, last_filters = (to_filters(bound) for bound in filters_range.split('-'))
    filter_counts = [int(count) for count in np.linspace(first_filters, last_filters, filters_steps)]

    # Depth is the outer dimension of the grid, filter count the inner one.
    models = [
        mlu.build_CNN_model(
            [filter_count] * depth,
            [kernel_size] * depth,
            input_shape=input_shape,
            last_layer_type=last_layer_type,
        )
        for depth in range(first_depth, last_depth + 1)
        for filter_count in filter_counts
    ]

    for model in tqdm(models):
        for _ in tqdm(range(run_count)):
            forecaster.train_model(model=model, RNN=True)
            forecaster.predict_load(graph=False, store=True)
def plot_models_benchs(location):
    """Show a bar chart comparing the benchmarked models for one location.

    Results stored under 'models_benchmark' are averaged per
    (location, model_summary), restricted to ``location`` and ordered by
    ascending testing MAPE. Training time is drawn on a secondary y axis.

    Args:
        location: Value of the 'location' column to plot.
    """
    import matplotlib.pyplot as plt

    forecaster = lf.Load_Forecaster()
    results = forecaster.load_results('models_benchmark')

    columns = ['testing_MAPE', 'training_MAPE', 'location', 'training_time', 'model_summary']
    averaged = results[columns].groupby(['location', 'model_summary']).mean()
    ranked = averaged.xs(location).sort_values('testing_MAPE')

    ax = ranked.plot.bar(secondary_y='training_time', rot=0)
    ax.set_title("Dataset : {0}\nDay ahead forecasting".format(location), fontsize=13)
    ax.set_xlabel("Model", fontsize=13)
    ax.set_ylabel("MAPE", fontsize=13)
    # The secondary axis carries the training_time series.
    ax.right_ax.set_ylabel('Seconds', fontsize=13)
    plt.show()
def benchmark_datasets(benchmark_datasets, flag, run_count=3, verbose=0):
    """Run the shared benchmark loop once for every dataset.

    A fresh forecaster is created per dataset, tagged with
    ``flag + "_benchmark"``, and handed to ``_benchmark_loop``.

    Args:
        benchmark_datasets: Iterable of datasets, each passed to
            ``Load_Forecaster.load_data``.
        flag: Prefix of the database flag the results are stored under.
        run_count: Forwarded to ``_benchmark_loop``.
        verbose: Verbosity level assigned to each forecaster; any value
            other than 0 also prints the dataset being benchmarked.
    """
    for dataset in tqdm(benchmark_datasets):
        if verbose != 0:
            print("Benchmarking dataset {0}".format(dataset))

        forecaster = lf.Load_Forecaster()
        forecaster.db["flag"] = flag + "_benchmark"
        forecaster.verbose = verbose
        forecaster.load_data(dataset)
        forecaster._override_training_settings(epochs=benchmarking_max_epochs, lossplot=False)

        _benchmark_loop(forecaster, run_count)
def benchmark_models(dataset, models, RNN=None, run_count=3, verbose=0):
    """Train and evaluate each given model several times on one dataset.

    Results are stored under the "models_benchmark" flag with detailed
    results enabled.

    Args:
        dataset: Dataset passed to ``Load_Forecaster.load_data``.
        models: Iterable of models to benchmark.
        RNN: Forwarded to ``train_model`` as its second positional argument.
        run_count: Number of train/predict repetitions per model.
        verbose: Verbosity level assigned to the forecaster.
    """
    df = lf.Load_Forecaster()
    df.db["flag"] = "models_benchmark"
    df.db["save_detailed_results"] = True
    # Honour the ``verbose`` argument, as the other benchmark entry points do.
    df.verbose = verbose
    df.load_data(dataset)
    df._override_training_settings(epochs=benchmarking_max_epochs, lossplot=False)
    df.evaluate_training_set = False

    for model in tqdm(models):
        for _ in tqdm(range(run_count)):
            df.train_model(model, RNN)
            df.predict_load(graph=False, store=True)
def _benchmark_base(benchmark_dataset, flag, verbose, early_override):
    """Create a forecaster configured for a benchmark run.

    Args:
        benchmark_dataset: Dataset passed to ``Load_Forecaster.load_data``.
        flag: Prefix of the database flag; ``'_benchmark'`` is appended.
        verbose: Verbosity level assigned to the forecaster.
        early_override: Optional non-empty dict whose three values, in
            insertion order, are (name of the forecaster method to call,
            keyword-argument name, keyword-argument value). Anything else
            (``None``, an empty dict, a non-dict) is ignored.

    Returns:
        The configured ``Load_Forecaster`` instance.

    Raises:
        ValueError: If ``early_override`` is a non-empty dict that does
            not hold exactly three values.
    """
    df = lf.Load_Forecaster()
    df.load_data(benchmark_dataset)
    df._override_training_settings(epochs=benchmarking_max_epochs, lossplot=False)
    df.db["flag"] = flag + '_benchmark'
    df.verbose = verbose

    # isinstance (rather than an exact type comparison) also accepts dict
    # subclasses such as OrderedDict.
    if isinstance(early_override, dict) and early_override:
        override_method, var_name, var_value = early_override.values()
        getattr(df, override_method)(**{var_name: var_value})
    return df
def plot_structure_bench_RNN_Dense(Dense=None, RNN=None, db=None):
    """Plot the structure-benchmark results of Dense or RNN models.

    For every model type found in the results, four figures are created:
    a multi-line plot of testing MAPE against units per layer (one line
    per layer count), followed by heatmaps of testing MAPE, training MAPE
    and training time over (units per layer, layer count).

    Args:
        Dense: If not None, plot the "Dense_structure_benchmark" results.
        RNN: If not None (and ``Dense`` is None), plot the
            "RNN_structure_benchmark" results.
        db: Optional value assigned to the forecaster's ``db["filename"]``
            before the results are loaded.

    Returns:
        None. Nothing is plotted when both ``Dense`` and ``RNN`` are None.
    """
    import seaborn as sns
    import matplotlib.pyplot as plt

    df = lf.Load_Forecaster()
    if db is not None:
        df.db["filename"] = db

    if Dense is not None:
        res = df.load_results(filter_flag="Dense_structure_benchmark")
        unit = 'Neurons'
    elif RNN is not None:
        res = df.load_results(filter_flag="RNN_structure_benchmark")
        unit = 'Units'
    else:
        return

    # Assuming all hidden layers have the same number of hidden neurons.
    res['neuron_per_layer'] = res['layer_config'].apply(lambda x: x[0])
    unit_label = "{0} per hidden layer".format(unit)

    # (metric column, title heading, colorbar label), in figure order.
    heatmaps = (
        ('testing_MAPE', "Testing performance (MAPE)", 'MAPE'),
        ('training_MAPE', "Training performance (MAPE)", 'MAPE'),
        ('training_time', "Training time (seconds)", 'Seconds'),
    )

    for model, grp in res.groupby('model_type'):
        # Multi-line plot: units per layer vs MAPE, one line per layer count.
        # The metric is selected before aggregating so that non-numeric
        # columns never reach mean().
        by_structure = grp.groupby(['layer_count', 'neuron_per_layer'])[['testing_MAPE']].mean()
        ax = by_structure.unstack(level=1).transpose().xs('testing_MAPE').plot(fontsize=13)
        ax.set_title("Model : {0}".format(model), fontsize=13)
        ax.set_xlabel(unit_label, fontsize=13)
        ax.set_ylabel("Testing MAPE", fontsize=13)

        for metric, heading, cbar_label in heatmaps:
            # Each heatmap goes on its own figure:
            # x: layer count, y: units per layer, z: metric.
            plt.figure()
            grid = (
                grp.groupby(['neuron_per_layer', 'layer_count'])[[metric]]
                .mean()
                .reset_index()
                # Keyword arguments: pivot() no longer accepts positionals.
                .pivot(index="neuron_per_layer", columns="layer_count", values=metric)
            )
            # fmt '.3g': float annotations with 3 significant digits,
            # switching to scientific notation beyond that.
            ax2 = sns.heatmap(grid, annot=True, fmt='.3g', cbar_kws={'label': cbar_label})
            ax2.set_title("{0}\nModel : {1}".format(heading, model), fontsize=13)
            ax2.set_xlabel("Layer count", fontsize=13)
            ax2.set_ylabel(unit_label, fontsize=13)
def plot_benchmark(var_name, mode='barplot', rot=0, title=None, xlabel=None, split_labels_line=True, secondary_y='training_time', secondary_y_label='Seconds', return_data=False, merge_models=True):
    """Plot or tabulate the results of the ``<var_name>_benchmark`` runs.

    Results are processed separately for every (location, model type) pair.

    Args:
        var_name: Benchmarked variable; also the results column used for
            grouping and the prefix of the database flag.
        mode: One of 'barplot', 'lineplot', 'boxplot', 'table' or
            'detailed_table'. Plot and 'table' modes work on per-value
            means; 'boxplot' and 'detailed_table' use the raw rows.
        rot: Tick-label rotation forwarded to the plotting calls.
        title: Forwarded to ``_set_labels``.
        xlabel: Forwarded to ``_set_labels``.
        split_labels_line: Forwarded to ``_set_labels``.
        secondary_y: Column drawn on the secondary y axis; a falsy value
            leaves it out of the selected columns.
        secondary_y_label: Forwarded to ``_set_labels``.
        return_data: In the table modes, collect the tables instead of
            displaying them.
        merge_models: Strip a leading 'CuDNN' from model type names so
            that such variants are grouped with their plain counterpart.

    Returns:
        A list of ``((location, model), DataFrame)`` tuples when
        ``return_data`` is true (empty for non-table modes), else None.
    """
    forecaster = lf.Load_Forecaster()
    results = forecaster.load_results(filter_flag=var_name + '_benchmark')

    columns = [var_name, 'testing_MAPE', 'training_MAPE']
    if secondary_y:
        columns.append(secondary_y)

    if merge_models:
        results['model_type'] = results['model_type'].apply(
            lambda name: name[5:] if name.startswith('CuDNN') else name
        )

    collected = []
    for loc, loc_rows in results.groupby('location'):
        for model, data in loc_rows.groupby('model_type'):
            # Modes working on the raw rows are handled first.
            if mode == 'boxplot':
                ax = data.boxplot(column='testing_MAPE', by=var_name, rot=rot)
                _set_labels(ax, loc, var_name, title, xlabel, rot, split_labels_line, False)
                continue

            if mode == 'detailed_table':
                if return_data:
                    collected.append(((loc, model), data[columns].sort_values('testing_MAPE')))
                else:
                    print("Results for location {0} and model type {1}".format(loc, model))
                    ICD.display(data[columns])
                    print("------------------------")
                continue

            # Every remaining mode works on the per-value means.
            data_mean = data[columns].groupby(var_name).mean()

            if mode == 'table':
                if return_data:
                    collected.append(((loc, model), data_mean.sort_values('testing_MAPE')))
                else:
                    print("Results for location {0} and model type {1}".format(loc, model))
                    ICD.display(data_mean.sort_values('testing_MAPE'))
                    print("------------------------")
            elif mode in ('lineplot', 'barplot'):
                # Lines are ordered along the variable, bars by performance.
                sort_key = var_name if mode == 'lineplot' else 'testing_MAPE'
                data_mean = data_mean.sort_values(sort_key)
                # Dropping the 'plot' suffix gives the pandas plot kind.
                ax = data_mean.plot(secondary_y=secondary_y, kind=mode[:-4], fontsize=11, rot=rot)
                _set_labels(ax, loc, var_name, title, xlabel, rot, split_labels_line, secondary_y, secondary_y_label)

    if return_data:
        return collected
def plot_structure_bench_CNN(database=None):
    """Plot the results stored under "CNN_structure_benchmark".

    Four figures are created: a multi-line plot of testing MAPE against
    filters per layer (one line per layer count), followed by heatmaps of
    testing MAPE, training time and training MAPE over
    (filters per layer, layer count).

    Args:
        database: Optional filename passed to the forecaster's
            ``_override_database_settings`` before the results are loaded.
    """
    import seaborn as sns
    import matplotlib.pyplot as plt

    df = lf.Load_Forecaster()
    if database is not None:
        df._override_database_settings(filename=database)
    res = df.load_results(filter_flag="CNN_structure_benchmark")

    # Assuming all hidden layers have the same number of filters.
    res['filters_per_layer'] = res['layer_config'].apply(lambda x: x[0])

    # Multi-line plot: filters per layer vs MAPE, one line per layer count.
    # The metric is selected before aggregating so that non-numeric
    # columns never reach mean().
    by_structure = res.groupby(['layer_count', 'filters_per_layer'])[['testing_MAPE']].mean()
    ax = by_structure.unstack(level=1).transpose().xs('testing_MAPE').plot(fontsize=13)
    ax.set_title("Model : CNN", fontsize=13)
    ax.set_xlabel("Filters per hidden layer", fontsize=13)
    ax.set_ylabel("Testing MAPE", fontsize=13)

    # (metric column, title heading, colorbar label), in figure order.
    # Both MAPE heatmaps carry the 'MAPE' colorbar label; only the
    # training-time one is measured in seconds.
    heatmaps = (
        ('testing_MAPE', "Testing performance (MAPE)", 'MAPE'),
        ('training_time', "Training time (seconds)", 'Seconds'),
        ('training_MAPE', "Training performance (MAPE)", 'MAPE'),
    )
    for metric, heading, cbar_label in heatmaps:
        # Each heatmap goes on its own figure:
        # x: layer count, y: filters per layer, z: metric.
        plt.figure()
        grid = (
            res.groupby(['filters_per_layer', 'layer_count'])[[metric]]
            .mean()
            .reset_index()
            # Keyword arguments: pivot() no longer accepts positionals.
            .pivot(index="filters_per_layer", columns="layer_count", values=metric)
        )
        # fmt '.3g': float annotations with 3 significant digits,
        # switching to scientific notation beyond that.
        ax2 = sns.heatmap(grid, annot=True, fmt='.3g', cbar_kws={'label': cbar_label})
        ax2.set_title("{0}\nModel : CNN".format(heading), fontsize=13)
        ax2.set_xlabel("Layer count", fontsize=13)
        ax2.set_ylabel("Filters per hidden layer", fontsize=13)
def get_train_shape(dataset, RNN):
    """Return the training-data shape of ``dataset``.

    Args:
        dataset: Dataset passed to ``Load_Forecaster.load_data``.
        RNN: Forwarded to ``Load_Forecaster.get_train_data_shape``.

    Returns:
        Whatever ``get_train_data_shape`` returns for the loaded dataset.
    """
    forecaster = lf.Load_Forecaster()
    forecaster.load_data(dataset)
    shape = forecaster.get_train_data_shape(RNN)
    return shape