from collections.abc import Iterable

import pandas as pd
from talos import Reporting


def find_best_model_over_scan_logs(metric_weight='val_f1', *filepaths):
    """
    Finds the best model across multiple Talos scan configurations.
    The scan configuration with the maximum <metric_weight> is the one
    selected as the best.

    :param metric_weight: The evaluation metric that will be used as the
        qualifier for the best model.
    :param filepaths: An iterable that contains the filepaths of the saved
        Talos scan logs.
    :return (dict): A dictionary with the best model configuration.
    """
    assert metric_weight is not None, "Argument <metric_weight> cannot be None."
    assert isinstance(filepaths, Iterable), "Argument <filepaths> must be iterable."

    # Concatenate the scan logs into a single dataframe and re-index it.
    config_pd = pd.concat(map(lambda file: Reporting(file).data, filepaths))
    config_pd.index = range(config_pd.shape[0])

    # Row with the highest value of the chosen metric.
    best_model_idx = config_pd[metric_weight].idxmax()
    best_model = config_pd.loc[best_model_idx].to_dict()

    # The csv round-trip stringifies booleans and float-encodes integer
    # hyperparameters; coerce them back (heuristic: any float >= 1 is
    # treated as an int).
    for key, value in best_model.items():
        if isinstance(value, float) and value >= 1:
            best_model[key] = int(value)
        elif value == 'False' or value == 'True':
            best_model[key] = value == 'True'

    return best_model
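# A minimal usage sketch. The paths and the 'val_f1' column are
# hypothetical; any iterable of Talos scan-log CSVs works.
best = find_best_model_over_scan_logs('val_f1',
                                      'scans/run_a.csv',
                                      'scans/run_b.csv')
print(best)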
# NOTE: the call head (scan_object, x, y) is assumed; p, the_network and
# `import talos as ta` come from earlier in the original script.
scan_object = ta.Scan(x, y,
                      params=p,
                      model=the_network,
                      fraction_limit=0.01,
                      experiment_name='boy_4_12')

analyze_object = ta.Analyze(scan_object)

# Plot 'training' bars
analyze_object.plot_bars('batch_size', 'mse', 'lr', 'first_neuron')

# Run reporting on the training log - to be changed accordingly
from talos import Reporting

r = Reporting(
    os.path.join(r'C:\Users\Szymek\Documents\magisterka_1\boy_4_12',
                 '120419221138.csv').replace(os.sep, '/'))

# Find the lowest score
r.low(metric='loss')

# Find the round number with the lowest score
r.data.loss.idxmin()

# Plot the loss function across epochs
r.plot_line(metric='loss')

# Plot correlation matrix
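# The snippet stops at the comment above. A hedged sketch of the call it
# presumably led up to, assuming Reporting.plot_corr() as found in talos ~1.0:
r.plot_corr('loss')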
# Tail of the Talos model function. The def line below follows the standard
# Talos model signature; the layer definitions are omitted in this excerpt.
def iris_model(x_train, y_train, x_val, y_val, params):
    # ... model layers built above ...
    model.compile(optimizer=params['optimizer'],
                  loss=params['losses'],
                  metrics=['acc'])

    out = model.fit(x_train, y_train,
                    batch_size=params['batch_size'],
                    epochs=params['epochs'],
                    verbose=0,
                    validation_data=[x_val, y_val])
    # callbacks=[live()])

    return out, model

# and run the scan
h = ta.Scan(x, y,
            params=p,
            dataset_name='first_test',
            experiment_no='aaa',
            model=iris_model,
            grid_downsample=0.5,
            print_params=True)

r = Reporting('first_test_aaa.csv')

# draws a histogram for 'val_acc'
r.plot_hist()

# Deploy(h, 'experiment_name')
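# The commented Deploy call above packages the winning model; a sketch with
# the original arguments, assuming talos' Deploy, which writes a
# 'experiment_name' zip package with the best model and its weights:
ta.Deploy(h, 'experiment_name')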
# Requires logging, os, sys, numpy as np, plotille and talos' Reporting
# at module level.
def HyperReport(self, eval_criterion='val_loss'):
    """
    Reports the model from the csv file of a previous scan.
    Plots several quantities and comparisons in dir /$name/.
    Selects the best models according to eval_criterion (val_loss or eval_error).
    Reference :
    """
    logging.info(' Starting reporting '.center(80, '-'))

    # Get reporting #
    report_file = os.path.join('model', self.name + '.csv')
    if os.path.exists(report_file):
        r = Reporting(report_file)
    else:
        logging.critical('Could not find %s' % report_file)
        sys.exit(1)

    # Returns the results dataframe #
    logging.info('=' * 80)
    logging.info('Complete data after n_round = %d' % r.rounds())
    logging.debug(r.data)

    # Lowest eval_error #
    logging.info('-' * 80)
    if eval_criterion == 'eval_error':
        logging.info('Lowest eval_error = %0.5f obtained after %0.f rounds'
                     % (r.low('eval_mean'), r.rounds2high('eval_mean')))
    elif eval_criterion == 'val_loss':
        logging.info('Lowest val_loss = %0.5f obtained after %0.f rounds'
                     % (r.low('val_loss'), r.rounds2high('val_loss')))
    else:
        logging.critical('Could not find evaluation criterion "%s" in the results'
                         % eval_criterion)
        sys.exit(1)

    # Best params #
    logging.info('=' * 80)
    logging.info('Best parameter sets')
    # Lower is better for both criteria, so sort ascending.
    if eval_criterion == 'eval_error':
        sorted_data = r.data.sort_values('eval_mean', ascending=True)
    elif eval_criterion == 'val_loss':
        sorted_data = r.data.sort_values('val_loss', ascending=True)
    for i in range(0, 10):
        logging.info('-' * 80)
        logging.info('Best params no %d' % (i + 1))
        try:
            logging.info(sorted_data.iloc[i])
        except IndexError:
            logging.warning('\tNo more parameters')
            break

    # Hist in terminal #
    eval_mean_arr = r.data['eval_mean'].values
    val_loss_arr = r.data['val_loss'].values

    fig1 = plotille.Figure()
    fig1.width = 150
    fig1.height = 50
    fig1.set_x_limits(min_=np.amin(eval_mean_arr), max_=np.amax(eval_mean_arr))
    fig1.color_mode = 'byte'
    fig1.histogram(eval_mean_arr, bins=200, lc=25)
    print(' Evaluation error '.center(80, '-'))
    print('Best model : ', sorted_data.iloc[0][['eval_mean']])
    print(fig1.show(legend=True))

    fig2 = plotille.Figure()
    fig2.width = 150
    fig2.height = 50
    fig2.set_x_limits(min_=np.amin(val_loss_arr), max_=np.amax(val_loss_arr))
    fig2.color_mode = 'byte'
    fig2.histogram(val_loss_arr, bins=200, lc=100)
    print(' Val loss '.center(80, '-'))
    print('Best model : ', sorted_data.iloc[0][['val_loss']])
    print(fig2.show(legend=True))

    logging.info('=' * 80)

    # Generate dir #
    path_plot = os.path.join(parameters.main_path, 'model', self.name)
    if not os.path.isdir(path_plot):
        os.makedirs(path_plot)

    logging.info('Starting plots')

    # Make plots #
    PlotScans(data=r.data, path=path_plot, tag='')
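# For a single best round, pandas' idxmin is a compact alternative to
# sorting and slicing; a sketch against the same r.data frame, assuming
# val_loss is the criterion:
best_round = r.data.loc[r.data['val_loss'].idxmin()]
logging.info(best_round)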
print("test size", len(pred)) confusion_matrix(y_test, pred) result = None x_train, x_test, y_train, y_test = training_processing(*basic_processing()) pred = result.best_model().predict([x_test.values])>0.5 print("final test score", accuracy_score(pred, y_test)) print("test size", len(pred)) confusion_matrix(y_test, pred) if False: runfile('/home/gbfm/Workspaces/PyCharm/ANN/ann.py', wdir='/home/gbfm/Workspaces/PyCharm/ANN') data = training_processing(*basic_processing()) result = do_nn2_talos(*data) do_nn1(*data) do_nn2(*data) do_sklearn(MLPClassifier(), *data) do_sklearn(xgboost.XGBClassifier(), *data) report = Reporting(result) f = "n_layers learning_rate round_epochs n_nodes batch_size".split(" ") history = report.data.sort_values("val_acc", ascending=False) y = report.data["val_acc"] x = report.data lineplot(x[f[4]],y) plt.show()