def post(self):
    # Parse the evaluation request sent by the web front end.
    json_obj = json_decode(self.request.body)
    target_col = json_obj["target_col"]
    input_cols = json_obj["input_cols"]
    num_fold = json_obj["num_fold"]
    preset = json_obj["preset"]
    scaler_option = json_obj["scaler"]
    file_path = json_obj["path_to_data"]
    model_abbr = json_obj["model_abbr"]
    data_df, x_train, y_train, header_x, header_y = asc.data_load_shuffle(
        csv_file=file_path, input_col=input_cols, cols_to_remove=[],
        target_col=target_col, random_state=None)
    if preset == 'default':
        model_parameters = asc.default_model_parameters()
    else:
        model_parameters = asc.load_model_parameter_from_file(preset)
    if scaler_option == "AutoLoad":
        scaler_option = model_parameters['scaler_option']
    try:
        if model_abbr == 'NET':
            # Unpack the neural-network hyperparameters and cross-validate.
            lr = float(model_parameters['net_learning_rate'])
            layer = int(model_parameters['net_layer_n'])
            dropout = float(model_parameters['net_dropout'])
            l_2 = float(model_parameters['net_l_2'])
            epochs = int(model_parameters['net_epochs'])
            batch_size = int(model_parameters['net_batch_size'])
            net_structure = [int(x) for x in model_parameters['net_structure'].split(" ")]
            optimizer = keras.optimizers.Adam(lr=lr)
            model = asc.net_define(params=net_structure, layer_n=layer,
                                   input_size=x_train.shape[1], dropout=dropout,
                                   l_2=l_2, optimizer=optimizer)
            predictions, actual_values = asc.cross_val_predict_net(
                model, epochs=epochs, batch_size=batch_size,
                x_train=x_train, y_train=y_train, verbose=0,
                scaler_option=scaler_option, force_to_proceed=True)
            MAE, R2 = asc.evaluate(predictions, actual_values)
        else:
            model = asc.define_model_regression(model_abbr, model_parameters,
                                                x_header_size=x_train.shape[1])
            predictions, actual_values = asc.train_and_predict(
                model, x_train, y_train, scaler_option=scaler_option,
                num_of_folds=int(num_fold))
            MAE, R2 = asc.evaluate(predictions, actual_values)
    except Exception as e:
        # Signal failure to the client with sentinel scores.
        MAE = -1
        R2 = -1
    if MAE != -1:
        asc.save_comparison_chart(predictions, actual_values,
                                  PurePath("static/output/ml/ml_result.png"))
    response_to_send = {}
    response_to_send["MAE"] = float(MAE)
    response_to_send["R2"] = float(R2)
    response_to_send["input_cols"] = input_cols
    response_to_send["target_col"] = target_col
    response_to_send["model_abbr"] = model_abbr
    response_to_send["num_fold"] = num_fold
    response_to_send["scaler"] = scaler_option
    print(response_to_send)
    self.write(json.dumps(response_to_send))
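# A hedged sketch of the JSON body this evaluation endpoint expects, inferred
# from the keys read above; the file path, column names, and fold count are
# hypothetical example values:
#
#   {
#       "target_col": "target",
#       "input_cols": ["feature_1", "feature_2"],
#       "num_fold": 5,
#       "preset": "default",
#       "scaler": "AutoLoad",
#       "path_to_data": "static/data/example.csv",
#       "model_abbr": "NET"
#   }
#
# The handler replies with the same metadata plus the cross-validated MAE and
# R2, or MAE = R2 = -1 if training failed.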
def post(self):
    # Parse the auto-tuning request sent by the web front end.
    json_obj = json_decode(self.request.body)
    target_col = json_obj["target_col"]
    input_cols = json_obj["input_cols"]
    num_of_folds = int(json_obj["num_fold"])
    preset = json_obj["preset"]
    scaler_option = json_obj["scaler"]
    file_path = json_obj["path_to_data"]
    model_type = json_obj["model_abbr"]
    auto_tune_iter = 1000
    random_state = None
    data_df, x_train, y_train, header_x, header_y = asc.data_load_shuffle(
        csv_file=file_path, input_col=input_cols, cols_to_remove=[],
        target_col=target_col, random_state=None)
    if model_type == 'NET':
        # Mark every network hyperparameter as tunable, then search within
        # fixed bounds.
        net_neuron_max, net_structure, net_l_2, net_learning_rate, net_epochs, \
            net_dropout, net_layer_n, net_batch_size = \
            clean_up_net_params(-1, 'Tune', 'Tune', 'Tune', 'Tune', 'Tune', 'Tune', 'Tune')
        net_batch_size_max = 5
        net_layer_min = 3
        net_layer_max = 5
        net_dropout_max = 0.2
        net_default_neuron_max = 32
        checkpoint = None
        model_parameters = asc.net_tuning(
            tries=auto_tune_iter, lr=net_learning_rate,
            x_train=x_train, y_train=y_train, layer=net_layer_n,
            params=net_structure, epochs=net_epochs, batch_size=net_batch_size,
            dropout=net_dropout, l_2=net_l_2, neuron_max=net_neuron_max,
            batch_size_max=net_batch_size_max, layer_min=net_layer_min,
            layer_max=net_layer_max, dropout_max=net_dropout_max,
            default_neuron_max=net_default_neuron_max, checkpoint=checkpoint,
            num_of_folds=num_of_folds)
        if model_parameters == {}:
            print(" The tool couldn't find good parameters ")
            print(" Using default scikit-learn hyperparameters ")
            model_parameters = asc.default_model_parameters()
    else:
        print(" Auto hyperparameter tuning initiated. ")
        model_parameters = asc.hyperparameter_tuning(
            model_type, x_train, y_train, num_of_folds, scaler_option,
            n_iter=auto_tune_iter, random_state=random_state, verbose=1)
    csv_file = PurePath('static/config/') / PurePath(file_path).name
    output_name = str(csv_file) + ",Model=" + model_type + ",Scaler=" + scaler_option + ".tuned.prop"
    print(" Saving tuned hyperparameters to file: ", output_name)
    asc.save_parameters(model_parameters, output_name)
    response_to_send = {'output': output_name}
    self.write(json.dumps(response_to_send))
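# The tuned hyperparameters are written under static/config/ using the
# uploaded file's name. For example, tuning a hypothetical upload named
# "example.csv" with model_abbr="NET" and scaler="AutoLoad" would save to:
#
#   static/config/example.csv,Model=NET,Scaler=AutoLoad.tuned.prop
#
# and the response's "output" field carries that same path back to the client.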
def post(self):
    # Parse the train-and-save request sent by the web front end.
    json_obj = json_decode(self.request.body)
    target_col = json_obj["target_col"]
    input_cols = json_obj["input_cols"]
    num_fold = json_obj["num_fold"]
    tag = json_obj["tag"]
    MAE = json_obj["MAE"]
    R2 = json_obj["R2"]
    preset = json_obj["preset"]
    scaler_option = json_obj["scaler"]
    file_path = json_obj["path_to_data"]
    model_abbr = json_obj["model_abbr"]
    if preset == 'default':
        model_parameters = asc.default_model_parameters()
    else:
        model_parameters = asc.load_model_parameter_from_file(preset)
    data_df, x_train, y_train, header_x, header_y = asc.data_load_shuffle(
        csv_file=file_path, input_col=input_cols, cols_to_remove=[],
        target_col=target_col, random_state=0)
    if model_abbr != 'NET':
        model = asc.define_model_regression(model_type=model_abbr,
                                            model_parameters=model_parameters,
                                            x_header_size=x_train.shape[1])
        asc.train_and_save(model, PurePath('static/learned_models/' + tag + '.pkl'),
                           model_abbr, input_cols=header_x, target_col=header_y,
                           x_train=x_train, y_train=y_train,
                           scaler_option=scaler_option, path_to_save='.',
                           MAE=MAE, R2=R2)
    else:
        lr = float(model_parameters['net_learning_rate'])
        layer = int(model_parameters['net_layer_n'])
        dropout = float(model_parameters['net_dropout'])
        l_2 = float(model_parameters['net_l_2'])
        epochs = int(model_parameters['net_epochs'])
        batch_size = int(model_parameters['net_batch_size'])
        net_structure = [int(x) for x in model_parameters['net_structure'].split(" ")]
        optimizer = keras.optimizers.Adam(lr=lr)
        model = asc.net_define(params=net_structure, layer_n=layer,
                               input_size=x_train.shape[1], dropout=dropout,
                               l_2=l_2, optimizer=optimizer)
        asc.train_and_save_net(model, PurePath('static/learned_models/' + tag + '.pkl'),
                               input_cols=header_x, target_col=header_y,
                               x_train=x_train, y_train=y_train,
                               scaler_option=scaler_option, MAE=MAE, R2=R2,
                               path_to_save='.', num_of_folds=int(num_fold),
                               epochs=epochs, batch_size=batch_size)
    # Return the full list of saved models so the UI can refresh its view.
    model_files = glob.glob(str(PurePath("static/learned_models/*.pkl")))
    response_to_send = {}
    response_to_send['model_files'] = model_files
    self.write(json.dumps(response_to_send))
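# The trained model lands at static/learned_models/<tag>.pkl and the response
# lists everything in that directory. A sketch of the expected response body,
# with a hypothetical tag "my_model":
#
#   {"model_files": ["static/learned_models/my_model.pkl"]}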
def main(args):
    # loading data
    try:
        print("\n [ Data Loading ]")
        csv_file = args.input_file
        cols_to_remove = args.ignore_col
        target_col = args.target_col
        input_col = args.input_col
        model_type = args.model_type
        hyperparameter_file = asc.fix_value(args.hyperparameter_file, 'str')
        num_of_features = int(args.num_of_features)
        num_of_folds = int(args.num_of_folds)
        test = asc.str2bool(args.test)
        mapping = args.mapping
        output_file = args.output_file
        save_test_chart = asc.str2bool(args.save_test_chart)
        save_auto_tune = asc.str2bool(args.save_auto_tune)
        save_test_csv = asc.str2bool(args.save_test_csv)
        auto_tune = asc.str2bool(args.auto_tune)
        auto_tune_iter = int(args.auto_tune_iter)
        random_state = asc.fix_value(args.random_state, 'int')
        feature_selection = args.feature_selection
        scaler_option = args.scaler
        save_corr_chart = args.save_corr_chart
        save_corr_report = args.save_corr_report
        net_structure = args.net_structure
        net_layer_n = args.net_layer_n
        net_dropout = args.net_dropout
        net_l_2 = args.net_l_2
        net_learning_rate = args.net_learning_rate
        net_epochs = args.net_epochs
        net_batch_size = args.net_batch_size
        net_neuron_max = args.net_neuron_max
        net_batch_size_max = int(args.net_batch_size_max)
        net_layer_min = int(args.net_layer_min)
        net_layer_max = int(args.net_layer_max)
        net_dropout_max = float(args.net_dropout_max)
        net_default_neuron_max = int(args.net_default_neuron_max)
        net_checkpoint = args.net_checkpoint
        print(" Loading data from :%s" % (csv_file))
        print(" Columns to ignore :%s" % (cols_to_remove))
        data_df, x_train, y_train, header_x, header_y = asc.data_load_shuffle(
            csv_file, input_col, cols_to_remove, target_col,
            map_all=ast.literal_eval(mapping), random_state=random_state)
        print(" Input columns :%s" % (header_x))
        print(" Target column :%s" % (target_col))
    except Exception as e:
        print("* An error occurred while loading data from ", args.input_file)
        print(e)
        sys.exit()

    # feature selection via correlation analysis
    if feature_selection is not None:
        if save_corr_report is not None:
            if save_corr_report == 'True':
                save_corr_report = output_file + ".correlation, target_col=" + target_col + ".csv"
            else:
                save_corr_report = None
        if save_corr_chart is not None:
            if save_corr_chart == 'True':
                save_corr_chart = output_file
            else:
                save_corr_chart = None
        fs_dict, final_report = asc.correlation_analysis_all(
            data_df, target_col, num_of_features,
            file_to_save=save_corr_report, save_chart=save_corr_chart)
        input_col = fs_dict[feature_selection]
        print("\n [ Feature Selection ]")
        print(" Reloading the data using the selected features : ", input_col,
              " by criterion ", feature_selection, "top_k=", num_of_features)
        data_df, x_train, y_train, header_x, header_y = asc.data_load_shuffle(
            csv_file, input_col, cols_to_remove, target_col,
            map_all=ast.literal_eval(mapping), random_state=random_state)
        print(" Input columns :%s" % (header_x))
        print(" Target column :%s" % (target_col))

    # hyperparameter tuning
    if auto_tune is True and model_type != 'LR':
        print("\n [ Hyperparameter Tuning ]")
        print(" Training with %s ..." % asc.model_name(model_type))
        if model_type == 'NET':
            if net_checkpoint == 'True':
                checkpoint = csv_file
            else:
                checkpoint = None
            net_neuron_max, net_structure, net_l_2, net_learning_rate, net_epochs, \
                net_dropout, net_layer_n, net_batch_size = \
                clean_up_net_params(net_neuron_max, net_structure, net_l_2,
                                    net_learning_rate, net_epochs, net_dropout,
                                    net_layer_n, net_batch_size)
            model_parameters = asc.net_tuning(
                tries=auto_tune_iter, lr=net_learning_rate,
                x_train=x_train, y_train=y_train, layer=net_layer_n,
                params=net_structure, epochs=net_epochs, batch_size=net_batch_size,
                dropout=net_dropout, l_2=net_l_2, neuron_max=net_neuron_max,
                batch_size_max=net_batch_size_max, layer_min=net_layer_min,
                layer_max=net_layer_max, dropout_max=net_dropout_max,
                default_neuron_max=net_default_neuron_max, checkpoint=checkpoint,
                num_of_folds=num_of_folds)
            if model_parameters == {}:
                print(" The tool couldn't find good parameters ")
                print(" Using default scikit-learn hyperparameters ")
                model_parameters = asc.default_model_parameters()
        else:
            print(" Auto hyperparameter tuning initiated. ")
            if hyperparameter_file is not None:
                print(" Warning: %s will be overridden and not be used." % (hyperparameter_file))
            model_parameters = asc.hyperparameter_tuning(
                model_type, x_train, y_train, num_of_folds, scaler_option,
                n_iter=auto_tune_iter, random_state=random_state, verbose=1)
    else:
        if hyperparameter_file is not None and model_type != 'LR':
            print(" Using hyperparameters from the file %s" % (hyperparameter_file))
            model_parameters = asc.load_model_parameter_from_file(hyperparameter_file)
        else:
            print(" Using default scikit-learn hyperparameters ")
            model_parameters = asc.default_model_parameters()
        print(" Overriding parameters from command-line arguments ..")
        if net_structure != 'Tune':
            print(" net_structure is set to ", net_structure)
            model_parameters['net_structure'] = net_structure
        if net_dropout != 'Tune':
            print(" net_dropout is set to ", net_dropout)
            model_parameters['net_dropout'] = net_dropout
        if net_l_2 != 'Tune':
            print(" net_l_2 is set to ", net_l_2)
            model_parameters['net_l_2'] = net_l_2
        if net_learning_rate != 'Tune':
            print(" net_learning_rate is set to ", net_learning_rate)
            model_parameters['net_learning_rate'] = net_learning_rate
        if net_epochs != 'Tune':
            print(" net_epochs is set to ", net_epochs)
            model_parameters['net_epochs'] = net_epochs
        if net_batch_size != 'Tune':
            print(" net_batch_size is set to ", net_batch_size)
            model_parameters['net_batch_size'] = net_batch_size
    print("\n The following parameters will be used: ")
    print(model_parameters)

    MAE = None
    R2 = None

    # model evaluation via cross validation
    if test is True:
        try:
            print("\n [ Model Evaluation ]")
            if model_type != 'NET':
                model = asc.define_model_regression(model_type, model_parameters,
                                                    x_header_size=x_train.shape[1])
                predictions, actual_values = asc.train_and_predict(
                    model, x_train, y_train, scaler_option=scaler_option,
                    num_of_folds=num_of_folds)
                MAE, R2 = asc.evaluate(predictions, actual_values)
            else:
                lr = float(model_parameters['net_learning_rate'])
                layer = int(model_parameters['net_layer_n'])
                dropout = float(model_parameters['net_dropout'])
                l_2 = float(model_parameters['net_l_2'])
                epochs = int(model_parameters['net_epochs'])
                batch_size = int(model_parameters['net_batch_size'])
                net_structure = [int(x) for x in model_parameters['net_structure'].split(" ")]
                optimizer = keras.optimizers.Adam(lr=lr)
                model = asc.net_define(params=net_structure, layer_n=layer,
                                       input_size=x_train.shape[1], dropout=dropout,
                                       l_2=l_2, optimizer=optimizer)
                predictions, actual_values = asc.cross_val_predict_net(
                    model, epochs=epochs, batch_size=batch_size,
                    x_train=x_train, y_train=y_train, verbose=0,
                    scaler_option=scaler_option, num_of_folds=num_of_folds)
                MAE, R2 = asc.evaluate(predictions, actual_values)
        except Exception as e:
            print("* An error occurred while performing ML evaluation")
            print(e)
            sys.exit()
        if save_test_chart is True:
            chart_file = output_file + ",Model=" + model_type + ",MAE=" + str(MAE) + ",R2=" + str(R2) + ",Scaler=" + scaler_option + ".png"
            print(" Saving test charts to : ", chart_file)
            try:
                asc.save_comparison_chart(predictions, actual_values, chart_file)
            except:
                print(" * Warning: couldn't generate a chart - please make sure the model is properly trained .. ")
        if save_test_csv is True:
            test_csv_file = output_file + ",Model=" + model_type + ",MAE=" + str(MAE) + ",R2=" + str(R2) + ",Scaler=" + scaler_option + ".csv"
            print(" Saving test csv to : ", test_csv_file)
            try:
                asc.save_test_data(predictions, actual_values, test_csv_file)
            except:
                print(" * Warning: couldn't generate a csv - please make sure the model is properly trained .. ")
        print("* (%s)\t MAE = %8.3f, R2 = %8.3f via %d-fold cross validation "
              % (model_type, MAE, R2, num_of_folds))

    if save_auto_tune is True:
        prop_file = csv_file + ",Model=" + model_type + ",Scaler=" + scaler_option + ".tuned.prop"
        print(" Saving tuned hyperparameters to file: ", prop_file)
        asc.save_parameters(model_parameters, prop_file)

    # model training and saving
    try:
        print("\n [ Model Save ]")
        model_file = output_file + ",Model=" + model_type + ",MAE=" + str(MAE) + ",R2=" + str(R2) + ",Scaler=" + scaler_option
        if model_type != 'NET':
            model = asc.define_model_regression(model_type, model_parameters,
                                                x_header_size=x_train.shape[1])
            asc.train_and_save(model, model_file, model_type,
                               input_cols=header_x, target_col=header_y,
                               x_train=x_train, y_train=y_train,
                               scaler_option=scaler_option, path_to_save='.',
                               MAE=MAE, R2=R2)
        else:
            lr = float(model_parameters['net_learning_rate'])
            layer = int(model_parameters['net_layer_n'])
            dropout = float(model_parameters['net_dropout'])
            l_2 = float(model_parameters['net_l_2'])
            epochs = int(model_parameters['net_epochs'])
            batch_size = int(model_parameters['net_batch_size'])
            net_structure = [int(x) for x in model_parameters['net_structure'].split(" ")]
            optimizer = keras.optimizers.Adam(lr=lr)
            model = asc.net_define(params=net_structure, layer_n=layer,
                                   input_size=x_train.shape[1], dropout=dropout,
                                   l_2=l_2, optimizer=optimizer)
            asc.train_and_save_net(model, model_file,
                                   input_cols=header_x, target_col=header_y,
                                   x_train=x_train, y_train=y_train,
                                   scaler_option=scaler_option, MAE=MAE, R2=R2,
                                   path_to_save='.', num_of_folds=num_of_folds,
                                   epochs=epochs, batch_size=batch_size)
    except Exception as e:
        print("* An error occurred while training and saving .. ")
        print(e)
        sys.exit()
def main(args):
    """ Load data """
    try:
        print("\n [ Data Loading ]")
        save_metadata = asc.str2bool(args.save_metadata)
        train_type = args.train_type
        csv_file = PurePath(args.input_file)
        cols_to_remove = args.ignore_col
        target_col = args.target_col
        input_col = args.input_col
        model_type = args.model_type
        hyperparameter_file = asc.fix_value(args.hyperparameter_file, 'PurePath')
        num_of_features = int(args.num_of_features)
        num_of_folds = int(args.num_of_folds)
        test = asc.str2bool(args.test)
        mapping = args.mapping
        project_file = PurePath(args.project_file)
        save_test_chart = asc.str2bool(args.save_test_chart)
        save_auto_tune = asc.str2bool(args.save_auto_tune)
        save_test_csv = asc.str2bool(args.save_test_csv)
        auto_tune = asc.str2bool(args.auto_tune)
        auto_tune_iter = int(args.auto_tune_iter)
        random_state = asc.fix_value(args.random_state, 'int')
        feature_selection = args.feature_selection
        scaler_option = args.scaler
        save_corr_chart = args.save_corr_chart
        save_corr_report = args.save_corr_report
        net_structure = args.net_structure
        net_layer_n = args.net_layer_n
        net_dropout = args.net_dropout
        net_l_2 = args.net_l_2
        net_learning_rate = args.net_learning_rate
        net_epochs = args.net_epochs
        net_batch_size = args.net_batch_size
        net_neuron_max = args.net_neuron_max
        net_batch_size_max = int(args.net_batch_size_max)
        net_layer_min = int(args.net_layer_min)
        net_layer_max = int(args.net_layer_max)
        net_dropout_max = float(args.net_dropout_max)
        net_default_neuron_max = int(args.net_default_neuron_max)
        net_checkpoint = args.net_checkpoint
        num_of_class = int(args.num_of_class)
        print(" Loading data from :%s" % (csv_file))
        print(" Columns to ignore :%s" % (cols_to_remove))
        data_df, x_train, y_train, header_x, header_y = asc.data_load_shuffle(
            csv_file, input_col, cols_to_remove, target_col,
            map_all=ast.literal_eval(mapping), random_state=random_state)
        print(" Input columns :%s" % (header_x))
        print(" Target column :%s" % (target_col))
        # Create the project directory layout for all session artifacts.
        if not os.path.exists(project_file):
            os.makedirs(project_file)
        for folder in ["predictions", "correlations", "tests", "parameters",
                       "graphs", "models"]:
            if not os.path.exists(project_file / folder):
                os.makedirs(project_file / folder)
        input_name = csv_file.stem
    except Exception as e:
        print("* An error occurred while loading data from ", args.input_file)
        print(e)
        sys.exit()

    """ Analyze correlation """
    if feature_selection is not None:
        session_number = asc.get_session(project_file)
        if save_corr_report is not None:
            if save_corr_report == 'True':
                save_corr_report = project_file / "correlations" / (
                    "session" + str(session_number) + train_type + "_" +
                    csv_file.name + "_target=" + target_col + ".csv")
            else:
                save_corr_report = None
        if save_corr_chart is not None:
            if save_corr_chart == 'True':
                save_corr_chart = project_file / "correlations" / (
                    "session" + str(session_number) + train_type + "_" +
                    csv_file.name + "_target=" + target_col + ".png")
            else:
                save_corr_chart = None
        fs_dict, final_report = asc.correlation_analysis_all(
            data_df, target_col, num_of_features,
            file_to_save=save_corr_report, save_chart=save_corr_chart)
        input_col = fs_dict[feature_selection]
        print("\n [ Feature Selection ]")
        print(" Reloading the data using the selected features : ", input_col,
              " by criterion ", feature_selection, "top_k=", num_of_features)
        data_df, x_train, y_train, header_x, header_y = asc.data_load_shuffle(
            csv_file, input_col, cols_to_remove, target_col,
            map_all=ast.literal_eval(mapping), random_state=random_state)
        print(" Input columns :%s" % (header_x))
        print(" Target column :%s" % (target_col))
        print(" Saving correlation report to " + str(
            project_file / "correlations" /
            ("session" + str(session_number) + train_type + "_" +
             csv_file.name + "_target=" + target_col + ".csv")))
        print(" Saving correlation chart to " + str(
            project_file / "correlations" /
            ("session" + str(session_number) + train_type + "_" +
             csv_file.name + "_target=" + target_col + ".png")))

    """ Tune model """
    if auto_tune is True and model_type != 'LR' and model_type != 'LRC':
        print("\n [ Hyperparameter Tuning ]")
        print(" Training with %s ..." % asc.model_name(model_type))
        if model_type == 'NET':
            if net_checkpoint == 'True':
                checkpoint = csv_file
            else:
                checkpoint = None
            model_parameters = {}
            net_neuron_max, net_structure, net_l_2, net_learning_rate, net_epochs, \
                net_dropout, net_layer_n, net_batch_size = \
                clean_up_net_params(net_neuron_max, net_structure, net_l_2,
                                    net_learning_rate, net_epochs, net_dropout,
                                    net_layer_n, net_batch_size)
            if train_type == 'r':
                model_parameters = asc.net_tuning(
                    tries=auto_tune_iter, lr=net_learning_rate,
                    x_train=x_train, y_train=y_train, layer=net_layer_n,
                    params=net_structure, epochs=net_epochs,
                    batch_size=net_batch_size, dropout=net_dropout,
                    l_2=net_l_2, neuron_max=net_neuron_max,
                    batch_size_max=net_batch_size_max, layer_min=net_layer_min,
                    layer_max=net_layer_max, dropout_max=net_dropout_max,
                    default_neuron_max=net_default_neuron_max,
                    checkpoint=checkpoint, num_of_folds=num_of_folds)
            else:
                model_parameters = asc.net_tuning_classifier(
                    num_of_class=num_of_class, tries=auto_tune_iter,
                    lr=net_learning_rate, x_train=x_train, y_train=y_train,
                    layer=net_layer_n, params=net_structure, epochs=net_epochs,
                    batch_size=net_batch_size, dropout=net_dropout,
                    l_2=net_l_2, neuron_max=net_neuron_max,
                    batch_size_max=net_batch_size_max, layer_min=net_layer_min,
                    layer_max=net_layer_max, dropout_max=net_dropout_max,
                    default_neuron_max=net_default_neuron_max,
                    checkpoint=checkpoint, num_of_folds=num_of_folds)
        else:
            print(" Auto hyperparameter tuning initiated. ")
            if hyperparameter_file is not None:
                print(" Warning: %s will be overridden and not be used."
                      % (hyperparameter_file))
            if train_type == 'r':
                model_parameters = asc.hyperparameter_tuning(
                    model_type, x_train, y_train, num_of_folds, scaler_option,
                    n_iter=auto_tune_iter, random_state=random_state, verbose=1)
            else:
                model_parameters = asc.hyperparameter_tuning_classifier(
                    model_type, x_train, y_train, num_of_folds, scaler_option,
                    n_iter=auto_tune_iter, random_state=random_state, verbose=1)
        if model_parameters == {}:
            print(" The tool couldn't find good parameters ")
            print(" Using default scikit-learn hyperparameters ")
            model_parameters = asc.default_model_parameters()
    else:
        if hyperparameter_file is not None and model_type != 'LRC':
            print(" Using hyperparameters from the file %s" % (hyperparameter_file))
            model_parameters = asc.load_model_parameter_from_file(hyperparameter_file)
        else:
            print(" Using default scikit-learn hyperparameters ")
            if train_type == 'c':
                model_parameters = asc.default_model_parameters_classifier()
            else:
                model_parameters = asc.default_model_parameters()
        print(" Overriding parameters from command-line arguments ..")
        if net_structure != 'Tune':
            print(" net_structure is set to ", net_structure)
            model_parameters['net_structure'] = net_structure
        if net_dropout != 'Tune':
            print(" net_dropout is set to ", net_dropout)
            model_parameters['net_dropout'] = net_dropout
        if net_l_2 != 'Tune':
            print(" net_l_2 is set to ", net_l_2)
            model_parameters['net_l_2'] = net_l_2
        if net_learning_rate != 'Tune':
            print(" net_learning_rate is set to ", net_learning_rate)
            model_parameters['net_learning_rate'] = net_learning_rate
        if net_epochs != 'Tune':
            print(" net_epochs is set to ", net_epochs)
            model_parameters['net_epochs'] = net_epochs
        if net_batch_size != 'Tune':
            print(" net_batch_size is set to ", net_batch_size)
            model_parameters['net_batch_size'] = net_batch_size
    if train_type == 'r':
        model_parameters['scaler_option'] = scaler_option

    MAE = None
    R2 = None
    accuracy = None

    """ Evaluate model """
    if test is True:
        try:
            print("\n [ Model Evaluation ]")
            if model_type != 'NET':
                if train_type == 'r':
                    model = asc.define_model_regression(
                        model_type, model_parameters,
                        x_header_size=x_train.shape[1])
                    predictions, actual_values = asc.train_and_predict(
                        model, x_train, y_train, scaler_option=scaler_option,
                        num_of_folds=num_of_folds)
                    MAE, R2 = asc.evaluate(predictions, actual_values)
                else:
                    model = asc.define_model_classifier(
                        model_type, model_parameters,
                        x_header_size=x_train.shape[1])
                    predictions, actual_values = asc.train_and_predict(
                        model, x_train, y_train, scaler_option=scaler_option,
                        num_of_folds=num_of_folds)
                    accuracy = asc.evaluate_classifier(predictions, actual_values)
                    print("")
                    print("* Classification Report")
                    print(classification_report(actual_values, predictions))
                    print("* Confusion Matrix (See here: http://bit.ly/2WxfXTy)")
                    print(confusion_matrix(actual_values, predictions))
                    print("")
            else:
                lr = float(model_parameters['net_learning_rate'])
                layer = int(model_parameters['net_layer_n'])
                dropout = float(model_parameters['net_dropout'])
                l_2 = float(model_parameters['net_l_2'])
                epochs = int(model_parameters['net_epochs'])
                batch_size = int(model_parameters['net_batch_size'])
                net_structure = [int(x) for x in model_parameters['net_structure'].split(" ")]
                optimizer = keras.optimizers.Adam(lr=lr)
                if train_type == 'r':
                    model = asc.net_define(params=net_structure, layer_n=layer,
                                           input_size=x_train.shape[1],
                                           dropout=dropout, l_2=l_2,
                                           optimizer=optimizer)
                else:
                    model = asc.net_define_classifier(
                        params=net_structure, layer_n=layer,
                        input_size=x_train.shape[1], dropout=dropout, l_2=l_2,
                        optimizer=optimizer, num_of_class=num_of_class)
                predictions, actual_values = asc.cross_val_predict_net(
                    model, epochs=epochs, batch_size=batch_size,
                    x_train=x_train, y_train=y_train, verbose=0,
                    scaler_option=scaler_option, num_of_folds=num_of_folds)
                if train_type == 'r':
                    MAE, R2 = asc.evaluate(predictions, actual_values)
                    print("* (%s)\t MAE = %8.3f, R2 = %8.3f via %d-fold cross validation "
                          % (model_type, MAE, R2, num_of_folds))
                else:
                    accuracy = asc.evaluate_classifier(predictions, actual_values)
        except Exception as e:
            print("* An error occurred while performing ML evaluation")
            print(e)
            sys.exit()

    project_name = project_file.stem
    project_path = project_file.parent
    if save_metadata is True:
        print(" Saving metadata to " + str(project_file / "metadata.csv"))
        try:
            session_number = asc.save_metadata(
                vars(args), {'MAE': MAE, 'R2': R2, 'Accuracy': accuracy},
                project_file / "metadata.csv")
        except:
            print(" * Warning: couldn't generate metadata - please make sure the model is properly trained .. ")
    if save_test_chart is True and train_type == 'r':
        chart_file = project_file / "graphs" / (
            "session" + str(session_number) + "r_" + input_name + "_" +
            model_type + ".png")
        print(" Saving test charts to : ", str(chart_file))
        try:
            asc.save_comparison_chart(predictions, actual_values, chart_file)
        except:
            print(" * Warning: couldn't generate a chart - please make sure the model is properly trained .. ")
    if save_test_csv is True and train_type == 'r':
        test_csv_file = project_file / "tests" / (
            "session" + str(session_number) + "r_" + input_name + "_" +
            model_type + ".csv")
        print(" Saving test csv to : ", str(test_csv_file))
        try:
            asc.save_test_data(predictions, actual_values, test_csv_file)
        except:
            print(" * Warning: couldn't generate a csv - please make sure the model is properly trained .. ")
    if save_auto_tune is True:
        prop_file = project_file / "parameters" / (
            "session" + str(session_number) + train_type + "_" + input_name +
            "_" + model_type + ".tuned.prop")
        print(" Saving hyperparameters to file: ", str(prop_file))
        asc.save_parameters(model_parameters, prop_file)

    """ Save model """
    try:
        print("\n [ Model Save ]")
        model_file = project_file / "models" / (
            "session" + str(session_number) + train_type + "_" + input_name +
            "_" + model_type + ".pkl")
        if model_type != 'NET':
            if train_type == 'r':
                model = asc.define_model_regression(
                    model_type, model_parameters, x_header_size=x_train.shape[1])
                asc.train_and_save(model, model_file, model_type,
                                   input_cols=header_x, target_col=header_y,
                                   x_train=x_train, y_train=y_train,
                                   scaler_option=scaler_option,
                                   path_to_save='.', MAE=MAE, R2=R2)
            else:
                model = asc.define_model_classifier(
                    model_type, model_parameters, x_header_size=x_train.shape[1])
                asc.train_and_save_classifier(model, model_file, model_type,
                                              input_cols=header_x,
                                              target_col=header_y,
                                              x_train=x_train, y_train=y_train,
                                              scaler_option=scaler_option,
                                              path_to_save='.',
                                              accuracy=accuracy)
        else:
            lr = float(model_parameters['net_learning_rate'])
            layer = int(model_parameters['net_layer_n'])
            dropout = float(model_parameters['net_dropout'])
            l_2 = float(model_parameters['net_l_2'])
            epochs = int(model_parameters['net_epochs'])
            batch_size = int(model_parameters['net_batch_size'])
            net_structure = [int(x) for x in model_parameters['net_structure'].split(" ")]
            optimizer = keras.optimizers.Adam(lr=lr)
            if train_type == 'c':
                model = asc.net_define_classifier(
                    params=net_structure, layer_n=layer,
                    input_size=x_train.shape[1], dropout=dropout, l_2=l_2,
                    optimizer=optimizer, num_of_class=num_of_class)
                asc.train_and_save_net_classifier(
                    model, model_file, input_cols=header_x, target_col=header_y,
                    x_train=x_train, y_train=y_train,
                    scaler_option=scaler_option, accuracy=accuracy,
                    path_to_save='.', num_of_folds=num_of_folds,
                    epochs=epochs, batch_size=batch_size,
                    num_of_class=num_of_class)
            else:
                model = asc.net_define(params=net_structure, layer_n=layer,
                                       input_size=x_train.shape[1],
                                       dropout=dropout, l_2=l_2,
                                       optimizer=optimizer)
                asc.train_and_save_net(
                    model, model_file, input_cols=header_x, target_col=header_y,
                    x_train=x_train, y_train=y_train,
                    scaler_option=scaler_option, MAE=MAE, R2=R2,
                    path_to_save='.', num_of_folds=num_of_folds,
                    epochs=epochs, batch_size=batch_size)
    except Exception as e:
        print("* An error occurred while training and saving .. ")
        print(e)
        sys.exit()

    if test is True:
        if train_type == 'r':
            print("\n MAE: %s R2: %s" % (MAE, R2))
        else:
            print("\n Accuracy: %s" % accuracy)