Example #1
    def post(self):
        json_obj = json_decode(self.request.body)
        target_col = json_obj["target_col"]
        input_cols = json_obj["input_cols"]
        num_fold = json_obj["num_fold"]
        preset = json_obj["preset"]
        scaler_option = json_obj["scaler"]
        file_path = json_obj["path_to_data"]
        model_abbr = json_obj["model_abbr"]
        

        data_df, x_train, y_train, header_x, header_y = asc.data_load_shuffle(csv_file = file_path, input_col=input_cols, cols_to_remove=[], target_col=target_col, random_state=None)
    
        if preset == 'default':
            model_parameters = asc.default_model_parameters()
        else:
            model_parameters = asc.load_model_parameter_from_file(preset)
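        # "AutoLoad" defers to whatever scaler the preset parameters recorded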
        if scaler_option=="AutoLoad":
            scaler_option = model_parameters['scaler_option'] 

        try:
            
            if model_abbr == 'NET':
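                # unpack the network hyperparameters; 'net_structure' is given
                # as space-separated integers, one per layer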
                lr = float(model_parameters['net_learning_rate'])
                layer = int(model_parameters['net_layer_n'])
                dropout = float(model_parameters['net_dropout'])
                l_2 = float(model_parameters['net_l_2'])
                epochs = int(model_parameters['net_epochs'])
                batch_size = int(model_parameters['net_batch_size'])
                net_structure = [int(x) for x in model_parameters['net_structure'].split(" ")]

                optimizer = keras.optimizers.Adam(lr=lr)
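                # 'lr' is the older Keras alias for 'learning_rate'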
                model = asc.net_define(params=net_structure, layer_n = layer, input_size = x_train.shape[1], dropout=dropout, l_2=l_2, optimizer=optimizer)
                predictions, actual_values = asc.cross_val_predict_net(model, epochs=epochs, batch_size=batch_size, x_train = x_train, y_train = y_train, verbose = 0, scaler_option = scaler_option, force_to_proceed=True)
                MAE, R2 = asc.evaluate(predictions, actual_values)
                
            else:
                model = asc.define_model_regression(model_abbr, model_parameters, x_header_size = x_train.shape[1])
                predictions, actual_values = asc.train_and_predict(model, x_train, y_train, scaler_option=scaler_option, num_of_folds=int(num_fold))
                MAE, R2 = asc.evaluate(predictions, actual_values)

        except Exception as e:
            print(e)
            MAE = -1
            R2 = -1

        if MAE != -1:
            asc.save_comparison_chart(predictions, actual_values, PurePath("static/output/ml/ml_result.png"))
        response_to_send = {}
        response_to_send["MAE"] = float(MAE)
        response_to_send["R2"] = float(R2)
        response_to_send["input_cols"] = input_cols
        response_to_send["target_col"] = target_col
        response_to_send["model_abbr"] = model_abbr
        response_to_send["num_fold"] = num_fold
        response_to_send["scaler"] = scaler_option
        print(response_to_send)
        
        self.write(json.dumps(response_to_send))
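
A minimal client-side sketch of the request this handler expects, assuming the Tornado application routes POSTs to it; the endpoint path, host, and column names below are hypothetical, while the field names come from the json_obj lookups above:

import json
import requests  # assumed HTTP client

payload = {
    "target_col": "Band_gap",                 # hypothetical column names
    "input_cols": ["Density", "Volume"],
    "num_fold": 5,
    "preset": "default",                      # or a path to a saved parameter file
    "scaler": "StandardScaler",
    "path_to_data": "static/data/sample.csv",
    "model_abbr": "RF",                       # 'NET' takes the keras branch above
}
r = requests.post("http://localhost:8888/evaluate", data=json.dumps(payload))
print(r.json())  # {'MAE': ..., 'R2': ..., 'input_cols': ..., ...}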
Example #2
    def post(self):
        json_obj = json_decode(self.request.body)
        target_col = json_obj["target_col"]
        input_cols = json_obj["input_cols"]
        num_of_folds = int(json_obj["num_fold"])
        preset = json_obj["preset"]
        scaler_option = json_obj["scaler"]
        
        file_path = json_obj["path_to_data"]
        model_type = json_obj["model_abbr"]
        auto_tune_iter = 1000
        random_state = None
        
        data_df, x_train, y_train, header_x, header_y = asc.data_load_shuffle(csv_file = file_path, input_col=input_cols, cols_to_remove=[], target_col=target_col, random_state=None)
        
        if model_type=='NET':
            
            net_neuron_max, net_structure, net_l_2, net_learning_rate, net_epochs, net_dropout, net_layer_n, net_batch_size = \
            clean_up_net_params(-1,'Tune','Tune','Tune','Tune','Tune','Tune','Tune')
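            # -1 and 'Tune' above mark every network hyperparameter as free,
            # so asc.net_tuning below searches all of them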
            net_batch_size_max = 5
            net_layer_min = 3
            net_layer_max = 5
            net_dropout_max = 0.2
            net_default_neuron_max = 32
            checkpoint = None
            model_parameters = asc.net_tuning(tries = auto_tune_iter, lr = net_learning_rate, x_train = x_train, y_train = y_train, layer = net_layer_n, \
            params=net_structure, epochs=net_epochs, batch_size=net_batch_size, dropout=net_dropout, l_2 = net_l_2, neuron_max=net_neuron_max, batch_size_max=net_batch_size_max, \
            layer_min = net_layer_min, layer_max=net_layer_max, dropout_max=net_dropout_max, default_neuron_max=net_default_neuron_max, checkpoint = checkpoint, num_of_folds=num_of_folds)

            if model_parameters == {}:
                print(" The tool couldn't find good parameters ")
                print(" Using default scikit-learn hyperparameters ")
                model_parameters = asc.default_model_parameters()

        else:
            print (" Auto hyperparameter tuning initiated. ")
            model_parameters = asc.hyperparameter_tuning(model_type, x_train, y_train
                                                , num_of_folds, scaler_option
                                                , n_iter=auto_tune_iter, random_state=random_state, verbose=1)

        csv_file = PurePath('static/config/') / PurePath(file_path).name
        print(" Saving tuned hyperparameters to file: ", str(csv_file)+",WEB,Model="+model_type+",Scaler="+scaler_option+".tuned.prop")
        asc.save_parameters(model_parameters, str(csv_file)+",Model="+model_type+",Scaler="+scaler_option+".tuned.prop")

        response_to_send = {'output':str(csv_file)+",Model="+model_type+",Scaler="+scaler_option+".tuned.prop"}
        
        self.write(json.dumps(response_to_send))
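
The handler answers with the path of the .tuned.prop file it just wrote. A hedged round-trip sketch (endpoint paths and column names are hypothetical): feeding the returned 'output' path back as the "preset" field of the evaluation handler in Example #1 reuses the tuned hyperparameters.

import json
import requests  # assumed HTTP client

payload = {"target_col": "Band_gap", "input_cols": ["Density", "Volume"],
           "num_fold": 5, "preset": "default", "scaler": "StandardScaler",
           "path_to_data": "static/data/sample.csv", "model_abbr": "RF"}
tuned = requests.post("http://localhost:8888/tune", data=json.dumps(payload)).json()
# tuned["output"] is e.g. 'static/config/sample.csv,Model=RF,Scaler=StandardScaler.tuned.prop'
eval_payload = dict(payload, preset=tuned["output"])
requests.post("http://localhost:8888/evaluate", data=json.dumps(eval_payload))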
Example #3
    def post(self):
        
        json_obj = json_decode(self.request.body)
        target_col = json_obj["target_col"]
        input_cols = json_obj["input_cols"]
        num_fold = json_obj["num_fold"]
        tag = json_obj["tag"]
        MAE = json_obj["MAE"]
        R2 = json_obj["R2"]
        preset = json_obj["preset"]
        scaler_option = json_obj["scaler"]
        
        file_path = json_obj["path_to_data"]
        model_abbr = json_obj["model_abbr"]

        if preset == 'default':
            model_parameters = asc.default_model_parameters()
        else:
            model_parameters = asc.load_model_parameter_from_file(preset)

        data_df, x_train, y_train, header_x, header_y = asc.data_load_shuffle(csv_file = file_path, input_col=input_cols, cols_to_remove=[], target_col=target_col, random_state=0)
            
        if model_abbr != 'NET':
            model = asc.define_model_regression(model_type=model_abbr, model_parameters = model_parameters, x_header_size = x_train.shape[1])
            asc.train_and_save(model, PurePath('static/learned_models/'+tag+'.pkl'), model_abbr
                            , input_cols=header_x, target_col=header_y
                            , x_train=x_train, y_train=y_train, scaler_option=scaler_option, path_to_save = '.', MAE=MAE, R2=R2)
        else:
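            # rebuild the network exactly as during evaluation, then train on
            # the full data and persist it with the client-reported MAE/R2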
            
            lr = float(model_parameters['net_learning_rate'])
            layer = int(model_parameters['net_layer_n'])
            dropout = float(model_parameters['net_dropout'])
            l_2 = float(model_parameters['net_l_2'])
            epochs = int(model_parameters['net_epochs'])
            batch_size = int(model_parameters['net_batch_size'])
            net_structure = [int(x) for x in model_parameters['net_structure'].split(" ")]

            optimizer = keras.optimizers.Adam(lr=lr)
            model = asc.net_define(params=net_structure, layer_n = layer, input_size = x_train.shape[1], dropout=dropout, l_2=l_2, optimizer=optimizer)
            asc.train_and_save_net(model, PurePath('static/learned_models/'+tag+'.pkl'), input_cols=header_x, target_col=header_y, x_train=x_train, y_train=y_train, scaler_option=scaler_option, MAE=MAE, R2=R2, path_to_save = '.', num_of_folds=5, epochs=epochs, batch_size=batch_size)

        model_files = glob.glob(str(PurePath("static/learned_models/*.pkl")))

        response_to_send = {}
        response_to_send['model_files'] = model_files
        self.write(json.dumps(response_to_send))
Example #4
def main(args):

    # loading data
    try:
        
        print("\n [ Data Loading ]")

        csv_file = args.input_file
        cols_to_remove = args.ignore_col
        target_col = args.target_col
        input_col = args.input_col
        model_type = args.model_type
        hyperparameter_file = asc.fix_value(args.hyperparameter_file,'str')
        num_of_features = int(args.num_of_features)
        num_of_folds = int(args.num_of_folds)
        test = asc.str2bool(args.test)
        mapping = args.mapping
        output_file = args.output_file
        save_test_chart = asc.str2bool(args.save_test_chart)
        save_auto_tune = asc.str2bool(args.save_auto_tune)
        save_test_csv = asc.str2bool(args.save_test_csv)
        auto_tune = asc.str2bool(args.auto_tune)
        auto_tune_iter = int(args.auto_tune_iter)
        random_state = asc.fix_value(args.random_state,'int')
        feature_selection = args.feature_selection
        scaler_option = args.scaler
        save_corr_chart = args.save_corr_chart
        save_corr_report = args.save_corr_report

        net_structure = args.net_structure
        net_layer_n = args.net_layer_n
        net_dropout = args.net_dropout
        net_l_2 = args.net_l_2
        net_learning_rate = args.net_learning_rate
        net_epochs = args.net_epochs
        net_batch_size = args.net_batch_size
        net_neuron_max = args.net_neuron_max
        net_batch_size_max = int(args.net_batch_size_max)
        net_layer_min = int(args.net_layer_min)
        net_layer_max = int(args.net_layer_max)
        net_dropout_max = float(args.net_dropout_max)
        net_default_neuron_max = int(args.net_default_neuron_max)
        net_checkpoint = args.net_checkpoint

        print(" Loading data from :%s"%(csv_file))
        print(" Columns to ignore :%s"%(cols_to_remove))
        
        data_df, x_train, y_train, header_x, header_y = asc.data_load_shuffle(csv_file, input_col, cols_to_remove, target_col, map_all = ast.literal_eval(mapping), random_state = random_state)
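        # 'mapping' arrives as a string holding a dict literal (e.g. "{}") and
        # is parsed by ast.literal_eval above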

        print(" Input columns :%s"%(header_x))
        print(" Target column :%s"%(target_col))
        

    except Exception as e:
        print("* An error occurred while loading data from ", args.input_file)
        print(e)
        sys.exit()

    if feature_selection is not None:

        if save_corr_report is not None:
            if save_corr_report =='True':
                save_corr_report = output_file+".correlation, target_col="+target_col+".csv"
            else:
                save_corr_report = None
            
        if save_corr_chart is not None:
            if save_corr_chart=='True':
                save_corr_chart = output_file
            else:
                save_corr_chart = None


        fs_dict, final_report = asc.correlation_analysis_all(data_df, target_col, num_of_features, file_to_save = save_corr_report, save_chart = save_corr_chart)
        input_col = fs_dict[feature_selection]
        print("\n [ Feature Selection ]")
        
        print(" Reloading the data using the selected features : ", input_col," by criteron ", feature_selection, "top_k=", num_of_features)
        
        data_df, x_train, y_train, header_x, header_y = asc.data_load_shuffle(csv_file, input_col, cols_to_remove, target_col, map_all = ast.literal_eval(mapping), random_state = random_state)

        print(" Input columns :%s"%(header_x))
        print(" Target column :%s"%(target_col))

    if auto_tune is True and model_type != 'LR':
        
        print("\n [ Hyperparameter Tuning ]")
        print(" Training with %s ..."%asc.model_name(model_type))

        if model_type=='NET':
            
            if net_checkpoint=='True':
                checkpoint = csv_file
            else:
                checkpoint = None

            net_neuron_max, net_structure, net_l_2, net_learning_rate, net_epochs, net_dropout, net_layer_n, net_batch_size = \
            clean_up_net_params(net_neuron_max, net_structure, net_l_2, net_learning_rate, net_epochs, net_dropout, net_layer_n, net_batch_size)
            
            model_parameters = asc.net_tuning(tries = auto_tune_iter, lr = net_learning_rate, x_train = x_train, y_train = y_train, layer = net_layer_n, \
            params=net_structure, epochs=net_epochs, batch_size=net_batch_size, dropout=net_dropout, l_2 = net_l_2, neuron_max=net_neuron_max, batch_size_max=net_batch_size_max, \
            layer_min = net_layer_min, layer_max=net_layer_max, dropout_max=net_dropout_max, default_neuron_max=net_default_neuron_max, checkpoint = checkpoint, num_of_folds=num_of_folds)

            if model_parameters == {}:
                print(" The tool couldn't find good parameters ")
                print(" Using default scikit-learn hyperparameters ")
                model_parameters = asc.default_model_parameters()

        else:
            print (" Auto hyperparameter tuning initiated. ")
            if hyperparameter_file is not None:
                print (" Warning: %s will be overrided and not be used."%(hyperparameter_file))

            model_parameters = asc.hyperparameter_tuning(model_type, x_train, y_train
                                                , num_of_folds, scaler_option
                                                , n_iter=auto_tune_iter, random_state=random_state, verbose=1)
    else:

        if hyperparameter_file is not None and model_type != 'LR':

            print(" Using hyperparameters from the file %s"%(hyperparameter_file))
            model_parameters = asc.load_model_parameter_from_file(hyperparameter_file)

        else:
            print (" Using default scikit-learn hyperparameters ")
            model_parameters = asc.default_model_parameters()            

            print (" Overriding parameters from command-line arguments ..")
            if net_structure != 'Tune':
                print("net_structure is set to ", net_structure)
                model_parameters['net_structure'] = net_structure
            if net_dropout != 'Tune':
                print("net_dropout is set to ", net_dropout)
                model_parameters['net_dropout'] = net_dropout
            if net_l_2 != 'Tune':
                print("net_l_2 is set to ", net_l_2)
                model_parameters['net_l_2'] = net_l_2
            if net_learning_rate != 'Tune':
                print("net_learning_rate is set to ", net_learning_rate)
                model_parameters['net_learning_rate'] = net_learning_rate
            if net_epochs != 'Tune':
                print("net_epochs is set to ", net_epochs)
                model_parameters['net_epochs'] = net_epochs
            if net_batch_size != 'Tune':
                print("net_batch_size is set to ", net_batch_size)
                model_parameters['net_batch_size'] = net_batch_size
                           
    print("\n The following parameters will be used: ")
    print(model_parameters)

    MAE = None
    R2 = None

    if test is True:

        try:
           
            print("\n [ Model Evaluation ]")

            if model_type != 'NET':
                model = asc.define_model_regression(model_type, model_parameters, x_header_size = x_train.shape[1])
                predictions, actual_values = asc.train_and_predict(model, x_train, y_train, scaler_option=scaler_option, num_of_folds=num_of_folds)
                MAE, R2 = asc.evaluate(predictions, actual_values)
               
            else:   
                
                lr = float(model_parameters['net_learning_rate'])
                layer = int(model_parameters['net_layer_n'])
                dropout = float(model_parameters['net_dropout'])
                l_2 = float(model_parameters['net_l_2'])
                epochs = int(model_parameters['net_epochs'])
                batch_size = int(model_parameters['net_batch_size'])
                net_structure = [int(x) for x in model_parameters['net_structure'].split(" ")]

                optimizer = keras.optimizers.Adam(lr=lr)
                model = asc.net_define(params=net_structure, layer_n = layer, input_size = x_train.shape[1], dropout=dropout, l_2=l_2, optimizer=optimizer)
                predictions, actual_values = asc.cross_val_predict_net(model, epochs=epochs, batch_size=batch_size, x_train = x_train, y_train = y_train, verbose = 0, scaler_option = scaler_option, num_of_folds = num_of_folds)
                MAE, R2 = asc.evaluate(predictions, actual_values)

            
        except Exception as e:
            print("* An error occurred while performing ML evaluation")
            print(e)
            sys.exit()

        if save_test_chart is True:
            print(" Saving test charts to : ", output_file+",Model="+model_type+",MAE="+str(MAE)+",R2="+str(R2)+",Scaler="+scaler_option+".png")
            try:    
                asc.save_comparison_chart(predictions, actual_values, output_file+",Model="+model_type+",MAE="+str(MAE)+",R2="+str(R2)+",Scaler="+scaler_option+".png")
            except:
                print(" * Warning: couldn't generate a chart - please make sure the model is properly trained .. ")
        
        if save_test_csv is True:
            print(" Saving test csv to : ", output_file+",Model="+model_type+",MAE="+str(MAE)+",R2="+str(R2)+",Scaler="+scaler_option+".csv")
            try:   
                asc.save_test_data(predictions, actual_values, output_file+",Model="+model_type+",MAE="+str(MAE)+",R2="+str(R2)+",Scaler="+scaler_option+".csv")
            except:
                print(" * Warning: couldn't generate a csv - please make sure the model is properly trained .. ")

        print("* (%s)\t MAE = %8.3f, R2 = %8.3f via %d-fold cross validation "%(model_type, MAE, R2, num_of_folds))

        if save_auto_tune is True:
            print(" Saving tuned hyperparameters to file: ", csv_file+",Model="+model_type+",Scaler="+scaler_option+".tuned.prop")
            asc.save_parameters(model_parameters, csv_file+",Model="+model_type+",Scaler="+scaler_option+".tuned.prop")

    try:

        print("\n [ Model Save ]")

        if model_type != 'NET':
            model = asc.define_model_regression(model_type, model_parameters, x_header_size = x_train.shape[1])
            asc.train_and_save(model, output_file+",Model="+model_type+",MAE="+str(MAE)+",R2="+str(R2)+",Scaler="+scaler_option, model_type
                        , input_cols=header_x, target_col=header_y
                        , x_train=x_train, y_train=y_train, scaler_option=scaler_option, path_to_save = '.', MAE=MAE, R2=R2)
        else:
            
            lr = float(model_parameters['net_learning_rate'])
            layer = int(model_parameters['net_layer_n'])
            dropout = float(model_parameters['net_dropout'])
            l_2 = float(model_parameters['net_l_2'])
            epochs = int(model_parameters['net_epochs'])
            batch_size = int(model_parameters['net_batch_size'])
            net_structure = [int(x) for x in model_parameters['net_structure'].split(" ")]

            optimizer = keras.optimizers.Adam(lr=lr)
            model = asc.net_define(params=net_structure, layer_n = layer, input_size = x_train.shape[1], dropout=dropout, l_2=l_2, optimizer=optimizer)
            asc.train_and_save_net(model, output_file+",Model="+model_type+",MAE="+str(MAE)+",R2="+str(R2)+",Scaler="+scaler_option, input_cols=header_x, target_col=header_y, x_train=x_train, y_train=y_train, scaler_option=scaler_option, MAE=MAE, R2=R2, path_to_save = '.', num_of_folds=num_of_folds, epochs=epochs, batch_size=batch_size)
    
    except Exception as e:
        print("* An error occurred while training and saving .. ")
        print(e)
        sys.exit()
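
Since main() only reads attributes off args, a plain argparse.Namespace with the fields referenced above is enough to drive it. A hedged sketch; every value below is illustrative, not a default taken from the script itself:

import argparse

args = argparse.Namespace(
    input_file="data.csv", ignore_col=[], target_col="y", input_col=None,
    model_type="RF", hyperparameter_file="None", num_of_features="5",
    num_of_folds="5", test="True", mapping="{}", output_file="out",
    save_test_chart="True", save_auto_tune="False", save_test_csv="False",
    auto_tune="False", auto_tune_iter="50", random_state="0",
    feature_selection=None, scaler="StandardScaler",
    save_corr_chart=None, save_corr_report=None,
    net_structure="Tune", net_layer_n="Tune", net_dropout="Tune",
    net_l_2="Tune", net_learning_rate="Tune", net_epochs="Tune",
    net_batch_size="Tune", net_neuron_max=-1, net_batch_size_max="5",
    net_layer_min="3", net_layer_max="5", net_dropout_max="0.2",
    net_default_neuron_max="32", net_checkpoint="False",
)
main(args)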
Example #5
def main(args):
    """
    Load data
    """
    try:

        print("\n [ Data Loading ]")

        save_metadata = asc.str2bool(args.save_metadata)
        train_type = args.train_type
        csv_file = PurePath(args.input_file)
        cols_to_remove = args.ignore_col
        target_col = args.target_col
        input_col = args.input_col
        model_type = args.model_type
        hyperparameter_file = asc.fix_value(args.hyperparameter_file,
                                            'PurePath')
        num_of_features = int(args.num_of_features)
        num_of_folds = int(args.num_of_folds)
        test = asc.str2bool(args.test)
        mapping = args.mapping
        project_file = PurePath(args.project_file)
        save_test_chart = asc.str2bool(args.save_test_chart)
        save_auto_tune = asc.str2bool(args.save_auto_tune)
        save_test_csv = asc.str2bool(args.save_test_csv)
        auto_tune = asc.str2bool(args.auto_tune)
        auto_tune_iter = int(args.auto_tune_iter)
        random_state = asc.fix_value(args.random_state, 'int')
        feature_selection = args.feature_selection
        scaler_option = args.scaler
        save_corr_chart = args.save_corr_chart
        save_corr_report = args.save_corr_report

        net_structure = args.net_structure
        net_layer_n = args.net_layer_n
        net_dropout = args.net_dropout
        net_l_2 = args.net_l_2
        net_learning_rate = args.net_learning_rate
        net_epochs = args.net_epochs
        net_batch_size = args.net_batch_size
        net_neuron_max = args.net_neuron_max
        net_batch_size_max = int(args.net_batch_size_max)
        net_layer_min = int(args.net_layer_min)
        net_layer_max = int(args.net_layer_max)
        net_dropout_max = float(args.net_dropout_max)
        net_default_neuron_max = int(args.net_default_neuron_max)
        net_checkpoint = args.net_checkpoint
        num_of_class = int(args.num_of_class)

        print(" Loading data from :%s" % (csv_file))
        print(" Columns to ignore :%s" % (cols_to_remove))

        data_df, x_train, y_train, header_x, header_y = asc.data_load_shuffle(
            csv_file,
            input_col,
            cols_to_remove,
            target_col,
            map_all=ast.literal_eval(mapping),
            random_state=random_state)

        print(" Input columns :%s" % (header_x))
        print(" Target column :%s" % (target_col))

        if not os.path.exists(project_file): os.makedirs(project_file)
        for folder in [
                "predictions", "correlations", "tests", "parameters", "graphs",
                "models"
        ]:
            if not os.path.exists(project_file / folder):
                os.makedirs(project_file / folder)
        input_name = csv_file.stem

    except Exception as e:
        print("* An error occurred while loading data from ", args.input_file)
        print(e)
        sys.exit()
    """
    Analyze correlation
    """
    if feature_selection is not None:
        session_number = asc.get_session(project_file)
        if save_corr_report is not None:
            if save_corr_report == 'True':

                save_corr_report = project_file / "correlations" / (
                    "session" + str(session_number) + train_type + "_" +
                    csv_file.name + "_target=" + target_col + ".csv")
            else:
                save_corr_report = None

        if save_corr_chart is not None:
            if save_corr_chart == 'True':
                save_corr_chart = project_file / "correlations" / (
                    "session" + str(session_number) + train_type + "_" +
                    csv_file.name + "_target=" + target_col + ".png")
            else:
                save_corr_chart = None

        fs_dict, final_report = asc.correlation_analysis_all(
            data_df,
            target_col,
            num_of_features,
            file_to_save=save_corr_report,
            save_chart=save_corr_chart)
        input_col = fs_dict[feature_selection]
        print("\n [ Feature Selection ]")

        print(" Reloading the data using the selected features : ", input_col,
              " by criteron ", feature_selection, "top_k=", num_of_features)

        data_df, x_train, y_train, header_x, header_y = asc.data_load_shuffle(
            csv_file,
            input_col,
            cols_to_remove,
            target_col,
            map_all=ast.literal_eval(mapping),
            random_state=random_state)

        print(" Input columns :%s" % (header_x))
        print(" Target column :%s" % (target_col))
        print(" Saving correlation report to " +
              str(project_file / "correlations" /
                  ("session" + str(session_number) + train_type + "_" +
                   csv_file.name + "_target=" + target_col + ".csv")))
        print(" Saving correlation chart to " +
              str(project_file / "correlations" /
                  ("session" + str(session_number) + train_type + "_" +
                   csv_file.name + "_target=" + target_col + ".png")))
    """
    Tune model
    """
    if auto_tune is True and model_type != 'LR' and model_type != 'LRC':

        print("\n [ Hyperparameter Tuning ]")
        print(" Training with %s ..." % asc.model_name(model_type))

        if model_type == 'NET':

            if net_checkpoint == 'True':
                checkpoint = csv_file
            else:
                checkpoint = None

            model_parameters = {}
            net_neuron_max, net_structure, net_l_2, net_learning_rate, net_epochs, net_dropout, net_layer_n, net_batch_size = \
            clean_up_net_params(net_neuron_max, net_structure, net_l_2, net_learning_rate, net_epochs, net_dropout, net_layer_n, net_batch_size)

            if train_type == 'r':
                model_parameters = asc.net_tuning(tries = auto_tune_iter, lr = net_learning_rate, x_train = x_train, y_train = y_train, layer = net_layer_n, \
                params=net_structure, epochs=net_epochs, batch_size=net_batch_size, dropout=net_dropout, l_2 = net_l_2, neuron_max=net_neuron_max, batch_size_max=net_batch_size_max, \
                layer_min = net_layer_min, layer_max=net_layer_max, dropout_max=net_dropout_max, default_neuron_max=net_default_neuron_max, checkpoint = checkpoint, num_of_folds=num_of_folds)
            else:
                model_parameters = asc.net_tuning_classifier(num_of_class = num_of_class, tries = auto_tune_iter, lr = net_learning_rate, x_train = x_train, y_train = y_train, layer = net_layer_n, \
                params=net_structure, epochs=net_epochs, batch_size=net_batch_size, dropout=net_dropout, l_2 = net_l_2, neuron_max=net_neuron_max, batch_size_max=net_batch_size_max, \
                layer_min = net_layer_min, layer_max=net_layer_max, dropout_max=net_dropout_max, default_neuron_max=net_default_neuron_max, checkpoint = checkpoint, num_of_folds=num_of_folds)

        else:
            print(" Auto hyperparameter tuning initiated. ")

            if hyperparameter_file is not None:
                print(" Warning: %s will be overrided and not be used." %
                      (hyperparameter_file))
            if train_type == 'r':
                model_parameters = asc.hyperparameter_tuning(
                    model_type,
                    x_train,
                    y_train,
                    num_of_folds,
                    scaler_option,
                    n_iter=auto_tune_iter,
                    random_state=random_state,
                    verbose=1)
            else:
                model_parameters = asc.hyperparameter_tuning_classifier(
                    model_type,
                    x_train,
                    y_train,
                    num_of_folds,
                    scaler_option,
                    n_iter=auto_tune_iter,
                    random_state=random_state,
                    verbose=1)

        if model_parameters == {}:
            print(" The tool couldn't find good parameters ")
            print(" Using default scikit-learn hyperparameters ")
            model_parameters = asc.default_model_parameters()

    else:

        if hyperparameter_file is not None and model_type != 'LRC':

            print(" Using hyperparameters from the file %s" %
                  (hyperparameter_file))
            model_parameters = asc.load_model_parameter_from_file(
                hyperparameter_file)

        else:
            print(" Using default scikit-learn hyperparameters ")
            if train_type == 'c':
                model_parameters = asc.default_model_parameters_classifier()
            else:
                model_parameters = asc.default_model_parameters()

            print(" Overriding parameters from command-line arguments ..")
            if net_structure != 'Tune':
                print(" net_structure is set to ", net_structure)
                model_parameters['net_structure'] = net_structure
            if net_dropout != 'Tune':
                print(" net_dropout is set to ", net_dropout)
                model_parameters['net_dropout'] = net_dropout
            if net_l_2 != 'Tune':
                print(" net_l_2 is set to ", net_l_2)
                model_parameters['net_l_2'] = net_l_2
            if net_learning_rate != 'Tune':
                print(" net_learning_rate is set to ", net_learning_rate)
                model_parameters['net_learning_rate'] = net_learning_rate
            if net_epochs != 'Tune':
                print(" net_epochs is set to ", net_epochs)
                model_parameters['net_epochs'] = net_epochs
            if net_batch_size != 'Tune':
                print(" net_batch_size is set to ", net_batch_size)
                model_parameters['net_batch_size'] = net_batch_size

    if train_type == 'r': model_parameters['scaler_option'] = scaler_option
    MAE = None
    R2 = None

    accuracy = None
    """
    Evaluate model
    """
    if test is True:

        try:

            print("\n [ Model Evaluation ]")

            if model_type != 'NET':
                if train_type == 'r':
                    model = asc.define_model_regression(
                        model_type,
                        model_parameters,
                        x_header_size=x_train.shape[1])
                    predictions, actual_values = asc.train_and_predict(
                        model,
                        x_train,
                        y_train,
                        scaler_option=scaler_option,
                        num_of_folds=num_of_folds)
                    MAE, R2 = asc.evaluate(predictions, actual_values)
                else:
                    model = asc.define_model_classifier(
                        model_type,
                        model_parameters,
                        x_header_size=x_train.shape[1])
                    predictions, actual_values = asc.train_and_predict(
                        model,
                        x_train,
                        y_train,
                        scaler_option=scaler_option,
                        num_of_folds=num_of_folds)
                    accuracy = asc.evaluate_classifier(predictions,
                                                       actual_values)
                    print("")
                    print("* Classification Report")
                    print(classification_report(actual_values, predictions))

                    print(
                        "* Confusion Matrix (See here: http://bit.ly/2WxfXTy)")
                    print(confusion_matrix(actual_values, predictions))
                    print("")
            else:

                lr = float(model_parameters['net_learning_rate'])
                layer = int(model_parameters['net_layer_n'])
                dropout = float(model_parameters['net_dropout'])
                l_2 = float(model_parameters['net_l_2'])
                epochs = int(model_parameters['net_epochs'])
                batch_size = int(model_parameters['net_batch_size'])
                net_structure = [
                    int(x)
                    for x in model_parameters['net_structure'].split(" ")
                ]

                optimizer = keras.optimizers.Adam(lr=lr)
                if train_type == 'r':
                    model = asc.net_define(params=net_structure,
                                           layer_n=layer,
                                           input_size=x_train.shape[1],
                                           dropout=dropout,
                                           l_2=l_2,
                                           optimizer=optimizer)
                else:
                    model = asc.net_define_classifier(
                        params=net_structure,
                        layer_n=layer,
                        input_size=x_train.shape[1],
                        dropout=dropout,
                        l_2=l_2,
                        optimizer=optimizer,
                        num_of_class=num_of_class)
                predictions, actual_values = asc.cross_val_predict_net(
                    model,
                    epochs=epochs,
                    batch_size=batch_size,
                    x_train=x_train,
                    y_train=y_train,
                    verbose=0,
                    scaler_option=scaler_option,
                    num_of_folds=num_of_folds)
                if train_type == 'r':
                    MAE, R2 = asc.evaluate(predictions, actual_values)
                    print(
                        "* (%s)\t MAE = %8.3f, R2 = %8.3f via %d-fold cross validation "
                        % (model_type, MAE, R2, num_of_folds))
                else:
                    accuracy = asc.evaluate_classifier(predictions,
                                                       actual_values)

        except Exception as e:
            print("* An error occurred while performing ML evaluation")
            print(e)
            sys.exit()

        project_name = project_file.stem
        project_path = project_file.parent

        if save_metadata is True:
            print(" Saving metadata to " + str(project_file / "metadata") +
                  ".csv")
            try:
                session_number = asc.save_metadata(
                    vars(args), {
                        'MAE': MAE,
                        'R2': R2,
                        'Accuracy': accuracy
                    }, project_file / "metadata.csv")
            except:
                print(
                    " * Warning: couldn't generate metadata - please make sure the model is properly trained .. "
                )

        if save_test_chart is True and train_type == 'r':
            print(
                " Saving test charts to : ",
                str(project_file / "graphs" /
                    ("session" + str(session_number) + "r_" + input_name +
                     "_" + model_type + ".png")))
            try:
                asc.save_comparison_chart(
                    predictions, actual_values, project_file / "graphs" /
                    ("session" + str(session_number) + "r_" + input_name +
                     "_" + model_type + ".png"))
            except:
                print(
                    " * Warning: couldn't generate a chart - please make sure the model is properly trained .. "
                )

        if save_test_csv is True and train_type == 'r':
            print(
                " Saving test csv to : ",
                str(project_file / "tests" /
                    ("session" + str(session_number) + "r_" + input_name +
                     "_" + model_type + ".csv")))
            try:
                asc.save_test_data(
                    predictions, actual_values, project_file / "tests" /
                    ("session" + str(session_number) + "r_" + input_name +
                     "_" + model_type + ".csv"))
            except:
                print(
                    " * Warning: couldn't generate a csv - please make sure the model is properly trained .. "
                )

        if save_auto_tune is True:
            print(
                " Saving hyperparameters to file: ",
                str(project_file / "parameters" /
                    ("session" + str(session_number) + train_type + "_" +
                     input_name + "_" + model_type + ".tuned.prop")))
            asc.save_parameters(
                model_parameters, project_file / "parameters" /
                ("session" + str(session_number) + train_type + "_" +
                 input_name + "_" + model_type + ".tuned.prop"))
    """
    Save model
    """
    try:

        print("\n [ Model Save ]")

        if model_type != 'NET':
            if train_type == 'r':
                model = asc.define_model_regression(
                    model_type,
                    model_parameters,
                    x_header_size=x_train.shape[1])
                asc.train_and_save(
                    model,
                    project_file / "models" /
                    ("session" + str(session_number) + train_type + "_" +
                     input_name + "_" + model_type + ".pkl"),
                    model_type,
                    input_cols=header_x,
                    target_col=header_y,
                    x_train=x_train,
                    y_train=y_train,
                    scaler_option=scaler_option,
                    path_to_save='.',
                    MAE=MAE,
                    R2=R2)
            else:
                model = asc.define_model_classifier(
                    model_type,
                    model_parameters,
                    x_header_size=x_train.shape[1])
                asc.train_and_save_classifier(
                    model,
                    project_file / "models" /
                    ("session" + str(session_number) + train_type + "_" +
                     input_name + "_" + model_type + ".pkl"),
                    model_type,
                    input_cols=header_x,
                    target_col=header_y,
                    x_train=x_train,
                    y_train=y_train,
                    scaler_option=scaler_option,
                    path_to_save='.',
                    accuracy=accuracy)
        else:

            lr = float(model_parameters['net_learning_rate'])
            layer = int(model_parameters['net_layer_n'])
            dropout = float(model_parameters['net_dropout'])
            l_2 = float(model_parameters['net_l_2'])
            epochs = int(model_parameters['net_epochs'])
            batch_size = int(model_parameters['net_batch_size'])
            net_structure = [
                int(x) for x in model_parameters['net_structure'].split(" ")
            ]

            optimizer = keras.optimizers.Adam(lr=lr)

            if train_type == 'c':
                model = asc.net_define_classifier(params=net_structure,
                                                  layer_n=layer,
                                                  input_size=x_train.shape[1],
                                                  dropout=dropout,
                                                  l_2=l_2,
                                                  optimizer=optimizer,
                                                  num_of_class=num_of_class)
                asc.train_and_save_net_classifier(
                    model,
                    project_file / "models" /
                    ("session" + str(session_number) + train_type + "_" +
                     input_name + "_" + model_type + ".pkl"),
                    input_cols=header_x,
                    target_col=header_y,
                    x_train=x_train,
                    y_train=y_train,
                    scaler_option=scaler_option,
                    accuracy=accuracy,
                    path_to_save='.',
                    num_of_folds=num_of_folds,
                    epochs=epochs,
                    batch_size=batch_size,
                    num_of_class=num_of_class)
            else:
                model = asc.net_define(params=net_structure,
                                       layer_n=layer,
                                       input_size=x_train.shape[1],
                                       dropout=dropout,
                                       l_2=l_2,
                                       optimizer=optimizer)
                asc.train_and_save_net(
                    model,
                    project_file / "models" /
                    ("session" + str(session_number) + train_type + "_" +
                     input_name + "_" + model_type + ".pkl"),
                    input_cols=header_x,
                    target_col=header_y,
                    x_train=x_train,
                    y_train=y_train,
                    scaler_option=scaler_option,
                    MAE=MAE,
                    R2=R2,
                    path_to_save='.',
                    num_of_folds=num_of_folds,
                    epochs=epochs,
                    batch_size=batch_size)

    except Exception as e:
        print("* An error occurred while training and saving .. ")
        print(e)
        sys.exit()

    if test is True:
        if train_type == 'r': print("\n MAE: %s R2: %s" % (MAE, R2))
        else: print("\n Accuracy: %s" % accuracy)
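
Relative to Example #4, this variant adds a project folder layout, optional metadata logging, and a classification path. A hedged sketch of just the extra fields (merge them into the Namespace from the sketch after Example #4; values are illustrative):

extra_args = {
    "save_metadata": "True",       # append run results to <project_file>/metadata.csv
    "train_type": "c",             # 'r' trains a regressor, 'c' a classifier
    "project_file": "my_project",  # models/, graphs/, tests/, ... are created inside
    "num_of_class": "3",           # output size for the 'NET' classifier
}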