def run_best_mlp(ds_id, all_labels, train_features, train_labels, val_features, val_labels, test_features, test_labels):
    """Train the tuned MLP classifier and evaluate it on one split.

    The hyper-parameters (hidden_layer_sizes=(60, 80), activation='tanh',
    solver='adam') were selected earlier with GridSearchCV over:
        hidden_layer_sizes: [(50, 80), (20, 20, 20)]
        activation: ['identity', 'tanh', 'logistic', 'relu']
        solver: ['adam', 'sgd']

    If both val_features and val_labels are provided, the model is evaluated
    on the validation split (results written to 'Best-MLP_val.csv');
    otherwise it is evaluated on the test split (results written to
    'Best-MLP-<ds_id>.csv') via clf_evaluation.
    """
    best_mlp_clf = MLPClassifier(hidden_layer_sizes=(60, 80), activation='tanh', solver='adam')
    # fit() returns the estimator itself, so no reassignment is needed
    best_mlp_clf.fit(train_features, train_labels)
    if val_features is not None and val_labels is not None:
        # Validation split
        pred_labels = best_mlp_clf.predict(val_features)
        clf_evaluation(val_labels, pred_labels, all_labels, 'Best-MLP_val.csv')
    else:
        # Test split
        pred_labels = best_mlp_clf.predict(test_features)
        clf_evaluation(test_labels, pred_labels, all_labels, 'Best-MLP-' + ds_id + '.csv')
def run_base_dt(ds_id, all_labels, train_features, train_labels, val_features, val_labels, test_features, test_labels):
    """Train a baseline entropy-criterion decision tree and evaluate it.

    Evaluates on the validation split when both val_features and val_labels
    are supplied (output 'Base-DT_val.csv'); otherwise evaluates on the test
    split (output 'Base-DT-<ds_id>.csv') via clf_evaluation.
    """
    classifier = DecisionTreeClassifier(criterion='entropy').fit(train_features, train_labels)
    has_validation = val_features is not None and val_labels is not None
    if has_validation:
        # Validation split
        predictions = classifier.predict(val_features)
        clf_evaluation(val_labels, predictions, all_labels, 'Base-DT_val.csv')
    else:
        # Test split
        predictions = classifier.predict(test_features)
        clf_evaluation(test_labels, predictions, all_labels, 'Base-DT-' + ds_id + '.csv')
def run_naive_bayes(ds_id, all_labels, train_features, train_labels, val_features, val_labels, test_features, test_labels):
    """Train a Gaussian Naive Bayes model and evaluate it on one split.

    Uses the validation split when both val_features and val_labels are
    given (output 'GNB_val.csv'); otherwise uses the test split (output
    'GNB-<ds_id>.csv') via clf_evaluation.
    """
    gnb = GaussianNB()
    gnb.fit(train_features, train_labels)
    # Branch inverted relative to the usual pattern: test split first.
    if val_features is None or val_labels is None:
        # Test split
        predictions = gnb.predict(test_features)
        clf_evaluation(test_labels, predictions, all_labels, 'GNB-' + ds_id + '.csv')
    else:
        # Validation split
        predictions = gnb.predict(val_features)
        clf_evaluation(val_labels, predictions, all_labels, 'GNB_val.csv')
def run_perceptron(ds_id, all_labels, train_features, train_labels, val_features, val_labels, test_features, test_labels):
    """Train a Perceptron classifier and evaluate it on one split.

    Evaluates on the validation split when both val_features and val_labels
    are supplied (output 'PER_val.csv'); otherwise on the test split
    (output 'PER-<ds_id>.csv') via clf_evaluation.
    """
    model = Perceptron()
    model.fit(train_features, train_labels)
    validating = val_features is not None and val_labels is not None
    # Pick the split to score, then hand predictions to clf_evaluation.
    if validating:
        predicted = model.predict(val_features)
        clf_evaluation(val_labels, predicted, all_labels, 'PER_val.csv')
    else:
        predicted = model.predict(test_features)
        clf_evaluation(test_labels, predicted, all_labels, 'PER-' + ds_id + '.csv')
def run_best_dt(ds_id, all_labels, train_features, train_labels, val_features, val_labels, test_features, test_labels):
    """Train the tuned decision tree classifier and evaluate it on one split.

    The hyper-parameters (criterion='entropy', max_depth=None,
    class_weight=None, min_impurity_decrease=0, min_samples_split=2) were
    selected earlier with GridSearchCV over:
        criterion: ['gini', 'entropy']
        max_depth: [10, None]
        min_samples_split: [8, 10, 20, 30, 40]
        min_impurity_decrease: [0, 0.0002, 0.0003, 0.0001]
        class_weight: [None, 'balanced']

    If both val_features and val_labels are provided, the model is evaluated
    on the validation split ('Best-DT_val.csv'); otherwise on the test split
    ('Best-DT-<ds_id>.csv') via clf_evaluation.
    """
    # Renamed from base_dt_clf: this is the tuned ("best") tree, and the
    # sibling run_base_dt already owns the "base" name.
    best_dt_clf = DecisionTreeClassifier(criterion='entropy', max_depth=None, class_weight=None,
                                         min_impurity_decrease=0, min_samples_split=2)
    # fit() returns the estimator itself, so no reassignment is needed
    best_dt_clf.fit(train_features, train_labels)
    if val_features is not None and val_labels is not None:
        # Validation split
        pred_labels = best_dt_clf.predict(val_features)
        clf_evaluation(val_labels, pred_labels, all_labels, 'Best-DT_val.csv')
    else:
        # Test split
        pred_labels = best_dt_clf.predict(test_features)
        clf_evaluation(test_labels, pred_labels, all_labels, 'Best-DT-' + ds_id + '.csv')
def run_base_mlp(ds_id, all_labels, train_features, train_labels, val_features, val_labels, test_features, test_labels):
    """Train a baseline MLP classifier and evaluate it on one split.

    Uses one hidden layer of 100 units with logistic activation and SGD.
    Evaluates on the validation split when both val_features and val_labels
    are supplied ('Base-MLP_val.csv'); otherwise on the test split
    ('Base-MLP-<ds_id>.csv') via clf_evaluation.
    """
    # hidden_layer_sizes takes a tuple; the original passed a bare int 100,
    # which sklearn silently wraps — (100,) is the explicit equivalent.
    clf_base_mlp = MLPClassifier(hidden_layer_sizes=(100,), activation='logistic', solver='sgd')
    clf_base_mlp.fit(train_features, train_labels)
    if val_features is not None and val_labels is not None:
        # Validation split
        pred_labels = clf_base_mlp.predict(val_features)
        clf_evaluation(val_labels, pred_labels, all_labels, 'Base-MLP_val.csv')
    else:
        # Test split
        pred_labels = clf_base_mlp.predict(test_features)
        clf_evaluation(test_labels, pred_labels, all_labels, 'Base-MLP-' + ds_id + '.csv')