def train_support_vector_machine(self, X_train, X_test, y_train, y_test, save_mdl=False, save_loc=Weights_File,
                                 max_epochs=17000, cost_thresh=0.001, l_r=0.000001, r_s=10000):
    """Fit the linear SVM with stochastic gradient descent, then evaluate it.

    Args:
        X_train: training features; normalised with the instance's min-max
            scalar and extended with a trailing column of ones (intercept).
        X_test: test features; prepared the same way as ``X_train``.
        y_train: training labels, stored unchanged.
        y_test: test labels, stored unchanged.
        save_mdl: when truthy, the trained weights are saved to ``save_loc``.
        save_loc: destination handed to ``__save_trained_weights``.
        max_epochs: forwarded to ``stochastic_gradient_descent``.
        cost_thresh: forwarded to ``stochastic_gradient_descent``.
        l_r: value written to the module global ``Learning_rate``.
        r_s: value written to the module global ``Reg_strength``.
    """
    # hyper-parameters are published through module-level globals
    global Learning_rate
    global Reg_strength
    Learning_rate = l_r
    Reg_strength = r_s

    # scale both splits with the scalar computed for this instance
    train_scaled = Utils.normalize_numpy_array(X_train, self.__min_max_scalar)
    test_scaled = Utils.normalize_numpy_array(X_test, self.__min_max_scalar)

    # a trailing column of ones acts as the intercept term
    train_bias = np.ones((train_scaled.shape[0], 1))
    test_bias = np.ones((test_scaled.shape[0], 1))
    train_scaled = np.hstack((train_scaled, train_bias))
    test_scaled = np.hstack((test_scaled, test_bias))

    # keep the prepared data on the instance for the evaluation step
    self.__x_train = train_scaled
    self.__x_test = test_scaled
    self.__y_train = y_train
    self.__y_test = y_test

    # stochastic gradient descent searches for the linear classifier weights
    print("Training Support Vector Machine")  # debug
    self.__trained_weights = stochastic_gradient_descent(
        self.__x_train,
        self.__y_train,
        max_epochs=max_epochs,
        cost_thresh=cost_thresh,
    )
    print("Trained Weights: ", self.__trained_weights)
    print("Finished Training SupportVectorMachine")

    self.__test_support_vector_machine()
    if save_mdl:
        self.__save_trained_weights(save_loc)
def __test_support_vector_machine(self, report_fl_name=None):
    """Evaluate the trained SVM on the stored test split and write a JSON report.

    Prints the accuracy and the first two rows of the confusion matrix, then
    writes a report whose tp/fn/fp/tn/specificity/sensitivity fields are read
    from entries [0][0], [0][1], [1][0], [1][1], [2][0] and [2][1] of the
    value returned by ``Utils.calculate_confusion_matrix``.

    Args:
        report_fl_name: path of the JSON report. Defaults to the original
            hard-coded location, so existing callers are unaffected; pass a
            path to run on a machine where that directory does not exist.
    """
    if self.__trained_weights is None or self.__x_test is None or self.__y_test is None:
        print("Train SVM before Testing.")
        return

    print("Testing SupportVectorMachine")
    # One matrix product scores every test row at once. The previous loop
    # called np.append per row, which copies the whole array each time.
    # ravel/astype keep the result a flat float array, as the loop produced.
    scores = np.dot(self.__x_test, self.__trained_weights)
    y_test_predicted = np.sign(scores).ravel().astype(float)

    accuracy_score = Utils.calculate_accuracy_score(self.__y_test, y_test_predicted)
    print("accuracy of the model: {}".format(accuracy_score))

    confusion_mat = Utils.calculate_confusion_matrix(self.__y_test, y_test_predicted)
    print("SVM confusion Matrix")
    print(confusion_mat[0])
    print(confusion_mat[1])

    # write accuracy and confusion matrix to file (best effort: a failure
    # here is reported but must not abort training)
    try:
        if report_fl_name is None:
            report_fl_name = "C:/Users/Brijesh Prajapati/Documents/Projects/Autism_Detection_Hons_Proj/Classifiers/" \
                             "Classifier_Reports/svm_current_report.json"
        classifier_desc = "SVM l_r: " + str(Learning_rate) + ", r_s: " + str(Reg_strength)
        dict_report = {
            "desc": classifier_desc,
            "accuracy_score": str(accuracy_score),
            "tp": str(confusion_mat[0][0]),
            "fn": str(confusion_mat[0][1]),
            "fp": str(confusion_mat[1][0]),
            "tn": str(confusion_mat[1][1]),
            "specificity": str(confusion_mat[2][0]),
            "sensitivity": str(confusion_mat[2][1]),
        }
        with open(report_fl_name, 'w') as report_fl:
            report_fl.write(json.dumps(dict_report))
    except Exception as ex:
        print("Exception occurred saving SVM report", ex)
def predict_sample(self, sample_data, return_lbl=False):
    """Classify ``sample_data`` with the trained SVM weights.

    The input is deep-copied, normalised with the instance's min-max scalar
    and extended with a column of ones before the sign of its dot product
    with the trained weights is taken.

    Args:
        sample_data: samples to classify; the caller's object is not modified.
        return_lbl: when exactly ``True``, return ``'ASD'`` for a positive
            prediction and ``'Normal'`` otherwise instead of the raw sign.

    Returns:
        The sign array (or a label string). ``None`` when no weights are
        available or when any exception occurs; both cases only print a
        message.
    """
    try:
        if self.__trained_weights is None:
            print("Train the SVM first OR load the trained weights")
            return None

        # work on a private copy so normalisation never touches the input
        scaled = Utils.normalize_numpy_array(copy.deepcopy(sample_data), self.__min_max_scalar)

        # same intercept column that is added to the data during training
        bias_column = np.ones((scaled.shape[0], 1))
        with_bias = np.hstack((scaled, bias_column))

        y_predict = np.sign(np.dot(with_bias, self.__trained_weights))
        if return_lbl is not True:
            return y_predict
        return 'ASD' if y_predict > 0 else 'Normal'
    except Exception as e:
        print("Error occurred predicting the sample in SVM.")
        print('Exception', e)
        traceback.print_exc()
def trained_decision_tree(max_depth, x, y):
    """Build a ``DecisionTree`` of depth ``max_depth`` trained on a bootstrap draw of (x, y).

    Returns:
        The trained ``DecisionTree`` instance.
    """
    tree = DecisionTree(max_depth=max_depth)
    # the tree is fitted on the resample returned by Utils.bootstrap_samples
    boot_x, boot_y = Utils.bootstrap_samples(x, y)
    tree.train_decision_tree(boot_x, boot_y)
    return tree
def train_classifiers(self, svm_param_dict=None, rf_param_dict=None, mlp_param_dict=None, adab_param_dict=None):
    """Train the SVM, random forest, MLP and AdaBoost members in that order.

    The data is loaded once through ``prepare_data_for_training``; every
    classifier is then trained on its own bootstrap resample of the training
    split and evaluated against the shared test split.

    Args:
        svm_param_dict: extra keyword arguments for
            ``train_support_vector_machine``; ``None`` means none.
        rf_param_dict: extra keyword arguments for ``train_random_forest``.
        mlp_param_dict: extra keyword arguments for
            ``train_MultiLayerPerceptron``.
        adab_param_dict: extra keyword arguments for ``train_adaboost``.

    Returns:
        Always ``0``.
    """
    self.__x_train, self.__x_test, self.__y_train, self.__y_test = prepare_data_for_training(Training_Data_Dir)
    save_mdl = True

    def _extra(param_dict):
        # None means "no overrides"; anything else is forwarded untouched
        return {} if param_dict is None else param_dict

    # ---- SVM
    # save_mdl is now passed unconditionally. Previously it was dropped when
    # svm_param_dict was None, so the SVM alone was never saved in that case.
    x_temp, y_temp = Utils.bootstrap_samples(self.__x_train, self.__y_train)  # bootstrap samples
    self.svm.train_support_vector_machine(x_temp, self.__x_test, y_temp, self.__y_test,
                                          save_mdl=save_mdl, **_extra(svm_param_dict))

    # ---- Random forest
    x_temp, y_temp = Utils.bootstrap_samples(self.__x_train, self.__y_train)  # bootstrap samples
    self.random_forest.train_random_forest(x_temp, self.__x_test, y_temp, self.__y_test,
                                           save_mdl=save_mdl, **_extra(rf_param_dict))

    # ---- Multi-layer perceptron
    x_temp, y_temp = Utils.bootstrap_samples(self.__x_train, self.__y_train)
    self.multi_layer_perceptron.train_MultiLayerPerceptron(x_temp, self.__x_test, y_temp, self.__y_test,
                                                           save_mdl=save_mdl, **_extra(mlp_param_dict))

    # ---- AdaBoost (its save flag is named save_classifiers)
    x_temp, y_temp = Utils.bootstrap_samples(self.__x_train, self.__y_train)
    self.adaboost.train_adaboost(x_temp, self.__x_test, y_temp, self.__y_test,
                                 save_classifiers=save_mdl, **_extra(adab_param_dict))

    # self.__models_loaded = True
    # self.test_main_classifier()
    return 0
def __init__(self, training_data_dir):
    """Create an untrained SVM wrapper and derive its normalisation scalar.

    Args:
        training_data_dir: path of the training CSV. The file is read here so
            that ``Utils.calculate_min_max_np_scalar`` can compute the scalar.
    """
    self.training_data_dir = training_data_dir

    # nothing is trained or loaded yet
    self.__trained_weights = None
    self.__x_train = self.__x_test = None
    self.__y_train = self.__y_test = None

    # the min-max scalar comes from the full training CSV
    training_frame = pd.read_csv(training_data_dir)
    self.__min_max_scalar = Utils.calculate_min_max_np_scalar(training_frame)
def train_MultiLayerPerceptron(self, X_train, X_test, y_train, y_test, save_mdl=False,
                               save_loc=MultilayerPerceptronMdlFl, hidden_layer_sizes=16, max_iter=400):
    """Fit a fresh ``MLPClassifier`` on the normalised training data and evaluate it.

    Args:
        X_train: training features; normalised with the instance's min-max scalar.
        X_test: test features; normalised the same way.
        y_train: training labels, stored unchanged.
        y_test: test labels, stored unchanged.
        save_mdl: when truthy, the fitted model is saved to ``save_loc``.
        save_loc: destination handed to ``__save_MultiLayerPerceptron``.
        hidden_layer_sizes: forwarded to ``MLPClassifier``.
        max_iter: forwarded to ``MLPClassifier``.
    """
    train_scaled = Utils.normalize_numpy_array(X_train, self.__min_max_scalar)
    test_scaled = Utils.normalize_numpy_array(X_test, self.__min_max_scalar)

    # remember data and hyper-parameters; the report reads them back later
    self.__x_train, self.__x_test = train_scaled, test_scaled
    self.__y_train, self.__y_test = y_train, y_test
    self.__max_iter = max_iter
    self.__hidden_layer_size = hidden_layer_sizes

    print("Training MultiLayerPerceptron...")
    started = time.time()  # test purposes
    self.__mlp_classifier = MLPClassifier(
        hidden_layer_sizes=self.__hidden_layer_size,
        activation='relu',
        max_iter=self.__max_iter,
        solver='adam',
    )
    self.__mlp_classifier = self.__mlp_classifier.fit(self.__x_train, self.__y_train)
    finished = time.time()  # test purposes
    print("Finished Training MultiLayerPerceptron in: ", finished - started)

    self.__test_MultiLayerPerceptron()
    if save_mdl:
        self.__save_MultiLayerPerceptron(save_loc)
def prepare_data_for_training(file_dir):
    """Load the training CSV and return train/test splits as NumPy arrays.

    The ``feature_class`` labels are recoded as ``'ASD' -> 1.0`` and
    ``'TD' -> -1.0`` before ``Utils.train_test_split`` is called with
    ``test_size=0.2``.

    Args:
        file_dir: path of the CSV file; it must contain a ``feature_class`` column.

    Returns:
        ``(x_train, x_test, y_train, y_test)``, each converted with ``to_numpy()``.
    """
    dataframe = pd.read_csv(file_dir)

    # Assign the recoded column back. Calling replace(inplace=True) on the
    # result of dataframe['feature_class'] is a chained assignment: it warns
    # on recent pandas and leaves the frame unchanged under Copy-on-Write.
    dataframe['feature_class'] = dataframe['feature_class'].replace({'ASD': 1.0, 'TD': -1.0})

    # splitting data into train and test
    x_train, x_test, y_train, y_test = Utils.train_test_split(dataframe=dataframe, test_size=0.2)

    # convert the dataframes to numpy arrays and return
    return x_train.to_numpy(), x_test.to_numpy(), y_train.to_numpy(), y_test.to_numpy()
def __test_adaboost(self, report_fl_name=None):
    """Evaluate the AdaBoost ensemble on the stored test split and write a JSON report.

    Each test row is reshaped to a single-row 2-D array and passed to
    ``predict_sample_adaboost``. The report's tp/fn/fp/tn/specificity/
    sensitivity fields are read from entries [0][0], [0][1], [1][0], [1][1],
    [2][0] and [2][1] of the value returned by
    ``Utils.calculate_confusion_matrix``.

    Args:
        report_fl_name: path of the JSON report. Defaults to the original
            hard-coded location, so existing callers are unaffected; pass a
            path to run on a machine where that directory does not exist.
    """
    print("Testing AdaBoost")
    y_predictions = np.ones(len(self.__x_test))
    for i, test in enumerate(self.__x_test):
        # the predictor is given one row as a (1, n_features) array
        test = np.reshape(test, (-1, len(test)))
        y_predictions[i] = self.predict_sample_adaboost(test)

    accuracy_score = Utils.calculate_accuracy_score(self.__y_test, y_predictions)
    print("Accuracy: ", accuracy_score)

    cf = Utils.calculate_confusion_matrix(self.__y_test, y_predictions)
    print("Adaboost Matrix")
    print(cf[0])
    print(cf[1])

    # write accuracy and confusion matrix to file (best effort: a failure
    # here is reported but must not abort the run)
    try:
        if report_fl_name is None:
            report_fl_name = "C:/Users/Brijesh Prajapati/Documents/Projects/Autism_Detection_Hons_Proj/Classifiers/" \
                             "Classifier_Reports/adaboost_current_report.json"
        classifier_desc = "Adaboost number of stumps: " + str(self.__num_classifiers)
        dict_report = {
            "desc": classifier_desc,
            "accuracy_score": str(accuracy_score),
            "tp": str(cf[0][0]),
            "fn": str(cf[0][1]),
            "fp": str(cf[1][0]),
            "tn": str(cf[1][1]),
            "specificity": str(cf[2][0]),
            "sensitivity": str(cf[2][1]),
        }
        with open(report_fl_name, 'w') as report_fl:
            report_fl.write(json.dumps(dict_report))
    except Exception as ex:
        print("Exception occurred saving adaboost report", ex)
def __init__(self, training_data_dir):
    """Create the MLP wrapper with default hyper-parameters and its normalisation scalar.

    Args:
        training_data_dir: path of the training CSV. The file is read here so
            that ``Utils.calculate_min_max_np_scalar`` can compute the scalar.
    """
    # defaults; train_MultiLayerPerceptron overwrites both
    self.__max_iter = 400
    self.__hidden_layer_size = 16

    self.training_data_dir = training_data_dir
    self.__mlp_classifier = MLPClassifier()

    # no data is attached until training
    self.__x_train = self.__x_test = None
    self.__y_train = self.__y_test = None

    # min-max scalar used for normalisation, computed from the full CSV
    training_frame = pd.read_csv(training_data_dir)
    self.__min_max_scalar = Utils.calculate_min_max_np_scalar(training_frame)
def process_training_data(fl_dir, min_max_scalar=None):
    """Load the training CSV, recode the labels and split it into train/test parts.

    Args:
        fl_dir: path of the CSV file; it must contain a ``feature_class`` column.
        min_max_scalar: accepted for interface compatibility; the current
            body does not use it (normalisation is not performed here).

    Returns:
        ``(x_train, x_test, y_train, y_test)`` exactly as returned by
        ``Utils.train_test_split(df, 0.2)``.
    """
    df = pd.read_csv(fl_dir)

    # Assign the recoded labels back. Calling replace(inplace=True) on the
    # result of df['feature_class'] is a chained assignment: it warns on
    # recent pandas and leaves the frame unchanged under Copy-on-Write.
    df['feature_class'] = df['feature_class'].replace({'ASD': 1.0, 'TD': -1.0})

    x_train, x_test, y_train, y_test = Utils.train_test_split(df, 0.2)
    return x_train, x_test, y_train, y_test
def test_main_classifier(self, report_fl_name=None):
    """Evaluate the majority vote of the four classifiers and write a JSON report.

    For every test row the SVM, random forest, MLP and AdaBoost predictions
    are collected and reduced with ``most_common_label``. The report's
    tp/fn/fp/tn/specificity/sensitivity fields are read from entries [0][0],
    [0][1], [1][0], [1][1], [2][0] and [2][1] of the value returned by
    ``Utils.calculate_confusion_matrix``.

    Args:
        report_fl_name: path of the JSON report. Defaults to the original
            hard-coded location, so existing callers are unaffected; pass a
            path to run on a machine where that directory does not exist.
    """
    self.__load_classifiers()

    y_pred = []
    for test_data in self.__x_test:
        svm_p = self.svm.predict_sample([test_data])
        rf_p = self.random_forest.predict([test_data])
        mlp_p = self.multi_layer_perceptron.predict(test_data)
        adb_p = self.adaboost.predict_sample_adaboost(np.array([test_data]))

        # Extract the SVM scalar explicitly: float() on a size-1 array with
        # ndim > 0 is deprecated in NumPy and will become an error.
        svm_vote = float(np.ravel(svm_p)[0])
        predictions = [svm_vote, rf_p[0], mlp_p[0], adb_p[0]]
        y_pred.append(most_common_label(predictions))

    accuracy_score = Utils.calculate_accuracy_score(self.__y_test, y_pred)
    print("Accuracy of main model: ", accuracy_score)

    confusion_mat = Utils.calculate_confusion_matrix(self.__y_test, y_pred)
    print("Main Classifier Confusion Matrix")
    print(confusion_mat[0])
    print(confusion_mat[1])

    # write accuracy and confusion matrix to file (best effort: a failure
    # here is reported but must not abort the run)
    try:
        if report_fl_name is None:
            report_fl_name = "C:/Users/Brijesh Prajapati/Documents/Projects/Autism_Detection_Hons_Proj/Classifiers/" \
                             "Classifier_Reports/mainmodel_current_report.json"
        classifier_desc = "Main Model"
        dict_report = {
            "desc": classifier_desc,
            "accuracy_score": str(accuracy_score),
            "tp": str(confusion_mat[0][0]),
            "fn": str(confusion_mat[0][1]),
            "fp": str(confusion_mat[1][0]),
            "tn": str(confusion_mat[1][1]),
            "specificity": str(confusion_mat[2][0]),
            "sensitivity": str(confusion_mat[2][1]),
        }
        with open(report_fl_name, 'w') as report_fl:
            report_fl.write(json.dumps(dict_report))
    except Exception as ex:
        print("Exception occurred saving Main Model report", ex)
def __test_MultiLayerPerceptron(self, report_fl_name=None):
    """Evaluate the fitted MLP on the stored test split and write a JSON report.

    Prints the accuracy, the first two rows of the confusion matrix and the
    ``classification_report``. The report's tp/fn/fp/tn/specificity/
    sensitivity fields are read from entries [0][0], [0][1], [1][0], [1][1],
    [2][0] and [2][1] of the value returned by
    ``Utils.calculate_confusion_matrix``.

    Args:
        report_fl_name: path of the JSON report. Defaults to the original
            hard-coded location, so existing callers are unaffected; pass a
            path to run on a machine where that directory does not exist.
    """
    print("Testing Multi Layer Perceptron")
    mlp_predictions = self.__mlp_classifier.predict(self.__x_test)

    accuracy_score = Utils.calculate_accuracy_score(self.__y_test, mlp_predictions)
    print("Accuracy: ", accuracy_score)

    cf = Utils.calculate_confusion_matrix(self.__y_test, mlp_predictions)
    print("Multi Layer Perceptron Confusion Matrix")
    print(cf[0])
    print(cf[1])
    print(classification_report(self.__y_test, mlp_predictions))

    # write accuracy and confusion matrix to file (best effort: a failure
    # here is reported but must not abort training)
    try:
        if report_fl_name is None:
            report_fl_name = "C:/Users/Brijesh Prajapati/Documents/Projects/Autism_Detection_Hons_Proj/Classifiers/" \
                             "Classifier_Reports/mlp_current_report.json"
        classifier_desc = "MLP hidden_layer_size: " + str(self.__hidden_layer_size) + ", max_iteration: " + \
                          str(self.__max_iter)
        dict_report = {
            "desc": classifier_desc,
            "accuracy_score": str(accuracy_score),
            "tp": str(cf[0][0]),
            "fn": str(cf[0][1]),
            "fp": str(cf[1][0]),
            "tn": str(cf[1][1]),
            "specificity": str(cf[2][0]),
            "sensitivity": str(cf[2][1]),
        }
        with open(report_fl_name, 'w') as report_fl:
            report_fl.write(json.dumps(dict_report))
    except Exception as ex:
        print("Exception occurred saving MLP report", ex)
def __test_random_forest(self, report_fl_name=None):
    """Evaluate the random forest on the stored test split and write a JSON report.

    The report's tp/fn/fp/tn/specificity/sensitivity fields are read from
    entries [0][0], [0][1], [1][0], [1][1], [2][0] and [2][1] of the value
    returned by ``Utils.calculate_confusion_matrix``.

    Args:
        report_fl_name: path of the JSON report. Defaults to the original
            hard-coded location, so existing callers are unaffected; pass a
            path to run on a machine where that directory does not exist.
    """
    print("Testing Random Forest Classifier")
    y_predictions = self.predict(self.__x_test)

    accuracy_score = Utils.calculate_accuracy_score(self.__y_test, y_predictions)
    print("Accuracy: ", accuracy_score)

    cf = Utils.calculate_confusion_matrix(self.__y_test, y_predictions)
    print("Random Forest Confusion Matrix")
    print(cf[0])
    print(cf[1])

    # write accuracy and confusion matrix to file (best effort: a failure
    # here is reported but must not abort training)
    try:
        if report_fl_name is None:
            report_fl_name = "C:/Users/Brijesh Prajapati/Documents/Projects/Autism_Detection_Hons_Proj/Classifiers/" \
                             "Classifier_Reports/rf_current_report.json"
        classifier_desc = "RF number of trees: " + str(self.__num_tree) + ", max depth: " + \
                          str(self.__max_depth)
        dict_report = {
            "desc": classifier_desc,
            "accuracy_score": str(accuracy_score),
            "tp": str(cf[0][0]),
            "fn": str(cf[0][1]),
            "fp": str(cf[1][0]),
            "tn": str(cf[1][1]),
            "specificity": str(cf[2][0]),
            "sensitivity": str(cf[2][1]),
        }
        with open(report_fl_name, 'w') as report_fl:
            report_fl.write(json.dumps(dict_report))
    except Exception as ex:
        print("Exception occurred saving random forest report", ex)
def predict(self, sample_data, return_lbl=False):
    """Classify one sample with the fitted MLP.

    Args:
        sample_data: the sample; it is normalised with the instance's min-max
            scalar and wrapped in a list before being passed to the classifier.
        return_lbl: when exactly ``True``, return ``'ASD'`` for a positive
            prediction and ``'Normal'`` otherwise instead of the raw prediction.

    Returns:
        The classifier's prediction, or the label string when requested.
    """
    # normalise first, then hand the classifier a one-element batch
    scaled_sample = Utils.normalize_numpy_array(sample_data, self.__min_max_scalar)
    prediction = self.__mlp_classifier.predict([scaled_sample])

    if return_lbl is not True:
        return prediction
    return 'ASD' if prediction > 0 else 'Normal'
def process_training_data(fl_dir):
    """Load the training CSV and return train/test splits as NumPy arrays.

    The ``feature_class`` labels are recoded as ``'ASD' -> 1.0`` and
    ``'TD' -> -1.0`` before ``Utils.train_test_split(df, 0.2)`` is called.
    No normalisation is applied here.

    Args:
        fl_dir: path of the CSV file; it must contain a ``feature_class`` column.

    Returns:
        ``(X_train, X_test, y_train, y_test)``, each converted with ``to_numpy()``.
    """
    df = pd.read_csv(fl_dir)

    # Assign the recoded labels back. Calling replace(inplace=True) on the
    # result of df['feature_class'] is a chained assignment: it warns on
    # recent pandas and leaves the frame unchanged under Copy-on-Write.
    df['feature_class'] = df['feature_class'].replace({'ASD': 1.0, 'TD': -1.0})

    X_train, X_test, y_train, y_test = Utils.train_test_split(df, 0.2)
    return X_train.to_numpy(), X_test.to_numpy(), y_train.to_numpy(), y_test.to_numpy()
def process_training_data(fl_dir, min_max_scalar):
    """Load the training CSV and return train/test splits as NumPy arrays.

    The ``feature_class`` labels are recoded as ``'ASD' -> 1.0`` and
    ``'TD' -> -1.0`` before ``Utils.train_test_split`` is called with
    ``test_size=0.2``. Normalisation and the intercept column are not added
    here.

    Args:
        fl_dir: path of the CSV file; it must contain a ``feature_class`` column.
        min_max_scalar: required by the signature but not used by the current body.

    Returns:
        ``(x_train, x_test, y_train, y_test)``, each converted with ``to_numpy()``.
    """
    dataframe = pd.read_csv(fl_dir)

    # Assign the recoded labels back. Calling replace(inplace=True) on the
    # result of dataframe['feature_class'] is a chained assignment: it warns
    # on recent pandas and leaves the frame unchanged under Copy-on-Write.
    dataframe['feature_class'] = dataframe['feature_class'].replace({'ASD': 1.0, 'TD': -1.0})

    # splitting data into train and test
    x_train, x_test, y_train, y_test = Utils.train_test_split(dataframe=dataframe, test_size=0.2)

    return x_train.to_numpy(), x_test.to_numpy(), y_train.to_numpy(), y_test.to_numpy()
def is_leaf_node(self):
    """Return True when this node carries a value, i.e. ``self.value`` is not None."""
    return self.value is not None


if __name__ == "__main__":
    # Manual smoke test: train one depth-10 tree on the project CSV and print
    # its accuracy on a 20% hold-out split.
    df = pd.read_csv("D:/TrainingDataset_YEAR_PROJECT/TrainingSet.csv")

    # Assign the recoded labels back. Calling replace(inplace=True) on the
    # result of df['feature_class'] is a chained assignment: it warns on
    # recent pandas and leaves the frame unchanged under Copy-on-Write.
    df['feature_class'] = df['feature_class'].replace({'ASD': 1.0, 'TD': -1.0})

    X_train, X_test, y_train, y_test = Utils.train_test_split(df, 0.2)
    X_train = X_train.values
    X_test = X_test.values
    y_train = y_train.values
    y_test = y_test.values

    decision_tree = DecisionTree(max_depth=10)
    decision_tree.train_decision_tree(X_train, y_train)
    y_pred = decision_tree.predict(X_test)

    accuracy_score = Utils.calculate_accuracy_score(y_test, y_pred)
    print("Accuracy: ", accuracy_score)

# References:
def train_svm_model(fl_dir):
    """Fit a 2-nearest-neighbours baseline on the CSV at ``fl_dir`` and print its metrics.

    Despite its name, the live code trains a ``KNeighborsClassifier``. The
    earlier SVC, random-forest, MLP and feature-selection experiments existed
    only as commented-out code and have been removed; recover them from
    version control if needed.

    The ``feature_class`` labels are recoded as ``"ASD" -> 1`` and
    ``"TD" -> 0``, the rows are shuffled, and ``Utils.train_test_split`` is
    called with ``test_size=0.20``. The confusion matrix, the classification
    report and the accuracy are printed; nothing is returned.

    Args:
        fl_dir: path of the CSV file; it must contain a ``feature_class`` column.
    """
    dataframe = pd.read_csv(fl_dir)

    # Assign the recoded labels back. Calling replace(inplace=True) on the
    # result of dataframe["feature_class"] is a chained assignment: it warns
    # on recent pandas and leaves the frame unchanged under Copy-on-Write.
    dataframe["feature_class"] = dataframe["feature_class"].replace({"ASD": 1, "TD": 0})

    # shuffle the rows before splitting
    dataframe = dataframe.sample(frac=1)

    X_train, X_test, Y_train, Y_test = Utils.train_test_split(dataframe=dataframe, test_size=0.20)

    # KNN
    model = KNeighborsClassifier(n_neighbors=2).fit(X_train, Y_train)
    y_pred = model.predict(X_test)

    # A stray print(accuracy) used to run here, before accuracy was assigned,
    # and raised UnboundLocalError. Accuracy is printed once, after it exists.
    print(confusion_matrix(Y_test, y_pred))
    print(classification_report(Y_test, y_pred))

    accuracy = model.score(X_test, Y_test)
    print(accuracy)  # debug