def my_LogisticRegression(X_train, Y_train, X_test, Y_test):
    """Fit logistic-regression models for several values of C, plot one ROC
    curve per C in a 2x2 subplot grid, print test accuracy and performance
    metrics for each, and save the figure to image/logistic_regression.png.

    Parameters
    ----------
    X_train, Y_train : training features and labels.
    X_test, Y_test   : test features and labels.
    """
    C = [0.01, 0.1, 1, 10]
    i = 1
    for param in C:
        log_reg = linear_model.LogisticRegression(C=param, solver='lbfgs')
        log_reg.fit(X_train, Y_train)
        Y_predict_test = log_reg.predict(X_test)
        # Probability of the positive class (column 1) drives the ROC curve.
        y_predict_prob = log_reg.predict_proba(X_test)[:, 1]
        false_positive_rate, true_positive_rate, thresholds = metrics.roc_curve(
            Y_test, y_predict_prob)
        plt.subplot(2, 2, i)
        plt.tight_layout()
        # BUG FIX: the loop counter was previously passed as matplotlib's
        # positional format-string argument (plt.plot(x, y, i)); an int is
        # not a valid format spec, so the call raised instead of plotting.
        plt.plot(false_positive_rate, true_positive_rate)
        plt.xlim([-0.2, 1.2])
        plt.ylim([-0.2, 1.2])
        plt.title('ROC curve C = {}'.format(param))
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.grid(True)
        # Use score method to get accuracy of model
        score = log_reg.score(X_test, Y_test)
        print("+ C = %f => Test accuracy: %f" % (param, score))
        print_performance_metrics(Y_predict_test, Y_test)
        i = i + 1
    print("* Save Logistic Regression classification result into image/logistic_regression.png")
    plt.savefig('image/logistic_regression.png')
    plt.show()

    # define a function that accepts a threshold and prints sensitivity and specificity
    # NOTE(review): this closes over false_positive_rate / true_positive_rate /
    # thresholds from the LAST loop iteration, i.e. it evaluates only the
    # model fitted with C = 10.
    def evaluate_threshold(threshold):
        print('Sensitivity:', true_positive_rate[thresholds > threshold][-1])
        print('Specificity:', 1 - false_positive_rate[thresholds > threshold][-1])

    evaluate_threshold(0.9)
def my_NonLinearSVM(X_train, Y_train, X_test, Y_test):
    """Train RBF-kernel SVMs over a grid of C values, print per-C train/test
    accuracy and performance metrics, and plot both accuracy curves.

    The figure is written to image/non_linear_svm.png.
    """
    c_grid = [0.01, 0.1, 0.2, 0.5, 0.8, 1, 5, 10, 20, 50]
    train_scores = []
    test_scores = []
    for c_value in c_grid:
        model = SVC(C=c_value, kernel='rbf', gamma='auto')
        model.fit(X_train, Y_train)
        pred_train = model.predict(X_train)
        pred_test = model.predict(X_test)
        acc_train = accuracy_score(Y_train, pred_train)
        acc_test = accuracy_score(Y_test, pred_test)
        train_scores.append(acc_train)
        test_scores.append(acc_test)
        print(" * C = %f => Train accuracy = %f and Test accuracy = %f " %
              (c_value, acc_train, acc_test))
        print_performance_metrics(pred_test, Y_test)
    plt.plot(c_grid, train_scores, 'ro-')
    plt.plot(c_grid, test_scores, 'bv--')
    plt.legend(['Training Accuracy', 'Test Accuracy'])
    plt.xlabel('C')
    # C spans several orders of magnitude, so a log x-axis keeps it readable.
    plt.xscale('log')
    plt.ylabel('Accuracy')
    plt.title('Nonlinear Support Vector Machine')
    print(
        "* Save Non Linear Support Vector Machine classification result into image/non_linear_svm.png"
    )
    plt.savefig('image/non_linear_svm.png')
    plt.show()
def my_SVM(X_train, Y_train, X_test, Y_test):
    """Train linear-kernel SVMs over a grid of C values, print per-C
    train/test accuracy and performance metrics, and plot both curves.

    The figure is written to image/svm.png.
    """
    # Support Vector Machine Classifier
    plt.clf()
    c_grid = [0.01, 0.1, 0.2, 0.5, 0.8, 1, 5, 10, 20, 50]
    train_scores = []
    test_scores = []
    for c_value in c_grid:
        model = SVC(C=c_value, kernel='linear')
        model.fit(X_train, Y_train)
        pred_train = model.predict(X_train)
        pred_test = model.predict(X_test)
        acc_train = accuracy_score(Y_train, pred_train)
        acc_test = accuracy_score(Y_test, pred_test)
        train_scores.append(acc_train)
        test_scores.append(acc_test)
        print("+ C = %f => Train accuracy = %f and Test accuracy = %f " %
              (c_value, acc_train, acc_test))
        print_performance_metrics(pred_test, Y_test)
    plt.plot(c_grid, train_scores, 'ro-')
    plt.plot(c_grid, test_scores, 'bv--')
    plt.legend(['Training Accuracy', 'Test Accuracy'])
    plt.xlabel('C')
    # C spans several orders of magnitude, so a log x-axis keeps it readable.
    plt.xscale('log')
    plt.ylabel('Accuracy')
    plt.title('Support Vector Machine')
    print(
        "* Save Support Vector Machine classification result into image/svm.png"
    )
    plt.savefig('image/svm.png')
    plt.show()
def my_KNeighborsClassifier(X_train_std, X_test, y_train, y_test):
    """Sweep k = 1..50 for a KNN classifier, plot train/test accuracy versus
    k, then refit with the best k (highest test accuracy; ties broken by the
    smallest k) and print its metrics.

    The sweep figure is written to image/knn.png.
    """
    ks = list(range(1, 51))
    train_accs = []
    test_accs = []
    for k in ks:
        knn = KNeighborsClassifier(n_neighbors=k)
        knn.fit(X_train_std, y_train)
        train_accs.append(accuracy_score(y_train, knn.predict(X_train_std)))
        test_accs.append(accuracy_score(y_test, knn.predict(X_test)))
    fig, axes_knn = plt.subplots(1, 1)
    plt.plot(ks, train_accs, color='Red', label='Training accuracy')
    plt.plot(ks, test_accs, color='Blue', label='Testing accuracy')
    plt.ylabel('Accuracy Score')
    plt.xlabel('n (Nearest Neighbors)')
    plt.title('KNN')
    plt.legend(loc='upper right')
    print("* Save KNN classification result into image/knn.png")
    plt.savefig('image/knn.png')
    # list.index returns the first occurrence of the maximum, i.e. the
    # smallest k achieving the best test accuracy (k is the index + 1).
    best_k = test_accs.index(max(test_accs)) + 1
    print("* Best K is " + str(best_k))
    clf = KNeighborsClassifier(n_neighbors=best_k, metric='euclidean', p=2)
    clf.fit(X_train_std, y_train)
    best_pred_train = clf.predict(X_train_std)
    best_pred_test = clf.predict(X_test)
    print("* Training accuracy : ", accuracy_score(y_train, best_pred_train))
    print("* Testing accuracy : ", accuracy_score(y_test, best_pred_test))
    print_performance_metrics(best_pred_test, y_test)
def my_GaussianNB(X_train, Y_train, X_test, Y_test):
    """Fit a Gaussian Naive Bayes classifier and print its train/test
    accuracy plus the shared performance metrics."""
    model = GaussianNB()
    model.fit(X_train, Y_train)
    pred_train = model.predict(X_train)
    pred_test = model.predict(X_test)
    print("* Training accuracy ", accuracy_score(Y_train, pred_train))
    print("* Testing accuracy", accuracy_score(Y_test, pred_test))
    print_performance_metrics(pred_test, Y_test)
def my_RandomForestClassifier(X_train, X_test, Y_train, Y_test,
                              numBaseClassifiers, train_Acc, test_Acc):
    """Fit a random forest with numBaseClassifiers trees, append the
    train/test accuracies to the caller-supplied lists (mutated in place),
    and print accuracy plus the shared performance metrics."""
    forest = ensemble.RandomForestClassifier(n_estimators=numBaseClassifiers)
    forest.fit(X_train, Y_train)
    pred_train = forest.predict(X_train)
    pred_test = forest.predict(X_test)
    acc_train = accuracy_score(Y_train, pred_train)
    acc_test = accuracy_score(Y_test, pred_test)
    # Caller accumulates these across ensemble sizes for later plotting.
    train_Acc.append(acc_train)
    test_Acc.append(acc_test)
    print("* Train accuracy = %f and Test accuracy = %f " %
          (acc_train, acc_test))
    print_performance_metrics(pred_test, Y_test)
def my_Bagging(X_train, X_test, Y_train, Y_test, numBaseClassifiers,
               max_depth_EM, train_Acc, test_Acc):
    """Fit a bagging ensemble of depth-limited decision trees, append the
    train/test accuracies to the caller-supplied lists (mutated in place),
    and print accuracy plus the shared performance metrics."""
    base_tree = DecisionTreeClassifier(max_depth=max_depth_EM)
    bagger = ensemble.BaggingClassifier(base_tree,
                                        n_estimators=numBaseClassifiers)
    bagger.fit(X_train, Y_train)
    pred_train = bagger.predict(X_train)
    pred_test = bagger.predict(X_test)
    acc_train = accuracy_score(Y_train, pred_train)
    acc_test = accuracy_score(Y_test, pred_test)
    # Caller accumulates these across ensemble sizes for later plotting.
    train_Acc.append(acc_train)
    test_Acc.append(acc_test)
    print("* Train accuracy = %f and Test accuracy = %f " %
          (acc_train, acc_test))
    print_performance_metrics(pred_test, Y_test)
def my_DecisionTree(X_train, X_test, y_train, y_test):
    """Fit decision trees with entropy and gini criteria at max depths 2 and
    3, export each fitted tree as a PNG via graphviz, and plot test accuracy
    versus depth for both criteria side by side.

    The comparison figure is written to image/decision_tree.png; individual
    trees go to image/tree_entropy_<d>.png and image/tree_gini_<d>.png.
    """
    print('* Use Entropy index for impurity measure :')
    accuracy = np.empty(2, dtype=float)
    max_depths = [2, 3]
    for i, max_depth in enumerate(max_depths):
        clf = tree.DecisionTreeClassifier(criterion='entropy',
                                          max_depth=max_depth)
        clf = clf.fit(X_train, y_train)
        # create graph tree with class names
        dot_data = tree.export_graphviz(clf,
                                        feature_names=X_train.columns,
                                        class_names=['1', '0'],
                                        filled=True,
                                        out_file=None)
        graph = pydotplus.graph_from_dot_data(dot_data)
        graph.write_png('image/tree_entropy_%d.png' % (max_depth))
        predY = clf.predict(X_test)
        accuracy[i] = accuracy_score(y_test, predY)
        print('+ Entropy: Max depth %d , Accuracy on test data is %.2f' %
              (max_depth, (accuracy[i])))
        print_performance_metrics(predY, y_test)
    fig, ax = plt.subplots(nrows=1, ncols=2)
    ax[0].set_prop_cycle(color=['red'])
    ax[0].set_ylim([0.8, 1.1])
    ax[0].plot(max_depths, accuracy)
    ax[0].legend(['accuracy-Entropy'], loc='upper left')
    print('* Use Gini index for impurity measure :')
    # CONSISTENCY FIX: iterate max_depths instead of a duplicated literal
    # list so the gini sweep always matches the x-axis used for plotting.
    for i, max_depth in enumerate(max_depths):
        clf = tree.DecisionTreeClassifier(criterion='gini',
                                          max_depth=max_depth)
        clf = clf.fit(X_train, y_train)
        dot_data = tree.export_graphviz(clf,
                                        feature_names=X_train.columns,
                                        class_names=['1', '0'],
                                        filled=True,
                                        out_file=None)
        graph = pydotplus.graph_from_dot_data(dot_data)
        graph.write_png('image/tree_gini_%d.png' % (max_depth))
        predY = clf.predict(X_test)
        accuracy[i] = accuracy_score(y_test, predY)
        print('+ Gini: Max depth %d , Accuracy on test data is %.2f' %
              (max_depth, (accuracy_score(y_test, predY))))
        print_performance_metrics(predY, y_test)
    ax[1].set_prop_cycle(color=['green'])
    ax[1].set_ylim([0.8, 1.1])
    ax[1].plot(max_depths, accuracy)
    ax[1].legend(['accuracy-Gini'], loc='upper left')
    plt.title('Decision Tree')
    # BUG FIX: the original string literal was broken across two source
    # lines by a raw newline (a SyntaxError); collapsed to one line.
    print(
        "* Save Decision Tree classification result into image/decision_tree.png"
    )
    plt.savefig('image/decision_tree.png')
    plt.show()