# Shared imports for this example (print_performance_metrics is a project
# helper defined elsewhere).
import matplotlib.pyplot as plt
from sklearn import linear_model, metrics


def my_LogisticRegression(X_train, Y_train, X_test, Y_test):

    C = [0.01, 0.1, 1, 10]
    i = 1
    for param in C:
        log_reg = linear_model.LogisticRegression(C=param, solver='lbfgs')
        log_reg.fit(X_train, Y_train)
        Y_predict_test = log_reg.predict(X_test)
        y_predict_prob = log_reg.predict_proba(X_test)[:, 1]
        false_positive_rate, true_positive_rate, thresholds = metrics.roc_curve(Y_test, y_predict_prob)
        plt.subplot(2, 2, i)
        # The loop counter is not a valid matplotlib format string, so plot
        # the curve without a third positional argument.
        plt.plot(false_positive_rate, true_positive_rate)
        plt.tight_layout()
        plt.xlim([-0.2, 1.2])
        plt.ylim([-0.2, 1.2])
        plt.title('ROC curve C = {}'.format(param))
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.grid(True)
        # Use score method to get accuracy of model
        score = log_reg.score(X_test, Y_test)
        print("+ C = %f => Test accuracy: %f" % (param, score))
        print_performance_metrics(Y_predict_test, Y_test)
        i = i + 1
    print("* Save Logistic Regression classification result into image/logistic_regression.png")
    plt.savefig('image/logistic_regression.png')
    plt.show()

    # Print sensitivity and specificity at a given probability threshold,
    # using the ROC arrays left over from the last fitted model (C = 10).
    def evaluate_threshold(threshold):
        print('Sensitivity:', true_positive_rate[thresholds > threshold][-1])
        print('Specificity:', 1 - false_positive_rate[thresholds > threshold][-1])

    evaluate_threshold(0.9)
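A minimal driver for this example, sketched under assumptions: the data is synthetic (make_classification is only a stand-in for the project's real dataset), print_performance_metrics is the project's own helper, and the image/ output directory must already exist.

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Hypothetical smoke-test data; any binary-labelled X/y pair works here.
X, y = make_classification(n_samples=500, n_features=10, random_state=42)
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=42)
my_LogisticRegression(X_train, Y_train, X_test, Y_test)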
Example #2
# Shared imports for this example.
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score


def my_NonLinearSVM(X_train, Y_train, X_test, Y_test):

    C = [0.01, 0.1, 0.2, 0.5, 0.8, 1, 5, 10, 20, 50]
    NL_SVM_train_Accuracy = []
    NL_SVM_test_Accuracy = []

    for param in C:
        # gamma='auto' uses 1 / n_features (the pre-0.22 sklearn default).
        clf = SVC(C=param, kernel='rbf', gamma='auto')
        clf.fit(X_train, Y_train)
        Y_predict_NL_SVMTrain = clf.predict(X_train)
        Y_predict_NL_SVM_Test = clf.predict(X_test)
        NL_SVM_train_Accuracy.append(
            accuracy_score(Y_train, Y_predict_NL_SVMTrain))
        NL_SVM_test_Accuracy.append(
            accuracy_score(Y_test, Y_predict_NL_SVM_Test))
        print(" * C = %f => Train accuracy = %f and Test accuracy = %f " %
              (param, accuracy_score(Y_train, Y_predict_NL_SVMTrain),
               accuracy_score(Y_test, Y_predict_NL_SVM_Test)))
        print_performance_metrics(Y_predict_NL_SVM_Test, Y_test)
    plt.plot(C, NL_SVM_train_Accuracy, 'ro-')
    plt.plot(C, NL_SVM_test_Accuracy, 'bv--')
    plt.legend(['Training Accuracy', 'Test Accuracy'])
    plt.xlabel('C')
    plt.xscale('log')
    plt.ylabel('Accuracy')
    plt.title('Nonlinear Support Vector Machine')
    print(
        "* Save Non Linear Support Vector Machine classification result into image/non_linear_svm.png"
    )
    plt.savefig('image/non_linear_svm.png')
    plt.show()
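The train-versus-test accuracy curves above diagnose over- and underfitting as C grows. As an alternative (not part of the original function), the same sweep can be scored with 5-fold cross-validation on the training split; X_train and Y_train are assumed to come from the same split used above.

import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import validation_curve

C_range = [0.01, 0.1, 0.2, 0.5, 0.8, 1, 5, 10, 20, 50]
# Mean accuracy per C over 5 folds, computed on the training split only.
train_scores, valid_scores = validation_curve(
    SVC(kernel='rbf', gamma='auto'), X_train, Y_train,
    param_name='C', param_range=C_range, cv=5)
print(train_scores.mean(axis=1))
print(valid_scores.mean(axis=1))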
Example #3
# Shared imports for this example.
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score


def my_SVM(X_train, Y_train, X_test, Y_test):
    # Support Vector Machine classifier with a linear kernel.
    plt.clf()
    C = [0.01, 0.1, 0.2, 0.5, 0.8, 1, 5, 10, 20, 50]
    SVM_train_accuracy = []
    SVM_test_accuracy = []

    for param in C:
        clf = SVC(C=param, kernel='linear')
        clf.fit(X_train, Y_train)
        Y_predict_SVM_train = clf.predict(X_train)
        Y_predict_SVM_test = clf.predict(X_test)
        SVM_train_accuracy.append(accuracy_score(Y_train, Y_predict_SVM_train))
        SVM_test_accuracy.append(accuracy_score(Y_test, Y_predict_SVM_test))
        print("+ C = %f => Train accuracy = %f and Test accuracy = %f " %
              (param, accuracy_score(Y_train, Y_predict_SVM_train),
               accuracy_score(Y_test, Y_predict_SVM_test)))
        print_performance_metrics(Y_predict_SVM_test, Y_test)

    plt.plot(C, SVM_train_accuracy, 'ro-')
    plt.plot(C, SVM_test_accuracy, 'bv--')
    plt.legend(['Training Accuracy', 'Test Accuracy'])
    plt.xlabel('C')
    plt.xscale('log')
    plt.ylabel('Accuracy')
    plt.title('Support Vector Machine')
    print(
        "* Save Support Vector Machine classification result into image/svm.png"
    )
    plt.savefig('image/svm.png')
    plt.show()
Example #4
# Shared imports for this example.
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score


def my_KNeighborsClassifier(X_train_std, X_test, y_train, y_test):
    # Both X_train_std and X_test are assumed to be standardized with the
    # same scaler fitted on the training data.
    acc_list_train = []
    acc_list_test = []
    n = [i for i in range(1, 51)]
    for i in n:
        knn = KNeighborsClassifier(n_neighbors=i)
        knn.fit(X_train_std, y_train)
        acc_list_train.append(accuracy_score(y_train, knn.predict(X_train_std)))
        acc_list_test.append(accuracy_score(y_test, knn.predict(X_test)))

    plt.figure()
    plt.plot(n, acc_list_train, color='Red', label='Training accuracy')
    plt.plot(n, acc_list_test, color='Blue', label='Testing accuracy')
    plt.ylabel('Accuracy Score')
    plt.xlabel('n (Nearest Neighbors)')
    plt.title('KNN')
    plt.legend(loc='upper right')
    print("* Save KNN classification result into image/knn.png")
    plt.savefig('image/knn.png')

    # Pick the smallest k that achieves the best test accuracy.
    max_indexes_test = [i + 1 for i, value in enumerate(acc_list_test)
                        if value == max(acc_list_test)]
    print("* Best K is " + str(max_indexes_test[0]))
    # p is only used by the Minkowski metric, so it is dropped here.
    clf = KNeighborsClassifier(n_neighbors=max_indexes_test[0],
                               metric='euclidean')
    clf.fit(X_train_std, y_train)
    Y_predTrain = clf.predict(X_train_std)
    Y_predTest_best = clf.predict(X_test)
    print("* Training accuracy : ", accuracy_score(y_train, Y_predTrain))
    print("* Testing accuracy : ", accuracy_score(y_test, Y_predTest_best))
    print_performance_metrics(Y_predTest_best, y_test)
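The function above expects both splits to be standardized with the same scaler fitted on the training data only; a minimal sketch of that preprocessing step:

from sklearn.preprocessing import StandardScaler

# Fit on the training split only to avoid leaking test statistics.
scaler = StandardScaler().fit(X_train)
X_train_std = scaler.transform(X_train)
X_test_std = scaler.transform(X_test)
my_KNeighborsClassifier(X_train_std, X_test_std, y_train, y_test)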
Example #5
# Shared imports for this example.
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score


def my_GaussianNB(X_train, Y_train, X_test, Y_test):
    gnb = GaussianNB()
    gnb.fit(X_train, Y_train)
    Y_predTrain = gnb.predict(X_train)
    Y_predTest = gnb.predict(X_test)
    print("* Training accuracy : ", accuracy_score(Y_train, Y_predTrain))
    print("* Testing accuracy : ", accuracy_score(Y_test, Y_predTest))
    print_performance_metrics(Y_predTest, Y_test)
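GaussianNB has no hyperparameter swept in this example; if tuning is wanted, its main knob is var_smoothing, sketched below with a small grid search (an addition, not part of the original example).

import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import GaussianNB

# Log-spaced smoothing values around sklearn's default of 1e-9.
grid = GridSearchCV(GaussianNB(),
                    {'var_smoothing': np.logspace(-12, -6, 7)}, cv=5)
grid.fit(X_train, Y_train)
print(grid.best_params_, grid.best_score_)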
Example #6
# Shared imports for this example.
from sklearn import ensemble
from sklearn.metrics import accuracy_score


def my_RandomForestClassifier(X_train, X_test, Y_train, Y_test,
                              numBaseClassifiers, train_Acc, test_Acc):
    # Appends accuracies into the caller-owned train_Acc/test_Acc lists.
    clf = ensemble.RandomForestClassifier(n_estimators=numBaseClassifiers)
    clf.fit(X_train, Y_train)
    Y_predict_train_EM = clf.predict(X_train)
    Y_predict_test_EM = clf.predict(X_test)
    train_Acc.append(accuracy_score(Y_train, Y_predict_train_EM))
    test_Acc.append(accuracy_score(Y_test, Y_predict_test_EM))
    print("* Train accuracy = %f and Test accuracy = %f " %
          (accuracy_score(Y_train, Y_predict_train_EM),
           accuracy_score(Y_test, Y_predict_test_EM)))
    print_performance_metrics(Y_predict_test_EM, Y_test)
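This helper (and my_Bagging below) appends into caller-owned accumulator lists, which implies an outer sweep over the ensemble size; a hypothetical driver, with the size grid chosen only for illustration:

import matplotlib.pyplot as plt

train_Acc, test_Acc = [], []
sizes = [10, 50, 100, 200]
for n in sizes:
    my_RandomForestClassifier(X_train, X_test, Y_train, Y_test, n,
                              train_Acc, test_Acc)
plt.plot(sizes, train_Acc, 'ro-')
plt.plot(sizes, test_Acc, 'bv--')
plt.legend(['Training Accuracy', 'Test Accuracy'])
plt.xlabel('Number of base classifiers')
plt.ylabel('Accuracy')
plt.show()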
Example #7
# Shared imports for this example.
from sklearn import ensemble
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score


def my_Bagging(X_train, X_test, Y_train, Y_test, numBaseClassifiers,
               max_depth_EM, train_Acc, test_Acc):

    # Bagging over depth-limited decision trees as the base estimator.
    clf = ensemble.BaggingClassifier(
        DecisionTreeClassifier(max_depth=max_depth_EM),
        n_estimators=numBaseClassifiers)
    clf.fit(X_train, Y_train)
    Y_predict_train_EM = clf.predict(X_train)
    Y_predict_test_EM = clf.predict(X_test)
    train_Acc.append(accuracy_score(Y_train, Y_predict_train_EM))
    test_Acc.append(accuracy_score(Y_test, Y_predict_test_EM))
    print("* Train accuracy = %f and Test accuracy = %f " %
          (accuracy_score(Y_train, Y_predict_train_EM),
           accuracy_score(Y_test, Y_predict_test_EM)))
    print_performance_metrics(Y_predict_test_EM, Y_test)
Example #8
# Shared imports for this example (pydotplus also needs the Graphviz
# binaries installed to write PNGs). X_train is assumed to be a DataFrame,
# since feature_names uses X_train.columns.
import numpy as np
import pydotplus
import matplotlib.pyplot as plt
from sklearn import tree
from sklearn.metrics import accuracy_score


def my_DecisionTree(X_train, X_test, y_train, y_test):
    print('* Use entropy as impurity measure :')
    accuracy = np.empty(2, dtype=float)
    max_depths = [2, 3]
    i = 0
    for max_depth in max_depths:
        clf = tree.DecisionTreeClassifier(criterion='entropy',
                                          max_depth=max_depth)
        clf = clf.fit(X_train, y_train)
        # Render the tree; class_names must follow clf.classes_ (sorted
        # label) order, i.e. ['0', '1'] for binary 0/1 labels.
        dot_data = tree.export_graphviz(clf,
                                        feature_names=X_train.columns,
                                        class_names=['0', '1'],
                                        filled=True,
                                        out_file=None)
        graph = pydotplus.graph_from_dot_data(dot_data)
        graph.write_png('image/tree_entropy_%d.png' % (max_depth))
        predY = clf.predict(X_test)
        accuracy[i] = accuracy_score(y_test, predY)
        print('+ Entropy: Max depth %d , Accuracy on test data is %.2f' %
              (max_depth, (accuracy[i])))
        print_performance_metrics(predY, y_test)
        i += 1
    fig, ax = plt.subplots(nrows=1, ncols=2)
    ax[0].set_prop_cycle(color=['red'])
    ax[0].set_ylim([0.8, 1.1])
    ax[0].plot(max_depths, accuracy)
    ax[0].legend(['accuracy-Entropy'], loc='upper left')

    # Using the Gini index as impurity measure, fit decision trees of the
    # same maximum depths [2, 3] to the training set.
    i = 0
    print('* Use Gini index for impurity measure :')
    for max_depth in max_depths:
        clf = tree.DecisionTreeClassifier(criterion='gini',
                                          max_depth=max_depth)
        clf = clf.fit(X_train, y_train)
        dot_data = tree.export_graphviz(clf,
                                        feature_names=X_train.columns,
                                        class_names=['0', '1'],
                                        filled=True,
                                        out_file=None)
        graph = pydotplus.graph_from_dot_data(dot_data)
        graph.write_png('image/tree_gini_%d.png' % (max_depth))
        predY = clf.predict(X_test)
        accuracy[i] = accuracy_score(y_test, predY)
        print('+ Gini: Max depth %d , Accuracy on test data is %.2f' %
              (max_depth, (accuracy_score(y_test, predY))))
        print_performance_metrics(predY, y_test)
        i += 1

    ax[1].set_prop_cycle(color=['green'])
    ax[1].set_ylim([0.8, 1.1])
    ax[1].plot(max_depths, accuracy)
    ax[1].legend(['accuracy-Gini'], loc='upper left')
    # Title the whole figure rather than only the right subplot.
    fig.suptitle('Decision Tree')
    print(
        "* Save Decision Tree classification result into image/decision_tree.png"
    )
    plt.savefig('image/decision_tree.png')
    plt.show()
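export_graphviz plus pydotplus requires the Graphviz system binaries; when those are unavailable, sklearn's built-in tree.plot_tree renders the same fitted tree with matplotlib alone (an alternative sketch, not the original approach; the output filename is made up):

import matplotlib.pyplot as plt
from sklearn import tree

# clf is a fitted DecisionTreeClassifier as in the function above.
fig = plt.figure(figsize=(12, 6))
tree.plot_tree(clf, feature_names=list(X_train.columns),
               class_names=['0', '1'], filled=True)
fig.savefig('image/tree_plot_tree.png')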