Example #1
0
def run_neural_network(X_train, X_test, y_train, y_test):
    y_train_f = y_to_float(y_train)
    y_test_f = y_to_float(y_test)

    classifier = MLPClassifier(alpha=1, max_iter=1000)
    classifier.fit(X_train, y_train_f)

    y_pred = classifier.predict(X_test)

    print("Neural Network")
    print(confusion_matrix(y_test_f, y_pred))
    print(classification_report(y_test_f, y_pred))
    print(accuracy_score(y_test_f, y_pred))
Example #2
0
def run_random_forest(X_train, X_test, y_train, y_test):
    y_train_f = y_to_float(y_train)
    y_test_f = y_to_float(y_test)

    classifier = RandomForestClassifier(n_estimators=10, random_state=0, n_jobs=-1)
    classifier.fit(X_train, y_train_f)
    y_pred = classifier.predict(X_test)

    print("Random Forest")
    print(confusion_matrix(y_test_f, y_pred))
    print(classification_report(y_test_f, y_pred))
    print(accuracy_score(y_test_f, y_pred))

    '''
def run_bernoulliNB(X_train, X_test, y_train, y_test):
    y_train_f = y_to_float(y_train)
    y_test_f = y_to_float(y_test)

    classifier = BernoulliNB()
    classifier.fit(X_train, y_train_f)

    y_pred = classifier.predict(X_test)

    # Confusion matrix
    print("BERNOULLI NAIVE BAYES")
    print(confusion_matrix(y_test_f, y_pred))
    print(classification_report(y_test_f, y_pred))
    print(accuracy_score(y_test_f, y_pred))
Example #4
0
def run_ada_boost(X_train, X_test, y_train, y_test):
    y_train_f = y_to_float(y_train)
    y_test_f = y_to_float(y_test)

    classifier = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1),
                                    n_estimators=200)
    classifier.fit(X_train, y_train_f)

    y_pred = classifier.predict(X_test)

    print("Ada Boost")
    print(confusion_matrix(y_test_f, y_pred))
    print(classification_report(y_test_f, y_pred))
    print(accuracy_score(y_test_f, y_pred))
def run_multinomialNB(X_train, X_test, y_train, y_test):
    y_train_f = y_to_float(y_train)
    y_test_f = y_to_float(y_test)
    tf_transformer = TfidfTransformer().fit_transform(X_train)

    classifier = MultinomialNB()
    classifier.fit(tf_transformer, y_train_f)

    y_pred = classifier.predict(X_test)

    # Confusion matrix
    print("MULTINOMIAL NAIVE BAYES")
    print(confusion_matrix(y_test_f, y_pred))
    print(classification_report(y_test_f, y_pred))
    print(accuracy_score(y_test_f, y_pred))
def run_gaussianNB(X_train, X_test, y_train, y_test):
    y_train_f = y_to_float(y_train)
    y_test_f = y_to_float(y_test)

    # Naive Bayes
    classifier = GaussianNB()
    classifier.fit(X_train, y_train_f)

    # predict class
    y_pred = classifier.predict(X_test)

    # Confusion matrix
    print("GAUSSIAN NAIVE BAYES")
    print(confusion_matrix(y_test_f, y_pred))
    print(classification_report(y_test_f, y_pred))
    print(accuracy_score(y_test_f, y_pred))
Example #7
0
def run_svm(X_train, X_test, y_train, y_test):
    y_train_f = y_to_float(y_train)
    y_test_f = y_to_float(y_test)

    classifier = svm.LinearSVC(random_state=0, max_iter=1000)
    classifier.fit(X_train, y_train_f)
    y_pred = classifier.predict(X_test)
    y_score = classifier.decision_function(X_test)

    print("SVM")
    print(confusion_matrix(y_test_f, y_pred))
    print(classification_report(y_test_f, y_pred))
    print(accuracy_score(y_test_f, y_pred))

    #plot_learning_curve(classifier, "SVM Learning Curve", X_train, y_train_f, ylim=(0.6, 1.01), cv=5, n_jobs=-1)
    #plot_roc_curve("SVM ROC Curve", y_test_f, classifier.decision_function(X_test))
    #plot_pr_curve("SVM Precision Recall Curve", y_test_f, y_pred, classifier.decision_function(X_test))
def run_linear_regression(X_train, X_test, y_train, y_test):
    y_train_f = y_to_float(y_train)
    y_test_f = y_to_float(y_test)

    regr = LinearRegression(n_jobs=-1)
    regr.fit(X_train, y_train_f)

    y_pred = regr.predict(X_test)
    y_pred = y_pred.round().astype(int)

    print("LINEAR REGRESSION")
    print(confusion_matrix(y_test_f, y_pred))
    print(classification_report(y_test_f, y_pred))
    print(accuracy_score(y_test_f, y_pred))

    # The coefficients
    #print('Coefficients: \n', regr.coef_)
    # The mean squared error
    print("Mean squared error: %.2f" % mean_squared_error(y_test_f, y_pred))
    # Explained variance score: 1 is perfect prediction
    print('Variance score: %.2f' % r2_score(y_test_f, y_pred))
    '''
Example #9
0
def run_decision_tree(X_train, X_test, y_train, y_test):
    y_train_f = y_to_float(y_train)
    y_test_f = y_to_float(y_test)

    classifier = DecisionTreeClassifier()
    classifier.fit(X_train, y_train_f)
    y_pred = classifier.predict(X_test)

    print("Decision Tree")
    print(confusion_matrix(y_test_f, y_pred))
    print(classification_report(y_test_f, y_pred))
    print(accuracy_score(y_test_f, y_pred))
    '''
    plot_learning_curve(classifier, "Decision Tree Learning Curve", X_train, y_train_f, ylim=(0.6, 1.01), cv=5, n_jobs=-1)
    plot_roc_curve("Decision Tree ROC Curve", y_test_f, classifier.predict_proba(X_test)[:, 1])
    plot_pr_curve("Decision Tree Precision Recall Curve", y_test_f, y_pred, classifier.predict_proba(X_test)[:, 1])
    '''
    '''
    max_depths = np.linspace(1, 32, 32, endpoint=True)
    train_results = []
    test_results = []
    for max_depth in max_depths:
        dt = DecisionTreeClassifier(max_depth=max_depth)
        dt.fit(X_train, y_train_f)
        train_pred = dt.predict(X_train)
        false_positive_rate, true_positive_rate, thresholds = roc_curve(y_train_f, train_pred)
        roc_auc = auc(false_positive_rate, true_positive_rate)
        # Add auc score to previous train results
        train_results.append(roc_auc)
        y_pred = dt.predict(X_test)
        false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test_f, y_pred)
        roc_auc = auc(false_positive_rate, true_positive_rate)
        # Add auc score to previous test results
        test_results.append(roc_auc)
    from matplotlib.legend_handler import HandlerLine2D
    line1, = plt.plot(max_depths, train_results, 'b', label="Train AUC")
    line2, = plt.plot(max_depths, test_results, 'r', label="Test AUC")
    plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)})
    plt.ylabel('AUC score')
    plt.xlabel('Tree depth')
    plt.show()
    '''
    '''
    min_samples_splits = np.linspace(0.1, 1.0, 10, endpoint=True)
    train_results = []
    test_results = []
    for min_samples_split in min_samples_splits:
        dt = DecisionTreeClassifier(min_samples_split=min_samples_split)
        dt.fit(X_train, y_train_f)
        train_pred = dt.predict(X_train)
        false_positive_rate, true_positive_rate, thresholds = roc_curve(y_train_f, train_pred)
        roc_auc = auc(false_positive_rate, true_positive_rate)
        train_results.append(roc_auc)
        y_pred = dt.predict(X_test)
        false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test_f, y_pred)
        roc_auc = auc(false_positive_rate, true_positive_rate)
        test_results.append(roc_auc)
    from matplotlib.legend_handler import HandlerLine2D
    line1, = plt.plot(min_samples_splits, train_results, 'b', label="Train AUC")
    line2, = plt.plot(min_samples_splits, test_results, 'r', label="Test AUC")
    plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)})
    plt.ylabel('AUC score')
    plt.xlabel('min samples split')
    plt.show()
    '''
    '''