def run_neural_network(X_train, X_test, y_train, y_test):
    """Fit an ``MLPClassifier`` and print how it scores on the test split.

    Both label vectors are passed through ``y_to_float`` before use. The
    function prints a heading, the confusion matrix, the classification
    report and the accuracy; it returns nothing.

    Args:
        X_train: Training features handed to ``fit``.
        X_test: Test features handed to ``predict``.
        y_train: Training labels (converted with ``y_to_float``).
        y_test: Test labels (converted with ``y_to_float``).
    """
    train_labels = y_to_float(y_train)
    expected = y_to_float(y_test)

    model = MLPClassifier(alpha=1, max_iter=1000)
    model.fit(X_train, train_labels)
    predicted = model.predict(X_test)

    print("Neural Network")
    # Same three reports, in the same order, for every model in this file.
    for metric in (confusion_matrix, classification_report, accuracy_score):
        print(metric(expected, predicted))
def run_random_forest(X_train, X_test, y_train, y_test): y_train_f = y_to_float(y_train) y_test_f = y_to_float(y_test) classifier = RandomForestClassifier(n_estimators=10, random_state=0, n_jobs=-1) classifier.fit(X_train, y_train_f) y_pred = classifier.predict(X_test) print("Random Forest") print(confusion_matrix(y_test_f, y_pred)) print(classification_report(y_test_f, y_pred)) print(accuracy_score(y_test_f, y_pred)) '''
def run_bernoulliNB(X_train, X_test, y_train, y_test):
    """Fit a ``BernoulliNB`` model and print how it scores on the test split.

    Both label vectors are passed through ``y_to_float`` before use. The
    function prints a heading, the confusion matrix, the classification
    report and the accuracy; it returns nothing.

    Args:
        X_train: Training features handed to ``fit``.
        X_test: Test features handed to ``predict``.
        y_train: Training labels (converted with ``y_to_float``).
        y_test: Test labels (converted with ``y_to_float``).
    """
    train_labels = y_to_float(y_train)
    expected = y_to_float(y_test)

    model = BernoulliNB()
    model.fit(X_train, train_labels)
    predicted = model.predict(X_test)

    # Report: confusion matrix first, then per-class report and accuracy.
    print("BERNOULLI NAIVE BAYES")
    for metric in (confusion_matrix, classification_report, accuracy_score):
        print(metric(expected, predicted))
def run_ada_boost(X_train, X_test, y_train, y_test):
    """Fit an ``AdaBoostClassifier`` and print how it scores on the test split.

    The ensemble boosts 200 depth-1 decision trees. Both label vectors are
    passed through ``y_to_float`` before use. The function prints a heading,
    the confusion matrix, the classification report and the accuracy; it
    returns nothing.

    Args:
        X_train: Training features handed to ``fit``.
        X_test: Test features handed to ``predict``.
        y_train: Training labels (converted with ``y_to_float``).
        y_test: Test labels (converted with ``y_to_float``).
    """
    train_labels = y_to_float(y_train)
    expected = y_to_float(y_test)

    # The base learner is a single-split tree; boosting combines 200 of them.
    weak_learner = DecisionTreeClassifier(max_depth=1)
    ensemble = AdaBoostClassifier(weak_learner, n_estimators=200)
    ensemble.fit(X_train, train_labels)
    predicted = ensemble.predict(X_test)

    print("Ada Boost")
    for metric in (confusion_matrix, classification_report, accuracy_score):
        print(metric(expected, predicted))
def run_multinomialNB(X_train, X_test, y_train, y_test):
    """Fit ``MultinomialNB`` on TF-IDF features and print its test-split scores.

    The TF-IDF weighting is fitted on the training features only and the same
    fitted transform is then applied to the test features, so the classifier
    is trained and evaluated in one feature space. (Previously the model was
    trained on TF-IDF values but asked to predict on the untransformed test
    matrix.)

    Both label vectors are passed through ``y_to_float`` before use. The
    function prints a heading, the confusion matrix, the classification
    report and the accuracy; it returns nothing.

    Args:
        X_train: Training features; presumably a term-count matrix, since it
            is fed to ``TfidfTransformer`` -- TODO confirm against the caller.
        X_test: Test features with the same columns as ``X_train``.
        y_train: Training labels (converted with ``y_to_float``).
        y_test: Test labels (converted with ``y_to_float``).
    """
    y_train_f = y_to_float(y_train)
    y_test_f = y_to_float(y_test)

    # Learn the IDF weights from the training split only, then reuse them for
    # the test split to avoid both train/test skew and test-set leakage.
    tfidf = TfidfTransformer()
    X_train_tfidf = tfidf.fit_transform(X_train)
    X_test_tfidf = tfidf.transform(X_test)

    classifier = MultinomialNB()
    classifier.fit(X_train_tfidf, y_train_f)
    y_pred = classifier.predict(X_test_tfidf)

    # Confusion matrix, per-class report and overall accuracy.
    print("MULTINOMIAL NAIVE BAYES")
    print(confusion_matrix(y_test_f, y_pred))
    print(classification_report(y_test_f, y_pred))
    print(accuracy_score(y_test_f, y_pred))
def run_gaussianNB(X_train, X_test, y_train, y_test):
    """Fit a ``GaussianNB`` model and print how it scores on the test split.

    Both label vectors are passed through ``y_to_float`` before use. The
    function prints a heading, the confusion matrix, the classification
    report and the accuracy; it returns nothing.

    Args:
        X_train: Training features handed to ``fit``.
        X_test: Test features handed to ``predict``.
        y_train: Training labels (converted with ``y_to_float``).
        y_test: Test labels (converted with ``y_to_float``).
    """
    train_labels = y_to_float(y_train)
    expected = y_to_float(y_test)

    # Train the naive Bayes model, then predict a class for each test row.
    model = GaussianNB()
    model.fit(X_train, train_labels)
    predicted = model.predict(X_test)

    # Report: confusion matrix first, then per-class report and accuracy.
    print("GAUSSIAN NAIVE BAYES")
    for metric in (confusion_matrix, classification_report, accuracy_score):
        print(metric(expected, predicted))
def run_svm(X_train, X_test, y_train, y_test):
    """Fit a ``LinearSVC`` and print how it scores on the test split.

    Both label vectors are passed through ``y_to_float`` before use. The
    function prints a heading, the confusion matrix, the classification
    report and the accuracy; it returns nothing.

    The decision-function scores are no longer computed here: the value was
    assigned to a local that nothing read. The disabled plotting calls at the
    bottom compute the scores themselves if they are re-enabled.

    Args:
        X_train: Training features handed to ``fit``.
        X_test: Test features handed to ``predict``.
        y_train: Training labels (converted with ``y_to_float``).
        y_test: Test labels (converted with ``y_to_float``).
    """
    y_train_f = y_to_float(y_train)
    y_test_f = y_to_float(y_test)

    classifier = svm.LinearSVC(random_state=0, max_iter=1000)
    classifier.fit(X_train, y_train_f)
    y_pred = classifier.predict(X_test)

    print("SVM")
    print(confusion_matrix(y_test_f, y_pred))
    print(classification_report(y_test_f, y_pred))
    print(accuracy_score(y_test_f, y_pred))

    # Disabled diagnostics plots, kept for reference.
    #plot_learning_curve(classifier, "SVM Learning Curve", X_train, y_train_f, ylim=(0.6, 1.01), cv=5, n_jobs=-1)
    #plot_roc_curve("SVM ROC Curve", y_test_f, classifier.decision_function(X_test))
    #plot_pr_curve("SVM Precision Recall Curve", y_test_f, y_pred, classifier.decision_function(X_test))
def run_linear_regression(X_train, X_test, y_train, y_test): y_train_f = y_to_float(y_train) y_test_f = y_to_float(y_test) regr = LinearRegression(n_jobs=-1) regr.fit(X_train, y_train_f) y_pred = regr.predict(X_test) y_pred = y_pred.round().astype(int) print("LINEAR REGRESSION") print(confusion_matrix(y_test_f, y_pred)) print(classification_report(y_test_f, y_pred)) print(accuracy_score(y_test_f, y_pred)) # The coefficients #print('Coefficients: \n', regr.coef_) # The mean squared error print("Mean squared error: %.2f" % mean_squared_error(y_test_f, y_pred)) # Explained variance score: 1 is perfect prediction print('Variance score: %.2f' % r2_score(y_test_f, y_pred)) '''
def run_decision_tree(X_train, X_test, y_train, y_test): y_train_f = y_to_float(y_train) y_test_f = y_to_float(y_test) classifier = DecisionTreeClassifier() classifier.fit(X_train, y_train_f) y_pred = classifier.predict(X_test) print("Decision Tree") print(confusion_matrix(y_test_f, y_pred)) print(classification_report(y_test_f, y_pred)) print(accuracy_score(y_test_f, y_pred)) ''' plot_learning_curve(classifier, "Decision Tree Learning Curve", X_train, y_train_f, ylim=(0.6, 1.01), cv=5, n_jobs=-1) plot_roc_curve("Decision Tree ROC Curve", y_test_f, classifier.predict_proba(X_test)[:, 1]) plot_pr_curve("Decision Tree Precision Recall Curve", y_test_f, y_pred, classifier.predict_proba(X_test)[:, 1]) ''' ''' max_depths = np.linspace(1, 32, 32, endpoint=True) train_results = [] test_results = [] for max_depth in max_depths: dt = DecisionTreeClassifier(max_depth=max_depth) dt.fit(X_train, y_train_f) train_pred = dt.predict(X_train) false_positive_rate, true_positive_rate, thresholds = roc_curve(y_train_f, train_pred) roc_auc = auc(false_positive_rate, true_positive_rate) # Add auc score to previous train results train_results.append(roc_auc) y_pred = dt.predict(X_test) false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test_f, y_pred) roc_auc = auc(false_positive_rate, true_positive_rate) # Add auc score to previous test results test_results.append(roc_auc) from matplotlib.legend_handler import HandlerLine2D line1, = plt.plot(max_depths, train_results, 'b', label="Train AUC") line2, = plt.plot(max_depths, test_results, 'r', label="Test AUC") plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)}) plt.ylabel('AUC score') plt.xlabel('Tree depth') plt.show() ''' ''' min_samples_splits = np.linspace(0.1, 1.0, 10, endpoint=True) train_results = [] test_results = [] for min_samples_split in min_samples_splits: dt = DecisionTreeClassifier(min_samples_split=min_samples_split) dt.fit(X_train, y_train_f) train_pred = dt.predict(X_train) false_positive_rate, 
true_positive_rate, thresholds = roc_curve(y_train_f, train_pred) roc_auc = auc(false_positive_rate, true_positive_rate) train_results.append(roc_auc) y_pred = dt.predict(X_test) false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test_f, y_pred) roc_auc = auc(false_positive_rate, true_positive_rate) test_results.append(roc_auc) from matplotlib.legend_handler import HandlerLine2D line1, = plt.plot(min_samples_splits, train_results, 'b', label="Train AUC") line2, = plt.plot(min_samples_splits, test_results, 'r', label="Test AUC") plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)}) plt.ylabel('AUC score') plt.xlabel('min samples split') plt.show() ''' '''