Beispiel #1
0
def dtree_classifier(X_train, y_train, X_test, y_test, plotting):

    clf = DecisionTreeClassifier(random_state=3169, max_depth=3)
    st = time.time()
    clf.fit(X_train, y_train)
    end = time.time()
    train_time = end - st
    stt = time.time()
    y_pred = clf.predict(X_test)
    endt = time.time()
    y_test = np.array(y_test)
    y_pred = np.array(y_pred)
    title = 'Confusion Matrix with Decision Tree'
    #plot_tree(clf.fit(X_train, y_train),filled=True)
    plt.show()
    clf_name = 'dtree'
    if plotting == True:
        plot_learning_curve(clf,
                            'Learning Curve for Decision Tree',
                            X_train,
                            y_train, (0.7, 1.01),
                            n_jobs=4)
        plot_validation_curve(X_train, y_train, clf, clf_name)
        confusion(y_test, y_pred, title)

    print('Accuracy for Decision Tree classifier is ' +
          str(accuracy_score(y_test, y_pred)))
    print('Training time for Decision Tree classifier: ' + str(train_time) +
          ' seconds')
    print('Testing time for Decision Tree classifier: ' + str(endt - stt) +
          ' seconds')
    print()
Beispiel #2
0
def neural_net(X_train, y_train, X_test, y_test, learning_rate, plotting):
    clf = MLPClassifier(activation='tanh',
                        alpha=1e-03,
                        batch_size='auto',
                        learning_rate='adaptive',
                        learning_rate_init=learning_rate,
                        solver='adam')
    st = time.time()
    clf.fit(X_train, y_train)
    end = time.time()
    train_time = end - st
    stt = time.time()
    y_pred = clf.predict(X_test)
    endt = time.time()
    y_test = np.array(y_test)
    y_pred = np.array(y_pred)
    title = 'Confusion Matrix with Neural Network'
    clf_name = 'neural_net'
    if plotting == True:
        plot_learning_curve(clf,
                            'Learning Curve for Neural Network',
                            X_train,
                            y_train, (0.7, 1.01),
                            n_jobs=4)
        plot_validation_curve(X_train, y_train, clf, clf_name)
        confusion(y_test, y_pred, title)

    print('Accuracy for Neural Network is ' +
          str(accuracy_score(y_test, y_pred)))
    print('Training time for Neural Network: ' + str(train_time) + ' seconds')
    print('Testing time for Neural Network: ' + str(endt - stt) + ' seconds')
    print()
Beispiel #3
0
def tuned_svm_classifier(X_train, y_train, X_test, y_test, kernel, gamma,
                         plotting):
    # Apply SVM
    clf = svm.SVC(random_state=3169, kernel='linear', gamma='auto')
    param_range = [0.01, 0.25, 0.5, 0.75, 1, 1.25, 1.5, 2.75, 3]
    clf = GridSearchCV(clf, param_grid={'C': param_range}, cv=5)
    st = time.time()
    clf.fit(X_train, y_train)
    end = time.time()
    train_time = end - st
    stt = time.time()
    y_pred = clf.predict(X_test)
    endt = time.time()
    y_test = np.array(y_test)
    y_pred = np.array(y_pred)
    title = 'Confusion Matrix with SVM'
    clf_name = 'svm'
    if plotting == True:
        plot_learning_curve(clf,
                            'Learning Curve for SVM',
                            X_train,
                            y_train, (0.7, 1.01),
                            n_jobs=4)
        confusion(y_test, y_pred, title)

    print('Accuracy for Tuned SVM classifier is ' +
          str(accuracy_score(y_test, y_pred)))
    print('Training time for Tuned SVM classifier: ' + str(train_time) +
          ' seconds')
    print('Testing time for Tuned SVM classifier: ' + str(endt - stt) +
          ' seconds')
    print('Best parameters: ' + str(clf.best_params_))
    print()
    return clf.best_params_
Beispiel #4
0
def tuned_weighted_knn_classifier(X_train, y_train, X_test, y_test, neighbors,
                                  plotting):
    clf = KNeighborsClassifier(weights='distance')
    param_range = np.arange(1, 8)
    clf = GridSearchCV(clf, param_grid={'n_neighbors': param_range}, cv=5)
    st = time.time()
    clf.fit(X_train, y_train)
    end = time.time()
    train_time = end - st
    stt = time.time()
    y_pred = clf.predict(X_test)
    endt = time.time()
    y_test = np.array(y_test)
    y_pred = np.array(y_pred)
    title = 'Confusion Matrix with Tuned Weighted KNN'
    clf_name = 'knn'
    if plotting == True:
        plot_learning_curve(clf,
                            'Learning Curve for Tuned Weighted KNN',
                            X_train,
                            y_train, (0.7, 1.01),
                            n_jobs=4)
        confusion(y_test, y_pred, title)

    print('Accuracy for Tuned Weighted KNN classifier is ' +
          str(accuracy_score(y_test, y_pred)))
    print('Training time for Tuned Weighed KNN classifier: ' +
          str(train_time) + ' seconds')
    print('Testing time for Tuned Weighed KNN classifier: ' + str(endt - stt) +
          ' seconds')
    print('Best parameters: ' + str(clf.best_params_))
    print()
    return clf.best_params_
Beispiel #5
0
def knn_classifier(X_train, y_train, X_test, y_test, neighbors, plotting):
    clf = KNeighborsClassifier(n_neighbors=2)
    st = time.time()
    clf.fit(X_train, y_train)
    end = time.time()
    train_time = end - st
    stt = time.time()
    y_pred = clf.predict(X_test)
    endt = time.time()
    y_test = np.array(y_test)
    y_pred = np.array(y_pred)
    title = 'Confusion Matrix with KNN'
    clf_name = 'knn'
    if plotting == True:
        plot_learning_curve(clf,
                            'Learning Curve for KNN',
                            X_train,
                            y_train, (0.7, 1.01),
                            n_jobs=4)
        plot_validation_curve(X_train, y_train, clf, clf_name)
        confusion(y_test, y_pred, title)

    print('Accuracy for KNN classifier is ' +
          str(accuracy_score(y_test, y_pred)))
    print('Training time for KNN classifier: ' + str(train_time) + ' seconds')
    print('Testing time for KNN SVM classifier: ' + str(endt - stt) +
          ' seconds')
    print()
Beispiel #6
0
def svm_classifier(X_train, y_train, X_test, y_test, kernel, gamma, plotting):
    # Apply SVM
    clf = svm.SVC(gamma='auto', random_state=3169, kernel=kernel, C=4)
    st = time.time()
    clf.fit(X_train, y_train)
    end = time.time()
    train_time = end - st
    stt = time.time()
    y_pred = clf.predict(X_test)
    endt = time.time()
    y_test = np.array(y_test)
    y_pred = np.array(y_pred)
    title = 'Confusion Matrix with SVM'
    clf_name = 'svm'
    print(str(clf.get_params))
    if plotting == True:
        plot_learning_curve(clf,
                            'Learning Curve for SVM',
                            X_train,
                            y_train, (0.7, 1.01),
                            n_jobs=5)
        plot_validation_curve(X_train, y_train, clf, clf_name)
        confusion(y_test, y_pred, title)

    print('Accuracy for SVM classifier is ' +
          str(accuracy_score(y_test, y_pred)))
    print('Training time for SVM classifier: ' + str(train_time) + ' seconds')
    print('Testing time for SVM classifier: ' + str(endt - stt) + ' seconds')
Beispiel #7
0
def tuned_dtree_classifier(X_train, y_train, X_test, y_test, plotting):
    clf = DecisionTreeClassifier(random_state=3169)
    param_range = [
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20
    ]
    clf = GridSearchCV(clf, param_grid={'max_depth': param_range}, cv=5)
    st = time.time()
    clf.fit(X_train, y_train)
    end = time.time()
    train_time = end - st
    stt = time.time()
    y_pred = clf.predict(X_test)
    endt = time.time()
    y_test = np.array(y_test)
    y_pred = np.array(y_pred)
    title = 'Confusion Matrix with Tuned Decision Tree'
    #tree.plot_tree(clf.fit(X_train, Y_train))
    clf_name = 'dtree'
    if plotting == True:
        plot_learning_curve(clf,
                            'Learning Curve for Tuned Decision Tree',
                            X_train,
                            y_train, (0.7, 1.01),
                            n_jobs=4)
        confusion(y_test, y_pred, title)

    print('Accuracy for Tuned Decision Tree classifier is ' +
          str(accuracy_score(y_test, y_pred)))
    print('Training time for Tuned Decision Tree classifier: ' +
          str(train_time) + ' seconds')
    print('Testing time for Tuned Decision Tree classifier: ' +
          str(endt - stt) + ' seconds')
    print('Best parameters: ' + str(clf.best_params_))
    print()
    return clf.best_params_
def tuned_boosting_classifier(X_train, y_train, X_test, y_test, plotting):
    clf = DecisionTreeClassifier(random_state=42,
                                 max_depth=2,
                                 min_samples_leaf=1)
    boost_clf = AdaBoostClassifier(base_estimator=clf, random_state=42)
    param_range = np.arange(10, 100)
    boost_clf = GridSearchCV(boost_clf,
                             param_grid={'n_estimators': param_range},
                             cv=5)
    st = time.time()
    boost_clf.fit(X_train, y_train)
    end = time.time()
    train_time = end - st
    stt = time.time()
    y_pred = boost_clf.predict(X_test)
    endt = time.time()
    y_test = np.array(y_test)
    y_pred = np.array(y_pred)
    title = 'Confusion Matrix : Tuned Boosting'
    #tree.plot_tree(clf.fit(X_train, Y_train))
    clf_name = 'boosting'
    if plotting == True:
        plot_learning_curve(boost_clf,
                            'Learning Curve : Tuned Boosting classifier',
                            X_train,
                            y_train, (0.7, 1.01),
                            n_jobs=4)
        #plot_validation_curve(X_train,y_train,clf,clf_name)
        confusion(y_test, y_pred, title)

    print('Accuracy for Tuned Boosting classifier is ' +
          str(accuracy_score(y_test, y_pred)))
    print('Training time for Tuned Boosting classifier: ' + str(train_time) +
          ' seconds')
    print('Testing time for Tuned Boosting classifier: ' + str(endt - stt) +
          ' seconds')
    print('Best parameters: ' + str(boost_clf.best_params_))
    print()
    return boost_clf.best_params_
Beispiel #9
0
def boost_classifier(X_train, y_train, X_test, y_test, plotting):
    clf = DecisionTreeClassifier(random_state=3169,
                                 max_depth=2,
                                 min_samples_leaf=1)
    boost_clf = AdaBoostClassifier(base_estimator=clf,
                                   random_state=3169,
                                   n_estimators=100)
    st = time.time()
    boost_clf.fit(X_train, y_train)
    end = time.time()
    train_time = end - st
    stt = time.time()
    y_pred = boost_clf.predict(X_test)
    endt = time.time()
    y_test = np.array(y_test)
    y_pred = np.array(y_pred)
    title = 'Confusion Matrix with Boosting'
    #plot_tree(clf.fit(X_train, y_train),filled=True)
    plt.show()
    clf_name = 'boosting'
    if plotting == True:
        plot_learning_curve(boost_clf,
                            'Learning Curve for Boosting',
                            X_train,
                            y_train, (0.7, 1.01),
                            n_jobs=4)
        plot_validation_curve(X_train, y_train, boost_clf, clf_name)
        confusion(y_test, y_pred, title)

    print('Accuracy for Boosting classifier is ' +
          str(accuracy_score(y_test, y_pred)))
    print('Training time for Boosting classifier: ' + str(train_time) +
          ' seconds')
    print('Testing time for Boosting classifier: ' + str(endt - stt) +
          ' seconds')
    print()
Beispiel #10
0
def tuned_neural_net(X_train, y_train, X_test, y_test, learning_rate,
                     plotting):
    clf = MLPClassifier(activation='relu',
                        max_iter=5000,
                        alpha=1e-05,
                        batch_size='auto',
                        learning_rate='adaptive',
                        learning_rate_init=learning_rate,
                        solver='adam')
    alpha_range = [1e-05, 1e-04, 1e-03, 1e-02, 1e-01, 1, 2, 3]
    lr_range = [0.00001, 0.0001, 0.001, 0.01, 0.1, 0.5, 0.8, 1]
    params = {
        'alpha': alpha_range,
        'learning_rate_init': lr_range,
        'activation': ['relu', 'tanh']
    }
    clf = GridSearchCV(clf, param_grid=params, cv=5)
    st = time.time()
    clf.fit(X_train, y_train)
    end = time.time()
    train_time = end - st
    stt = time.time()
    y_pred = clf.predict(X_test)
    endt = time.time()
    y_test = np.array(y_test)
    y_pred = np.array(y_pred)
    title = 'Confusion Matrix with Tuned Neural Network'
    clf_name = 'neural_net'
    if plotting == True:
        plot_learning_curve(clf,
                            'Learning Curve for Neural Network',
                            X_train,
                            y_train, (0.7, 1.01),
                            n_jobs=4)
        confusion(y_test, y_pred, title)
        clf_best = MLPClassifier(hidden_layer_sizes=(5, 2),
                                 random_state=3169,
                                 max_iter=1)
        clf_best.set_params(
            alpha=clf.best_params_['alpha'],
            learning_rate_init=clf.best_params_['learning_rate_init'])
        num_epochs = 1000
        train_loss = np.zeros(num_epochs)
        train_scores = np.zeros(num_epochs)
        val_scores = np.zeros(num_epochs)
        # Split training set into training and validation
        X_train_, X_val, y_train_, y_val = train_test_split(X_train,
                                                            y_train,
                                                            test_size=0.4,
                                                            random_state=3169)
        for i in range(num_epochs):
            clf_best.fit(X_train_, y_train_)
            train_loss[i] = clf_best.loss_
            train_scores[i] = accuracy_score(y_train_,
                                             clf_best.predict(X_train_))
        range_loss = np.arange(num_epochs) + 1
        plt.figure()
        plt.plot(range_loss, train_loss)
        plt.title('Training loss curve for neural network')
        plt.xlabel('Epochs')
        plt.ylabel("Loss")
        plt.grid()
        plt.show()

    print('Accuracy for Tuned Neural Network is ' +
          str(accuracy_score(y_test, y_pred)))
    print('Training time for Tuned NN: ' + str(train_time) + ' seconds')
    print('Testing time for Tuned NN: ' + str(endt - stt) + ' seconds')
    print('Best parameters: ' + str(clf.best_params_))
    print()
    return clf.best_params_