예제 #1
0

# The two classifiers being compared. Both classes come from outside this
# snippet (GaussianNB is presumably scikit-learn's -- confirm against imports).
nbc = NaiveBayesClassifier()
gnb = GaussianNB()


# finding best train/(train+test) ratio
# Candidate training fractions: 17 evenly spaced values from 0.1 to 0.9
# inclusive, i.e. steps of 0.05.
train_fractions = np.linspace(start=0.1, stop=0.9, num=17)

# One accuracy per candidate fraction, stored as a column vector. The length
# is derived from train_fractions so the buffer can never fall out of sync
# with the grid if the number of candidates is changed above.
nbc_prediction_accuracies = np.zeros((train_fractions.size, 1))

# NOTE(review): split_dataset is called independently in each sweep below; if
# it shuffles randomly, the two models are scored on different splits --
# confirm whether that is intended.
for idx, train_frac in enumerate(train_fractions):
    X_train, X_test, y_train, y_test = split_dataset(main_df, train_frac=train_frac)
    # alternatively sklearn.model_selection.train_test_split can be used
    nbc.fit(X_train, y_train)
    predictions = nbc.predict(X_test)
    nbc_prediction_accuracies[idx] = accuracy_score(y_test, predictions)

# np.argmax works on the flattened array, which for an (n, 1) column is the
# row index; ties resolve to the first (smallest) fraction.
best_train_fraction_nbc = train_fractions[np.argmax(nbc_prediction_accuracies)]

# Same sweep for the second classifier.
gnb_prediction_accuracies = np.zeros((train_fractions.size, 1))

for idx, train_frac in enumerate(train_fractions):
    X_train, X_test, y_train, y_test = split_dataset(main_df, train_frac=train_frac)
    # alternatively sklearn.model_selection.train_test_split can be used
    gnb.fit(X_train, y_train)
    predictions = gnb.predict(X_test)
    gnb_prediction_accuracies[idx] = accuracy_score(y_test, predictions)

best_train_fraction_gnb = train_fractions[np.argmax(gnb_prediction_accuracies)]
예제 #2
0
    print("X_train_val shape: {}, X_test shape: {}".format(
        X_train_val.shape, X_test.shape))
    print("y_train_val shape: {}, y_test shape: {}".format(
        y_train_val.shape, y_test.shape))
    X_train, X_val, y_train, y_val = train_test_split(X_train_val,
                                                      y_train_val,
                                                      test_size=0.1,
                                                      random_state=42)
    print("X_train shape: {}, X_val shape: {}".format(X_train.shape,
                                                      X_val.shape))
    print("y_train shape: {}, y_val shape: {}".format(y_train.shape,
                                                      y_val.shape))

    nb_clf = NaiveBayesClassifier()
    nb_clf.fit(X_train, y_train)
    y_pred_val = nb_clf.predict(X_val)
    y_pred_test = nb_clf.predict(X_test)
    print('NB validation acc: {}'.format((y_pred_val == y_val).mean()))
    evaluate(y_test, y_pred_test)

    for k in [1, 5, 9]:
        knn_clf = KNNClassifier(k)
        knn_clf.fit(X_train, y_train)
        y_pred_val = knn_clf.predict(X_val)
        y_pred_test = knn_clf.predict(X_test)
        print('{}-nn validation acc: {}'.format(k,
                                                (y_pred_val == y_val).mean()))
        evaluate(y_test, y_pred_test)

    c_values = [0.5, 1, 1.5, 2]
    for C in c_values: