Example #1
0
def get_best_knn_prediction(x_train, x_test, y_train, y_test):
    """Sweep K over 1..9 and keep the KNN run with the best test score.

    Runs ``models.KNN`` once per K, records train/test accuracy for each,
    plots both curves against K, and returns the winning run.

    Returns:
        (best_predictions, max_test_score, best_k) for the K whose test
        score was highest (ties keep the smaller K).
    """
    ks = range(1, 10)
    test_scores = []
    train_scores = []

    best_k = 0
    best_predictions = []
    max_test_score = 0

    for k in ks:
        predictions, test_acc, train_acc = models.KNN(
            x_train, y_train, x_test, y_test, k)
        test_scores.append(test_acc)
        train_scores.append(train_acc)

        # Strict '>' keeps the first (smallest) K on ties.
        if test_acc > max_test_score:
            best_k, best_predictions, max_test_score = k, predictions, test_acc

    print("Best K: ", best_k)
    plt.plot(ks, test_scores, label='Test Accuracy')
    plt.plot(ks, train_scores, label='Train Accuracy')
    plt.legend()
    plt.xlabel("K")
    plt.ylabel("Accuracy (%)")
    plt.title("")
    plt.show()

    return best_predictions, max_test_score, best_k
Example #2
0
def main():
    """Entry point: load data, optionally reduce dimensionality, classify.

    Train and test sets are reduced together (concatenated) so both live in
    the same projected space, then split back apart before fitting KNN.
    Predictions are written to ``args.predictions_file`` as integers.
    """
    args = get_args()

    X, y = load(args.data, 'train')
    test_X, _ = load(args.data, args.test_split)

    if args.dr_algorithm is not None:
        num_train = X.shape[0]
        combined = np.concatenate((X, test_X))

        start = time.time()
        if args.dr_algorithm == 'pca':
            reducer = models.PCA(combined, args.target_dim)
        elif args.dr_algorithm == 'lle':
            reducer = models.LLE(combined, args.target_dim, args.lle_k)
        else:
            raise Exception('Invalid dimensionality reduction algorithm')
        reduced = reducer.fit(combined)
        end = time.time()
        print(f"dimensionality reduction took {end - start} seconds!")

        # Undo the concatenation: first rows are train, the rest are test.
        X = reduced[:num_train]
        test_X = reduced[num_train:]

    model = models.KNN(args.knn_k)
    model.fit(X, y)
    y_hat = model.predict(test_X)
    np.savetxt(args.predictions_file, y_hat, fmt='%d')
def models_compare(x, y):
    """Fit six regressors on a 70/30 split and compare them visually.

    Each model is trained on the same split, its predictions are overlaid
    on one seaborn regplot (one color per model), and ``Evaluationmatrix``
    prints its metrics. Ends by showing the combined comparison plot.
    """
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.30)

    # (fit-and-predict callable, plot color, label) — order matters for the
    # plot legend and matches the original run order.
    runs = [
        (models.SVM, 'red', 'SVM'),
        (models.TREE, 'green', 'TREE'),
        (models.RIDGE, 'orange', 'RIDGE'),
        (models.KNN, 'yellow', 'KNN'),
        (models.LR, 'blue', 'LR'),
        (models.RFR, 'black', 'RFR'),
    ]
    for fit_predict, color, label in runs:
        predictions = fit_predict(X_train, y_train, X_test)
        sns.regplot(predictions, y_test, color=color, label=label)
        Evaluationmatrix(y_test, predictions, label)

    plt.title('Models Comparison')
    plt.xlabel('Predicted Ratings')
    plt.ylabel('Actual Ratings')
    plt.legend()
    plt.show()
    # NOTE(review): tail of a k-fold routine whose `def` line is outside this
    # view; `group_kfold`, `model`, `sum_confusion`, `groups` and `k` are
    # bound in the missing part — confirm against the full file.
    for train_index, test_index in group_kfold.split(Xdata, Ydata, groups):
        model.train(Xdata[train_index], Ydata[train_index])
        Ypred = model.test(Xdata[test_index])
        confusion = sklearn.metrics.confusion_matrix(Ydata[test_index], Ypred,
                labels=features.labels)
        # Lazily allocate the accumulator on the first fold, once the
        # matrix shape is known.
        if sum_confusion is None:
            sum_confusion = np.zeros(confusion.shape)
        sum_confusion += confusion
    # Element-wise mean of the per-fold confusion matrices.
    return sum_confusion / k

def select_best_model(Xdata, Ydata, models):
    """Cross-validate each candidate and pick the most accurate one.

    Runs 4-fold cross-validation for every model, prints the full list of
    (index, accuracy) pairs, and returns the pair with the highest accuracy.
    """
    avg_accuracies = []
    for i, model in enumerate(models):
        avg_accuracies.append(
            (i, k_fold_cross_validate(Xdata, Ydata, 4, model)))
    print(avg_accuracies)
    return max(avg_accuracies, key=lambda pair: pair[1])


# Build the feature matrices once at module scope.
allfeatures = features.compute_or_read_features()
Xdata, Ydata = to_numpy_arrays(allfeatures)

# Candidate classifiers to evaluate. Named `candidate_models` rather than
# `models`: the original binding shadowed (and destroyed) the imported
# `models` module at module scope, which would break any later `models.*`
# call in this file.
candidate_models = [models.RandomForest(200, 'gini'), models.LogisticRegression(),
        models.SVMNonLinear('rbf'), models.SVMNonLinear('sigmoid'),
        models.NeuralNet(), models.KNN()]
#best = select_best_model(Xdata, Ydata, candidate_models)
#print(best)

# Save a 4-fold averaged confusion matrix for every candidate model.
for model in candidate_models:
    cm = k_fold_confusion_matrix(Xdata, Ydata, 4, model)
    save_confusion_matrix(cm, model._name)
    print(f"Confusion matrix for {model._name} saved")
Example #5
0
        # NOTE(review): interior of a larger model-selection function whose
        # `def` and earlier branches are outside this view; `model`,
        # `df_test*`, `names`, `start_time` and `model_name` come from there.
        model.clf.fit(df_test, df_test_target)
        df_test_new_pred = model.clf.predict(df_test_new)
        print("--- %s seconds ---" % (time.time() - start_time))

        # Confusion Matrix
        models.plot_confusion_matrix(df_test_new_target, df_test_new_pred,
                                     names)

        # models.plot_loss_curve(model.clf)
        print(
            metrics.classification_report(df_test_new_target,
                                          df_test_new_pred,
                                          target_names=names))

    # "K" selects the K-nearest-neighbours model.
    if (model_name.upper() == "K"):
        model = models.KNN()
        # df_test = df_test[:2000]
        # df_test_target = df_test_target[:2000]
        # df_test_new = df_test_new[:2000]
        # df_test_new_target = df_test_new[:2000]

        # NOTE(review): arange(0, 40, 5) includes 0, which is not a valid
        # n_neighbors value for KNN — confirm before enabling the commented
        # grid search below, which is this grid's only consumer.
        param_grid = {'n_neighbors': np.arange(0, 40, 5)}

        # Plot validation curves for different parameters
        models.plot_validation_curve(model.clf, df_test, df_test_target,
                                     'n_neighbors', np.arange(1, 40, 5), 5)
        models.plot_validation_curve(model.clf, df_test, df_test_target,
                                     'weights', ['uniform', 'distance'], 5)

        # start_time = time.time()
        # gs_model = models.conduct_grid_search(model.clf, df_test, df_test_target, param_grid)
Example #6
0
    # NOTE(review): interior of a training driver whose `def` is outside this
    # view; `args`, `test_file` and `train_data` are bound in the missing part.
    output_path = args.output_path + args.note + '/'
    output_file = args.output_path + args.note + '/' + args.output_file
    test_data = TestFile(test_file, train_data, output_path, output_file)

    # Resolve the optimizer class (not an instance) by name; unknown
    # choices fail fast.
    if args.optim == 'sgd':
        optim = torch.optim.SGD
    elif args.optim == 'adam':
        optim = torch.optim.Adam
    elif args.optim == 'adagrad':
        optim = torch.optim.Adagrad
    else:
        raise NotImplementedError

    # Dispatch on the requested model family; each branch constructs the
    # model with the shared (f, write-callback, seed, num_seed) arguments
    # and trains it on the same data.
    print('training')
    if args.model == 'knn':
        model = models.KNN(args.k, args.f, test_data.write, args.seed,
                           args.num_seed)
        model.train_model(train_data, args.no_valid)
    elif args.model == 'lr':
        model = models.RidgeRegression(args.lamb, args.f, test_data.write,
                                       args.seed, args.num_seed)
        model.train_model(train_data, args.no_valid)
    elif args.model == 'nb':
        model = models.NaiveBayes(args.model_type, args.f, test_data.write,
                                  args.seed, args.num_seed)
        model.train_model(train_data, args.no_valid)
    elif args.model == 'svm':
        model = models.SVM(args.kernel, args.c, args.f, test_data.write,
                           args.seed, args.num_seed)
        model.train_model(train_data, args.no_valid)
    elif args.model == 'mlp':
        model = models.MLP(