Exemple #1
0
def load_pca():
    """Run PCA on the joint-angle data stored in the results CSV.

    Reads ``./results/joint_angle_data.csv`` as comma-separated values,
    transposes the loaded array and passes it to ``run_pca``.

    Returns:
        Whatever ``run_pca`` returns for the transposed data.
    """
    joint_angles = genfromtxt('./results/joint_angle_data.csv', delimiter=',')
    # run_pca is fed the transpose of the array as it was loaded from disk.
    return run_pca(joint_angles.T)
Exemple #2
0
def main():
    """Make sure the 'total' and 'basic' models exist for h = 0..9.

    Requires at least two command-line arguments; otherwise a message is
    printed and the process exits with status 1.

    For every ``h`` in ``range(10)`` the two pickled model files under
    ``out_dir`` are read with ``pread``. If either read fails, both models
    are trained again (``train`` / ``train_basic`` with the PCA output and
    the arguments ``10, h``) and written with ``pwrite``. A failure during
    training or writing is reported and the loop moves on to the next ``h``.
    Finally ``train_random()`` is called.

    ``out_dir``, ``pca``, ``pread``, ``pwrite``, ``train``, ``train_basic``,
    ``randomize_data`` and ``train_random`` are expected to be provided by
    the surrounding module.
    """
    if len(sys.argv) <= 2:
        print('not enough cmd args')
        # sys.exit is always available, whereas the bare ``exit`` helper is
        # only injected by the ``site`` module. A non-zero status tells the
        # calling shell that the run did not happen.
        sys.exit(1)

    Y, Yt = pca.run_pca()
    # The randomized data is not used below. The call itself is kept in case
    # randomize_data has side effects (e.g. advances RNG state) -- TODO
    # confirm and drop it if it is pure.
    randomize_data(Y, Yt)

    for h in range(10):
        total_path = out_dir + 'pickled/models/total_model/total_model_10_' + str(h)
        basic_path = out_dir + 'pickled/models/basic_model/basic_model_10_' + str(h)
        try:
            # The loaded values are discarded: reading is only a probe for
            # "both model files already exist and can be loaded".
            pread(total_path)
            pread(basic_path)
        except Exception:
            # ``except Exception`` (rather than a bare ``except``) lets
            # KeyboardInterrupt / SystemExit propagate so the run can be
            # aborted while still treating any load error as "missing".
            try:
                F, S = train(Y, Yt, 10, h)
                pwrite(F, S, total_path)

                F_basic, S_basic = train_basic(Y, Yt, 10, h)
                pwrite(F_basic, S_basic, basic_path)
            except Exception:
                # Best-effort: report and continue with the next h. The
                # message assumes a singular-matrix failure, but any other
                # training or write error ends up here as well.
                print('Singular matrix: ' + str(h))

    train_random()
Exemple #3
0
    run_naive_bayes(x_train, x_test, y_train, y_test)
    run_neural_network(x_train, x_test, y_train, y_test)
    run_perceptron(x_train, x_test, y_train, y_test)
    run_random_forest(x_train, x_test, y_train, y_test)
    run_svm(x_train, x_test, y_train, y_test)
    run_xg_boost(x_train, x_test, y_train, y_test)

    print(
        "\n-------------------------------------\nAccuracy with Voting in all 22 features:\n-------------------------------------"
    )
    run_voting(x_train, x_test, y_train, y_test)

    print(
        "\n-------------------------------------\nAccuracies with dimensionality reduction using PCA (5 components):\n-------------------------------------"
    )
    new_x_train, new_x_test = run_pca(x_train, x_test, y_train, y_test, 5)

    run_decision_tree(new_x_train, new_x_test, y_train, y_test)
    run_k_nearest_neighbour(new_x_train, new_x_test, y_train, y_test)
    run_logistic_regression(new_x_train, new_x_test, y_train, y_test)
    run_naive_bayes(new_x_train, new_x_test, y_train, y_test)
    run_neural_network(new_x_train, new_x_test, y_train, y_test)
    run_perceptron(new_x_train, new_x_test, y_train, y_test)
    run_random_forest(new_x_train, new_x_test, y_train, y_test)
    run_svm(new_x_train, new_x_test, y_train, y_test)
    run_xg_boost(new_x_train, new_x_test, y_train, y_test)

    print(
        "\n-------------------------------------\nAccuracy with Voting along with PCA:\n-------------------------------------"
    )
    run_voting(new_x_train, new_x_test, y_train, y_test)
    col.remove('type')
    col = col[5:15]

    sc = StandardScaler()
    temp = sc.fit_transform(df_movie[col])
    # df_movie[col] = temp

    df_standard = df_movie[list(df_movie.describe().columns)]
    return (df_movie, df_standard)

def classify(row):
    """Map a row's ``imdbRating`` to a class label.

    Returns:
        0 for ratings in [0, 4), 1 for ratings in [4, 7), 2 for ratings in
        [7, 10], and ``None`` for anything outside [0, 10] (including NaN).
    """
    rating = row['imdbRating']
    # Anything that is not inside the closed interval [0, 10] gets no label;
    # NaN fails both comparisons and therefore lands here as well.
    if not (0 <= rating <= 10):
        return None
    if rating < 4:
        return 0
    if rating < 7:
        return 1
    return 2

# Script entry point: prepare the movie data, then run each analysis in turn.
if __name__ == '__main__':
    # The preprocessing result is unpacked into two values that are both
    # handed to run_pca (note the argument order: df_standard first).
    df_movie, df_standard = data_prepocessing()
    run_pca(df_standard, df_movie)

    # Plain assignment, not a copy: df_knn and df_movie name the same object,
    # so the "class" column added below is also visible through df_movie.
    df_knn = df_movie
    # Store the label returned by classify() in the "class" column; axis=1 is
    # passed so that classify receives rows (it indexes row['imdbRating']).
    df_knn["class"] = df_knn.apply(classify, axis=1)
    run_knn(df_knn)

    # NOTE(review): the next two calls take no arguments; presumably they load
    # their own data or read module-level names -- confirm before renaming or
    # reordering the variables above.
    run_logistic_regression()

    run_xgboost_cornell()
    run_xgboost_imdb(df_knn)