Example #1
def first_set(min_sup):
    """ Database bbp2 """
    print("-- Database: bbp2 --")
    train_labels = load_labels("train_bbp2.gt")
    test_labels = load_labels("test_bbp2.gt")

    # Run graph mining to get matrices
    train_matrix, test_matrix, num_freq = graph_mining(
        os_join(DATA_PATH, "train_bbp2.gsp"),
        os_join(DATA_PATH, "test_bbp2.gsp"),
        len(train_labels) * min_sup)

    # Train a RandomForestClassifier with the matrices and labels
    try:
        score = run_random_forest(train_matrix, train_labels, test_matrix,
                                  test_labels)
    except Exception as e:
        print(str(e))
        return str(e)

    print("Accuracy of classifier: " + str(score))
    return score, num_freq
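
This example relies on a run_random_forest helper that is not part of the excerpt. A minimal sketch (not the project's actual implementation), assuming it simply fits scikit-learn's RandomForestClassifier on the training matrix and returns the test accuracy, could look like this:

from sklearn.ensemble import RandomForestClassifier

def run_random_forest(train_matrix, train_labels, test_matrix, test_labels):
    # Hypothetical sketch: fit a random forest and return its mean test accuracy.
    clf = RandomForestClassifier(n_estimators=100, random_state=0)
    clf.fit(train_matrix, train_labels)
    # score() computes mean accuracy on the held-out test set
    return clf.score(test_matrix, test_labels)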
Example #2
def second_set(min_sup):
    """ Database molecules """
    print("-- Database: molecules --")
    train_labels = load_labels("train_molecules.groundTruth", sep=" ")
    test_labels = load_labels("test_molecules.groundTruth", sep=" ")

    # Run graph mining to get matrices
    train_matrix, test_matrix, num_freq = graph_mining(
        os_join(DATA_PATH, "train_molecules.gsp"),
        os_join(DATA_PATH, "test_molecules.gsp"),
        len(train_labels) * min_sup)

    # Train a RandomForestClassifier with the matrices and labels
    try:
        score = run_random_forest(train_matrix, train_labels, test_matrix,
                                  test_labels)
    except Exception as e:
        print(str(e))
        return str(e)

    print("Accuracy of classifier: " + str(score))
    return score, num_freq
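
Both of these examples also call a load_labels helper whose definition is not shown. A minimal sketch, assuming one label per line in a ground-truth file under DATA_PATH (with an optional field separator, as the sep=" " argument above suggests), might be:

def load_labels(filename, sep=None):
    # Hypothetical sketch: read one label per line; if a separator is given,
    # keep only the last field of each line as the label.
    labels = []
    with open(os_join(DATA_PATH, filename)) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            labels.append(line.split(sep)[-1] if sep else line)
    return labels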
Example #3
from ada_boost import run_ada_boost
from data import get_data_frame, split_train_test
from decision_tree import run_decision_tree
from knn import run_knn
from linear_regression import run_linear_regression
from logistic_regression import run_logistic_regression
from naive_bayes import run_gaussianNB, run_multinomialNB, run_bernoulliNB
from neural_network import run_neural_network
from random_forest import run_random_forest
from svm import run_svm

if __name__ == "__main__":
    data = get_data_frame()
    X_train, X_test, y_train, y_test = split_train_test(data)

    # Uncomment the algorithms you wish to run.
    # Be careful with KNN: it is very slow.

    #run_gaussianNB(X_train, X_test, y_train, y_test)
    #run_multinomialNB(X_train, X_test, y_train, y_test)
    #run_bernoulliNB(X_train, X_test, y_train, y_test)
    #run_knn(X_train, X_test, y_train, y_test)
    #run_linear_regression(X_train, X_test, y_train, y_test)
    #run_logistic_regression(X_train, X_test, y_train, y_test)
    #run_svm(X_train, X_test, y_train, y_test)
    #run_decision_tree(X_train, X_test, y_train, y_test)
    run_random_forest(X_train, X_test, y_train, y_test)
    #run_neural_network(X_train, X_test, y_train, y_test)
    #run_ada_boost(X_train, X_test, y_train, y_test)
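
get_data_frame and split_train_test come from the project's data module and are not included in the excerpt. A plausible sketch of split_train_test, assuming the last column of the frame is the target and delegating to scikit-learn's train_test_split, would be:

from sklearn.model_selection import train_test_split

def split_train_test(data, test_size=0.2, random_state=0):
    # Hypothetical sketch: treat every column but the last as a feature
    # and the last column as the label, then split into train and test sets.
    X = data.iloc[:, :-1]
    y = data.iloc[:, -1]
    return train_test_split(X, y, test_size=test_size, random_state=random_state)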
Example #4
from data_preprocessor import get_data

if __name__ == '__main__':
    x_train, x_test, y_train, y_test = get_data(True)

    print(
        "\n-------------------------------------\nAccuracies with top 5 features:\n-------------------------------------"
    )

    run_decision_tree(x_train, x_test, y_train, y_test)
    run_k_nearest_neighbour(x_train, x_test, y_train, y_test)
    run_logistic_regression(x_train, x_test, y_train, y_test)
    run_naive_bayes(x_train, x_test, y_train, y_test)
    run_neural_network(x_train, x_test, y_train, y_test)
    run_perceptron(x_train, x_test, y_train, y_test)
    run_random_forest(x_train, x_test, y_train, y_test)
    run_svm(x_train, x_test, y_train, y_test)
    run_xg_boost(x_train, x_test, y_train, y_test)

    print(
        "\n-------------------------------------\nAccuracy with Voting in top 5 features:\n-------------------------------------"
    )
    run_voting(x_train, x_test, y_train, y_test)

    x_train, x_test, y_train, y_test = get_data()
    print(
        "\n-------------------------------------\nAccuracies with all 22 features:\n-------------------------------------"
    )

    run_decision_tree(x_train, x_test, y_train, y_test)
    run_k_nearest_neighbour(x_train, x_test, y_train, y_test)
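
None of the run_* helpers used in Example #4 are shown in the excerpt. A minimal sketch of run_voting, assuming it wraps scikit-learn's VotingClassifier around a few of the same base estimators and prints the test accuracy, could be:

from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

def run_voting(x_train, x_test, y_train, y_test):
    # Hypothetical sketch: hard-voting ensemble over three base classifiers.
    voting = VotingClassifier(
        estimators=[
            ("dt", DecisionTreeClassifier()),
            ("lr", LogisticRegression(max_iter=1000)),
            ("rf", RandomForestClassifier(n_estimators=100)),
        ],
        voting="hard",
    )
    voting.fit(x_train, y_train)
    print("Voting accuracy:", voting.score(x_test, y_test))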
Example #5
    df_movie = fill_nan(df_movie)
    df_movie = df_movie.drop(columns="imdb_score")

    print(df_movie["director_name"].head())

    # Columns that still contain any NaN values
    col_mask = df_movie.isna().any(axis=0)
    print(col_mask)

    # return the processed dataset.
    return df_movie


if __name__ == "__main__":
    #     df_movie, df_standard = data_prepocessing()
    #     df_knn = df_movie
    #     df_knn = df_knn.reset_index()
    #     df_knn["class"] = df_knn.apply(classify, axis=1)
    #     classes = list(df_knn["class"])
    #     amazing = classes==['AMAZING']
    #     print(amazing)
    #     df_knn = df_knn.drop(columns="imdbRating")

    df_movie = load_metadata_dataset()
    #run_knn(df_movie)
    #run_logistic_regression(df_movie)
    classifier = run_random_forest(df_movie)

    run(classifier)
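
Example #5 cleans the movie data with a fill_nan step before dropping imdb_score; that helper is also outside the excerpt. A minimal sketch, assuming numeric columns are imputed with their median and all other columns with their most frequent value, might be:

import pandas as pd

def fill_nan(df):
    # Hypothetical sketch: impute numeric NaNs with the column median,
    # non-numeric NaNs with the column mode.
    df = df.copy()
    for col in df.columns:
        if pd.api.types.is_numeric_dtype(df[col]):
            df[col] = df[col].fillna(df[col].median())
        else:
            df[col] = df[col].fillna(df[col].mode().iloc[0])
    return df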