Example #1
def main():

    # load data
    training_data = load_data.read_data("train.csv")
    testing_data = load_data.read_data("test.csv")
    testing_labels = load_data.read_data("submission.csv")
    X_train, X_test = load_data.vectorize_data(training_data, testing_data)

    Y_train = np.array(training_data)[:, -1]
    Y_test = np.array(testing_labels)[:, -1]

    svm(X_train, Y_train, X_test, Y_test)
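
Every example on this page shares the same loading step: load_data.read_data returns the raw CSV rows, load_data.vectorize_data turns the text column into feature matrices, and the label is taken from the last CSV column. Neither load_data nor the svm helper appears on this page, so the following is only a minimal sketch of what they might look like, assuming stdlib CSV parsing, a TF-IDF vectorizer, and a linear SVM; the text column index and all hyperparameters are assumptions.

# Hypothetical sketch of the helpers assumed by Example #1. Column indices
# and vectorizer settings are assumptions, not the project's actual code.
import csv

import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.svm import LinearSVC


def read_data(path):
    # return the CSV as a list of rows; the label is expected in the last
    # column, matching np.array(training_data)[:, -1] above
    with open(path, newline="") as f:
        reader = csv.reader(f)
        next(reader)  # skip the header row
        return list(reader)


def vectorize_data(training_data, testing_data):
    # fit TF-IDF on the training text (assumed here to sit in column 0) and
    # reuse the same vocabulary for the test text
    vectorizer = TfidfVectorizer()
    X_train = vectorizer.fit_transform(row[0] for row in training_data)
    X_test = vectorizer.transform(row[0] for row in testing_data)
    return X_train, X_test


def svm(X_train, Y_train, X_test, Y_test):
    # linear SVM baseline with default hyperparameters
    clf = LinearSVC()
    clf.fit(X_train, Y_train)
    print("SVM accuracy:", accuracy_score(Y_test, clf.predict(X_test)))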
Example #2
def main():

    # load data
    training_data = load_data.read_data("train.csv")
    testing_data = load_data.read_data("test.csv")
    testing_labels = load_data.read_data("submission.csv")
    X_train, X_test = load_data.vectorize_data(training_data, testing_data)

    Y_train = np.array(training_data)[:, -1]
    Y_test = np.array(testing_labels)[:, -1]

    # uncomment for grid searching
    #params = grid_search_kmeans(X_train, Y_train)

    params = {'n_clusters': 2}

    kmeans(X_train, Y_train, X_test, Y_test, params)
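
Example #2 passes its settings to kmeans through a params dict (here just n_clusters). A minimal sketch, assuming the helper wraps scikit-learn's KMeans and maps each cluster to the majority training label before scoring; the mapping strategy is an assumption:

# Hypothetical sketch of the kmeans helper used in Example #2.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score


def kmeans(X_train, Y_train, X_test, Y_test, params):
    model = KMeans(**params, random_state=0)
    train_clusters = model.fit_predict(X_train)

    # label each cluster with the majority class it contains in training
    Y_train = np.asarray(Y_train)
    label_map = {}
    for c in np.unique(train_clusters):
        values, counts = np.unique(Y_train[train_clusters == c], return_counts=True)
        label_map[c] = values[np.argmax(counts)]

    Y_pred = np.array([label_map[c] for c in model.predict(X_test)])
    print("k-means accuracy:", accuracy_score(Y_test, Y_pred))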
Example #3
def main():
    # load data
    training_data = load_data.read_data("train.csv")
    testing_data = load_data.read_data("test.csv")
    testing_labels = load_data.read_data("submission.csv")
    X_train, X_test = load_data.vectorize_data(training_data, testing_data)

    Y_train = np.array(training_data)[:, -1]
    Y_test = np.array(testing_labels)[:, -1]

    print(X_train.shape)
    print(X_test.shape)

    X_train = X_train.toarray()
    X_test = X_test.toarray()

    # reduce data
    X_train, X_test = fld(X_train, Y_train, X_test, 2)
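
The example ends at fld, which is not shown on this page. Below is a plain-NumPy sketch of a Fisher's-linear-discriminant projection matching the call above; note that with two classes the between-class scatter has rank 1, so only the first of the two requested dimensions carries real discriminative information.

# Hypothetical sketch of fld as called in Example #3: project both sets onto
# the top `dims` eigenvectors of pinv(Sw) @ Sb from the training data.
import numpy as np


def fld(X_train, Y_train, X_test, dims):
    X_train = np.asarray(X_train, dtype=float)
    Y_train = np.asarray(Y_train)
    overall_mean = X_train.mean(axis=0)

    n_features = X_train.shape[1]
    Sw = np.zeros((n_features, n_features))  # within-class scatter
    Sb = np.zeros((n_features, n_features))  # between-class scatter
    for c in np.unique(Y_train):
        Xc = X_train[Y_train == c]
        mean_c = Xc.mean(axis=0)
        Sw += (Xc - mean_c).T @ (Xc - mean_c)
        diff = (mean_c - overall_mean).reshape(-1, 1)
        Sb += len(Xc) * (diff @ diff.T)

    # eigenvectors of pinv(Sw) @ Sb, sorted by decreasing eigenvalue
    values, vectors = np.linalg.eig(np.linalg.pinv(Sw) @ Sb)
    order = np.argsort(values.real)[::-1]
    W = vectors[:, order[:dims]].real

    return X_train @ W, np.asarray(X_test, dtype=float) @ W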
Example #4
def main():
    # load data
    training_data = load_data.read_data("train.csv")
    testing_data = load_data.read_data("test.csv")
    testing_labels = load_data.read_data("submission.csv")
    X_train, X_test = load_data.vectorize_data(training_data, testing_data)

    Y_train = np.array(training_data)[:, -1]
    Y_test = np.array(testing_labels)[:, -1]

    print(X_train.shape)
    print(X_test.shape)

    X_train = X_train.toarray()
    X_test = X_test.toarray()

    #
    # means = np.mean(X_train.T, axis=1)
    # # center columns
    # cols = X_train - means
    # # print(cols)
    #
    # # cov matrix
    # cov = np.cov(cols.T)
    #
    # # calculate dims needed to be kept based on error rate
    # values, vectors = np.linalg.eig(cov)
    # dim = pca_error_rate(values, 0.2)
    # print("Reduced DIMS to: " + str(dim) + " from " + str(len(training_data[0])))

    # reduce data
    X_train, X_test = pca(X_train, X_test, 2952)

    print(X_train.shape)
    print(X_test.shape)

    params = {
        'activation': 'relu',
        'solver': 'lbfgs',
        'hidden_layer_sizes': (100, 10),
        'learning_rate_init': 0.0009
    }

    bpnn.bpnn(X_train, Y_train, X_test, Y_test, params)
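
The commented-out block above derives the target dimensionality from the covariance eigenvalues via pca_error_rate, but neither it nor the pca helper is shown. A sketch consistent with that block, reading the 0.2 argument as the fraction of total variance that may be discarded (an assumption):

# Hypothetical sketches of pca_error_rate and pca as used in Example #4.
import numpy as np


def pca_error_rate(values, max_error):
    # smallest number of leading eigenvalues whose share of the total
    # variance reaches at least 1 - max_error
    values = np.sort(values.real)[::-1]
    retained = np.cumsum(values) / np.sum(values)
    return int(np.searchsorted(retained, 1.0 - max_error)) + 1


def pca(X_train, X_test, dims):
    # fit the projection on the training data only, then apply it to both sets
    means = X_train.mean(axis=0)
    values, vectors = np.linalg.eig(np.cov((X_train - means).T))
    order = np.argsort(values.real)[::-1]
    W = vectors[:, order[:dims]].real
    return (X_train - means) @ W, (X_test - means) @ W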
Example #5
def main():

    # load data
    training_data = load_data.read_data("train.csv")
    testing_data = load_data.read_data("test.csv")
    testing_labels = load_data.read_data("submission.csv")
    X_train, X_test = load_data.vectorize_data(training_data, testing_data)

    Y_train = np.array(training_data)[:, -1]
    Y_test = np.array(testing_labels)[:, -1]

    # uncomment for grid searching
    #params = grid_search_bpnn(X_train, Y_train)

    params = {
        'activation': 'relu',
        'solver': 'lbfgs',
        'hidden_layer_sizes': (100, 10),
        'learning_rate_init': 0.0009
    }

    bpnn(X_train, Y_train, X_test, Y_test, params)
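
bpnn itself is not shown in Examples #4 and #5, but the params keys (activation, solver, hidden_layer_sizes, learning_rate_init) match the constructor of scikit-learn's MLPClassifier, so a thin wrapper around that estimator is a reasonable guess:

# Hypothetical sketch of the bpnn helper; max_iter and random_state are
# choices made here, not taken from the project.
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier


def bpnn(X_train, Y_train, X_test, Y_test, params):
    clf = MLPClassifier(**params, max_iter=1000, random_state=0)
    clf.fit(X_train, Y_train)
    print("BPNN accuracy:", accuracy_score(Y_test, clf.predict(X_test)))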
Example #6
# imports inferred from the function body below; `score` and `acs` are the
# sklearn metric functions matching the call signatures used here
import sys
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import accuracy_score as acs
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support as score

import load_data


def main():

    # load data
    training_data = load_data.read_data("train.csv")
    testing_data = load_data.read_data("test.csv")
    testing_labels = load_data.read_data("submission.csv")
    X_train, X_test = load_data.vectorize_data(training_data, testing_data)

    X_train = X_train.toarray()
    X_test = X_test.toarray()

    Y_train = np.array(training_data)[:, -1]
    Y_test = np.array(testing_labels)[:, -1]

    # the training and testing datasets should have the same dimension
    _, nftrain = X_train.shape
    _, nftest = X_test.shape
    assert nftrain == nftest

    # ask the user to input which discriminant function to use
    prompt = '''
    Type of discriminant functions supported assuming Gaussian pdf:
    1 - minimum Euclidean distance classifier
    2 - minimum Mahalanobis distance classifier
    3 - quadratic classifier
    '''
    print(prompt)
    case_str = input('Please input 1, 2, or 3: ')  # avoid shadowing the built-in str
    cases = int(case_str)

    # ask the user to input prior probabilities that must sum to 1
    prop_str = input(
        "Please input prior probabilities as floats separated by spaces; they must sum to 1: \n"
    )
    P = np.array([float(n) for n in prop_str.split()])
    # exact float equality is unreliable after parsing, so allow a tolerance
    if not np.isclose(P.sum(), 1.0):
        print("Prior probabilities do not add up to 1. Please check!")
        sys.exit(1)

    # derive the decision rule from the training set and apply on the test set
    t0 = time.time()  # start time
    Y_pred = mpp(X_train, Y_train, X_test, cases, P)
    t1 = time.time()  # ending time

    print(Y_pred)
    # mpp returns numeric predictions; convert to strings to match Y_test labels
    Y_pred = Y_pred.astype("int").astype("str")
    # calculate accuracy
    precision, recall, fscore, train_support = score(Y_test,
                                                     Y_pred,
                                                     pos_label='1',
                                                     average='binary')
    print('Precision: {} / Recall: {} / F1-Score: {} / Accuracy: {}'.format(
        round(precision, 3), round(recall, 3), round(fscore, 3),
        round(acs(Y_test, Y_pred), 3)))

    cm = confusion_matrix(Y_test, Y_pred)
    class_label = ["0", "1"]
    df_cm = pd.DataFrame(cm, index=class_label, columns=class_label)
    sns.heatmap(df_cm, annot=True, fmt='d')
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.show()

    print(f'The learning process takes {t1 - t0} seconds.')
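
mpp, the discriminant-function classifier selected by cases, is not shown. A sketch of the usual three Gaussian cases: case 1 models every class covariance as sigma^2 * I (minimum Euclidean distance), case 2 uses one covariance shared by all classes (minimum Mahalanobis distance), and case 3 keeps class-specific covariances (quadratic classifier), with the priors P entering through their logarithms.

# Hypothetical sketch of mpp for Example #6. Numerical details such as pinv,
# slogdet, and the sigma^2 estimate are choices made here.
import numpy as np


def mpp(X_train, Y_train, X_test, cases, P):
    X_train = np.asarray(X_train, dtype=float)
    Y_train = np.asarray(Y_train)
    classes = np.unique(Y_train)

    means = [X_train[Y_train == c].mean(axis=0) for c in classes]
    covs = [np.cov(X_train[Y_train == c].T) for c in classes]
    pooled = sum(covs) / len(covs)             # shared covariance for case 2
    pooled_inv = np.linalg.pinv(pooled)
    sigma2 = pooled.trace() / pooled.shape[0]  # average variance for case 1

    scores = np.zeros((len(X_test), len(classes)))
    for i in range(len(classes)):
        d = X_test - means[i]
        if cases == 1:
            # minimum Euclidean distance classifier
            g = -np.sum(d * d, axis=1) / (2 * sigma2)
        elif cases == 2:
            # minimum Mahalanobis distance classifier
            g = -0.5 * np.sum((d @ pooled_inv) * d, axis=1)
        else:
            # quadratic classifier
            inv_i = np.linalg.pinv(covs[i])
            _, logdet = np.linalg.slogdet(covs[i])
            g = -0.5 * np.sum((d @ inv_i) * d, axis=1) - 0.5 * logdet
        scores[:, i] = g + np.log(P[i])

    # numeric predictions; Example #6 converts these to strings afterwards
    return classes.astype(float)[np.argmax(scores, axis=1)]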