Example #1
0
def main():
    """KNN experiments: accuracy at k=1 and k=15, cross-validation, then k=3."""
    train_data, train_labels, test_data, test_labels = data.load_all_data(
        'data')
    knn = KNearestNeighbor(train_data, train_labels)

    # Example usage:
    #predicted_label = knn.query_knn(test_data[0], 1)

    # Train/test accuracy for the two fixed choices of k.
    for k in (1, 15):
        acc_train = classification_accuracy(knn, k, train_data, train_labels)
        acc_test = classification_accuracy(knn, k, test_data, test_labels)
        print(f"Train Accuracy when k={k}: {acc_train}")
        print(f"Test Accuracy when k={k}: {acc_test}")

    # Search k = 1..15 by cross-validation on the training split.
    cross_validation(train_data, train_labels, k_range=np.arange(1, 16))

    # Final report at k = 3.
    print("Train Accuracy: " +
          str(classification_accuracy(knn, 3, train_data, train_labels)))
    print("Test Accuracy: " +
          str(classification_accuracy(knn, 3, test_data, test_labels)))
def main():
    """Gaussian generative model: fit MLEs, evaluate, visualize eigenvectors."""
    train_data, train_labels, test_data, test_labels = data.load_all_data(
        'data')

    # Maximum-likelihood estimates of the per-class means and covariances.
    mu = compute_mean_mles(train_data, train_labels)
    sigma = compute_sigma_mles(train_data, train_labels)

    # Average conditional log-likelihood on both splits.
    llh_train = avg_conditional_likelihood(train_data, train_labels, mu, sigma)
    llh_test = avg_conditional_likelihood(test_data, test_labels, mu, sigma)
    print('Train average conditional log-likelihood: ', llh_train)
    print('Test average conditional log-likelihood: ', llh_test)

    # Most-probable-class predictions and their accuracies.
    pred_train = classify_data(train_data, mu, sigma)
    pred_test = classify_data(test_data, mu, sigma)
    acc_train = np.mean(train_labels.astype(int) == pred_train)
    acc_test = np.mean(test_labels.astype(int) == pred_test)
    print('Train posterior accuracy: ', acc_train)
    print('Test posterior accuracy: ', acc_test)

    # Plot the leading eigenvector of each class covariance as an 8x8 image.
    for digit in range(10):
        e_val, e_vec = np.linalg.eig(sigma[digit])
        # Eigenvectors are the *columns* of e_vec, hence the axis-1 index.
        leading = e_vec[:, np.argmax(e_val)].reshape((8, 8))
        plt.subplot(3, 4, digit + 1)
        plt.imshow(leading, cmap='gray')
    plt.show()
Example #3
0
def main():
    """Fit the Gaussian model, plot covariance diagonals, report fit quality."""
    train_data, train_labels, test_data, test_labels = data.load_all_data(
        'data')

    # Per-class MLE parameters.
    means = compute_mean_mles(train_data, train_labels)
    covariances = compute_sigma_mles(train_data, train_labels)

    # 2.1.1 -- visualize the covariance diagonals.
    plot_cov_diagonal(covariances)

    # 2.1.2 -- average conditional log-likelihood on each split.
    llh_train = avg_conditional_likelihood(train_data, train_labels, means,
                                           covariances)
    print("The average conditional log-likelihood for the training set is ",
          llh_train)
    llh_test = avg_conditional_likelihood(test_data, test_labels, means,
                                          covariances)
    print("The average conditional log-likelihood for the testing set is ",
          llh_test)

    # 2.1.3 -- classification accuracy on each split (test first, as before).
    pred_test = classify_data(test_data, means, covariances)
    print("The test accuracy is ", accuracy_score(test_labels, pred_test))
    pred_train = classify_data(train_data, means, covariances)
    print("The train accuracy is ", accuracy_score(train_labels, pred_train))
Example #4
0
def main():
    """Gaussian model: covariance diagonals, likelihoods, and accuracies."""
    train_data, train_labels, test_data, test_labels = data.load_all_data(
        'data')
    # Fit per-class mean and covariance MLEs.
    means = compute_mean_mles(train_data, train_labels)
    covariances = compute_sigma_mles(train_data, train_labels)

    # q2.2.1 plot diagonal of covariances
    plot_cov_diagonal(covariances)

    # q2.2.2 average conditional log-likelihood on both splits
    llh_train = avg_conditional_likelihood(train_data, train_labels, means,
                                           covariances)
    llh_test = avg_conditional_likelihood(test_data, test_labels, means,
                                          covariances)
    print("average conditional log likelihood for train set: {:f}".format(
        llh_train))
    print("average conditional log likelihood for test  set: {:f}".format(
        llh_test))

    # q2.2.3 classification accuracy for both splits
    acc_train = classification_accuracy(means, covariances, train_data,
                                        train_labels)
    acc_test = classification_accuracy(means, covariances, test_data,
                                       test_labels)
    print('classification accuracy for train set: {:f}'.format(acc_train))
    print('classification accuracy for test  set: {:f}'.format(acc_test))
Example #5
0
def main():
    """Train a neural net on the digit data and report evaluation metrics."""
    train_data, train_labels, test_data, test_labels = data.load_all_data(
        'data')

    # Build and fit the network (True presumably toggles a config flag --
    # confirm against the NeuralNet constructor).
    net = NeuralNet(True)
    net.train_model(train_data, train_labels)

    # accuracy (test_model returns a pair; element 1 is the accuracy)
    accuracy = net.test_model(test_data, test_labels)
    print(f"achieved {accuracy[1]} accuracy on test set")
    print(f"with params {net.params}")

    # ROC
    net.plot_ROC(test_data, test_labels)

    # confusion matrix
    net.plot_Confusion_Matrix(test_data, test_labels)

    # recall and precision per digit class
    recalls = net.get_recall(test_data, test_labels)
    precisions = net.get_precision(test_data, test_labels)
    for digit in range(10):
        print(
            f"{digit} has a recall of {recalls[digit]} and precision {precisions[digit]}"
        )
Example #6
0
def main():
    """Fit the Gaussian model and report likelihoods and accuracies.

    FIX: the original used Python 2 print *statements*, which are a syntax
    error under Python 3 and inconsistent with every other block in this
    file.  Converted to print() calls; single-string output is identical.
    """
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')

    # Fit the model
    means = compute_mean_mles(train_data, train_labels)
    covariances = compute_sigma_mles(train_data, train_labels)
    print("============Q2.2 Part1 plot of log of Sigma_k diagonal")
    plot_cov_diagonal(covariances)

    print("============Q2.2 part2 average log likelihood========")
    print("===========Train data average log likelihood=========")
    avg_train = avg_conditional_likelihood(train_data, train_labels, means, covariances)

    print("===========Test data average log likelihood ========")
    avg_test = avg_conditional_likelihood(test_data, test_labels, means, covariances)

    # Predict a class for every example and report per-split accuracy.
    print("=============Q2.2 part3 prediction and accuracy of each predication======")
    print("=============Train data prediction and accuracy========")
    train_predict = classify_data(train_data, means, covariances)
    n_dim_train = train_labels.shape[0]
    classify_accuracy(train_predict, train_labels, n_dim_train)

    print("=============Test data prediction and accuracy=========")
    test_predict = classify_data(test_data, means, covariances)
    n_dim_test = test_labels.shape[0]
    classify_accuracy(test_predict, test_labels, n_dim_test)
Example #7
0
def main():
    """Fit a per-pixel Bernoulli model on binarized digits and evaluate it."""
    train_data, train_labels, test_data, test_labels = data.load_all_data(
        'data')
    train_data, test_data = binarize_data(train_data), binarize_data(test_data)

    # Fit the Bernoulli parameters.
    eta = compute_parameters(train_data, train_labels)

    # Visual checks: parameter images and sampled digits.
    plot_images(eta)
    generate_new_data(eta)

    avg_like_train = avg_conditional_likelihood(train_data, train_labels, eta)
    avg_like_test = avg_conditional_likelihood(test_data, test_labels, eta)

    # Accuracy = fraction of examples whose predicted class matches the label.
    pred_train = classify_data(train_data, eta)
    pred_test = classify_data(test_data, eta)
    acc_train = sum(1 for i, c in enumerate(pred_train)
                    if train_labels[i] == c) / len(train_data)
    acc_test = sum(1 for i, c in enumerate(pred_test)
                   if test_labels[i] == c) / len(test_data)

    print("Average conditional likelihood ->", "\nTrain: ", avg_like_train,
          "\nTest: ", avg_like_test)
    print("Classification accuracy->", "\nTrain: ", acc_train, "\nTest: ",
          acc_test)
Example #8
0
def main():
    """Gaussian model: likelihoods (1a), accuracies (1b), eigenvectors (1c)."""
    train_data, train_labels, test_data, test_labels = data.load_all_data(
        'data')

    # Fit per-class mean/covariance MLEs.
    means = compute_mean_mles(train_data, train_labels)
    covariances = compute_sigma_mles(train_data, train_labels)

    # question 1(a): average conditional log-likelihood per split
    avg_train = avg_conditional_likelihood(train_data, train_labels, means,
                                           covariances)
    avg_test = avg_conditional_likelihood(test_data, test_labels, means,
                                          covariances)

    print(
        'The average conditional log-likelihood on train set is {}.\n'.format(
            avg_train))

    print('The average conditional log-likelihood on test set is {}.\n'.format(
        avg_test))

    # question 1(b): most-probable-class predictions and their accuracies
    pred_train = classify_data(train_data, means, covariances)
    pred_test = classify_data(test_data, means, covariances)

    acc_train = compute_accuracy(pred_train, train_labels)
    acc_test = compute_accuracy(pred_test, test_labels)

    print('The accuracy on train set is {}.\n'.format(acc_train))

    print('The accuracy on test set is {}.\n'.format(acc_test))

    # question 1(c): plot the leading eigenvector of each class covariance
    plot_leading_eigenvectors(covariances)
Example #9
0
def main():
    """Gaussian model on digits: likelihood diagnostics plus accuracies."""
    train_data, train_labels, test_data, test_labels = data.load_all_data(
        'data')

    # Fit per-class MLE parameters.
    means = compute_mean_mles(train_data, train_labels)
    covariances = compute_sigma_mles(train_data, train_labels)

    # Computed for inspection only; the results are not used further below.
    generative_likelihood(train_data, means, covariances)
    conditional_likelihood(train_data, means, covariances)

    # Evaluation
    # plot(covariances)
    avg_cond_train = avg_conditional_likelihood(train_data, train_labels,
                                                means, covariances)
    print("Average conditional log-likelihood for train set is: " +
          str(avg_cond_train))

    avg_cond_test = avg_conditional_likelihood(test_data, test_labels, means,
                                               covariances)
    print("Average conditional log-likelihood for test set is: " +
          str(avg_cond_test))

    # Accuracy on each split.
    print("train set accuracy is: " +
          str(accuracy(train_data, train_labels, means, covariances)))
    print("test set accuracy is: " +
          str(accuracy(test_data, test_labels, means, covariances)))
Example #10
0
def main():
    """Cross-validate k for KNN, then report train/test accuracy at that k."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')
    knn = KNearestNeighbor(train_data, train_labels)

    # Pick the best k by k-fold cross-validation on the training split.
    optimal_k, best_avg_acc = cross_validation(train_data, train_labels)
    print("The optimal k is {:d}".format(optimal_k))
    print("The average accuracy across folds for k={:d} is {:f}".format(optimal_k, best_avg_acc))
    acc_train = classification_accuracy(knn, optimal_k, train_data, train_labels)
    print("The training accuracy for k={:d} is {:f}".format(optimal_k, acc_train))
    acc_test = classification_accuracy(knn, optimal_k, test_data, test_labels)
    print("The test accuracy for k={:d} is {:f}".format(optimal_k, acc_test))
Example #11
0
def main():
    """Fit an AdaBoost model and report its best parameters and accuracies."""
    train_data, train_labels, test_data, test_labels = data.load_all_data(
        'data')
    clf = AdaBoost(train_data, train_labels)
    # AdaBoost presumably wraps a grid search (exposes best_params_) -- confirm.
    print("Best Parameters: ", clf.best_params_)
    acc_test = classification_accuracy(clf, test_data, test_labels)
    print("Test accuracy: ", acc_test)
    acc_train = classification_accuracy(clf, train_data, train_labels)
    print("Train accuracy: ", acc_train)
Example #12
0
def main():
    """Entry point: build the KNN model and run the part 2.1.3 experiment."""
    train_data, train_labels, test_data, test_labels = data.load_all_data(
        'data')
    # Built for parity with the commented example below; part_2_1_3 loads its
    # own classifier from the raw splits.
    knn_model = KNearestNeighbor(train_data, train_labels)

    # Example usage:
    # part_2_1_1(knn_model, train_data, train_labels, test_data, test_labels)

    # part 2.1.3
    part_2_1_3(train_data, train_labels, test_data, test_labels)
Example #13
0
def load_data(TIME_STEPS, STEP):
    """Load MHEALTH accelerometer data and build train/test time series.

    Args:
        TIME_STEPS: window length (samples) for each time-series example.
        STEP: stride between consecutive windows.

    Returns:
        (X_train, y_train, X_test, y_test) with targets encoded by
        data_management.encode_target.
    """
    directory = "MHEALTHDATASET/"
    raw, X, y = data_management.load_all_data(directory)
    raw.head()

    # Keep only the two activities of interest (labels 1 and 4).
    valid_activities_set = raw.query("label in (1,4)")  # raw.copy()
    valid_activities_set.columns

    # The nine accelerometer channels used as features everywhere below
    # (previously this list was repeated three times inline).
    acc_cols = [
        'acc_chest_x', 'acc_chest_y', 'acc_chest_z', 'acc_left_ank_x',
        'acc_left_ank_y', 'acc_left_ank_z', 'acc_right_arm_x',
        'acc_right_arm_y', 'acc_right_arm_z'
    ]

    # Get only 2 activities and accelerometer data
    data = valid_activities_set[['id'] + acc_cols + ['label']].copy()

    # Separate in train and test: subjects 1-7 train, the rest test.
    df_train = data[data['id'] <= 7]
    df_test = data[data['id'] > 7]

    # Scale features into [-1, 1] with min-max normalization.
    df_train.loc[:, acc_cols] = data_management.range_normalization(
        df_train[acc_cols].to_numpy(), -1, 1)
    df_test.loc[:, acc_cols] = data_management.range_normalization(
        df_test[acc_cols].to_numpy(), -1, 1)

    # Slice into windows of TIME_STEPS samples with stride STEP.
    X_train, y_train = data_management.create_dataset(df_train[acc_cols],
                                                      df_train.label,
                                                      TIME_STEPS, STEP)
    X_test, y_test = data_management.create_dataset(df_test[acc_cols],
                                                    df_test.label, TIME_STEPS,
                                                    STEP)

    # Encode Target
    y_train = data_management.encode_target(y_train)
    y_test = data_management.encode_target(y_test)

    return X_train, y_train, X_test, y_test
Example #14
0
def main():
    """Fit an MLP, compute its test-set ROC inputs, and report accuracies."""
    train_data, train_labels, test_data, test_labels = data.load_all_data(
        'data')
    clf = MLP(train_data, train_labels)

    # BUG FIX: the original predicted on train_data but built the ROC curve
    # against test_data (length mismatch), and passed raw examples instead of
    # labels as y_true.  roc_curve expects (y_true, y_score) from the SAME
    # split, so predict on the test set and compare against test_labels.
    predicted_labels = clf.predict(test_data)
    fpr_rf, tpr_rf, _ = roc_curve(test_labels, predicted_labels)

    print("Best Parameters: ", clf.best_params_)
    print("Test accuracy: ",
          classification_accuracy(clf, test_data, test_labels))
    print("Train accuracy: ",
          classification_accuracy(clf, train_data, train_labels))
Example #15
0
def main():
    """Cross-validate MLP hyper-parameters and report accuracies."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')
    mlp = MLP(train_data, train_labels)

    # Pick hyper-parameters by cross-validation, then build the classifier.
    solver, activation, learning_rate, layer_size = cross_validation(train_data, train_labels)
    clf = mlp.mlpclassifier(solver, activation, learning_rate, layer_size)
    print("Solver: ", solver, " Activation: ", activation, " Learning Rate: ",
           learning_rate, " Layer Size: ", layer_size)
    print(clf.predict(test_data))
    acc_test = classification_accuracy(clf, test_data, test_labels)
    print("Test accuracy: ", acc_test)
    acc_train = classification_accuracy(clf, train_data, train_labels)
    print("Train accuracy: ", acc_train)
Example #16
0
def main():
    """Fit the Bernoulli naive Bayes parameters and produce its plots."""
    train_data, train_labels, test_data, test_labels = data.load_all_data(
        'data')
    # Work on binarized pixel values.
    train_data, test_data = binarize_data(train_data), binarize_data(test_data)

    # Fit the per-class/pixel Bernoulli parameters.
    eta = compute_parameters(train_data, train_labels)

    # Evaluation: visualize the parameters, then sample new digits from them.
    plot_images(eta)

    generate_new_data(eta)
Example #17
0
def main():
    """KNN experiments: accuracies at k=1/15, cross-validation, then k=4."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')
    knn = KNearestNeighbor(train_data, train_labels)

    # Train/test accuracy for the two fixed choices of k.
    for k in (1, 15):
        print("K = %d training set accuracy: %f" %
              (k, classification_accuracy(knn, k, train_data, train_labels)))
        print("K = %d testing set accuracy: %f" %
              (k, classification_accuracy(knn, k, test_data, test_labels)))
    print("10 fold cross validation to find the opitmalK in the 1-15 range")
    cross_validation(train_data, train_labels)
    # k = 4 was the cross-validation winner.
    print("K = 4 training set accuracy: %f" %
          classification_accuracy(knn, 4, train_data, train_labels))
    print("For K = 4 testing set accuracy: %f" %
          classification_accuracy(knn, 4, test_data, test_labels))
Example #18
0
def main():
    """Gaussian model: covariance diagonals, likelihoods, and accuracies."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')

    # Fit per-class mean and covariance MLEs.
    means = compute_mean_mles(train_data, train_labels)
    covariances = compute_sigma_mles(train_data, train_labels)
    plot_cov_diagonal(covariances)

    # Evaluation
    llh_train = avg_conditional_likelihood(train_data, train_labels, means, covariances)
    print("The average training conditional likelihood is {:f}".format(llh_train))
    llh_test = avg_conditional_likelihood(test_data, test_labels, means, covariances)
    print("The average test conditional likelihood is {:f}".format(llh_test))

    acc_train = calculate_accuracy(train_data, means, covariances, train_labels)
    print("The training accuracy is {:f}".format(acc_train))
    acc_test = calculate_accuracy(test_data, means, covariances, test_labels)
    print("The test accuracy is {:f}".format(acc_test))
Example #19
0
def main():
    """KNN demo: one sample query, k=1/15 accuracies, and cross-validation."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')
    knn = KNearestNeighbor(train_data, train_labels)

    # Example usage: query a single test point.
    predicted_label = knn.query_knn(test_data[0], 1)
    # plt.imshow(predicted_label, cmap='gray')
    # plt.show()
    # print(predicted_label)

    # Accuracy on each split for the two fixed values of k (test split first,
    # matching the original report order).
    for split_name, xs, ys in (("test", test_data, test_labels),
                               ("train", train_data, train_labels)):
        for k in (1, 15):
            print("{} data, k={}: ".format(split_name, k),
                  classification_accuracy(knn, k, xs, ys))

    print("Optimal k is ", cross_validation(train_data, train_labels))
Example #20
0
def main():
    """KNN: report test accuracy at k=1 and k=15, then cross-validate."""
    train_data, train_labels, test_data, test_labels = data.load_all_data(
        'data', shuffle=True)
    knn = KNearestNeighbor(train_data, train_labels)

    # sub part 1
    acc_1 = classification_accuracy(knn, 1, test_data, test_labels)
    print("For K = 1 Classification Accuracy: {}".format(acc_1))
    acc_15 = classification_accuracy(knn, 15, test_data, test_labels)
    print("For K = 15 CLassification Accuracy: {}".format(acc_15))

    #sub part 3
    print(cross_validation(train_data, train_labels))
    '''
def main():
    """KNN accuracies at k=1/15 and test accuracy at the cross-validated k."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')
    knn = KNearestNeighbor(train_data, train_labels)

    print("train classification accuracy when k = 1, {0}".format(classification_accuracy(knn, 1, train_data, train_labels)))
    print("train classification accuracy when k = 15, {0}".format(classification_accuracy(knn, 15, train_data, train_labels)))

    print("test classification accuracy when k = 1, {0}".format(classification_accuracy(knn, 1, test_data, test_labels)))
    print("test classification accuracy when k = 15, {0}".format(classification_accuracy(knn, 15, test_data, test_labels)))
    k_s = cross_validation(train_data, train_labels)
    # BUG FIX: k_s[i] is the accuracy obtained with k = i + 1 (cf. the other
    # examples in this file), so the best k is argmax + 1, not the raw
    # 0-based argmax, which was off by one.
    optimal_k = np.argmax(k_s) + 1
    print("optimal k: {0}".format(optimal_k))

    # BUG FIX: the original refit the classifier on the *test* set before
    # measuring test accuracy, leaking the very labels being predicted.
    # Evaluate the train-fitted model on the held-out test split instead.
    print("Test accuracy with optimal {0}".format(classification_accuracy(knn, optimal_k, test_data, test_labels)))
Example #22
0
def main():
    """Run parts 2.2.2 and 2.2.3 of the assignment on the digit data.

    The model-fitting and part 2.2.1 calls are kept commented out; the two
    helpers below fit whatever they need from the raw splits themselves.
    """
    train_data, train_labels, test_data, test_labels = data.load_all_data(
        'data')

    # Fit the model
    # means = compute_mean_mles(train_data, train_labels)
    # covariances = compute_sigma_mles(train_data, train_labels)

    # Evaluation
    #part_2_2_1(covariances)

    # 2.2.2
    part_2_2_2(train_data, train_labels, test_data, test_labels)

    # 2.2.3
    part_2_2_3(train_data, train_labels, test_data, test_labels)
Example #23
0
def main():
    """Gaussian model: plot covariance diagonals, then report fit quality."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')

    # Per-class MLE parameters.
    means = compute_mean_mles(train_data, train_labels)
    covariances = compute_sigma_mles(train_data, train_labels)

    plot_cov_diagonal(covariances)

    # Evaluation -- likelihood computations stay inline so any output they
    # produce keeps its position relative to the header lines.
    print("Average Conditional Log-Likelihood")
    print('Training Set:',
          avg_conditional_likelihood(train_data, train_labels, means,
                                     covariances))
    print('Test Set',
          avg_conditional_likelihood(test_data, test_labels, means,
                                     covariances))

    print('\n \n Accuracy of Model:')
    print('Training Set:',
          model_accuracy(train_data, train_labels, means, covariances))
    print('Test Set',
          model_accuracy(test_data, test_labels, means, covariances))
Example #24
0
def main():
    """Bernoulli naive Bayes on binarized digits: fit, plot, and evaluate."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')
    train_data, test_data = binarize_data(train_data), binarize_data(test_data)

    # Fit the per-class/pixel Bernoulli parameters.
    eta = compute_parameters(train_data, train_labels)

    # Evaluation: parameter images and generated samples.
    plot_images(eta)
    generate_new_data(eta)

    # Average conditional likelihood, then accuracy, on each split.
    llh_train = avg_conditional_likelihood(train_data, train_labels, eta)
    print("The training average conditional likelihood is {:f}".format(llh_train))
    llh_test = avg_conditional_likelihood(test_data, test_labels, eta)
    print("The test average conditional likelihood is {:f}".format(llh_test))

    acc_train = calculate_accuracy(train_data, eta, train_labels)
    print("The training accuracy is {:f}".format(acc_train))
    acc_test = calculate_accuracy(test_data, eta, test_labels)
    print("The test accuracy is {:f}".format(acc_test))
Example #25
0
def main():
    """KNN accuracies at k=1/15 plus k-fold cross-validation over k."""
    train_data, train_labels, test_data, test_labels = data.load_all_data(
        'data')
    knn = KNearestNeighbor(train_data, train_labels)

    # BUG FIX: the original pre-computed predicted labels with query_knn and
    # then passed those *predictions* to classification_accuracy as its data
    # argument.  Everywhere else in this file classification_accuracy(knn, k,
    # data, labels) receives the raw examples (it runs the queries itself),
    # so pass the data sets directly and drop the redundant query loops.
    trainAccuracy_1 = classification_accuracy(knn, 1, train_data, train_labels)
    trainAccuracy_15 = classification_accuracy(knn, 15, train_data,
                                               train_labels)
    testAccuracy_1 = classification_accuracy(knn, 1, test_data, test_labels)
    testAccuracy_15 = classification_accuracy(knn, 15, test_data, test_labels)
    print("Train Accuracy, k = 1: ", trainAccuracy_1)
    print("Train Accuracy, k = 15: ", trainAccuracy_15)
    print("Test Accuracy, k = 1: ", testAccuracy_1)
    print("Test Accuracy, k = 15: ", testAccuracy_15)

    # do kfold cross validation on the data set and output the k that gives
    # the max accuracy
    accuracies = cross_validation(knn)

    # find max average accuracy (index + 1 is the k used)
    averages = [np.mean(fold_accs) for fold_accs in accuracies]
    print(averages)
    index = np.argmax(averages)
    max_k = index + 1
    max_accuracies = accuracies[index]

    print("The max k is: {} with accuracies of {}".format(
        max_k, max_accuracies))
Example #26
0
def main():
    """Bernoulli naive Bayes on binarized digits: fit, sample, evaluate."""
    train_data, train_labels, test_data, test_labels = data.load_all_data(
        'data', shuffle=False)
    train_data, test_data = binarize_data(train_data), binarize_data(test_data)

    # Fit the model; this variant expects data grouped per class (10, N, 64).
    eta = compute_parameters(train_data.reshape((10, -1, 64)))

    # Evaluation: parameter images and generated samples.
    plot_images(eta)
    generate_new_data(eta)

    # Return values unused -- presumably avg_conditional_likelihood reports
    # its result itself (TODO confirm against its definition).
    print('Train_data: ')
    avg_conditional_likelihood(train_data, train_labels, eta, data.TRAIN_STEM)
    print('Test_data: ')
    avg_conditional_likelihood(test_data, test_labels, eta, data.TEST_STEM)

    print('\nThe accuracy for train data is: ',
          accuracy(train_labels, train_data, eta))
    print('The accuracy for test data is: ',
          accuracy(test_labels, test_data, eta))
def main():
    """KNN accuracies for k=1 and k=15, then cross-validation over k.

    NOTE(review): `k_cross` is read from an enclosing scope (not defined in
    this function) -- presumably a module-level fold count; confirm.
    """
    train_data, train_labels, test_data, test_labels = data.load_all_data(
        'data')
    knn = KNearestNeighbor(train_data, train_labels)

    # k = 10
    # print(knn.query_knn(test_data[0], k))
    # print(test_labels[0])

    # Train/test accuracy for the two fixed values of k.
    for k in (1, 15):
        accuracy_train = classification_accuracy(knn, k, train_data,
                                                 train_labels)
        accuracy_test = classification_accuracy(knn, k, test_data, test_labels)
        print("accuracy for k=%d\ntraining set: %.3f\ntesting set: %.3f" %
              (k, accuracy_train, accuracy_test))

    print("Perform the %d-cross validation for finding optimal k: " % k_cross)
    accuracy = cross_validation(knn)
    for i in range(len(accuracy)):
        print("%dNN: %.3f%%" % (i + 1, accuracy[i] * 100))

    m = max(accuracy)
    # Very rare, but check for ties on the best accuracy.
    index_of_max = [i for i, j in enumerate(accuracy) if j == m]

    print("The best result for kNN is with k = ", end='')
    for i in range(len(index_of_max)):
        if i == 0:
            print(index_of_max[i] + 1, end='')
        else:
            # BUG FIX: the original wrote `", %d" % index_of_max[i] + 1`,
            # which adds an int to the formatted *string* and raises
            # TypeError whenever there is more than one best k.  The
            # addition must happen before formatting.
            print(", %d" % (index_of_max[i] + 1), end='')
    print("")

    # Report train/test accuracy for every tied optimal k.
    for i in range(len(index_of_max)):
        accuracy_train = classification_accuracy(knn, index_of_max[i] + 1,
                                                 train_data, train_labels)
        accuracy_test = classification_accuracy(knn, index_of_max[i] + 1,
                                                test_data, test_labels)
        print(
            "Accuracy for optimal kNN k=%d\ntraining set: %.3f\ntesting set: %.3f"
            % (index_of_max[i] + 1, accuracy_train, accuracy_test))
Example #28
0
def main():
    """Fit the Gaussian model once, then run assignment parts A, B, and C."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')

    # Fit per-class mean and covariance MLEs; all parts share them.
    means = compute_mean_mles(train_data, train_labels)
    covariances = compute_sigma_mles(train_data, train_labels)

    # PARTS A and B share the same signature; a blank line separates reports.
    for part in (run_a, run_b):
        part(train_data, train_labels, test_data, test_labels, means,
             covariances)
        print("")

    # PART C only needs the covariances.
    run_c(covariances)
Example #29
0
def main():
    """KNN: fixed-k accuracies, then cross-validation to pick the best k."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')
    knn = KNearestNeighbor(train_data, train_labels)

    # Accuracy at k = 1 and k = 15 on both splits.
    accu_train_1 = classification_accuracy(knn, 1, train_data, train_labels)
    accu_train_15 = classification_accuracy(knn, 15, train_data, train_labels)
    accu_test_1 = classification_accuracy(knn, 1, test_data, test_labels)
    accu_test_15 = classification_accuracy(knn, 15, test_data, test_labels)
    print(accu_train_1, accu_test_1, accu_train_15, accu_test_15)

    # Cross-validate; accus[idx] is the mean fold accuracy for k = idx + 1.
    accus, k = cross_validation(train_data, train_labels)
    for idx, val in enumerate(accus):
        print(f'Average accuracy for k={idx + 1} is {val}')
    print(f'The optimal value of k is {k}')

    # Final accuracies at the chosen k.
    train_accu = classification_accuracy(knn, k, train_data, train_labels)
    print(f'The training classification accuracy for k={k} is {train_accu}')
    test_accu = classification_accuracy(knn, k, test_data, test_labels)
    print(f'The test classification accuracy for k={k} is {test_accu}')
def main():
    """Fit the Gaussian model on the digit data and print test accuracy."""
    train_data, train_labels, test_data, test_labels = data.load_all_data(
        'D:\\digit')

    # Fit the model.
    means = compute_mean_mles(train_data, train_labels)
    # NOTE: this variant of compute_sigma_mles takes the means explicitly.
    covariances = compute_sigma_mles(train_data, train_labels, means)

    # Evaluation: accuracy on the held-out test split.
    print(test_accuracy(test_data, test_labels, means, covariances))
Example #31
0
def main():
    """KNN train/test accuracy at k=1 and k=15, then cross-validation."""
    train_data, train_labels, test_data, test_labels = data.load_all_data(
        'data')
    knn = KNearestNeighbor(train_data, train_labels)

    # Report both splits for each fixed k.
    for k in (1, 15):
        print(f"K={k}")
        print("training accuracy: ",
              classification_accuracy(knn, k, train_data, train_labels))
        print("test accuracy: ",
              classification_accuracy(knn, k, test_data, test_labels))

    print("best K is", cross_validation(train_data, train_labels))
Example #32
0
def main():
    """Gaussian generative model pipeline: load, fit, sanity-check, evaluate."""

    ### For first run only
    # train_data, train_labels, test_data, test_labels = data.load_all_data_from_zip('hw5digits.zip', './data')

    train_data, train_labels, test_data, test_labels = data.load_all_data(
        'data', shuffle=True)

    ### [CHECK: DATA] Display some images and labels to ensure those match up
    # display_img_w_label(train_data[0:n], train_labels[0:n])

    # Fit the per-class means.
    means = compute_mean_mles(train_data, train_labels)

    ### [CHECK: MEANS] Display means to make sure they are sensible
    # display_mean_mles(means)

    covariances = compute_sigma_mles(train_data, train_labels)

    ### [CHECK: COVARIANCE, EIGENVECTORS] PCA projections as a sanity check.
    plot_pca_grid(train_data, train_labels, covariances, means)

    # Q 1a) average conditional log-likelihood on each split
    avg_train = avg_conditional_likelihood(train_data, train_labels, means,
                                           covariances)
    print("avg conditional loglik, train: {}".format(avg_train))

    avg_test = avg_conditional_likelihood(test_data, test_labels, means,
                                          covariances)
    print("avg conditional loglik, test: {}".format(avg_test))

    # Evaluation, Q 1b) accuracy = fraction classified correctly
    preds = classify_data(train_data, means, covariances)
    train_accuracy = np.equal(preds, train_labels).sum() / train_data.shape[0]
    print("train accuracy: {}".format(train_accuracy))

    preds = classify_data(test_data, means, covariances)
    test_accuracy = np.equal(preds, test_labels).sum() / test_data.shape[0]
    print("test accuracy: {}".format(test_accuracy))

    # Q 1c) leading eigenvector of each class covariance
    plot_eigenvectors(covariances)
Example #33
0
def main():
    """Gaussian model: print data shapes, fit MLEs, evaluate, and plot."""
    train_data, train_labels, test_data, test_labels = data.load_all_data(
        'data')
    # 700 for each digit, total of 7000
    print("Train data shape: ", train_data.shape)
    print("Train labels shape: ", train_labels.shape)

    # 400 for each digit, total of 4000
    print("Test data shape: ", test_data.shape)
    print("Test labels shape: ",
          test_labels.shape)  # Values are in {0, 1, ..., 9}

    # Fit per-class mean and covariance MLEs.
    means = compute_mean_mles(train_data, train_labels)
    covariances = compute_sigma_mles(train_data, train_labels)

    # Evaluation: average conditional log-likelihood per split.
    llh_train = avg_conditional_likelihood(train_data, train_labels, means,
                                           covariances)
    llh_test = avg_conditional_likelihood(test_data, test_labels, means,
                                          covariances)
    print("Train Average Conditional Log Likelihood:", llh_train)
    print("Test Average Conditional Log Likelihood:", llh_test)

    # Exponentiate to check the probabilities are close to 1.0.
    print("Train Average Conditional Likelihood:", np.exp(llh_train))
    print("Test Average Conditional Likelihood:", np.exp(llh_test))

    # Accuracy of most-probable-class predictions.
    pred_train = classify_data(train_data, means, covariances)
    acc_train = computeAccuracy(train_labels, pred_train)
    pred_test = classify_data(test_data, means, covariances)
    acc_test = computeAccuracy(test_labels, pred_test)
    print("Train Accuracy: ", acc_train)
    print("Test Accuracy: ", acc_test)

    # Plot 8x8 images of all 10 leading eigenvectors and save the figure.
    plotAndSaveLeadingEigenvector(covariances)
Example #34
0
 def OnBtnNewClick(self, event):
     """Handle a click on the "New" button.

     Args:
         event: the GUI event object (unused here).
     """
     # Re-enable the window's buttons, then trigger a data (re)load; the
     # return value of load_all_data is discarded -- presumably it refreshes
     # shared state as a side effect (TODO confirm).
     self._enable_buttons()
     data.load_all_data()