def main():
    """Evaluate KNN on the digits data at fixed k values, then cross-validate."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')
    knn = KNearestNeighbor(train_data, train_labels)
    # Example usage:
    #predicted_label = knn.query_knn(test_data[0], 1)

    # Accuracy on both splits for two fixed choices of k.
    for k in (1, 15):
        acc_train = classification_accuracy(knn, k, train_data, train_labels)
        acc_test = classification_accuracy(knn, k, test_data, test_labels)
        print("Train Accuracy when k=" + str(k) + ": " + str(acc_train))
        print("Test Accuracy when k=" + str(k) + ": " + str(acc_test))

    # Cross-validate over k = 1..15.
    cross_validation(train_data, train_labels, k_range=np.arange(1, 16))

    # Final evaluation at k = 3.
    print("Train Accuracy: " + str(classification_accuracy(knn, 3, train_data, train_labels)))
    print("Test Accuracy: " + str(classification_accuracy(knn, 3, test_data, test_labels)))
def main():
    """Fit the Gaussian class-conditional model, report metrics, plot eigenvectors."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')

    # Fit per-class MLE parameters.
    means = compute_mean_mles(train_data, train_labels)
    covariances = compute_sigma_mles(train_data, train_labels)

    # Average conditional log-likelihood on each split.
    train_log_llh = avg_conditional_likelihood(train_data, train_labels, means, covariances)
    test_log_llh = avg_conditional_likelihood(test_data, test_labels, means, covariances)
    print('Train average conditional log-likelihood: ', train_log_llh)
    print('Test average conditional log-likelihood: ', test_log_llh)

    # Posterior (MAP) classification accuracy on each split.
    preds_train = classify_data(train_data, means, covariances)
    preds_test = classify_data(test_data, means, covariances)
    print('Train posterior accuracy: ', np.mean(train_labels.astype(int) == preds_train))
    print('Test posterior accuracy: ', np.mean(test_labels.astype(int) == preds_test))

    # Display the leading eigenvector of each class covariance as an 8x8 image.
    for digit in range(10):
        e_val, e_vec = np.linalg.eig(covariances[digit])
        # Eigenvectors are the *columns* of e_vec, hence the axis-0 slice.
        leading = e_vec[:, np.argmax(e_val)].reshape((8, 8))
        plt.subplot(3, 4, digit + 1)
        plt.imshow(leading, cmap='gray')
    plt.show()
def main():
    """Fit the Gaussian model, plot covariance diagonals, report metrics."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')

    # Fit the model.
    means = compute_mean_mles(train_data, train_labels)
    covariances = compute_sigma_mles(train_data, train_labels)

    # 2.1.1 -- visualize the covariance diagonals.
    plot_cov_diagonal(covariances)

    # 2.1.2 -- average conditional log-likelihood per split.
    train_ll = avg_conditional_likelihood(train_data, train_labels, means, covariances)
    test_ll = avg_conditional_likelihood(test_data, test_labels, means, covariances)
    print("The average conditional log-likelihood for the training set is ", train_ll)
    print("The average conditional log-likelihood for the testing set is ", test_ll)

    # 2.1.3 -- classification accuracy (test first, then train).
    test_acc = accuracy_score(test_labels, classify_data(test_data, means, covariances))
    train_acc = accuracy_score(train_labels, classify_data(train_data, means, covariances))
    print("The test accuracy is ", test_acc)
    print("The train accuracy is ", train_acc)
def main():
    """Fit the Gaussian model; report covariance plot, likelihoods, and accuracies."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')

    # Fit the model.
    means = compute_mean_mles(train_data, train_labels)
    covariances = compute_sigma_mles(train_data, train_labels)

    # q2.2.1 plot diagonal of covariances
    plot_cov_diagonal(covariances)

    # q2.2.2 average conditional log-likelihood for each split.
    for split, xs, ys in (("train", train_data, train_labels),
                          ("test", test_data, test_labels)):
        ll = avg_conditional_likelihood(xs, ys, means, covariances)
        print("average conditional log likelihood for {} set: {:f}".format(split, ll))

    # q2.2.3 classification accuracy for each split.
    for split, xs, ys in (("train", train_data, train_labels),
                          ("test", test_data, test_labels)):
        acc = classification_accuracy(means, covariances, xs, ys)
        print('classification accuracy for {} set: {:f}'.format(split, acc))
def main():
    """Train the neural net and report accuracy, ROC, confusion matrix, recall/precision."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')

    net = NeuralNet(True)
    net.train_model(train_data, train_labels)

    # Held-out accuracy (index 1 of the metrics returned by test_model).
    metrics = net.test_model(test_data, test_labels)
    print(f"achieved {metrics[1]} accuracy on test set")
    print(f"with params {net.params}")

    # Diagnostic plots.
    net.plot_ROC(test_data, test_labels)
    net.plot_Confusion_Matrix(test_data, test_labels)

    # Per-digit recall and precision.
    recall_by_digit = net.get_recall(test_data, test_labels)
    precision_by_digit = net.get_precision(test_data, test_labels)
    for digit in range(10):
        print(
            f"{digit} has a recall of {recall_by_digit[digit]} and precision {precision_by_digit[digit]}"
        )
def main():
    """Fit the Gaussian model and report likelihoods and prediction accuracies.

    BUG FIX: this block used Python 2 `print` *statements* while the rest of
    the file is Python 3 -- they are a SyntaxError under Python 3.  Converted
    every one to a print() call; all output strings are unchanged.
    """
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')

    # Fit the model
    means = compute_mean_mles(train_data, train_labels)
    covariances = compute_sigma_mles(train_data, train_labels)

    print("============Q2.2 Part1 plot of log of Sigma_k diagonal")
    plot_cov_diagonal(covariances)

    print("============Q2.2 part2 average log likelihood========")
    print("===========Train data average log likelihood=========")
    avg_train = avg_conditional_likelihood(train_data, train_labels, means, covariances)
    print("===========Test data average log likelihood ========")
    avg_test = avg_conditional_likelihood(test_data, test_labels, means, covariances)

    print("=============Q2.2 part3 prediction and accuracy of each predication======")
    print("=============Train data prediction and accuracy========")
    train_predict = classify_data(train_data, means, covariances)
    n_dim_train = train_labels.shape[0]
    classify_accuracy(train_predict, train_labels, n_dim_train)
    print("=============Test data prediction and accuracy=========")
    test_predict = classify_data(test_data, means, covariances)
    n_dim_test = test_labels.shape[0]
    classify_accuracy(test_predict, test_labels, n_dim_test)
def main():
    """Fit Bernoulli Naive Bayes on binarized digits and report metrics."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')
    train_data, test_data = binarize_data(train_data), binarize_data(test_data)

    # Fit the model.
    eta = compute_parameters(train_data, train_labels)

    # Visual diagnostics.
    plot_images(eta)
    generate_new_data(eta)

    # Average conditional likelihood per split.
    like_train = avg_conditional_likelihood(train_data, train_labels, eta)
    like_test = avg_conditional_likelihood(test_data, test_labels, eta)

    # Accuracy = fraction of predicted classes matching the labels.
    preds_train = classify_data(train_data, eta)
    preds_test = classify_data(test_data, eta)
    acc_train = sum(1 for i, c in enumerate(preds_train)
                    if train_labels[i] == c) / len(train_data)
    acc_test = sum(1 for i, c in enumerate(preds_test)
                   if test_labels[i] == c) / len(test_data)

    print("Average conditional likelihood ->", "\nTrain: ", like_train,
          "\nTest: ", like_test)
    print("Classification accuracy->", "\nTrain: ", acc_train,
          "\nTest: ", acc_test)
def main():
    """Fit the Gaussian model; answer questions 1(a)-(c)."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')

    # Fit the model.
    means = compute_mean_mles(train_data, train_labels)
    covariances = compute_sigma_mles(train_data, train_labels)

    # question 1(a): average conditional log-likelihood per split.
    train_avg = avg_conditional_likelihood(train_data, train_labels, means, covariances)
    test_avg = avg_conditional_likelihood(test_data, test_labels, means, covariances)
    print('The average conditional log-likelihood on train set is {}.\n'.format(train_avg))
    print('The average conditional log-likelihood on test set is {}.\n'.format(test_avg))

    # question 1(b): classification accuracy per split.
    train_acc = compute_accuracy(classify_data(train_data, means, covariances), train_labels)
    test_acc = compute_accuracy(classify_data(test_data, means, covariances), test_labels)
    print('The accuracy on train set is {}.\n'.format(train_acc))
    print('The accuracy on test set is {}.\n'.format(test_acc))

    # question 1(c): leading eigenvectors of the class covariances.
    plot_leading_eigenvectors(covariances)
def main():
    """Fit the Gaussian class-conditional model and report likelihoods/accuracies.

    Cleanup: removed the many blocks of commented-out debug prints and scratch
    code that obscured the flow of this function.
    """
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')

    # Fit the model.
    means = compute_mean_mles(train_data, train_labels)
    covariances = compute_sigma_mles(train_data, train_labels)

    # NOTE(review): these two results are never used below; the calls are
    # kept in case the likelihood routines matter for their side effects --
    # confirm they are pure and delete if so.
    log_generative = generative_likelihood(train_data, means, covariances)
    log_conditional = conditional_likelihood(train_data, means, covariances)

    # Average conditional log-likelihood per split.
    avg_cond_train = avg_conditional_likelihood(train_data, train_labels, means, covariances)
    print("Average conditional log-likelihood for train set is: " + str(avg_cond_train))
    avg_cond_test = avg_conditional_likelihood(test_data, test_labels, means, covariances)
    print("Average conditional log-likelihood for test set is: " + str(avg_cond_test))

    # Classification accuracy per split.
    train_accur = accuracy(train_data, train_labels, means, covariances)
    print("train set accuracy is: " + str(train_accur))
    test_accur = accuracy(test_data, test_labels, means, covariances)
    print("test set accuracy is: " + str(test_accur))
def main():
    """Select k by cross-validation and report its fold/train/test accuracies."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')
    knn = KNearestNeighbor(train_data, train_labels)

    # Cross-validation returns the winning k and its mean fold accuracy.
    best_k, best_k_fold_acc = cross_validation(train_data, train_labels)
    print("The optimal k is {:d}".format(best_k))
    print("The average accuracy across folds for k={:d} is {:f}".format(
        best_k, best_k_fold_acc))
    print("The training accuracy for k={:d} is {:f}".format(
        best_k, classification_accuracy(knn, best_k, train_data, train_labels)))
    print("The test accuracy for k={:d} is {:f}".format(
        best_k, classification_accuracy(knn, best_k, test_data, test_labels)))
def main():
    """Fit AdaBoost (with its internal parameter search) and report accuracies."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')

    clf = AdaBoost(train_data, train_labels)
    print("Best Parameters: ", clf.best_params_)

    # Report test before train, as the assignment asks.
    for split, xs, ys in (("Test", test_data, test_labels),
                          ("Train", train_data, train_labels)):
        print(split + " accuracy: ", classification_accuracy(clf, xs, ys))
def main():
    """Entry point: load the digits data and run part 2.1.3."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')
    # Built here so part 2.1.1 can be re-enabled easily; unused by part 2.1.3.
    knn = KNearestNeighbor(train_data, train_labels)
    # part_2_1_1(knn, train_data, train_labels, test_data, test_labels)

    # part 2.1.3
    part_2_1_3(train_data, train_labels, test_data, test_labels)
def load_data(TIME_STEPS, STEP):
    """Load MHEALTH accelerometer data and build windowed train/test datasets.

    Args:
        TIME_STEPS: window length (samples per time-series example).
        STEP: stride between consecutive windows.

    Returns:
        X_train, y_train, X_test, y_test -- windowed features and one-hot
        encoded targets for the train (subjects 1-7) and test (rest) splits.

    Cleanup: removed the no-op expression statements (`raw.head()`,
    `valid_activities_set.columns`) whose results were discarded, and
    deduplicated the feature-column list that was repeated four times.
    """
    directory = "MHEALTHDATASET/"
    raw, X, y = data_management.load_all_data(directory)

    # Keep only two activities (labels 1 and 4).
    valid_activities_set = raw.query("label in (1,4)")

    # Accelerometer channels used as features: chest, left ankle, right arm.
    feature_columns = [
        'acc_chest_x', 'acc_chest_y', 'acc_chest_z',
        'acc_left_ank_x', 'acc_left_ank_y', 'acc_left_ank_z',
        'acc_right_arm_x', 'acc_right_arm_y', 'acc_right_arm_z',
    ]
    data = valid_activities_set[['id'] + feature_columns + ['label']].copy()

    # Separate into train and test by subject id.
    df_train = data[data['id'] <= 7]
    df_test = data[data['id'] > 7]

    # Scale features to [-1, 1] with min-max normalization.
    df_train.loc[:, feature_columns] = data_management.range_normalization(
        df_train[feature_columns].to_numpy(), -1, 1)
    df_test.loc[:, feature_columns] = data_management.range_normalization(
        df_test[feature_columns].to_numpy(), -1, 1)

    # Window the signals into fixed-length time-series examples.
    X_train, y_train = data_management.create_dataset(
        df_train[feature_columns], df_train.label, TIME_STEPS, STEP)
    X_test, y_test = data_management.create_dataset(
        df_test[feature_columns], df_test.label, TIME_STEPS, STEP)

    # Encode the targets.
    y_train = data_management.encode_target(y_train)
    y_test = data_management.encode_target(y_test)

    return X_train, y_train, X_test, y_test
def main():
    """Fit the MLP (with parameter search), compute ROC inputs, report accuracies.

    BUG FIX: sklearn's roc_curve signature is (y_true, y_score).  The original
    passed the raw test *data* as y_true and predictions made on the *train*
    set as scores -- shapes/labels could not match.  Predict on the test set
    and compare against the test labels instead.
    """
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')

    clf = MLP(train_data, train_labels)

    predicted_labels = clf.predict(test_data)
    fpr_rf, tpr_rf, _ = roc_curve(test_labels, predicted_labels)

    print("Best Parameters: ", clf.best_params_)
    print("Test accuracy: ", classification_accuracy(clf, test_data, test_labels))
    print("Train accuracy: ",
          classification_accuracy(clf, train_data, train_labels))
def main():
    """Cross-validate MLP hyperparameters, fit the winner, report accuracies."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')
    mlp = MLP(train_data, train_labels)

    # Hyperparameters chosen by cross-validation.
    chosen = cross_validation(train_data, train_labels)
    solver, activation, learning_rate, layer_size = chosen
    clf = mlp.mlpclassifier(solver, activation, learning_rate, layer_size)

    print("Solver: ", solver, " Activation: ", activation,
          " Learning Rate: ", learning_rate, " Layer Size: ", layer_size)
    print(clf.predict(test_data))
    print("Test accuracy: ", classification_accuracy(clf, test_data, test_labels))
    print("Train accuracy: ", classification_accuracy(clf, train_data, train_labels))
def main():
    """Fit Bernoulli Naive Bayes parameters and produce its diagnostic plots."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')
    train_data, test_data = binarize_data(train_data), binarize_data(test_data)

    # Fit the model.
    eta = compute_parameters(train_data, train_labels)

    # Visualize the learned parameters and sample new digits from them.
    plot_images(eta)
    generate_new_data(eta)
def main():
    """Report KNN accuracies for fixed k, cross-validate, then evaluate k=4."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')
    knn = KNearestNeighbor(train_data, train_labels)

    # Accuracies for two fixed values of k.
    for k in (1, 15):
        print("K = %d training set accuracy: %f"
              % (k, classification_accuracy(knn, k, train_data, train_labels)))
        print("K = %d testing set accuracy: %f"
              % (k, classification_accuracy(knn, k, test_data, test_labels)))

    print("10 fold cross validation to find the opitmalK in the 1-15 range")
    cross_validation(train_data, train_labels)

    # k = 4 was the value selected by the cross-validation above.
    print("K = 4 training set accuracy: %f"
          % classification_accuracy(knn, 4, train_data, train_labels))
    print("For K = 4 testing set accuracy: %f"
          % classification_accuracy(knn, 4, test_data, test_labels))
def main():
    """Fit the Gaussian model, plot covariance diagonals, report metrics."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')

    # Fit the model.
    means = compute_mean_mles(train_data, train_labels)
    covariances = compute_sigma_mles(train_data, train_labels)
    plot_cov_diagonal(covariances)

    # Evaluation: likelihoods first, then accuracies.
    train_ll = avg_conditional_likelihood(train_data, train_labels, means, covariances)
    test_ll = avg_conditional_likelihood(test_data, test_labels, means, covariances)
    print("The average training conditional likelihood is {:f}".format(train_ll))
    print("The average test conditional likelihood is {:f}".format(test_ll))

    train_acc = calculate_accuracy(train_data, means, covariances, train_labels)
    test_acc = calculate_accuracy(test_data, means, covariances, test_labels)
    print("The training accuracy is {:f}".format(train_acc))
    print("The test accuracy is {:f}".format(test_acc))
def main():
    """Evaluate KNN at k=1/15 on both splits, then cross-validate for the best k."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')
    knn = KNearestNeighbor(train_data, train_labels)

    # Sanity-check a single query (result unused).
    predicted_label = knn.query_knn(test_data[0], 1)

    # Test-split accuracies first, then train-split, each at k = 1 and 15.
    for split, xs, ys in (("test", test_data, test_labels),
                          ("train", train_data, train_labels)):
        for k in (1, 15):
            print("%s data, k=%d: " % (split, k),
                  classification_accuracy(knn, k, xs, ys))

    print("Optimal k is ", cross_validation(train_data, train_labels))
def main():
    """Report KNN test accuracy at k=1 and k=15, then run cross-validation.

    BUG FIX: removed a dangling ''' at the end of the function -- it opened
    an unterminated triple-quoted string, which is a SyntaxError in this
    chunk.  (If it was meant to comment out code below, that code is not
    visible here; confirm nothing else depended on it.)
    """
    train_data, train_labels, test_data, test_labels = data.load_all_data(
        'data', shuffle=True)
    knn = KNearestNeighbor(train_data, train_labels)

    # sub part 1
    print("For K = 1 Classification Accuracy: {}".format(
        classification_accuracy(knn, 1, test_data, test_labels)))
    print("For K = 15 CLassification Accuracy: {}".format(
        classification_accuracy(knn, 15, test_data, test_labels)))

    # sub part 3
    print(cross_validation(train_data, train_labels))
def main():
    """Evaluate KNN at fixed k, then pick and evaluate the cross-validated best k.

    BUG FIXES (both hedged -- confirm against this module's cross_validation):
    1. np.argmax returns a 0-based index; assuming cross_validation returns
       per-k accuracies for k = 1..len(k_s) in order, the reported optimal k
       was off by one.  Converted index -> k with +1.
    2. The original re-fitted KNN on the *test* set and then evaluated on
       that same test set, so each query point's own label was among its
       neighbours, inflating the reported accuracy.  Evaluate with the
       classifier fitted on the training data instead.
    """
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')
    knn = KNearestNeighbor(train_data, train_labels)

    print("train classification accuracy when k = 1, {0}".format(
        classification_accuracy(knn, 1, train_data, train_labels)))
    print("train classification accuracy when k = 15, {0}".format(
        classification_accuracy(knn, 15, train_data, train_labels)))
    print("test classification accuracy when k = 1, {0}".format(
        classification_accuracy(knn, 1, test_data, test_labels)))
    print("test classification accuracy when k = 15, {0}".format(
        classification_accuracy(knn, 15, test_data, test_labels)))

    k_s = cross_validation(train_data, train_labels)
    optimal_k = np.argmax(k_s) + 1  # index -> k (fold accuracies start at k=1)
    print("optimal k: {0}".format(optimal_k))
    print("Test accuracy with optimal {0}".format(
        classification_accuracy(knn, optimal_k, test_data, test_labels)))
def main():
    """Entry point: run parts 2.2.2 and 2.2.3 of the assignment."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')

    # Model fitting for part 2.2.1 is currently disabled:
    # means = compute_mean_mles(train_data, train_labels)
    # covariances = compute_sigma_mles(train_data, train_labels)
    # part_2_2_1(covariances)

    part_2_2_2(train_data, train_labels, test_data, test_labels)  # 2.2.2
    part_2_2_3(train_data, train_labels, test_data, test_labels)  # 2.2.3
def main():
    """Fit the Gaussian model, plot covariance diagonals, report metrics."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')

    # Fit the model.
    means = compute_mean_mles(train_data, train_labels)
    covariances = compute_sigma_mles(train_data, train_labels)
    plot_cov_diagonal(covariances)

    # Evaluation: log-likelihoods, then accuracy.
    train_ll = avg_conditional_likelihood(train_data, train_labels, means, covariances)
    test_ll = avg_conditional_likelihood(test_data, test_labels, means, covariances)
    print("Average Conditional Log-Likelihood")
    print('Training Set:', train_ll)
    print('Test Set', test_ll)

    train_acc = model_accuracy(train_data, train_labels, means, covariances)
    test_acc = model_accuracy(test_data, test_labels, means, covariances)
    print('\n \n Accuracy of Model:')
    print('Training Set:', train_acc)
    print('Test Set', test_acc)
def main():
    """Fit Bernoulli Naive Bayes, plot/sample from it, report likelihood/accuracy."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')
    train_data, test_data = binarize_data(train_data), binarize_data(test_data)

    # Fit the model.
    eta = compute_parameters(train_data, train_labels)

    # Evaluation plots.
    plot_images(eta)
    generate_new_data(eta)

    train_ll = avg_conditional_likelihood(train_data, train_labels, eta)
    test_ll = avg_conditional_likelihood(test_data, test_labels, eta)
    print("The training average conditional likelihood is {:f}".format(train_ll))
    print("The test average conditional likelihood is {:f}".format(test_ll))

    train_acc = calculate_accuracy(train_data, eta, train_labels)
    test_acc = calculate_accuracy(test_data, eta, test_labels)
    print("The training accuracy is {:f}".format(train_acc))
    print("The test accuracy is {:f}".format(test_acc))
def main():
    """Evaluate KNN at k=1 and k=15 on both splits, then cross-validate over k."""
    train_data, train_labels, test_data, test_labels = data.load_all_data(
        'data')
    knn = KNearestNeighbor(train_data, train_labels)
    # Predict every training point with k=1 and with k=15.
    trainLabel_1 = []
    for i in range(len(train_data)):
        trainLabel_1.append(knn.query_knn(train_data[i], 1))
    trainLabel_15 = []
    for j in range(len(train_data)):
        trainLabel_15.append(knn.query_knn(train_data[j], 15))
    # Predict every test point with k=1 and with k=15.
    testLabel_1 = []
    for i in range(len(test_data)):
        testLabel_1.append(knn.query_knn(test_data[i], 1))
    testLabel_15 = []
    for j in range(len(test_data)):
        testLabel_15.append(knn.query_knn(test_data[j], 15))
    # NOTE(review): classification_accuracy here receives the *predicted
    # labels* (trainLabel_1 etc.) as its data argument, whereas sibling code
    # in this file passes the raw feature data.  Confirm this module's
    # classification_accuracy expects predictions; otherwise the prediction
    # loops above are wasted work and the wrong thing is being scored.
    trainAccuracy_1 = classification_accuracy(knn, 1, trainLabel_1,
                                              train_labels)
    trainAccuracy_15 = classification_accuracy(knn, 15, trainLabel_15,
                                               train_labels)
    testAccuracy_1 = classification_accuracy(knn, 1, testLabel_1, test_labels)
    testAccuracy_15 = classification_accuracy(knn, 15, testLabel_15,
                                              test_labels)
    print("Train Accuracy, k = 1: ", trainAccuracy_1)
    print("Train Accuracy, k = 15: ", trainAccuracy_15)
    print("Test Accuracy, k = 1: ", testAccuracy_1)
    print("Test Accuracy, k = 15: ", testAccuracy_15)
    # do kfold cross validation on the data set and output the k that gives the max accuracy
    accuracies = cross_validation(knn)
    # find max average accuracies (index + 1 is the k used)
    averages = []
    for x in range(len(accuracies)):
        averages.append(np.mean(accuracies[x]))
    print(averages)
    index = np.argmax(averages)
    max_k = index + 1
    max_accuracies = accuracies[index]
    print("The max k is: {} with accuracies of {}".format(
        max_k, max_accuracies))
def main():
    """Fit Bernoulli NB on binarized digits (unshuffled) and report metrics."""
    train_data, train_labels, test_data, test_labels = data.load_all_data(
        'data', shuffle=False)
    train_data, test_data = binarize_data(train_data), binarize_data(test_data)

    # Fit the model; the reshape groups the unshuffled rows into 10 classes
    # of 64-pixel images.
    eta = compute_parameters(train_data.reshape((10, -1, 64)))

    # Evaluation plots.
    plot_images(eta)
    generate_new_data(eta)

    print('Train_data: ')
    avg_conditional_likelihood(train_data, train_labels, eta, data.TRAIN_STEM)
    print('Test_data: ')
    avg_conditional_likelihood(test_data, test_labels, eta, data.TEST_STEM)

    print('\nThe accuracy for train data is: ',
          accuracy(train_labels, train_data, eta))
    print('The accuracy for test data is: ',
          accuracy(test_labels, test_data, eta))
def main():
    """Evaluate KNN at k=1/15, cross-validate, and report accuracy for the best k.

    BUG FIX: in the tie-printing loop the original wrote
    `print(", %d" % index_of_max[i] + 1, end='')`, which parses as
    `(", %d" % idx) + 1` -- a str + int TypeError whenever more than one k
    ties for the best accuracy.  Parenthesized so the +1 happens inside the
    format argument.
    """
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')
    knn = KNearestNeighbor(train_data, train_labels)

    k = 1
    accuracy_train = classification_accuracy(knn, k, train_data, train_labels)
    accuracy_test = classification_accuracy(knn, k, test_data, test_labels)
    print("accuracy for k=%d\ntraining set: %.3f\ntesting set: %.3f" %
          (k, accuracy_train, accuracy_test))
    k = 15
    accuracy_train = classification_accuracy(knn, k, train_data, train_labels)
    accuracy_test = classification_accuracy(knn, k, test_data, test_labels)
    print("accuracy for k=%d\ntraining set: %.3f\ntesting set: %.3f" %
          (k, accuracy_train, accuracy_test))

    print("Perform the %d-cross validation for finding optimal k: " % k_cross)
    accuracy = cross_validation(knn)
    for i in range(len(accuracy)):
        print("%dNN: %.3f%%" % (i + 1, accuracy[i] * 100))

    m = max(accuracy)
    # Very rare, but still check whether there is a tie for the best k.
    index_of_max = [i for i, j in enumerate(accuracy) if j == m]
    print("The best result for kNN is with k = ", end='')
    for i in range(len(index_of_max)):
        if i == 0:
            print(index_of_max[i] + 1, end='')
        else:
            print(", %d" % (index_of_max[i] + 1), end='')
    print("")

    # Report train/test accuracy for every tied optimal k.
    for i in range(len(index_of_max)):
        accuracy_train = classification_accuracy(knn, index_of_max[i] + 1,
                                                 train_data, train_labels)
        accuracy_test = classification_accuracy(knn, index_of_max[i] + 1,
                                                test_data, test_labels)
        print(
            "Accuracy for optimal kNN k=%d\ntraining set: %.3f\ntesting set: %.3f"
            % (index_of_max[i] + 1, accuracy_train, accuracy_test))
def main():
    """Fit Gaussian MLEs and run the three evaluation parts (A, B, C)."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')

    # Fit the model.
    means = compute_mean_mles(train_data, train_labels)
    covariances = compute_sigma_mles(train_data, train_labels)

    # Parts A and B take the full data/parameter set; C only the covariances.
    for part_runner in (run_a, run_b):
        part_runner(train_data, train_labels, test_data, test_labels,
                    means, covariances)
        print("")
    run_c(covariances)
def main():
    """KNN accuracies at k=1/15, then cross-validate and evaluate the chosen k."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')
    knn = KNearestNeighbor(train_data, train_labels)

    # Fixed-k accuracies, printed in the order train@1, test@1, train@15, test@15.
    accu_train_1 = classification_accuracy(knn, 1, train_data, train_labels)
    accu_train_15 = classification_accuracy(knn, 15, train_data, train_labels)
    accu_test_1 = classification_accuracy(knn, 1, test_data, test_labels)
    accu_test_15 = classification_accuracy(knn, 15, test_data, test_labels)
    print(accu_train_1, accu_test_1, accu_train_15, accu_test_15)

    # Cross-validation: per-k average accuracies plus the winning k.
    accus, k = cross_validation(train_data, train_labels)
    for idx, val in enumerate(accus):
        print('Average accuracy for k=' + str(idx + 1) + ' is ' + str(val))
    print('The optimal value of k is ' + str(k))

    train_accu = classification_accuracy(knn, k, train_data, train_labels)
    print('The training classification accuracy for k={0} is {1}'.format(k, train_accu))
    test_accu = classification_accuracy(knn, k, test_data, test_labels)
    print('The test classification accuracy for k={0} is {1}'.format(k, test_accu))
def main():
    """Fit the Gaussian model from a local digits folder and print test accuracy.

    Cleanup: removed the several blocks of commented-out scratch/debugging
    code (array experiments, debug prints) that made the flow hard to read.
    """
    # NOTE(review): hard-coded absolute Windows path -- parameterize if this
    # ever needs to run on another machine.
    train_data, train_labels, test_data, test_labels = data.load_all_data(
        'D:\\digit')

    # Fit the model.  (This module's compute_sigma_mles also takes the means.)
    means = compute_mean_mles(train_data, train_labels)
    covariances = compute_sigma_mles(train_data, train_labels, means)

    # Evaluation.
    print(test_accuracy(test_data, test_labels, means, covariances))
def main():
    """Print KNN train/test accuracy for k=1 and k=15, then the best k."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')
    knn = KNearestNeighbor(train_data, train_labels)

    for k in (1, 15):
        print("K=%d" % k)
        print("training accuracy: ",
              classification_accuracy(knn, k, train_data, train_labels))
        print("test accuracy: ",
              classification_accuracy(knn, k, test_data, test_labels))

    print("best K is", cross_validation(train_data, train_labels))
def main():
    """Fit the Gaussian model; report log-likelihoods, accuracies, eigenvectors."""
    ### For first run only
    # train_data, train_labels, test_data, test_labels = data.load_all_data_from_zip('hw5digits.zip', './data')
    train_data, train_labels, test_data, test_labels = data.load_all_data(
        'data', shuffle=True)
    ### [CHECK: DATA] Display some images and labels to ensure those match up
    # display_img_w_label(train_data[0:n], train_labels[0:n])

    # Fit per-class means and covariances; the PCA grid doubles as a sanity
    # check on both.
    means = compute_mean_mles(train_data, train_labels)
    ### [CHECK: MEANS] Display means to make sure they are sensible
    # display_mean_mles(means)
    covariances = compute_sigma_mles(train_data, train_labels)
    plot_pca_grid(train_data, train_labels, covariances, means)

    # Q 1a) average conditional log-likelihood per split.
    train_avg = avg_conditional_likelihood(train_data, train_labels, means, covariances)
    print("avg conditional loglik, train: {}".format(train_avg))
    test_avg = avg_conditional_likelihood(test_data, test_labels, means, covariances)
    print("avg conditional loglik, test: {}".format(test_avg))

    # Q 1b) MAP classification accuracy per split (mean of elementwise matches).
    train_preds = classify_data(train_data, means, covariances)
    print("train accuracy: {}".format(np.mean(np.equal(train_preds, train_labels))))
    test_preds = classify_data(test_data, means, covariances)
    print("test accuracy: {}".format(np.mean(np.equal(test_preds, test_labels))))

    # Q 1c) leading eigenvectors of each class covariance.
    plot_eigenvectors(covariances)
def main():
    """Fit the Gaussian model; report likelihoods/accuracies; save eigenvector plot."""
    train_data, train_labels, test_data, test_labels = data.load_all_data('data')

    # Dataset sanity prints: 700 train / 400 test per digit, labels in {0..9}.
    print("Train data shape: ", train_data.shape)
    print("Train labels shape: ", train_labels.shape)
    print("Test data shape: ", test_data.shape)
    print("Test labels shape: ", test_labels.shape)

    # Fit the model.
    means = compute_mean_mles(train_data, train_labels)
    covariances = compute_sigma_mles(train_data, train_labels)

    # Average conditional log-likelihood, and its exponent as a probability
    # (should be close to 1.0).
    train_avg_ll = avg_conditional_likelihood(train_data, train_labels, means, covariances)
    test_avg_ll = avg_conditional_likelihood(test_data, test_labels, means, covariances)
    print("Train Average Conditional Log Likelihood:", train_avg_ll)
    print("Test Average Conditional Log Likelihood:", test_avg_ll)
    print("Train Average Conditional Likelihood:", np.exp(train_avg_ll))
    print("Test Average Conditional Likelihood:", np.exp(test_avg_ll))

    # Classification accuracy.
    print("Train Accuracy: ",
          computeAccuracy(train_labels, classify_data(train_data, means, covariances)))
    print("Test Accuracy: ",
          computeAccuracy(test_labels, classify_data(test_data, means, covariances)))

    # 8x8 plots of all 10 leading eigenvectors, saved to disk.
    plotAndSaveLeadingEigenvector(covariances)
def OnBtnNewClick(self, event):
    # "New" button click handler: re-enable this view's buttons, then reload
    # all data.  (`event` is the framework-supplied click event; unused here.)
    self._enable_buttons()
    data.load_all_data()