def score_pca(k, p=0.5, seed=1):
    """Count correct and incorrect nearest-neighbour predictions in PCA space.

    The data come from the ``X`` and ``y`` names of the enclosing scope and
    are partitioned by ``ps6.split_dataset(X, y, p, seed)``.

    Args:
        k: forwarded to ``ps6.pca`` as its second argument.
        p: split value forwarded to ``ps6.split_dataset``.
        seed: seed value forwarded to ``ps6.split_dataset``.

    Returns:
        Tuple ``(good, bad)``: the number of test observations whose closest
        training projection carries the same label, and the number that do
        not.
    """
    Xtrain, ytrain, Xtest, ytest = ps6.split_dataset(X, y, p, seed)

    # Fit the basis on the training partition and project it.
    train_mean = ps6.get_mean_face(Xtrain)
    basis, _ = ps6.pca(Xtrain, k)
    train_coords = np.dot(Xtrain - train_mean, basis)

    # NOTE(review): the test partition is centred on its own mean rather
    # than on the training mean -- confirm this is intended.
    test_mean = ps6.get_mean_face(Xtest)
    test_coords = np.dot(Xtest - test_mean, basis)

    def closest_label(point):
        # Label of the training projection with the smallest Euclidean gap.
        gaps = [np.linalg.norm(point - ref) for ref in train_coords]
        return ytrain[np.argmin(gaps)]

    good = sum(
        1
        for row, point in enumerate(test_coords)
        if closest_label(point) == ytest[row]
    )
    bad = len(test_coords) - good
    return good, bad
def part_1c():
    """Run the PCA nearest-neighbour experiment and print its accuracy.

    Images are loaded with ``ps6.load_images(YALE_FACES_DIR, [32, 32])`` and
    split with ratio ``p``. Both partitions are projected onto the ``k``
    eigenvectors returned by ``ps6.pca`` for the training partition, and each
    test observation receives the label of the closest training projection
    (Euclidean distance). The counts of good and bad predictions and the
    resulting accuracy are printed.
    """
    p = 0.5  # Select a split percentage value
    k = 5  # Select a value for k

    size = [32, 32]
    X, y = ps6.load_images(YALE_FACES_DIR, size)
    Xtrain, ytrain, Xtest, ytest = ps6.split_dataset(X, y, p)

    # training
    mu = ps6.get_mean_face(Xtrain)
    eig_vecs, _ = ps6.pca(Xtrain, k)
    Xtrain_proj = np.dot(Xtrain - mu, eig_vecs)

    # testing
    # NOTE(review): the test partition is centred on its own mean rather
    # than on the training mean -- confirm this is intended.
    mu = ps6.get_mean_face(Xtest)
    Xtest_proj = np.dot(Xtest - mu, eig_vecs)

    good = 0
    bad = 0

    for i, obs in enumerate(Xtest_proj):
        dist = [np.linalg.norm(obs - x) for x in Xtrain_proj]
        y_pred = ytrain[np.argmin(dist)]

        if y_pred == ytest[i]:
            good += 1
        else:
            bad += 1

    # Single-argument print calls behave identically on Python 2 and 3. The
    # doubled spaces reproduce the separator the old print statement
    # inserted after each label.
    print('Good predictions =  {0} Bad predictions =  {1}'.format(good, bad))
    print('{0:.2f}% accuracy'.format(100 * float(good) / (good + bad)))
def part_1c():
    """Compare PCA nearest-neighbour accuracy against random guessing.

    Images are loaded with ``ps6.load_images(YALE_FACES_DIR, [32, 32])`` and
    split with ratio ``p``. Both partitions are projected onto the ``k``
    eigenvectors returned by ``ps6.pca`` for the training partition, and each
    test observation receives the label of the closest training projection
    (Euclidean distance). As a baseline, one integer in ``[1, 16)`` is drawn
    per test observation and scored against the same test labels. Both sets
    of results are printed.
    """
    p = 0.5  # Select a split percentage value
    k = 5  # Select a value for k

    # To compare several split ratios, wrap the code below in a loop such as
    # ``for p in np.arange(0.1, 1.0, 0.1):``.
    size = [32, 32]
    X, y = ps6.load_images(YALE_FACES_DIR, size)
    Xtrain, ytrain, Xtest, ytest = ps6.split_dataset(X, y, p)

    # training
    mu = ps6.get_mean_face(Xtrain)
    eig_vecs, _ = ps6.pca(Xtrain, k)
    Xtrain_proj = np.dot(Xtrain - mu, eig_vecs)

    # testing
    # NOTE(review): the test partition is centred on its own mean rather
    # than on the training mean -- confirm this is intended.
    mu = ps6.get_mean_face(Xtest)
    Xtest_proj = np.dot(Xtest - mu, eig_vecs)

    good = 0
    bad = 0

    for i, obs in enumerate(Xtest_proj):
        dist = [np.linalg.norm(obs - x) for x in Xtrain_proj]
        y_pred = ytrain[np.argmin(dist)]

        if y_pred == ytest[i]:
            good += 1
        else:
            bad += 1

    # Enable result comparison to a random value selector.
    random_guess = np.random.randint(low=1, high=16, size=len(ytest))

    # Random accuracy check: one guess per test label, so zip pairs them up.
    rand_good = sum(
        1 for guess, truth in zip(random_guess, ytest) if guess == truth
    )
    rand_bad = len(random_guess) - rand_good

    # Single-argument print calls behave identically on Python 2 and 3. The
    # doubled spaces reproduce the separator the old print statement
    # inserted after each label.
    print('Results where P is {}'.format(p))
    print('-------------------------------')
    print('Random Selection Results')
    print('Good predictions =  {0} Bad predictions =  {1}'.format(
        rand_good, rand_bad))
    print('(Random) Testing accuracy: {0:.2f}%'.format(
        100 * float(rand_good) / (rand_good + rand_bad)))
    print('Normal Dist Results')
    print('Good predictions =  {0} Bad predictions =  {1}'.format(good, bad))
    print('{0:.2f}% accuracy'.format(100 * float(good) / (good + bad)))
    print('--------------------------------')
    print('')
def part_1c():
    """Run the PCA nearest-neighbour experiment plus a random baseline.

    Images are loaded with ``ps6.load_images(YALE_FACES_DIR, [32, 32])`` and
    split with ratio ``p``. Both partitions are projected onto the ``k``
    eigenvectors returned by ``ps6.pca`` for the training partition, and each
    test observation receives the label of the closest training projection
    (Euclidean distance). The counts and accuracy are printed, followed by
    the accuracy of guessing a random label for every training observation.
    """
    p = 0.8  # Select a split percentage value
    k = 5  # Select a value for k

    size = [32, 32]
    X, y = ps6.load_images(YALE_FACES_DIR, size)
    Xtrain, ytrain, Xtest, ytest = ps6.split_dataset(X, y, p)

    # training
    mu = ps6.get_mean_face(Xtrain)
    eig_vecs, _ = ps6.pca(Xtrain, k)
    Xtrain_proj = np.dot(Xtrain - mu, eig_vecs)

    # testing
    # NOTE(review): the test partition is centred on its own mean rather
    # than on the training mean -- confirm this is intended.
    mu = ps6.get_mean_face(Xtest)
    Xtest_proj = np.dot(Xtest - mu, eig_vecs)

    good = 0
    bad = 0

    for i, obs in enumerate(Xtest_proj):
        dist = [np.linalg.norm(obs - x) for x in Xtrain_proj]
        y_pred = ytrain[np.argmin(dist)]

        if y_pred == ytest[i]:
            good += 1
        else:
            bad += 1

    print('Good predictions = ', good, 'Bad predictions = ', bad)
    print('{0:.2f}% accuracy'.format(100 * float(good) / (good + bad)))

    # Random baseline: draw uniformly from the labels that actually occur in
    # the training partition. The previous np.random.choice([1, 16], ...)
    # could only ever produce the two literal values 1 and 16.
    candidate_labels = np.unique(ytrain)
    rand_y = np.random.choice(candidate_labels, len(ytrain))
    rand_hits = np.sum(rand_y == ytrain)
    rand_accuracy = 100 * float(rand_hits) / len(ytrain)
    print('(Random) Training accuracy: {0:.2f}%'.format(rand_accuracy))
def part_1a_1b():
    """Write the mean-face image and plot the eigenvectors from ``ps6.pca``.

    Saves ``ps6-1-a-1.png`` under ``OUTPUT_DIR`` and hands the transposed
    eigenvectors to ``plot_eigen_faces`` with the name ``ps6-1-b-1.png``.
    """
    full_size = (192, 231)
    reduced_size = (32, 32)
    n_components = 10

    X, y = ps6.load_images(YALE_FACES_DIR, reduced_size)

    # Mean face: compute it, render it, then save it to disk.
    mean_face = ps6.get_mean_face(X)
    mean_face_image = visualize_mean_face(mean_face, reduced_size, full_size)
    mean_face_path = os.path.join(OUTPUT_DIR, "ps6-1-a-1.png")
    cv2.imwrite(mean_face_path, mean_face_image)

    # PCA dimension reduction; only the eigenvectors are plotted.
    eig_vecs, _ = ps6.pca(X, n_components)
    plot_eigen_faces(eig_vecs.T, "ps6-1-b-1.png")
def test_mean_face(self):
    """Check ``ps6.get_mean_face`` against three stored reference results.

    For cases 1 to 3, loads ``x_data_mean_<i>.npy`` and
    ``correct_mean_<i>.npy`` from ``INPUT_DIR`` and requires the computed
    result to match the reference within an absolute tolerance of 1.
    """
    failure_text = ("Values do not match the reference solution. "
                    "This function should only compute the mean of each "
                    "column.")

    for case in range(1, 4):
        data_path = os.path.join(INPUT_DIR, "x_data_mean_{}.npy".format(case))
        x_data = np.load(data_path)

        reference_path = os.path.join(
            INPUT_DIR, "correct_mean_{}.npy".format(case))
        x_mean = np.load(reference_path)

        result = ps6.get_mean_face(x_data)

        self.assertTrue(np.allclose(result, x_mean, atol=1), failure_text)
def part_1c():
    """Sweep the component count and the split ratio, plotting accuracy.

    With ``runk`` set, ``k`` runs over 1..29 at ``p = 0.5`` and the accuracy
    curve is saved to ``./pca_plot.png``. With ``runp`` set, ``p`` runs over
    ``np.arange(0.2, 1.0, 0.1)`` at ``k = 10`` and the curve is saved to
    ``./split_P_plot.png``. Every configuration prints its good/bad
    prediction counts and its accuracy.
    """
    runk = 1
    runp = 1
    size = (32, 32)

    def evaluate(X, y, p, k):
        # Return (good, bad) nearest-neighbour counts for one configuration.
        Xtrain, ytrain, Xtest, ytest = ps6.split_dataset(X, y, p)

        # training
        mu = ps6.get_mean_face(Xtrain)
        eig_vecs, _ = ps6.pca(Xtrain, k)
        Xtrain_proj = np.dot(Xtrain - mu, eig_vecs)

        # testing
        # NOTE(review): the test partition is centred on its own mean rather
        # than on the training mean -- confirm this is intended.
        mu = ps6.get_mean_face(Xtest)
        Xtest_proj = np.dot(Xtest - mu, eig_vecs)

        good = 0
        bad = 0
        for i, obs in enumerate(Xtest_proj):
            dist = [np.linalg.norm(obs - x) for x in Xtrain_proj]
            y_pred = ytrain[np.argmin(dist)]
            if y_pred == ytest[i]:
                good += 1
            else:
                bad += 1
        return good, bad

    def report(good, bad):
        # Print the counts and accuracy for one configuration; return accuracy.
        # Single-argument print calls behave identically on Python 2 and 3.
        # The doubled spaces reproduce the separator the old print statement
        # inserted after each label.
        accuracy = 100 * float(good) / (good + bad)
        print('Good predictions =  {0} Bad predictions =  {1}'.format(
            good, bad))
        print('{0:.2f}% accuracy'.format(accuracy))
        return accuracy

    def save_curve(xs, ys, xlabel, title, path):
        # Draw one accuracy curve and write it to ``path``.
        plt.figure()
        plt.ylabel('Accuracy')
        plt.xlabel(xlabel)
        plt.title(title)
        plt.plot(xs, ys)
        plt.grid()
        plt.draw()
        plt.savefig(path)

    if runk:
        p = 0.5  # Select a split percentage value
        # The images do not depend on k, so load them once per sweep.
        # NOTE(review): assumes ps6.split_dataset does not modify X or y in
        # place -- confirm before relying on the shared arrays.
        X, y = ps6.load_images(YALE_FACES_DIR, size)
        ks = []
        accuracy = []
        for k in range(1, 30):
            good, bad = evaluate(X, y, p, k)
            ks.append(k)
            accuracy.append(report(good, bad))
        save_curve(ks, accuracy, '# of PCs', 'Accuracy vs Number of PCs ',
                   './pca_plot.png')
        # plt.show()

    if runp:
        k = 10
        # The images do not depend on p either; load once for this sweep.
        X, y = ps6.load_images(YALE_FACES_DIR, size)
        ps = []
        accuracy = []
        for p in np.arange(0.2, 1.0, 0.1):
            good, bad = evaluate(X, y, p, k)
            ps.append(p)
            accuracy.append(report(good, bad))
        save_curve(ps, accuracy, 'Percentage of data split',
                   'Accuracy vs data split percentage ',
                   './split_P_plot.png')