def crossValidate(X_fold, y_fold, k, idx):
    # Use fold `idx` for validation and stack the remaining folds into the
    # training set (X_fold / y_fold are lists of per-fold arrays).
    X_cross = X_fold[idx]
    y_cross = y_fold[idx]
    X_train = np.vstack(X_fold[:idx] + X_fold[idx + 1:])
    y_train = np.hstack(y_fold[:idx] + y_fold[idx + 1:])

    classifier = KNearestNeighbor()
    classifier.train(X_train, y_train)
    dists = classifier.compute_distances_no_loops(X_cross)
    y_cross_pred = classifier.predict_labels(dists, k)

    num_correct = np.sum(y_cross_pred == y_cross)
    accuracy = float(num_correct) / len(y_cross)
    return accuracy
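# A minimal usage sketch (my own illustration, not part of the snippet above).
# It assumes `np`, `KNearestNeighbor`, and flattened CIFAR-10 arrays `X_train`
# and `y_train` are already available, as elsewhere in this file: split the
# training data into folds and average the held-out accuracy for one value of k.
num_folds = 5
X_folds = np.array_split(X_train, num_folds)  # list of arrays, one per fold
y_folds = np.array_split(y_train, num_folds)

# Mean cross-validation accuracy for a single candidate k (k = 5 here).
accs = [crossValidate(X_folds, y_folds, 5, idx) for idx in range(num_folds)]
print('mean accuracy for k=5: %f' % np.mean(accs))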
def cal_standard_knn():
    # Create a kNN classifier instance.
    # Remember that training a kNN classifier is a no-op:
    # the classifier simply remembers the data and does no further processing.
    classifier = KNearestNeighbor()
    classifier.train(X_train, y_train)
    print('KNN Classifier Train Done\n')

    # ------------------------------------------------------------
    # Open cs231n/classifiers/k_nearest_neighbor.py and implement
    # compute_distances_two_loops / _one_loop / _no_loops.
    # Test the implementation (the slower variants are left commented out):
    print('Ready to test with 2 loops')
    # dists = classifier.compute_distances_two_loops(X_test)
    # print(dists.shape)
    print('Ready to test with 1 loop')
    # dists = classifier.compute_distances_one_loop(X_test)
    # print(dists.shape)
    print('Ready to test with 0 loops\n')
    dists = classifier.compute_distances_no_loops(X_test)
    print(dists.shape)

    # ------------------------------------------------------------
    print('Ready to predict')
    y_pred = classifier.predict_labels(dists, 3)
    print('Accuracy = %s' % np.mean(y_pred == y_test))
def test_cross_validation(X_train, y_train):
    print('Ready to test with cross_validation')
    num_folds = 5
    k_choices = [1, 3, 5, 8, 10]

    print('Train data shape = ', X_train.shape)
    y_train = y_train.reshape(-1, 1)
    print('Train label shape = ', y_train.shape)

    X_train_folds = np.array_split(X_train, num_folds)
    y_train_folds = np.array_split(y_train, num_folds)

    k_to_accuracies = {}
    for each_k in k_choices:
        k_to_accuracies.setdefault(each_k, [])
        for i in range(num_folds):
            classifier = KNearestNeighbor()
            # All folds except the i-th form the training slice.
            X_train_slice = np.vstack(X_train_folds[0:i] + X_train_folds[i + 1:num_folds])
            y_train_slice = np.vstack(y_train_folds[0:i] + y_train_folds[i + 1:num_folds]).reshape(-1)
            # The i-th fold is held out for validation.
            X_test_slice = X_train_folds[i]
            y_test_slice = y_train_folds[i].reshape(-1)

            classifier.train(X_train_slice, y_train_slice)
            dis = classifier.compute_distances_no_loops(X_test_slice)
            y_predict = classifier.predict_labels(dis, each_k)
            acc = np.mean(y_predict == y_test_slice)
            k_to_accuracies[each_k].append(acc)

    for each_k in k_choices:
        for item in k_to_accuracies[each_k]:
            print('k = %d, acc = %f' % (each_k, item))
def cross_validate(X_train, y_train):
    num_folds = 5
    k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]

    N = len(X_train)
    train_folds = np.array_split(range(N), num_folds)

    k_to_accuracies = {}
    for k1 in k_choices:
        fold_eval = []
        for i in range(num_folds):
            # Boolean mask selecting every training example except fold i.
            mask = np.ones(N, dtype=bool)
            mask[train_folds[i]] = False
            X_train_cur = X_train[mask]
            y_train_cur = y_train[mask]

            classifier = KNearestNeighbor()
            classifier.train(X_train_cur, y_train_cur)

            # Fold i serves as the validation set.
            X_test_cur = X_train[train_folds[i]]
            y_test_cur = y_train[train_folds[i]]
            dists = classifier.compute_distances_no_loops(X_test_cur)
            y_test_pred = classifier.predict_labels(dists, k=k1)

            num_correct = np.sum(y_test_pred == y_test_cur)
            accuracy = float(num_correct) / len(y_test_cur)
            fold_eval.append(accuracy)
        k_to_accuracies[k1] = fold_eval[:]

    for k in sorted(k_to_accuracies):
        for accuracy in k_to_accuracies[k]:
            print 'k = %d, accuracy = %f' % (k, accuracy)

    # Plot the raw observations and the mean/std trend line.
    for k in k_choices:
        accuracies = k_to_accuracies[k]
        plt.scatter([k] * len(accuracies), accuracies)
    accuracies_mean = np.array([np.mean(v) for k, v in sorted(k_to_accuracies.items())])
    accuracies_std = np.array([np.std(v) for k, v in sorted(k_to_accuracies.items())])
    plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std)
    plt.title('Cross-validation on k')
    plt.xlabel('k')
    plt.ylabel('Cross-validation accuracy')
    plt.savefig('./figures/validation_k')
def cross_validate(X_train, y_train, num_folds=5):
    k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]
    X_train_folds = np.array_split(X_train, num_folds)
    y_train_folds = np.array_split(y_train, num_folds)

    # A dictionary holding the accuracies for different values of k that we find
    # when running cross-validation. After running cross-validation,
    # k_to_accuracies[k] should be a list of length num_folds giving the different
    # accuracy values that we found when using that value of k.
    k_to_accuracies = {k: [] for k in k_choices}

    for i in range(num_folds):
        X_train_cv = np.vstack(X_train_folds[:i] + X_train_folds[i + 1:])
        y_train_cv = np.hstack(y_train_folds[:i] + y_train_folds[i + 1:])
        X_val = X_train_folds[i]
        y_val = y_train_folds[i]

        classifier = KNearestNeighbor()
        classifier.train(X_train_cv, y_train_cv)
        # The distance matrix depends only on the fold split, so compute it
        # once per fold and reuse it for every value of k.
        dists_cv = classifier.compute_distances_no_loops(X_val)

        for k in k_choices:
            y_val_pred = classifier.predict_labels(dists_cv, k=k)
            num_correct = np.sum(y_val_pred == y_val)
            accuracy = float(num_correct) / len(y_val)
            k_to_accuracies[k].append(accuracy)

    # Print out the computed accuracies
    for k in sorted(k_to_accuracies):
        for accuracy in k_to_accuracies[k]:
            print 'k = %d, accuracy = %f' % (k, accuracy)

    plot_cross_validation(k_choices, k_to_accuracies)

    # Return the k with the highest mean cross-validation accuracy.
    sort_by_accuracy = sorted(k_to_accuracies, key=lambda k: np.mean(k_to_accuracies[k]))
    return sort_by_accuracy[-1]
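# A short usage sketch (my own addition, not part of the snippet above;
# `plot_cross_validation` is assumed to be defined elsewhere in this file, and
# `X_test` / `y_test` are the held-out CIFAR-10 test arrays used in the other
# snippets): pick the best k by cross-validation, retrain on the full training
# set, and evaluate on the test set.
best_k = cross_validate(X_train, y_train, num_folds=5)

classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
dists = classifier.compute_distances_no_loops(X_test)
y_test_pred = classifier.predict_labels(dists, k=best_k)

num_correct = np.sum(y_test_pred == y_test)
print 'Got %d / %d correct => accuracy: %f' % (num_correct, len(y_test), float(num_correct) / len(y_test))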
# To ensure that our vectorized implementation is correct, we make sure that it
# agrees with the naive implementation. There are many ways to decide whether
# two matrices are similar; one of the simplest is the Frobenius norm. In case
# you haven't seen it before, the Frobenius norm of two matrices is the square
# root of the squared sum of differences of all elements; in other words, reshape
# the matrices into vectors and compute the Euclidean distance between them.
difference = np.linalg.norm(dists - dists_one, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    print('Uh-oh! The distance matrices are different')

# Now implement the fully vectorized version inside compute_distances_no_loops
# and run the code
dists_two = classifier.compute_distances_no_loops(X_test)

# Check that the distance matrix agrees with the one we computed before:
difference = np.linalg.norm(dists - dists_two, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    print('Uh-oh! The distance matrices are different')

# Let's compare how fast the implementations are
def time_function(f, *args):
    """
    Call a function f with args and return the time (in seconds) that it took
    to execute.
    """
    import time
    tic = time.time()
    f(*args)
    toc = time.time()
    return toc - tic
for j in range(num_folds):
    # Loop through all the folds of the training data. The j-th fold is the
    # validation (CV) fold; the other folds are used for training.
    X_test_cv = X_train_folds[j]
    y_test_cv = y_train_folds[j]

    # Leave out the j-th array. X_train_folds / y_train_folds are lists.
    X_train_cv = np.vstack(X_train_folds[0:j] + X_train_folds[j + 1:])
    y_train_cv = np.hstack(y_train_folds[0:j] + y_train_folds[j + 1:])

    classifier.train(X_train_cv, y_train_cv)
    dists_cv = classifier.compute_distances_no_loops(X_test_cv)
    y_test_pred = classifier.predict_labels(dists_cv, k)

    num_correct_cv = np.sum(y_test_pred == y_test_cv)
    accuracy_cv = float(num_correct_cv) / y_test_cv.shape[0]
    print 'Accuracy with %d nearest neighbors, validating on fold %d: %.2f%%' % (k, j + 1, accuracy_cv * 100)
    k_to_accuracies[k].append(accuracy_cv)
################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies
X_train_folds = np.split(X_train, num_folds)
y_train_folds = np.split(y_train, num_folds)

k_to_accuracies = {}
for k_choice in k_choices:
    for i in range(num_folds):
        knn = KNearestNeighbor()
        xtrain = X_train_folds[:i] + X_train_folds[i + 1:]
        xtrain = np.asarray([item for sublist in xtrain for item in sublist])
        ytrain = y_train_folds[:i] + y_train_folds[i + 1:]
        ytrain = np.asarray([item for sublist in ytrain for item in sublist])

        knn.train(xtrain, ytrain)
        dists = knn.compute_distances_no_loops(np.asarray(X_train_folds[i]))
        y_test_pred = knn.predict_labels(dists, k=k_choice)

        num_correct = np.sum(y_test_pred == y_train_folds[i])
        accuracy = float(num_correct) / len(y_train_folds[i])
        k_to_accuracies.setdefault(k_choice, []).append(accuracy)
        print('k = %d, accuracy = %f' % (k_choice, accuracy))

for k in k_choices:
    accuracies = k_to_accuracies[k]
    plt.scatter([k] * len(accuracies), accuracies)

# plot the trend line with error bars that correspond to standard deviation
accuracies_mean = np.array([np.mean(v) for k, v in sorted(k_to_accuracies.items())])
accuracies_std = np.array([np.std(v) for k, v in sorted(k_to_accuracies.items())])
def test1():
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
    print 'Training data shape:', X_train.shape
    print 'Training label shape:', y_train.shape
    print 'Test data shape:', X_test.shape
    print 'Test label shape:', y_test.shape

    # classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
    # num_classes = len(classes)
    # sample_per_class = 7
    # for y, cls in enumerate(classes):
    #     idxs = np.flatnonzero(y_train == y)
    #     idxs = np.random.choice(idxs, sample_per_class, replace=False)
    #     for i, idx in enumerate(idxs):
    #         plt_idx = i * num_classes + y + 1
    #         plt.subplot(sample_per_class, num_classes, plt_idx)
    #         plt.imshow(X_train[idx].astype('uint8'))
    #         plt.axis('off')
    #         if i == 0:
    #             plt.title(cls)
    # plt.savefig("./figures/cifar_sample.png")
    # plt.show()
    # plt.close()

    # Subsample the data for faster execution.
    num_training = 5000
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    num_test = 500
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Reshape the image data into rows.
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    print X_train.shape, X_test.shape

    from cs231n.classifiers import KNearestNeighbor
    classifier = KNearestNeighbor()
    classifier.train(X_train, y_train)

    # two_loop_time = time_function(classifier.compute_distances_two_loops, X_test)
    # print "two loop time %f" % two_loop_time
    # one_loop_time = time_function(classifier.compute_distances_one_loop, X_test)
    # print "one loop time %f" % one_loop_time
    # no_loop_time = time_function(classifier.compute_distances_no_loops, X_test)
    # print "no loop time %f" % no_loop_time

    dists = classifier.compute_distances_no_loops(X_test)
    # dist_one_loop = classifier.compute_distances_one_loop(X_test)
    # dist_two_loops = classifier.compute_distances_two_loops(X_test)
    # matrix_compare(dists, dist_one_loop)
    # matrix_compare(dists, dist_two_loops)

    y_test_pred = classifier.predict_labels(dists, k=5)
    num_correct = np.sum(y_test_pred == y_test)
    accuracy = float(num_correct) / num_test
    print "Got %d/%d correct => accuracy: %f" % (num_correct, num_test, accuracy)

    cross_validate(X_train, y_train)
# values of k in the k_to_accuracies dictionary.                              #
################################################################################
for k in k_choices:
    accuracies = []
    for i in range(num_folds):
        # Use every fold except the i-th as training data.
        X_train_this = list(X_train_folds)
        del X_train_this[i]
        y_train_this = list(y_train_folds)
        del y_train_this[i]
        x = np.row_stack(X_train_this)
        y = np.concatenate(y_train_this)
        print 'after row stack'
        print x.shape
        print y.shape

        classifier.train(x, y)
        dists = classifier.compute_distances_no_loops(X_train_folds[i])
        y_test_pred = classifier.predict_labels(dists, k)
        num_correct = np.sum(y_test_pred == y_train_folds[i])
        # Divide by the size of the held-out fold, not the test set.
        accuracy = float(num_correct) / len(y_train_folds[i])
        accuracies.append(accuracy)
    k_to_accuracies[k] = accuracies
################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print 'k = %d, accuracy = %f' % (k, accuracy)

# plot the raw observations
import numpy as np
import h5py
from numpy import loadtxt
from cs231n.classifiers import KNearestNeighbor

h5f = h5py.File('img_data.h5', 'r')
X = h5f['dataset_1'][:]
h5f.close()
y = loadtxt("y_labels.txt", dtype=np.uint8, delimiter="\n", unpack=False)

X_train = X[8000:35117, :]
y_train = y[8000:35117]
X_val = X[3000:8000, :]
y_val = y[3000:8000]
num_val = 5000

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

dists = classifier.compute_distances_no_loops(X_val)
y_val_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_val_pred == y_val)
accuracy = float(num_correct) / num_val
print accuracy
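# A sketch of how several values of k could be compared on the same validation
# split (my own addition, reusing `dists`, `y_val`, and `num_val` from above).
# Since the distance matrix does not depend on k, it is computed once and only
# the label prediction changes per k.
for k in [1, 3, 5, 8, 10]:
    y_val_pred = classifier.predict_labels(dists, k=k)
    acc = float(np.sum(y_val_pred == y_val)) / num_val
    print 'k = %d, validation accuracy = %f' % (k, acc)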
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all folds and all    #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
for k in k_choices:
    k_to_accuracies[k] = np.zeros(num_folds)
    for i in range(num_folds):
        # The remaining folds form the training set.
        x_t = np.array(X_train_folds[:i] + X_train_folds[i + 1:])
        y_t = np.array(y_train_folds[:i] + y_train_folds[i + 1:])
        x_t = x_t.reshape(X_train_folds[i].shape[0] * 4, -1)
        y_t = y_t.reshape(y_train_folds[i].shape[0] * 4, -1)
        # The i-th fold is the validation set.
        x_te = np.array(X_train_folds[i])
        y_te = np.array(y_train_folds[i])

        classifier.train(x_t, y_t)
        dists_ = classifier.compute_distances_no_loops(x_te)
        y_pred = classifier.predict_labels(dists_, k)

        # Compute the fraction of correctly predicted examples on this fold.
        num_correct = np.sum(y_pred == y_te)
        accuracy = float(num_correct) / len(y_te)
        k_to_accuracies[k][i] = accuracy
################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))
# To ensure that our vectorized implementation is correct, we make sure that it
# agrees with the naive implementation. There are many ways to decide whether
# two matrices are similar; one of the simplest is the Frobenius norm. In case
# you haven't seen it before, the Frobenius norm of two matrices is the square
# root of the squared sum of differences of all elements; in other words, reshape
# the matrices into vectors and compute the Euclidean distance between them.
difference = np.linalg.norm(dists - dists_one, ord='fro')
print 'Difference was: %f' % (difference, )
if difference < 0.001:
    print 'Good! The distance matrices are the same'
else:
    print 'Uh-oh! The distance matrices are different'

# Now implement the fully vectorized version inside compute_distances_no_loops
# and run the code
dists_two = classifier.compute_distances_no_loops(X_test)

# Check that the distance matrix agrees with the one we computed before:
difference = np.linalg.norm(dists - dists_two, ord='fro')
print 'Difference was: %f' % (difference, )
if difference < 0.001:
    print 'Good! The distance matrices are the same'
else:
    print 'Uh-oh! The distance matrices are different'

# Let's compare how fast the implementations are
def time_function(f, *args):
    """
    Call a function f with args and return the time (in seconds) that it took
    to execute.
    """
    import time
    tic = time.time()
    f(*args)
    toc = time.time()
    return toc - tic
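# A small standalone check (my own illustration, not from the assignment) of
# the claim in the comment above: the Frobenius norm of a difference matrix
# equals the Euclidean norm of that difference flattened into a vector.
A = np.random.randn(4, 6)
B = np.random.randn(4, 6)

fro = np.linalg.norm(A - B, ord='fro')        # Frobenius norm of the difference
euclid = np.linalg.norm((A - B).reshape(-1))  # Euclidean norm of the flattened difference

print('Frobenius: %f, flattened Euclidean: %f' % (fro, euclid))
assert np.isclose(fro, euclid)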
################################################################################
# TODO:                                                                        #
# Perform k-fold cross validation to find the best value of k. For each       #
# possible value of k, run the k-nearest-neighbor algorithm num_folds times,  #
# where in each case you use all but one of the folds as training data and the#
# last fold as a validation set. Store the accuracies for all folds and all   #
# values of k in the k_to_accuracies dictionary.                              #
################################################################################
# Your code
for k in k_choices:
    accuracies = []
    for i in range(num_folds):
        # Rotate the fold list: the first fold is held out for validation and
        # re-appended at the end of the iteration.
        X_val = X_train_folds.pop(0)
        y_val = y_train_folds.pop(0)

        classifier.train(np.vstack(X_train_folds[:]), np.hstack(y_train_folds[:]))
        dists = classifier.compute_distances_no_loops(X_val)
        y_val_pred = classifier.predict_labels(dists, k=k)

        num_correct = np.sum(y_val_pred == y_val)
        accuracies.append(float(num_correct) / y_val.shape[0])

        X_train_folds.append(X_val)
        y_train_folds.append(y_val)
    k_to_accuracies[k] = accuracies
################################################################################
#                                 END OF YOUR CODE                             #
################################################################################
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))

y_test_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))

dists_one = classifier.compute_distances_one_loop(X_test)
difference = np.linalg.norm(dists - dists_one, ord='fro')
print('Difference was: %f' % (difference, ))

dists_two = classifier.compute_distances_no_loops(X_test)
difference = np.linalg.norm(dists_one - dists_two, ord='fro')
print('Difference was: %f' % (difference, ))

########################################################
def time_function(f, *args):
    """
    Call a function f with args and return the time (in seconds) that it took
    to execute.
    """
    import time
    tic = time.time()
    f(*args)
    toc = time.time()
    return toc - tic
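# Usage sketch for time_function (mirroring the timing calls that appear
# commented out in the test driver above; treat it as illustrative):
two_loop_time = time_function(classifier.compute_distances_two_loops, X_test)
print('Two loop version took %f seconds' % two_loop_time)

one_loop_time = time_function(classifier.compute_distances_one_loop, X_test)
print('One loop version took %f seconds' % one_loop_time)

no_loop_time = time_function(classifier.compute_distances_no_loops, X_test)
print('No loop version took %f seconds' % no_loop_time)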
accuracy = float(num_correct) / num_test
print('Using k=5, got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))

# Now let's speed up distance matrix computation by using partial vectorization
# with one loop.
dists_one = classifier.compute_distances_one_loop(X_test)

# Compute the difference between the two methods.
difference = np.linalg.norm(dists - dists_one, ord='fro')
if difference < 0.001:
    print('Good, the two methods give the same results.')
else:
    print('The distances are different')

# Now we use the method without any loop.
dists_non = classifier.compute_distances_no_loops(X_test)

# Compute the difference between the two methods.
difference = np.linalg.norm(dists - dists_non, ord='fro')
if difference < 0.001:
    print('Good, the difference is %f' % difference)
else:
    print('The distances are different')

# Let's compare how fast the implementations are.
def time_function(f, *args):
    '''
    Call a function f with args and return the time (in seconds) that it took
    to execute.
    '''
    import time
    tic = time.time()
    f(*args)
    toc = time.time()
    return toc - tic
for fold in range(num_folds):
    # Cross-validation: each iteration holds out one fold as the validation set.
    num_test_crossval = 1000
    X_test_crossval = X_train_folds[fold]
    y_test_crossval = y_train_folds[fold]

    # Use the rest of the folds as training data.
    X_train_crossval = np.vstack(X_train_folds[0:fold] + X_train_folds[fold + 1:])
    y_train_crossval = np.hstack(y_train_folds[0:fold] + y_train_folds[fold + 1:])

    # Train the classifier.
    classifier.train(X_train_crossval, y_train_crossval)
    # Calculate the L2 distances for the held-out fold.
    dists_crossval = classifier.compute_distances_no_loops(X_test_crossval)
    # Predict the labels with the current k value.
    y_test_pred = classifier.predict_labels(dists_crossval, k)
    # Calculate the accuracy.
    num_correct = np.sum(y_test_pred == y_test_crossval)
    accuracy = float(num_correct) / num_test_crossval
    k_to_accuracies[k].append(accuracy)
################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print 'k = %d, accuracy = %f' % (k, accuracy)