def cal_standard_knn():
    """Train a kNN classifier on the module-level data and print its accuracy.

    Reads ``X_train``, ``y_train``, ``X_test`` and ``y_test`` from the
    enclosing scope. Only the fully vectorised distance routine is executed;
    the two-loop and one-loop variants are announced on stdout but not run.
    Predictions use ``k = 3``.
    """
    knn = KNearestNeighbor()
    knn.train(X_train, y_train)
    print('KNN Classifier Train Done\n')

    # Progress banners are kept for all three variants, although only the
    # no-loop implementation is actually exercised below.
    for banner in ('Ready to test with 2 loops',
                   'Ready to test with 1 loop',
                   'Ready to test with 0 loop\n'):
        print(banner)

    distances = knn.compute_distances_no_loops(X_test)
    print(distances.shape)

    print('Ready to predict')
    predicted = knn.predict_labels(distances, 3)
    hit_rate = np.mean(predicted == y_test)
    print('Accurarcy = %s' % hit_rate)
def crossValidate(X_fold, y_fold, k, idx):
    """Return the kNN validation accuracy with fold ``idx`` held out.

    The classifier is trained on every fold except ``idx`` and evaluated on
    fold ``idx``.

    Args:
        X_fold: sequence of per-fold feature arrays.
        y_fold: sequence of per-fold label arrays, parallel to ``X_fold``.
        k: number of neighbours passed to ``predict_labels``.
        idx: index of the fold used for validation.

    Returns:
        Fraction of validation samples predicted correctly, as a float.

    Raises:
        ValueError: if fewer than two folds are supplied.
        IndexError: if ``idx`` is not a valid fold index.
    """
    X_parts = list(X_fold)
    y_parts = list(y_fold)
    num_folds = len(X_parts)
    if num_folds < 2:
        raise ValueError('cross-validation needs at least two folds')
    if not 0 <= idx < num_folds:
        raise IndexError('fold index %r out of range' % (idx,))

    X_cross = X_parts[idx]
    y_cross = y_parts[idx]
    # Exclude exactly the held-out fold. The previous loop rebuilt the
    # training set around the loop index instead of ``idx``, so the
    # validation fold leaked into the training data.
    X_train = np.vstack(X_parts[:idx] + X_parts[idx + 1:])
    y_train = np.hstack(y_parts[:idx] + y_parts[idx + 1:])

    classifier = KNearestNeighbor()
    classifier.train(X_train, y_train)
    dists = classifier.compute_distances_no_loops(X_cross)
    y_cross_pred = classifier.predict_labels(dists, k)

    num_correct = np.sum(y_cross_pred == y_cross)
    print("cross val has ", y_cross.shape)
    return float(num_correct) / len(y_cross)
def use_classifier(x_train, y_train, x_test, y_test, k):
    """Train a kNN classifier and score its predictions on the test split.

    Args:
        x_train: training features handed to ``KNearestNeighbor.train``.
        y_train: training labels handed to ``KNearestNeighbor.train``.
        x_test: test features whose distances are obtained via ``get_distance``.
        y_test: reference labels for ``x_test``.
        k: number of neighbours passed to ``predict_labels``.

    Returns:
        Whatever ``accuracy_score(y_test, predictions)`` returns.
        NOTE(review): ``accuracy_score`` and ``get_distance`` are defined
        outside this block; presumably the former is scikit-learn's - confirm.
    """
    model = KNearestNeighbor()
    model.train(x_train, y_train)
    predictions = model.predict_labels(get_distance(model, x_test), k)
    return accuracy_score(y_test, predictions)
def test_cross_validation(X_train, y_train):
    """Run 5-fold cross-validation for several k values and print accuracies.

    Args:
        X_train: training feature array, split along its first axis.
        y_train: training label array; reshaped to a column for splitting
            and flattened again before being given to the classifier.

    Prints the data/label shapes and then one ``k = ..., acc = ...`` line per
    (k, fold) pair, grouped by k. Returns nothing.
    """
    print('Ready to test with cross_validation')
    num_folds = 5
    k_choices = [1, 3, 5, 8, 10]

    print('Train data shape = ', X_train.shape)
    y_train = y_train.reshape(-1, 1)
    print('Train label shape = ', y_train.shape)

    X_train_folds = np.array_split(X_train, num_folds)
    y_train_folds = np.array_split(y_train, num_folds)

    k_to_accuracies = {each_k: [] for each_k in k_choices}
    # Folds are the outer loop so the distance matrix, which does not depend
    # on k, is computed once per fold instead of once per (k, fold) pair.
    for i in range(num_folds):
        X_train_slice = np.vstack(X_train_folds[:i] + X_train_folds[i + 1:])
        y_train_slice = np.vstack(
            y_train_folds[:i] + y_train_folds[i + 1:]).reshape(-1)
        X_test_slice = X_train_folds[i]
        y_test_slice = y_train_folds[i].reshape(-1)

        classfer = KNearestNeighbor()
        classfer.train(X_train_slice, y_train_slice)
        dis = classfer.compute_distances_no_loops(X_test_slice)

        for each_k in k_choices:
            y_predict = classfer.predict_labels(dis, each_k)
            k_to_accuracies[each_k].append(np.mean(y_predict == y_test_slice))

    for each_k in k_choices:
        for item in k_to_accuracies[each_k]:
            print('k = %d, acc = %f' % (each_k, item))
def main():
    """Time the one-loop distance routine and report k=5 test accuracy.

    Data comes from ``gen_train_test(5000, 500)`` (defined elsewhere).
    Prints the elapsed whole seconds, the distance-matrix shape and the
    fraction of test labels predicted correctly.
    """
    X_train, y_train, X_test, y_test = gen_train_test(5000, 500)
    num_test = y_test.shape[0]

    classifier = KNearestNeighbor()
    classifier.train(X_train, y_train)

    starttime = datetime.datetime.now()
    dists = classifier.compute_distances_one_loop(X_test)
    endtime = datetime.datetime.now()
    # Parenthesised so that ``.seconds`` applies to the timedelta on both
    # Python 2 and Python 3 (on Python 3 the old form took ``.seconds`` of
    # print()'s return value and raised AttributeError).
    print((endtime - starttime).seconds)
    print(dists.shape)

    y_test_pred = classifier.predict_labels(dists, k=5)

    # Compute and print the fraction of correctly predicted examples.
    num_correct = np.sum(y_test_pred == y_test)
    accuracy = float(num_correct) / num_test
    print('Got %d / %d correct => accuracy: %f'
          % (num_correct, num_test, accuracy))
def cross_validate(X_train, y_train):
    """Cross-validate kNN over a fixed list of k values and plot the result.

    The sample indices are split into 5 folds; each fold is held out once
    while the classifier is trained on the remaining samples.

    Args:
        X_train: training feature array, indexed along its first axis.
        y_train: training label array, parallel to ``X_train``.

    Prints one accuracy line per (k, fold) pair, draws a scatter plot with
    a mean/std error bar per k, and saves it to ``./figures/validation_k``.
    Returns nothing.
    """
    num_folds = 5
    k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]
    N = len(X_train)
    train_folds = np.array_split(np.arange(N), num_folds)

    k_to_accuracies = {k: [] for k in k_choices}
    # Folds are the outer loop so the distance matrix, which does not depend
    # on k, is computed once per fold rather than once per (k, fold) pair.
    for fold_idx in train_folds:
        mask = np.ones(N, dtype=bool)
        mask[fold_idx] = False

        classifier = KNearestNeighbor()
        classifier.train(X_train[mask], y_train[mask])

        X_test_cur = X_train[fold_idx]
        y_test_cur = y_train[fold_idx]
        dists = classifier.compute_distances_no_loops(X_test_cur)

        for k in k_choices:
            y_test_pred = classifier.predict_labels(dists, k=k)
            num_correct = np.sum(y_test_pred == y_test_cur)
            k_to_accuracies[k].append(float(num_correct) / len(y_test_cur))

    for k in sorted(k_to_accuracies):
        for accuracy in k_to_accuracies[k]:
            print('k = %d, accuracy = %f' % (k, accuracy))

    # Raw observations.
    for k in k_choices:
        accuracies = k_to_accuracies[k]
        plt.scatter([k] * len(accuracies), accuracies)

    # Trend line; statistics are taken in k_choices order so they always
    # line up with the x values given to errorbar.
    accuracies_mean = np.array([np.mean(k_to_accuracies[k]) for k in k_choices])
    accuracies_std = np.array([np.std(k_to_accuracies[k]) for k in k_choices])
    plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std)
    plt.title('Cross-validation on k')
    plt.xlabel('k')
    plt.ylabel('Cross-validation accuracy')
    plt.savefig('./figures/validation_k')
def cross_validate(X_train, y_train):
    """Cross-validate kNN over a fixed list of k values and plot the result.

    The sample indices are split into 5 folds; each fold is held out once
    while the classifier is trained on the remaining samples.

    Args:
        X_train: training feature array, indexed along its first axis.
        y_train: training label array, parallel to ``X_train``.

    Prints one accuracy line per (k, fold) pair, draws a scatter plot with
    a mean/std error bar per k, and saves it to ``./figures/validation_k``.
    Returns nothing.
    """
    num_folds = 5
    k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]
    N = len(X_train)
    train_folds = np.array_split(np.arange(N), num_folds)

    k_to_accuracies = {k: [] for k in k_choices}
    # Folds are the outer loop so the distance matrix, which does not depend
    # on k, is computed once per fold rather than once per (k, fold) pair.
    for fold_idx in train_folds:
        mask = np.ones(N, dtype=bool)
        mask[fold_idx] = False

        classifier = KNearestNeighbor()
        classifier.train(X_train[mask], y_train[mask])

        X_test_cur = X_train[fold_idx]
        y_test_cur = y_train[fold_idx]
        dists = classifier.compute_distances_no_loops(X_test_cur)

        for k in k_choices:
            y_test_pred = classifier.predict_labels(dists, k=k)
            num_correct = np.sum(y_test_pred == y_test_cur)
            k_to_accuracies[k].append(float(num_correct) / len(y_test_cur))

    for k in sorted(k_to_accuracies):
        for accuracy in k_to_accuracies[k]:
            print('k = %d, accuracy = %f' % (k, accuracy))

    # Raw observations.
    for k in k_choices:
        accuracies = k_to_accuracies[k]
        plt.scatter([k] * len(accuracies), accuracies)

    # Trend line; statistics are taken in k_choices order so they always
    # line up with the x values given to errorbar.
    accuracies_mean = np.array([np.mean(k_to_accuracies[k]) for k in k_choices])
    accuracies_std = np.array([np.std(k_to_accuracies[k]) for k in k_choices])
    plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std)
    plt.title('Cross-validation on k')
    plt.xlabel('k')
    plt.ylabel('Cross-validation accuracy')
    plt.savefig('./figures/validation_k')
def cross_validate(X_train, y_train, num_folds=5):
    """Cross-validate kNN over a fixed list of k values and return the best k.

    Args:
        X_train: training feature array, split along its first axis.
        y_train: training label array, parallel to ``X_train``.
        num_folds: number of folds; each is held out once for validation.

    Returns:
        The k from the candidate list with the highest mean validation
        accuracy (on ties, the last such k in sorted-by-mean order).

    Raises:
        ValueError: if ``num_folds`` is smaller than 2, since no training
            data would remain once a fold is held out.
    """
    if num_folds < 2:
        raise ValueError('num_folds must be at least 2')

    k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]
    X_train_folds = np.array_split(X_train, num_folds)
    y_train_folds = np.array_split(y_train, num_folds)

    # k_to_accuracies[k] ends up as a list of num_folds accuracy values,
    # one per held-out fold.
    k_to_accuracies = {k: [] for k in k_choices}

    for i in range(num_folds):
        X_train_cv = np.vstack(X_train_folds[:i] + X_train_folds[i + 1:])
        y_train_cv = np.hstack(y_train_folds[:i] + y_train_folds[i + 1:])
        X_val = X_train_folds[i]
        y_val = y_train_folds[i]

        classifier = KNearestNeighbor()
        classifier.train(X_train_cv, y_train_cv)
        # The distance matrix is shared by every k tried on this fold.
        dists_cv = classifier.compute_distances_no_loops(X_val)

        for k in k_choices:
            y_val_pred = classifier.predict_labels(dists_cv, k=k)
            num_correct = np.sum(y_val_pred == y_val)
            k_to_accuracies[k].append(float(num_correct) / len(y_val))

    # Print out the computed accuracies.
    for k in sorted(k_to_accuracies):
        for accuracy in k_to_accuracies[k]:
            print('k = %d, accuracy = %f' % (k, accuracy))

    plot_cross_validation(k_choices, k_to_accuracies)

    sort_by_accuracy = sorted(k_to_accuracies,
                              key=lambda k: np.mean(k_to_accuracies[k]))
    return sort_by_accuracy[-1]
# compute_distances_two_loops.

# Exercise the two-loop distance routine on the test set.
dists = classifier.compute_distances_two_loops(X_test)
print(dists.shape)

# Show the distance matrix as an image: each row is one test example and
# holds its distances to the training examples.
plt.imshow(dists, interpolation='none')
plt.show()

# Nearest-neighbour prediction (k = 1), then the fraction predicted correctly.
y_test_pred = classifier.predict_labels(dists, k=1)
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f'
      % (num_correct, num_test, accuracy))

# The same evaluation with a 5-neighbour vote.
y_test_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f'
      % (num_correct, num_test, accuracy))

# Output:
# Got 137 / 500 correct => accuracy: 0.274000
# Got 139 / 500 correct => accuracy: 0.278000
def test1():
    """Evaluate kNN (k=5) on a CIFAR-10 subsample, then cross-validate.

    Loads CIFAR-10 via ``load_CIFAR10``, keeps the first 5000 training and
    500 test images, flattens each image to a row vector, prints the test
    accuracy, and finally calls ``cross_validate`` on the training subsample.
    """
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
    print('Training data shape:', X_train.shape)
    print('Training label shape:', y_train.shape)
    print('Test data shape:', X_test.shape)
    print('Test label shape:', y_test.shape)

    # Subsample. The index list is materialised because a bare ``range``
    # is a lazy object on Python 3.
    num_training = 5000
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]

    num_test = 500
    mask = list(range(num_test))
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Flatten every image into a single row.
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    print(X_train.shape, X_test.shape)

    from cs231n.classifiers import KNearestNeighbor
    classifier = KNearestNeighbor()
    classifier.train(X_train, y_train)

    dists = classifier.compute_distances_no_loops(X_test)
    y_test_pred = classifier.predict_labels(dists, k=5)
    num_correct = np.sum(y_test_pred == y_test)
    accuracy = float(num_correct) / num_test
    print("God %d/%d correct => accuracy: %f"
          % (num_correct, num_test, accuracy))

    cross_validate(X_train, y_train)
# Perform k-fold cross validation to find the best value of k. For each        #
# possible value of k, run the k-nearest-neighbor algorithm num_folds times,   #
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all fold and all     #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
# Fold i is held out by slicing, so the shared fold lists are never mutated
# (the previous pop(0)/append rotation left them reordered if an iteration
# failed). Folds are the outer loop so the distance matrix, which does not
# depend on k, is computed once per fold.
for k in k_choices:
    k_to_accuracies[k] = []

for i in range(num_folds):
    X_val = X_train_folds[i]
    y_val = y_train_folds[i]
    classifier.train(
        np.vstack(list(X_train_folds[:i]) + list(X_train_folds[i + 1:])),
        np.hstack(list(y_train_folds[:i]) + list(y_train_folds[i + 1:])))
    dists = classifier.compute_distances_no_loops(X_val)

    for k in k_choices:
        y_val_pred = classifier.predict_labels(dists, k=k)
        num_correct = np.sum(y_val_pred == y_val)
        k_to_accuracies[k].append(float(num_correct) / y_val.shape[0])
################################################################################
#                                 END OF YOUR CODE                             #
################################################################################
# Print out the computed accuracies
################################################################################
for k in k_choices:
    for fold in range(num_folds):
        # Fold `fold` is held out for validation; the other folds are
        # concatenated into a temporary training set.
        validation_X_test = X_train_folds[fold]
        validation_y_test = y_train_folds[fold]
        temp_X_train = np.concatenate(
            X_train_folds[:fold] + X_train_folds[fold + 1:])
        temp_y_train = np.concatenate(
            y_train_folds[:fold] + y_train_folds[fold + 1:])

        # Fresh classifier for this split.
        test_classifier = KNearestNeighbor()
        test_classifier.train(temp_X_train, temp_y_train)

        # Distances from each validation sample to the training samples.
        temp_dists = test_classifier.compute_distances_two_loops(
            validation_X_test)
        temp_y_test_pred = test_classifier.predict_labels(temp_dists, k=k)

        # Accuracy on the held-out fold. The fold size gets its own name so
        # the script-level `num_test` is not overwritten.
        # NOTE(review): presumably later code still needs `num_test` as the
        # test-set size - confirm.
        num_correct = np.sum(temp_y_test_pred == validation_y_test)
        num_validation = validation_X_test.shape[0]
        accuracy = float(num_correct) / num_validation
        print("k=", k, "Fold=", fold, "Accuracy=", accuracy)
        k_to_accuracies[k] = k_to_accuracies.get(k, []) + [accuracy]
################################################################################
#                                 END OF YOUR CODE                             #
################################################################################
# Print out the computed accuracies
# **Inline Question #1:** Notice the structured patterns in the distance matrix, where some rows or columns are visible brighter. (Note that with the default color scheme black indicates low distances while white indicates high distances.) # # - What in the data is the cause behind the distinctly bright rows? # Test images not well identified. Can be outliers in the test images not similar to # any training images. Zoomed in or transsformed very much. # - What causes the columns? # Outliers in training images with respect to ALL test images # **Your Answer**: *fill this in.* # # # In[ ]: # Now implement the function predict_labels and run the code below: # We use k = 1 (which is Nearest Neighbor). y_test_pred = classifier.predict_labels(dists, k=1) # Compute and print the fraction of correctly predicted examples num_correct = np.sum(y_test_pred == y_test) accuracy = float(num_correct) / num_test print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy)) # You should expect to see approximately `27%` accuracy. Now lets try out a larger `k`, say `k = 5`: # In[ ]: y_test_pred = classifier.predict_labels(dists, k=5) num_correct = np.sum(y_test_pred == y_test) accuracy = float(num_correct) / num_test print('Got %d / %d correct => accuracy: %f' %
import numpy as np
import h5py
from numpy import loadtxt
from cs231n.classifiers import KNearestNeighbor

# Read the whole feature matrix into memory; the context manager closes the
# HDF5 file even if the read fails.
with h5py.File('img_data.h5', 'r') as h5f:
    X = h5f['dataset_1'][:]

# Labels are parsed with the default whitespace/line splitting.
# NOTE(review): the explicit newline delimiter was dropped because newer
# NumPy releases reject it; assumes one label per line - confirm.
y = loadtxt("y_labels.txt", dtype=np.uint8, unpack=False)

# Rows 8000..35116 are used for training, rows 3000..7999 for validation.
X_train = X[8000:35117, :]
y_train = y[8000:35117]
X_val = X[3000:8000, :]
y_val = y[3000:8000]
# Taken from the data so the accuracy stays correct if the slice is short.
num_val = y_val.shape[0]

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

dists = classifier.compute_distances_no_loops(X_val)
y_val_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_val_pred == y_val)
accuracy = float(num_correct) / num_val
print(accuracy)
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

# Open cs231n/classifiers/k_nearest_neighbor.py and implement
# compute_distances_two_loops.

# Test your implementation:
dists = classifier.compute_distances_two_loops(X_test)
print(dists.shape)

# Show the distance matrix as an image.
plt.imshow(dists, interpolation='nearest')
plt.show()

# Now implement the function predict_labels and run the code below:
# We use k = 1 (which is Nearest Neighbor).
y_test_pred = classifier.predict_labels(dists, k=1)

# Compute and print the fraction of correctly predicted examples.
# print() is called with one pre-formatted string so the output is the same
# on Python 2 and Python 3.
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f'
      % (num_correct, num_test, accuracy))

# Repeat with a 5-neighbour vote.
y_test_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f'
      % (num_correct, num_test, accuracy))

# Now lets speed up distance matrix computation by using partial vectorization
# with one loop. Implement the function compute_distances_one_loop and run the
# code below:
dists_one = classifier.compute_distances_one_loop(X_test)
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all fold and all     #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
for k in k_choices:
    k_to_accuracies[k] = np.zeros(num_folds)
    for i in range(num_folds):
        # Every fold except fold i forms the training set. Concatenating
        # along the first axis works for any number of folds and for folds
        # of unequal size, and leaves the labels in their original rank
        # instead of forcing them into a column.
        x_t = np.concatenate(
            list(X_train_folds[:i]) + list(X_train_folds[i + 1:]))
        y_t = np.concatenate(
            list(y_train_folds[:i]) + list(y_train_folds[i + 1:]))
        # Fold i is the validation set.
        x_te = np.array(X_train_folds[i])
        y_te = np.array(y_train_folds[i])

        classifier.train(x_t, y_t)
        dists_ = classifier.compute_distances_no_loops(x_te)
        y_pred = classifier.predict_labels(dists_, k)

        # Fraction of correctly predicted validation examples. The
        # denominator is the size of this validation fold, not the size of
        # the separate test set.
        num_correct = np.sum(y_pred == y_te)
        accuracy = float(num_correct) / len(y_te)
        k_to_accuracies[k][i] = accuracy
################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))
plt.show() # **Inline Question #1:** Notice the structured patterns in the distance matrix, where some rows or columns are visible brighter. (Note that with the default color scheme black indicates low distances while white indicates high distances.) # # - What in the data is the cause behind the distinctly bright rows? # - What causes the columns? # **Your Answer**: *fill this in.* # # # In[18]: # Now implement the function predict_labels and run the code below: # We use k = 1 (which is Nearest Neighbor). y_test_pred = classifier.predict_labels(dists, k=1) # Compute and print the fraction of correctly predicted examples num_correct = np.sum(y_test_pred == y_test) accuracy = float(num_correct) / num_test print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy)) # You should expect to see approximately `27%` accuracy. Now lets try out a larger `k`, say `k = 5`: # In[19]: y_test_pred = classifier.predict_labels(dists, k=5) num_correct = np.sum(y_test_pred == y_test) accuracy = float(num_correct) / num_test print('Got %d / %d correct => accuracy: %f' %
def test1():
    """Evaluate kNN (k=5) on a CIFAR-10 subsample, then cross-validate.

    Loads CIFAR-10 via ``load_CIFAR10``, keeps the first 5000 training and
    500 test images, flattens each image to a row vector, prints the test
    accuracy, and finally calls ``cross_validate`` on the training subsample.
    """
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
    print('Training data shape:', X_train.shape)
    print('Training label shape:', y_train.shape)
    print('Test data shape:', X_test.shape)
    print('Test label shape:', y_test.shape)

    # Subsample. The index list is materialised because a bare ``range``
    # is a lazy object on Python 3.
    num_training = 5000
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]

    num_test = 500
    mask = list(range(num_test))
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Flatten every image into a single row.
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    print(X_train.shape, X_test.shape)

    from cs231n.classifiers import KNearestNeighbor
    classifier = KNearestNeighbor()
    classifier.train(X_train, y_train)

    dists = classifier.compute_distances_no_loops(X_test)
    y_test_pred = classifier.predict_labels(dists, k=5)
    num_correct = np.sum(y_test_pred == y_test)
    accuracy = float(num_correct) / num_test
    print("God %d/%d correct => accuracy: %f"
          % (num_correct, num_test, accuracy))

    cross_validate(X_train, y_train)
# array_split tolerates a sample count that is not a multiple of num_folds
# (np.split raises in that case) and gives the same folds when it is.
X_train_folds = np.array_split(X_train, num_folds)
y_train_folds = np.array_split(y_train, num_folds)

k_to_accuracies = {}
for k_choice in k_choices:
    for i in range(num_folds):
        knn = KNearestNeighbor()
        # Join every fold except fold i in one NumPy call instead of
        # flattening element by element in Python.
        xtrain = np.concatenate(X_train_folds[:i] + X_train_folds[i + 1:])
        ytrain = np.concatenate(y_train_folds[:i] + y_train_folds[i + 1:])
        knn.train(xtrain, ytrain)

        dists = knn.compute_distances_no_loops(np.asarray(X_train_folds[i]))
        y_test_pred = knn.predict_labels(dists, k=k_choice)
        num_correct = np.sum(y_test_pred == y_train_folds[i])
        accuracy = float(num_correct) / len(y_train_folds[i])
        k_to_accuracies.setdefault(k_choice, []).append(accuracy)
        print('k = %d, accuracy = %f' % (k_choice, accuracy))

# plot the raw observations
for k in k_choices:
    accuracies = k_to_accuracies[k]
    plt.scatter([k] * len(accuracies), accuracies)

# plot the trend line with error bars that correspond to standard deviation;
# statistics are taken in k_choices order so they always line up with the
# x values given to errorbar
accuracies_mean = np.array([np.mean(k_to_accuracies[k]) for k in k_choices])
accuracies_std = np.array([np.std(k_to_accuracies[k]) for k in k_choices])
plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std)
plt.show() # **Inline Question #1:** Notice the structured patterns in the distance matrix, where some rows or columns are visible brighter. (Note that with the default color scheme black indicates low distances while white indicates high distances.) # # - What in the data is the cause behind the distinctly bright rows? # - What causes the columns? # **Your Answer**: *fill this in.* # # # In[12]: # Now implement the function predict_labels and run the code below: # We use k = 1 (which is Nearest Neighbor). y_test_pred = classifier.predict_labels(dists, k=1) # Compute and print the fraction of correctly predicted examples num_correct = np.sum(y_test_pred == y_test) accuracy = float(num_correct) / num_test print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy)) # You should expect to see approximately `27%` accuracy. Now lets try out a larger `k`, say `k = 5`: # In[13]: y_test_pred = classifier.predict_labels(dists, k=5) num_correct = np.sum(y_test_pred == y_test) accuracy = float(num_correct) / num_test print('Got %d / %d correct => accuracy: %f' %
num_folds ): #Loop through all the folds of the training data. CV-fold is j-th. Other folds for training X_test_cv = X_train_folds[j] y_test_cv = y_train_folds[j] #print 'Test CV: ', X_test_cv.shape, y_test_cv.shape X_train_cv = np.vstack( X_train_folds[0:j] + X_train_folds[j + 1:] ) #Leaving out the j-th array. X/y_train_folds are LISTs y_train_cv = np.hstack(y_train_folds[0:j] + y_train_folds[j + 1:]) #print 'Train CV: ', X_train_cv.shape, y_train_cv.shape classifier.train(X_train_cv, y_train_cv) dists_cv = classifier.compute_distances_no_loops(X_test_cv) #print 'Dists CV: ', dists_cv.shape y_test_pred = classifier.predict_labels(dists_cv, k) num_correct_cv = np.sum(y_test_pred == y_test_cv) accuracy_cv = float(num_correct_cv) / y_test_cv.shape[0] print y_test_cv.shape[0] print 'Accuracy at %d-nearest neighbors, cv-fold is %d-th fold, is %.2f' % ( k, j + 1, accuracy_cv * 100) k_to_accuracies[k].append(accuracy_cv) ################################################################################ # END OF YOUR CODE # ################################################################################ # Print out the computed accuracies best_k = 1 max_accuracy = 0
# Open cs231n/classifiers/k_nearest_neighbor.py and implement
# compute_distances_two_loops.
# print() is called with a single argument throughout so the output is the
# same on Python 2 and Python 3.
print("Calculating distances....")

# Test your implementation:
dists = classifier.compute_distances_two_loops(X_test)
print(dists.shape)  # original author's note: 500 x 50000

# We can visualize the distance matrix: each row is a single test example and
# its distances to training examples
plt.imshow(dists, interpolation='none')
plt.show()

# Now implement the function predict_labels and run the code below:
# We use k = 1 (which is Nearest Neighbor).
y_test_pred = classifier.predict_labels(dists, k=1)

# Compute and print the fraction of correctly predicted examples
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f'
      % (num_correct, num_test, accuracy))

# Now lets speed up distance matrix computation by using partial vectorization
# with one loop. Implement the function compute_distances_one_loop and run the
# code below:
dists_one = classifier.compute_distances_one_loop(X_test)

# To ensure that our vectorized implementation is correct, we make sure that it
# agrees with the naive implementation. There are many ways to decide whether
# two matrices are similar; one of the simplest is the Frobenius norm. In case
# you haven't seen it before, the Frobenius norm of two matrices is the square
#Creating validation data and temp training data validation_X_test = X_train_folds[fold] validation_y_test = y_train_folds[fold] temp_X_train = np.concatenate(X_train_folds[:fold] + X_train_folds[fold + 1:]) temp_y_train = np.concatenate(y_train_folds[:fold] + y_train_folds[fold + 1:]) #Initializing a class test_classifier = KNearestNeighbor() test_classifier.train(temp_X_train, temp_y_train) #Computing the distance temp_dists = test_classifier.compute_distances_two_loops( validation_X_test) temp_y_test_pred = test_classifier.predict_labels(temp_dists, k=k) #Checking accuracies num_correct = np.sum(temp_y_test_pred == validation_y_test) num_test = validation_X_test.shape[0] accuracy = float(num_correct) / num_test print("k=", k, "Fold=", fold, "Accuracy=", accuracy) k_to_accuracies[k] = k_to_accuracies.get(k, []) + [accuracy] ################################################################################ # END OF YOUR CODE # ################################################################################ # Print out the computed accuracies for k in sorted(k_to_accuracies): for accuracy in k_to_accuracies[k]:
# plot the raw observations for k in k_choices: accuracies = k_to_accuracies[k] plt.scatter([k] * len(accuracies), accuracies) # plot the trend line with error bars that correspond to standard deviation accuracies_mean = np.array([np.mean(v) for k,v in sorted(k_to_accuracies.items())]) accuracies_std = np.array([np.std(v) for k,v in sorted(k_to_accuracies.items())]) plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std) plt.title('Cross-validation on k') plt.xlabel('k') plt.ylabel('Cross-validation accuracy') plt.show() """ # Based on the cross-validation results above, choose the best value for k, # retrain the classifier using all the training data, and test it on the test # data. You should be able to get above 28% accuracy on the test data. best_k = 1 classifier = KNearestNeighbor() classifier.train(X_train, y_train) #y_test_pred = classifier.predict(X_test, k=best_k) dists_two = classifier.compute_distances_no_loops(X_test) y_test_pred = classifier.predict_labels(dists_two, k=best_k) # Compute and display the accuracy num_test = y_test.shape[0] num_correct = np.sum(y_test_pred[:] == y_test[:, 0]) accuracy = float(num_correct) / num_test print 'Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy)
# compute_distances_two_loops.
# print() is called with a single argument throughout so the output is the
# same on Python 2 and Python 3.
print('before computing distances')

# Test your implementation:
dists = classifier.compute_distances_two_loops(X_test)
print('after computing distances')
print(dists.shape)

# We can visualize the distance matrix: each row is a single test example and
# its distances to training examples
#plt.imshow(dists, interpolation='none')
#plt.show()

# Now implement the function predict_labels and run the code below:
# We use k = 1 (which is Nearest Neighbor).
y_test_pred = classifier.predict_labels(dists, k=1)

# Compute and print the fraction of correctly predicted examples
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f'
      % (num_correct, num_test, accuracy))

# Repeat with a 5-neighbour vote.
y_test_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f'
      % (num_correct, num_test, accuracy))

dists_one = classifier.compute_distances_one_loop(X_test)

# To ensure that our vectorized implementation is correct, we make sure that it
Call a function f with args and return the time (in seconds) that it took to execute. """ import time tic = time.time() f(*args) toc = time.time() return toc - tic no_loop_time = time_function(classifier.compute_distances_no_loops, X_test) print 'No loop version took %f seconds' % no_loop_time # you should see significantly faster performance with the fully vectorized implementation dists_two = classifier.compute_distances_no_loops(X_test) y_test_pred = classifier.predict_labels(dists_two, k=5) # # Compute and print the fraction of correctly predicted examples num_correct = np.sum(y_test_pred == y_test) accuracy = float(num_correct) / num_test print 'Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy) num_folds = 5 k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100] X_train_folds = [] y_train_folds = [] ################################################################################ # TODO: #
# Time one complete train / distance / predict / score pass.
tic = time.time()
best_k = 10
classifier.train(X_train, y_train)

# Distance matrix between test and training samples.
# Alternative kept from the original (L2 distance via the classifier):
# t_dist,dist_bst_v = time_function(classifier.compute_distances_no_loops,X_test)
# Active choice: L1 ("cityblock") distance.
# NOTE(review): `cdist` is imported outside this snippet; presumably
# scipy.spatial.distance.cdist - confirm.
dist_bst_v = cdist(X_test, X_train, 'cityblock')

y_pred = classifier.predict_labels(dist_bst_v, best_k)

# Fraction of test samples predicted correctly.
num_correct = np.sum(y_pred == y_test)
num_test = X_test.shape[0]
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f'
      % (num_correct, num_test, accuracy))

toc = time.time()
print("Running time:", toc - tic)
### Data Set: CIFAR-10 ###
# Flatten every sample into a single row.
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)
#######################################################
from cs231n.classifiers import KNearestNeighbor

classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

# Two-loop distance matrix, shown as an image.
dists = classifier.compute_distances_two_loops(X_test)
print(dists.shape)
plt.imshow(dists, interpolation='none')
plt.show()

# Accuracy with k = 1.
y_test_pred = classifier.predict_labels(dists, k=1)
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f'
      % (num_correct, num_test, accuracy))

# Accuracy with k = 5.
y_test_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f'
      % (num_correct, num_test, accuracy))

# Compare the one-loop result with the two-loop result via the Frobenius
# norm of their difference.
dists_one = classifier.compute_distances_one_loop(X_test)
difference = np.linalg.norm(dists - dists_one, ord='fro')
print('Difference was: %f' % (difference, ))
# Flatten each test sample into a single row vector.
X_test = np.reshape(X_test, (X_test.shape[0], -1))
# Single-argument print(...) calls are used throughout so the script emits
# the same text under Python 2 and Python 3.
print('%s %s' % (X_train.shape, X_test.shape))

from cs231n.classifiers import KNearestNeighbor

classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

# Test-to-train distance matrix from the two-loop implementation.
dists = classifier.compute_distances_two_loops(X_test)
print('Distance Matrix:  %s' % (dists.shape,))

# Set dists_show to a non-zero value to display the distance matrix.
dists_show = 0
if dists_show:
    plt.imshow(dists, interpolation='none')
    plt.show()

# Now implement the function predict_labels and run the code below.
# This run uses k = 3 (k = 1 would be plain Nearest Neighbor).
y_test_pred = classifier.predict_labels(dists, k=3)

# Compute and print the number of correctly predicted examples; accuracy is
# expressed as a percentage here.
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test * 100
print('Got %d / %d correct, and accuracy: %f' % (num_correct, num_test, accuracy))

# Now lets speed up distance matrix computation by using partial
# vectorization.
dists_one = classifier.compute_distances_one_loop(X_test)

# To ensure that our vectorized implementation is correct, we make sure that
# it agrees with the naive implementation. There are many ways to decide
# whether two matrices are similar; one of the simplest is the Frobenius
# norm. In case you haven't seen it before, the Frobenius norm of two
# matrices is the square root of the squared sum of differences of all
# elements; in other words, reshape the matrices into vectors and compute
# the Euclidean distance between them.
difference = np.linalg.norm(dists - dists_one, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    print('Uh-oh! The distance matrices are different')
plt.show()

# **Inline Question #1:** Notice the structured patterns in the distance
# matrix, where some rows or columns are visibly brighter. (Note that with
# the default color scheme black indicates low distances while white
# indicates high distances.)
#
# - What in the data is the cause behind the distinctly bright rows?
# - What causes the columns?

# **Your Answer**: *fill this in.*
#
#

# In[73]:

# Predict with k = 1 (plain Nearest Neighbor) from the precomputed distances.
y_test_pred = classifier.predict_labels(dists, k=1)

# Debug output: the predicted and true labels, then their container types.
print(y_test_pred, y_test)
print(type(y_test_pred), type(y_test))

# Count the matching labels and report the fraction that is correct.
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))

# You should expect to see approximately `27%` accuracy. Now lets try out a
# larger `k`, say `k = 5`:

# In[11]:

y_test_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
classifier.train(X_train, y_train)

# Distances come from compute_distances_two_loops, implemented in
# cs231n/classifiers/k_nearest_neighbor.py.
dists = classifier.compute_distances_two_loops(X_test)
print(dists.shape)

# Visualize the distance matrix: each row is a single test example and its
# distances to the training examples.
plt.imshow(dists, interpolation='none')
plt.show()

# Run predict_labels for k = 1 first and k = 5 second, each with its own
# report line; the variables end up holding the k = 5 results.
for num_neighbors, report_fmt in (
        (1, 'Got %d /%d correct => accuracy: %f'),
        (5, 'Using k=5, Got %d /%d correct => accuracy: %f'),
):
    y_test_pred = classifier.predict_labels(dists, k=num_neighbors)
    # Fraction of test examples whose predicted label is correct.
    num_correct = np.sum(y_test_pred == y_test)
    accuracy = float(num_correct) / num_test
    print(report_fmt % (num_correct, num_test, accuracy))

# Next step: speed up the distance matrix computation with partial
# vectorization (one loop).
# NOTE(review): `k` and `fold` are not bound inside this fragment; the
# cross-validation step below presumably runs inside loops over the candidate
# k values and the fold indices -- confirm against the surrounding file.

# On every pass one fold is held out for validation.
X_test_crossval = X_train_folds[fold]
y_test_crossval = y_train_folds[fold]

# The accuracy denominator is the size of the held-out fold itself rather
# than a fixed 1000, so the result stays correct for any fold size.
num_test_crossval = len(y_test_crossval)

# The remaining folds are stacked together to form the training data.
X_train_crossval = np.vstack(X_train_folds[0:fold] + X_train_folds[fold + 1:])
y_train_crossval = np.hstack(y_train_folds[0:fold] + y_train_folds[fold + 1:])

# Train on the remaining folds.
classifier.train(X_train_crossval, y_train_crossval)

# Distances from the held-out samples to the training samples
# (fully vectorized implementation).
dists_crossval = classifier.compute_distances_no_loops(X_test_crossval)

# Predict the held-out labels with the current k value.
y_test_pred = classifier.predict_labels(dists_crossval, k)

# Record the fraction of held-out samples that were labelled correctly.
num_correct = np.sum(y_test_pred == y_test_crossval)
accuracy = float(num_correct) / num_test_crossval
k_to_accuracies[k].append(accuracy)

################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies, grouped by k in ascending order.
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))