def test_KNN_dists_noloop_shape(sample_train, sample_test, in_count):
    Xtrain, ytrain = sample_train(count=in_count)
    Xtest, ytest = sample_test(count=in_count - 30)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    Xtest = np.reshape(Xtest, (Xtest.shape[0], -1))
    knn = KNearestNeighbor()
    knn.train(Xtrain, ytrain)
    assert knn.compute_distances_no_loops(Xtest).shape == (Xtest.shape[0], Xtrain.shape[0])
def test_KNN_dists_one_to_none(sample_train, sample_test):
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest = sample_test(count=10)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    Xtest = np.reshape(Xtest, (Xtest.shape[0], -1))
    knn = KNearestNeighbor()
    knn.train(Xtrain, ytrain)
    dist_one = knn.compute_distances_one_loop(Xtest)
    dist_no = knn.compute_distances_no_loops(Xtest)
    assert np.linalg.norm(dist_one - dist_no, ord='fro') < 0.001
def test_KNN_predict_labels_shape(sample_train, sample_test):
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest = sample_test(count=10)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    Xtest = np.reshape(Xtest, (Xtest.shape[0], -1))
    knn = KNearestNeighbor()
    knn.train(Xtrain, ytrain)
    dist_no = knn.compute_distances_no_loops(Xtest)
    for k in (1, 2, 3, 4):
        assert knn.predict_labels(dist_no, k=k).shape == ytest.shape
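# The tests above rely on sample_train and sample_test fixtures that are not
# shown in this section. Below is a minimal sketch of what a compatible
# conftest.py might look like, assuming the fixtures return factory functions
# that yield `count` random CIFAR-10-shaped examples. The names, shapes, and
# random data are assumptions for illustration, not the project's actual
# fixtures; in_count would likewise be a parametrized fixture.
import numpy as np
import pytest

@pytest.fixture
def sample_train():
    def _make(count=40):
        X = np.random.randn(count, 32, 32, 3)   # fake image data
        y = np.random.randint(10, size=count)   # fake labels in [0, 10)
        return X, y
    return _make

@pytest.fixture
def sample_test():
    def _make(count=10):
        X = np.random.randn(count, 32, 32, 3)
        y = np.random.randint(10, size=count)
        return X, y
    return _make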
################################################################################
# Perform k-fold cross validation to find the best value of k. For each       #
# possible value of k, run the k-nearest-neighbor algorithm num_folds times,  #
# where in each case you use all but one of the folds as training data and    #
# the last fold as a validation set. Store the accuracies for all folds and   #
# all values of k in the k_to_accuracies dictionary.                          #
################################################################################
for k in k_choices:
    acc = []
    for i in range(num_folds):
        # Each image fold is (1000, 3072); stacking all folds except fold i
        # vertically gives a training set of shape (4000, 3072).
        x_train_fold = np.vstack(X_train_folds[0:i] + X_train_folds[i + 1:])
        # Each label fold is (1000,); concatenating horizontally gives (4000,).
        y_train_fold = np.hstack(y_train_folds[0:i] + y_train_folds[i + 1:])
        x_val = X_train_folds[i]
        y_val = y_train_folds[i]
        classifier = KNearestNeighbor()
        classifier.train(x_train_fold, y_train_fold)
        # predict computes the distance matrix internally, so there is no need
        # to call compute_distances_no_loops separately here.
        y_val_pred = classifier.predict(x_val, k)
        correct = np.sum(y_val_pred == y_val) / y_val.shape[0]
        acc.append(correct)
    k_to_accuracies[k] = acc
################################################################################
#                               END OF YOUR CODE                               #
################################################################################

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))
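# With k_to_accuracies filled in, a natural next step is to average over the
# folds and pick the best-performing k. A small sketch, assuming the dictionary
# built above; the selection rule here is an illustration, not part of the
# original code.
mean_acc = {k: float(np.mean(v)) for k, v in k_to_accuracies.items()}
best_k = max(mean_acc, key=mean_acc.get)
print('best k = %d (mean accuracy = %f)' % (best_k, mean_acc[best_k]))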
# To ensure that our vectorized implementation is correct, we make sure that it
# agrees with the naive implementation. There are many ways to decide whether
# two matrices are similar; one of the simplest is the Frobenius norm. In case
# you haven't seen it before, the Frobenius norm of the difference of two
# matrices is the square root of the sum of the squared differences of all
# elements; in other words, reshape the matrices into vectors and compute the
# Euclidean distance between them.
difference = np.linalg.norm(dists - dists_one, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    print('Uh-oh! The distance matrices are different')

# Now implement the fully vectorized version inside compute_distances_no_loops
# and run the code
dists_two = classifier.compute_distances_no_loops(X_test)

# Check that the distance matrix agrees with the one we computed before:
difference = np.linalg.norm(dists - dists_two, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    print('Uh-oh! The distance matrices are different')

# Let's compare how fast the implementations are
def time_function(f, *args):
    """
    Call a function f with args and return the time (in seconds)
    that it took to execute.
    """
    import time
    tic = time.time()
    f(*args)
    toc = time.time()
    return toc - tic
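# With time_function defined, the implementations can be timed against each
# other. A sketch of that comparison, assuming classifier and X_test from the
# surrounding code and that all three compute_distances_* methods are
# implemented:
two_loop_time = time_function(classifier.compute_distances_two_loops, X_test)
print('Two loop version took %f seconds' % two_loop_time)

one_loop_time = time_function(classifier.compute_distances_one_loop, X_test)
print('One loop version took %f seconds' % one_loop_time)

no_loop_time = time_function(classifier.compute_distances_no_loops, X_test)
print('No loop version took %f seconds' % no_loop_time)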
#sys.path.insert(0, new + '\\classifier')  # where the classifier is stored
from cs231n.classifiers.k_nearest_neighbor import KNearestNeighbor

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a no-op:
# the classifier simply remembers the data and does no further processing.
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

#%% Calculate the distances.
# Open cs231n/classifiers/k_nearest_neighbor.py and implement
# compute_distances_two_loops.

# Test your implementation:
dists = classifier.compute_distances_no_loops(X_test)
#dists = classifier.compute_distances_two_loops(X_test)
#dists = classifier.compute_distances_one_loop(X_test)

#%%
# We can visualize the distance matrix: each row is a single test example and
# its distances to training examples.
# Note: both panels currently display the same `dists` computed above; to
# actually compare implementations, store the one-loop result in its own
# variable and plot that in the first panel.
plt.figure()
plt.subplot(2, 1, 1)
plt.imshow(dists, interpolation='none')
plt.ylabel('one loop')
plt.subplot(2, 1, 2)
plt.imshow(dists, interpolation='none')
plt.ylabel('no loop')
plt.show()
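# For reference, a fully vectorized distance computation is typically built on
# the expansion ||x - y||^2 = ||x||^2 + ||y||^2 - 2*x.y with broadcasting.
# Below is a minimal standalone sketch of that idea; it is an illustration of
# the technique, not the class method from the assignment.
def l2_distances_no_loops(X_test, X_train):
    """All pairwise Euclidean distances with no explicit Python loops."""
    test_sq = np.sum(X_test ** 2, axis=1, keepdims=True)   # (num_test, 1)
    train_sq = np.sum(X_train ** 2, axis=1)                # (num_train,)
    cross = X_test @ X_train.T                             # (num_test, num_train)
    sq_dists = test_sq + train_sq - 2.0 * cross
    # Clamp tiny negatives caused by floating-point round-off before sqrt.
    return np.sqrt(np.maximum(sq_dists, 0.0))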