def test_KNN_train(sample_train, sample_test):
    """train() should reject raw (unflattened) sample tensors.

    NOTE(review): presumably the ValueError comes from the inputs not being
    flattened to 2-D rows — confirm against KNearestNeighbor.train.
    """
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest = sample_test(count=10)
    with pytest.raises(ValueError):
        classifier = KNearestNeighbor()
        classifier.train(Xtrain, ytrain)
def test_KNN_train_reshape_input(sample_train, sample_test):
    """train() should accept inputs once each sample is flattened to a row."""
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest = sample_test(count=10)
    # Flatten every sample into a single row vector.
    Xtrain = Xtrain.reshape(Xtrain.shape[0], -1)
    Xtest = Xtest.reshape(Xtest.shape[0], -1)
    classifier = KNearestNeighbor()
    classifier.train(Xtrain, ytrain)
def test_KNN_dists_noloop_shape(sample_train, sample_test, in_count):
    """compute_distances_no_loops must return a (num_test, num_train) matrix."""
    Xtrain, ytrain = sample_train(count=in_count)
    Xtest, ytest = sample_test(count=in_count - 30)
    Xtrain = Xtrain.reshape(Xtrain.shape[0], -1)
    Xtest = Xtest.reshape(Xtest.shape[0], -1)
    classifier = KNearestNeighbor()
    classifier.train(Xtrain, ytrain)
    dists = classifier.compute_distances_no_loops(Xtest)
    assert dists.shape == (Xtest.shape[0], Xtrain.shape[0])
def test_KNN_predict_loop_parameter(sample_train, sample_test, k, num_loops):
    """predict() must return one label per test row for any k / num_loops combo."""
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest = sample_test(count=10)
    Xtrain = Xtrain.reshape(Xtrain.shape[0], -1)
    Xtest = Xtest.reshape(Xtest.shape[0], -1)
    classifier = KNearestNeighbor()
    classifier.train(Xtrain, ytrain)
    predictions = classifier.predict(Xtest, k, num_loops)
    assert predictions.shape == ytest.shape
def test_KNN_predict_labels_shape(sample_train, sample_test):
    """predict_labels() must return one label per test row for k = 1..4."""
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest = sample_test(count=10)
    Xtrain = Xtrain.reshape(Xtrain.shape[0], -1)
    Xtest = Xtest.reshape(Xtest.shape[0], -1)
    classifier = KNearestNeighbor()
    classifier.train(Xtrain, ytrain)
    dists = classifier.compute_distances_no_loops(Xtest)
    # Same four assertions as before, expressed as a loop over k.
    for k in (1, 2, 3, 4):
        assert classifier.predict_labels(dists, k=k).shape == ytest.shape
def test_KNN_predict_incorrect_shape(sample_train, sample_test):
    """predict() must raise ValueError for an input with the wrong dimensions."""
    Xtrain, ytrain = sample_train(count=500)
    Xtest, ytest = sample_test(count=125)
    Xtrain = Xtrain.reshape(Xtrain.shape[0], -1)
    Xtest = Xtest.reshape(Xtest.shape[0], -1)
    classifier = KNearestNeighbor()
    classifier.train(Xtrain, ytrain)
    with pytest.raises(ValueError):
        # ytrain has incorrect dimensions for a test-point matrix.
        classifier.predict(ytrain)
def test_KNN_predict_num_loop_parameter(sample_train, sample_test, num_loops):
    """predict() must raise ValueError when k=0, for every num_loops variant.

    Fix: the original expression ended in a dead `.shape` attribute access
    inside the `pytest.raises` block — it was never executed (predict raises
    first) and asserted nothing, so it has been removed.
    """
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest = sample_test(count=10)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    Xtest = np.reshape(Xtest, (Xtest.shape[0], -1))
    knn = KNearestNeighbor()
    knn.train(Xtrain, ytrain)
    with pytest.raises(ValueError):
        # k=0 is an invalid neighbour count.
        knn.predict(Xtest, 0, num_loops)
def test_KNN_dists_one_to_none(sample_train, sample_test):
    """The one-loop and no-loop distance computations must agree numerically."""
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest = sample_test(count=10)
    Xtrain = Xtrain.reshape(Xtrain.shape[0], -1)
    Xtest = Xtest.reshape(Xtest.shape[0], -1)
    classifier = KNearestNeighbor()
    classifier.train(Xtrain, ytrain)
    dist_one = classifier.compute_distances_one_loop(Xtest)
    dist_no = classifier.compute_distances_no_loops(Xtest)
    # Frobenius norm of the difference should be essentially zero.
    difference = np.linalg.norm(dist_one - dist_no, ord='fro')
    assert difference < 0.001
# Subsample the test set to its first 500 examples.
# NOTE(review): X_train/X_test/y_train/y_test, np, pickle are assumed to be
# defined earlier in the file — confirm against the full script.
num_test = 500
mask = list(range(num_test))
X_test = X_test[mask]
y_test = y_test[mask]

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

from cs231n.classifiers.k_nearest_neighbor import KNearestNeighbor

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

# Disabled code that originally computed the distance matrix and cached it
# to disk (left here as a block string, not executed):
"""
dists = classifier.compute_distances_two_loops(X_test)
pickle.dump(dists,open(r"D:\python\CS231n\assignment1\tmp.txt","wb"))
print(dists.shape)
print(dists)
plt.imshow(dists, interpolation='none')
plt.show()
"""

# Load the cached distance matrix instead of recomputing it.
with open(r"D:\python\CS231n\assignment1\tmp.txt", "rb") as file:
    dists = pickle.load(file)

y_test_pred = classifier.predict_labels(dists, k=1)
# Compute and print the fraction of correctly predicted examples
# NOTE(review): `i` and `cls` come from a sample-visualization loop that
# starts above this chunk — confirm against the full file.
if i == 0:
    plt.title(cls)
plt.show()

# Subsample the data for more efficient code execution in this exercise
num_training = 5000
mask = range(num_training)
X_train = X_train[mask]
y_train = y_train[mask]
num_test = 500
mask = range(num_test)
X_test = X_test[mask]
y_test = y_test[mask]

# Reshape the image data into rows
# CV: each row is a data vector of 3072 items.
# 5k for training and 500 for testing
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
# Fix: the original used the Python 2 `print` statement, which is a
# SyntaxError under Python 3; the sibling chunks in this file use print().
print(X_train.shape, X_test.shape)  # CV: Just saves the data

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
# Subsample the test set to its first 500 examples.
# NOTE(review): X_train/X_test/y_train/y_test, np, plt are assumed to be
# defined earlier in the file — confirm against the full script.
num_test = 500
mask = list(range(num_test))
X_test = X_test[mask]
y_test = y_test[mask]

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

from cs231n.classifiers.k_nearest_neighbor import KNearestNeighbor

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

# Open cs231n/classifiers/k_nearest_neighbor.py and implement
# compute_distances_two_loops.
# Test your implementation:
dists = classifier.compute_distances_two_loops(X_test)

# We can visualize the distance matrix: each row is a single test example and
# its distances to training examples
plt.imshow(dists, interpolation='none')
plt.show()

# Now implement the function predict_labels and run the code below:
# We use k = 1 (which is Nearest Neighbor).
# Reshape the image data into rows (one flattened sample per row).
# NOTE(review): X_train/X_test, np, plt are assumed to be defined earlier in
# the file — confirm against the full script.
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

#%%
# Earlier attempt at manipulating sys.path to locate the classifier module,
# kept disabled:
#import sys
#new = cwd+'\\cs231n'
#sys.path.intert(0, new+'\\classifier')  # where the classifier is stored

from cs231n.classifiers.k_nearest_neighbor import KNearestNeighbor

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

#% calculate the distance
# Open cs231n/classifiers/k_nearest_neighbor.py and implement
# compute_distances_two_loops.
# Test your implementation (the slower one- and two-loop variants are
# kept below, disabled):
dists = classifier.compute_distances_no_loops(X_test)
#dists = classifier.compute_distances_two_loops(X_test)
#dists = classifier.compute_distances_one_loop(X_test)

#%%
# We can visualize the distance matrix: each row is a single test example and
# its distances to training examples
plt.figure()
# Take the full CIFAR-10-sized split (50k train / 10k test) and flatten it.
# NOTE(review): x_train/x_test/y_train/y_test, np, plt, KNearestNeighbor are
# assumed to be defined earlier in the file — confirm.
num_training = 50000
mask = range(num_training)
x_train = x_train[mask]
y_train = y_train[mask]
num_test = 10000
mask = range(num_test)
x_test = x_test[mask]
y_test = y_test[mask]

# Reshape the image data into rows.
x_train = np.reshape(x_train, (x_train.shape[0], -1))
x_test = np.reshape(x_test, (x_test.shape[0], -1))
print(x_train.shape, x_test.shape)

classifier = KNearestNeighbor()
classifier.train(x_train, y_train)

# Sweep k from 1 to 9 and record predictions / accuracy for each value.
ks = range(1, 10)
pre = []          # predicted labels per k
num_correct = []  # number of correct predictions per k
accuracy = []     # fraction correct per k
for k in ks:
    pr = classifier.predict(x_test, k)
    num = np.sum(pr == y_test)
    pre.append(pr)
    num_correct.append(num)
    accuracy.append(float(num) / num_test)

# Plot accuracy as a function of k.
plt.plot(ks, accuracy)
# Split the arrays into individual folds X_train_folds = np.split(X_train, num_folds) y_train_folds = np.split(y_train, num_folds) # Dictionary holding the accuracies (list) for different values of k k_to_accuracies = {} # k-fold cross validation using fold i as validation, and all others as training for choice in k_choices: for i in range(num_folds): # Partition training and test arrays X_tr = np.vstack([X_train_folds[x] for x in range(num_folds) if x!=i]) y_tr = np.hstack([y_train_folds[x] for x in range(num_folds) if x!=i]) X_te = X_train_folds[i] y_te = y_train_folds[i] # Create kNN classifier instance clf = KNearestNeighbor() clf.train(X_tr, y_tr) # Predict pred = clf.predict(X_te, k=choice) acc = float(np.sum(pred == y_te)) / y_te.shape[0] print(f"k = {choice}, accuracy = {acc}") if i == 0: k_to_accuracies[choice] = [acc] else: k_to_accuracies[choice].append(acc) # Plot results for k in k_choices: accs = k_to_accuracies[k] plt.scatter([k] * len(accs), accs) # Plot trend line with error bars corresponding to standard deviation
# Fix: the chunk began mid-expression — the opening
# `accuracies_mean = np.array(` was lost in the original (its text was
# swallowed by a trailing comment on the previous physical line);
# reconstructed from the parallel `accuracies_std` statement below and from
# the `accuracies_mean` use in plt.errorbar.
accuracies_mean = np.array(
    [np.mean(v) for k, v in sorted(k_to_accuracies.items())])
accuracies_std = np.array(
    [np.std(v) for k, v in sorted(k_to_accuracies.items())])
plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std)
plt.title('Cross-validation on k')
plt.xlabel('k')
plt.ylabel('Cross-validation accuracy')
plt.show()

# Retrain on the full training set with the chosen k and report accuracy.
best_k = 1
classifier.train(X_train, y_train)
y_test_pred = classifier.predict(X_test, k=best_k)
determine_accuracy(y_test_pred, y_test, num_test)

# Driver section: reload data, recompute distances, time the no-loop
# version, then run cross-validation.
# Fix: the original used Python 2 `print` statements and `raw_input`, which
# fail under Python 3; converted to print()/input().
X_train, y_train, X_test, y_test, num_test = load_data()
input('Any key to continue...')
classifier = KNearestNeighbor()
compute_distances(X_train, y_train, X_test, y_test, num_test)
print('# 1')
print(classifier.y_train)
print(classifier.y_train.shape)
input('Any key to continue...')
no_loop_time = time_function(classifier.compute_distances_no_loops, X_test)
print('No loop version took %f seconds' % no_loop_time)
input('Any key to continue...')
cross_validation(X_train, y_train)