def test_KNN_predict_incorrect_shape(sample_train, sample_test):
    Xtrain, ytrain = sample_train(count=500)
    Xtest, ytest = sample_test(count=125)
    # Flatten each image into a single row of features
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    Xtest = np.reshape(Xtest, (Xtest.shape[0], -1))
    knn = KNearestNeighbor()
    knn.train(Xtrain, ytrain)
    with pytest.raises(ValueError):
        knn.predict(ytrain)  # ytrain has incorrect dimensions for predict
def test_KNN_predict_num_loop_parameter(sample_train, sample_test, num_loops):
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest = sample_test(count=10)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    Xtest = np.reshape(Xtest, (Xtest.shape[0], -1))
    knn = KNearestNeighbor()
    knn.train(Xtrain, ytrain)
    with pytest.raises(ValueError):
        knn.predict(Xtest, 0, num_loops)  # an invalid num_loops must raise
def test_KNN_predict_loop_parameter(sample_train, sample_test, k, num_loops):
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest = sample_test(count=10)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    Xtest = np.reshape(Xtest, (Xtest.shape[0], -1))
    knn = KNearestNeighbor()
    knn.train(Xtrain, ytrain)
    # One predicted label per test sample, for every valid num_loops
    assert knn.predict(Xtest, k, num_loops).shape == ytest.shape
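# For reference, a minimal sketch of the input validation the three tests
# above assume inside KNearestNeighbor.predict. This is an illustration of
# the expected checks, not the actual class: the real predict goes on to
# compute distances and vote over the k nearest training labels.
def predict(self, X, k=1, num_loops=0):
    if X.ndim != 2 or X.shape[1] != self.X_train.shape[1]:
        # test_KNN_predict_incorrect_shape passes ytrain, which fails here
        raise ValueError('X must be 2D with the same feature count as the training data')
    if num_loops not in (0, 1, 2):
        # test_KNN_predict_num_loop_parameter exercises this branch
        raise ValueError('Invalid value %d for num_loops' % num_loops)
    ...  # distance computation and label voting omitted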
################################################################################
# Perform k-fold cross-validation to find the best value of k. For each       #
# possible value of k, run the k-nearest-neighbor algorithm num_folds times,  #
# where in each case you use all but one of the folds as training data and    #
# the last fold as a validation set. Store the accuracies for all folds and   #
# all values of k in the k_to_accuracies dictionary.                          #
################################################################################
for k in k_choices:
    acc = []
    print(k)
    for i in range(num_folds):
        # Each fold is (1000, 3072); stacking all folds except fold i
        # vertically gives a (4000, 3072) training set
        x_train_fold = np.vstack(X_train_folds[0:i] + X_train_folds[i + 1:])
        # Each label fold is (1000,); concatenating horizontally gives (4000,)
        y_train_fold = np.hstack(y_train_folds[0:i] + y_train_folds[i + 1:])
        x_val = X_train_folds[i]
        y_val = y_train_folds[i]
        classifier = KNearestNeighbor()
        classifier.train(x_train_fold, y_train_fold)
        # Compute the distance matrix once and reuse it for label voting
        dists = classifier.compute_distances_no_loops(x_val)
        y_val_pred = classifier.predict_labels(dists, k=k)
        acc.append(np.sum(y_val_pred == y_val) / y_val.shape[0])
    k_to_accuracies[k] = acc
################################################################################
#                                END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))
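# The no-loop distance computation above relies on the expansion
# ||x - y||^2 = ||x||^2 - 2*x.y + ||y||^2, which turns the whole distance
# matrix into one matrix multiply plus broadcasting. A minimal sketch,
# assuming self.X_train holds the flattened training set:
def compute_distances_no_loops(self, X):
    test_sq = np.sum(X ** 2, axis=1, keepdims=True)   # (num_test, 1)
    train_sq = np.sum(self.X_train ** 2, axis=1)      # (num_train,)
    cross = X.dot(self.X_train.T)                     # (num_test, num_train)
    # Clamp at zero to guard against tiny negative values from float error
    return np.sqrt(np.maximum(test_sq - 2 * cross + train_sq, 0))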
# Plot the raw observations
for k in k_choices:
    accuracies = k_to_accuracies[k]
    plt.scatter([k] * len(accuracies), accuracies)

# Plot the trend line with error bars that correspond to standard deviation
accuracies_mean = np.array(
    [np.mean(v) for k, v in sorted(k_to_accuracies.items())])
accuracies_std = np.array(
    [np.std(v) for k, v in sorted(k_to_accuracies.items())])
plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std)
plt.title('Cross-validation on k')
plt.xlabel('k')
plt.ylabel('Cross-validation accuracy')
plt.show()

# Based on the cross-validation results above, choose the best value for k,
# retrain the classifier using all the training data, and test it on the test
# data. You should be able to get above 28% accuracy on the test data.
best_k = 10

classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
y_test_pred = classifier.predict(X_test, k=best_k)

# Compute and display the accuracy
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))
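# best_k = 10 above was read off the plot by hand; it can also be chosen
# programmatically from the cross-validation results (a sketch, using the
# k_to_accuracies dictionary filled in above):
best_k = max(k_to_accuracies, key=lambda k: np.mean(k_to_accuracies[k]))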
# Subsample the data for faster execution
mask = range(num_training)
x_train = x_train[mask]
y_train = y_train[mask]
num_test = 10000
mask = range(num_test)
x_test = x_test[mask]
y_test = y_test[mask]

# Flatten each image into a single row of features
x_train = np.reshape(x_train, (x_train.shape[0], -1))
x_test = np.reshape(x_test, (x_test.shape[0], -1))
print(x_train.shape, x_test.shape)

classifier = KNearestNeighbor()
classifier.train(x_train, y_train)

# Evaluate test accuracy for k = 1..9 and plot the curve
ks = range(1, 10)
pre = []
num_correct = []
accuracy = []
for k in ks:
    pr = classifier.predict(x_test, k)
    num = np.sum(pr == y_test)
    pre.append(pr)
    num_correct.append(num)
    accuracy.append(float(num) / num_test)
plt.plot(ks, accuracy)
plt.show()
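# Each classifier.predict call in the loop above recomputes the full distance
# matrix even though the distances do not depend on k. A sketch of computing
# them once and reusing them, assuming the class exposes
# compute_distances_no_loops and predict_labels as in the CS231n skeleton:
dists = classifier.compute_distances_no_loops(x_test)
accuracy = [np.mean(classifier.predict_labels(dists, k=k) == y_test) for k in ks]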
# Dictionary holding the accuracies (list) for each value of k
k_to_accuracies = {}

# k-fold cross-validation: use fold i as validation and all other folds as training
for choice in k_choices:
    for i in range(num_folds):
        # Partition into training and validation arrays
        X_tr = np.vstack([X_train_folds[x] for x in range(num_folds) if x != i])
        y_tr = np.hstack([y_train_folds[x] for x in range(num_folds) if x != i])
        X_te = X_train_folds[i]
        y_te = y_train_folds[i]

        # Create a kNN classifier instance and train it on this split
        clf = KNearestNeighbor()
        clf.train(X_tr, y_tr)

        # Predict on the held-out fold and record the accuracy
        pred = clf.predict(X_te, k=choice)
        acc = float(np.sum(pred == y_te)) / y_te.shape[0]
        print(f"k = {choice}, accuracy = {acc}")
        k_to_accuracies.setdefault(choice, []).append(acc)

# Plot the raw observations
for k in k_choices:
    accs = k_to_accuracies[k]
    plt.scatter([k] * len(accs), accs)

# Plot the trend line with error bars corresponding to the standard deviation
accs_mean = np.array([np.mean(val) for key, val in sorted(k_to_accuracies.items())])
accs_std = np.array([np.std(val) for key, val in sorted(k_to_accuracies.items())])
plt.errorbar(k_choices, accs_mean, yerr=accs_std)
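# For context, a sketch of the label-voting step that clf.predict performs
# once distances are available: np.argsort finds the k nearest training
# points, np.bincount tallies their labels, and np.argmax takes the majority
# vote, breaking ties toward the smaller label. Illustrative only, assuming
# non-negative integer class labels as in CIFAR-10:
def predict_labels(self, dists, k=1):
    num_test = dists.shape[0]
    y_pred = np.zeros(num_test, dtype=self.y_train.dtype)
    for i in range(num_test):
        closest_y = self.y_train[np.argsort(dists[i])[:k]]
        y_pred[i] = np.argmax(np.bincount(closest_y))
    return y_pred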