        xdev = np.array(X_train_folds[i])
        ydev = np.array(y_train_folds[i])
        # use // so the shape arithmetic stays integer-valued under Python 3
        xtr = np.reshape(xtr, (X_train.shape[0] * (num_folds - 1) // num_folds, -1))  # (4000, 3072)
        ytr = np.reshape(ytr, (y_train.shape[0] * (num_folds - 1) // num_folds, -1))  # (4000, 1)
        xdev = np.reshape(xdev, (X_train.shape[0] // num_folds, -1))  # (1000, 3072)
        ydev = np.reshape(ydev, (y_train.shape[0] // num_folds, -1))  # (1000, 1)

        nn = KNearestNeighbor()
        nn.train(xtr, ytr)
        y_predict = nn.predict(xdev, k=k, num_loops=0)  # (1000,)
        # reshape the predictions to (1000, 1) so their dimensions match ydev
        # for the element-wise comparison below
        y_predict = np.reshape(y_predict, (y_predict.shape[0], -1))
        num_correct = np.sum(y_predict == ydev)
        accuracy = num_correct / float(xdev.shape[0])
        k_to_accuracies[k][i] = accuracy
################################################################################
#                             END OF YOUR CODE                                 #
################################################################################

# Print out the computed accuracies
print("The computed accuracies:")
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))
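# The loop above reshapes xtr / ytr that were assembled from the non-held-out
# folds before this point. A minimal, self-contained sketch of that
# fold-assembly step on toy data (assuming the folds come from np.array_split;
# X_toy, i_toy, and the sizes are hypothetical, not the CIFAR-10 arrays):
import numpy as np

num_folds_toy = 5
X_toy = np.arange(20).reshape(10, 2)          # 10 toy samples, 2 features each
folds = np.array_split(X_toy, num_folds_toy)  # list of num_folds_toy sub-arrays
i_toy = 0                                     # index of the held-out fold
x_heldout = folds[i_toy]                      # plays the role of xdev above
x_rest = np.concatenate(folds[:i_toy] + folds[i_toy + 1:])  # plays the role of xtr
assert x_rest.shape[0] == X_toy.shape[0] * (num_folds_toy - 1) // num_folds_toy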
# plot the raw observations
for k in k_choices:
    accuracies = k_to_accuracies[k]
    plt.scatter([k] * len(accuracies), accuracies)

# plot the trend line with error bars that correspond to standard deviation
accuracies_mean = np.array([np.mean(v) for k, v in sorted(k_to_accuracies.items())])
accuracies_std = np.array([np.std(v) for k, v in sorted(k_to_accuracies.items())])
plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std)
plt.title('Cross-validation on k')
plt.xlabel('k')
plt.ylabel('Cross-validation accuracy')
plt.show()


# In[ ]:


# Based on the cross-validation results above, choose the best value for k,
# retrain the classifier using all the training data, and test it on the test
# data. You should be able to get above 28% accuracy on the test data.
best_k = 1

classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
y_test_pred = classifier.predict(X_test, k=best_k)

# Compute and display the accuracy
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))
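# best_k above is hard-coded after reading the plot; a minimal sketch of
# selecting it programmatically from the cross-validation results instead
# (cv_best_k is a hypothetical name; breaking ties toward the smallest k via
# sorted() is a design choice, not part of the assignment):
cv_best_k = max(sorted(k_to_accuracies), key=lambda kk: np.mean(k_to_accuracies[kk]))
print('k with the highest mean cross-validation accuracy: %d' % cv_best_k)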