def model_with_best_k(self, best_k=10):
    """Retrain kNN on all training data and print the test-set accuracy.

    A fresh ``KNearestNeighbor`` is trained on ``self.X_train`` /
    ``self.y_train`` and used to predict labels for ``self.X_test``. The
    number of predictions equal to ``self.y_test`` is divided by
    ``self.num_test`` and the result is printed.

    Args:
        best_k: Number of neighbours passed to ``predict``. Defaults to 10.

    Returns:
        None. The accuracy is only printed.
    """
    # Based on the cross-validation results, choose the best value for k,
    # retrain the classifier using all the training data, and test it on the
    # test data. You should be able to get above 28% accuracy on the test data.
    classifier = KNearestNeighbor()
    classifier.train(self.X_train, self.y_train)
    y_test_pred = classifier.predict(self.X_test, k=best_k)

    # Compute and display the accuracy.
    num_correct = np.sum(y_test_pred == self.y_test)
    accuracy = float(num_correct) / self.num_test
    # A parenthesised single-argument print is valid on Python 2 and Python 3.
    print('Got %d / %d correct => accuracy: %f'
          % (num_correct, self.num_test, accuracy))
# NOTE(review): this print appears to be the body of a loop whose header lies
# before this fragment (presumably the loop over k_to_accuracies) -- confirm
# its nesting when merging.
print('k = %d, accuracy = %f' % (k, accuracy))

# plot the raw observations
for k in k_choices:
    accuracies = k_to_accuracies[k]
    plt.scatter([k] * len(accuracies), accuracies)

# plot the trend line with error bars that correspond to standard deviation.
# The statistics are computed in k_choices order so that every mean/std lines
# up with the x-value it is plotted against, even if k_choices is not sorted.
# A separate loop name is used so `k` is not clobbered on Python 2.
accuracies_mean = np.array([np.mean(k_to_accuracies[kc]) for kc in k_choices])
accuracies_std = np.array([np.std(k_to_accuracies[kc]) for kc in k_choices])
plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std)
plt.title('Cross-validation on k')
plt.xlabel('k')
plt.ylabel('Cross-validation accuracy')
plt.show()

# Based on the cross-validation results above, choose the best value for k,
# retrain the classifier using all the training data, and test it on the test
# data. You should be able to get above 28% accuracy on the test data.
best_k = 1

classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
y_test_pred = classifier.predict(X_test, k=best_k)

# Compute and display the accuracy
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
# A parenthesised single-argument print is valid on Python 2 and Python 3.
print('Got %d / %d correct => accuracy: %f'
      % (num_correct, num_test, accuracy))
# X_test = X_test[mask]
# y_test = y_test[mask]

# Flatten each image into a single row: (num_samples, everything else).
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)
print(X_train.shape, X_test.shape)

# Build the kNN classifier. "Training" a kNN classifier is a no-op:
# it simply remembers the data and does no further processing.
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

# Cross-validation
# validations = cross_validation(X_train,y_train)
# print(validations)

# Based on the cross-validation results above, choose the best value for k,
# retrain the classifier using all the training data, and test it on the test
# data. You should be able to get above 28% accuracy on the test data.

# Evaluation
y_test_pred = classifier.predict(X_test, k=100)

# Count the matching predictions and report the resulting accuracy.
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print(
    'Got %d / %d correct => accuracy: %f'
    % (num_correct, num_test, accuracy)
)
# accuracy values that we found when using that value of k.
k_to_accuracies = {}

################################################################################
# TODO:                                                                        #
# Perform k-fold cross validation to find the best value of k. For each        #
# possible value of k, run the k-nearest-neighbor algorithm num_folds times,   #
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all fold and all     #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
for k in k_choices:
    accuracies = []
    for f in range(num_folds):
        # Hold out fold f for validation and train on every other fold.
        # The test set is deliberately not touched here: using it to pick k
        # would leak test information into the hyperparameter choice.
        X_fold_train = np.concatenate(
            [X_train_folds[j] for j in range(num_folds) if j != f])
        y_fold_train = np.concatenate(
            [y_train_folds[j] for j in range(num_folds) if j != f])
        X_fold_val = X_train_folds[f]
        y_fold_val = y_train_folds[f]

        classifier.train(X=X_fold_train, y=y_fold_train)
        y_val_pred = classifier.predict(X=X_fold_val, k=k)

        # Accuracy is measured against the held-out fold's own size.
        num_correct = np.sum(y_val_pred == y_fold_val)
        num_val = len(y_fold_val)
        accuracies.append(float(num_correct) / num_val)
        print('Got %d / %d correct => accuracy: %f'
              % (num_correct, num_val, accuracies[f]))
    k_to_accuracies[k] = accuracies
################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all fold and all     #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

for k in k_choices:
    # Start a fresh accuracy list for this k; one entry is added per fold.
    k_to_accuracies[k] = fold_scores = []
    for held_out in range(num_folds):
        # Every fold except the held-out one forms the training split.
        kept = [j for j in range(num_folds) if j != held_out]
        classifier.train(
            np.concatenate([X_train_folds[j] for j in kept]),
            np.concatenate([y_train_folds[j] for j in kept]))

        # Score the predictions on the held-out fold.
        X_val, y_val = X_train_folds[held_out], y_train_folds[held_out]
        hits = np.sum(classifier.predict(X_val, k=k) == y_val)
        fold_scores.append(1.0 * hits / len(y_val))

# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))

# plot the raw observations: one point per (k, fold accuracy) pair
for k in k_choices:
    accuracies = k_to_accuracies[k]
    xs = [k] * len(accuracies)
    plt.scatter(xs, accuracies)

# plot the trend line with error bars that correspond to standard deviation
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
for k in k_choices:
    accrs = []
    for i in range(num_folds):
        # Fold i is the validation split; all other folds are concatenated
        # into the training split. Selecting by index avoids copying and
        # mutating the fold lists on every iteration.
        X_valid = X_train_folds[i]
        y_valid = y_train_folds[i]
        X_train_i = np.concatenate(
            [fold for j, fold in enumerate(X_train_folds) if j != i])
        y_train_i = np.concatenate(
            [fold for j, fold in enumerate(y_train_folds) if j != i])

        # A new classifier is created for every fold.
        classifier = KNearestNeighbor()
        classifier.train(X_train_i, y_train_i)
        preds = classifier.predict(X_valid, k=k)

        # Fraction of validation predictions that match the labels.
        acc = (preds == y_valid).mean()
        accrs.append(acc)
    k_to_accuracies[k] = accrs
    print(k)  # progress indicator: this k is done
################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        # A parenthesised single-argument print is valid on Python 2 and 3.
        print('k = %d, accuracy = %f' % (k, accuracy))