def run_k_fold_cross_validation(X_train, y_train, num_folds, k, k_accuracy):
    """Run num_folds-fold cross-validation for a single value of k.

    Records the per-fold accuracies in k_accuracy[k] and returns the
    mean validation accuracy across the folds.
    """
    X_train_folds, y_train_folds = generate_folds(X_train, y_train, num_folds)
    accuracy = 0.0
    accuracy_list = []
    for i in range(num_folds):
        # Hold out fold i for validation; train on the remaining folds.
        val_fold_x = X_train_folds[i]
        val_fold_y = y_train_folds[i]
        temp_X_train = np.concatenate(X_train_folds[:i] + X_train_folds[i + 1:])
        temp_y_train = np.concatenate(y_train_folds[:i] + y_train_folds[i + 1:])
        classifier = KNearestNeighbor()
        classifier.train(temp_X_train, temp_y_train)
        dists = classifier.compute_distances_no_loops(val_fold_x)
        val_pred_y = classifier.predict_labels(dists, k)
        # Fraction of validation examples predicted correctly on this fold.
        num_correct = np.sum(val_pred_y == val_fold_y)
        fold_accuracy = float(num_correct) / val_pred_y.shape[0]
        accuracy_list.append(fold_accuracy)
        accuracy += fold_accuracy
    k_accuracy[k] = accuracy_list
    return accuracy / num_folds
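# For context, a minimal driver loop calling the helper above might look like
# the sketch below. The names k_choices and k_to_accuracy are hypothetical,
# and it assumes X_train / y_train are already loaded.
k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]  # assumed candidate grid
k_to_accuracy = {}
for k in k_choices:
    mean_acc = run_k_fold_cross_validation(X_train, y_train,
                                           num_folds=5, k=k,
                                           k_accuracy=k_to_accuracy)
    print('k = %d -> mean cross-validation accuracy: %f' % (k, mean_acc))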
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

print("test compute_distances_two_loops implementation......")
# Test your implementation:
dists = classifier.compute_distances_two_loops(X_test)
print(dists.shape)  # (num_test x num_train)

# We can visualize the distance matrix: each row is a single test example and
# its distances to training examples.
plt.imshow(dists, interpolation='none')
plt.show()

print("set k=1 and test the data......")
# Now implement the function predict_labels and run the code below:
# We use k = 1 (which is Nearest Neighbor).
y_test_pred = classifier.predict_labels(dists, k=1)

# Compute and print the fraction of correctly predicted examples
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))

print("use compute_distances_one_loop to calculate the dists.....")
# Now let's speed up distance matrix computation by using partial vectorization
# with one loop. Implement the function compute_distances_one_loop and run the
# code below:
dists_one = classifier.compute_distances_one_loop(X_test)

# To ensure that our vectorized implementation is correct, we make sure that it
# agrees with the naive implementation. There are many ways to decide whether
# two matrices are similar; one of the simplest is the Frobenius norm. In case
# you haven't seen it before, the Frobenius norm of two matrices is the square
# root of the squared sum of differences of all elements; in other words, reshape
# the matrices into vectors and compute the Euclidean distance between them.
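# A minimal sketch of the agreement check described above, using NumPy's
# np.linalg.norm with ord='fro' (the Frobenius norm). The 0.001 tolerance is
# an assumed threshold, not one prescribed by the text.
difference = np.linalg.norm(dists - dists_one, ord='fro')
print('Difference was: %f' % difference)
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    print('Uh-oh! The distance matrices are different')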
# **Inline Question #1:** Notice the structured patterns in the distance matrix,
# where some rows or columns are visibly brighter. (Note that with the default
# color scheme black indicates low distances while white indicates high
# distances.)
#
# - What in the data is the cause behind the distinctly bright rows?
# - What causes the columns?

# **Your Answer**: *fill this in.*


# In[ ]:

# Now implement the function predict_labels and run the code below:
# We use k = 1 (which is Nearest Neighbor).
y_test_pred = classifier.predict_labels(dists, k=1)

# Compute and print the fraction of correctly predicted examples
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))

# You should expect to see approximately `27%` accuracy. Now let's try out a
# larger `k`, say `k = 5`:

# In[ ]:

y_test_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))
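# For reference, one plausible shape for predict_labels is sketched below.
# This is NOT the assignment's reference solution; it assumes labels are
# non-negative integers so that np.bincount can tally neighbor votes.
def predict_labels_sketch(dists, y_train, k=1):
    num_test = dists.shape[0]
    y_pred = np.zeros(num_test, dtype=y_train.dtype)
    for i in range(num_test):
        # Labels of the k training points closest to test example i.
        closest_y = y_train[np.argsort(dists[i])[:k]]
        # Majority vote; np.argmax breaks ties toward the smaller label.
        y_pred[i] = np.argmax(np.bincount(closest_y))
    return y_pred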
x_test = np.reshape(x_test, (x_test.shape[0], -1))
print('after subsample and reshape:')
print('x_train : ', x_train.shape, ' x_test : ', x_test.shape)

# kNN classifier
classifier = KNearestNeighbor()
classifier.train(x_train, y_train)

# compute the distances between the test data and the training data
dists = classifier.compute_distances_no_loops(x_test)
# each row is a single test example and its distances to the training examples
print('dists shape : ', dists.shape)
plt.imshow(dists, interpolation='none')
plt.show()

y_test_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_test_pred == y_test)
acc = float(num_correct) / num_test
print('k=5, the accuracy is : ', acc)

# Cross-validation
# 5-fold cross-validation: split the training data into 5 pieces
num_folds = 5
# candidate values of k for the kNN classifier
k_choice = [1, 5, 8, 11, 15, 18, 20, 50, 100]
x_train_folds = np.array_split(x_train, num_folds)
y_train_folds = np.array_split(y_train, num_folds)
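# With the folds in place, a minimal sweep over k_choice might look like the
# sketch below. It reuses the fold-swapping pattern from
# run_k_fold_cross_validation above; k_to_accuracies is a hypothetical name.
k_to_accuracies = {}
for k in k_choice:
    k_to_accuracies[k] = []
    for i in range(num_folds):
        # Hold out fold i for validation, train on the rest.
        x_tr = np.concatenate(x_train_folds[:i] + x_train_folds[i + 1:])
        y_tr = np.concatenate(y_train_folds[:i] + y_train_folds[i + 1:])
        clf = KNearestNeighbor()
        clf.train(x_tr, y_tr)
        fold_dists = clf.compute_distances_no_loops(x_train_folds[i])
        pred = clf.predict_labels(fold_dists, k)
        k_to_accuracies[k].append(np.mean(pred == y_train_folds[i]))
    print('k = %d, mean accuracy = %f' % (k, np.mean(k_to_accuracies[k])))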