def run_k_fold_cross_validation(X_train, y_train, num_folds, k, k_accuracy):
    """Run num_folds-fold cross-validation for a single value of k.

    Stores the per-fold accuracies in k_accuracy[k] and returns the mean
    accuracy across the folds.
    """
    X_train_folds, y_train_folds = generate_folds(X_train, y_train, num_folds)
    accuracy = 0.0
    accuracy_list = []
    for i in range(num_folds):
        # Fold i is the validation set; the remaining folds form the training set.
        val_fold_x = X_train_folds[i]
        val_fold_y = y_train_folds[i]
        temp_X_train = np.concatenate(X_train_folds[:i] + X_train_folds[i + 1:])
        temp_y_train = np.concatenate(y_train_folds[:i] + y_train_folds[i + 1:])
        classifier = KNearestNeighbor()
        classifier.train(temp_X_train, temp_y_train)
        dists = classifier.compute_distances_no_loops(val_fold_x)
        val_pred_y = classifier.predict_labels(dists, k)
        num_correct = np.sum(val_pred_y == val_fold_y)
        fold_accuracy = float(num_correct) / val_pred_y.shape[0]
        accuracy_list.append(fold_accuracy)
        accuracy += fold_accuracy
    k_accuracy[k] = accuracy_list
    return accuracy / num_folds
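# generate_folds is assumed to be defined elsewhere in the assignment; it is
# not part of this file. A minimal sketch that matches how the folds are used
# above (plain Python lists of numpy arrays, so the list slicing and
# np.concatenate calls work as written) could look like this:
def generate_folds(X_train, y_train, num_folds):
    # np.array_split returns a list of num_folds roughly equal sub-arrays.
    X_train_folds = np.array_split(X_train, num_folds)
    y_train_folds = np.array_split(y_train, num_folds)
    return X_train_folds, y_train_folds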
# two matrices are similar; one of the simplest is the Frobenius norm. In case
# you haven't seen it before, the Frobenius norm of the difference of two
# matrices is the square root of the sum of squared differences of all
# elements; in other words, reshape the matrices into vectors and compute the
# Euclidean distance between them.
print('test the difference between two loops and one loop........')
difference = np.linalg.norm(dists - dists_one, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    print('Uh-oh! The distance matrices are different')

print('use compute_distances_no_loops to calculate the dists.....')
# Now implement the fully vectorized version inside compute_distances_no_loops
# and run the code
dists_two = classifier.compute_distances_no_loops(X_test)

# Check that the distance matrix agrees with the one we computed before:
print('test the difference between two loops and no loops........')
difference = np.linalg.norm(dists - dists_two, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    print('Uh-oh! The distance matrices are different')

# Let's compare how fast the implementations are
import time

def time_function(f, *args):
    """
    Call a function f with args and return the time (in seconds) that it
    took to execute.
    """
    tic = time.time()
    f(*args)
    toc = time.time()
    return toc - tic
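# The fully vectorized version relies on expanding the squared L2 distance:
# ||x - y||^2 = ||x||^2 + ||y||^2 - 2 * x.y, so the whole distance matrix can
# be built from two squared-norm terms and a single matrix product. A minimal
# sketch (not the classifier's actual implementation; it assumes X_test and
# X_train are 2-D arrays of shape (num_test, D) and (num_train, D)):
def no_loops_distances_sketch(X_test, X_train):
    test_sq = np.sum(X_test ** 2, axis=1, keepdims=True)   # (num_test, 1)
    train_sq = np.sum(X_train ** 2, axis=1)                # (num_train,)
    cross = X_test.dot(X_train.T)                          # (num_test, num_train)
    # Clip tiny negatives caused by floating-point round-off before the sqrt.
    return np.sqrt(np.maximum(test_sq + train_sq - 2 * cross, 0))

# time_function can then be used to compare the implementations, e.g.
# (assuming the classifier also exposes compute_distances_two_loops and
# compute_distances_one_loop methods, as the prints above suggest):
# no_loop_time = time_function(classifier.compute_distances_no_loops, X_test)
# print('No-loop version took %f seconds' % no_loop_time)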
mask = list(range(num_test))
x_test = x_test[mask]
y_test = y_test[mask]

# The image data has three channels; the next two steps reshape each
# 32x32x3 image into a single 3072-dimensional row vector.
x_train = np.reshape(x_train, (x_train.shape[0], -1))
x_test = np.reshape(x_test, (x_test.shape[0], -1))
print('after subsampling and reshaping:')
print('x_train : ', x_train.shape, ' x_test : ', x_test.shape)

# kNN classifier
classifier = KNearestNeighbor()
classifier.train(x_train, y_train)

# Compute the distances between the test data and the training data.
dists = classifier.compute_distances_no_loops(x_test)
# Each row holds a single test example's distances to all training examples.
print('dists shape : ', dists.shape)
plt.imshow(dists, interpolation='none')
plt.show()

y_test_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_test_pred == y_test)
acc = float(num_correct) / num_test
print('k=5, the accuracy is : ', acc)

# Cross-validation: split the training data into 5 folds.
num_folds = 5
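# With run_k_fold_cross_validation defined above, a typical next step is to
# sweep over candidate values of k and record the per-fold accuracies for
# each. A minimal sketch (the k_choices list is an illustrative assumption,
# not part of the original code):
k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]
k_accuracy = {}
for k in k_choices:
    mean_acc = run_k_fold_cross_validation(x_train, y_train, num_folds, k, k_accuracy)
    print('k = %d, mean cross-validation accuracy: %f' % (k, mean_acc))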