def test(X_train, y_train, X_test, y_test, best_k):
    classifier = KNearestNeighbor()
    classifier.train(X_train, y_train)
    y_test_pred = classifier.predict(X_test, k=best_k)
    # Compute and display the accuracy
    num_correct = np.sum(y_test_pred == y_test)
    accuracy = float(num_correct) / len(y_test)
    print('Best k=%d' % best_k)
    print('Got %d / %d correct => accuracy: %f' % (num_correct, len(y_test), accuracy))
def cross_validation(train_data, train_label):
    """Select the best hyperparameter k via cross-validation."""
    num_folds = 5
    k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]

    # Task:
    # Split the training data into folds. The training samples and their labels are
    # stored in the lists x_train_folds and y_train_folds, each of length num_folds,
    # where y_train_folds[i] is the label vector for the samples in x_train_folds[i].
    # Hint: numpy's array_split may be useful.
    x_train_folds = np.array_split(train_data, num_folds)
    y_train_folds = np.array_split(train_label, num_folds)

    # We store the accuracies for the different values of k in a dictionary. After
    # cross-validation, k_to_accuracies[k] holds a list of length num_folds with the
    # accuracies obtained for that value of k.
    k_to_accuracies = {}

    # Task:
    # Find the best k via k-fold cross-validation. For every k, run the kNN algorithm
    # num_folds times; in each run use one fold as the validation set and the remaining
    # folds as the training set. Store the validation results for each k and each fold
    # in the k_to_accuracies dictionary.
    classifiers = KNearestNeighbor()
    for k in k_choices:
        accuracies = np.zeros(num_folds)
        for fold in range(num_folds):
            temp_x = x_train_folds.copy()
            temp_y = y_train_folds.copy()
            # Build the validation set
            x_validate_fold = temp_x.pop(fold)
            y_validate_fold = temp_y.pop(fold)
            # Build the training set
            x_temp_train_fold = np.array([x for x_fold in temp_x for x in x_fold])
            y_temp_train_fold = np.array([y for y_fold in temp_y for y in y_fold])
            classifiers.train(x_temp_train_fold, y_temp_train_fold)
            # Evaluate on the validation fold
            y_test_predicted = classifiers.predict(x_validate_fold, k, 0)
            num_correct = np.sum(y_test_predicted == y_validate_fold)
            accuracy = float(num_correct) / y_validate_fold.shape[0]
            accuracies[fold] = accuracy
        k_to_accuracies[k] = accuracies

    # Print the accuracies
    for k in sorted(k_to_accuracies):
        for accuracy in k_to_accuracies[k]:
            print('k = %d, accuracy = %f' % (k, accuracy))

    # Scatter plot of all accuracies
    for k in k_choices:
        accuracies = k_to_accuracies[k]
        plt.scatter([k] * len(accuracies), accuracies)

    # Plot the trend line with error bars that correspond to the standard deviation
    # (mean and standard deviation of the accuracy for each k)
    accuracies_mean = np.array([np.mean(k_to_accuracies[k]) for k in sorted(k_to_accuracies)])
    accuracies_std = np.array([np.std(k_to_accuracies[k]) for k in sorted(k_to_accuracies)])
    plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std)
    plt.title('Cross-validation on k')
    plt.xlabel('k')
    plt.ylabel('Cross-validation accuracy')
    plt.show()
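# A hedged follow-up sketch (not part of the original snippet): given the
# k_to_accuracies dictionary built inside cross_validation above, pick the k whose
# mean accuracy across folds is highest. It assumes cross_validation is changed to
# end with `return k_to_accuracies`; the helper name select_best_k is hypothetical.
def select_best_k(k_to_accuracies):
    # max() over the keys, keyed by the mean fold accuracy for that k
    return max(k_to_accuracies, key=lambda k: np.mean(k_to_accuracies[k]))

# Example usage (hypothetical):
#   k_to_accuracies = cross_validation(X_train, y_train)
#   best_k = select_best_k(k_to_accuracies)
#   test(X_train, y_train, X_test, y_test, best_k)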
def main():
    X_train, y_train, X_test, y_test = load_CIFAR10('../cifar-10-batches-py')

    num_training = 48000
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]

    num_test = 1000
    mask = list(range(num_test))
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Reshape the image data into rows
    print(X_train.shape)
    '''
    (48000, 32, 32, 3)
    '''
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    print(X_train.shape)
    '''
    (48000, 3072)
    '''
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    print(X_train.shape, X_test.shape)
    '''
    (48000, 3072) (1000, 3072)
    '''

    classifier = KNearestNeighbor()
    classifier.train(X_train, y_train)
    y_test_pred = classifier.predict(X_test, k=5)
    print(y_test_pred)

    # Compute and display the accuracy
    num_correct = np.sum(y_test_pred == y_test)
    accuracy = float(num_correct) / num_test
    print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))
acc_k = np.zeros((len(k_choices), num_folds), dtype=float)
################################################################################
# TODO:                                                                        #
# Perform k-fold cross validation to find the best value of k. For each       #
# possible value of k, run the k-nearest-neighbor algorithm num_folds times,  #
# where in each case you use all but one of the folds as training data and    #
# the last fold as a validation set. Store the accuracies for all folds and   #
# all values of k in the k_to_accuracies dictionary.                          #
################################################################################
for ik, k in enumerate(k_choices):
    for i in range(num_folds):
        train_set = np.concatenate((X_train_folds[:i] + X_train_folds[i + 1:]))
        label_set = np.concatenate((y_train_folds[:i] + y_train_folds[i + 1:]))
        classifier.train(train_set, label_set)
        y_pred_fold = classifier.predict(X_train_folds[i], k=k, num_loops=0)
        num_correct = np.sum(y_pred_fold == y_train_folds[i])
        acc_k[ik, i] = float(num_correct) / len(y_train_folds[i])
    k_to_accuracies[k] = acc_k[ik]
################################################################################
#                               END OF YOUR CODE                              #
################################################################################

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))

# plot the raw observations
fig = plt.figure()
for k in k_choices:
# Cross-validation: run the kNN algorithm num_folds times; in each run use one fold
# as the validation set and the remaining folds as the training set, and store the
# accuracies in k_to_accuracies.
classifier = KNearestNeighbor()
for k in k_choices:
    accuracies = np.zeros(num_folds)
    for fold in range(num_folds):
        temp_X = X_train_folds[:]
        temp_y = y_train_folds[:]
        X_validate_fold = temp_X.pop(fold)
        y_validate_fold = temp_y.pop(fold)
        temp_X = np.array([y for x in temp_X for y in x])
        temp_y = np.array([y for x in temp_y for y in x])
        classifier.train(temp_X, temp_y)
        y_test_pred = classifier.predict(X_validate_fold, k=k)
        num_correct = np.sum(y_test_pred == y_validate_fold)
        accuracy = float(num_correct) / len(y_test_pred)
        accuracies[fold] = accuracy
    k_to_accuracies[k] = accuracies

# Print the accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))

# Plot the raw observations
for k in k_choices:
    accuracies = k_to_accuracies[k]
    plt.scatter([k] * len(accuracies), accuracies)
for k in k_choices:
    acc = np.zeros(num_folds)
    for f in range(num_folds):
        X_current_fold = list(X_train_folds)
        del X_current_fold[f]
        X_current_fold = np.concatenate(X_current_fold)
        y_current_fold = list(y_train_folds)
        del y_current_fold[f]
        y_current_fold = np.concatenate(y_current_fold)
        classifier = KNearestNeighbor()
        classifier.train(X_current_fold, y_current_fold)
        y_current_predict = classifier.predict(X_train_folds[f], k, num_loops=0)
        num_correct = np.sum(y_current_predict == y_train_folds[f])
        acc[f] = float(num_correct) / len(y_train_folds[f])
    k_to_accuracies[k] = acc

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))

# plot the raw observations
for k in k_choices:
    accuracies = k_to_accuracies[k]
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all folds and all    #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
for k in k_choices:  # for each k
    for n in range(num_folds):  # for each nth fold
        other_folds = [x for x in range(num_folds) if x != n]
        X_test_fold = X_train_folds[n]
        y_test_fold = y_train_folds[n]  # we have the test data
        X_train_other_folds = np.concatenate([X_train_folds[i] for i in other_folds], axis=0)
        y_train_other_folds = np.concatenate([y_train_folds[i] for i in other_folds], axis=0)
        classifier = KNearestNeighbor()
        classifier.train(X_train_other_folds, y_train_other_folds)
        y_pred_fold = classifier.predict(X=X_test_fold, k=k)
        num_correct_fold = np.sum(y_pred_fold == y_test_fold)
        acc_fold = float(num_correct_fold) / len(y_test_fold)
        k_to_accuracies.setdefault(k, []).append(acc_fold)
        # print(k_to_accuracies)
        # print("for k=%d choice, the accuracy= %f" % (k, acc_fold))
################################################################################
#                               END OF YOUR CODE                               #
################################################################################

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))
    print("mean for k=%d is %f" % (k, np.mean(k_to_accuracies[k])))
X_train_instance_list = X_train_folds[:idx_fold] + X_train_folds[idx_fold + 1:]
y_train_instance_list = y_train_folds[:idx_fold] + y_train_folds[idx_fold + 1:]
X_train_instance = np.concatenate(X_train_instance_list)
y_train_instance = np.concatenate(y_train_instance_list)
# train
classifier.train(X_train_instance, y_train_instance)
# cross-validation
# dists_cv = classifier.compute_distances_no_loops(X_cv_instance)
# y_cv_pred = classifier.predict_labels(dists_cv, k=k_cv)
y_cv_pred = classifier.predict(X_cv_instance, k=k_cv)
num_correct = np.sum(y_cv_pred == y_cv_instance)
accuracy = float(num_correct) / len(y_cv_instance)
k_to_accuracies[k_cv].append(accuracy)
################################################################################
#                               END OF YOUR CODE                               #
################################################################################

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))

# In[ ]:
for k in k_choices:
    accuracies = k_to_accuracies[k]
    plt.scatter([k] * len(accuracies), accuracies)

# plot the trend line with error bars that correspond to standard deviation
accuracies_mean = np.array([np.mean(v) for k, v in sorted(k_to_accuracies.items())])
accuracies_std = np.array([np.std(v) for k, v in sorted(k_to_accuracies.items())])
plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std)
plt.title('Cross-validation on k')
plt.xlabel('k')
plt.ylabel('Cross-validation accuracy')
plt.show()

# In[ ]:

# Based on the cross-validation results above, choose the best value for k,
# retrain the classifier using all the training data, and test it on the test
# data. You should be able to get above 28% accuracy on the test data.
best_k = 1

classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
y_test_pred = classifier.predict(X_test, k=best_k)

# Compute and display the accuracy
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))
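# A hedged alternative to hardcoding best_k (not in the original notebook cell):
# accuracies_mean above is ordered by sorted(k_to_accuracies), so the k with the
# highest mean cross-validation accuracy can be read off with argmax. The name
# best_k_from_cv is used only for illustration.
best_k_from_cv = sorted(k_to_accuracies)[int(np.argmax(accuracies_mean))]
print('best k according to cross-validation: %d' % best_k_from_cv)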
dists2 = classifier.compute_distances_two_loops(X_test)
dists1 = classifier.compute_distances_one_loop(X_test)
dists0 = classifier.compute_distances_no_loops(X_test)
dists = classifier.compute_distances_no_loops(X_test)
print(dists.shape)

# Now implement the function predict_labels and run the code below:
# We use k = 1 (which is Nearest Neighbor).
y_test_pred = classifier.predict_labels(dists, k=1)

# Compute and print the fraction of correctly predicted examples
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))

classifier.predict(X_test, k=1, num_loops=0)
classifier.pridict_currency(X_test, y_test, k=1, num_loops=0)

# cross validation
num_folds = 5
k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]

X_train_folds = []
y_train_folds = []
X_train_folds = np.array_split(X_train, num_folds)
y_train_folds = np.array_split(y_train, num_folds)

k_to_accuracies = {}
for k in k_choices:
    validation_accuracies = []
    for i in range(num_folds):
        current_x_test = X_train_folds[i]
X_train_folds = np.array_split(X_train_temp, num_folds)
y_train_folds = np.array_split(y_train_temp, num_folds)
print(X_train_folds[4])

k_to_accuracies = {}
num_test = X_train_folds[0].shape[0]
for j in range(len(k_choices)):
    k = k_choices[j]
    for i in range(1, num_folds + 1):
        X_train_temp = np.concatenate((X_train_folds[num_folds - i],
                                       X_train_folds[num_folds - i - 1],
                                       X_train_folds[num_folds - i - 2],
                                       X_train_folds[num_folds - i - 3]), axis=0)
        y_train_temp = np.concatenate((y_train_folds[num_folds - i],
                                       y_train_folds[num_folds - i - 1],
                                       y_train_folds[num_folds - i - 2],
                                       y_train_folds[num_folds - i - 3]))
        X_test_temp = X_train_folds[num_folds - i - 4]
        y_test_temp = y_train_folds[num_folds - i - 4]
        classifier.train(X_train_temp, y_train_temp)
        y_test_pred = classifier.predict(X_test_temp, k=k)
        num_correct = np.sum(y_test_pred == y_test_temp)
        accuracy = float(num_correct) / num_test
        k_to_accuracies.setdefault(k, []).append(accuracy)

for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))

for k in k_choices:
    accuracies = k_to_accuracies[k]
    plt.scatter([k] * len(accuracies), accuracies)

# plot the trend line with error bars that correspond to standard deviation
accuracies_mean = np.array([np.mean(v) for k, v in sorted(k_to_accuracies.items())])
accuracies_std = np.array([np.std(v) for k, v in sorted(k_to_accuracies.items())])
plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std)
y_train_folds = []
X_train_folds = np.array_split(classifier.X_train, num_folds)
y_train_folds = np.array_split(classifier.y_train, num_folds)

k_to_accuracies = {}
X_val = X_train_folds[num_folds - 1]
y_val = y_train_folds[num_folds - 1]
for k in k_choices:
    k_to_accuracies[k] = []
    for i in range(num_folds):
        knn = KNearestNeighbor()
        knn.train(X_train_folds[i], y_train_folds[i])
        y_predict = knn.predict(X_val, k=k)
        acc = np.mean(y_predict == y_val)
        k_to_accuracies[k].append(acc)

print('k_to_accuracies')
print(k_to_accuracies)

for k in k_choices:
    accuracies = k_to_accuracies[k]
    plt.scatter([k] * len(accuracies), accuracies)
plt.show()

for k, v in sorted(k_to_accuracies.items()):
    print(k, v)
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
num_per_fold = len(X_train) / num_folds
X_valid_fold = X_train_folds[-1]
y_valid_fold = y_train_folds[-1]
X_train_folds = X_train_folds[:-1]
y_train_folds = y_train_folds[:-1]
accuracies = []
for i in range(len(k_choices)):
    accuracies = [0.0] * (num_folds - 1)
    for n in range(num_folds - 1):
        classifier.train(X_train_folds[n], y_train_folds[n])
        y_pred_folds = classifier.predict(X_valid_fold, k=k_choices[i], num_loops=0)
        num_correct = np.sum(y_valid_fold == y_pred_folds)
        accuracies[n] = float(num_correct) / num_per_fold
    k_to_accuracies.update({k_choices[i]: accuracies})
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))
import numpy as np
import matplotlib.pyplot as plt
from cs231n.data_utils import load_CIFAR10
from cs231n.classifiers import KNearestNeighbor

# Load data
cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
X_tre, y_tre, X_te, y_te = load_CIFAR10(cifar10_dir)
X_tre_rows = X_tre.reshape(X_tre.shape[0], 32 * 32 * 3)
X_te_rows = X_te.reshape(X_te.shape[0], 32 * 32 * 3)

# Hold out the first 1000 training examples as a validation split
X_val_rows = X_tre_rows[:1000, :]
y_val_rows = y_tre[:1000]
X_tre_rows = X_tre_rows[1000:, :]
y_tre = y_tre[1000:]

val_acc = []
for k in [1, 3, 5, 10, 20, 50, 100]:
    knn = KNearestNeighbor()
    knn.train(X_tre_rows, y_tre)
    y_val_predict = knn.predict(X_val_rows, k=k)
    acc = np.mean(y_val_predict == y_val_rows)
    print('accuracy: %f' % (acc * 100))
    val_acc.append((k, acc))
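# A small follow-up sketch (not part of the original snippet): val_acc holds
# (k, accuracy) pairs, so the best k on the held-out validation split can be
# selected with max() keyed on the accuracy entry.
best_k, best_acc = max(val_acc, key=lambda pair: pair[1])
print('best k = %d with validation accuracy %f' % (best_k, best_acc))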
k_to_accuracies = {}
for i in range(len(k_choices)):
    accuracy = []
    for j in range(num_folds):
        X_train_ = np.reshape(np.asarray(X_train_folds[:j] + X_train_folds[j + 1:]), (-1, 3072))
        y_train_ = np.reshape(np.asarray(y_train_folds[:j] + y_train_folds[j + 1:]), (4000, -1))
        X_test_ = np.asarray(X_train_folds[j])
        y_test_ = np.asarray(y_train_folds[j])
        from cs231n.classifiers import KNearestNeighbor
        classifier = KNearestNeighbor()
        classifier.train(X_train_, y_train_)
        y_test_pred = classifier.predict(X_test_, k=k_choices[i], num_loops=0)
        accuracy.append(np.sum(y_test_pred == y_test_) / y_test_.shape[0])
    k_to_accuracies[k_choices[i]] = accuracy

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))

# plot the raw observations
for k in k_choices:
    accuracies = k_to_accuracies[k]
    plt.scatter([k] * len(accuracies), accuracies)

# plot the trend line with error bars that correspond to standard deviation
# last fold as a validation set. Store the accuracies for all folds and all    #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
for k in k_choices:
    k_to_accuracies[k] = []

for f in range(num_folds):
    # Train on every fold except fold f
    X_train_val = np.concatenate([j for i, j in enumerate(X_train_folds) if i != f])
    y_train_val = np.concatenate([j for i, j in enumerate(y_train_folds) if i != f])
    classifier = KNearestNeighbor()
    classifier.train(X_train_val, y_train_val)
    # Evaluate every k on the held-out fold f
    for k in k_choices:
        y_pred = classifier.predict(X_train_folds[f], k)
        num_correct = np.sum(y_pred == y_train_folds[f])
        accuracy = float(num_correct) / float(y_train_folds[f].shape[0])
        k_to_accuracies[k].append(accuracy)
################################################################################
#                               END OF YOUR CODE                               #
################################################################################

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))

# In[17]:
# possible value of k, run the k-nearest-neighbor algorithm num_folds times,   #
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all folds and all    #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
for k_ in k_choices:
    k_to_accuracies.setdefault(k_, [])

for i in range(num_folds):
    classifier = KNearestNeighbor()
    X_val_train = np.vstack(X_train_folds[:i] + X_train_folds[i + 1:])
    y_val_train = np.vstack(y_train_folds[:i] + y_train_folds[i + 1:])
    y_val_train = y_val_train[:, 0]  # reshape the stacked label columns back to 1-D
    classifier.train(X_val_train, y_val_train)
    for k_ in k_choices:
        y_val_pred = classifier.predict(X_train_folds[i], k=k_, num_loops=2)
        num_correct = np.sum(y_val_pred == y_train_folds[i][:, 0])
        accuracy = float(num_correct) / len(y_val_pred)
        k_to_accuracies[k_].append(accuracy)
################################################################################
#                               END OF YOUR CODE                               #
################################################################################

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))

# In[ ]:
# possible value of k, run the k-nearest-neighbor algorithm num_folds times,   #
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all folds and all    #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
for k_ in k_choices:
    k_to_accuracies.setdefault(k_, [])

for i in range(num_folds):
    classifier = KNearestNeighbor()
    X_val_train = np.vstack(X_train_folds[0:i] + X_train_folds[i + 1:])
    y_val_train = np.vstack(y_train_folds[0:i] + y_train_folds[i + 1:])
    y_val_train = y_val_train[:, 0]
    classifier.train(X_val_train, y_val_train)
    for k_ in k_choices:
        y_val_pred = classifier.predict(X_train_folds[i], k=k_)
        num_correct = np.sum(y_val_pred == y_train_folds[i][:, 0])
        accuracy = float(num_correct) / len(y_val_pred)
        k_to_accuracies[k_] = k_to_accuracies[k_] + [accuracy]
################################################################################
#                               END OF YOUR CODE                               #
################################################################################

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))

# plot the raw observations
for k in k_choices:
    accuracies = k_to_accuracies[k]
# kValue = [3]
kAccuracies = []
# print(xTrain)
for ptr, k in enumerate(kValue):
    kValueAcc = []
    for i in range(0, cvFold):
        xValid = xTrain[i]
        yValid = yTrain[i]
        xTrainCV = xTrain[np.arange(cvFold) != i]
        yTrainCV = yTrain[np.arange(cvFold) != i]
        xTrainCV = np.reshape(xTrainCV, (lengthTrain - lengthTrain // cvFold, xTrainCV.shape[2]))
        yTrainCV = np.reshape(yTrainCV, (lengthTrain - lengthTrain // cvFold, ))
        clsfr.train(xTrainCV, yTrainCV)
        yPredict = clsfr.predict(xValid, k=k)
        acc = np.sum(yPredict == yValid)
        kValueAcc.append([float(acc) / (lengthTrain / cvFold)])
    kAccuracies.append(kValueAcc)

print([np.mean(i) for i in kAccuracies])

plt.figure()
x = np.array(kValue)
y = np.array([np.mean(i) for i in kAccuracies])
print(x.shape)
print(y.shape)
plt.errorbar(np.array(kValue),
             np.array([np.mean(i) for i in kAccuracies]),
             yerr=np.array([np.std(i) for i in kAccuracies]))
plt.show()
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
for k in k_choices:
    for n in range(num_folds):
        # Concat all our folds together except for the nth fold for training.
        current_train_fold_x = np.concatenate(tuple([X_train_folds[i] for i in range(num_folds) if i != n]))
        current_train_fold_y = np.concatenate(tuple([y_train_folds[i] for i in range(num_folds) if i != n]))

        # Select the held out fold to be our validation data.
        current_valid_fold_x = X_train_folds[n]
        current_valid_fold_y = y_train_folds[n]

        classifier.train(current_train_fold_x, current_train_fold_y)

        # Perform prediction on our validation set, default is to use the no-loop version.
        y_test_pred = classifier.predict(current_valid_fold_x, k=k)

        # Evaluate and store in k_to_accuracies dict.
        num_correct = np.sum(y_test_pred == current_valid_fold_y)
        if k not in k_to_accuracies:
            k_to_accuracies[k] = [float(num_correct) / current_valid_fold_x.shape[0]]
        else:
            k_to_accuracies[k].append(float(num_correct) / current_valid_fold_x.shape[0])
################################################################################
#                               END OF YOUR CODE                               #
################################################################################

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
num_folds = 5
k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]

X_train_folds = []
y_train_folds = []
X_train_folds = np.array_split(X_train, num_folds)
y_train_folds = np.array_split(y_train, num_folds)

k_to_accuracies = {}
for k in k_choices:
    k_to_accuracies[k] = []

for f in range(num_folds):
    # Train on every fold except fold f, then validate every k on fold f.
    X_train_val = np.concatenate([j for i, j in enumerate(X_train_folds) if i != f])
    y_train_val = np.concatenate([j for i, j in enumerate(y_train_folds) if i != f])
    classifier.train(X_train_val, y_train_val)
    for k in k_choices:
        y_pred = classifier.predict(X_train_folds[f], k)
        num_correct = np.sum(y_pred == y_train_folds[f])
        accuracy = float(num_correct) / float(y_train_folds[f].shape[0])
        k_to_accuracies[k].append(accuracy)
# Perform k-fold cross validation to find the best value of k. For each        #
# possible value of k, run the k-nearest-neighbor algorithm num_folds times,   #
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all folds and all    #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
for k in k_choices:
    k_to_accuracies[k] = []
    for i in range(num_folds):
        classifier_k_fold = KNearestNeighbor()
        classifier_k_fold.train(
            np.delete(X_train_folds, i, axis=0).reshape(-1, 3072),
            np.delete(y_train_folds, i, axis=0).reshape(-1))
        y_predict_k_fold = classifier_k_fold.predict(X_train_folds[i], k=k)
        correct_count = np.sum(y_predict_k_fold == y_train_folds[i])
        k_to_accuracies[k].append(float(correct_count / y_predict_k_fold.shape[0]))
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))

# %%
# plot the raw observations
for k in k_choices:
    accuracies = k_to_accuracies[k]
        accuracy = float(num_correct) / num_test
        k_to_accuracies[k].append(accuracy)

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))

# plot the trend line with error bars that correspond to standard deviation
accuracies_mean = np.array([np.mean(v) for k, v in sorted(k_to_accuracies.items())])
accuracies_std = np.array([np.std(v) for k, v in sorted(k_to_accuracies.items())])
plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std)
plt.title('Cross-validation on k')
plt.xlabel('k')
plt.ylabel('Cross-validation accuracy')
plt.show()

# Based on the cross-validation results above, choose the best value for k,
# retrain the classifier using all the training data, and test it on the test
# data.
best_k = 6

classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
y_test_pred = classifier.predict(X_test, k=best_k)

# Compute and display the accuracy
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all folds and all    #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
# study np.vstack.. np.hstack..
for ck in k_choices:
    k_to_accuracies[ck] = []

for it in range(num_folds):
    X_train_cv = np.vstack(X_train_folds[0:it] + X_train_folds[it + 1:])
    X_test_cv = X_train_folds[it]
    y_train_cv = np.hstack(y_train_folds[0:it] + y_train_folds[it + 1:])
    y_test_cv = y_train_folds[it]
    for ck in k_choices:
        classifier.train(X_train_cv, y_train_cv)
        y_predict = classifier.predict(X_test_cv, k=ck)
        k_to_accuracies[ck].append(np.mean(y_predict == y_test_cv))
################################################################################
#                               END OF YOUR CODE                               #
################################################################################

# Print out the computed accuracies
# for k in sorted(k_to_accuracies):
#     for accuracy in k_to_accuracies[k]:
#         print('k = %d, accuracy = %f' % (k, accuracy))

# plot the raw observations
for k in k_choices:
    accuracies = k_to_accuracies[k]
num_folds = 5
k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]

X_train_folds = []
y_train_folds = []
X_train_folds = np.array_split(X_train, num_folds)
y_train_folds = np.array_split(y_train, num_folds)

k_to_accuracies = {k: np.zeros(num_folds) for k in k_choices}
for k in k_to_accuracies:
    # Training phase
    for i in range(num_folds):
        X_train = X_train_folds[i]
        y_test_pred = classifier.predict(X_test, k=k, num_loops=0)
        # print("y_test_pred[0:4]: ", y_test_pred[0:4])
        # print("y_train_folds[", i, ",0:4]: ", y_train_folds[i][0:4])
        num_correct = np.sum(y_test_pred == y_test)
        print("k: ", k, " num_correct: ", num_correct, " len(y_test_pred): ", len(y_test_pred))
        accuracy = float(num_correct) / len(y_test_pred)
        print("accuracy: ", accuracy)
        # k_to_accuracies[k][i] = accuracy

    # Validation phase - need to make this different from training
    # y_val_pred = classifier.predict(X_train_folds[num_folds-1], k=k, num_loops=0)
    # num_correct = np.sum(y_val_pred == y_train_folds[num_folds-1])
    # accuracy = float(num_correct) / len(y_val_pred)
    # k_to_accuracies[k][num_folds-1] = accuracy
    # print("k_to_accuracies[", k, "]: ", k_to_accuracies[k])
# Perform k-fold cross validation to find the best value of k. For each        #
# possible value of k, run the k-nearest-neighbor algorithm num_folds times,   #
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all folds and all    #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
for k in k_choices:
    accuracy = []
    for fold in range(num_folds):
        Xval = X_train_folds[fold]
        yval = y_train_folds[fold]
        # Use every fold except the current one for training
        Xtrain = np.concatenate([X_train_folds[i] for i in range(num_folds) if i != fold])
        ytrain = np.concatenate([y_train_folds[i] for i in range(num_folds) if i != fold])
        classifier.train(Xtrain, ytrain)
        predictions = classifier.predict(Xval, k)
        acc = np.sum(predictions == yval) / float(yval.shape[0])
        accuracy.append(acc)
    k_to_accuracies[k] = accuracy
################################################################################
#                               END OF YOUR CODE                               #
################################################################################

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))

# In[ ]:
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
for k in k_choices:
    k_to_accuracies[k] = []

for i in range(num_folds):
    # Gather every fold except fold i as training data
    X_train_ = []
    y_train_ = []
    for j in range(num_folds):
        if j != i:
            X_train_.extend(X_train_folds[j])
            y_train_.extend(y_train_folds[j])
    classifier = KNearestNeighbor()
    classifier.train(np.array(X_train_), np.array(y_train_))
    X_val = np.array(X_train_folds[i])
    for k in k_choices:
        y_val_pred = classifier.predict(X_val, k=k)
        accuracy_val = np.mean(y_train_folds[i] == y_val_pred)
        k_to_accuracies[k].append(accuracy_val)
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))
X_train_folds = []
y_train_folds = []
X_train_folds = np.array_split(X_train, num_folds)  # (50000, 3072) ==> 5 (num_folds) chunks of (10000, 3072)
y_train_folds = np.array_split(y_train, num_folds)

k_to_accuracies = {}
for k_val in k_choices:
    k_to_accuracies[k_val] = []
    for i in range(num_folds):
        # print('Cross-validation: ' + str(i))
        X_train_cycle = np.concatenate([f for j, f in enumerate(X_train_folds) if j != i])
        y_train_cycle = np.concatenate([f for j, f in enumerate(y_train_folds) if j != i])
        X_val_cycle = X_train_folds[i]
        y_val_cycle = y_train_folds[i]
        knn = KNearestNeighbor()
        knn.train(X_train_cycle, y_train_cycle)
        y_val_pred = knn.predict(X_val_cycle, k_val)
        num_correct = np.sum(y_val_cycle == y_val_pred)
        k_to_accuracies[k_val].append(float(num_correct) / float(len(y_val_cycle)))

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))