import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from PIL import Image

import Q1


def find_knn(test_x, train_x, train_y, k):
    """
    :param test_x: the samples to test
    :param train_x: the data for training
    :param train_y: the training targets
    :param k: the number of nearest neighbours
    :return: the predicted targets
    """
    # compute the distances between the training and test data points
    distances = Q1.distanceFunc(test_x, train_x)
    neg_distance = -distances
    # take the top k elements (the k smallest distances)
    _, indices = tf.nn.top_k(neg_distance, k=k)
    # build an N2-dim vector holding the targets for the test data points
    shape = int(test_x.shape[0])
    prediction_y = tf.zeros([shape], tf.int32)
    # find the nearest neighbours of each test point
    for i in range(shape):
        k_neighbors = tf.gather(train_y, indices[i, :])
        # find the most frequent neighbour label (majority vote)
        values, _, counts = tf.unique_with_counts(
            tf.reshape(k_neighbors, shape=[-1]))
        _, max_count_idx = tf.nn.top_k(counts, k=1)
        prediction = tf.gather(values, max_count_idx)
        # scatter the prediction into position i of the dense prediction vector
        sparse_test_target = tf.SparseTensor([[i]], prediction, [shape])
        prediction_y = tf.add(prediction_y,
                              tf.sparse_tensor_to_dense(sparse_test_target))
    return prediction_y
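# Illustrative usage only (not part of the original assignment code): a small
# sanity check of find_knn on random toy data. It assumes Q1.distanceFunc
# returns an [N_test, N_train] pairwise distance matrix, which is how find_knn
# uses it above.
def _demo_find_knn():
    toy_train_x = tf.constant(np.random.rand(20, 4), dtype=tf.float32)
    toy_train_y = tf.constant(np.random.randint(0, 3, size=20), dtype=tf.int32)
    toy_test_x = tf.constant(np.random.rand(5, 4), dtype=tf.float32)
    toy_pred = find_knn(toy_test_x, toy_train_x, toy_train_y, k=3)
    with tf.Session() as s:
        print(s.run(toy_pred))  # one predicted class label per test point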
def mse(test, prediction):
    """
    :return: mean squared error
    """
    sqr_err = tf.square(test - prediction)
    loss = tf.reduce_mean(tf.reduce_sum(sqr_err, 1)) / 2
    return loss


# initialize inputs and targets
trainingset_x = tf.placeholder(tf.float32)
trainingset_y = tf.placeholder(tf.float32)
test_x = tf.placeholder(tf.float32)
test_y = tf.placeholder(tf.float32)
k = tf.placeholder(tf.int32)

# compute the Euclidean distances between test and training points
dis = Q1.distanceFunc(test_x, trainingset_x)
# pick the k nearest neighbours and their responsibilities
# (this call assumes a regression variant of find_knn, defined elsewhere,
#  that takes the distance matrix and k_n and returns the responsibilities)
res = find_knn(dis, k_n=k)
# compute the prediction from the responsibilities
prediction_y = prediction(res, trainingset_y)
# mean squared error
MSE = mse(test_y, prediction_y)

# interactive session
sess = tf.InteractiveSession()

X = np.linspace(0.0, 11.0, num=1000)[:, np.newaxis]
# Find the nearest k neighbours:
ks = [1, 3, 5, 50]
min_valid = float("inf")
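# Illustrative check only (not from the original assignment): the mse loss
# above is the mean over samples of the per-sample summed squared error,
# divided by 2.
def _demo_mse():
    toy_target = tf.constant([[1.0], [2.0], [3.0]])
    toy_pred = tf.constant([[1.5], [2.0], [2.5]])
    with tf.Session() as s:
        # (0.25 + 0.0 + 0.25) / 3 / 2 = 0.0833...
        print(s.run(mse(toy_target, toy_pred)))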
def get_best_k(test_mode):
    """
    :param test_mode: choose mode 0 or 1 to decide the classification type
    """
    train_X, valid_X, test_X, train_Y, valid_Y, test_Y = data_segmentation(
        test_mode)
    train_data = tf.Variable(train_X, dtype=tf.float32)
    train_target = tf.Variable(train_Y, dtype=tf.int32)
    valid_data = tf.Variable(valid_X, dtype=tf.float32)
    valid_target = tf.Variable(valid_Y, dtype=tf.int32)
    test_data = tf.Variable(test_X, dtype=tf.float32)
    test_target = tf.Variable(test_Y, dtype=tf.int32)

    # interactive session
    sess = tf.InteractiveSession()
    init = tf.global_variables_initializer()
    sess.run(init)

    # candidate numbers of nearest neighbours:
    ks = [1, 5, 10, 25, 50, 100, 200]
    # initialize the optimal solution
    max_valid = float("-inf")
    k_best = float("inf")

    # evaluate each value of k on the validation set
    for kc in ks:
        prediction_valid = find_knn(valid_data, train_data, train_target, kc)
        # accuracy on the validation set
        acc = accuracy(prediction_valid, valid_target)
        a = sess.run(acc)
        print("The accuracy is {} with k = {}\n".format(a, kc))
        if a > max_valid:
            max_valid = a
            k_best = kc

    # test with the k selected on the validation set
    prediction_test = find_knn(test_data, train_data, train_target, k_best)
    acc = accuracy(prediction_test, test_target)
    a = sess.run(acc)
    print("The accuracy is {} with k = {}\n".format(a, k_best))

    # misclassifications for K = 10
    test_prediction = find_knn(test_data, train_data, train_target, 10)
    # find a misclassified test case
    mis_idx = tf.where(tf.not_equal(test_prediction, test_target))[0]
    # get the 10 nearest training points of this failed test case
    distances = Q1.distanceFunc(tf.gather(test_data, mis_idx), train_data)
    _, nearest_k_indices = tf.nn.top_k(-1 * distances, k=10)

    # show the misclassified test image
    img = Image.fromarray(
        255 * sess.run(tf.reshape(tf.gather(test_data, mis_idx), [32, 32])))
    plt.imshow(img, cmap='gray')
    plt.show()

    # show its 10 nearest training images
    for j in range(10):
        plt.subplot(2, 5, j + 1)
        img = Image.fromarray(255 * sess.run(
            tf.reshape(tf.gather(train_data, nearest_k_indices[:, j]),
                       [32, 32])))
        plt.imshow(img, cmap='gray')
    plt.tight_layout()
    plt.show()
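# accuracy() is called above but not defined in this file. The helper below is
# only an assumed sketch consistent with how it is used here (the fraction of
# matching integer labels); it is not necessarily the original implementation.
def accuracy(prediction, target):
    correct = tf.equal(tf.reshape(tf.cast(prediction, tf.int32), [-1]),
                       tf.reshape(tf.cast(target, tf.int32), [-1]))
    return tf.reduce_mean(tf.cast(correct, tf.float32))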