def run_k_fold_cross_validation(X_train, y_train, num_folds, k, k_accuracy):
    """Evaluate a k-nearest-neighbor classifier with k-fold cross-validation.

    The training data is split by ``generate_folds``; each fold is held out
    once for validation while a fresh ``KNearestNeighbor`` is trained on the
    remaining folds.

    Args:
        X_train: Training inputs, forwarded unchanged to ``generate_folds``.
        y_train: Training labels, forwarded unchanged to ``generate_folds``.
        num_folds: Number of folds to iterate over.
        k: Neighbor count passed to ``predict_labels``; also used as the key
            under which the per-fold results are stored.
        k_accuracy: Mutable mapping that is updated in place:
            ``k_accuracy[k]`` is set to the list of per-fold accuracies.

    Returns:
        The mean validation accuracy over the ``num_folds`` folds.

    Raises:
        ZeroDivisionError: If ``num_folds`` is 0 (the mean is undefined).
    """
    # NOTE(review): assumes generate_folds returns two Python lists of arrays,
    # since the fold slices below are joined with list ``+`` — confirm.
    X_train_folds, y_train_folds = generate_folds(X_train, y_train, num_folds)

    fold_scores = []
    running_total = 0.0
    for held_out in range(num_folds):
        # The held-out fold is the validation set for this round.
        val_X = X_train_folds[held_out]
        val_y = y_train_folds[held_out]

        # Every other fold is stitched together to form the training set.
        remaining_X = X_train_folds[:held_out] + X_train_folds[held_out + 1:]
        remaining_y = y_train_folds[:held_out] + y_train_folds[held_out + 1:]
        fold_train_X = np.concatenate(remaining_X)
        fold_train_y = np.concatenate(remaining_y)

        model = KNearestNeighbor()
        model.train(fold_train_X, fold_train_y)
        predictions = model.predict_labels(
            model.compute_distances_no_loops(val_X), k)

        hits = np.sum(predictions == val_y)
        fold_score = float(hits) / predictions.shape[0]
        fold_scores.append(fold_score)
        running_total = running_total + fold_score

    # Expose the individual fold results to the caller before averaging.
    k_accuracy[k] = fold_scores
    return running_total / num_folds
# Example #2
# 0
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

print("test compute_distances_two_loops implementation......")
# Test your implementation:
dists = classifier.compute_distances_two_loops(X_test)
print(dists.shape)  # (num_test x num_train)
# We can visualize the distance matrix: each row is a single test example and
# its distances to training examples
plt.imshow(dists, interpolation='none')
plt.show()

print("set k=1 and test the data......")
# Now implement the function predict_labels and run the code below:
# We use k = 1 (which is Nearest Neighbor).
y_test_pred = classifier.predict_labels(dists, k=1)
# Compute and print the fraction of correctly predicted examples
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))

print("use compute_distances_one_loop to calculate the dists.....")
# Now lets speed up distance matrix computation by using partial vectorization
# with one loop. Implement the function compute_distances_one_loop and run the
# code below:
dists_one = classifier.compute_distances_one_loop(X_test)
# To ensure that our vectorized implementation is correct, we make sure that it
# agrees with the naive implementation. There are many ways to decide whether
# two matrices are similar; one of the simplest is the Frobenius norm. In case
# you haven't seen it before, the Frobenius norm of two matrices is the square
# root of the squared sum of differences of all elements; in other words, reshape
# Example #3
# 0

# **Inline Question #1:** Notice the structured patterns in the distance matrix, where some rows or columns are visibly brighter. (Note that with the default color scheme black indicates low distances while white indicates high distances.)
# 
# - What in the data is the cause behind the distinctly bright rows?
# - What causes the columns?

# **Your Answer**: *fill this in.*
# 
# 

# In[ ]:

# Now implement the function predict_labels and run the code below:
# We use k = 1 (which is Nearest Neighbor).
y_test_pred = classifier.predict_labels(dists, k=1)

# Compute and print the fraction of correctly predicted examples.
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
# print(...) with one pre-formatted string produces the same output on
# Python 2 and Python 3; the bare print statement is a SyntaxError on Python 3.
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))


# You should expect to see approximately `27%` accuracy. Now let's try out a larger `k`, say `k = 5`:

# In[ ]:

# Reuse the distance matrix computed above; only the neighbor count changes.
y_test_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))
# Example #4
# 0
# Flatten each test example into a single row so x_test is two-dimensional.
x_test = np.reshape(x_test, (x_test.shape[0], -1))
# The messages below were previously split from their `print` keyword onto a
# separate line, which turned them into no-op expressions; print them for real.
print('after subsample and re shape:')
print('x_train : ', x_train.shape, " x_test : ", x_test.shape)
# KNN classifier
classifier = KNearestNeighbor()
classifier.train(x_train, y_train)
# Compute the distances between the test data and the training data.
dists = classifier.compute_distances_no_loops(x_test)
# Each row is a single test example and its distances to the training examples.
print('dist shape : ', dists.shape)
plt.imshow(dists, interpolation='none')
plt.show()
y_test_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_test_pred == y_test)
# NOTE(review): num_test is not defined in this fragment; presumably it is the
# number of test examples set earlier in the script — confirm.
acc = float(num_correct) / num_test
print('k=5 ,The Accurancy is : ', acc)

# Cross-Validation

# 5-fold cross validation: split the training data into 5 pieces.
num_folds = 5
# Candidate values for k, the neighbor count of the KNN classifier.
k_choice = [1, 5, 8, 11, 15, 18, 20, 50, 100]
# np.array_split returns a list of sub-arrays, so no empty-list
# initialisation is needed beforehand.
x_train_folds = np.array_split(x_train, num_folds)
y_train_folds = np.array_split(y_train, num_folds)