def cal_standard_knn():
    """Fit kNN on the module-level split and print its accuracy for k=3.

    Reads the globals ``X_train``, ``y_train``, ``X_test`` and ``y_test``.
    Only the fully vectorised distance routine is executed; the two-loop and
    one-loop variants are announced on stdout but their calls are commented
    out.
    """
    # Training a kNN classifier only memorises the data, nothing more.
    knn = KNearestNeighbor()
    knn.train(X_train, y_train)
    print('KNN Classifier Train Done\n')

    # The loop-based variants live in cs231n/classifiers/k_nearest_neighbor.py;
    # uncomment the calls below to exercise them.
    print('Ready to test with 2 loops')
    # distances = knn.compute_distances_two_loops(X_test)
    # print(distances.shape)

    print('Ready to test with 1 loop')
    # distances = knn.compute_distances_one_loop(X_test)
    # print(distances.shape)

    print('Ready to test with 0 loop\n')
    distances = knn.compute_distances_no_loops(X_test)
    print(distances.shape)

    print('Ready to predict')
    predictions = knn.predict_labels(distances, 3)
    hit_rate = np.mean(predictions == y_test)
    print('Accurarcy = %s' % hit_rate)
def crossValidate(X_fold, y_fold, k, idx):
    """Return the kNN accuracy obtained when fold ``idx`` is held out.

    Args:
        X_fold: list of per-fold feature arrays.
        y_fold: list of per-fold label arrays, aligned with ``X_fold``.
        k: number of neighbours handed to ``predict_labels``.
        idx: index of the fold used for validation; every other fold is
            used for training.

    Returns:
        Fraction of validation labels predicted correctly, as a float.

    Raises:
        IndexError: if ``idx`` is not a valid fold index.
    """
    if not 0 <= idx < len(X_fold):
        raise IndexError("fold index %r out of range" % (idx,))

    # Hold out exactly fold ``idx``.  Rebuilding the training set inside a
    # loop over all folds (leaving out fold ``i`` rather than ``idx``) would
    # put the validation fold into the training data.
    X_cross = X_fold[idx]
    y_cross = y_fold[idx]
    X_train = np.vstack(list(X_fold[:idx]) + list(X_fold[idx + 1:]))
    y_train = np.hstack(list(y_fold[:idx]) + list(y_fold[idx + 1:]))

    classifier = KNearestNeighbor()
    classifier.train(X_train, y_train)
    dists = classifier.compute_distances_no_loops(X_cross)
    y_cross_pred = classifier.predict_labels(dists, k)

    num_correct = np.sum(y_cross_pred == y_cross)
    # Single-argument print call: same output on Python 2 and Python 3.
    print("cross val has  %s" % (y_cross.shape,))
    accuracy = float(num_correct) / len(y_cross)
    return accuracy
Example #3
0
def use_classifier(x_train, y_train, x_test, y_test, k):
    """Train kNN on the training split and score it on the test split.

    Distances come from the ``get_distance`` helper and the score from
    ``accuracy_score``; both are defined outside this block.

    Returns:
        Whatever ``accuracy_score(y_test, predictions)`` returns.
    """
    model = KNearestNeighbor()
    model.train(x_train, y_train)
    predicted = model.predict_labels(get_distance(model, x_test), k)
    return accuracy_score(y_test, predicted)
def test_cross_validation(X_train, y_train):
    """Run 5-fold cross-validation of kNN for several k and print accuracies.

    Args:
        X_train: training features, one sample per row.
        y_train: training labels; reshaped internally, so any layout with
            one label per sample is accepted.

    Prints one ``k = ..., acc = ...`` line per (k, fold) pair, grouped by k.
    Returns nothing.
    """
    print('Ready to test with cross_validation')

    num_folds = 5
    k_choices = [1, 3, 5, 8, 10]

    print('Train data shape = ', X_train.shape)
    # Column layout lets the label folds be stacked with np.vstack below.
    y_train = y_train.reshape(-1, 1)
    print('Train label shape = ', y_train.shape)

    X_train_folds = np.array_split(X_train, num_folds)
    y_train_folds = np.array_split(y_train, num_folds)

    k_to_accuracies = {each_k: [] for each_k in k_choices}

    # The distance matrix depends only on the fold, not on k, so it is
    # computed once per fold and reused for every k.
    # NOTE(review): assumes predict_labels does not modify ``dis`` - confirm.
    for i in range(num_folds):
        X_train_slice = np.vstack(X_train_folds[:i] + X_train_folds[i + 1:])
        y_train_slice = np.vstack(
            y_train_folds[:i] + y_train_folds[i + 1:]).reshape(-1)

        X_test_slice = X_train_folds[i]
        y_test_slice = y_train_folds[i].reshape(-1)

        classfer = KNearestNeighbor()
        classfer.train(X_train_slice, y_train_slice)
        dis = classfer.compute_distances_no_loops(X_test_slice)

        for each_k in k_choices:
            y_predict = classfer.predict_labels(dis, each_k)
            acc = np.mean(y_predict == y_test_slice)
            k_to_accuracies[each_k].append(acc)

    for each_k in k_choices:
        for item in k_to_accuracies[each_k]:
            print('k = %d, acc = %f' % (each_k, item))
Example #5
0
def main():
    """Time the one-loop distance computation and report k=5 accuracy.

    The data comes from ``gen_train_test(5000, 500)``, which is defined
    outside this block (presumably 5000 training and 500 test samples -
    verify against that helper).
    """
    X_train, y_train, X_test, y_test = gen_train_test(5000, 500)
    num_test = y_test.shape[0]
    classifier = KNearestNeighbor()
    classifier.train(X_train, y_train)

    starttime = datetime.datetime.now()
    dists = classifier.compute_distances_one_loop(X_test)
    endtime = datetime.datetime.now()
    # The attribute access must be inside the call: on Python 3
    # ``print(...).seconds`` would dereference print's None return value.
    # ``.seconds`` is the whole-second component of the elapsed timedelta.
    print((endtime - starttime).seconds)
    print(dists.shape)

    y_test_pred = classifier.predict_labels(dists, k=5)
    # Compute and print the fraction of correctly predicted examples.
    num_correct = np.sum(y_test_pred == y_test)
    accuracy = float(num_correct) / num_test
    print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test,
                                                   accuracy))
Example #6
0
def cross_validate(X_train, y_train):
    """Run 5-fold cross-validation of kNN over a fixed k grid and plot it.

    Prints every per-fold accuracy, scatters the raw values, draws the mean
    with standard-deviation error bars and saves the figure to
    ``./figures/validation_k`` (the directory is assumed to exist).

    NOTE: this file contains more than one ``cross_validate`` definition;
    when the module is loaded the last one wins.

    Args:
        X_train: training features, one sample per row.
        y_train: training labels aligned with ``X_train``.
    """
    num_folds = 5
    k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]
    N = len(X_train)
    # Each entry holds the sample indices that form one validation fold.
    train_folds = np.array_split(np.arange(N), num_folds)
    k_to_accuracies = {k: [] for k in k_choices}

    # Distances depend only on the fold, so compute them once per fold and
    # reuse them for every k instead of once per (k, fold) pair.
    # NOTE(review): assumes predict_labels leaves ``dists`` untouched.
    for fold_indices in train_folds:
        mask = np.ones(N, dtype=bool)
        mask[fold_indices] = False

        classifier = KNearestNeighbor()
        classifier.train(X_train[mask], y_train[mask])

        X_test_cur = X_train[fold_indices]
        y_test_cur = y_train[fold_indices]
        dists = classifier.compute_distances_no_loops(X_test_cur)

        for k in k_choices:
            y_test_pred = classifier.predict_labels(dists, k=k)
            num_correct = np.sum(y_test_pred == y_test_cur)
            k_to_accuracies[k].append(float(num_correct) / len(y_test_cur))

    for k in sorted(k_to_accuracies):
        for accuracy in k_to_accuracies[k]:
            # Single-argument print call works on Python 2 and Python 3.
            print('k = %d, accuracy = %f' % (k, accuracy))

    for k in k_choices:
        accuracies = k_to_accuracies[k]
        plt.scatter([k] * len(accuracies), accuracies)

    # Statistics are taken in k_choices order so they line up with the
    # x-values passed to errorbar.
    accuracies_mean = np.array([np.mean(k_to_accuracies[k]) for k in k_choices])
    accuracies_std = np.array([np.std(k_to_accuracies[k]) for k in k_choices])
    plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std)
    plt.title('Cross-validation on k')
    plt.xlabel('k')
    plt.ylabel('Cross-validation accuracy')
    plt.savefig('./figures/validation_k')
def cross_validate(X_train, y_train):
    """Cross-validate kNN (5 folds) for a fixed list of k and plot the result.

    Output: one printed line per (k, fold) accuracy, a scatter of the raw
    accuracies, the per-k mean with standard-deviation error bars, and the
    figure written to ``./figures/validation_k`` (directory assumed to
    exist).

    NOTE: an earlier definition of ``cross_validate`` with the same
    signature exists in this file; this later one replaces it at load time.

    Args:
        X_train: training features, one sample per row.
        y_train: training labels aligned with ``X_train``.
    """
    num_folds = 5
    k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]
    N = len(X_train)
    # Index arrays, one per validation fold.
    train_folds = np.array_split(np.arange(N), num_folds)
    k_to_accuracies = {k: [] for k in k_choices}

    # One classifier and one distance matrix per fold; only the cheap
    # label-voting step is repeated for each k.
    # NOTE(review): assumes predict_labels does not modify ``dists``.
    for fold_indices in train_folds:
        mask = np.ones(N, dtype=bool)
        mask[fold_indices] = False

        classifier = KNearestNeighbor()
        classifier.train(X_train[mask], y_train[mask])

        X_test_cur = X_train[fold_indices]
        y_test_cur = y_train[fold_indices]
        dists = classifier.compute_distances_no_loops(X_test_cur)

        for k in k_choices:
            y_test_pred = classifier.predict_labels(dists, k=k)
            num_correct = np.sum(y_test_pred == y_test_cur)
            k_to_accuracies[k].append(float(num_correct) / len(y_test_cur))

    for k in sorted(k_to_accuracies):
        for accuracy in k_to_accuracies[k]:
            # Parenthesised single-argument form runs on Python 2 and 3.
            print('k = %d, accuracy = %f' % (k, accuracy))

    for k in k_choices:
        accuracies = k_to_accuracies[k]
        plt.scatter([k] * len(accuracies), accuracies)

    # Keep the statistics in the same order as the x-values (k_choices).
    accuracies_mean = np.array([np.mean(k_to_accuracies[k]) for k in k_choices])
    accuracies_std = np.array([np.std(k_to_accuracies[k]) for k in k_choices])
    plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std)
    plt.title('Cross-validation on k')
    plt.xlabel('k')
    plt.ylabel('Cross-validation accuracy')
    plt.savefig('./figures/validation_k')
Example #8
0
def cross_validate(X_train, y_train, num_folds=5):
    """Cross-validate kNN over a fixed k grid and return the best k.

    For each fold the classifier is trained on the remaining folds and the
    distance matrix is computed once, then reused for every k.  Accuracies
    are printed, handed to ``plot_cross_validation`` and the k with the
    highest mean accuracy is returned.

    Args:
        X_train: training features, one sample per row.
        y_train: training labels aligned with ``X_train``.
        num_folds: number of cross-validation folds.

    Returns:
        The element of the k grid whose mean fold accuracy is largest.
    """
    k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]

    X_train_folds = np.array_split(X_train, num_folds)
    y_train_folds = np.array_split(y_train, num_folds)

    # k_to_accuracies[k] ends up as a list with one accuracy per fold.
    k_to_accuracies = dict((k, []) for k in k_choices)

    for held_out in range(num_folds):
        remaining_X = X_train_folds[:held_out] + X_train_folds[held_out + 1:]
        remaining_y = y_train_folds[:held_out] + y_train_folds[held_out + 1:]
        X_train_cv = np.vstack(remaining_X)
        y_train_cv = np.hstack(remaining_y)

        X_val, y_val = X_train_folds[held_out], y_train_folds[held_out]

        classifier = KNearestNeighbor()
        classifier.train(X_train_cv, y_train_cv)
        dists_cv = classifier.compute_distances_no_loops(X_val)

        for k in k_choices:
            hits = np.sum(classifier.predict_labels(dists_cv, k=k) == y_val)
            k_to_accuracies[k].append(float(hits) / len(y_val))

    # Report every computed accuracy, grouped by k in ascending order.
    for k in sorted(k_to_accuracies):
        for accuracy in k_to_accuracies[k]:
            print('k = %d, accuracy = %f' % (k, accuracy))

    plot_cross_validation(k_choices, k_to_accuracies)

    # Ascending sort by mean accuracy: the final entry is the winner.
    ranked = sorted(k_to_accuracies,
                    key=lambda k: np.mean(k_to_accuracies[k]))
    return ranked[-1]
Example #9
0
# compute_distances_two_loops.

# Test your implementation: compute the test-vs-train distance matrix with
# the two-loop routine and show its shape.
dists = classifier.compute_distances_two_loops(X_test)
print(dists.shape)


# We can visualize the distance matrix: each row is a single test example and
# its distances to training examples.
plt.imshow(dists, interpolation='none')
plt.show()


# Now implement the function predict_labels and run the code below:
# We use k = 1 (which is Nearest Neighbor).
y_test_pred = classifier.predict_labels(dists, k=1)

# Compute and print the fraction of correctly predicted examples.
# num_test is defined outside this fragment.
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))


# Same evaluation with k = 5, reusing the distance matrix computed above.
y_test_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))

# Output recorded by the original author:
# Got 137 / 500 correct => accuracy: 0.274000
# Got 139 / 500 correct => accuracy: 0.278000
def test1():
    """Load CIFAR-10, score kNN (k=5) on a subsample, then cross-validate.

    Keeps the first 5000 training and 500 test images, flattens every image
    into one row, prints the k=5 test accuracy computed with the vectorised
    distance routine and finally passes the training subsample to
    ``cross_validate``.
    """
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Single-argument print calls give the same output on Python 2 and 3.
    print('Training data shape: %s' % (X_train.shape,))
    print('Training label shape: %s' % (y_train.shape,))
    print('Test data shape: %s' % (X_test.shape,))
    print('Test label shape: %s' % (y_test.shape,))

    # Subsample with an index array, which yields copies rather than views
    # of the full dataset.
    num_training = 5000
    mask = np.arange(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]

    num_test = 500
    mask = np.arange(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Flatten each image into a single row vector.
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    print('%s %s' % (X_train.shape, X_test.shape))

    from cs231n.classifiers import KNearestNeighbor
    classifier = KNearestNeighbor()
    classifier.train(X_train, y_train)

    dists = classifier.compute_distances_no_loops(X_test)

    y_test_pred = classifier.predict_labels(dists, k=5)
    num_correct = np.sum(y_test_pred == y_test)
    accuracy = float(num_correct) / num_test
    print("Got %d/%d correct => accuracy: %f" % (num_correct, num_test, accuracy))
    cross_validate(X_train, y_train)
Example #11
0
# Perform k-fold cross validation to find the best value of k. For each        #
# possible value of k, run the k-nearest-neighbor algorithm num_folds times,   #
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all fold and all     #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
# Your code
for k in k_choices:
    accuracies = []
    for i in range(num_folds):
        # Select the validation fold by index instead of popping it off the
        # shared lists and appending it back: the fold lists are never left
        # in a half-rotated state if something fails mid-loop.
        X_val = X_train_folds[i]
        y_val = y_train_folds[i]

        # Remaining folds in the order i+1, ..., last, 0, ..., i-1, which is
        # the order a pop(0)/append rotation would present them in when the
        # lists hold exactly num_folds folds.
        X_rest = X_train_folds[i + 1:] + X_train_folds[:i]
        y_rest = y_train_folds[i + 1:] + y_train_folds[:i]

        classifier.train(np.vstack(X_rest), np.hstack(y_rest))
        dists = classifier.compute_distances_no_loops(X_val)

        y_val_pred = classifier.predict_labels(dists, k=k)
        num_correct = np.sum(y_val_pred == y_val)

        accuracies.append(float(num_correct) / y_val.shape[0])

    # One accuracy per fold for this value of k.
    k_to_accuracies[k] = accuracies
        
    
################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies
Example #12
0
################################################################################
for k in k_choices:
    for fold in range(num_folds):  # This fold is held out for validation.
        # Validation data and temporary training data (all other folds).
        validation_X_test = X_train_folds[fold]
        validation_y_test = y_train_folds[fold]
        temp_X_train = np.concatenate(X_train_folds[:fold] + X_train_folds[fold + 1:])
        temp_y_train = np.concatenate(y_train_folds[:fold] + y_train_folds[fold + 1:])

        # Fresh classifier for every (k, fold) pair.
        test_classifier = KNearestNeighbor()
        test_classifier.train(temp_X_train, temp_y_train)

        # Distances from each validation sample to each training sample.
        temp_dists = test_classifier.compute_distances_two_loops(validation_X_test)
        temp_y_test_pred = test_classifier.predict_labels(temp_dists, k=k)

        # Accuracy on the held-out fold.  A fold-local name is used for the
        # sample count: rebinding the script-level ``num_test`` here would
        # corrupt later computations that divide by the test-set size.
        num_correct = np.sum(temp_y_test_pred == validation_y_test)
        fold_size = validation_X_test.shape[0]
        accuracy = float(num_correct) / fold_size
        print("k=",k,"Fold=",fold,"Accuracy=",accuracy)
        k_to_accuracies[k] = k_to_accuracies.get(k,[]) + [accuracy]
    

        
################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies
Example #13
0
# **Inline Question #1:** Notice the structured patterns in the distance matrix, where some rows or columns are visibly brighter. (Note that with the default color scheme black indicates low distances while white indicates high distances.)
#
# - What in the data is the cause behind the distinctly bright rows?
# A bright row is a test image that is not well matched: it is far from every
# training image, e.g. an outlier that is heavily zoomed in or otherwise transformed.
# - What causes the columns?
# A bright column is a training image that is an outlier with respect to ALL test images.
# **Your Answer**: *fill this in.*
#
#

# In[ ]:

# Now implement the function predict_labels and run the code below:
# We use k = 1 (which is Nearest Neighbor).
y_test_pred = classifier.predict_labels(dists, k=1)

# Compute and print the fraction of correctly predicted examples.
# classifier, dists, y_test and num_test come from earlier notebook cells.
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' %
      (num_correct, num_test, accuracy))

# You should expect to see approximately `27%` accuracy. Now let's try out a larger `k`, say `k = 5`:

# In[ ]:

y_test_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' %
Example #14
0
import numpy as np
import h5py
from numpy import loadtxt
from cs231n.classifiers import KNearestNeighbor

# Load the feature matrix; the context manager closes the HDF5 file even if
# reading the dataset raises.
with h5py.File('img_data.h5', 'r') as h5f:
    X = h5f['dataset_1'][:]

# One label per line.  The default whitespace splitting handles that layout;
# an explicit newline delimiter is rejected by newer NumPy releases.
y = loadtxt("y_labels.txt", dtype=np.uint8, unpack=False)

# Rows 8000..35116 are used for training and rows 3000..7999 for validation.
X_train = X[8000:35117, :]
y_train = y[8000:35117]
X_val = X[3000:8000, :]
y_val = y[3000:8000]
# Derive the count from the data so the accuracy stays correct if the
# validation slice turns out shorter than expected.
num_val = len(y_val)

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
dists = classifier.compute_distances_no_loops(X_val)
y_val_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_val_pred == y_val)
accuracy = float(num_correct) / num_val
# Single-argument print call works on Python 2 and Python 3.
print(accuracy)


Example #15
0
# "Training" only stores the data inside the classifier.
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

# Open cs231n/classifiers/k_nearest_neighbor.py and implement
# compute_distances_two_loops.

# Test your implementation:
dists = classifier.compute_distances_two_loops(X_test)
# print is written as a single-argument call throughout so the script runs
# unchanged on Python 2 and Python 3.
print(dists.shape)

# Show the distance matrix as an image.
plt.imshow(dists, interpolation='nearest')
plt.show()

# Now implement the function predict_labels and run the code below:
# We use k = 1 (which is Nearest Neighbor).
y_test_pred = classifier.predict_labels(dists, k=1)

# Compute and print the fraction of correctly predicted examples
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))

# Same evaluation with k = 5, reusing the distance matrix.
y_test_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))

# Now lets speed up distance matrix computation by using partial vectorization
# with one loop. Implement the function compute_distances_one_loop and run the
# code below:
dists_one = classifier.compute_distances_one_loop(X_test)
Example #16
0
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all fold and all     #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
for k in k_choices:
    k_to_accuracies[k] = np.zeros(num_folds)
    for i in range(num_folds):
        # Training set: every fold except fold i.  Concatenating works for
        # any number of folds (no hard-coded "4 remaining folds") and keeps
        # the labels one-dimensional instead of turning them into a column.
        x_t = np.concatenate(X_train_folds[:i] + X_train_folds[i + 1:])
        y_t = np.concatenate(y_train_folds[:i] + y_train_folds[i + 1:])

        # Validation set: fold i.
        x_te = np.asarray(X_train_folds[i])
        y_te = np.asarray(y_train_folds[i])

        classifier.train(x_t, y_t)
        dists_ = classifier.compute_distances_no_loops(x_te)
        y_pred = classifier.predict_labels(dists_, k)
        # Fraction of correctly predicted validation examples.  Divide by
        # the size of this fold, not by the size of the separate test set.
        num_correct = np.sum(y_pred == y_te)
        accuracy = float(num_correct) / len(y_te)
        k_to_accuracies[k][i] = accuracy
################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))
Example #17
0
plt.show()

# **Inline Question #1:** Notice the structured patterns in the distance matrix, where some rows or columns are visibly brighter. (Note that with the default color scheme black indicates low distances while white indicates high distances.)
#
# - What in the data is the cause behind the distinctly bright rows?
# - What causes the columns?

# **Your Answer**: *fill this in.*
#
#

# In[18]:

# Now implement the function predict_labels and run the code below:
# We use k = 1 (which is Nearest Neighbor).
y_test_pred = classifier.predict_labels(dists, k=1)

# Compute and print the fraction of correctly predicted examples.
# classifier, dists, y_test and num_test come from earlier notebook cells.
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' %
      (num_correct, num_test, accuracy))

# You should expect to see approximately `27%` accuracy. Now let's try out a larger `k`, say `k = 5`:

# In[19]:

y_test_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' %
Example #18
0
def test1():
    """Evaluate kNN (k=5) on a CIFAR-10 subsample and run cross-validation.

    Loads the dataset, keeps the first 5000 training and 500 test images,
    flattens each image to a row, prints the test accuracy obtained with
    the vectorised distance routine and then calls ``cross_validate`` on
    the training subsample.
    """
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Formatting into one string keeps the output identical on Python 2
    # and Python 3.
    print('Training data shape: %s' % (X_train.shape,))
    print('Training label shape: %s' % (y_train.shape,))
    print('Test data shape: %s' % (X_test.shape,))
    print('Test label shape: %s' % (y_test.shape,))

    # Index-array subsampling produces copies, not views of the full data.
    num_training = 5000
    mask = np.arange(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]

    num_test = 500
    mask = np.arange(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # One row per image.
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    print('%s %s' % (X_train.shape, X_test.shape))

    from cs231n.classifiers import KNearestNeighbor
    classifier = KNearestNeighbor()
    classifier.train(X_train, y_train)

    dists = classifier.compute_distances_no_loops(X_test)

    y_test_pred = classifier.predict_labels(dists, k=5)
    num_correct = np.sum(y_test_pred == y_test)
    accuracy = float(num_correct) / num_test
    print("Got %d/%d correct => accuracy: %f" % (num_correct, num_test,
                                                 accuracy))
    cross_validate(X_train, y_train)
Example #19
0
# Split the training data into folds.  array_split also accepts sample
# counts that are not an exact multiple of num_folds and gives the same
# folds as an even split when they are.
X_train_folds = np.array_split(X_train, num_folds)
y_train_folds = np.array_split(y_train, num_folds)
k_to_accuracies = {}

for k_choice in k_choices:
    for i in range(num_folds):
        knn = KNearestNeighbor()
        # Train on every fold except fold i; concatenate joins the folds in
        # one call instead of flattening them element by element in Python.
        xtrain = np.concatenate(X_train_folds[:i] + X_train_folds[i + 1:])
        ytrain = np.concatenate(y_train_folds[:i] + y_train_folds[i + 1:])
        knn.train(xtrain, ytrain)
        dists = knn.compute_distances_no_loops(np.asarray(X_train_folds[i]))
        y_test_pred = knn.predict_labels(dists, k=k_choice)
        num_correct = np.sum(y_test_pred == y_train_folds[i])
        accuracy = float(num_correct) / len(y_train_folds[i])
        k_to_accuracies.setdefault(k_choice, []).append(accuracy)
        print('k = %d, accuracy = %f' % (k_choice, accuracy))

# plot the raw observations
for k in k_choices:
    accuracies = k_to_accuracies[k]
    plt.scatter([k] * len(accuracies), accuracies)

# plot the trend line with error bars that correspond to standard deviation
accuracies_mean = np.array(
    [np.mean(v) for k, v in sorted(k_to_accuracies.items())])
accuracies_std = np.array(
    [np.std(v) for k, v in sorted(k_to_accuracies.items())])
plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std)
Example #20
0
plt.show()

# **Inline Question #1:** Notice the structured patterns in the distance matrix, where some rows or columns are visibly brighter. (Note that with the default color scheme black indicates low distances while white indicates high distances.)
#
# - What in the data is the cause behind the distinctly bright rows?
# - What causes the columns?

# **Your Answer**: *fill this in.*
#
#

# In[12]:

# Now implement the function predict_labels and run the code below:
# We use k = 1 (which is Nearest Neighbor).
y_test_pred = classifier.predict_labels(dists, k=1)

# Compute and print the fraction of correctly predicted examples.
# classifier, dists, y_test and num_test come from earlier notebook cells.
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' %
      (num_correct, num_test, accuracy))

# You should expect to see approximately `27%` accuracy. Now let's try out a larger `k`, say `k = 5`:

# In[13]:

y_test_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' %
Example #21
0
            num_folds
    ):  #Loop through all the folds of the training data. CV-fold is j-th. Other folds for training
        X_test_cv = X_train_folds[j]
        y_test_cv = y_train_folds[j]
        #print 'Test CV: ', X_test_cv.shape, y_test_cv.shape

        X_train_cv = np.vstack(
            X_train_folds[0:j] + X_train_folds[j + 1:]
        )  #Leaving out the j-th array. X/y_train_folds are LISTs
        y_train_cv = np.hstack(y_train_folds[0:j] + y_train_folds[j + 1:])
        #print 'Train CV: ', X_train_cv.shape, y_train_cv.shape

        classifier.train(X_train_cv, y_train_cv)
        dists_cv = classifier.compute_distances_no_loops(X_test_cv)
        #print 'Dists CV: ', dists_cv.shape
        y_test_pred = classifier.predict_labels(dists_cv, k)
        num_correct_cv = np.sum(y_test_pred == y_test_cv)
        accuracy_cv = float(num_correct_cv) / y_test_cv.shape[0]
        print y_test_cv.shape[0]
        print 'Accuracy at %d-nearest neighbors, cv-fold is %d-th fold, is %.2f' % (
            k, j + 1, accuracy_cv * 100)

        k_to_accuracies[k].append(accuracy_cv)

################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies
best_k = 1
max_accuracy = 0
Example #22
0
# Open cs231n/classifiers/k_nearest_neighbor.py and implement
# compute_distances_two_loops.
# print is written as a single-argument call throughout so the script runs
# unchanged on Python 2 and Python 3.
print("Calculating distances....")
# Test your implementation:
dists = classifier.compute_distances_two_loops(X_test)
print(dists.shape)    #500 x 50000

# We can visualize the distance matrix: each row is a single test example and
# its distances to training examples
plt.imshow(dists, interpolation='none')
plt.show()

# Now implement the function predict_labels and run the code below:
# We use k = 1 (which is Nearest Neighbor).
y_test_pred = classifier.predict_labels(dists, k=1)

# Compute and print the fraction of correctly predicted examples
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))

# Now lets speed up distance matrix computation by using partial vectorization
# with one loop. Implement the function compute_distances_one_loop and run the
# code below:
dists_one = classifier.compute_distances_one_loop(X_test)

# To ensure that our vectorized implementation is correct, we make sure that it
# agrees with the naive implementation. There are many ways to decide whether
# two matrices are similar; one of the simplest is the Frobenius norm. In case
# you haven't seen it before, the Frobenius norm of two matrices is the square
Example #23
0
        #Creating validation data and temp training data
        validation_X_test = X_train_folds[fold]
        validation_y_test = y_train_folds[fold]
        temp_X_train = np.concatenate(X_train_folds[:fold] +
                                      X_train_folds[fold + 1:])
        temp_y_train = np.concatenate(y_train_folds[:fold] +
                                      y_train_folds[fold + 1:])

        #Initializing a class
        test_classifier = KNearestNeighbor()
        test_classifier.train(temp_X_train, temp_y_train)

        #Computing the distance
        temp_dists = test_classifier.compute_distances_two_loops(
            validation_X_test)
        temp_y_test_pred = test_classifier.predict_labels(temp_dists, k=k)

        #Checking accuracies
        num_correct = np.sum(temp_y_test_pred == validation_y_test)
        num_test = validation_X_test.shape[0]
        accuracy = float(num_correct) / num_test
        print("k=", k, "Fold=", fold, "Accuracy=", accuracy)
        k_to_accuracies[k] = k_to_accuracies.get(k, []) + [accuracy]

################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
Example #24
0
# plot the raw observations
for k in k_choices:
  accuracies = k_to_accuracies[k]
  plt.scatter([k] * len(accuracies), accuracies)

# plot the trend line with error bars that correspond to standard deviation
accuracies_mean = np.array([np.mean(v) for k,v in sorted(k_to_accuracies.items())])
accuracies_std = np.array([np.std(v) for k,v in sorted(k_to_accuracies.items())])
plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std)
plt.title('Cross-validation on k')
plt.xlabel('k')
plt.ylabel('Cross-validation accuracy')
plt.show()

"""

# Based on the cross-validation results above, choose the best value for k,
# retrain the classifier using all the training data, and test it on the test
# data. You should be able to get above 28% accuracy on the test data.
best_k = 1

classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
# Alternative kept from the original (not used here):
#y_test_pred = classifier.predict(X_test, k=best_k)
dists_two = classifier.compute_distances_no_loops(X_test)
y_test_pred = classifier.predict_labels(dists_two, k=best_k)

# Compute and display the accuracy.
# y_test is indexed with [:, 0], i.e. it is treated as a 2-D array whose
# first column is compared against the predictions.
num_test = y_test.shape[0]
num_correct = np.sum(y_test_pred == y_test[:, 0])
accuracy = float(num_correct) / num_test
# Single-argument call form: prints the same text on Python 2 and Python 3.
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))
Example #25
0
    # compute_distances_two_loops.

    print 'before computing distances'
    # Test your implementation:
    dists = classifier.compute_distances_two_loops(X_test)
    print 'after computing distances'
    print dists.shape

    # We can visualize the distance matrix: each row is a single test example and
    # its distances to training examples
    #plt.imshow(dists, interpolation='none')
    #plt.show()

    # Now implement the function predict_labels and run the code below:
    # We use k = 1 (which is Nearest Neighbor).
    y_test_pred = classifier.predict_labels(dists, k=1)
    # Compute and print the fraction of correctly predicted examples
    num_correct = np.sum(y_test_pred == y_test)
    accuracy = float(num_correct) / num_test
    print 'Got %d / %d correct => accuracy: %f' % (num_correct, num_test,
                                                   accuracy)

    y_test_pred = classifier.predict_labels(dists, k=5)
    num_correct = np.sum(y_test_pred == y_test)
    accuracy = float(num_correct) / num_test
    print 'Got %d / %d correct => accuracy: %f' % (num_correct, num_test,
                                                   accuracy)

    dists_one = classifier.compute_distances_one_loop(X_test)

    # To ensure that our vectorized implementation is correct, we make sure that it
Example #26
0
  Call a function f with args and return the time (in seconds) that it took to execute.
  """
  import time
  tic = time.time()
  f(*args)
  toc = time.time()
  return toc - tic


# Time the fully vectorized distance computation.
no_loop_time = time_function(classifier.compute_distances_no_loops, X_test)
# Single-argument call form: prints the same text on Python 2 and Python 3.
print('No loop version took %f seconds' % no_loop_time)

# you should see significantly faster performance with the fully vectorized implementation

# Compute the distances again to keep the matrix itself
# (presumably time_function only returns the elapsed time -- confirm),
# then predict using the 5 nearest neighbours.
dists_two = classifier.compute_distances_no_loops(X_test)
y_test_pred = classifier.predict_labels(dists_two, k=5)

# Compute and print the fraction of correctly predicted examples
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))


# Cross-validation settings: number of folds and the candidate values of k.
num_folds = 5
k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]

# Per-fold containers, populated further down.
X_train_folds = []
y_train_folds = []
################################################################################
# TODO:                                                                        #
# Wall-clock timing of the whole train / distance / predict pass.
tic = time.time()

best_k = 10
classifier.train(X_train, y_train)

# L1 ('cityblock') distances between every test row and every training row.
# The L2 alternative from the original is kept below for reference:
# t_dist,dist_bst_v = time_function(classifier.compute_distances_no_loops,X_test)
dist_bst_v = cdist(X_test, X_train, 'cityblock')

# Predict the test labels from the best_k nearest neighbours.
y_pred = classifier.predict_labels(dist_bst_v, best_k)

# Fraction of test rows whose prediction matches y_test.
num_test = X_test.shape[0]
num_correct = np.sum(y_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))

toc = time.time()
print("Running time:", toc - tic)

### Data Set: CIFAR-10
###
Example #28
0
# Collapse all trailing dimensions so that every sample becomes one row.
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

#######################################################

from cs231n.classifiers import KNearestNeighbor

# Train, then build and display the test-vs-train distance matrix.
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
dists = classifier.compute_distances_two_loops(X_test)
print(dists.shape)
plt.imshow(dists, interpolation='none')
plt.show()

# Report accuracy for k = 1 and then k = 5; the k = 5 results are the ones
# left in y_test_pred / num_correct / accuracy afterwards.
for _k in (1, 5):
    y_test_pred = classifier.predict_labels(dists, k=_k)
    num_correct = np.sum(y_test_pred == y_test)
    accuracy = float(num_correct) / num_test
    print('Got %d / %d correct => accuracy: %f' %
          (num_correct, num_test, accuracy))

# Compare the one-loop distances against the two-loop result using the
# Frobenius norm of their difference.
dists_one = classifier.compute_distances_one_loop(X_test)
difference = np.linalg.norm(dists - dists_one, ord='fro')
print('Difference was: %f' % (difference,))
Example #29
0
# Collapse all trailing dimensions so that every test sample becomes one row.
X_test = np.reshape(X_test, (X_test.shape[0], -1))
# Formatted so the output matches the old `print a, b` statement on both
# Python 2 and Python 3.
print('%s %s' % (X_train.shape, X_test.shape))

from cs231n.classifiers import KNearestNeighbor
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

dists = classifier.compute_distances_two_loops(X_test)
print('Distance Matrix:  %s' % (dists.shape,))
# Set dists_show to a non-zero value to display the distance matrix.
dists_show = 0
if dists_show != 0:
    plt.imshow(dists, interpolation='none')
    plt.show()

# Now implement the function predict_labels and run the code below:
# We use k = 3 here.
y_test_pred = classifier.predict_labels(dists, k=3)
# Compute and print the percentage of correctly predicted examples
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test * 100
print('Got %d / %d correct, and accuracy: %f' % (num_correct, num_test,
                                                 accuracy))

# Now lets speed up distance matrix computation by using partial vectorization
dists_one = classifier.compute_distances_one_loop(X_test)
# To ensure that our vectorized implementation is correct, we make sure that
# it agrees with the naive implementation. There are many ways to decide
# whether two matrices are similar; one of the simplest is the Frobenius
# norm. In case you haven't seen it before, the Frobenius norm of two
# matrices is the square root of the squared sum of differences of all
# elements; in other words, reshape the matrices into vectors and compute
# the Euclidean distance between them.
difference = np.linalg.norm(dists - dists_one, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    print('Uh-oh! The distance matrices are different')
Example #30
0
plt.show()

# **Inline Question #1:** Notice the structured patterns in the distance matrix, where some rows or columns are visibly brighter. (Note that with the default color scheme black indicates low distances while white indicates high distances.)
#
# - What in the data is the cause behind the distinctly bright rows?
# - What causes the columns?

# **Your Answer**: *fill this in.*
#
#

# In[73]:

# Now implement the function predict_labels and run the code below:
# We use k = 1 (which is Nearest Neighbor).
y_test_pred = classifier.predict_labels(dists, k=1)
# Debug output: the predictions next to y_test, then the type of each.
print(y_test_pred, y_test)
print(type(y_test_pred), type(y_test))
# Compute and print the fraction of correctly predicted examples
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' %
      (num_correct, num_test, accuracy))

# You should expect to see approximately `27%` accuracy. Now let's try out a larger `k`, say `k = 5`:

# In[11]:

# Repeat the prediction with k = 5 and recompute the accuracy from the same
# distance matrix.
y_test_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
Example #31
0
classifier.train(X_train, y_train)

# Open cs231n/classifiers/k_nearest_neighbor.py and implement
# compute_distances_two_loops.

dists = classifier.compute_distances_two_loops(X_test)
print(dists.shape)

# We can visualize the distance matrix: each row is a single test example and
# its distances to training examples.
plt.imshow(dists, interpolation='none')
plt.show()

# Now run the prediction function predict_labels on the distance matrix.
# First try k=1.
y_test_pred = classifier.predict_labels(dists, k=1)
# Compute and print the fraction of correctly predicted examples.

num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d /%d correct => accuracy: %f' % (num_correct, num_test, accuracy))

# Second, set k=5 and evaluate again on the same distance matrix.
y_test_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Using k=5, Got %d /%d correct => accuracy: %f' %
      (num_correct, num_test, accuracy))

# Now let's speed up distance matrix computation by using partial
# vectorization with one loop.
Example #32
0
        num_test_crossval = 1000
        #Every Single time pick one fold in total folds for test validation
        X_test_crossval = X_train_folds[fold]
        y_test_crossval = y_train_folds[fold]
        #Pick rest of the folds as training data
        X_train_crossval = np.vstack(X_train_folds[0:fold] +
                                     X_train_folds[fold + 1:])
        y_train_crossval = np.hstack(y_train_folds[0:fold] +
                                     y_train_folds[fold + 1:])

        #Training the classifier
        classifier.train(X_train_crossval, y_train_crossval)
        #Calculating the L2 distance for test data
        dists_crossval = classifier.compute_distances_no_loops(X_test_crossval)
        #Predicting the output with current k value
        y_test_pred = classifier.predict_labels(dists_crossval, k)

        #Calculating the accuracy
        num_correct = np.sum(y_test_pred == y_test_crossval)
        accuracy = float(num_correct) / num_test_crossval

        k_to_accuracies[k].append(accuracy)

################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies
# Walk the k values in ascending order and report each recorded accuracy.
for k, _recorded in sorted(k_to_accuracies.items()):
    for accuracy in _recorded:
        print('k = %d, accuracy = %f' % (k, accuracy))