def crossValidate(X_fold, y_fold, k, idx):
    """Evaluate a k-NN classifier on a single held-out fold.

    Fold ``idx`` is used for validation and every other fold is stacked
    into the training set.

    Args:
        X_fold: list of per-fold feature arrays (joined with ``np.vstack``).
        y_fold: list of per-fold label arrays (joined with ``np.hstack``).
        k: number of neighbours passed to ``predict_labels``.
        idx: index of the fold to hold out, ``0 <= idx < len(X_fold)``.

    Returns:
        Fraction of validation samples whose predicted label equals the
        true label.

    Raises:
        IndexError: if ``idx`` is not a valid fold index.
    """
    if not 0 <= idx < len(X_fold):
        raise IndexError('fold index %r out of range' % (idx,))

    X_cross = X_fold[idx]
    y_cross = y_fold[idx]

    # Train on every fold except ``idx``.  The earlier loop sliced around the
    # loop counter instead of ``idx``, so the last iteration decided which
    # fold was dropped and the validation fold usually leaked into training.
    X_train = np.vstack(list(X_fold[:idx]) + list(X_fold[idx + 1:]))
    y_train = np.hstack(list(y_fold[:idx]) + list(y_fold[idx + 1:]))

    classifier = KNearestNeighbor()
    classifier.train(X_train, y_train)
    dists = classifier.compute_distances_no_loops(X_cross)
    y_cross_pred = classifier.predict_labels(dists, k)

    num_correct = np.sum(y_cross_pred == y_cross)
    # Single-argument print() gives the same text on Python 2 and Python 3.
    print('cross val has  %s' % (y_cross.shape,))
    accuracy = float(num_correct) / len(y_cross)
    return accuracy
def cal_standard_knn():
    """Fit a k-NN classifier and print its k=3 accuracy.

    Reads ``X_train``, ``y_train``, ``X_test`` and ``y_test`` from the
    enclosing module scope.  Only the fully vectorized distance routine is
    actually executed; the two-loop and one-loop variants are announced in
    the output but not run.
    """
    knn = KNearestNeighbor()
    knn.train(X_train, y_train)
    print('KNN Classifier Train Done\n')

    # Progress banners for the three distance implementations.
    print('Ready to test with 2 loops')
    print('Ready to test with 1 loop')
    print('Ready to test with 0 loop\n')

    distance_matrix = knn.compute_distances_no_loops(X_test)
    print(distance_matrix.shape)

    print('Ready to predict')
    predicted = knn.predict_labels(distance_matrix, 3)

    # Mean of the element-wise match mask is the fraction predicted correctly.
    matches = predicted == y_test
    print('Accurarcy = %s' % np.mean(matches))
def test_cross_validation(X_train, y_train):
    """Run 5-fold cross-validation of k-NN over several values of k.

    The training data is split into five folds with ``np.array_split``.
    Each fold in turn serves as the validation set while the other four are
    stacked into the training set.  The per-fold accuracies are printed for
    every k in ``[1, 3, 5, 8, 10]``.

    Args:
        X_train: training samples, one row per sample.
        y_train: training labels; reshaped to a column internally so that
            they can be split and stacked the same way as the samples.

    Returns:
        None.  Results are written to stdout.
    """
    print('Ready to test with cross_validation')

    num_folds = 5
    k_choices = [1, 3, 5, 8, 10]

    print('Train data shape = ', X_train.shape)
    y_train = y_train.reshape(-1, 1)
    print('Train label shape = ', y_train.shape)

    X_train_folds = np.array_split(X_train, num_folds)
    y_train_folds = np.array_split(y_train, num_folds)

    k_to_accuracies = {each_k: [] for each_k in k_choices}

    # Iterate folds in the outer loop: the distance matrix depends only on
    # the fold split, not on k, so it is computed once per fold and reused
    # for every k instead of being recomputed len(k_choices) times.
    for i in range(num_folds):
        X_train_slice = np.vstack(X_train_folds[:i] + X_train_folds[i + 1:])
        y_train_slice = np.vstack(
            y_train_folds[:i] + y_train_folds[i + 1:]).reshape(-1)

        X_test_slice = X_train_folds[i]
        y_test_slice = y_train_folds[i].reshape(-1)

        classfer = KNearestNeighbor()
        classfer.train(X_train_slice, y_train_slice)
        dis = classfer.compute_distances_no_loops(X_test_slice)

        for each_k in k_choices:
            y_predict = classfer.predict_labels(dis, each_k)
            acc = np.mean(y_predict == y_test_slice)
            k_to_accuracies[each_k].append(acc)

    # Each list is in fold order, so the report matches a k-major traversal.
    for each_k in k_choices:
        for item in k_to_accuracies[each_k]:
            print('k = %d, acc = %f' % (each_k, item))
Example #4
0
def cross_validate(X_train, y_train):
    """Cross-validate k-NN over a fixed grid of k and plot the accuracies.

    The sample indices are split into five folds.  Each fold is held out
    once while the classifier is trained on the remaining samples.  The
    accuracies are printed, drawn as a scatter plot with a mean/std error
    bar per k, and the figure is saved to ``./figures/validation_k``.

    Args:
        X_train: training samples, indexable by boolean mask and by an
            integer index array along the first axis.
        y_train: training labels aligned with ``X_train``.

    Returns:
        None.
    """
    num_folds = 5
    k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]
    N = len(X_train)
    train_folds = np.array_split(np.arange(N), num_folds)
    k_to_accuracies = {k: [] for k in k_choices}

    # Folds form the outer loop so the distance matrix, which does not
    # depend on k, is computed once per fold rather than once per (k, fold).
    for fold_idx in train_folds:
        mask = np.ones(N, dtype=bool)
        mask[fold_idx] = False  # everything except the held-out fold

        classifier = KNearestNeighbor()
        classifier.train(X_train[mask], y_train[mask])

        X_test_cur = X_train[fold_idx]
        y_test_cur = y_train[fold_idx]
        dists = classifier.compute_distances_no_loops(X_test_cur)

        for k1 in k_choices:
            y_test_pred = classifier.predict_labels(dists, k=k1)
            num_correct = np.sum(y_test_pred == y_test_cur)
            accuracy = float(num_correct) / len(y_test_cur)
            k_to_accuracies[k1].append(accuracy)

    # Single-argument print() gives the same text on Python 2 and Python 3.
    for k in sorted(k_to_accuracies):
        for accuracy in k_to_accuracies[k]:
            print('k = %d, accuracy = %f' % (k, accuracy))

    for k in k_choices:
        accuracies = k_to_accuracies[k]
        plt.scatter([k] * len(accuracies), accuracies)

    # Look the statistics up in k_choices order so they always line up with
    # the x values handed to errorbar.
    accuracies_mean = np.array([np.mean(k_to_accuracies[k]) for k in k_choices])
    accuracies_std = np.array([np.std(k_to_accuracies[k]) for k in k_choices])
    plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std)
    plt.title('Cross-validation on k')
    plt.xlabel('k')
    plt.ylabel('Cross-validation accuracy')
    plt.savefig('./figures/validation_k')
def cross_validate(X_train, y_train):
    """Run 5-fold cross-validation of k-NN and plot accuracy against k.

    Every fold of sample indices is held out once; the classifier is
    trained on the complementary samples and scored on the held-out ones
    for each candidate k.  Accuracies are printed, plotted (scatter plus
    mean/std error bars) and saved to ``./figures/validation_k``.

    Args:
        X_train: training samples, indexable along the first axis by a
            boolean mask and by an integer index array.
        y_train: labels aligned with ``X_train``.

    Returns:
        None.
    """
    num_folds = 5
    k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]
    N = len(X_train)
    train_folds = np.array_split(np.arange(N), num_folds)
    k_to_accuracies = {k: [] for k in k_choices}

    for held_out in train_folds:
        # Boolean mask selecting all samples outside the held-out fold.
        mask = np.ones(N, dtype=bool)
        mask[held_out] = False

        classifier = KNearestNeighbor()
        classifier.train(X_train[mask], y_train[mask])

        X_test_cur = X_train[held_out]
        y_test_cur = y_train[held_out]

        # Distances are independent of k: compute them once per fold and
        # reuse them for every candidate k.
        dists = classifier.compute_distances_no_loops(X_test_cur)
        for k1 in k_choices:
            y_test_pred = classifier.predict_labels(dists, k=k1)
            num_correct = np.sum(y_test_pred == y_test_cur)
            k_to_accuracies[k1].append(float(num_correct) / len(y_test_cur))

    # print(...) with one argument is valid and identical on Python 2 and 3.
    for k in sorted(k_to_accuracies):
        for accuracy in k_to_accuracies[k]:
            print('k = %d, accuracy = %f' % (k, accuracy))

    for k in k_choices:
        accuracies = k_to_accuracies[k]
        plt.scatter([k] * len(accuracies), accuracies)

    # Statistics are taken in k_choices order, matching the errorbar x axis.
    accuracies_mean = np.array([np.mean(k_to_accuracies[k]) for k in k_choices])
    accuracies_std = np.array([np.std(k_to_accuracies[k]) for k in k_choices])
    plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std)
    plt.title('Cross-validation on k')
    plt.xlabel('k')
    plt.ylabel('Cross-validation accuracy')
    plt.savefig('./figures/validation_k')
Example #6
0
def cross_validate(X_train, y_train, num_folds=5):
    """Pick the best k for k-NN by ``num_folds``-fold cross-validation.

    The data is split with ``np.array_split``.  For each fold the
    classifier is trained on the other folds, the distance matrix to the
    held-out fold is computed once, and every candidate k is scored
    against it.  Accuracies are printed and passed to
    ``plot_cross_validation``.

    Args:
        X_train: training samples, one row per sample.
        y_train: training labels aligned with ``X_train``.
        num_folds: number of cross-validation folds.

    Returns:
        The k with the highest mean accuracy across folds.
    """
    k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]

    X_train_folds = np.array_split(X_train, num_folds)
    y_train_folds = np.array_split(y_train, num_folds)

    # k_to_accuracies[k] ends up as a list of num_folds accuracy values.
    k_to_accuracies = {k: [] for k in k_choices}

    for i in range(num_folds):
        X_train_cv = np.vstack(X_train_folds[:i] + X_train_folds[i + 1:])
        y_train_cv = np.hstack(y_train_folds[:i] + y_train_folds[i + 1:])

        X_val = X_train_folds[i]
        y_val = y_train_folds[i]

        classifier = KNearestNeighbor()
        classifier.train(X_train_cv, y_train_cv)
        dists_cv = classifier.compute_distances_no_loops(X_val)

        for k in k_choices:
            y_val_pred = classifier.predict_labels(dists_cv, k=k)
            num_correct = np.sum(y_val_pred == y_val)
            accuracy = float(num_correct) / len(y_val)
            k_to_accuracies[k].append(accuracy)

    # Print out the computed accuracies.  The parenthesised single-argument
    # form prints the same text on Python 2 and Python 3.
    for k in sorted(k_to_accuracies):
        for accuracy in k_to_accuracies[k]:
            print('k = %d, accuracy = %f' % (k, accuracy))

    plot_cross_validation(k_choices, k_to_accuracies)

    # Ascending sort by mean accuracy; the last entry is the best k.
    sort_by_accuracy = sorted(k_to_accuracies,
                              key=lambda k: np.mean(k_to_accuracies[k]))
    return sort_by_accuracy[-1]
Example #7
0
# two matrices are similar; one of the simplest is the Frobenius norm. In case
# you haven't seen it before, the Frobenius norm of two matrices is the square
# root of the squared sum of differences of all elements; in other words, reshape
# the matrices into vectors and compute the Euclidean distance between them.
# Frobenius-norm distance between the reference matrix and the one-loop one.
difference = np.linalg.norm(dists - dists_one, ord='fro')
print('Difference was: %f' % difference)
print('Good! The distance matrices are the same' if difference < 0.001
      else 'Uh-oh! The distance matrices are different')



# Fully vectorized implementation (compute_distances_no_loops).
dists_two = classifier.compute_distances_no_loops(X_test)

# It must agree with the reference matrix as well.
difference = np.linalg.norm(dists - dists_two, ord='fro')
print('Difference was: %f' % difference)
print('Good! The distance matrices are the same' if difference < 0.001
      else 'Uh-oh! The distance matrices are different')

# Let's compare how fast the implementations are
def time_function(f, *args):
    """
    Call a function f with args and return the time (in seconds) that it took to execute.
    """
    import time
Example #8
0
# two matrices are similar; one of the simplest is the Frobenius norm. In case
# you haven't seen it before, the Frobenius norm of two matrices is the square
# root of the squared sum of differences of all elements; in other words, reshape
# the matrices into vectors and compute the Euclidean distance between them.
# Compare the one-loop distances with the reference matrix (Frobenius norm).
difference = np.linalg.norm(dists - dists_one, ord='fro')
print('Difference was: %f' % difference)
print('Good! The distance matrices are the same' if difference < 0.001
      else 'Uh-oh! The distance matrices are different')

# In[ ]:

# Run the fully vectorized version from compute_distances_no_loops.
dists_two = classifier.compute_distances_no_loops(X_test)

# Same agreement check against the reference matrix.
difference = np.linalg.norm(dists - dists_two, ord='fro')
print('Difference was: %f' % difference)
print('Good! The distance matrices are the same' if difference < 0.001
      else 'Uh-oh! The distance matrices are different')

# In[ ]:


# Let's compare how fast the implementations are
def time_function(f, *args):
    """
Example #9
0
    # Loop through all folds: fold j is the validation fold, the remaining
    # folds are stacked into the training set.
    for j in range(num_folds):
        X_test_cv = X_train_folds[j]
        y_test_cv = y_train_folds[j]

        # X/y_train_folds are lists, so "+" concatenates the fold lists
        # while leaving out the j-th entry.
        X_train_cv = np.vstack(X_train_folds[0:j] + X_train_folds[j + 1:])
        y_train_cv = np.hstack(y_train_folds[0:j] + y_train_folds[j + 1:])

        classifier.train(X_train_cv, y_train_cv)
        dists_cv = classifier.compute_distances_no_loops(X_test_cv)
        y_test_pred = classifier.predict_labels(dists_cv, k)
        num_correct_cv = np.sum(y_test_pred == y_test_cv)
        accuracy_cv = float(num_correct_cv) / y_test_cv.shape[0]
        # Single-argument print() gives the same text on Python 2 and 3.
        print(y_test_cv.shape[0])
        print('Accuracy at %d-nearest neighbors, cv-fold is %d-th fold, is %.2f' % (
            k, j + 1, accuracy_cv * 100))

        k_to_accuracies[k].append(accuracy_cv)

################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies
Example #10
0
# np.array_split also accepts a sample count that is not a multiple of
# num_folds; np.split raises ValueError in that case.
X_train_folds = np.array_split(X_train, num_folds)
y_train_folds = np.array_split(y_train, num_folds)
k_to_accuracies = {}

for k_choice in k_choices:
    for i in range(num_folds):
        knn = KNearestNeighbor()
        # Join every fold except the i-th along the sample axis.
        # np.concatenate yields the same array as flattening the folds row
        # by row in Python, without the per-element overhead.
        xtrain = np.concatenate(X_train_folds[:i] + X_train_folds[i + 1:])
        ytrain = np.concatenate(y_train_folds[:i] + y_train_folds[i + 1:])
        knn.train(xtrain, ytrain)
        dists = knn.compute_distances_no_loops(X_train_folds[i])
        y_test_pred = knn.predict_labels(dists, k=k_choice)
        num_correct = np.sum(y_test_pred == y_train_folds[i])
        accuracy = float(num_correct) / len(y_train_folds[i])
        k_to_accuracies.setdefault(k_choice, []).append(accuracy)
        print('k = %d, accuracy = %f' % (k_choice, accuracy))

# plot the raw observations, one column of points per k
for k in k_choices:
    accuracies = k_to_accuracies[k]
    plt.scatter([k] * len(accuracies), accuracies)

# per-k mean and standard deviation, ordered by ascending k
accuracies_mean = np.array(
    [np.mean(v) for k, v in sorted(k_to_accuracies.items())])
accuracies_std = np.array(
    [np.std(v) for k, v in sorted(k_to_accuracies.items())])
def test1():
    """Run the CIFAR-10 k-NN experiment end to end.

    Loads the dataset from ``cs231n/datasets/cifar-10-batches-py``, keeps
    the first 5000 training and 500 test samples, flattens every sample
    into a row vector, prints the k=5 test accuracy and finally calls
    ``cross_validate`` on the training subset.

    Returns:
        None.  All results are printed.
    """
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # %-formatting into one string prints the same text whether print is
    # the Python 2 statement or the Python 3 function.
    print('Training data shape: %s' % (X_train.shape,))
    print('Training label shape: %s' % (y_train.shape,))
    print('Test data shape: %s' % (X_test.shape,))
    print('Test label shape: %s' % (y_test.shape,))

    # Subsample with an index array, which copies the selected rows.
    num_training = 5000
    mask = np.arange(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]

    num_test = 500
    mask = np.arange(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Flatten each sample into a single row.
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    print('%s %s' % (X_train.shape, X_test.shape))

    from cs231n.classifiers import KNearestNeighbor
    classifier = KNearestNeighbor()
    classifier.train(X_train, y_train)

    dists = classifier.compute_distances_no_loops(X_test)

    y_test_pred = classifier.predict_labels(dists, k=5)
    num_correct = np.sum(y_test_pred == y_test)
    accuracy = float(num_correct) / num_test
    print("God %d/%d correct => accuracy: %f" % (num_correct, num_test, accuracy))
    cross_validate(X_train, y_train)
Example #12
0
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
for k in k_choices:
    accuracies = []
    for i in range(num_folds):
        # Copy the fold lists and drop fold i, which is the validation fold.
        X_train_this = list(X_train_folds)
        del X_train_this[i]
        y_train_this = list(y_train_folds)
        del y_train_this[i]
        # np.vstack replaces its deprecated alias np.row_stack.
        x = np.vstack(X_train_this)
        y = np.concatenate(y_train_this)
        print('after row stack')
        print(x.shape)
        print(y.shape)
        classifier.train(x, y)
        dists = classifier.compute_distances_no_loops(X_train_folds[i])
        y_test_pred = classifier.predict_labels(dists, k)
        num_correct = np.sum(y_test_pred == y_train_folds[i])
        # Normalise by the size of the validation fold.  Dividing by
        # num_test (the size of the separate test set) gave a wrong
        # accuracy whenever the two sizes differ.
        accuracy = float(num_correct) / len(y_train_folds[i])
        accuracies.append(accuracy)
    k_to_accuracies[k] = accuracies
################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies (print() form works on Python 2 and 3)
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))

# plot the raw observations
Example #13
0
# two matrices are similar; one of the simplest is the Frobenius norm. In case
# you haven't seen it before, the Frobenius norm of two matrices is the square
# root of the squared sum of differences of all elements; in other words, reshape
# the matrices into vectors and compute the Euclidean distance between them.
# Frobenius-norm comparison of the one-loop distances with the reference.
# Single-argument print(...) prints the same text on Python 2 and Python 3,
# whereas the bare print statement is a syntax error on Python 3.
difference = np.linalg.norm(dists - dists_one, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    print('Uh-oh! The distance matrices are different')

# In[ ]:

# Now implement the fully vectorized version inside compute_distances_no_loops
# and run the code
dists_two = classifier.compute_distances_no_loops(X_test)

# check that the distance matrix agrees with the one we computed before:
difference = np.linalg.norm(dists - dists_two, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    print('Uh-oh! The distance matrices are different')

# In[ ]:


# Let's compare how fast the implementations are
def time_function(f, *args):
    """
Example #14
0
# you haven't seen it before, the Frobenius norm of two matrices is the square
# root of the squared sum of differences of all elements; in other words, reshape
# the matrices into vectors and compute the Euclidean distance between them.
# Frobenius-norm comparison of the one-loop distances with the reference.
# print(...) with one argument is valid on both Python 2 and Python 3.
difference = np.linalg.norm(dists - dists_one, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
  print('Good! The distance matrices are the same')
else:
  print('Uh-oh! The distance matrices are different')


# In[ ]:

# Now implement the fully vectorized version inside compute_distances_no_loops
# and run the code
dists_two = classifier.compute_distances_no_loops(X_test)

# check that the distance matrix agrees with the one we computed before:
difference = np.linalg.norm(dists - dists_two, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
  print('Good! The distance matrices are the same')
else:
  print('Uh-oh! The distance matrices are different')


# In[ ]:

# Let's compare how fast the implementations are
def time_function(f, *args):
  """
# To ensure that our vectorized implementation is correct, we make sure that it
# agrees with the naive implementation. There are many ways to decide whether
# two matrices are similar; one of the simplest is the Frobenius norm. In case
# you haven't seen it before, the Frobenius norm of two matrices is the square
# root of the squared sum of differences of all elements; in other words, reshape
# the matrices into vectors and compute the Euclidean distance between them.
# Frobenius norm of the element-wise difference; a value near zero means the
# one-loop matrix matches the reference.  print(...) with one argument
# behaves identically on Python 2 and Python 3.
difference = np.linalg.norm(dists - dists_one, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    print('Uh-oh! The distance matrices are different')

# Now implement the fully vectorized version inside compute_distances_no_loops
# and run the code
dists_two = classifier.compute_distances_no_loops(X_test)

# check that the distance matrix agrees with the one we computed before:
difference = np.linalg.norm(dists - dists_two, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    print('Uh-oh! The distance matrices are different')


# Let's compare how fast the implementations are
def time_function(f, *args):
    """
  Call a function f with args and return the time (in seconds) that it took to execute.
  """
Example #16
0
import numpy as np
import h5py
from numpy import loadtxt
from cs231n.classifiers import KNearestNeighbor

# The context manager closes the HDF5 file even if reading the dataset fails.
with h5py.File('img_data.h5', 'r') as h5f:
    X = h5f['dataset_1'][:]
# loadtxt already treats every line as one row, so no delimiter is needed.
# NOTE(review): recent NumPy releases appear to reject a newline delimiter;
# this assumes y_labels.txt holds one label per line - confirm.
y = loadtxt("y_labels.txt", dtype=np.uint8, unpack=False)
X_train = X[8000:35117, :]
y_train = y[8000:35117]
X_val = X[3000:8000, :]
y_val = y[3000:8000]
# Derive the validation size from the data so the accuracy stays correct if
# the file holds fewer rows than the slice bounds.
num_val = len(y_val)

# Create a kNN classifier instance and let it memorise the training data.
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
dists = classifier.compute_distances_no_loops(X_val)
y_val_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_val_pred == y_val)
accuracy = float(num_correct) / num_val
# print(...) with one argument prints the same text on Python 2 and 3.
print(accuracy)


Example #17
0
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all fold and all     #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
for k in k_choices:
    k_to_accuracies[k] = np.zeros(num_folds)
    for i in range(num_folds):
        # Every fold except the i-th forms the training set.  Concatenating
        # along the sample axis works for any number of folds and for folds
        # of unequal size, unlike a reshape with a hard-coded factor of 4.
        x_t = np.concatenate(X_train_folds[:i] + X_train_folds[i + 1:])
        y_t = np.concatenate(y_train_folds[:i] + y_train_folds[i + 1:])
        # Keep the previous 2-D layout: one row per training sample.
        x_t = x_t.reshape(x_t.shape[0], -1)
        y_t = y_t.reshape(y_t.shape[0], -1)

        # The i-th fold is the validation ("test") set for this pass.
        x_te = np.array(X_train_folds[i])
        y_te = np.array(y_train_folds[i])

        classifier.train(x_t, y_t)
        dists_ = classifier.compute_distances_no_loops(x_te)
        y_pred = classifier.predict_labels(dists_, k)
        # Fraction of correctly predicted examples in this fold.  The
        # denominator is the fold size; num_test belongs to the separate
        # test set and skewed the accuracy when the sizes differ.
        num_correct = np.sum(y_pred == y_te)
        accuracy = float(num_correct) / len(y_te)
        k_to_accuracies[k][i] = accuracy
################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))
Example #18
0
def test1():
    """Train and evaluate k-NN on a CIFAR-10 subset, then cross-validate.

    The dataset is read from ``cs231n/datasets/cifar-10-batches-py``.  The
    first 5000 training samples and the first 500 test samples are kept and
    flattened to row vectors.  The k=5 test accuracy is printed before
    ``cross_validate`` is called on the training subset.

    Returns:
        None.  All results are printed.
    """
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Formatting into one string keeps the printed text identical under the
    # Python 2 print statement and the Python 3 print function.
    print('Training data shape: %s' % (X_train.shape,))
    print('Training label shape: %s' % (y_train.shape,))
    print('Test data shape: %s' % (X_test.shape,))
    print('Test label shape: %s' % (y_test.shape,))

    # Index-array selection copies the chosen rows.
    num_training = 5000
    mask = np.arange(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]

    num_test = 500
    mask = np.arange(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # One flattened row per sample.
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    print('%s %s' % (X_train.shape, X_test.shape))

    from cs231n.classifiers import KNearestNeighbor
    classifier = KNearestNeighbor()
    classifier.train(X_train, y_train)

    dists = classifier.compute_distances_no_loops(X_test)

    y_test_pred = classifier.predict_labels(dists, k=5)
    num_correct = np.sum(y_test_pred == y_test)
    accuracy = float(num_correct) / num_test
    print("God %d/%d correct => accuracy: %f" % (num_correct, num_test,
                                                 accuracy))
    cross_validate(X_train, y_train)
Example #19
0
    # To ensure that our vectorized implementation is correct, we make sure that it
    # agrees with the naive implementation. There are many ways to decide whether
    # two matrices are similar; one of the simplest is the Frobenius norm. In case
    # you haven't seen it before, the Frobenius norm of two matrices is the square
    # root of the squared sum of differences of all elements; in other words, reshape
    # the matrices into vectors and compute the Euclidean distance between them.
    # Frobenius-norm comparison of the one-loop distances with the reference.
    # print(...) with one argument is valid on both Python 2 and Python 3.
    difference = np.linalg.norm(dists - dists_one, ord='fro')
    print('Difference was: %f' % (difference, ))
    if difference < 0.001:
        print('Good! The distance matrices are the same')
    else:
        print('Uh-oh! The distance matrices are different')

    # Now implement the fully vectorized version inside compute_distances_no_loops
    # and run the code
    dists_two = classifier.compute_distances_no_loops(X_test)

    # check that the distance matrix agrees with the one we computed before:
    difference = np.linalg.norm(dists - dists_two, ord='fro')
    print('Difference was: %f' % (difference, ))
    if difference < 0.001:
        print('Good! The distance matrices are the same')
    else:
        print('Uh-oh! The distance matrices are different')

    # Let's compare how fast the implementations are
    def time_function(f, *args):
        """
      Call a function f with args and return the time (in seconds) that it took to execute.
      """
        import time
Example #20
0
################################################################################
# TODO:                                                                        #
# Perform k-fold cross validation to find the best value of k. For each        #
# possible value of k, run the k-nearest-neighbor algorithm num_folds times,   #
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all fold and all     #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
# Your code
# Cross-validation by rotation: the front fold is taken out for validation,
# the folds left in the lists are the training data, and the validation fold
# is re-queued at the back afterwards.
for k in k_choices:
    accuracies = []
    for i in range(num_folds):
        X_val, y_val = X_train_folds.pop(0), y_train_folds.pop(0)

        # Whatever remains in the lists is this round's training set.
        classifier.train(np.vstack(X_train_folds), np.hstack(y_train_folds))
        dists = classifier.compute_distances_no_loops(X_val)

        y_val_pred = classifier.predict_labels(dists, k=k)
        num_correct = np.sum(y_val_pred == y_val)
        accuracies.append(float(num_correct) / y_val.shape[0])

        # Put the fold back so the next round validates on the next one.
        X_train_folds.append(X_val)
        y_train_folds.append(y_val)

    k_to_accuracies[k] = accuracies
        
    
################################################################################
#                                 END OF YOUR CODE                             #
################################################################################
Example #21
0
# Score the predictions currently held in y_test_pred.
num_correct = np.sum(y_test_pred == y_test)
accuracy = num_correct / float(num_test)
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))

# Predict again with k=5 and score the same way.
y_test_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_test_pred == y_test)
accuracy = num_correct / float(num_test)
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))

# Frobenius-norm gap between the reference and the one-loop distances.
dists_one = classifier.compute_distances_one_loop(X_test)
difference = np.linalg.norm(dists - dists_one, ord='fro')
print('Difference was: %f' % difference)

# Frobenius-norm gap between the one-loop and the no-loop distances.
dists_two = classifier.compute_distances_no_loops(X_test)
difference = np.linalg.norm(dists_one - dists_two, ord='fro')
print('Difference was: %f' % difference)

########################################################


def time_function(f, *args):
    """
    Call a function f with args and return the time (in seconds) that it took to execute.

    Args:
        f: callable to run once.
        *args: positional arguments forwarded to ``f``.

    Returns:
        Elapsed time in seconds as a float.  The return value of ``f`` is
        discarded.
    """
    import time
    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # intervals; time.time follows the wall clock and can jump when the
    # system time is adjusted.  Fall back where perf_counter is unavailable.
    timer = getattr(time, 'perf_counter', time.time)
    tic = timer()
    f(*args)
    return timer() - tic
Example #22
0
# agrees with the naive implementation. There are many ways to decide whether
# two matrices are similar; one of the simplest is the Frobenius norm. In case
# you haven't seen it before, the Frobenius norm of two matrices is the square
# root of the squared sum of differences of all elements; in other words, reshape
# the matrices into vectors and compute the Euclidean distance between them.

# Frobenius-norm comparison of the one-loop distances with the reference.
# print(...) with one argument is valid on both Python 2 and Python 3.
difference = np.linalg.norm(dists - dists_one, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
  print('Good! The distance matrices are the same')
else:
  print('Uh-oh! The distance matrices are different')

# Now implement the fully vectorized version inside compute_distances_no_loops
# and run the code
dists_two = classifier.compute_distances_no_loops(X_test)

# check that the distance matrix agrees with the one we computed before:
difference = np.linalg.norm(dists - dists_two, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
  print('Good! The distance matrices are the same')
else:
  print('Uh-oh! The distance matrices are different')

# Let's compare how fast the implementations are
def time_function(f, *args):
  """
  Call a function f with args and return the time (in seconds) that it took to execute.
  """
  import time
Example #23
0
# two matrices are similar; one of the simplest is the Frobenius norm. In case
# you haven't seen it before, the Frobenius norm of two matrices is the square
# root of the squared sum of differences of all elements; in other words, reshape
# the matrices into vectors and compute the Euclidean distance between them.
# Frobenius-norm comparison of the one-loop distances with the reference.
# The parenthesised single-argument print works as a Python 2 statement and
# as a Python 3 function call with identical output.
difference = np.linalg.norm(dists - dists_one, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    print('Uh-oh! The distance matrices are different')

# In[24]:

# Now implement the fully vectorized version inside compute_distances_no_loops
# and run the code
dists_two = classifier.compute_distances_no_loops(X_test)

# check that the distance matrix agrees with the one we computed before:
difference = np.linalg.norm(dists - dists_two, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    print('Uh-oh! The distance matrices are different')

# In[25]:


# Let's compare how fast the implementations are
def time_function(f, *args):
    """
Example #24
0
accuracy = num_correct / float(num_test)
print('Using k=5, Got %d /%d correct => accuracy: %f' % (num_correct, num_test, accuracy))

# Partially vectorized distance computation (one explicit loop).
dists_one = classifier.compute_distances_one_loop(X_test)
# Frobenius norm of the gap between the reference and the one-loop result.
differeces = np.linalg.norm(dists - dists_one, ord='fro')
print('Good, the two method give the same results.' if differeces < 0.001
      else 'The distance is different')

# Fully vectorized distance computation (no explicit loop).
dists_non = classifier.compute_distances_no_loops(X_test)
# Frobenius norm of the gap between the reference and the no-loop result.
differeces = np.linalg.norm(dists - dists_non, ord='fro')
print('Good, The differece is %f' % differeces if differeces < 0.001
      else 'The distance is different')

#Let's compute how fast the implementations are


def time_function(f, *args):
    '''
    Call a function f with args and return  the time (in seconds)
    that it took to execute
    '''
Example #25
0
    for fold in range(num_folds):
        # Each pass holds out one fold for validation.
        X_test_crossval = X_train_folds[fold]
        y_test_crossval = y_train_folds[fold]
        # Use the actual size of the held-out fold.  A hard-coded 1000 is
        # only right for one particular data size and fold count and skews
        # the accuracy otherwise.
        num_test_crossval = len(y_test_crossval)
        # The remaining folds form the training data.
        X_train_crossval = np.vstack(X_train_folds[0:fold] +
                                     X_train_folds[fold + 1:])
        y_train_crossval = np.hstack(y_train_folds[0:fold] +
                                     y_train_folds[fold + 1:])

        # Train the classifier on this split.
        classifier.train(X_train_crossval, y_train_crossval)
        # Distances from every validation sample to every training sample.
        dists_crossval = classifier.compute_distances_no_loops(X_test_crossval)
        # Predict labels with the current k.
        y_test_pred = classifier.predict_labels(dists_crossval, k)

        # Fraction of validation samples predicted correctly.
        num_correct = np.sum(y_test_pred == y_test_crossval)
        accuracy = float(num_correct) / num_test_crossval

        k_to_accuracies[k].append(accuracy)

################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies
for k in sorted(k_to_accuracies):