Ejemplo n.º 1
def crossValidate(X_fold, y_fold, k, idx):
    """Evaluate a k-NN classifier on a single cross-validation fold.

    Fold ``idx`` is held out for validation and the classifier is trained on
    all remaining folds stacked together.

    Args:
        X_fold: sequence of per-fold sample arrays (stacked with np.vstack).
        y_fold: sequence of per-fold label arrays (joined with np.hstack).
        k: number of neighbours passed to ``predict_labels``.
        idx: 0-based index of the fold to hold out.

    Returns:
        Fraction of held-out samples whose predicted label matches.

    Raises:
        IndexError: if ``idx`` is not a valid fold index. (The previous
            implementation scanned for ``idx`` and crashed later with an
            unbound-variable error when it was never found.)
    """
    num_folds = len(X_fold)
    if not 0 <= idx < num_folds:
        raise IndexError(
            "fold index %r out of range for %d folds" % (idx, num_folds))

    X_cross = X_fold[idx]
    y_cross = y_fold[idx]
    # list(...) keeps the "+" below a sequence concatenation even if the
    # caller passes tuples of folds.
    X_train = np.vstack(list(X_fold[:idx]) + list(X_fold[idx + 1:]))
    y_train = np.hstack(list(y_fold[:idx]) + list(y_fold[idx + 1:]))

    classifier = KNearestNeighbor()
    classifier.train(X_train, y_train)
    dists = classifier.compute_distances_no_loops(X_cross)
    y_cross_pred = classifier.predict_labels(dists, k)

    num_correct = np.sum(y_cross_pred == y_cross)
    # Single-argument print() renders identically on Python 2 and 3.
    print("cross val has  %s" % (y_cross.shape,))
    accuracy = float(num_correct) / len(y_cross)
    return accuracy
Ejemplo n.º 2
def cal_standard_knn():
    """Fit k-NN on the module-level training data and print test accuracy.

    Reads the globals ``X_train``, ``y_train``, ``X_test`` and ``y_test``.
    Only the fully vectorized distance routine is executed; the two- and
    one-loop variants are announced but not run. Predictions use k = 3.
    """
    knn = KNearestNeighbor()
    knn.train(X_train, y_train)
    print('KNN Classifier Train Done\n')

    # Progress banners for the three distance implementations. The loop
    # based ones (compute_distances_two_loops / compute_distances_one_loop)
    # are intentionally skipped here.
    for banner in ('Ready to test with 2 loops',
                   'Ready to test with 1 loop',
                   'Ready to test with 0 loop\n'):
        print(banner)
    distances = knn.compute_distances_no_loops(X_test)

    print('Ready to predict')
    predictions = knn.predict_labels(distances, 3)

    hit_rate = np.mean(predictions == y_test)
    print('Accurarcy = %s' % hit_rate)
Ejemplo n.º 3
def test_cross_validation(X_train, y_train):
    """Run 5-fold cross-validation of k-NN for several values of k.

    The training data is split into five folds; for each k in
    ``[1, 3, 5, 8, 10]`` every fold is held out once while the classifier is
    trained on the other four. Each fold's accuracy is recorded and all of
    them are printed at the end.

    Args:
        X_train: array of training samples, one row per sample.
        y_train: array of training labels; reshaped to a column internally
            so the label folds can be stacked with np.vstack.
    """
    print('Ready to test with cross_validation')

    num_folds = 5
    k_choices = [1, 3, 5, 8, 10]

    print('Train data shape = ', X_train.shape)
    y_train = y_train.reshape(-1, 1)
    print('Train label shape = ', y_train.shape)

    X_train_folds = np.array_split(X_train, num_folds)
    y_train_folds = np.array_split(y_train, num_folds)

    k_to_accuracies = {}

    for each_k in k_choices:
        fold_accuracies = k_to_accuracies.setdefault(each_k, [])
        for i in range(num_folds):
            classfer = KNearestNeighbor()
            # Train on every fold except fold i.
            X_train_slice = np.vstack(X_train_folds[0:i] +
                                      X_train_folds[i + 1:num_folds])
            y_train_slice = np.vstack(y_train_folds[0:i] +
                                      y_train_folds[i + 1:num_folds])
            y_train_slice = y_train_slice.reshape(-1)

            # Fold i is the validation set; flatten its labels back to 1-D.
            X_test_slice = X_train_folds[i]
            y_test_slice = y_train_folds[i].reshape(-1)

            classfer.train(X_train_slice, y_train_slice)
            dis = classfer.compute_distances_no_loops(X_test_slice)
            y_predict = classfer.predict_labels(dis, each_k)

            acc = np.mean(y_predict == y_test_slice)
            # Previously the accuracy was computed and then dropped, so the
            # report loop below never printed anything.
            fold_accuracies.append(acc)

    for each_k in k_choices:
        for item in k_to_accuracies[each_k]:
            print('k = %d, acc = %f' % (each_k, item))
Ejemplo n.º 4
def cross_validate(X_train, y_train):
    """Cross-validate k-NN over a fixed list of k values and plot the result.

    The sample indices are split into five folds. For each k, every fold is
    held out once and the classifier is trained on the remaining samples.
    The per-fold accuracies are printed, scattered, and summarised with a
    mean/std error bar via ``plt`` (presumably matplotlib.pyplot - confirm).

    Args:
        X_train: array of training samples, indexable by boolean mask and by
            integer index arrays.
        y_train: array of labels aligned with ``X_train``.
    """
    num_folds = 5
    k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]
    N = len(X_train)
    train_folds = np.array_split(range(N), num_folds, axis=0)
    k_to_accuracies = {}
    for k1 in k_choices:
        fold_eval = []
        for i in range(num_folds):
            # Boolean mask selecting everything except the held-out fold.
            mask = np.ones(N, dtype=bool)
            mask[train_folds[i]] = False
            X_train_cur = X_train[mask]
            y_train_cur = y_train[mask]
            classifier = KNearestNeighbor()
            classifier.train(X_train_cur, y_train_cur)

            X_test_cur = X_train[train_folds[i]]
            y_test_cur = y_train[train_folds[i]]

            dists = classifier.compute_distances_no_loops(X_test_cur)
            y_test_pred = classifier.predict_labels(dists, k=k1)
            num_correct = np.sum(y_test_pred == y_test_cur)
            accuracy = float(num_correct) / len(y_test_cur)
            # The accuracy used to be computed and discarded, leaving every
            # k with an empty result list.
            fold_eval.append(accuracy)
        k_to_accuracies[k1] = fold_eval

    for k in sorted(k_to_accuracies):
        for accuracy in k_to_accuracies[k]:
            print('k = %d, accuracy = %f' % (k, accuracy))

    for k in k_choices:
        accuracies = k_to_accuracies[k]
        plt.scatter([k] * len(accuracies), accuracies)

    accuracies_mean = np.array(
        [np.mean(v) for k, v in sorted(k_to_accuracies.items())])
    accuracies_std = np.array(
        [np.std(v) for k, v in sorted(k_to_accuracies.items())])
    plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std)
    plt.title('Cross-validation on k')
    plt.ylabel('Cross-validation accuracy')
Ejemplo n.º 5
def cross_validate(X_train, y_train):
    """Cross-validate k-NN over a fixed list of k values and plot the result.

    Sample indices are divided into five folds; for every k each fold is
    held out once while the classifier trains on the rest. Per-fold
    accuracies are printed and drawn (scatter plus mean/std error bars)
    through ``plt`` (presumably matplotlib.pyplot - confirm).

    Args:
        X_train: array of training samples, indexable by boolean mask and by
            integer index arrays.
        y_train: array of labels aligned with ``X_train``.
    """
    num_folds = 5
    k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]
    N = len(X_train)
    train_folds = np.array_split(range(N), num_folds, axis=0)
    k_to_accuracies = {}
    for k1 in k_choices:
        fold_eval = []
        for i in range(num_folds):
            # Everything outside fold i is training data.
            mask = np.ones(N, dtype=bool)
            mask[train_folds[i]] = False
            X_train_cur = X_train[mask]
            y_train_cur = y_train[mask]
            classifier = KNearestNeighbor()
            classifier.train(X_train_cur, y_train_cur)
            X_test_cur = X_train[train_folds[i]]
            y_test_cur = y_train[train_folds[i]]
            dists = classifier.compute_distances_no_loops(X_test_cur)
            y_test_pred = classifier.predict_labels(dists, k=k1)
            num_correct = np.sum(y_test_pred == y_test_cur)
            accuracy = float(num_correct) / len(y_test_cur)
            # Record the fold result; it was previously thrown away, so
            # every list in k_to_accuracies stayed empty.
            fold_eval.append(accuracy)
        k_to_accuracies[k1] = fold_eval

    for k in sorted(k_to_accuracies):
        for accuracy in k_to_accuracies[k]:
            print('k = %d, accuracy = %f' % (k, accuracy))
    for k in k_choices:
        accuracies = k_to_accuracies[k]
        plt.scatter([k] * len(accuracies), accuracies)

    accuracies_mean = np.array([np.mean(v) for k, v in sorted(k_to_accuracies.items())])
    accuracies_std = np.array([np.std(v) for k, v in sorted(k_to_accuracies.items())])
    plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std)
    plt.title('Cross-validation on k')
    plt.ylabel('Cross-validation accuracy')
Ejemplo n.º 6
def cross_validate(X_train, y_train, num_folds=5):
    """Pick the best k for k-NN by ``num_folds``-fold cross-validation.

    Distances are computed once per held-out fold and reused for every k.
    The per-fold accuracies are printed and handed to
    ``plot_cross_validation``.

    Args:
        X_train: array of training samples, one row per sample.
        y_train: array of labels aligned with ``X_train``.
        num_folds: number of folds to split the data into.

    Returns:
        The k from the candidate list with the highest mean accuracy.
    """
    k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]

    X_train_folds = np.array_split(X_train, num_folds)
    y_train_folds = np.array_split(y_train, num_folds)

    # k_to_accuracies[k] ends up as a list of length num_folds holding the
    # accuracy obtained with that k on each held-out fold.
    k_to_accuracies = {k: [] for k in k_choices}

    for i in range(num_folds):
        X_train_cv = np.vstack(X_train_folds[:i] + X_train_folds[i + 1:])
        y_train_cv = np.hstack(y_train_folds[:i] + y_train_folds[i + 1:])

        X_val = X_train_folds[i]
        y_val = y_train_folds[i]

        classifier = KNearestNeighbor()
        classifier.train(X_train_cv, y_train_cv)
        dists_cv = classifier.compute_distances_no_loops(X_val)

        for k in k_choices:
            y_val_pred = classifier.predict_labels(dists_cv, k=k)
            num_correct = np.sum(y_val_pred == y_val)
            accuracy = float(num_correct) / len(y_val)
            # Store the result; without this every list stayed empty and
            # the ranking below was computed over NaN means.
            k_to_accuracies[k].append(accuracy)

    # Print out the computed accuracies
    for k in sorted(k_to_accuracies):
        for accuracy in k_to_accuracies[k]:
            print('k = %d, accuracy = %f' % (k, accuracy))

    plot_cross_validation(k_choices, k_to_accuracies)

    # Ascending sort by mean accuracy, so the last entry is the best k.
    sort_by_accuracy = sorted(k_to_accuracies,
                              key=lambda k: np.mean(k_to_accuracies[k]))
    return sort_by_accuracy[-1]
Ejemplo n.º 7
# One of the simplest ways to decide whether two matrices are similar is the
# Frobenius norm of their difference: the square root of the sum of squared
# element-wise differences, i.e. the Euclidean distance between the two
# matrices after flattening them into vectors.
difference = np.linalg.norm(dists - dists_one, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    # Without this branch both messages were printed on success and
    # nothing at all on failure.
    print('Uh-oh! The distance matrices are different')

# Now implement the fully vectorized version inside compute_distances_no_loops
# and run the code
dists_two = classifier.compute_distances_no_loops(X_test)

# check that the distance matrix agrees with the one we computed before:
difference = np.linalg.norm(dists - dists_two, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    print('Uh-oh! The distance matrices are different')

# Let's compare how fast the implementations are
def time_function(f, *args):
    """Call a function f with args and return the time (in seconds) that it took to execute."""
    import time
Ejemplo n.º 8
# One of the simplest ways to decide whether two matrices are similar is the
# Frobenius norm of their difference: the square root of the sum of squared
# element-wise differences, i.e. the Euclidean distance between the two
# matrices after flattening them into vectors.
difference = np.linalg.norm(dists - dists_one, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    # Without this branch both messages were printed on success and
    # nothing at all on failure.
    print('Uh-oh! The distance matrices are different')

# In[ ]:

# Now implement the fully vectorized version inside compute_distances_no_loops
# and run the code
dists_two = classifier.compute_distances_no_loops(X_test)

# check that the distance matrix agrees with the one we computed before:
difference = np.linalg.norm(dists - dists_two, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    print('Uh-oh! The distance matrices are different')

# In[ ]:

# Let's compare how fast the implementations are
def time_function(f, *args):
Ejemplo n.º 9
    # Loop through all the folds of the training data: fold j is the
    # validation fold, the other folds are used for training.
    # range() was called without arguments (a TypeError); the fold count is
    # taken from the fold list itself.
    for j in range(len(X_train_folds)):
        X_test_cv = X_train_folds[j]
        y_test_cv = y_train_folds[j]

        # Leave out the j-th array. X/y_train_folds are lists, so "+" below
        # concatenates the lists of folds before stacking.
        X_train_cv = np.vstack(X_train_folds[0:j] + X_train_folds[j + 1:])
        y_train_cv = np.hstack(y_train_folds[0:j] + y_train_folds[j + 1:])

        classifier.train(X_train_cv, y_train_cv)
        dists_cv = classifier.compute_distances_no_loops(X_test_cv)
        y_test_pred = classifier.predict_labels(dists_cv, k)
        num_correct_cv = np.sum(y_test_pred == y_test_cv)
        accuracy_cv = float(num_correct_cv) / y_test_cv.shape[0]
        # Single-argument print() behaves the same on Python 2 and 3.
        print(y_test_cv.shape[0])
        print('Accuracy at %d-nearest neighbors, cv-fold is %d-th fold, is %.2f' % (
            k, j + 1, accuracy_cv * 100))


#                                 END OF YOUR CODE                             #

# Print out the computed accuracies
Ejemplo n.º 10
# Split the training data into folds. np.array_split gives the same result
# as np.split when the data divides evenly and also tolerates a remainder.
X_train_folds = np.array_split(X_train, num_folds)
y_train_folds = np.array_split(y_train, num_folds)
k_to_accuracies = {}

for k_choice in k_choices:
    for i in range(num_folds):
        knn = KNearestNeighbor()
        # Train on every fold except fold i. np.concatenate joins the folds
        # in one call instead of flattening them row by row in Python.
        xtrain = np.concatenate(X_train_folds[:i] + X_train_folds[i + 1:])
        ytrain = np.concatenate(y_train_folds[:i] + y_train_folds[i + 1:])
        knn.train(xtrain, ytrain)
        dists = knn.compute_distances_no_loops(X_train_folds[i])
        y_test_pred = knn.predict_labels(dists, k=k_choice)
        num_correct = np.sum(y_test_pred == y_train_folds[i])
        accuracy = float(num_correct) / len(y_train_folds[i])
        k_to_accuracies.setdefault(k_choice, []).append(accuracy)
        print('k = %d, accuracy = %f' % (k_choice, accuracy))

# plot the raw observations
for k in k_choices:
    accuracies = k_to_accuracies[k]
    plt.scatter([k] * len(accuracies), accuracies)

# plot the trend line with error bars that correspond to standard deviation
accuracies_mean = np.array(
    [np.mean(v) for k, v in sorted(k_to_accuracies.items())])
accuracies_std = np.array(
    [np.std(v) for k, v in sorted(k_to_accuracies.items())])
Ejemplo n.º 11
def test1():
    """Load CIFAR-10, subsample it and report 5-NN accuracy on the test subset.

    Uses the first 5000 training images and the first 500 test images,
    flattens each image into a row vector, computes distances with the
    fully vectorized routine and prints the resulting accuracy.
    """
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
    # "%s" with a one-element tuple prints the shape exactly as the old
    # Python 2 print statement did, and also works on Python 3.
    print('Training data shape: %s' % (X_train.shape,))
    print('Training label shape: %s' % (y_train.shape,))
    print('Test data shape: %s' % (X_test.shape,))
    print('Test label shape: %s' % (y_test.shape,))

    # Subsample the data; list(range(...)) keeps the index-list selection
    # working on both Python 2 and 3.
    num_training = 5000
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]
    num_test = 500
    mask = list(range(num_test))
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Flatten every image into a single row.
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    print('%s %s' % (X_train.shape, X_test.shape))

    from cs231n.classifiers import KNearestNeighbor
    classifier = KNearestNeighbor()
    classifier.train(X_train, y_train)

    dists = classifier.compute_distances_no_loops(X_test)

    y_test_pred = classifier.predict_labels(dists, k=5)
    num_correct = np.sum(y_test_pred == y_test)
    accuracy = float(num_correct) / num_test
    # Message typo fixed: "God" -> "Got".
    print("Got %d/%d correct => accuracy: %f" % (num_correct, num_test, accuracy))
Ejemplo n.º 12
# values of k in the k_to_accuracies dictionary.                               #
for k in k_choices:
    accuracies = []
    for i in range(num_folds):
        # Copy the fold lists and drop fold i; what remains is training data.
        X_train_this = list(X_train_folds)
        del X_train_this[i]
        y_train_this = list(y_train_folds)
        del y_train_this[i]
        # np.vstack replaces np.row_stack, its deprecated alias.
        x = np.vstack(X_train_this)
        y = np.concatenate(y_train_this)
        print('after row stack')
        print(x.shape)
        print(y.shape)
        classifier.train(x, y)
        dists = classifier.compute_distances_no_loops(X_train_folds[i])
        y_test_pred = classifier.predict_labels(dists, k)
        num_correct = np.sum(y_test_pred == y_train_folds[i])
        # Divide by the size of the held-out fold (not the unrelated test
        # set size) and actually record the result for this fold.
        accuracy = float(num_correct) / len(y_train_folds[i])
        accuracies.append(accuracy)
    k_to_accuracies[k] = accuracies
#                                 END OF YOUR CODE                             #

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))

# plot the raw observations
Ejemplo n.º 13
# One of the simplest ways to decide whether two matrices are similar is the
# Frobenius norm of their difference: the square root of the sum of squared
# element-wise differences, i.e. the Euclidean distance between the two
# matrices after flattening them into vectors.
difference = np.linalg.norm(dists - dists_one, ord='fro')
# Single-argument print() behaves the same on Python 2 and 3.
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    # Without this branch both messages were printed on success and
    # nothing at all on failure.
    print('Uh-oh! The distance matrices are different')

# In[ ]:

# Now implement the fully vectorized version inside compute_distances_no_loops
# and run the code
dists_two = classifier.compute_distances_no_loops(X_test)

# check that the distance matrix agrees with the one we computed before:
difference = np.linalg.norm(dists - dists_two, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    print('Uh-oh! The distance matrices are different')

# In[ ]:

# Let's compare how fast the implementations are
def time_function(f, *args):
Ejemplo n.º 14
# The Frobenius norm of the difference of two matrices is the square root of
# the sum of squared element-wise differences; in other words, reshape the
# matrices into vectors and compute the Euclidean distance between them.
difference = np.linalg.norm(dists - dists_one, ord='fro')
# Single-argument print() behaves the same on Python 2 and 3.
print('Difference was: %f' % (difference, ))
if difference < 0.001:
  print('Good! The distance matrices are the same')
else:
  # Without this branch both messages were printed on success and
  # nothing at all on failure.
  print('Uh-oh! The distance matrices are different')

# In[ ]:

# Now implement the fully vectorized version inside compute_distances_no_loops
# and run the code
dists_two = classifier.compute_distances_no_loops(X_test)

# check that the distance matrix agrees with the one we computed before:
difference = np.linalg.norm(dists - dists_two, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
  print('Good! The distance matrices are the same')
else:
  print('Uh-oh! The distance matrices are different')

# In[ ]:

# Let's compare how fast the implementations are
def time_function(f, *args):
Ejemplo n.º 15
# To ensure that our vectorized implementation is correct, we make sure that it
# agrees with the naive implementation. There are many ways to decide whether
# two matrices are similar; one of the simplest is the Frobenius norm of their
# difference: the square root of the sum of squared element-wise differences,
# i.e. the Euclidean distance between the matrices reshaped into vectors.
difference = np.linalg.norm(dists - dists_one, ord='fro')
# Single-argument print() behaves the same on Python 2 and 3.
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    # Without this branch both messages were printed on success and
    # nothing at all on failure.
    print('Uh-oh! The distance matrices are different')

# Now implement the fully vectorized version inside compute_distances_no_loops
# and run the code
dists_two = classifier.compute_distances_no_loops(X_test)

# check that the distance matrix agrees with the one we computed before:
difference = np.linalg.norm(dists - dists_two, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    print('Uh-oh! The distance matrices are different')

# Let's compare how fast the implementations are
def time_function(f, *args):
  """Call a function f with args and return the time (in seconds) that it took to execute."""
Ejemplo n.º 16
import numpy as np
import h5py
from numpy import loadtxt
from cs231n.classifiers import KNearestNeighbor

# Read the whole feature matrix into memory; the context manager closes the
# HDF5 file once the data has been copied out.
with h5py.File('img_data.h5', 'r') as h5f:
    X = h5f['dataset_1'][:]
# The default whitespace splitting is used instead of delimiter="\n".
# NOTE(review): this assumes one label per line, and that recent NumPy
# releases reject a newline delimiter - confirm both.
y = loadtxt("y_labels.txt", dtype=np.uint8, unpack=False)
X_train = X[8000:35117, :]
y_train = y[8000:35117]
num_val = 5000
# NOTE(review): X_val / y_val were never defined in the original script
# (NameError). The first num_val rows are used here because they do not
# overlap the training slice above - confirm this is the intended split.
X_val = X[:num_val]
y_val = y[:num_val]

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
dists = classifier.compute_distances_no_loops(X_val)
y_val_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_val_pred == y_val)
accuracy = float(num_correct) / num_val
print(accuracy)

Ejemplo n.º 17
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all fold and all     #
# values of k in the k_to_accuracies dictionary.                               #
for k in k_choices:
    k_to_accuracies[k] = np.zeros(num_folds)
    for i in range(num_folds):
        # Every fold except fold i forms the training set. Concatenating
        # avoids the hard-coded "4 remaining folds" and also works when the
        # folds are not all the same size.
        x_t = np.concatenate(X_train_folds[:i] + X_train_folds[i + 1:])
        y_t = np.concatenate(y_train_folds[:i] + y_train_folds[i + 1:])
        # One row per sample, as in the original reshape. Labels stay a
        # column vector; presumably the classifier accepts that - confirm.
        x_t = x_t.reshape(x_t.shape[0], -1)
        y_t = y_t.reshape(y_t.shape[0], -1)

        # Fold i is the held-out test set.
        x_te = np.array(X_train_folds[i])
        y_te = np.array(y_train_folds[i])

        classifier.train(x_t, y_t)
        dists_ = classifier.compute_distances_no_loops(x_te)
        y_pred = classifier.predict_labels(dists_, k)
        # Compute the fraction of correctly predicted examples, relative to
        # the size of the held-out fold rather than the global num_test.
        num_correct = np.sum(y_pred == y_te)
        accuracy = float(num_correct) / len(y_te)
        k_to_accuracies[k][i] = accuracy
#                                 END OF YOUR CODE                             #

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))
Ejemplo n.º 18
def test1():
    """Load CIFAR-10, report 5-NN test accuracy, then run cross-validation.

    Uses the first 5000 training images and the first 500 test images,
    flattens each image into a row vector, prints the accuracy obtained
    with k = 5 and finally calls ``cross_validate`` on the training subset.
    """
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # "%s" with a one-element tuple prints the shape exactly as the old
    # Python 2 print statement did, and also works on Python 3.
    print('Training data shape: %s' % (X_train.shape,))
    print('Training label shape: %s' % (y_train.shape,))
    print('Test data shape: %s' % (X_test.shape,))
    print('Test label shape: %s' % (y_test.shape,))

    # Subsample the data; list(range(...)) keeps the index-list selection
    # working on both Python 2 and 3.
    num_training = 5000
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]

    num_test = 500
    mask = list(range(num_test))
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Flatten every image into a single row.
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    print('%s %s' % (X_train.shape, X_test.shape))

    from cs231n.classifiers import KNearestNeighbor
    classifier = KNearestNeighbor()
    classifier.train(X_train, y_train)

    dists = classifier.compute_distances_no_loops(X_test)

    y_test_pred = classifier.predict_labels(dists, k=5)
    num_correct = np.sum(y_test_pred == y_test)
    accuracy = float(num_correct) / num_test
    # The format tuple was left unterminated (a syntax error) and the
    # message read "God"; both are fixed here.
    print("Got %d/%d correct => accuracy: %f" % (num_correct, num_test,
                                                 accuracy))
    cross_validate(X_train, y_train)
Ejemplo n.º 19
    # To ensure that our vectorized implementation is correct, we make sure that it
    # agrees with the naive implementation. There are many ways to decide whether
    # two matrices are similar; one of the simplest is the Frobenius norm of their
    # difference: the square root of the sum of squared element-wise differences,
    # i.e. the Euclidean distance between the matrices reshaped into vectors.
    difference = np.linalg.norm(dists - dists_one, ord='fro')
    # Single-argument print() behaves the same on Python 2 and 3.
    print('Difference was: %f' % (difference, ))
    if difference < 0.001:
        print('Good! The distance matrices are the same')
    else:
        # Without this branch both messages were printed on success and
        # nothing at all on failure.
        print('Uh-oh! The distance matrices are different')

    # Now implement the fully vectorized version inside compute_distances_no_loops
    # and run the code
    dists_two = classifier.compute_distances_no_loops(X_test)

    # check that the distance matrix agrees with the one we computed before:
    difference = np.linalg.norm(dists - dists_two, ord='fro')
    print('Difference was: %f' % (difference, ))
    if difference < 0.001:
        print('Good! The distance matrices are the same')
    else:
        print('Uh-oh! The distance matrices are different')

    # Let's compare how fast the implementations are
    def time_function(f, *args):
        """Call a function f with args and return the time (in seconds) that it took to execute."""
        import time
Ejemplo n.º 20
# TODO:                                                                        #
# Perform k-fold cross validation to find the best value of k. For each        #
# possible value of k, run the k-nearest-neighbor algorithm num_folds times,   #
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all fold and all     #
# values of k in the k_to_accuracies dictionary.                               #
# Your code
for k in k_choices:
    accuracies = []
    for i in range(num_folds):
        # Select fold i for validation without mutating the fold lists.
        # Popping folds (as before) shrank the training set on every
        # iteration and left nothing to stack by the last fold.
        X_val = X_train_folds[i]
        y_val = y_train_folds[i]
        X_rest = np.vstack(X_train_folds[:i] + X_train_folds[i + 1:])
        y_rest = np.hstack(y_train_folds[:i] + y_train_folds[i + 1:])
        classifier.train(X_rest, y_rest)
        dists = classifier.compute_distances_no_loops(X_val)

        y_val_pred = classifier.predict_labels(dists, k=k)
        num_correct = np.sum(y_val_pred == y_val)
        accuracies.append(float(num_correct) / y_val.shape[0])
    k_to_accuracies[k] = accuracies
#                                 END OF YOUR CODE                             #
Ejemplo n.º 21
# Score the predictions that are currently held in y_test_pred.
num_correct = (y_test_pred == y_test).sum()
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))

# Predict again with k = 5 and score the new predictions the same way.
y_test_pred = classifier.predict_labels(dists, k=5)
num_correct = (y_test_pred == y_test).sum()
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))

# Compare the one-loop distance matrix against the reference one using the
# Frobenius norm of their difference.
dists_one = classifier.compute_distances_one_loop(X_test)
difference = np.linalg.norm(dists - dists_one, 'fro')
print('Difference was: %f' % difference)

# Same comparison between the one-loop and the fully vectorized results.
dists_two = classifier.compute_distances_no_loops(X_test)
difference = np.linalg.norm(dists_one - dists_two, 'fro')
print('Difference was: %f' % difference)


def time_function(f, *args):
    """Call a function f with args and return the time (in seconds) that it took to execute.

    Args:
        f: callable to be timed.
        *args: positional arguments forwarded to ``f``.

    Returns:
        Elapsed wall-clock time in seconds as a float. The return value of
        ``f`` is discarded.
    """
    import time
    tic = time.time()
    # The call itself was missing, so the function measured nothing.
    f(*args)
    toc = time.time()
    return toc - tic
Ejemplo n.º 22
# The vectorized implementation must agree with the naive implementation.
# There are many ways to decide whether two matrices are similar; one of the
# simplest is the Frobenius norm of their difference: the square root of the
# sum of squared element-wise differences, i.e. the Euclidean distance between
# the matrices reshaped into vectors.

difference = np.linalg.norm(dists - dists_one, ord='fro')
# Single-argument print() behaves the same on Python 2 and 3.
print('Difference was: %f' % (difference, ))
if difference < 0.001:
  print('Good! The distance matrices are the same')
else:
  # Without this branch both messages were printed on success and
  # nothing at all on failure.
  print('Uh-oh! The distance matrices are different')

# Now implement the fully vectorized version inside compute_distances_no_loops
# and run the code
dists_two = classifier.compute_distances_no_loops(X_test)

# check that the distance matrix agrees with the one we computed before:
difference = np.linalg.norm(dists - dists_two, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
  print('Good! The distance matrices are the same')
else:
  print('Uh-oh! The distance matrices are different')

# Let's compare how fast the implementations are
def time_function(f, *args):
  """Call a function f with args and return the time (in seconds) that it took to execute."""
  import time
Ejemplo n.º 23
# One of the simplest ways to decide whether two matrices are similar is the
# Frobenius norm of their difference: the square root of the sum of squared
# element-wise differences, i.e. the Euclidean distance between the two
# matrices after flattening them into vectors.
difference = np.linalg.norm(dists - dists_one, ord='fro')
# Single-argument print() behaves the same on Python 2 and 3.
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    # Without this branch both messages were printed on success and
    # nothing at all on failure.
    print('Uh-oh! The distance matrices are different')

# In[24]:

# Now implement the fully vectorized version inside compute_distances_no_loops
# and run the code
dists_two = classifier.compute_distances_no_loops(X_test)

# check that the distance matrix agrees with the one we computed before:
difference = np.linalg.norm(dists - dists_two, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    print('Uh-oh! The distance matrices are different')

# In[25]:

# Let's compare how fast the implementations are
def time_function(f, *args):
Ejemplo n.º 24
accuracy = float(num_correct) / num_test
print('Using k=5, Got %d /%d correct => accuracy: %f' %
      (num_correct, num_test, accuracy))

# Now let's speed up the distance matrix computation by using partial
# vectorization with one loop.
dists_one = classifier.compute_distances_one_loop(X_test)
# Compute the difference between the two methods (Frobenius norm).
differeces = np.linalg.norm(dists - dists_one, ord='fro')
if differeces < 0.001:
    print('Good, the two method give the same results.')
else:
    # Without this branch the failure message was printed right after the
    # success message, and never on an actual mismatch.
    print('The distance is different')

# Now we use the method without any loop
dists_non = classifier.compute_distances_no_loops(X_test)
# Compute the difference between the two methods (Frobenius norm).
differeces = np.linalg.norm(dists - dists_non, ord='fro')
if differeces < 0.001:
    print('Good, The differece is %f' % differeces)
else:
    print('The distance is different')

#Let's compute how fast the implementations are

def time_function(f, *args):
    """Call a function f with args and return the time (in seconds)
    that it took to execute."""
Ejemplo n.º 25
    for fold in range(num_folds):
        # Cross validation: each time, pick one fold out of all folds as the
        # validation set.
        X_test_crossval = X_train_folds[fold]
        y_test_crossval = y_train_folds[fold]
        # Use the real size of the held-out fold instead of a hard-coded
        # 1000, so the accuracy stays correct for any fold size.
        num_test_crossval = len(y_test_crossval)
        # Use the rest of the folds as training data.
        X_train_crossval = np.vstack(X_train_folds[0:fold] +
                                     X_train_folds[fold + 1:])
        y_train_crossval = np.hstack(y_train_folds[0:fold] +
                                     y_train_folds[fold + 1:])

        # Train the classifier.
        classifier.train(X_train_crossval, y_train_crossval)
        # Compute the distances for the held-out data.
        dists_crossval = classifier.compute_distances_no_loops(X_test_crossval)
        # Predict the labels with the current k value.
        y_test_pred = classifier.predict_labels(dists_crossval, k)

        # Compute the accuracy.
        # NOTE(review): nothing in the visible code stores this value in
        # k_to_accuracies - confirm it is recorded by the enclosing code.
        num_correct = np.sum(y_test_pred == y_test_crossval)
        accuracy = float(num_correct) / num_test_crossval


#                                 END OF YOUR CODE                             #

# Print out the computed accuracies
for k in sorted(k_to_accuracies):