import numpy as np
from cs231n.classifiers import KNearestNeighbor

def classifier():
  train_data = np.array(
    [
      [1, 2, 2, 1],
      [4, 3, 4, 4],
      [3, 4, 4, 2],
    ])

  train_labels = np.array([1, 2, 2])

  knn = KNearestNeighbor()
  knn.train(train_data, train_labels)
  return knn
Example No. 2
def cross_validate(X_train, y_train):
    num_folds = 5
    k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]
    X_train_folds = []
    y_train_folds = []
    N = len(X_train)
    train_folds = np.array_split(np.arange(N), num_folds)
    k_to_accuracies = {}
    for k1 in k_choices:
        fold_eval = []
        for i in range(num_folds):
            mask = np.ones(N,dtype=bool)
            mask[train_folds[i]] = False
            X_train_cur = X_train[mask]
            y_train_cur = y_train[mask]
            classifier = KNearestNeighbor()
            classifier.train(X_train_cur, y_train_cur)
            
            X_test_cur = X_train[train_folds[i]]
            y_test_cur = y_train[train_folds[i]]
            
            dists = classifier.compute_distances_no_loops(X_test_cur)
            y_test_pred = classifier.predict_labels(dists,k=k1)
            num_correct = np.sum(y_test_pred == y_test_cur)
            accuracy = float(num_correct)/len(y_test_cur)
            fold_eval.append(accuracy)
        k_to_accuracies[k1] = fold_eval[:]

    for k in sorted(k_to_accuracies):
        for accuracy in k_to_accuracies[k]:
            print('k = %d, accuracy = %f' % (k, accuracy))
    
    for k in k_choices:
        accuracies = k_to_accuracies[k]
        plt.scatter([k]*len(accuracies), accuracies)

    accuracies_mean = np.array([np.mean(v) for k,v in sorted(k_to_accuracies.items())])
    accuracies_std = np.array([np.std(v) for k,v in sorted(k_to_accuracies.items())])
    plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std)
    plt.title('Cross-validation on k')
    plt.xlabel('k')
    plt.ylabel('Cross-validation accuracy')
    plt.savefig('./figures/validation_k')
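    # A hedged follow-up (not in the original function): once k_to_accuracies
    # is filled, the usual next step is to pick the k with the best mean
    # cross-validation accuracy. A minimal sketch using the dictionary above:
    best_k = max(k_to_accuracies, key=lambda k: np.mean(k_to_accuracies[k]))
    print('best k = %d' % best_k)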
Example No. 3
# In[111]:

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

# In[112]:

from cs231n.classifiers import KNearestNeighbor

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

# We would now like to classify the test data with the kNN classifier. Recall that we can break down this process into two steps:
#
# 1. First we must compute the distances between all test examples and all train examples.
# 2. Given these distances, for each test example we find the k nearest examples and have them vote for the label
#
# Let's begin with computing the distance matrix between all training and test examples. For example, if there are **Ntr** training examples and **Nte** test examples, this stage should result in an **Nte x Ntr** matrix where each element (i,j) is the distance between the i-th test and j-th train example.
#
# First, open `cs231n/classifiers/k_nearest_neighbor.py` and implement the function `compute_distances_two_loops` that uses a (very inefficient) double loop over all pairs of (test, train) examples and computes the distance matrix one element at a time.

# In[71]:

# Open cs231n/classifiers/k_nearest_neighbor.py and implement
# compute_distances_two_loops.
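
# As a hedged illustration only (a sketch, not the official assignment
# solution), the method body of compute_distances_two_loops inside the
# KNearestNeighbor class might look like this, assuming train() stored the
# training data as self.X_train:
def compute_distances_two_loops(self, X):
    num_test = X.shape[0]
    num_train = self.X_train.shape[0]
    dists = np.zeros((num_test, num_train))
    for i in range(num_test):
        for j in range(num_train):
            # one Euclidean distance per (test, train) pair
            dists[i, j] = np.sqrt(np.sum((X[i] - self.X_train[j]) ** 2))
    return dists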
Example No. 4
num_test = 500
mask = range(num_test)
X_test = X_test[mask]
y_test = y_test[mask]

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

from cs231n.classifiers import KNearestNeighbor

# Create a kNN classifier instance. 
# Remember that training a kNN classifier is a noop: 
# the Classifier simply remembers the data and does no further processing 
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

num_folds = 5
k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]

X_train_folds = []
y_train_folds = []
################################################################################
# TODO:                                                                        #
# Split up the training data into folds. After splitting, X_train_folds and    #
# y_train_folds should each be lists of length num_folds, where                #
# y_train_folds[i] is the label vector for the points in X_train_folds[i].     #
# Hint: Look up the numpy array_split function.                                #
################################################################################
X_train_folds = np.array_split(X_train, num_folds, axis=0)
y_train_folds = np.array_split(y_train, num_folds, axis=0)
Example No. 5
# In[ ]:

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)


# In[ ]:

from cs231n.classifiers import KNearestNeighbor

# Create a kNN classifier instance. 
# Remember that training a kNN classifier is a noop: 
# the Classifier simply remembers the data and does no further processing 
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)


# We would now like to classify the test data with the kNN classifier. Recall that we can break down this process into two steps: 
# 
# 1. First we must compute the distances between all test examples and all train examples. 
# 2. Given these distances, for each test example we find the k nearest examples and have them vote for the label
# 
# Let's begin with computing the distance matrix between all training and test examples. For example, if there are **Ntr** training examples and **Nte** test examples, this stage should result in an **Nte x Ntr** matrix where each element (i,j) is the distance between the i-th test and j-th train example.
# 
# First, open `cs231n/classifiers/k_nearest_neighbor.py` and implement the function `compute_distances_two_loops` that uses a (very inefficient) double loop over all pairs of (test, train) examples and computes the distance matrix one element at a time.

# In[ ]:

# Open cs231n/classifiers/k_nearest_neighbor.py and implement
# compute_distances_two_loops.
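
# For contrast with the two-loop version sketched earlier, a partially
# vectorized one-loop variant might look like the following (again a hedged
# sketch of the method body, assuming the stored self.X_train):
def compute_distances_one_loop(self, X):
    num_test = X.shape[0]
    num_train = self.X_train.shape[0]
    dists = np.zeros((num_test, num_train))
    for i in range(num_test):
        # broadcast one test row against every training row at once
        dists[i, :] = np.sqrt(np.sum((self.X_train - X[i]) ** 2, axis=1))
    return dists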
Example No. 6
num_test = 500
mask = range(num_test)
X_test = X_test[mask]
y_test = y_test[mask]

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

from cs231n.classifiers import KNearestNeighbor

# Create a kNN classifier instance. 
# Remember that training a kNN classifier is a noop: 
# the Classifier simply remembers the data and does no further processing 
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

# Open cs231n/classifiers/k_nearest_neighbor.py and implement
# compute_distances_two_loops.

# Test your implementation:
dists = classifier.compute_distances_no_loops(X_test)
print(dists.shape)

# Now implement the function predict_labels and run the code below.
# Here we use k = 7 nearest neighbors for the vote:
y_test_pred = classifier.predict_labels(dists, k=7)

# Compute and print the fraction of correctly predicted examples
num_correct = np.sum(y_test_pred == y_test)
Example No. 7
# A dictionary will store the accuracies for different values of k that we find
# while finding the score
# k_to_accuracies[k] should be a list of length num_folds giving the different
# accuracy values that we found when using that value of k.

k_to_accuracies = {k: [] for k in k_choices}

for i in range(num_folds):
    # do subsetting
    X_val_curr = X_train_folds[i]
    y_val_curr = y_train_folds[i]
    X_train_curr = np.concatenate(X_train_folds[:i] + X_train_folds[i+1:])
    y_train_curr = np.concatenate(y_train_folds[:i] + y_train_folds[i+1:])

    # train classifier + compute distances for this fold
    classifier = KNearestNeighbor()
    classifier.train(X_train_curr, y_train_curr)
    dists = classifier.compute_distances_no_loops(X_val_curr)
    
    # find the accuracy on the held-out fold for each candidate k
    for k in k_choices:
        y_val_pred_curr = classifier.predict_labels(dists, k=k)
        num_correct = np.sum(y_val_pred_curr == y_val_curr)
        accuracy = float(num_correct) / y_val_curr.shape[0]
        k_to_accuracies[k].append(accuracy)

# print the accuracies for the different values of k
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))
Example No. 8
num_test = 500
mask = range(num_test)
X_test = X_test[mask]
y_test = y_test[mask]

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

from cs231n.classifiers import KNearestNeighbor
# Create a kNN classifier instance. 
# Remember that training a kNN classifier is a noop: 
# the Classifier simply remembers the data and does no further processing 
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

'''
We would now like to classify the test data with the kNN classifier. Recall that we can break down this process into two steps:

    First we must compute the distances between all test examples and all train examples.
    Given these distances, for each test example we find the k nearest examples and have them vote for the label

Let's begin with computing the distance matrix between all training and test examples. For example, if there are Ntr training examples and Nte test examples, this stage should result in an Nte x Ntr matrix where each element (i,j) is the distance between the i-th test and j-th train example.

First, open cs231n/classifiers/k_nearest_neighbor.py and implement the function compute_distances_two_loops that uses a (very inefficient) double loop over all pairs of (test, train) examples and computes the distance matrix one element at a time.

'''

# Open cs231n/classifiers/k_nearest_neighbor.py and implement
# compute_distances_two_loops.
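
# A complementary hedged sketch: the fully vectorized
# compute_distances_no_loops that several snippets on this page call. It uses
# the expansion ||x - y||^2 = ||x||^2 - 2*x.y + ||y||^2 plus broadcasting
# (an illustration of the method body, not the official solution):
def compute_distances_no_loops(self, X):
    test_sq = np.sum(X ** 2, axis=1, keepdims=True)   # (num_test, 1)
    train_sq = np.sum(self.X_train ** 2, axis=1)      # (num_train,)
    cross = X.dot(self.X_train.T)                     # (num_test, num_train)
    # clip tiny negatives caused by floating-point error before the sqrt
    return np.sqrt(np.maximum(test_sq - 2 * cross + train_sq, 0))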
Example No. 9
X_test = np.reshape(X_test, (X_test.shape[0], -1))

from cs231n.classifiers import KNearestNeighbor

num_folds = 5
k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]

X_train_folds = []
y_train_folds = []
X_train_folds = np.split(X_train, num_folds)
y_train_folds = np.split(y_train, num_folds)
k_to_accuracies = {}

for k_choice in k_choices:
    for i in range(num_folds):
        knn = KNearestNeighbor()
        xtrain = np.concatenate(X_train_folds[:i] + X_train_folds[i + 1:])
        ytrain = np.concatenate(y_train_folds[:i] + y_train_folds[i + 1:])
        knn.train(xtrain, ytrain)
        dists = knn.compute_distances_no_loops(np.asarray(X_train_folds[i]))
        y_test_pred = knn.predict_labels(dists, k=k_choice)
        num_correct = np.sum(y_test_pred == y_train_folds[i])
        accuracy = float(num_correct) / len(y_train_folds[i])
        k_to_accuracies.setdefault(k_choice, []).append(accuracy)
        print('k = %d, accuracy = %f' % (k_choice, accuracy))

for k in k_choices:
    accuracies = k_to_accuracies[k]
    plt.scatter([k] * len(accuracies), accuracies)
Example No. 10
X_train = X_train[mask]
y_train = y_train[mask]

num_test = 500
mask = list(range(num_test))
X_test = X_test[mask]
y_test = y_test[mask]

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

from cs231n.classifiers import KNearestNeighbor

classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
# Test
dists = classifier.compute_distances_two_loops(X_test)
# print(dists.shape)
plt.imshow(dists, interpolation='none')
plt.show()

# # Test with k set to 1
# y_test_pred = classifier.predict_labels(dists, k=1)
# # Compute and print the accuracy
# num_correct = np.sum(y_test_pred == y_test)
# accuracy = float(num_correct) / num_test
# print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))
# # Test with k set to 5
# y_test_pred = classifier.predict_labels(dists, k=5)
Example No. 11
def test1():
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    print('Training data shape:', X_train.shape)
    print('Training label shape:', y_train.shape)
    print('Test data shape:', X_test.shape)
    print('Test label shape:', y_test.shape)

    # classes = ['plane','car','bird','cat','deer','dog','frog','horse','ship','truck']
    # num_classes = len(classes)
    # sample_per_class = 7

    # for y,cls in enumerate(classes):
    #     idxs = np.flatnonzero(y_train == y)
    #     idxs = np.random.choice(idxs, sample_per_class, replace=False)
    #     for i, idx in enumerate(idxs):
    #         plt_idx = i*num_classes + y + 1
    #         plt.subplot(sample_per_class, num_classes, plt_idx)
    #         plt.imshow(X_train[idx].astype('uint8'))
    #         plt.axis('off')
    #         if i == 0:
    #             plt.title(cls)

    # plt.savefig("./figures/cifar_sample.png")
    # plt.show()
    # plt.close()

    num_training = 5000
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]

    num_test = 500
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    print(X_train.shape, X_test.shape)

    from cs231n.classifiers import KNearestNeighbor
    classifier = KNearestNeighbor()
    classifier.train(X_train, y_train)

    # two_loop_time = time_function(classifier.compute_distances_two_loops, X_test)
    # print("two loop time %f" % two_loop_time)

    # one_loop_time = time_function(classifier.compute_distances_one_loop, X_test)
    # print("one loop time %f" % one_loop_time)

    # no_loop_time = time_function(classifier.compute_distances_no_loops, X_test)
    # print("no loop time %f" % no_loop_time)

    dists = classifier.compute_distances_no_loops(X_test)

    # dist_one_loop = classifier.compute_distances_one_loop(X_test)
    # dist_two_loops = classifier.compute_distances_two_loops(X_test)
    #matrix_compare(dists,dist_one_loop)
    #matrix_compare(dists,dist_two_loops)

    y_test_pred = classifier.predict_labels(dists, k=5)
    num_correct = np.sum(y_test_pred == y_test)
    accuracy = float(num_correct) / num_test
    print "God %d/%d correct => accuracy: %f" % (num_correct, num_test,
                                                 accuracy)
    cross_validate(X_train, y_train)
Example No. 12
X_test = X_test[mask]
y_test = y_test[mask]

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))

print(X_train.shape, X_test.shape)

#%%
from cs231n.classifiers import KNearestNeighbor

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

#%% [markdown]
# We would now like to classify the test data with the kNN classifier. Recall that we can break down this process into two steps:
#
# 1. First we must compute the distances between all test examples and all train examples.
# 2. Given these distances, for each test example we find the k nearest examples and have them vote for the label
#
# Let's begin with computing the distance matrix between all training and test examples. For example, if there are **Ntr** training examples and **Nte** test examples, this stage should result in an **Nte x Ntr** matrix where each element (i,j) is the distance between the i-th test and j-th train example.
#
# **Note: For the three distance computations that we require you to implement in this notebook, you may not use the np.linalg.norm() function that numpy provides.**
#
# First, open `cs231n/classifiers/k_nearest_neighbor.py` and implement the function `compute_distances_two_loops` that uses a (very inefficient) double loop over all pairs of (test, train) examples and computes the distance matrix one element at a time.

#%%
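
# A hedged sketch of step 2, the label vote in predict_labels (an
# illustration of the method body, not the official solution): take the k
# smallest distances per test row and let the corresponding training labels
# vote; np.bincount + argmax breaks ties toward the smaller label.
def predict_labels(self, dists, k=1):
    num_test = dists.shape[0]
    y_pred = np.zeros(num_test, dtype=self.y_train.dtype)
    for i in range(num_test):
        closest_y = self.y_train[np.argsort(dists[i])[:k]]
        y_pred[i] = np.bincount(closest_y).argmax()
    return y_pred

# e.g. y_test_pred = classifier.predict_labels(dists, k=5)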
Example No. 13
X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

num_training = 5000
mask = list(range(num_training))
X_train = X_train[mask]
y_train = y_train[mask]

num_test = 500
mask = list(range(num_test))
X_test = X_test[mask]
y_test = y_test[mask]
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
from cs231n.classifiers import KNearestNeighbor
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

num_folds = 5
k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]

X_train_folds = []
y_train_folds = []

X_train_temp = X_train
y_train_temp = y_train
X_train_folds = np.array_split(X_train_temp, num_folds)
y_train_folds = np.array_split(y_train_temp, num_folds)
print(X_train_folds[4])

k_to_accuracies = {}
Example No. 14
num_test = 500
mask = range(num_test)
X_test = X_test[mask]
y_test = y_test[mask]

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

from cs231n.classifiers import KNearestNeighbor

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

# Open cs231n/classifiers/k_nearest_neighbor.py and implement
# compute_distances_two_loops.

# Test your implementation:
dists = classifier.compute_distances_two_loops(X_test)
# print(dists.shape)
## Now implement the function predict_labels and run the code below:
## We use k = 1 (which is Nearest Neighbor).
#y_test_pred = classifier.predict_labels(dists, k=1)
#
## Compute and print the fraction of correctly predicted examples
#num_correct = np.sum(y_test_pred == y_test)
#accuracy = float(num_correct) / num_test
Example No. 15
num_test = 500
mask = list(range(num_test))
X_test = X_test[mask]
y_test = y_test[mask]

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

from cs231n.classifiers import KNearestNeighbor
# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
# classifier.train(X_train, y_train)
# # #
# # # # Test your implementation:
# # dists = classifier.compute_distances_two_loops(X_test)
# dists = classifier.compute_distances_no_loops(X_test)
# #
# # print('dist.shape', dists.shape)
#
# # We can visualize the distance matrix: each row is a single test example and
# # its distances to training examples
# #plt.imshow(dists, interpolation='none')
# #plt.show()
#
# # # # Now implement the function predict_labels and run the code below:
# # # # We use k = 1 (which is Nearest Neighbor).
Example No. 16
y_train = y_train[mask]

num_test = 500
mask = list(range(num_test))
X_test = X_test[mask]
y_test = y_test[mask]

X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

#######################################################

from cs231n.classifiers import KNearestNeighbor

classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
dists = classifier.compute_distances_two_loops(X_test)
print(dists.shape)
plt.imshow(dists, interpolation='none')
plt.show()

y_test_pred = classifier.predict_labels(dists, k=1)
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' %
      (num_correct, num_test, accuracy))

y_test_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' %
      (num_correct, num_test, accuracy))
Example No. 17
y_train = y_train[mask]

num_test = 500
mask = list(range(num_test))
X_test = X_test[mask]
y_test = y_test[mask]

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
#
# # Open cs231n/classifiers/k_nearest_neighbor.py and implement
# # compute_distances_two_loops.
#
# # Test your implementation:
# dists = classifier.compute_distances_two_loops(X_test)
# print(dists.shape)
#
# # We can visualize the distance matrix: each row is a single test example and
# # its distances to training examples
# # plt.imshow(dists, interpolation='none')
# # plt.show()
#
#
Example No. 18
y_test = y_test[mask]

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)


##########################

from cs231n.classifiers import KNearestNeighbor

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
############################

# Open cs231n/classifiers/k_nearest_neighbor.py and implement
# compute_distances_two_loops.

# Test your implementation:
dists = classifier.compute_distances_two_loops(X_test)
print(dists.shape)

#############################

# We can visualize the distance matrix: each row is a single test example and
# its distances to training examples
plt.imshow(dists, interpolation='none')
Example No. 19
X_train = X_train[mask]
y_train = y_train[mask]
num_test = 500
mask = range(num_test)
X_test = X_test[mask]
y_test = y_test[mask]
print('Test Print:', y_train[range(10)])  # print the first 10 elements of y_train

# Reshape to 2D
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

from cs231n.classifiers import KNearestNeighbor
classifier = KNearestNeighbor()

num_folds = 5
k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]

X_train_folds = []
y_train_folds = []
################################################################################
# TODO:                                                                        #
# Split up the training data into folds. After splitting, X_train_folds and    #
# y_train_folds should each be lists of length num_folds, where                #
# y_train_folds[i] is the label vector for the points in X_train_folds[i].     #
# Hint: Look up the numpy array_split function.                                #
################################################################################
# Split the training set into num_folds folds; on each round, 4 folds train
# the classifier and the remaining fold serves as the validation set.
X_train_folds = np.array_split(X_train, num_folds)
y_train_folds = np.array_split(y_train, num_folds)
Example No. 20
y_train = y_train[mask]

num_test = 500
mask = range(num_test)
X_test = X_test[mask]
y_test = y_test[mask]

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

# Open cs231n/classifiers/k_nearest_neighbor.py and implement
# compute_distances_two_loops.

# Test your implementation:
#dists = classifier.compute_distances_two_loops(X_test)
# print(dists.shape)
#
## We can visualize the distance matrix: each row is a single test example and
## its distances to training examples
#fig2 = plt.figure()
#plt.imshow(dists, interpolation='none')
#plt.show()
#
Example No. 21
from cs231n.classifiers import KNearestNeighbor
import numpy as np

classifier = KNearestNeighbor()  # __init__ runs here; no separate call needed
print("Import succeeded.")

a = np.arange(2000).reshape(100, 20)
a_y = np.array([1] * 100)  # one label per row of a
b = np.arange(20).reshape(1, 20)
c = a - b
print(a_y)
classifier.train(a, a_y)  # transposing a 1-D array is a no-op, so pass labels directly
classifier.compute_distances_one_loop(a)
Example No. 22
# print('Test data shape: ', X_test.shape)
# print('Test labels shape: ', y_test.shape)
num_training = 5000
mask = list(range(num_training))
X_train = X_train[mask]
y_train = y_train[mask]

num_test = 500
mask = list(range(num_test))
X_test = X_test[mask]
y_test = y_test[mask]
X_train = np.reshape(X_train,
                     (X_train.shape[0], -1))  #X_train.shape:5000*32*32*3
X_test = np.reshape(X_test, (X_test.shape[0], -1))
# print(X_train.shape, X_test.shape,y_train.shape)
classifier = KNearestNeighbor()
# classifier.train(X_train, y_train)
#
# dists = classifier.compute_distances_no_loops(X_test)
# y_test_pred = classifier.predict_labels(dists, k=1)
# num_correct = np.sum(y_test_pred == y_test)
# accuracy = float(num_correct) / num_test
# print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))

# Cross-validation
num_folds = 5
k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]

X_train_folds = []
y_train_folds = []
################################################################################
Example No. 23
mask = list(range(num_test))
X_test = X_test[mask]
y_test = y_test[mask]

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

# %%
from cs231n.classifiers import KNearestNeighbor

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

# %% [markdown]
# We would now like to classify the test data with the kNN classifier. Recall that we can break down this process into two steps:
#
# 1. First we must compute the distances between all test examples and all train examples.
# 2. Given these distances, for each test example we find the k nearest examples and have them vote for the label
#
# Let's begin with computing the distance matrix between all training and test examples. For example, if there are **Ntr** training examples and **Nte** test examples, this stage should result in an **Nte x Ntr** matrix where each element (i,j) is the distance between the i-th test and j-th train example.
#
# **Note: For the three distance computations that we require you to implement in this notebook, you may not use the np.linalg.norm() function that numpy provides.**
#
# First, open `cs231n/classifiers/k_nearest_neighbor.py` and implement the function `compute_distances_two_loops` that uses a (very inefficient) double loop over all pairs of (test, train) examples and computes the distance matrix one element at a time.

# %%
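
# A hedged sanity check (a sketch, not part of the scraped snippet): compare
# the naive and vectorized distance matrices. np.linalg.norm is fine here;
# the restriction above applies only to the distance computations themselves.
dists_two = classifier.compute_distances_two_loops(X_test)
dists_vec = classifier.compute_distances_no_loops(X_test)
difference = np.linalg.norm(dists_vec - dists_two, ord='fro')
print('Difference was: %f' % difference)
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    print('Uh-oh! The distance matrices are different')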
Example No. 24
import numpy as np
import matplotlib.pyplot as plt
from cs231n.data_utils import load_CIFAR10
from cs231n.classifiers import KNearestNeighbor

#Load data

cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
X_tre, y_tre, X_te, y_te = load_CIFAR10(cifar10_dir)
X_tre_rows = X_tre.reshape(X_tre.shape[0], 32 * 32 * 3)
X_te_rows = X_te.reshape(X_te.shape[0], 32 * 32 * 3)

X_val_rows = X_tre_rows[:1000, :]
y_val_rows = y_tre[:1000]

X_tre_rows = X_tre_rows[1000:, :]
y_tre = y_tre[1000:]  # keep labels aligned with X_tre_rows (first 1000 went to validation)

val_acc = []
for k in [1, 3, 5, 10, 20, 50, 100]:

    knn = KNearestNeighbor()
    knn.train(X_tre_rows, y_tre)  # train on the reshaped rows, matching X_val_rows

    y_val_predict = knn.predict(X_val_rows, k=k)
    acc = np.mean(y_val_predict == y_val_rows)
    print('accuracy: %f%%' % (acc * 100))

    val_acc.append((k, acc))
Example No. 25
num_test = 500
mask = range(num_test)
X_test = X_test[mask]
y_test = y_test[mask]

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

from cs231n.classifiers import KNearestNeighbor

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

# Open cs231n/classifiers/k_nearest_neighbor.py and implement
# compute_distances_two_loops.

# Test your implementation:
dists = classifier.compute_distances_two_loops(X_test)
print(dists.shape)

# We can visualize the distance matrix: each row is a single test example and
# its distances to training examples
plt.imshow(dists, interpolation='nearest')
plt.show()

# Now implement the function predict_labels and run the code below:
Example No. 26
X_train = X_train[mask]
y_train = y_train[mask]


num_test = 500
mask = list(range(num_test))
X_test = X_test[mask]
y_test = y_test[mask]

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

####

from cs231n.classifiers import KNearestNeighbor
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
Example No. 27
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all fold and all     #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

for k in k_choices:
    k_to_accuracies[k] = []
for i in range(num_folds):
    X_train_ = []
    y_train_ = []
    for j in range(num_folds):
        if j != i:
            X_train_.extend(X_train_folds[j])
            y_train_.extend(y_train_folds[j])
    classifier = KNearestNeighbor()
    classifier.train(np.array(X_train_), np.array(y_train_))
    X_val = np.array(X_train_folds[i])
    for k in k_choices:
        y_val_pred = classifier.predict(X_val,k=k)
        accuracy_val = np.mean(y_train_folds[i] == y_val_pred)
        k_to_accuracies[k].append(accuracy_val)




# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))
Example No. 28
## DEBUGGING:
print('After sampling, ')
print('    Test shape = ', X_test.shape)
print('Training shape = ', X_train.shape)
print('Memory used (MB): ', psutil.virtual_memory().used / (1024 * 1024))
print()

# print(X_train.shape, X_test.shape)

#### Cell 6
from cs231n.classifiers import KNearestNeighbor

# Create a kNN classifier instance. 
# Remember that training a kNN classifier is a noop: 
# the Classifier simply remembers the data and does no further processing 
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

#### Cell 7
# Open cs231n/classifiers/k_nearest_neighbor.py and implement
# compute_distances_two_loops.

# Test your implementation:
dists = classifier.compute_distances_two_loops(X_test)
print('After two-loop classifier, dists shape = ', dists.shape)
print('Memory used (MB): ', psutil.virtual_memory().used / (1024 * 1024))
print()
# print(dists.shape)

#### Cell 8
# We can visualize the distance matrix: each row is a single test example and
Example No. 29
num_test = 500
mask = list(range(num_test))
X_test = X_test[mask]
y_test = y_test[mask]

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

from cs231n.classifiers import KNearestNeighbor

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

# Open cs231n/classifiers/k_nearest_neighbor.py and implement
# compute_distances_two_loops.

# Test your implementation:
# dists = classifier.compute_distances_two_loops(X_test)
# print(dists.shape)
dists = classifier.compute_distances_no_loops(X_test)
print(dists.shape)

# We can visualize the distance matrix: each row is a single test example and
# its distances to training examples
fig = plt.figure()
plt.imshow(dists, interpolation='none')
Example No. 30
num_test = 500
mask = range(num_test)
X_test = X_test[mask]
y_test = y_test[mask]

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

from cs231n.classifiers import KNearestNeighbor

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

# Open cs231n/classifiers/k_nearest_neighbor.py and implement
# compute_distances_two_loops.
if not skip:
    # Test your implementation:
    dists = classifier.compute_distances_two_loops(X_test)
    #dists = classifier.compute_distances_no_loops(X_test)
    print(dists.shape)

    # We can visualize the distance matrix: each row is a single test example and
    # its distances to training examples
    plt.imshow(dists, interpolation='none')
    plt.show()
Example No. 31
# In[6]:

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

# In[7]:

from cs231n.classifiers import KNearestNeighbor

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

# We would now like to classify the test data with the kNN classifier. Recall that we can break down this process into two steps:
#
# 1. First we must compute the distances between all test examples and all train examples.
# 2. Given these distances, for each test example we find the k nearest examples and have them vote for the label
#
# Let's begin with computing the distance matrix between all training and test examples. For example, if there are **Ntr** training examples and **Nte** test examples, this stage should result in an **Nte x Ntr** matrix where each element (i,j) is the distance between the i-th test and j-th train example.
#
# First, open `cs231n/classifiers/k_nearest_neighbor.py` and implement the function `compute_distances_two_loops` that uses a (very inefficient) double loop over all pairs of (test, train) examples and computes the distance matrix one element at a time.

# In[8]:

# Open cs231n/classifiers/k_nearest_neighbor.py and implement
# compute_distances_two_loops.
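
# Several snippets on this page time the three distance implementations
# against each other via time_function. A hedged sketch of that helper (the
# name matches what the other examples call; the real one lives in the
# assignment notebook):
import time

def time_function(f, *args):
    # return how many seconds f(*args) takes to run
    tic = time.time()
    f(*args)
    toc = time.time()
    return toc - tic

# e.g. time_function(classifier.compute_distances_two_loops, X_test)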
Example No. 32
num_train = 5000
X_train = X_train[range(num_train)]
Y_train = Y_train[range(num_train)]
num_test = 500
X_test = X_test[range(num_test)]
Y_test = Y_test[range(num_test)]

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, Y_train)
dists = classifier.compute_distances_two_loops(X_test)
print(dists.shape)
# plt.imshow(dists, interpolation='none')
# plt.show()

# y_test_pred = classifier.predict_labels(dists, k=1)
# num_correct=np.sum(y_test_pred==Y_test,axis=0)
# accuracy = float(num_correct) / num_test
# print('At k:%d, Got %d / %d correct => accuracy: %f' % (1, num_correct, num_test, accuracy))
#
# y_test_pred = classifier.predict_labels(dists, k=5)
# num_correct = np.sum(y_test_pred == Y_test)
# accuracy = float(num_correct) / num_test
# print('At k:%d, Got %d / %d correct => accuracy: %f' % (5, num_correct, num_test, accuracy))
Example No. 34
num_test = 500
mask = range(num_test)
X_test = X_test[mask]
y_test = y_test[mask]

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

from cs231n.classifiers import KNearestNeighbor

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

# Open cs231n/classifiers/k_nearest_neighbor.py and implement
# compute_distances_two_loops.

# Test your implementation:
dists = classifier.compute_distances_two_loops(X_test)
print(dists.shape)

plt.imshow(dists, interpolation='nearest')
plt.show()

# Now implement the function predict_labels and run the code below:
# We use k = 1 (which is Nearest Neighbor).
y_test_pred = classifier.predict_labels(dists, k=1)
Example No. 35
# In[ ]:

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

# In[ ]:

from cs231n.classifiers import KNearestNeighbor

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

# We would now like to classify the test data with the kNN classifier. Recall that we can break down this process into two steps:
#
# 1. First we must compute the distances between all test examples and all train examples.
# 2. Given these distances, for each test example we find the k nearest examples and have them vote for the label
#
# Let's begin with computing the distance matrix between all training and test examples. For example, if there are **Ntr** training examples and **Nte** test examples, this stage should result in an **Nte x Ntr** matrix where each element (i,j) is the distance between the i-th test and j-th train example.
#
# First, open `cs231n/classifiers/k_nearest_neighbor.py` and implement the function `compute_distances_two_loops` that uses a (very inefficient) double loop over all pairs of (test, train) examples and computes the distance matrix one element at a time.

# In[ ]:

# Open cs231n/classifiers/k_nearest_neighbor.py and implement
# compute_distances_two_loops.
Example No. 36
num_test = 500
mask = list(range(num_test))
X_test = X_test[mask]
y_test = y_test[mask]

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

from cs231n.classifiers import KNearestNeighbor

# Create a kNN classifier instance. 
# Remember that training a kNN classifier is a noop: 
# the Classifier simply remembers the data and does no further processing 
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

"""
dists = classifier.compute_distances_two_loops(X_test)
print(dists.shape)
plt.imshow(dists, interpolation='none')
plt.show()

# Now implement the function predict_labels and run the code below:
# We use k = 1 (which is Nearest Neighbor).
y_test_pred = classifier.predict_labels(dists, k=1)

# Compute and print the fraction of correctly predicted examples
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
Example No. 37
X_test = X_test[mask]
y_test = y_test[mask]

# Box5
# Reshape the image data into rows
# reshape(array,newshape)
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

from cs231n.classifiers import KNearestNeighbor

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

# Test your implementation:
dists = classifier.compute_distances_two_loops(X_test)
print(dists.shape)

# We can visualize the distance matrix: each row is a single test example and
# its distances to training examples
plt.imshow(dists, interpolation='none')
plt.show()

# Now implement the function predict_labels and run the code below:
# We use k = 1 (which is Nearest Neighbor).
y_test_pred = classifier.predict_labels(dists, k=1)
Example No. 38
import numpy as np
import h5py
from numpy import loadtxt
from cs231n.classifiers import KNearestNeighbor

h5f = h5py.File('img_data.h5','r')
X = h5f['dataset_1'][:]
h5f.close()
y = loadtxt("y_labels.txt", dtype=np.uint8, delimiter="\n", unpack=False)
X_train = X[8000:35117,:]
y_train = y[8000:35117]
X_val=X[3000:8000,:]
y_val=y[3000:8000]
num_val = 5000

# Create a kNN classifier instance. 
# Remember that training a kNN classifier is a noop: 
# the Classifier simply remembers the data and does no further processing 
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
dists = classifier.compute_distances_no_loops(X_val)
y_val_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_val_pred == y_val)
accuracy = float(num_correct) / num_val
print(accuracy)


Example No. 39
# In[10]:

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)


# In[11]:

from cs231n.classifiers import KNearestNeighbor

# Create a kNN classifier instance. 
# Remember that training a kNN classifier is a noop: 
# the Classifier simply remembers the data and does no further processing 
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)


# In[12]:


# We would now like to classify the test data with the kNN classifier. Recall that we can break down this process into two steps: 
# 
# 1. First we must compute the distances between all test examples and all train examples. 
# 2. Given these distances, for each test example we find the k nearest examples and have them vote for the label
# 
# Let's begin with computing the distance matrix between all training and test examples. For example, if there are **Ntr** training examples and **Nte** test examples, this stage should result in an **Nte x Ntr** matrix where each element (i,j) is the distance between the i-th test and j-th train example.
# 
# First, open `cs231n/classifiers/k_nearest_neighbor.py` and implement the function `compute_distances_two_loops` that uses a (very inefficient) double loop over all pairs of (test, train) examples and computes the distance matrix one element at a time.
Example No. 40
num_test = 500
mask = list(range(num_test))
X_test = X_test[mask]
y_test = y_test[mask]

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)


# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

# Open cs231n/classifiers/k_nearest_neighbor.py and implement
# compute_distances_two_loops.

num_folds = 5
k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]

X_train_folds = []
y_train_folds = []
################################################################################
# TODO:                                                                        #
# Split up the training data into folds. After splitting, X_train_folds and    #
# y_train_folds should each be lists of length num_folds, where                #
# y_train_folds[i] is the label vector for the points in X_train_folds[i].     #
Example No. 41
)

mock_y_train = np.array([1, 2])

mock_X_test = np.array([[2, 4, 6, 9]])

knn_instance = KNearestNeighbor()
knn_instance.train(mock_X_train, mock_y_train)

expected_dists = np.array([[2, 3]])


def test_compute_distance_two_loops():
    actual_dists = knn_instance.compute_distances_two_loops(mock_X_test)
    np.testing.assert_array_equal(actual_dists, expected_dists)


def test_compute_distance_one_loop():
    actual_dists = knn_instance.compute_distances_one_loop(mock_X_test)
    np.testing.assert_array_equal(actual_dists, expected_dists)
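
# A hedged usage note: tests in this style are usually collected and run with
# pytest; the file name below is an assumption about the project layout:
#
#     pytest test_k_nearest_neighbor.py -v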
Example No. 42
mask = range(num_test)
X_test = X_test[mask]
y_test = y_test[mask]


# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

from cs231n.classifiers import KNearestNeighbor

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

# Open cs231n/classifiers/k_nearest_neighbor.py and implement
# compute_distances_two_loops.

# # Test your implementation:
#dists = classifier.compute_distances_two_loops(X_test)
# print(dists.shape)
#
# # We can visualize the distance matrix: each row is a single test example and
# # its distances to training examples
# plt.imshow(dists, interpolation='none')
# plt.show()
#
# # Now implement the function predict_labels and run the code below: