예제 #1
0
    def model_with_best_k(self, best_k=10):
        """Evaluate a k-NN classifier on the test split and print its accuracy.

        A fresh ``KNearestNeighbor`` is trained on ``self.X_train`` and
        ``self.y_train``, labels are predicted for ``self.X_test`` and the
        number of predictions equal to ``self.y_test`` is reported. The
        original assignment text states that above 28% accuracy on the test
        data should be reachable.

        Args:
            best_k: Number of neighbours forwarded to ``predict``. Defaults to
                10, the value that used to be hard-coded in this method; pass
                the value selected by cross-validation to override it.

        Returns:
            None. The result is only printed.
        """
        classifier = KNearestNeighbor()
        classifier.train(self.X_train, self.y_train)
        y_test_pred = classifier.predict(self.X_test, k=best_k)

        # Compute and display the accuracy.
        num_correct = np.sum(y_test_pred == self.y_test)
        accuracy = float(num_correct) / self.num_test
        # A single parenthesised argument prints the same text under both the
        # Python 2 print statement and the Python 3 print function.
        print('Got %d / %d correct => accuracy: %f' % (num_correct,
                                                       self.num_test, accuracy))
예제 #2
0
        print 'k = %d, accuracy = %f' % (k, accuracy)

# Plot the raw observations: one column of points per candidate k.
for k in k_choices:
    accuracies = k_to_accuracies[k]
    plt.scatter([k] * len(accuracies), accuracies)

# Plot the trend line with error bars that correspond to standard deviation.
# The x values and the per-k statistics are all derived from the same sorted
# sequence, so every mean/std pair is drawn at its own k even when k_choices
# is not listed in ascending order.
sorted_k_choices = sorted(k_choices)
accuracies_mean = np.array(
    [np.mean(k_to_accuracies[k]) for k in sorted_k_choices])
accuracies_std = np.array(
    [np.std(k_to_accuracies[k]) for k in sorted_k_choices])
plt.errorbar(sorted_k_choices, accuracies_mean, yerr=accuracies_std)
plt.title('Cross-validation on k')
plt.xlabel('k')
plt.ylabel('Cross-validation accuracy')
plt.show()

# Based on the cross-validation results above, choose the best value for k,
# retrain the classifier using all the training data, and test it on the test
# data. You should be able to get above 28% accuracy on the test data.
best_k = 1

classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
y_test_pred = classifier.predict(X_test, k=best_k)

# Compute and display the accuracy.
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
# A single parenthesised argument prints the same text under both the
# Python 2 print statement and the Python 3 print function.
print('Got %d / %d correct => accuracy: %f' %
      (num_correct, num_test, accuracy))
예제 #3
0
# X_test = X_test[mask]
# y_test = y_test[mask]

# Flatten the image data so that every sample occupies exactly one row.
train_shape = (X_train.shape[0], -1)
test_shape = (X_test.shape[0], -1)
X_train = np.reshape(X_train, train_shape)
X_test = np.reshape(X_test, test_shape)
print(X_train.shape, X_test.shape)

# Build the kNN classifier. Per the assignment notes, "training" a kNN
# classifier is a no-op: it just remembers the data it is handed.
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

# Cross-validation (currently disabled).
# validations = cross_validation(X_train,y_train)
# print(validations)

# The assignment asks for the best k from cross-validation and expects above
# 28% accuracy on the test data; this script evaluates with a fixed k of 100.
y_test_pred = classifier.predict(X_test, k=100)

# Count the predictions that match the labels and report the accuracy.
is_correct = y_test_pred == y_test
num_correct = np.sum(is_correct)
accuracy = float(num_correct) / num_test
summary = 'Got %d / %d correct => accuracy: %f' % (
    num_correct, num_test, accuracy)
print(summary)
예제 #4
0
파일: knn.py 프로젝트: ucohen/test-repo
# accuracy values that we found when using that value of k.
k_to_accuracies = {}

################################################################################
# TODO:                                                                        #
# Perform k-fold cross validation to find the best value of k. For each        #
# possible value of k, run the k-nearest-neighbor algorithm num_folds times,   #
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all fold and all     #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
# NOTE: num_folds must be at least 2, otherwise no training folds remain once
# the validation fold has been held out.
for k in k_choices:
    accuracies = []
    for f in range(num_folds):
        # Train on every fold except f.
        X_fit = np.concatenate(
            [X_train_folds[j] for j in range(num_folds) if j != f])
        y_fit = np.concatenate(
            [y_train_folds[j] for j in range(num_folds) if j != f])
        classifier.train(X=X_fit, y=y_fit)

        # Validate on the held-out fold f. The test set is deliberately not
        # touched here so that it cannot influence the choice of k.
        X_val = X_train_folds[f]
        y_val = y_train_folds[f]
        y_val_pred = classifier.predict(X=X_val, k=k)
        num_correct = np.sum(y_val_pred == y_val)
        num_val = len(y_val)
        accuracies.append(float(num_correct) / num_val)
        print('Got %d / %d correct => accuracy: %f' %
              (num_correct, num_val, accuracies[f]))
    k_to_accuracies[k] = accuracies

################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Report every recorded accuracy, visiting the k values in ascending order.
for k, fold_accuracies in sorted(k_to_accuracies.items()):
    for accuracy in fold_accuracies:
        print('k = %d, accuracy = %f' % (k, accuracy))

예제 #5
0
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all fold and all     #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

for k in k_choices:
    fold_scores = []
    for held_out in range(num_folds):
        # Every fold except the held-out one contributes to the training set.
        kept = [j for j in range(num_folds) if j != held_out]
        X_fit = np.concatenate([X_train_folds[j] for j in kept])
        y_fit = np.concatenate([y_train_folds[j] for j in kept])
        classifier.train(X_fit, y_fit)

        # Score the predictions for the held-out fold against its labels.
        y_val = y_train_folds[held_out]
        y_val_pred = classifier.predict(X_train_folds[held_out], k=k)
        fold_scores.append(1.0 * np.sum(y_val_pred == y_val) / len(y_val))
    k_to_accuracies[k] = fold_scores

# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

# Report every recorded accuracy, visiting the k values in ascending order.
for k, fold_accuracies in sorted(k_to_accuracies.items()):
    for accuracy in fold_accuracies:
        print('k = %d, accuracy = %f' % (k, accuracy))

# Scatter the raw observations: one column of points per candidate k.
for k in k_choices:
    accuracies = k_to_accuracies[k]
    x_positions = [k] * len(accuracies)
    plt.scatter(x_positions, accuracies)

# plot the trend line with error bars that correspond to standard deviation
예제 #6
0
파일: knn.py 프로젝트: RemyEE/cs231n-2
# values of k in the k_to_accuracies dictionary.                               #
################################################################################

for k in k_choices:
    fold_accuracies = []
    for i in range(num_folds):
        # Fold i is held out for validation; all other folds are joined into
        # the training set for this round.
        X_valid = X_train_folds[i]
        y_valid = y_train_folds[i]
        X_train_i = np.concatenate(
            [fold for j, fold in enumerate(X_train_folds) if j != i])
        y_train_i = np.concatenate(
            [fold for j, fold in enumerate(y_train_folds) if j != i])

        # A new classifier instance is created for every round.
        classifier = KNearestNeighbor()
        classifier.train(X_train_i, y_train_i)
        preds = classifier.predict(X_valid, k=k)

        # Fraction of validation predictions equal to their labels.
        fold_accuracies.append((preds == y_valid).mean())

    k_to_accuracies[k] = fold_accuracies
    print(k)

################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies, visiting the k values in ascending order.
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        # A single parenthesised argument prints the same text under both the
        # Python 2 print statement and the Python 3 print function.
        print('k = %d, accuracy = %f' % (k, accuracy))