Beispiel #1
0
import pickle

import numpy as np
import sklearn

if __name__ == '__main__':
    # Load the pickled training set: a dict with at least 'data'
    # (num_examples x num_features) and 'target' (labels) keys -- inferred
    # from the indexing below, TODO confirm against the pickle's producer.
    train_path = "/Users/zxj/Desktop/Mini1/train.pkl"
    # Use a context manager so the file handle is closed deterministically
    # (the original `pickle.load(open(...))` leaked the handle).
    with open(train_path, "rb") as train_file:
        train_data = pickle.load(train_file)

    # Fixed_parameters
    # Please do not change the fixed parameters

    val_ratio = 0.2

    # student_parameters
    # You may want to change these in your experiment later.
    train_ratio = 1.0  # we split the train_data into 0.8:training

    # train_num rows from the front are the training split; val_num is kept
    # NEGATIVE so that data[val_num:] slices the validation rows off the end.
    train_num = int(train_data['data'].shape[0] * train_ratio *
                    (1.0 - val_ratio))
    val_num = -1 * int(train_data['data'].shape[0] * train_ratio * val_ratio)
    KNN_classifier = KNearestNeighbor()
    KNN_classifier.train(train_data['data'][:train_num],
                         train_data['target'][:train_num])
    # Distance matrix between every validation row and every training row.
    dists = KNN_classifier.compute_distances(train_data['data'][val_num:, :])
    # Sweep k and report validation accuracy for each setting.
    k_choices = [2, 3, 5, 7, 9, 11, 15, 19]
    for k in k_choices:
        y_test_pred = KNN_classifier.predict_labels(dists, k)

        num_correct = np.sum(y_test_pred == train_data['target'][val_num:])
        # -val_num is the (positive) number of validation examples.
        accuracy = float(num_correct) / (-1 * val_num)
        print(
            'For K= %d and train_ratio= %f, Got %d / %d correct => VAL_accuracy: %f'
            % (k, train_ratio, num_correct, -1 * val_num, accuracy))
Beispiel #2
0
# Open cs231n/classifiers/k_nearest_neighbor.py and implement
# compute_distances_two_loops, then sanity-check it here.
dists = classifier.compute_distances_two_loops(X_test)
print(dists.shape)

# Visualize the distance matrix: each row is a single test example and
# its distances to training examples.
plt.imshow(dists, interpolation='none')
plt.show()

# Run predict_labels for k = 1 (plain nearest neighbor) and then for a
# larger k = 5, reporting the fraction of correct predictions each time.
# After the loop, y_test_pred / num_correct / accuracy hold the k = 5
# results, exactly as the original back-to-back snippets left them.
for k in (1, 5):
    y_test_pred = classifier.predict_labels(dists, k=k)
    num_correct = np.sum(y_test_pred == y_test)
    accuracy = float(num_correct) / num_test
    print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))

# Now implement the fully vectorized version inside compute_distances_no_loops
# and run the code
Beispiel #3
0
# Pre-seed an empty accuracy list for every candidate k so the per-fold
# loop below can append unconditionally.  k_to_accuracies is assumed to be
# defined earlier in the file -- TODO confirm.
for k in k_choices:
    k_to_accuracies.setdefault(k, [])

# num_folds-fold cross-validation: fold i is the validation split, the
# remaining folds are concatenated into the training split.
for i in range(num_folds):
    classifier = KNearestNeighbor()
    x_val_train = np.concatenate((x_train_folds[0:i], x_train_folds[i + 1:]),
                                 axis=0)
    x_val_train = x_val_train.reshape(-1, x_val_train.shape[2])
    y_val_train = np.concatenate((y_train_folds[0:i], y_train_folds[i + 1:]),
                                 axis=0)
    y_val_train = y_val_train.reshape(-1, y_val_train.shape[2])

    # Label folds appear to carry a trailing singleton axis; keep only the
    # label column -- TODO confirm fold shapes against the fold-splitting code.
    y_val_train = y_val_train[:, 0]
    classifier.train(x_val_train, y_val_train)
    for k in k_choices:
        # NOTE(review): the raw validation fold is passed here, while other
        # call sites in this file pass a precomputed distance matrix --
        # confirm this predict_labels computes distances internally.
        y_val_pred = classifier.predict_labels(x_train_folds[i], k=k)
        num_correct = np.sum(y_val_pred == y_train_folds[i][:, 0])
        accuracy = float(num_correct) / len(y_val_pred)
        # Append in place instead of rebuilding the list on every iteration.
        k_to_accuracies[k].append(accuracy)

# Scatter the raw per-fold accuracies for each k.
for k in k_choices:
    accuracies = k_to_accuracies[k]
    plt.scatter([k] * len(accuracies), accuracies)

# Overlay mean +/- standard deviation per k.
accuracies_mean = np.array(
    [np.mean(v) for k, v in sorted(k_to_accuracies.items())])
accuracies_std = np.array(
    [np.std(v) for k, v in sorted(k_to_accuracies.items())])
plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std)
plt.title('cross-validation on k')
plt.xlabel('k')
Beispiel #4
0
# values of k in the k_to_accuracies dictionary.                               #
################################################################################

# k-fold cross-validation: for every candidate k, train on num_folds - 1
# folds, validate on the held-out fold, and record one accuracy per fold.
classifier_cross = KNearestNeighbor()
for k in k_choices:
    accuracy_list = list()
    for i in range(num_folds):
        X_test_temp = np.array(X_train_folds[i])
        y_test_temp = np.array(y_train_folds[i])
        # All folds except i form the training split.
        X_train_tmp = np.array(X_train_folds[:i] + X_train_folds[i + 1:])
        y_train_tmp = np.array(y_train_folds[:i] + y_train_folds[i + 1:])
        X_train_tmp = X_train_tmp.reshape(-1, X_train_tmp.shape[2])
        y_train_tmp = y_train_tmp.reshape(-1)
        classifier_cross.train(X_train_tmp, y_train_tmp)
        dists = classifier_cross.compute_distances_no_loops(X_test_temp)
        # BUG FIX: predict with classifier_cross (trained just above); the
        # original called an unrelated `classifier` object from elsewhere in
        # the file, so the reported accuracies ignored this fold's training.
        y_test_pred = classifier_cross.predict_labels(dists, k)
        num_correct = np.sum(y_test_pred == y_test_temp)
        num_test = X_test_temp.shape[0]
        accuracy = float(num_correct) / num_test
        accuracy_list.append(accuracy)
    k_to_accuracies[k] = accuracy_list

# Pick the k with the best mean cross-validation accuracy.
best_k = -1
max_accuracy = -1
for k in k_choices:
    accuracies = k_to_accuracies[k]
    avg_accuracy = sum(accuracies) / len(accuracies)
    print(k, ' : ', avg_accuracy)
    if avg_accuracy > max_accuracy:
        max_accuracy = avg_accuracy
        best_k = k
Beispiel #5
0
    # Flatten the (pre-subsampled) splits: 500 train rows, 10 test rows;
    # labels become (num_examples, 1) columns.
    X_train = X_train.reshape(500, -1)
    y_train = y_train.reshape(500, -1)
    X_test = X_test.reshape(10, -1)
    y_test = y_test.reshape(10, -1)

    classifier = KNearestNeighbor()
    classifier.train(X_train, y_train)
    dists = classifier.compute_distance_two_loops(X_test)
    dists_one = classifier.compute_distance_one_loop(X_test)

    # The two distance implementations must agree (Frobenius norm of the
    # difference near zero).
    diff = np.linalg.norm(dists - dists_one, ord='fro')
    if diff < 0.001:
        print('good')
    else:
        print('bad')
    y_pred = classifier.predict_labels(dists, 1)
    # BUG FIX: the original compared the 10 test predictions against the 500
    # TRAIN labels (broadcasting to a 500x10 boolean matrix) and then took
    # len() of the np.where tuple, which is always the number of axes, not a
    # count.  Count matches against the test labels instead.
    num_correct = int(np.sum(y_pred == y_test.ravel()))
    print('accuracy: ', num_correct / len(y_test))

    # cross validation
    num_folds = 5
    # NOTE(review): `k_chioces` is a typo for k_choices; kept because code
    # outside this chunk may reference the misspelled name -- confirm and
    # rename globally if not.
    k_chioces = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]

    # np.split returns num_folds equal chunks of the training data.
    X_train_folds = np.split(X_train, num_folds)
    y_train_folds = np.split(y_train, num_folds)

    # save the results
    k_to_accuracy = {}
Beispiel #6
0
# Load CIFAR-10 and report the raw tensor shapes before flattening.
cifar_10_dir = './cifar-10-batches-py'
x_train, y_train, x_test, y_test = load_cifar10(cifar_10_dir)
for shape_label, split in (('train_data_shape:', x_train),
                           ('train_labels_shape:', y_train),
                           ('test_data_shape:', x_test),
                           ('test_labels_shape:', y_test)):
    print(shape_label, split.shape)

# Flatten each image into a single feature row.
x_train = x_train.reshape(len(x_train), -1)
x_test = x_test.reshape(len(x_test), -1)
num_train = len(x_train)
num_test = len(x_test)

# Optional subsampling for faster experiments (disabled):
# num_train = 5000
# mask = range(num_train)
# x_train = x_train[mask]
# y_train = y_train[mask]
# num_test = 500
# mask = range(num_test)
# x_test = x_test[mask]
# y_test = y_test[mask]

# Train the kNN classifier, predict the test labels with k = 10, and
# report the resulting accuracy.
classifier = KNearestNeighbor()
classifier.train(x_train, y_train)
dicts = classifier.compute_distance(x_test)
y_test_pred = classifier.predict_labels(dicts, k=10)

num_correct = np.sum(y_test_pred == y_test)
accuracy = num_correct / num_test
print('got %d / %d correct => accuracy: %f' %
      (num_correct, num_test, accuracy))