def test_arange(self):
    train = np.arange(150).reshape(5, -1)
    test = np.square(np.arange(2, 122)).reshape(4, -1)
    knn = KNearestNeighbor()
    knn.train(train, None)
    d_two = knn.compute_distances_two_loops(test)
    d_one = knn.compute_distances_one_loop(test)
    d_no = knn.compute_distances_no_loops(test)
    self.assertAlmostEqual(0, np.linalg.norm(d_two - d_one, ord='fro'))
    self.assertAlmostEqual(0, np.linalg.norm(d_no - d_one, ord='fro'))
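# The test above checks that all three distance implementations agree. For
# reference, a minimal sketch of the fully vectorized version, assuming the
# classifier stores its training data as self.X_train (the attribute name is
# an assumption); it uses the expansion ||x - y||^2 = ||x||^2 - 2*x.y + ||y||^2.
def compute_distances_no_loops(self, X):
    # Squared norms of test rows, kept as a column for broadcasting.
    test_sq = np.sum(X ** 2, axis=1, keepdims=True)     # (num_test, 1)
    # Squared norms of training rows.
    train_sq = np.sum(self.X_train ** 2, axis=1)        # (num_train,)
    # Cross terms for every test/train pair.
    cross = X.dot(self.X_train.T)                       # (num_test, num_train)
    # Clamp at zero to guard against tiny negatives from floating point.
    return np.sqrt(np.maximum(test_sq - 2 * cross + train_sq, 0))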
def Cross_validation(X_train, y_train):
    """Cross-validation to choose the hyperparameter k, with a plot of the results.

    :param X_train: training data
    :param y_train: training labels
    """
    num_folds = 5
    k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]
    k_accuracy = {}
    # Split the data into 5 folds.
    X_train_folds = np.array_split(X_train, num_folds)
    y_train_folds = np.array_split(y_train, num_folds)
    # Evaluate each candidate k.
    for k in k_choices:
        k_accuracy[k] = []
        # For each k, compute the accuracy with each fold held out in turn.
        for index in range(num_folds):
            # Build the training/validation split. Integer division keeps
            # the shape argument an int (plain / fails under Python 3).
            X_te = X_train_folds[index]
            y_te = y_train_folds[index]
            fold_train_size = X_train.shape[0] * (num_folds - 1) // num_folds
            X_tr = np.reshape(
                X_train_folds[:index] + X_train_folds[index + 1:],
                (fold_train_size, -1))
            y_tr = np.reshape(
                y_train_folds[:index] + y_train_folds[index + 1:],
                (fold_train_size,))
            # Predict on the held-out fold.
            classify = KNearestNeighbor()
            classify.train(X_tr, y_tr)
            y_te_pred = classify.predict(X_te, k=k)
            accuracy = np.sum(y_te_pred == y_te) / float(X_te.shape[0])
            k_accuracy[k].append(accuracy)
    for k, accuracylist in k_accuracy.items():
        for accuracy in accuracylist:
            print("k = %d, accuracy = %.3f" % (k, accuracy))
    # Visualize the effect of each k.
    for k in k_choices:
        accuracies = k_accuracy[k]
        plt.scatter([k] * len(accuracies), accuracies)
    accuracies_mean = np.array(
        [np.mean(v) for k, v in sorted(k_accuracy.items())])
    accuracies_std = np.array(
        [np.std(v) for k, v in sorted(k_accuracy.items())])
    # Error-bar plot built from the mean and standard deviation per k.
    plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std)
    plt.title('Cross-validation on k')
    plt.xlabel('k')
    plt.ylabel('Cross-validation accuracy')
    plt.show()
# Parameters (from np.reshape):
#   a : array_like
#       Array to be reshaped.
#   newshape : int or tuple of ints
#       The new shape should be compatible with the original shape. If an
#       integer, then the result will be a 1-D array of that length. One
#       shape dimension can be -1.
from k_nearest_neighbor import KNearestNeighbor

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a no-op:
# the classifier simply remembers the data and does no further processing.
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

# Open cs231n/classifiers/k_nearest_neighbor.py and implement
# compute_distances_two_loops.

# Test your implementation:
dists = classifier.compute_distances_two_loops(X_test)
print(dists.shape)

# We can visualize the distance matrix: each row is a single test example and
# its distances to training examples.
plt.imshow(dists, interpolation='none')
plt.show()

# Now implement the function predict_labels and run the code below:
# We use k = 1 (which is Nearest Neighbor).
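# A minimal sketch of what compute_distances_two_loops typically looks like,
# assuming the training data is stored as self.X_train (attribute name
# assumed); the vectorized variants must reproduce exactly this matrix.
def compute_distances_two_loops(self, X):
    num_test = X.shape[0]
    num_train = self.X_train.shape[0]
    dists = np.zeros((num_test, num_train))
    for i in range(num_test):
        for j in range(num_train):
            # Euclidean (L2) distance between test point i and training point j.
            dists[i, j] = np.sqrt(np.sum((X[i] - self.X_train[j]) ** 2))
    return dists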
    line_split = line.split(',')
    # list() is required under Python 3, where map() returns an iterator.
    data_instances.append(list(map(float, line_split)))
data_instances = np.array(data_instances)
np.random.shuffle(data_instances)

# 5-fold cross-validation.
learner_type = "CLASSIFICATION"
# Integer division so fold_size can be used as a slice index.
fold_size = data_instances.shape[0] // 5
data_indices = [idx for idx in range(data_instances.shape[0])]
for k in range(1, 100, 5):
    total_performance = 0.0
    for holdout_fold_idx in range(5):
        kNN_model = KNearestNeighbor(k, learner_type)
        # Train on everything outside the held-out fold.
        holdout = slice(fold_size * holdout_fold_idx,
                        fold_size * holdout_fold_idx + fold_size)
        kNN_model.train(data_instances[
            np.setdiff1d(data_indices, data_indices[holdout])])
        kNN_model.condense_training_data()
        # Predict the held-out fold using k-NN and average performance.
        predictions = kNN_model.predict(data_instances[holdout])
        successes = fold_size - sum(abs(
            predictions - data_instances[holdout, -1]))
        performance = successes / fold_size
# Create a kNN classifier instance.
# Remember that training a kNN classifier is a no-op:
# the classifier simply remembers the data and does no further processing.
# classifier = KNearestNeighbor()
# classifier.train(X_train, y_train)

# numK = [8, 9, 10, 11, 12, 13, 14, 15, 16]
numK = [12]
results = {}
bestValAcc = 0
bestK = None
for num in numK:
    knn = KNearestNeighbor()
    knn.train(X_train, y_train)
    y_train_pred = knn.predict(X_train, k=num)
    y_val_pred = knn.predict(X_val, k=num)
    trainAcc = np.mean(y_train == y_train_pred)
    valAcc = np.mean(y_val == y_val_pred)
    print('k: %d train accuracy: %.4f val accuracy: %.4f' % (num, trainAcc, valAcc))
    if valAcc > bestValAcc:
        bestValAcc = valAcc
        bestK = num
print('best validation accuracy achieved: %.4f, with best k: %d' % (bestValAcc, bestK))

# Based on the cross-validation results above, choose the best value for k,
# retrain the classifier using all the training data, and test it on the test
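# A minimal sketch of that follow-up step, assuming X_test and y_test are in
# scope alongside the names used above (bestK, X_train, y_train):
best_knn = KNearestNeighbor()
best_knn.train(X_train, y_train)
y_test_pred = best_knn.predict(X_test, k=bestK)
testAcc = np.mean(y_test == y_test_pred)
print('test accuracy with k = %d: %.4f' % (bestK, testAcc))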
k_to_accuracies = {}
for k in k_choices:
    k_to_accuracies.setdefault(k, [])
for i in range(num_folds):
    classifier = KNearestNeighbor()
    x_val_train = np.concatenate((x_train_folds[0:i], x_train_folds[i + 1:]), axis=0)
    x_val_train = x_val_train.reshape(-1, x_val_train.shape[2])
    y_val_train = np.concatenate((y_train_folds[0:i], y_train_folds[i + 1:]), axis=0)
    y_val_train = y_val_train.reshape(-1, y_val_train.shape[2])
    y_val_train = y_val_train[:, 0]
    classifier.train(x_val_train, y_val_train)
    for k in k_choices:
        y_val_pred = classifier.predict_labels(x_train_folds[i], k=k)
        num_correct = np.sum(y_val_pred == y_train_folds[i][:, 0])
        accuracy = float(num_correct) / len(y_val_pred)
        k_to_accuracies[k].append(accuracy)

for k in k_choices:
    accuracies = k_to_accuracies[k]
    plt.scatter([k] * len(accuracies), accuracies)
accuracies_mean = np.array(
    [np.mean(v) for k, v in sorted(k_to_accuracies.items())])
accuracies_std = np.array(
    [np.std(v) for k, v in sorted(k_to_accuracies.items())])
plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std)
y_train = y_train[mask]
num_test = 500
mask = range(num_test)
X_test = X_test[mask]
y_test = y_test[mask]

# Reshape the training and test data into rows.
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape)
print(X_test.shape)

classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
dists = classifier.compute_distances_two_loops(X_test)
print(dists.shape)
plt.imshow(dists, interpolation='none')
plt.show()

# Now implement the function predict_labels and run the code below:
# with k = 1.
y_test_pred = classifier.predict_labels(dists, k=1)

# Compute and print the fraction of correctly predicted examples.
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))
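# For reference, a minimal sketch of what predict_labels typically does,
# assuming integer class labels stored as self.y_train (attribute name
# assumed): take the k smallest distances in each row and return the
# majority label.
def predict_labels(self, dists, k=1):
    num_test = dists.shape[0]
    y_pred = np.zeros(num_test, dtype=int)
    for i in range(num_test):
        # Labels of the k closest training points for test point i.
        closest_y = self.y_train[np.argsort(dists[i])[:k]]
        # Majority vote; np.bincount breaks ties toward the smaller label.
        y_pred[i] = np.argmax(np.bincount(closest_y))
    return y_pred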
cifar_10_dir = './cifar-10-batches-py'
x_train, y_train, x_test, y_test = load_cifar10(cifar_10_dir)
print('train_data_shape:', x_train.shape)
print('train_labels_shape:', y_train.shape)
print('test_data_shape:', x_test.shape)
print('test_labels_shape:', y_test.shape)

x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)
num_train = x_train.shape[0]
num_test = x_test.shape[0]
# num_train = 5000
# mask = range(num_train)
# x_train = x_train[mask]
# y_train = y_train[mask]
# num_test = 500
# mask = range(num_test)
# x_test = x_test[mask]
# y_test = y_test[mask]

classifier = KNearestNeighbor()
classifier.train(x_train, y_train)
dists = classifier.compute_distance(x_test)
y_test_pred = classifier.predict_labels(dists, k=10)
num_correct = np.sum(y_test_pred == y_test)
# float() guards against integer division under Python 2.
accuracy = float(num_correct) / num_test
print('got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))
X_train = X_train[mask]
y_train = y_train[mask]
num_test = 500
mask = list(range(num_test))
X_test = X_test[mask]
y_test = y_test[mask]

# Reshape the image data into rows.
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

# Create a kNN classifier instance, k=1.
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
"""
dists = classifier.compute_distance_two_loops(X_test)
print('dists.shape is')
print(dists.shape)
# plt.imshow(dists, interpolation='none')
# plt.savefig('/home/hongyin/file/cs231n-assignment1/picFaster.jpg')
y_test_pred = classifier.predict_labels(dists, k=1)
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))

# k=5
y_test_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_test_pred == y_test)
plt_idx = i * class_num + y + 1
plt.subplot(samples_pre_class, class_num, plt_idx)
plt.imshow(X_train[idx].astype('uint8'))
plt.axis('off')
if i == 0:
    plt.title(cls)
plt.show()

X_train = X_train.reshape(500, -1)
y_train = y_train.reshape(500, -1)
X_test = X_test.reshape(10, -1)
y_test = y_test.reshape(10, -1)
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
dists = classifier.compute_distance_two_loops(X_test)
dists_one = classifier.compute_distance_one_loop(X_test)
diff = np.linalg.norm(dists - dists_one, ord='fro')
if diff < 0.001:
    print('good')
else:
    print('bad')
y_pred = classifier.predict_labels(dists, 1)
# Count correct predictions against the test labels. The original compared
# against y_train and took len() of the np.where tuple, which is always 1.
num_correct = np.sum(y_pred == y_test.ravel())
print('accuracy: ', float(num_correct) / len(y_test))

# cross validation
num_folds = 5
k_chioces = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]
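# The comparison above checks the one-loop variant against the two-loop one.
# A minimal sketch of the partially vectorized version, again assuming the
# training data is stored as self.X_train (attribute name assumed):
def compute_distance_one_loop(self, X):
    num_test = X.shape[0]
    num_train = self.X_train.shape[0]
    dists = np.zeros((num_test, num_train))
    for i in range(num_test):
        # (num_train, D) - (D,) broadcasts across all training rows at once.
        dists[i, :] = np.sqrt(np.sum((self.X_train - X[i]) ** 2, axis=1))
    return dists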
# Visualize some images.
VisualizeImage(X_train, y_train)
input('Enter any key to Cross-validation...')

# Build a subsample for hyperparameter tuning by cross-validation
# (a plain validation set would also work, since the data set is large).
num_training = 5000
X_tr = X_train[:num_training, ::]
X_tr = np.reshape(X_tr, (X_tr.shape[0], -1))
y_tr = y_train[:num_training]
# print(X_tr.shape, y_tr.shape)
num_testing = 500
X_te = X_test[:num_testing, ::]
X_te = np.reshape(X_te, (X_te.shape[0], -1))
y_te = y_test[:num_testing]
# print(X_te.shape, y_te.shape)

# Cross-validation to determine k.
Cross_validation(X_tr, y_tr)
input('Enter any key to train model...')

# Train on the full data set (here 5000 examples serve as the full training
# set and 500 as the test set, since 60000 examples exceed this machine's
# memory; the plot shows k = 10 is best).
classify = KNearestNeighbor()
classify.train(X_tr, y_tr)
y_te_pred = classify.predict(X_te, k=10)
accuracy = np.sum(y_te_pred == y_te) / float(X_te.shape[0])
print('Final test: K = %d, accuracy = %.3f' % (10, accuracy))
centroids = kmeans_model.get_centroids()
for cluster_idx in range(len(clusters)):  # range, not Python 2 xrange
    ave_label = 0.0
    for instance in clusters[cluster_idx]:
        ave_label += instance[-1]
    if len(clusters[cluster_idx]) > 0:
        ave_label = ave_label / len(clusters[cluster_idx])
    if learner_type == "CLASSIFICATION":
        ave_label = int(round(ave_label))
    centroids[cluster_idx].append(ave_label)
# For classification, vote to determine the centroid's class;
# for regression, average to find the centroid's estimate.
# Feed the centroids into k-NN as training data.
kNN_model = KNearestNeighbor(best_ks[test[0]], learner_type)
kNN_model.train(centroids)
# Predict test data using k-NN and average performance.
holdout = slice(fold_size * holdout_fold_idx,
                fold_size * holdout_fold_idx + fold_size)
predictions = kNN_model.predict(data_instances[holdout])
if kNN_model.learner_type == "CLASSIFICATION":
    successes = fold_size - sum(abs(
        predictions - data_instances[holdout, -1]))
    performance = successes / fold_size
elif kNN_model.learner_type == "REGRESSION":
    performance = sum((predictions - \
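# Note on the snippet above: rounding the average label only amounts to a
# vote for binary 0/1 labels. For multi-class data a true majority vote is
# needed; a minimal sketch (the helper name majority_label is hypothetical):
from collections import Counter

def majority_label(instances):
    # Each instance stores its class label in the last position.
    labels = [instance[-1] for instance in instances]
    # most_common(1) returns [(label, count)] for the top label.
    return Counter(labels).most_common(1)[0][0]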
k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]
X_train_folds = np.array(np.split(X_train, num_folds))
y_train_folds = np.array(np.split(y_train, num_folds))
k_to_accuracies = {}

# Test each k.
for k in k_choices:
    # Loop over each validation fold.
    for val_idx in range(num_folds):
        # Get the indexes of the training folds, e.g. [1, 2, 3, 4], [0, 2, 3, 4], ...
        train_idx = [i for i in range(num_folds) if i != val_idx]
        # Build the training x & y for this split.
        X_train_set = np.concatenate(X_train_folds[train_idx])
        y_train_set = np.concatenate(y_train_folds[train_idx])
        # Train.
        knn_classifer.train(X_train_set, y_train_set)
        # Predict on the current validation fold.
        predict_y = knn_classifer.predict(X_train_folds[val_idx], k)
        # Compute accuracy on the current validation fold.
        accuracy = np.mean(predict_y == y_train_folds[val_idx])
        # Store the accuracy.
        k_to_accuracies.setdefault(k, []).append(accuracy)

# Print out the computed accuracies.
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d is %f' % (k, accuracy))
    print('mean for k = %d is %f' % (k, np.mean(k_to_accuracies[k])))

# Plot.
for k in k_choices:
    accuracies = k_to_accuracies[k]
def run_model_with_cross_validation(model_name, knn_mode, k_number):
    # GET DATA - expect data_0 ... data_4.
    data_groups = [np.loadtxt('data_%d' % i, delimiter=',') for i in range(5)]
    NUM_GROUPS = len(data_groups)

    # For each data group, train on all the others and test on it.
    culminating_result = 0
    for test_group_id in range(NUM_GROUPS):
        # Form the training data from the other 4/5 of the data.
        train_data = np.array([])
        for train_group_id in range(len(data_groups)):
            if train_group_id != test_group_id:
                # Initialize train_data if necessary.
                if train_data.size == 0:
                    train_data = np.copy(data_groups[train_group_id])
                else:
                    train_data = np.concatenate(
                        (train_data, data_groups[train_group_id]), axis=0)
        print('train_data, group ', str(test_group_id), 'length: ', len(train_data))
        print(train_data)
        test_data = data_groups[test_group_id]

        result = 0
        if model_name == 'knn':
            model = KNearestNeighbor(train_data, k_number)
            model.train(train_data)
            print('KNN train data length', len(model.data))
            result = model.test(test_data, knn_mode)
        elif model_name == 'c_knn':
            model = CondensedKNearestNeighbor(train_data, k_number)
            # Mode is always majority; this is not used for regression.
            mode = "majority"
            model.train(train_data)
            print('condensed KNN train data length', len(model.data))
            result = model.test(test_data, mode)
        else:
            print('error - ', model_name, ' is not a supported model')
            return

        print('test_data, group ', str(test_group_id), 'length:', len(test_data))
        print(test_data)
        print()
        print('result of iteration ' + str(test_group_id))
        print(result)
        print()
        culminating_result = culminating_result + result

    final_average_result = culminating_result / NUM_GROUPS
    print()
    print('final average result:')
    print(final_average_result)
    print()
    return final_average_result
# Imports needed by this snippet.
import pickle
import numpy as np
import sklearn
from k_nearest_neighbor import KNearestNeighbor

if __name__ == '__main__':
    train_path = "/Users/zxj/Desktop/Mini1/train.pkl"
    train_data = pickle.load(open(train_path, "rb"))

    # Fixed parameters. Please do not change the fixed parameters.
    val_ratio = 0.2
    # Student parameters. You may want to change these in your experiment later.
    train_ratio = 1.0

    # Split train_data 0.8 : 0.2 into training and validation.
    train_num = int(train_data['data'].shape[0] * train_ratio * (1.0 - val_ratio))
    val_num = -1 * int(train_data['data'].shape[0] * train_ratio * val_ratio)

    KNN_classifier = KNearestNeighbor()
    KNN_classifier.train(train_data['data'][:train_num],
                         train_data['target'][:train_num])
    dists = KNN_classifier.compute_distances(train_data['data'][val_num:, :])
    k_choices = [2, 3, 5, 7, 9, 11, 15, 19]
    for k in k_choices:
        y_test_pred = KNN_classifier.predict_labels(dists, k)
        num_correct = np.sum(y_test_pred == train_data['target'][val_num:])
        accuracy = float(num_correct) / (-1 * val_num)
        print('For K= %d and train_ratio= %f, Got %d / %d correct => VAL_accuracy: %f'
              % (k, train_ratio, num_correct, -1 * val_num, accuracy))
x_train_folds = np.array_split(X_train, num_folds)
y_train_folds = np.array_split(Y_train, num_folds)
k_to_accuracies = {}

classifier = KNearestNeighbor()
for k in k_choices:
    accuracies = np.zeros(num_folds)
    for fold in range(num_folds):
        temp_X = x_train_folds[:]
        temp_y = y_train_folds[:]
        x_validate_fold = temp_X.pop(fold)
        y_validate_fold = temp_y.pop(fold)
        temp_X = np.concatenate(temp_X)
        temp_y = np.concatenate(temp_y)
        classifier.train(temp_X, temp_y)
        y_test_pred = classifier.predict(x_validate_fold, k=k)
        num_correct = np.sum(y_test_pred == y_validate_fold)
        # Divide by the size of the validation fold; the original divided by
        # num_test, which belongs to the held-out test set.
        accuracy = float(num_correct) / len(y_validate_fold)
        accuracies[fold] = accuracy
    k_to_accuracies[k] = accuracies

for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))