Esempio n. 1
0
def run():
    """Scatter-plot the k-NN classification rate on the test set.

    Loads the six arrays returned by ``LoadData('digits.npz')``, transposes
    each of them, runs ``run_knn`` for every odd k from 1 to 19 against the
    test inputs, scores each prediction with ``correctPredictRate`` and draws
    one scatter point per k.
    """
    (inputs_train, inputs_valid, inputs_test,
     target_train, target_valid, target_test) = LoadData('digits.npz')
    # Every array is transposed before being handed to run_knn.
    inputs_train, inputs_valid, inputs_test = (
        inputs_train.T, inputs_valid.T, inputs_test.T)
    target_train, target_valid, target_test = (
        target_train.T, target_valid.T, target_test.T)

    # To evaluate another split, pass inputs_valid / inputs_train here and
    # score against target_valid / target_train (and adjust the title) below.
    predict_label_dict = {}
    for i in range(10):
        k = 1 + 2 * i
        predict_label_dict[k] = run_knn(
            k, inputs_train, target_train, inputs_test)

    # plot config: marker area
    area = np.pi * (3) ** 2

    # .items() works on both Python 2 and Python 3 (iteritems is 2-only).
    for k, predict_label in predict_label_dict.items():
        rate = correctPredictRate(target_test, predict_label)
        plt.scatter(k, rate, s=area, alpha=0.8)

    # Title and axis labels are loop-invariant, so set them once.
    plt.title("test set")
    plt.xlabel('k value')
    plt.ylabel('Correct Classification Percent')
    plt.show()
Esempio n. 2
0
def run():
    """Scatter-plot the k-NN classification rate on the training set.

    Runs ``run_knn`` for every odd k from 1 to 19 using the training data
    both as the reference set and as the query set, scores each prediction
    with ``correctPredictRate`` and draws one scatter point per k.
    """
    train_inputs, train_targets = load_train()
    # The validation split is loaded but only used by the alternative
    # configuration described below.
    valid_inputs, valid_targets = load_valid()

    # To evaluate on the validation split instead, pass valid_inputs as the
    # last argument here and score against valid_targets below.
    predict_label_dict = {}
    for i in range(10):
        k = 1 + 2 * i
        predict_label_dict[k] = run_knn(
            k, train_inputs, train_targets, train_inputs)

    # plot config: marker area
    area = np.pi * (3) ** 2

    # .items() works on both Python 2 and Python 3 (iteritems is 2-only).
    for k, predict_label in predict_label_dict.items():
        rate = correctPredictRate(train_targets, predict_label)
        plt.scatter(k, rate, s=area, alpha=0.8)
    plt.show()
Esempio n. 3
0
# Compare k-NN classification rates on the validation and test sets for
# k = 1, 3, 5, 7, 9 and plot both curves.
(train_x, train_t) = load_train()
(valid_x, valid_t) = load_valid()
(test_x, test_t) = load_test()
# The first dimension of each split is used as the denominator of its rate.
# Note that valid_c / valid_d describe the *test* inputs despite their names.
(valid_a, valid_b) = valid_x.shape
(valid_c, valid_d) = test_x.shape

k = np.zeros(5)
cl_rate_valid = np.zeros(5)
cl_rate_test = np.zeros(5)

for i in range(5):
    k[i] = 2 * i + 1
    # k is a float array (kept as-is for plotting); hand run_knn a real int,
    # since a neighbour count is presumably used as an index/slice bound.
    n_neighbors = int(k[i])

    # Rate = 1 - (number of mismatching labels / number of examples).
    valid_p = run_knn(n_neighbors, train_x, train_t, valid_x)
    cl_rate_valid[i] = 1 - (np.sum(np.bitwise_xor(valid_p, valid_t)) / float(valid_a))

    test_p = run_knn(n_neighbors, train_x, train_t, test_x)
    cl_rate_test[i] = 1 - (np.sum(np.bitwise_xor(test_p, test_t)) / float(valid_c))

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(cl_rate_valid)
print(cl_rate_test)

plt.plot(k, cl_rate_valid, marker='o', label='Validation Set')
plt.plot(k, cl_rate_test, marker='x', label='Test Set')
legend = plt.legend()
plt.xlabel('k')
plt.ylabel('Classification Rate')
plt.axis([1, 9, 0.8, 1])
plt.show()
import matplotlib.pyplot as plt
from utils import *
from run_knn import *

if __name__ == "__main__":

    # Load all three splits; only the training and test data are used below.
    train_inputs, train_targets = load_train()
    valid_inputs, valid_targets = load_valid()
    test_inputs, test_targets = load_test()

    # Score the k-NN predictions on the test inputs for each k.
    k_values = [1, 3, 5, 7, 9]
    accuracy_for_k = [
        getAccuracy(
            run_knn(k, train_inputs, train_targets, test_inputs),
            test_targets,
        )
        for k in k_values
    ]

    print(accuracy_for_k)

    # Line plot with circle markers: one point per k.
    plt.plot(k_values, accuracy_for_k, '-o')
    plt.title("K Value's Effect on Test Set")
    plt.xlabel("k value")
    plt.ylabel("Accuracy on Test Set")
    plt.show()

Esempio n. 5
0
from run_knn import *
from utils import *
from plot_digits import *
import matplotlib.pyplot as plt

# Measure the k-NN classification rate on the validation set for several k
# and plot rate against k.
train_in, train_tar = load_train()
valid_in, valid_tar = load_valid()
test_in, test_tar = load_test()
valid_rate_vec = []
test_rate_vec = []
# Single source of truth for the k values used by both the loop and the plot.
k_values = [1, 3, 5, 7, 9]
for i in k_values:
    predicted_labels = run_knn(i, train_in, train_tar, valid_in)
    # Count predictions that agree with the validation targets.
    c = sum(1 for predicted, actual in zip(predicted_labels, valid_tar)
            if predicted == actual)
    valid_rate_vec.append(float(c) / len(predicted_labels))
    # Two spaces before the value reproduce the output of the former
    # Python 2 statement `print 'Classification rate ', value`.
    print('Classification rate  %s' % valid_rate_vec[-1])
plt.plot(k_values, valid_rate_vec)
plt.xlabel('K')
plt.ylabel('Classification rate')
plt.axis([0, 10, .70, 1.0])
plt.show()

# If we choose k = 5, then k-2 and k+2 yield the same classification rate on the validation set.
# Next, evaluate k = 3, 5 and 7 on the test set.
for i in [3, 5, 7]:
    c = 0
    predicted_labels = run_knn(i, train_in, train_tar, test_in)
    for j in range (0, len(predicted_labels)):
Esempio n. 6
0
def demo():
    """Run the enabled models on the loaded data and return their scores.

    The ``models`` and ``pre`` dicts below are hard-coded switches. With the
    values written here only the k-NN and neural-network sections execute;
    the logistic-regression, Naive Bayes and auto-encoder sections are
    skipped. ``models['pca']``, ``pre['pca']`` and ``pre['nb']`` are not read
    by any live code in this function.

    Returns:
        The 10-tuple ``(train_nn, train_nn_nb, train_nn_pca, valid_nn,
        valid_knn_nb, valid_knn_pca, valid_nn_nb, valid_nn_pca,
        train_nn_auto, valid_nn_auto)``. The last two are always 0.

    NOTE(review): ``train_pre_inputs_nb``, ``valid_pre_inputs_nb``,
    ``train_pre_inputs_pca`` and ``valid_pre_inputs_pca`` are never assigned
    inside this function -- the code that built them is commented out in the
    "Preprocessing" section. Unless they exist as module-level globals, the
    k-NN and NN sections raise NameError. Confirm before running.
    """
    # Which model sections to run.
    models = {
        'knn': True,
        'logistic': False,
        'pca': False,
        'nn': True,
        'nb': False
    }
    # Which preprocessing steps to apply; only 'auto' is checked by live code.
    pre = {'pca': True, 'nb': True, 'auto': False}

    # Only referenced by the commented-out preprocessing code below.
    p = 30  # % of information used

    print "Loading data ..."
    # Load Training and Validation data sets:
    train_inputs, train_targets, valid_inputs, valid_targets = LoadData()

    # Load Testing data set:
    #train_inputs, train_targets, valid_inputs, valid_targets = LoadTest('P')

    print "Preprocessing ..."
    # NOTE(review): both preprocessing steps are disabled, so the
    # *_pre_inputs_nb / *_pre_inputs_pca names used later are not defined here.
    #if pre['nb']:
    #v = nbayes(train_inputs, train_targets,0, (100-p))
    #train_pre_inputs_nb = train_inputs[:,v]
    #valid_pre_inputs_nb = valid_inputs[:,v]

    #if pre['pca']:
    #V_train = pca(train_inputs, int(train_inputs.shape[1]*p/100))
    #train_pre_inputs_pca = np.dot(train_inputs, V_train.T)
    #valid_pre_inputs_pca = np.dot(valid_inputs, V_train.T)

    # Auto-encoder features; skipped because pre['auto'] is False above. The
    # results are only consumed by the commented-out "NN auto" call below.
    if pre['auto']:
        num_hiddens = 2000
        eps = 0.1  # learning rate
        momentum = 0.5
        W1, W2, b1, b2, train_pre_inputs_auto, valid_pre_inputs_auto = AutoEncoder(
            train_inputs, valid_inputs, num_hiddens, eps, momentum, 200)
        train_pre_inputs_auto = train_pre_inputs_auto.T
        valid_pre_inputs_auto = valid_pre_inputs_auto.T

    ### Model 1: knn
    # Runs 5-nearest-neighbour on the nb- and pca-preprocessed inputs and
    # scores the validation predictions with knn_MCE.
    if models['knn']:
        # nb
        print "KNN nb ..."
        pred_valid = run_knn(5, train_pre_inputs_nb, train_targets,
                             valid_pre_inputs_nb)
        valid_knn_nb = knn_MCE(pred_valid, valid_targets)

        # pca
        print "KNN pca ..."
        pred_valid = run_knn(5, train_pre_inputs_pca, train_targets,
                             valid_pre_inputs_pca)
        valid_knn_pca = knn_MCE(pred_valid, valid_targets)

    ## Model 2: logistic
    # Calls run_logistic_regression once per learning rate; nothing is kept.
    if models['logistic']:
        learning_rates = [0.01]
        #learning_rates = [0.001,0.01,0.1,1.0]
        for item in learning_rates:
            run_logistic_regression(item)

    ## Model 3: Naive Bayes
    if models['nb']:
        nb = NaiveBayesClassifier()
        nb.trainNB(train_inputs, train_targets)
        # valid_prediction is not used afterwards.
        valid_prediction = nb.predict(valid_inputs)
        nb_valid_accuracy = nb.compute_accuracy(valid_inputs, valid_targets)
        # NOTE(review): the label says "MCE" but the value comes from
        # compute_accuracy -- confirm which metric is intended.
        print('Naive Bayes MCE: ', nb_valid_accuracy)

        #np.savetxt("nb_mean.txt", nb.mean * 255, delimiter=",", fmt="%10.5f")
        #np.savetxt("nb_var.txt", nb.var * 255, delimiter=",", fmt="%10.5f")

    # Model 4: NN
    # Trains three networks with TrainNN: on nb-preprocessed inputs (7000
    # epochs), on pca-preprocessed inputs and on raw inputs (15000 epochs
    # each). W1, W2, b1, b2 and the target/predicted values are overwritten
    # by each call; only the last two outputs of each call are returned.
    if models['nn']:
        num_hiddens = 15
        eps = 0.1
        momentum = 0.5
        num_epochs = 7000

        # nb
        print "NN nb ..."
        W1, W2, b1, b2, target_train, train_predicted, target_valid, valid_predicted, train_nn_nb, valid_nn_nb = TrainNN(
            train_pre_inputs_nb, train_targets, valid_pre_inputs_nb,
            valid_targets, num_hiddens, eps, momentum, num_epochs)

        ## auto encoder
        #print "NN auto ..."
        #W1, W2, b1, b2, target_train, train_predicted, target_valid, valid_predicted, train_nn_auto, valid_nn_auto = TrainNN(train_pre_inputs_auto, train_targets, valid_pre_inputs_auto, valid_targets, num_hiddens, eps, momentum, num_epochs)

        # Same hyper-parameters as above except for the longer training run.
        num_hiddens = 15
        eps = 0.1
        momentum = 0.5
        num_epochs = 15000

        # pca
        print "NN pca ... "
        W1, W2, b1, b2, target_train, train_predicted, target_valid, valid_predicted, train_nn_pca, valid_nn_pca = TrainNN(
            train_pre_inputs_pca, train_targets, valid_pre_inputs_pca,
            valid_targets, num_hiddens, eps, momentum, num_epochs)

        # none
        print "NN none ... "
        W1, W2, b1, b2, target_train, train_predicted, target_valid, valid_predicted, train_nn, valid_nn = TrainNN(
            train_inputs, train_targets, valid_inputs, valid_targets,
            num_hiddens, eps, momentum, num_epochs)

    # Placeholders: the auto-encoder NN run above is commented out.
    train_nn_auto = 0
    valid_nn_auto = 0

    # NOTE(review): the train_nn* / valid_nn* names are only bound when
    # models['nn'] is True and valid_knn_* only when models['knn'] is True;
    # disabling either switch makes this return raise NameError.
    return train_nn, train_nn_nb, train_nn_pca, valid_nn, valid_knn_nb, valid_knn_pca, valid_nn_nb, valid_nn_pca, train_nn_auto, valid_nn_auto
Esempio n. 7
0
import numpy as np
from l2_distance import l2_distance
from utils import *
from plot_digits import *
from run_knn import *
import matplotlib.pyplot as plt
"""load data
train_set = load_train()
valid_set = load_valid()
train_data = train_set[0]
train_labels = train_set[1]
valid_data = valid_set[0]
real_valid_label = valid_set[1]
"""
# Scatter-plot the k-NN classification rate for several k, training on the
# small training set and evaluating on the test set.
# Alternative data sources are kept commented out for quick switching.
#train_data,train_label = load_train()
train_data, train_label = load_train_small()
#valid_data, real_valid_label = load_valid()
# NOTE: despite the "valid" names, the evaluation split loaded here is the
# test set.
valid_data, real_valid_label = load_test()

k = [1, 3, 5, 7, 9]
for i in k:
    valid_label = run_knn(i, train_data, train_label, valid_data)
    # 1 where the prediction matches the true label, 0 otherwise.
    correct_prediction = (valid_label == real_valid_label).astype(int)

    # Fraction of evaluation examples that were classified correctly.
    classification_rate = float(np.sum(correct_prediction)) / float(len(valid_label))
    plt.scatter(i, classification_rate)

# Axis labels do not depend on k, so set them once after the loop.
plt.xlabel("K")
plt.ylabel("Classification Rate")

plt.show()
Esempio n. 8
0
# Compare k-NN classification rates on the validation and test sets for
# k = 1, 3, 5, 7, 9, feeding run_knn the transposed arrays, and plot both.
# Shapes of the un-transposed inputs (valid_c / valid_d describe the test set).
(valid_a, valid_b)  = valid_x.shape
(valid_c, valid_d)  = test_x.shape

k               = np.zeros(5)
cl_rate_valid   = np.zeros(5)
cl_rate_test    = np.zeros(5)

for i in range(5):
    k[i] = 2 * i + 1
    # k is a float array (kept as-is for plotting); hand run_knn a real int,
    # since a neighbour count is presumably used as an index/slice bound.
    n_neighbors = int(k[i])

    # run_knn classifies valid_x.T / test_x.T, so the predictions presumably
    # follow the *second* axis of valid_x / test_x. Dividing the mismatch
    # count by shape[0] (valid_a / valid_c) would use the wrong axis unless
    # the arrays are square; np.mean divides by the number of labels actually
    # compared.
    valid_p = run_knn(n_neighbors, train_x.T, train_t.T, valid_x.T)
    cl_rate_valid[i] = 1 - np.mean(np.logical_xor(valid_p, valid_t.T))

    test_p = run_knn(n_neighbors, train_x.T, train_t.T, test_x.T)
    cl_rate_test[i] = 1 - np.mean(np.logical_xor(test_p, test_t.T))

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(cl_rate_valid)
print(cl_rate_test)

plt.plot(k, cl_rate_valid, marker='o', label='Validation Set')
plt.plot(k, cl_rate_test, marker='x', label='Test Set')
legend = plt.legend(loc=3)
plt.xlabel('k')
plt.ylabel('Classification Rate')
plt.axis([1, 9, 0.8, 1])
plt.show()