def run(split="test"):
    """Scatter-plot k-NN classification rate against k for the digits data.

    Loads six arrays from 'digits.npz' via ``LoadData``, transposes them,
    runs ``run_knn`` for every odd k from 1 to 19 with the training set as
    reference, scores each prediction with ``correctPredictRate`` and draws
    one scatter point per k.

    Args:
        split: Which set to classify and score: "test" (default, the
            original behaviour), "valid" or "train".  This replaces the
            commented-out alternatives that previously had to be toggled
            by hand.

    Raises:
        ValueError: If ``split`` is not one of the three known names.
    """
    (inputs_train, inputs_valid, inputs_test,
     target_train, target_valid, target_test) = LoadData('digits.npz')

    # Every array is transposed before use, as in the original code.
    # NOTE(review): presumably LoadData stores one example per column while
    # run_knn expects one per row -- confirm against those helpers.
    inputs_train, target_train = inputs_train.T, target_train.T
    evaluation_sets = {
        "test": (inputs_test.T, target_test.T, "test set"),
        "valid": (inputs_valid.T, target_valid.T, "validation set"),
        "train": (inputs_train, target_train, "training set"),
    }
    if split not in evaluation_sets:
        raise ValueError(
            "split must be one of %s, got %r"
            % (sorted(evaluation_sets), split))
    eval_inputs, eval_targets, title = evaluation_sets[split]

    marker_area = np.pi * 3 ** 2
    # A single pass over k replaces the dict + iteritems() loop, which only
    # worked on Python 2.
    for k in range(1, 20, 2):
        predicted = run_knn(k, inputs_train, target_train, eval_inputs)
        rate = correctPredictRate(eval_targets, predicted)
        plt.scatter(k, rate, s=marker_area, alpha=0.8)

    # Figure decorations only need to be set once, not once per point.
    plt.title(title)
    plt.xlabel('k value')
    plt.ylabel('Correct Classification Percent')
    plt.show()
def run(split="train"):
    """Scatter-plot k-NN classification rate against k.

    Loads the training and validation sets with ``load_train`` and
    ``load_valid``, runs ``run_knn`` for every odd k from 1 to 19 with the
    training set as reference, scores each prediction with
    ``correctPredictRate`` and draws one scatter point per k.

    Args:
        split: Which set to classify and score: "train" (default, the
            original behaviour) or "valid".  This replaces the
            commented-out alternative that previously had to be toggled by
            hand.

    Raises:
        ValueError: If ``split`` is neither "train" nor "valid".
    """
    train_inputs, train_targets = load_train()
    valid_inputs, valid_targets = load_valid()

    evaluation_sets = {
        "train": (train_inputs, train_targets),
        "valid": (valid_inputs, valid_targets),
    }
    if split not in evaluation_sets:
        raise ValueError(
            "split must be one of %s, got %r"
            % (sorted(evaluation_sets), split))
    eval_inputs, eval_targets = evaluation_sets[split]

    marker_area = np.pi * 3 ** 2
    # A single pass over k replaces the dict + iteritems() loop, which only
    # worked on Python 2.
    for k in range(1, 20, 2):
        predicted = run_knn(k, train_inputs, train_targets, eval_inputs)
        rate = correctPredictRate(eval_targets, predicted)
        plt.scatter(k, rate, s=marker_area, alpha=0.8)
    plt.show()
# Compare k-NN classification rate on the validation and test sets for
# k = 1, 3, 5, 7, 9 and plot both curves.
(train_x, train_t) = load_train()
(valid_x, valid_t) = load_valid()
(test_x, test_t) = load_test()

# Only the first dimension of each shape is used below, as the divisor for
# the error count of the corresponding set.
(valid_a, valid_b) = valid_x.shape
(valid_c, valid_d) = test_x.shape

# Integer array of the odd neighbour counts.  The original filled a float
# array (np.zeros(5)) and handed its float64 elements to run_knn.
k = np.arange(1, 10, 2)
cl_rate_valid = np.zeros(len(k))
cl_rate_test = np.zeros(len(k))

for i, k_i in enumerate(k):
    # Pass a plain int: a float is not a valid neighbour count, and NumPy
    # rejects floats as indices / slice bounds.
    # NOTE(review): presumably run_knn slices with k -- confirm.
    n_neighbours = int(k_i)

    valid_p = run_knn(n_neighbours, train_x, train_t, valid_x)
    # XOR marks every position where prediction and target disagree;
    # rate = 1 - (disagreements / divisor).
    cl_rate_valid[i] = 1 - (np.sum(np.bitwise_xor(valid_p, valid_t)) / float(valid_a))

    test_p = run_knn(n_neighbours, train_x, train_t, test_x)
    cl_rate_test[i] = 1 - (np.sum(np.bitwise_xor(test_p, test_t)) / float(valid_c))

# Single-argument print(...) produces the same output on Python 2 and 3.
print(cl_rate_valid)
print(cl_rate_test)

plt.plot(k, cl_rate_valid, marker='o', label='Validation Set')
plt.plot(k, cl_rate_test, marker='x', label='Test Set')
legend = plt.legend()
plt.xlabel('k')
plt.ylabel('Classification Rate')
plt.axis([1, 9, 0.8, 1])
plt.show()
import matplotlib.pyplot as plt
from utils import *
from run_knn import *

if __name__ == "__main__":
    # All three splits are loaded; only training and test data are used below.
    train_inputs, train_targets = load_train()
    valid_inputs, valid_targets = load_valid()
    test_inputs, test_targets = load_test()

    # Neighbour counts to evaluate; also the x-coordinates of the plot.
    k_values = [1, 3, 5, 7, 9]

    # One accuracy figure per k: classify the test inputs against the
    # training set, then score the predictions with getAccuracy.
    accuracy_for_k = [
        getAccuracy(
            run_knn(k, train_inputs, train_targets, test_inputs),
            test_targets,
        )
        for k in k_values
    ]
    print(accuracy_for_k)

    plt.plot(k_values, accuracy_for_k, '-o')
    plt.title("K Value's Effect on Test Set")
    plt.xlabel("k value")
    plt.ylabel("Accuracy on Test Set")
    plt.show()
from run_knn import * from utils import * from plot_digits import * import matplotlib.pyplot as plt train_in, train_tar = load_train() valid_in, valid_tar = load_valid() test_in, test_tar = load_test() valid_rate_vec = [] test_rate_vec = [] for i in [1, 3, 5, 7, 9]: c = 0 predicted_labels = run_knn(i, train_in, train_tar, valid_in) for j in range (0, len(predicted_labels)): if predicted_labels[j] == valid_tar[j]: c = c + 1 valid_rate_vec.append(1.0*c/len(predicted_labels)) print 'Classification rate ', valid_rate_vec[-1] #print 'at k = ', i, 'c = ', c plt.plot([1, 3, 5, 7, 9], valid_rate_vec) plt.xlabel('K') plt.ylabel('Classification rate') plt.axis([0, 10, .70, 1.0]) plt.show() #if we choose k = 5, then k-2 and k+2 yield same class rates for validation #next is test run for i in [3, 5, 7]: c = 0 predicted_labels = run_knn(i, train_in, train_tar, test_in) for j in range (0, len(predicted_labels)):
def demo():
    """Run the enabled models on the loaded data and return their scores.

    The ``models`` and ``pre`` dictionaries below are hard-coded switches:
    as written, the k-NN and neural-network sections run, while logistic
    regression, naive Bayes and the auto-encoder preprocessing are skipped.

    Returns:
        A 10-tuple ``(train_nn, train_nn_nb, train_nn_pca, valid_nn,
        valid_knn_nb, valid_knn_pca, valid_nn_nb, valid_nn_pca,
        train_nn_auto, valid_nn_auto)``.  The two ``*_auto`` entries are
        always 0 because the auto-encoder NN run is commented out.

    NOTE(review): ``train_pre_inputs_nb``, ``valid_pre_inputs_nb``,
    ``train_pre_inputs_pca`` and ``valid_pre_inputs_pca`` are used by the
    k-NN and NN sections, but the only code that assigns them inside this
    function is commented out.  Unless they exist as module-level globals,
    the first use raises NameError -- confirm before running.
    """
    # Which model sections to execute.
    models = {
        'knn': True,
        'logistic': False,
        'pca': False,
        'nn': True,
        'nb': False
    }
    # Which preprocessing steps to execute.  Only 'auto' is still read by
    # live code; the 'nb' and 'pca' branches are commented out below.
    pre = {'pca': True, 'nb': True, 'auto': False}
    p = 30 # % of information used (only referenced by the commented-out preprocessing)

    print "Loading data ..."
    # Load Training and Validation data sets:
    train_inputs, train_targets, valid_inputs, valid_targets = LoadData()
    # Load Testing data set:
    #train_inputs, train_targets, valid_inputs, valid_targets = LoadTest('P')

    print "Preprocessing ..."
    # Disabled naive-Bayes feature selection: would keep the columns `v`.
    #if pre['nb']:
        #v = nbayes(train_inputs, train_targets,0, (100-p))
        #train_pre_inputs_nb = train_inputs[:,v]
        #valid_pre_inputs_nb = valid_inputs[:,v]
    # Disabled PCA projection onto p% of the input dimensionality.
    #if pre['pca']:
        #V_train = pca(train_inputs, int(train_inputs.shape[1]*p/100))
        #train_pre_inputs_pca = np.dot(train_inputs, V_train.T)
        #valid_pre_inputs_pca = np.dot(valid_inputs, V_train.T)
    if pre['auto']:
        num_hiddens = 2000
        eps = 0.1 # learning rate
        momentum = 0.5
        W1, W2, b1, b2, train_pre_inputs_auto, valid_pre_inputs_auto = AutoEncoder(
            train_inputs, valid_inputs, num_hiddens, eps, momentum, 200)
        # The encoded outputs are transposed before any later use.
        train_pre_inputs_auto = train_pre_inputs_auto.T
        valid_pre_inputs_auto = valid_pre_inputs_auto.T

    ### Model 1: knn
    if models['knn']:
        # nb
        print "KNN nb ..."
        # k is fixed at 5 for both k-NN runs.
        pred_valid = run_knn(5, train_pre_inputs_nb, train_targets, valid_pre_inputs_nb)
        valid_knn_nb = knn_MCE(pred_valid, valid_targets)
        # pca
        print "KNN pca ..."
        pred_valid = run_knn(5, train_pre_inputs_pca, train_targets, valid_pre_inputs_pca)
        valid_knn_pca = knn_MCE(pred_valid, valid_targets)

    ## Model 2: logistic
    if models['logistic']:
        learning_rates = [0.01]
        #learning_rates = [0.001,0.01,0.1,1.0]
        for item in learning_rates:
            run_logistic_regression(item)

    ## Model 3: Naive Bayes
    if models['nb']:
        nb = NaiveBayesClassifier()
        nb.trainNB(train_inputs, train_targets)
        valid_prediction = nb.predict(valid_inputs)
        nb_valid_accuracy = nb.compute_accuracy(valid_inputs, valid_targets)
        # NOTE(review): the rest of this function uses print statements, so
        # under Python 2 this parenthesised form prints a tuple; the label
        # says "MCE" but the value comes from compute_accuracy -- confirm.
        print('Naive Bayes MCE: ', nb_valid_accuracy)
        #np.savetxt("nb_mean.txt", nb.mean * 255, delimiter=",", fmt="%10.5f")
        #np.savetxt("nb_var.txt", nb.var * 255, delimiter=",", fmt="%10.5f")

    # Model 4: NN
    if models['nn']:
        num_hiddens = 15
        eps = 0.1
        momentum = 0.5
        num_epochs = 7000
        # nb
        print "NN nb ..."
        # Each TrainNN call overwrites W1..valid_predicted; only the last
        # two values of each result are kept under distinct names.
        W1, W2, b1, b2, target_train, train_predicted, target_valid, valid_predicted, train_nn_nb, valid_nn_nb = TrainNN(
            train_pre_inputs_nb, train_targets, valid_pre_inputs_nb, valid_targets,
            num_hiddens, eps, momentum, num_epochs)
        ## auto encoder
        #print "NN auto ..."
        #W1, W2, b1, b2, target_train, train_predicted, target_valid, valid_predicted, train_nn_auto, valid_nn_auto = TrainNN(train_pre_inputs_auto, train_targets, valid_pre_inputs_auto, valid_targets, num_hiddens, eps, momentum, num_epochs)

        # The PCA and raw-input runs share these settings; only the epoch
        # count differs from the nb run above.
        num_hiddens = 15
        eps = 0.1
        momentum = 0.5
        num_epochs = 15000
        # pca
        print "NN pca ... "
        W1, W2, b1, b2, target_train, train_predicted, target_valid, valid_predicted, train_nn_pca, valid_nn_pca = TrainNN(
            train_pre_inputs_pca, train_targets, valid_pre_inputs_pca, valid_targets,
            num_hiddens, eps, momentum, num_epochs)
        # none
        print "NN none ... "
        W1, W2, b1, b2, target_train, train_predicted, target_valid, valid_predicted, train_nn, valid_nn = TrainNN(
            train_inputs, train_targets, valid_inputs, valid_targets,
            num_hiddens, eps, momentum, num_epochs)

    # Placeholders for the disabled auto-encoder NN run.
    train_nn_auto = 0
    valid_nn_auto = 0
    return train_nn, train_nn_nb, train_nn_pca, valid_nn, valid_knn_nb, valid_knn_pca, valid_nn_nb, valid_nn_pca, train_nn_auto, valid_nn_auto
import numpy as np
from l2_distance import l2_distance
from utils import *
from plot_digits import *
from run_knn import *
import matplotlib.pyplot as plt

"""load data
train_set = load_train()
valid_set = load_valid()
train_data = train_set[0]
train_labels = train_set[1]
valid_data = valid_set[0]
real_valid_label = valid_set[1]
"""

# Reference set: the small training set (the full one is left disabled).
#train_data,train_label = load_train()
train_data, train_label = load_train_small()

# Evaluation set: despite the "valid" names, the data loaded here comes
# from load_test().
#valid_data, real_valid_label = load_valid()
valid_data, real_valid_label = load_test()

k = [1, 3, 5, 7, 9]
for neighbours in k:
    valid_label = run_knn(neighbours, train_data, train_label, valid_data)
    # Number of predictions that agree with the true labels.
    hits = np.count_nonzero(valid_label == real_valid_label)
    # Fraction correct, relative to the number of predicted rows.
    rate = float(hits) / float(len(valid_label))
    plt.scatter(neighbours, rate)

plt.xlabel("K")
plt.ylabel("Classification Rate")
plt.show()
# Compare k-NN classification rate on the validation and test sets for
# k = 1, 3, 5, 7, 9 and plot both curves.  train_x/train_t, valid_x/valid_t
# and test_x/test_t are expected to be defined before this point.

# Kept for any later code that reads them; the rates below no longer use
# these values.
(valid_a, valid_b) = valid_x.shape
(valid_c, valid_d) = test_x.shape

# Integer array of the odd neighbour counts.  The original filled a float
# array (np.zeros(5)) and handed its float64 elements to run_knn.
k = np.arange(1, 10, 2)
cl_rate_valid = np.zeros(len(k))
cl_rate_test = np.zeros(len(k))

for i, k_i in enumerate(k):
    # Pass a plain int: a float is not a valid neighbour count, and NumPy
    # rejects floats as indices / slice bounds.
    # NOTE(review): presumably run_knn slices with k -- confirm.
    n_neighbours = int(k_i)

    # Inputs and targets are transposed on the way into run_knn, so
    # valid_x.shape[0] / test_x.shape[0] (the original divisors) are not
    # necessarily the number of examples.  Taking the mean of the mismatch
    # mask divides by the number of labels actually compared, which is
    # correct for either orientation.
    # NOTE(review): assumes the predictions and the transposed targets have
    # the same shape (no broadcasting) -- confirm.
    valid_p = run_knn(n_neighbours, train_x.T, train_t.T, valid_x.T)
    cl_rate_valid[i] = 1 - np.mean(np.logical_xor(valid_p, valid_t.T))

    test_p = run_knn(n_neighbours, train_x.T, train_t.T, test_x.T)
    cl_rate_test[i] = 1 - np.mean(np.logical_xor(test_p, test_t.T))

# Single-argument print(...) produces the same output on Python 2 and 3.
print(cl_rate_valid)
print(cl_rate_test)

plt.plot(k, cl_rate_valid, marker='o', label='Validation Set')
plt.plot(k, cl_rate_test, marker='x', label='Test Set')
legend = plt.legend(loc=3)
plt.xlabel('k')
plt.ylabel('Classification Rate')
plt.axis([1, 9, 0.8, 1])
plt.show()