def load_data(self):
    """Read both dataset splits and initialise the model's training state.

    Populates, on ``self``: the label arrays and their lengths, the number
    of input dimensions, the mean-centred flattened image matrices (one
    column per image because of the transpose), a small random weight
    matrix, and the one-hot encodings of the train and test labels.
    """
    # Training split: ``dataset.read`` is unpacked as (labels, images).
    self.train_labels, raw_train = dataset.read()
    self.train_data_length = len(self.train_labels)
    self.number_of_dimensions = raw_train[0].flatten().shape[0]

    # Test split.
    self.test_labels, raw_test = dataset.read(dataset='testing')
    self.test_data_length = len(self.test_labels)

    # Pixel values are deliberately NOT divided by 255 here: the original
    # author noted much better accuracy without normalization.
    flat_train = raw_train.reshape(raw_train.shape[0], -1).T
    flat_test = raw_test.reshape(raw_test.shape[0], -1).T

    # Centre both splits with the single scalar mean of the training data.
    global_mean = np.nanmean(flat_train)
    self.train_images_flattened = flat_train - global_mean
    self.test_images_flattened = flat_test - global_mean

    # Small random initial weights, shape (dimensions, classes).
    weight_shape = (self.number_of_dimensions, self.number_of_classes)
    self.weight_vector = np.random.random(weight_shape) * 0.000008

    self.one_hot_encoded_labels = self.create_one_hot_representation(
        self.train_labels)
    self.test_one_hot_encoded_labels = self.create_one_hot_representation(
        self.test_labels)
def load_train_test(self):
    """Load both splits, flatten and scale the images, one-hot the train labels.

    The test images and labels are also stored on ``self`` as ``X_test`` and
    ``Y_test``. Only the training labels go through
    ``one_hot_representation``; the test labels are returned as read.

    Returns:
        Tuple ``(X_train, Y_train, X_test, Y_test)``.
    """
    train_labels, train_pixels = ld.read()
    self.Y_test, self.X_test = ld.read('testing')

    flat_width = 28 * 28
    # Flatten to (n_samples, 784) and divide pixel values by 255.
    train_pixels = np.reshape(
        train_pixels, (self.training_size, flat_width)) / 255
    self.X_test = np.reshape(
        self.X_test, (self.testing_size, flat_width)) / 255

    train_targets = self.one_hot_representation(train_labels)
    return (train_pixels, train_targets, self.X_test, self.Y_test)
def driver():
    """Load the training split and start weight updates from an all-zero matrix."""
    labels, images = read()

    # One row of 784 values per image, divided by 255.
    sample_count = images.shape[0]
    features = images.reshape((sample_count, 784)) / float(255)

    # Weights start at zero: 784 inputs x 10 outputs.
    initial_weights = npy.zeros((784, 10))
    update_weights(features, labels, initial_weights)
def load_data(self):
    """Read both splits, scale and centre the images, cache squared row norms.

    Populates, on ``self``: the label arrays and their lengths, the flattened
    (one row per image) mean-centred train and test matrices, and
    ``train_images_squared`` -- for each training row, the sum of its squared
    entries.
    """
    self.train_labels, raw_train = dataset.read()
    self.train_data_length = len(self.train_labels)
    self.test_labels, raw_test = dataset.read(dataset='testing')
    self.test_data_length = len(self.test_labels)

    # Normalize the data set: divide pixel values by 255.
    scaled_train = raw_train / 255
    scaled_test = raw_test / 255

    flat_train = scaled_train.reshape(scaled_train.shape[0], -1)
    flat_test = scaled_test.reshape(scaled_test.shape[0], -1)

    # Centre both splits with the single scalar mean of the training data.
    mean_value = np.nanmean(flat_train)
    centred_train = flat_train - mean_value
    self.train_images_flattened = centred_train
    self.test_images_flattened = flat_test - mean_value

    # 'ij,ij->i' multiplies element-wise and sums along each row.
    self.train_images_squared = np.einsum(
        'ij,ij->i', centred_train, centred_train)
def main(
        data_dir="/Users/samarth/Desktop/Fall 2018/CSE 575/Assignment 2/MNIST"):
    """Compute the k=100 neighbour data per test image, then plot accuracy vs. K.

    For every test image the result of ``compute_distance_matrix`` (with
    k = 100) is collected, the full list is pickled to ``sorted_list_latest``
    in the current working directory, and the accuracies reported by
    ``test_accuracy`` for each K in the list are plotted with matplotlib.

    Args:
        data_dir: Directory handed to ``load_dataset.read`` for both splits.
            Defaults to the path that was previously hard-coded, so existing
            ``main()`` calls behave as before.
    """
    train_labels, train_images = load_dataset.read("training", data_dir)
    # Derive the sample count from the data instead of hard-coding 60000;
    # 784 stays explicit so a wrong image size still fails loudly.
    train_images = np.reshape(
        train_images, (len(train_images), 784)) / 255.0
    print(len(train_images))

    test_labels, test_images = load_dataset.read("testing", data_dir)
    test_images = np.reshape(test_images, (len(test_images), 784)) / 255.0

    k_list = [1, 3, 5, 10, 30, 50, 70, 80, 90, 100]

    final = []
    for i, test_image in enumerate(test_images):
        dist = compute_distance_matrix(
            100, train_images, train_labels, test_image)
        final.append(dist)
        print('Calculating the matrix for k = 100 for test image['
              + str(i) + ']')

    # Persist the per-image results; presumably consumed by test_accuracy
    # -- TODO confirm against its implementation.
    with open('sorted_list_latest', 'wb') as fp:
        pickle.dump(final, fp)

    # Only the per-K list is needed for the plot; the first value is unused.
    _, accuracy_list = test_accuracy(k_list, test_images, test_labels)

    plt.plot(k_list, accuracy_list, color='g')
    plt.xlabel("K")
    plt.ylabel("Accuracy")
    plt.title("KNN Graph")
    plt.show()
def driver():
    """Evaluate nearest-neighbour accuracy for a fixed list of k values.

    Loads and scales both splits, obtains per-test-image label sequences from
    ``compute_euclidean_distance``, predicts a label from the first ``k``
    entries for each k, then plots and dumps the resulting accuracies
    (a dict mapping k -> fraction of correct predictions).
    """
    k_values = [1, 3, 5, 10, 30, 50, 70, 80, 90, 100]

    Y_train, X_train = read()
    X_train = X_train.reshape((60000, 784)) / float(255)
    Y_test, X_test = read("testing")
    X_test = X_test.reshape((10000, 784)) / float(255)

    predictions_by_k = {k: [] for k in k_values}
    neighbours = compute_euclidean_distance(X_train, Y_train, X_test)
    test_count = len(X_test)

    for row in range(test_count):
        # Element 1 of each row holds the candidate labels; presumably
        # ordered nearest-first -- confirm in compute_euclidean_distance.
        ranked_labels = neighbours[row][1]
        for k in k_values:
            predictions_by_k[k].append(predict(ranked_labels[0:k]))

    accuracies = {}
    for k in k_values:
        predicted = predictions_by_k[k]
        hits = sum(
            1 for idx in range(0, test_count)
            if predicted[idx] == Y_test[idx])
        accuracies[k] = hits / float(test_count)

    plot_graph(accuracies)
    dum_accuracies_file(accuracies)
def loadData(data):
    """Load one dataset split into a DataFrame of scaled, flattened images.

    Args:
        data: Split selector forwarded unchanged to ``read``.

    Returns:
        A ``pandas.DataFrame`` with one row per image (pixel values divided
        by 255) plus a ``'label'`` column holding the labels.
    """
    labels, images = read(data)

    # Scale first, then flatten every image into a single row.
    scaled = images / 255
    flat_rows = np.asarray([picture.flatten() for picture in scaled])

    frame = pd.DataFrame(flat_rows)
    frame['label'] = labels
    return frame
def predict(W):
    """Return the accuracy of weight matrix ``W`` on the testing split.

    Args:
        W: Weight matrix multiplied on the right of the (n, 784) test
            matrix; the arg-max over axis 1 of the softmax output is taken
            as the predicted class.

    Returns:
        Fraction of test samples whose predicted class equals its label,
        as a float.
    """
    true_labels, samples = read("testing")
    n_samples = samples.shape[0]
    samples = samples.reshape((n_samples, 784)) / float(255)

    class_scores = softmax(npy.dot(samples, W))
    guesses = npy.argmax(class_scores, axis=1)

    correct = sum(
        1 for idx in range(0, n_samples)
        if true_labels[idx] == guesses[idx])
    return correct / float(n_samples)
# coding: utf-8

# In[3]:

import numpy as np
import load_dataset
import timeit
import collections
from bisect import bisect
import scipy.spatial.distance as sd


# In[4]:

# Each read is unpacked as (labels, images).
y_train, x_train = load_dataset.read("training", "MNIST")
y_test, x_test = load_dataset.read("testing", "MNIST")

# One row of 28 * 28 = 784 values per image.
x_train = x_train.reshape((60000, 784))
x_test = x_test.reshape((10000, 784))


# In[ ]:

# Pairwise distances, one row per test image and one column per training
# image (Euclidean is cdist's default metric, spelled out here).
distanceMatrix = sd.cdist(x_test, x_train, metric='euclidean')


# In[ ]:

k = [1, 3, 5, 10, 30, 50, 70, 80, 90, 100]
import numpy as np
#from scipy.special import expit
from load_dataset import read, show

# Each read is unpacked as (labels, images).
train_lab, train_img = read()
test_lab, test_img = read("testing")

# Reshape to an n x 784 matrix: (n, rows, cols) -> (n, rows * cols), as ints.
train_pixels_per_image = train_img.shape[1] * train_img.shape[2]
train_img = train_img.reshape(
    train_img.shape[0], train_pixels_per_image).astype(int)
del train_pixels_per_image

test_pixels_per_image = test_img.shape[1] * test_img.shape[2]
test_img = test_img.reshape(
    test_img.shape[0], test_pixels_per_image).astype(int)
del test_pixels_per_image

# Smaller subsets for quick experiments:
#train_tl=train_lab[:10000]
#train_ti=train_img[:10000]
#test_tl=test_lab[:5000]
#test_ti=test_img[:5000]
#print(train_tl.shape,train_ti.shape,test_tl.shape,test_ti.shape)

# In logistic regression all we need is to find this w (the bias b is ignored).
w = np.zeros((10, 784), dtype=float)


# Final predict function.
def sigmoid(w, X):
    """Return the logistic function applied element-wise to ``X @ w``.

    NOTE(review): the module-level ``w`` is (10, 784), which does not
    multiply on the right of an (n, 784) ``X``; callers presumably pass a
    single row or a transpose -- confirm at the call sites.
    """
    logits = np.matmul(X, w)
    activation = 1 / (1 + np.exp(-logits))
    return activation.astype(float)


# Find w.
# First step: relabel to 0/1, such that for 5, y[4][i] is a 0-1 label array.
def __init__(self, image, label): self.image = image self.label = label class BallTreeNode: def __init__(self, image, radius, leafs): self.imageData = image self.radius = radius self.left = None self.right = None self.leafs = leafs path = "../assignment2_data/" trainingData = load_dataset.read("training", path) testData = load_dataset.read("testing", path) size = 200 trainLbls = trainingData[0][:size * 6] trainImgs = trainingData[1][:size * 6] testLbls = testData[0][:size] testImgs = testData[1][:size] ks = [1, 3, 5, 10, 30, 50, 70, 80, 90, 100] trainingData = [] testData = [] for i in range(len(trainLbls)): image = [[0] * 28 for _ in range(28)]
from __future__ import division import scipy import numpy as np import load_dataset as ld import matplotlib.pyplot as plt train_data = ld.read() alpha = 0.001 y_train_data = train_data[0] x_train_data = train_data[1].reshape([60000, 28 * 28]) / 255 test_data = ld.read('testing') y_test_data = test_data[0] x_test_data = test_data[1].reshape([10000, 28 * 28]) / 255 weights = np.tile(np.zeros(784), (9, 1)) def post_prob(x, y): if y == 9: return 1 / sum_exps(x) else: return np.exp(np.dot(weights[y], x)) / sum_exps(x) def sum_exps(x): exp_array = [] for weight in weights: exp_array.append(np.exp(np.dot(weight, x))) return (1 + np.sum(exp_array))
import load_dataset import time import numpy as np startTime = time.time() path = "../assignment2_data/" trainings = load_dataset.read("training", path) tests = load_dataset.read("testing", path) class ImageData: def __init__(self, label, image): self.label = label self.image = image self.image = image testSize = 10000 trainSize = 60000 learningRate = 0.00002 / trainSize trainingData = [ ImageData(label, image.flatten().astype(float)) for label, image in zip(trainings[0][:trainSize], trainings[1][:trainSize]) ] testData = [ ImageData(label, image.flatten().astype(float)) for label, image in zip(tests[0][:testSize], tests[1][:testSize])
def load_train_test(self):
    """Load both splits and flatten each image to a row of 784 values.

    Pixel values are not rescaled and labels are returned as read.

    NOTE(review): ``training_size`` and ``testing_size`` are looked up as
    module-level names here, not as attributes of ``self`` -- confirm they
    are defined at module scope.

    Returns:
        Tuple ``(X_train, Y_train, X_test, Y_test)``.
    """
    train_labels, train_images = ld.read('training')
    test_labels, test_images = ld.read('testing')

    flat_width = 28 * 28
    train_matrix = np.reshape(train_images, (training_size, flat_width))
    test_matrix = np.reshape(test_images, (testing_size, flat_width))

    return train_matrix, train_labels, test_matrix, test_labels