def getDataSet():
    mn = MNIST(".")  # directory containing the MNIST data files
    images, labels = mn.load_training()
    images = normalize_images(images)
    labels = vectorize_labels(labels)
    return np.array(images), np.array(labels)
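# Sketch only: normalize_images and vectorize_labels are not defined in this
# snippet. A minimal, assumed implementation with pixel values in 0..255 and
# ten digit classes encoded one-hot:
def normalize_images(images):
    # Scale raw pixel intensities from [0, 255] down to [0, 1].
    return [[pixel / 255.0 for pixel in image] for image in images]

def vectorize_labels(labels):
    # Turn each digit label into a 10-element one-hot vector.
    vectors = np.zeros((len(labels), 10))
    for i, label in enumerate(labels):
        vectors[i][label] = 1.0
    return vectors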
def main():
    nn = NeuronNetwork(input_size=784, output_size=10, hidden_layers=[15])
    # input = np.random.randn(784).reshape(784, 1)
    # dic = nn.prediction(input, print_result=True)

    # Read the data into variables:
    # x_train[0 - 59999][0 - 783], labels_train[0 - 59999]
    mndata = MNIST('../data')
    x_train_in, labels_train = mndata.load_training()
    print('MNIST training data has been read')
    x_test_in, labels_test = mndata.load_testing()
    print('MNIST test data has been read')
    x_train, x_test = normalize_data(x_train_in, x_test_in)
    print('MNIST data has been normalized')

    trainer = Trainer(nn)
    # train(n_training_examples=60000, batch_size=200, n_epochs=20, learn_rate=1.5) = 0.872 accuracy
    # train(n_training_examples=60000, batch_size=200, n_epochs=40, learn_rate=1.5) = 0.906 accuracy
    trainer.train(x_train, labels_train, n_training_examples=60000,
                  batch_size=200, n_epochs=50, learn_rate=1.5)
    error_list, acc = trainer.test(x_test, labels_test, n_test_examples=1000)
    # print('error: {} ----> {}'.format(error_list[0], error_list[-1]))
    print('accuracy = {}'.format(acc))

    # Test with a few examples
    for i in range(10):
        vec, pred = nn.prediction(x_test[i])
        print('Image: {} ====> Prediction: {}'.format(labels_test[i], pred))
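# Sketch only: normalize_data is not shown above. One plausible version,
# assuming both splits hold raw 0..255 pixel lists to be scaled to [0, 1]
# as float arrays:
def normalize_data(x_train_in, x_test_in):
    x_train = np.array(x_train_in, dtype=np.float64) / 255.0
    x_test = np.array(x_test_in, dtype=np.float64) / 255.0
    return x_train, x_test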
def get_natural_dataset_samples(num_of_samples):
    from loader import MNIST
    import random
    mndata = MNIST('MNIST_dataset')
    images, labels = mndata.load_training()
    selected_img = []
    selected_labels = []
    selected_idxs = random.sample(range(0, len(images)), num_of_samples)
    for i in range(0, len(selected_idxs)):
        # newPoint = [float(j) for j in images[selected_idxs[i]]]
        # selected_img.append(newPoint)
        selected_img.append(images[selected_idxs[i]])
        selected_labels.append(labels[selected_idxs[i]])
    return selected_img, selected_labels
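# Usage sketch (assumes the MNIST files live in ./MNIST_dataset): draw 100
# random training images together with their labels.
sample_images, sample_labels = get_natural_dataset_samples(100)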
from loader import MNIST
import numpy as np

# This is intended to be called from the directory above.
mndata = MNIST('./data')

# Load lists with the training/testing images and labels
training_ims, training_labels = mndata.load_training()
testing_ims, testing_labels = mndata.load_testing()

# Transform everything into arrays
training_ims = np.array(training_ims)
training_labels = np.array(training_labels)
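# The test split is presumably converted the same way; a sketch, plus an
# optional reshape of each flat 784-vector back into a 28x28 image:
testing_ims = np.array(testing_ims)
testing_labels = np.array(testing_labels)
training_imgs_2d = training_ims.reshape(-1, 28, 28)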
    # (tail of the preceding loop over the test images)
    rand = noise.gauss(rand, rnd_gauss)
    test_random[i] = serialize(rand)

filesave.as_text(test_clean, 'data/clean/test/file.my-obj')
filesave.as_text(test_gauss_5, 'data/noisy/test/gauss_5.my-obj')
filesave.as_text(test_gauss_10, 'data/noisy/test/gauss_10.my-obj')
filesave.as_text(test_gauss_15, 'data/noisy/test/gauss_15.my-obj')
filesave.as_text(test_snp_002, 'data/noisy/test/snp_002.my-obj')
filesave.as_text(test_snp_005, 'data/noisy/test/snp_005.my-obj')
filesave.as_text(test_snp_01, 'data/noisy/test/snp_01.my-obj')
filesave.as_text(test_5_005, 'data/noisy/test/gauss_5_snp_005.my-obj')
filesave.as_text(test_10_002, 'data/noisy/test/gauss_10_snp_002.my-obj')
filesave.as_text(test_15_01, 'data/noisy/test/gauss_15_snp_01.my-obj')
filesave.as_text(test_random, 'data/noisy/test/random.my-obj')

# Build the same set of clean/noisy variants for the training images
imgs_train, labels_train = mnist.load_training()
train_clean = np.empty(len(imgs_train), dtype=object)
train_gauss_5 = np.empty(len(imgs_train), dtype=object)
train_gauss_10 = np.empty(len(imgs_train), dtype=object)
train_gauss_15 = np.empty(len(imgs_train), dtype=object)
train_snp_002 = np.empty(len(imgs_train), dtype=object)
train_snp_005 = np.empty(len(imgs_train), dtype=object)
train_snp_01 = np.empty(len(imgs_train), dtype=object)
train_5_005 = np.empty(len(imgs_train), dtype=object)
train_10_002 = np.empty(len(imgs_train), dtype=object)
train_15_01 = np.empty(len(imgs_train), dtype=object)
train_random = np.empty(len(imgs_train), dtype=object)

for i in range(len(imgs_train)):
    im_arr = imgs_train[i]
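# Sketch only: the noise and serialize helpers come from elsewhere in this
# project and are not shown. A plausible Gaussian-noise helper, assuming
# 0..255 pixel values and a standard deviation 'sigma':
import random

def add_gauss_noise(pixels, sigma):
    # Add zero-mean Gaussian noise to each pixel, clamped back to [0, 255].
    return [min(255, max(0, int(p + random.gauss(0, sigma)))) for p in pixels]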
class variables:
    def setup(self, n, eta, batchSize, repetitions, Dataset):
        print('Setting up Variables', end='')
        self.n = np.array(n)      # layers
        self.eta = eta            # learning rate (now independent of batchSize)
        self.batchSize = batchSize
        self.repetitions = repetitions
        self.Dataset = Dataset    # dataset option
        self.randrange = 1
        self.dSetIndex = {0: 'Database-MNIST', 1: 'Database-EMNIST'}
        self.w = np.array([np.zeros((self.n[x], self.n[x-1])) for x in range(1, len(self.n))])  # weights
        self.b = np.array([np.zeros((self.n[x], 1)) for x in range(1, len(self.n))])            # biases
        self.nRow = np.array([np.zeros((self.n[x], 1)) for x in range(len(self.n))])            # neuralRow
        self.zRow = np.array([np.zeros((self.n[x], 1)) for x in range(1, len(self.n))])         # neuralRow pre-sigmoid
        self.delta = np.array([np.zeros((self.n[x], 1)) for x in range(1, len(self.n))])        # error
        self.grad = np.array([  # gradient descent step
            np.array([np.zeros((self.n[x], 1)) for x in range(1, len(self.n))]),                # dC/dbias
            np.array([np.zeros((self.n[x], self.n[x-1])) for x in range(1, len(self.n))])       # dC/dweight
        ])
        self.aveCost = 0
        self.prevCost = 0
        self.images = None
        self.imagesTe = None

    def imagesSetup(self, Dataset):
        self.Dataset = Dataset
        self.mndata = MNIST(self.dSetIndex[Dataset])
        if Dataset == 0:  # MNIST setup
            print('.', end='')
            images = self.mndata.load_training()
            self.images = [np.array([np.array(images[0][x]), images[1][x]])
                           for x in range(len(images[0]))]  # CONSIDER USING 'zip()' INSTEAD
            print('.', end='')
            self.mndata = MNIST(self.dSetIndex[Dataset])
            imagesTe = self.mndata.load_testing()
            self.imagesTe = [np.array([np.array(imagesTe[0][x]), imagesTe[1][x]])
                             for x in range(len(imagesTe[0]))]
        elif Dataset == 1:  # EMNIST setup
            print('.', end='')
            images = self.mndata.load_training()
            self.images = [np.array([np.ravel(np.transpose([np.reshape(images[0][x], (28, 28))])), images[1][x]])
                           for x in range(len(images[0]))]
            print('.', end='')
            self.mndata = MNIST(self.dSetIndex[Dataset])
            imagesTe = self.mndata.load_testing()
            self.imagesTe = [np.array([np.ravel(np.transpose([np.reshape(imagesTe[0][x], (28, 28))])), imagesTe[1][x]])
                             for x in range(len(imagesTe[0]))]
            # The EMNIST digit image matrices are stored transposed, so they have to be
            # transposed back. This approach was used because the array holding the pixel
            # data has to be an np.array; there appears to be a bug where 'np.transpose()'
            # makes every entry of 'images' the same, and this version was also measured
            # to be faster than 'np.transpose()'.
        print('.', end='')
        self.costArr = np.array([[np.zeros(self.n[-1]) for x in range(self.batchSize)]
                                 for x in range(len(self.images) // self.batchSize)])
        self.cost = np.array([np.zeros(self.batchSize)
                              for x in range(len(self.images) // self.batchSize)])
        self.costArrTot = np.array([np.zeros(self.n[-1])
                                    for x in range(len(self.images) // self.batchSize)])
        self.costArrTe = np.array([np.zeros(self.n[-1]) for x in range(len(self.imagesTe))])
        self.costTe = np.zeros(len(self.imagesTe))
        print(' Complete.\n')
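# Usage sketch with hypothetical hyperparameters: a 784-30-10 network on the
# MNIST option (Dataset=0). The data-directory names come from dSetIndex above.
v = variables()
v.setup(n=[784, 30, 10], eta=3.0, batchSize=10, repetitions=1, Dataset=0)
v.imagesSetup(Dataset=0)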
import numpy as np
from loader import MNIST
from nnCostFunction import nnCostFunction
from randInitializeWeights import randInitializeWeights
from computeNumericalGradient import unRolling
from predict import predict
from shuffle import shuffle

# Get the data from MNIST
data = MNIST()
data.load_training()
data.load_testing()
x_train = data.train_images
y_train = data.train_labels
x_test = data.test_images
y_test = data.test_labels

x_train = np.reshape(x_train, (len(x_train), 784))
y_train = np.reshape(y_train, (len(y_train), 1))

# One-hot encode the training labels
y_train_fix = np.reshape(np.zeros(len(y_train) * 10), (len(y_train), 10))
for i in range(len(y_train)):
    for j in range(0, 10):
        if y_train[i] == j:
            y_train_fix[i][j] = 1

# Create the validation and train splits
list_x_val = []
list_y_val = []
list_x_train = []
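# An equivalent, vectorized one-hot encoding (a sketch: np.eye builds the
# 10x10 identity matrix and the label vector indexes its rows):
y_train_fix = np.eye(10)[y_train.ravel().astype(int)]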
from sklearn import tree
import matplotlib.pyplot as plt
from io import StringIO
import pydotplus
from loader import MNIST

mndata = MNIST('./Datasets')
trainingImages, trainingLabels = mndata.load_training()
testImages, testLabels = mndata.load_testing()

clf = tree.DecisionTreeClassifier()
clf = clf.fit(trainingImages[:1000], trainingLabels[:1000])
scores = clf.score(testImages, testLabels.tolist())
print("Accuracy: %f" % scores)

importances = clf.feature_importances_
importances = importances.reshape((28, 28))
plt.matshow(importances, cmap=plt.cm.hot)
plt.title("Pixel importances for decision tree")
plt.show()

dot_data = StringIO()
tree.export_graphviz(clf, out_file=dot_data)
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
graph.write_pdf("Dtree.pdf")
print("The Decision Tree was saved!")
def loadMnist(self):
    m = MNIST('./data')
    self.trvec, self.train_labels = m.load_training()
    # (end of the loss function defined above)
    return (-1.0 / n) * res

mndata = MNIST('data')
mndata.test_img_fname = 't10k-images.idx3-ubyte'
mndata.test_lbl_fname = 't10k-labels.idx1-ubyte'
mndata.train_img_fname = 'train-images.idx3-ubyte'
mndata.train_lbl_fname = 'train-labels.idx1-ubyte'

print("Params: ")
print("Digits: " + str(digitA) + " -- -1, " + str(digitB) + " -- 1")
print("Iterations: ", iterations)
print("Step (nu): ", nu)

print("Loading data...")
mndata.load_training()
mndata.load_testing()
print("Training data count:", len(mndata.train_images))
print("Testing data count:", len(mndata.test_images))

# Keep only the two selected digits, remapped to the classes -1 and 1
[(train_imgs, train_classes), (test_imgs, test_classes)] = [
    zip(*[(i, l) for (i, l) in zip(imgs, map(label_class, lbls)) if l == -1 or l == 1])
    for (imgs, lbls) in [(mndata.train_images, mndata.train_labels),
                         (mndata.test_images, mndata.test_labels)]]
train_imgs = list(map(color_binarization, train_imgs))
test_imgs = list(map(color_binarization, test_imgs))

w = numpy.array([0.0 for _ in range(img_size)])
print("Training...")
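# Sketch only: label_class and color_binarization are defined elsewhere in
# this script. Plausible versions, assuming digitA maps to class -1, digitB
# to class 1, and pixels are binarized at a threshold of 128:
def label_class(label):
    if label == digitA:
        return -1
    if label == digitB:
        return 1
    return 0  # any other digit is filtered out above

def color_binarization(img):
    return [1 if pixel >= 128 else 0 for pixel in img]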
from loader import MNIST

mndata = MNIST("../data/input/")
trn_img, trn_labels = mndata.load_training()
tst_img, tst_labels = mndata.load_testing()
from algorithms.NearestCentroid.MyNearestCentroid import MyNearestCentroid
from algorithms.NearestCentroid.nc_classify import test_nc_classify, test_nc_classify_with_sklearn
from algorithms.NearestNeighbours.nearest_neighbour_classify import test_neigh_classify
from algorithms.NearestSubclass.MyNearestSubclassCentroid import MyNearestSubclassCentroid
from algorithms.NearestSubclass.nsc_classify import test_nsc_classify
from algorithms.PerceptronBP.perceptron_bp_test import test_perceptron_bp
from algorithms.PerceptronMSE.PerceptronMSEClassifier import PerceptronMSEClassifier
from algorithms.PerceptronMSE.perceptron_mse_test import test_perceptron_mse
from loader import MNIST
import numpy as np
from algorithms.PerceptronBP.PerceptronBPClassifier import PerceptronBPClassifier

mndata = MNIST('../samples/MNIST/')
trainingData, trainingLabels = mndata.load_training()
testData, testLabels = mndata.load_testing()
data = trainingData + testData
labels = trainingLabels + testLabels

# # ------- PCA ---------
# pca = PCA(n_components=2).fit(np.array(trainingData))
# trainingData = pca.transform(np.array(trainingData))
#
# pca = PCA(n_components=2).fit(np.array(testData))
# testData = pca.transform(np.array(testData))
#
# pca = PCA(n_components=2).fit(np.array(data))
# data = pca.transform(np.array(data))
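# Note, if the PCA block above is re-enabled: sklearn's PCA should be fitted
# on the training data only and then reused to transform the test data,
# rather than refitted per split; a sketch (requires
# 'from sklearn.decomposition import PCA'):
# pca = PCA(n_components=2).fit(np.array(trainingData))
# trainingData = pca.transform(np.array(trainingData))
# testData = pca.transform(np.array(testData))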
### VECTORIZE METHODS: e.g. @vectorize(['float32(float32, float32)'], target='cuda'),
### where the signature string reads return_type(param_a_type, param_b_type), and so on.
### (Note: this is numba's @vectorize; numpy's vectorize takes no 'target' argument.)

## Libraries
import numpy as np
from numpy import vectorize
from scipy import special  # for the logistic function
import matplotlib.pyplot as plt
from loader import MNIST
from sklearn import preprocessing
# import a scipy optimizer too?

##### 1. Import data #####
print('Loading datasets...')
PATH = '/home/wataru/Uni/4997/programming_hw/ZhuFnn/MNIST_data'
mndata = MNIST(PATH)
X, y = mndata.load_training()
X_test, y_test = mndata.load_testing()
X, y = np.array(X), np.array(y).reshape(-1, 1)  # X (60,000 x 784), y (60,000 x 1)
X_test, y_test = np.array(X_test), np.array(y_test).reshape(-1, 1)

##### 2. Set up parameters #####
m_train = X.shape[0]
m_test = X_test.shape[0]
input_size = X.shape[1]  # number of features on the input + 1 (bias)
hidden_size = 50
output_size = np.unique(y).shape[0]  # count the unique labels to size the output layer
lr = 3e-2      # learning rate
epochs = 5000  # number of epochs
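# The sklearn.preprocessing import suggests the inputs are scaled before
# training; a minimal sketch, assuming raw 0..255 pixels scaled to [0, 1]:
X = X.astype(np.float64) / 255.0
X_test = X_test.astype(np.float64) / 255.0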