def main():
    nn = NeuronNetwork(input_size=784, output_size=10, hidden_layers=[15])
    # input = np.random.randn(784).reshape(784, 1)
    # dic = nn.prediction(input, print_result=True)

    # Read data into variables:
    # x_train[0 - 59999][0 - 783], labels_train[0 - 59999]
    mndata = MNIST('../data')
    x_train_in, labels_train = mndata.load_training()
    print('MNIST training data has been read')
    x_test_in, labels_test = mndata.load_testing()
    print('MNIST test data has been read')

    x_train, x_test = normalize_data(x_train_in, x_test_in)
    print('MNIST data has been normalized')

    trainer = Trainer(nn)
    # train(n_training_examples=60000, batch_size=200, n_epochs=20, learn_rate=1.5) = 0.872 accuracy
    # train(n_training_examples=60000, batch_size=200, n_epochs=40, learn_rate=1.5) = 0.906 accuracy
    trainer.train(x_train, labels_train, n_training_examples=60000, batch_size=200, n_epochs=50, learn_rate=1.5)

    error_list, acc = trainer.test(x_test, labels_test, n_test_examples=1000)
    # print('error: {} ----> {}'.format(error_list[0], error_list[-1]))
    print('accuracy = {}'.format(acc))

    # Testing with examples
    for i in range(10):
        vec, pred = nn.prediction(x_test[i])
        print('Image: {} ====> Prediction: {}'.format(labels_test[i], pred))
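# normalize_data is called above but not defined in this snippet. A minimal
# sketch of what it might look like, assuming raw pixel values in 0-255 and
# that the network expects (784, 1) column vectors (both are assumptions):
import numpy as np

def normalize_data(x_train_in, x_test_in):
    """Scale raw MNIST pixel lists to [0, 1] column vectors."""
    x_train = [np.asarray(img, dtype=np.float64).reshape(784, 1) / 255.0 for img in x_train_in]
    x_test = [np.asarray(img, dtype=np.float64).reshape(784, 1) / 255.0 for img in x_test_in]
    return x_train, x_test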
def getDataSet():
    mn = MNIST(".")  # dir of files
    images, labels = mn.load_training()
    images = normalize_images(images)
    labels = vectorize_labels(labels)
    return np.array(images), np.array(labels)
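# normalize_images and vectorize_labels are not defined in this snippet.
# Hedged sketches of plausible implementations (the [0, 1] scaling and the
# 10-way one-hot encoding are assumptions):
import numpy as np

def normalize_images(images):
    # Scale 0-255 pixel values to [0, 1].
    return [[pix / 255.0 for pix in img] for img in images]

def vectorize_labels(labels):
    # One-hot encode digit labels 0-9.
    return [np.eye(10)[l] for l in labels]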
def get_natural_dataset_samples(num_of_samples):
    from loader import MNIST
    import random
    mndata = MNIST('MNIST_dataset')
    images, labels = mndata.load_training()
    selected_img = []
    selected_labels = []
    selected_idxs = random.sample(range(len(images)), num_of_samples)
    for idx in selected_idxs:
        # newPoint = [float(j) for j in images[idx]]
        # selected_img.append(newPoint)
        selected_img.append(images[idx])
        selected_labels.append(labels[idx])
    return selected_img, selected_labels
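# Example usage (the sample size of 100 is arbitrary):
imgs, lbls = get_natural_dataset_samples(100)
print(len(imgs), lbls[:5])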
class variables:
    def setup(self, n, eta, batchSize, repetitions, Dataset):
        print('Setting up Variables', end='')
        self.n = np.array(n)    # layer sizes
        self.eta = eta          # learning rate (now independent of batchSize)
        self.batchSize = batchSize
        self.repetitions = repetitions
        self.Dataset = Dataset  # dataset option
        self.randrange = 1
        self.dSetIndex = {0: 'Database-MNIST', 1: 'Database-EMNIST'}
        self.w = np.array([np.zeros((self.n[x], self.n[x - 1])) for x in range(1, len(self.n))])  # weights
        self.b = np.array([np.zeros((self.n[x], 1)) for x in range(1, len(self.n))])              # biases
        self.nRow = np.array([np.zeros((self.n[x], 1)) for x in range(len(self.n))])              # neuralRow
        self.zRow = np.array([np.zeros((self.n[x], 1)) for x in range(1, len(self.n))])           # neuralRow pre-sigmoid
        self.delta = np.array([np.zeros((self.n[x], 1)) for x in range(1, len(self.n))])          # error
        self.grad = np.array([  # gradient descent step
            np.array([np.zeros((self.n[x], 1)) for x in range(1, len(self.n))]),                  # dC/dbias
            np.array([np.zeros((self.n[x], self.n[x - 1])) for x in range(1, len(self.n))])       # dC/dweight
        ])
        self.aveCost = 0
        self.prevCost = 0
        self.images = None
        self.imagesTe = None

    def imagesSetup(self, Dataset):
        self.Dataset = Dataset
        self.mndata = MNIST(self.dSetIndex[Dataset])
        if Dataset == 0:  # MNIST setup
            print('.', end='')
            images = self.mndata.load_training()
            self.images = [np.array([np.array(images[0][x]), images[1][x]])
                           for x in range(len(images[0]))]  # CONSIDER USING 'zip()' INSTEAD
            print('.', end='')
            self.mndata = MNIST(self.dSetIndex[Dataset])
            imagesTe = self.mndata.load_testing()
            self.imagesTe = [np.array([np.array(imagesTe[0][x]), imagesTe[1][x]])
                             for x in range(len(imagesTe[0]))]
        elif Dataset == 1:  # EMNIST setup
            print('.', end='')
            images = self.mndata.load_training()
            self.images = [np.array([np.ravel(np.transpose([np.reshape(images[0][x], (28, 28))])), images[1][x]])
                           for x in range(len(images[0]))]
            print('.', end='')
            self.mndata = MNIST(self.dSetIndex[Dataset])
            imagesTe = self.mndata.load_testing()
            self.imagesTe = [np.array([np.ravel(np.transpose([np.reshape(imagesTe[0][x], (28, 28))])), imagesTe[1][x]])
                             for x in range(len(imagesTe[0]))]
            # The EMNIST digit image matrices are stored transposed, so they have to be
            # transposed back. This approach was used because the array containing the
            # pixel data has to be an np.array; there seems to be a bug where a plain
            # 'np.transpose()' makes every entry of 'images' the same, and this method
            # also tested faster than 'np.transpose()'.
        print('.', end='')
        self.costArr = np.array([[np.zeros(self.n[-1]) for x in range(self.batchSize)]
                                 for x in range(len(self.images) // self.batchSize)])
        self.cost = np.array([np.zeros(self.batchSize)
                              for x in range(len(self.images) // self.batchSize)])
        self.costArrTot = np.array([np.zeros(self.n[-1])
                                    for x in range(len(self.images) // self.batchSize)])
        self.costArrTe = np.array([np.zeros(self.n[-1]) for x in range(len(self.imagesTe))])
        self.costTe = np.zeros(len(self.imagesTe))
        print(' Complete.\n')
import numpy as np
from loader import MNIST
from nnCostFunction import nnCostFunction
from randInitializeWeights import randInitializeWeights
from computeNumericalGradient import unRolling
from predict import predict
from shuffle import shuffle

# Get data from MNIST
data = MNIST()
data.load_training()
data.load_testing()
x_train = data.train_images
y_train = data.train_labels
x_test = data.test_images
y_test = data.test_labels

x_train = np.reshape(x_train, (len(x_train), 784))
y_train = np.reshape(y_train, (len(y_train), 1))

# One-hot encode the training labels
y_train_fix = np.reshape(np.zeros(len(y_train) * 10), (len(y_train), 10))
for i in range(len(y_train)):
    for j in range(0, 10):
        if y_train[i] == j:
            y_train_fix[i][j] = 1

# Create validation and training splits
list_x_val = []
list_y_val = []
list_x_train = []
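# The double loop above can be replaced by a vectorized one-hot encoding;
# a minimal sketch, assuming y_train holds integer digit labels 0-9:
y_train_fix = np.eye(10)[y_train.ravel().astype(int)]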
from sklearn import tree
import matplotlib.pyplot as plt
import io
import pydotplus
from loader import MNIST

mndata = MNIST('./Datasets')
trainingImages, trainingLabels = mndata.load_training()
testImages, testLabels = mndata.load_testing()

clf = tree.DecisionTreeClassifier()
clf = clf.fit(trainingImages[:1000], trainingLabels[:1000])
scores = clf.score(testImages, testLabels.tolist())
print("Accuracy: %f " % scores)

importances = clf.feature_importances_
importances = importances.reshape((28, 28))
plt.matshow(importances, cmap=plt.cm.hot)
plt.title("Pixel importances for decision tree")
plt.show()

dot_data = io.StringIO()
tree.export_graphviz(clf, out_file=dot_data)
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
graph.write_pdf("Dtree.pdf")
print("The Decision Tree was saved!")
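# In newer scikit-learn (>= 0.21) the tree can also be drawn without
# pydotplus or Graphviz; a minimal sketch (the max_depth limit is an
# assumption to keep the figure readable):
fig, ax = plt.subplots(figsize=(12, 8))
tree.plot_tree(clf, max_depth=3, filled=True, ax=ax)
plt.show()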
def loadMnist(self):
    m = MNIST('./data')
    self.trvec, self.train_labels = m.load_training()
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import random
import array
import cv2
from noise import noise
from loader import MNIST
from save import save

__all__ = ['MNIST']

absPath = os.path.dirname(__file__)
mnist = MNIST(absPath)
noise = noise()
filesave = save()


def serialize(image):
    # Flatten a 2-D image into a 784-entry int array, rounding each pixel
    # and clamping negative values to zero.
    im_array = np.zeros(784, np.int32)
    # im_array = array.array('i', (0 for k in range(784)))
    k = 0
    for i in range(len(image)):
        for j in range(len(image[i])):
            im_array[k] = int(round(image[i, j]))
            if im_array[k] < 0:
                im_array[k] = 0
            k = k + 1
    return im_array
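# A vectorized equivalent of serialize() using numpy (a sketch; assumes
# `image` is a 2-D numpy array, as the image[i, j] indexing above implies):
def serialize_vectorized(image):
    # Round, clamp negatives to zero, and flatten in one pass.
    return np.maximum(np.rint(image).astype(np.int32).ravel(), 0)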
from loader import MNIST

mndata = MNIST("../data/input/")
trn_img, trn_labels = mndata.load_training()
tst_img, tst_labels = mndata.load_testing()
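# The loader returns plain Python lists; a common next step is converting
# to numpy arrays and scaling pixels to [0, 1] (a sketch, not part of the
# original snippet):
import numpy as np
trn_img = np.asarray(trn_img, dtype=np.float64) / 255.0
tst_img = np.asarray(tst_img, dtype=np.float64) / 255.0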
from algorithms.CrossValidator import validateHyperParameter
from algorithms.NearestCentroid.MyNearestCentroid import MyNearestCentroid
from algorithms.NearestCentroid.nc_classify import test_nc_classify, test_nc_classify_with_sklearn
from algorithms.NearestNeighbours.nearest_neighbour_classify import test_neigh_classify
from algorithms.NearestSubclass.MyNearestSubclassCentroid import MyNearestSubclassCentroid
from algorithms.NearestSubclass.nsc_classify import test_nsc_classify
from algorithms.PerceptronBP.perceptron_bp_test import test_perceptron_bp
from algorithms.PerceptronMSE.PerceptronMSEClassifier import PerceptronMSEClassifier
from algorithms.PerceptronMSE.perceptron_mse_test import test_perceptron_mse
from loader import MNIST
import numpy as np
from algorithms.PerceptronBP.PerceptronBPClassifier import PerceptronBPClassifier

mndata = MNIST('../samples/MNIST/')
trainingData, trainingLabels = mndata.load_training()
testData, testLabels = mndata.load_testing()

data = trainingData + testData
labels = trainingLabels + testLabels

# # ------- PCA ---------
# pca = PCA(n_components=2).fit(np.array(trainingData))
# trainingData = pca.transform(np.array(trainingData))
#
# pca = PCA(n_components=2).fit(np.array(testData))
# testData = pca.transform(np.array(testData))
#
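# If the commented-out PCA block above is re-enabled, note that it fits a
# second PCA on the test set; the projection learned from the training data
# should normally be reused for both splits. A sketch (the sklearn import is
# an assumption, since it does not appear in the original snippet):
from sklearn.decomposition import PCA
pca = PCA(n_components=2).fit(np.array(trainingData))
trainingData = pca.transform(np.array(trainingData))
testData = pca.transform(np.array(testData))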
# %reset
import numpy as np
from loader import MNIST
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import normalize
from display_network import *

mndata = MNIST('C:/Users/IT/Desktop/MALE_LamThanhTai_15110121/File Project/MNIST/')
mndata.load_testing()
X = mndata.test_images
X0 = np.asarray(X)[:1000, :] / 256.0
X = X0

K = 10
kmeans = KMeans(n_clusters=K).fit(X)
pred_label = kmeans.predict(X)

print(type(kmeans.cluster_centers_.T))
print(kmeans.cluster_centers_.T.shape)

A = display_network(kmeans.cluster_centers_.T, K, 1)
f1 = plt.imshow(A, interpolation='nearest', cmap="jet")
f1.axes.get_xaxis().set_visible(False)
f1.axes.get_yaxis().set_visible(False)
plt.show()
# plt.savefig('a1.png', bbox_inches='tight')

# a colormap and a normalization instance
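# K-means cluster ids are arbitrary; when the true test labels are loaded
# they can be used to inspect cluster purity. A sketch (assumes
# mndata.test_labels was populated by load_testing() above):
labels = np.asarray(mndata.test_labels)[:1000]
for k in range(K):
    members = labels[pred_label == k]
    if len(members) > 0:
        print('cluster %d -> most common digit: %d' % (k, np.bincount(members).argmax()))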
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import random
import array
import cv2
from noise import noise
from loader import MNIST
from save import save

__all__ = ['MNIST']

absPath = os.path.dirname(__file__)
mnist = MNIST(absPath)
noise = noise()

imgs_test, labels_test = mnist.load_testing()

test_gauss_5 = np.zeros(50, dtype=object)
test_gauss_10 = np.zeros(50, dtype=object)
test_gauss_15 = np.zeros(50, dtype=object)
test_snp_002 = np.zeros(50, dtype=object)
test_snp_005 = np.zeros(50, dtype=object)
test_snp_01 = np.zeros(50, dtype=object)
test_5_005 = np.zeros(50, dtype=object)
test_10_002 = np.zeros(50, dtype=object)
test_15_01 = np.zeros(50, dtype=object)
test_random = np.zeros(50, dtype=object)
def label_class(l):
    return -1 if l == digitA else (1 if l == digitB else 0)


def color_binarization(img):
    return numpy.array([(0 if pix < color_threshold else 1) for pix in img])


def do_train(w, imgs, classes):
    n = len(imgs)
    res = sum([(classes[i] * imgs[i]) / (1.0 + numpy.exp(numpy.dot(classes[i], w.T) * imgs[i]))
               for i in range(n)])
    return (-1.0 / n) * res


mndata = MNIST('data')
mndata.test_img_fname = 't10k-images.idx3-ubyte'
mndata.test_lbl_fname = 't10k-labels.idx1-ubyte'
mndata.train_img_fname = 'train-images.idx3-ubyte'
mndata.train_lbl_fname = 'train-labels.idx1-ubyte'

print("Params: ")
print("Digits: " + str(digitA) + " -- -1 " + str(digitB) + " -- 1")
print("Iterations: ", iterations)
print("Step (nu): ", nu)

print("Loading data...")
mndata.load_training()
mndata.load_testing()
print("Training data count:", len(mndata.train_images))
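# do_train returns a gradient-like term, so the natural training loop is
# plain gradient descent with step nu. A sketch only: the zero
# initialization of w and the names imgs/classes (binarized images and
# +/-1 labels) are assumptions, not part of the original snippet:
w = numpy.zeros(784)
for _ in range(iterations):
    w = w - nu * do_train(w, imgs, classes)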
from loader import MNIST
import numpy as np

# This is intended to be called from the directory above
mndata = MNIST('./data')

# Load a list with training images and training labels
training_ims, training_labels = mndata.load_training()
testing_ims, testing_labels = mndata.load_testing()

# Transform everything into arrays
training_ims = np.array(training_ims)
training_labels = np.array(training_labels)
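# Quick sanity check: each row is a flattened 28x28 image, so it can be
# reshaped and displayed (a sketch; matplotlib is an extra dependency here):
import matplotlib.pyplot as plt
plt.imshow(training_ims[0].reshape(28, 28), cmap='gray')
plt.title('label: %d' % training_labels[0])
plt.show()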
'''
### VECTORIZE METHODS!
Use @vectorize(['float32(float32, float32)'], target='cuda'), where the
signature string is return_type(param_a, param_b), and so on.
'''

## Libraries
import numpy as np
from numpy import vectorize
from scipy import special  # for logistic function
import matplotlib.pyplot as plt
from loader import MNIST
from sklearn import preprocessing
# import scipy optimizer too??

##### 1. Import data #####
print('Loading datasets...')
PATH = '/home/wataru/Uni/4997/programming_hw/ZhuFnn/MNIST_data'
mndata = MNIST(PATH)
X, y = mndata.load_training()
X_test, y_test = mndata.load_testing()
X, y = np.array(X), np.array(y).reshape(-1, 1)  # X (60,000 x 784), y (60,000 x 1)
X_test, y_test = np.array(X_test), np.array(y_test).reshape(-1, 1)

##### 2. Set up parameters #####
m_train = X.shape[0]
m_test = X_test.shape[0]
input_size = X.shape[1]  # number of features on the input + 1 (bias)
hidden_size = 50
output_size = np.unique(y).shape[0]  # count the unique labels to get the number of output classes
lr = 3e-2      # learning rate
epochs = 5000  # number of epochs
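# A minimal sketch of the step this setup usually leads to: random weight
# initialization and a sigmoid forward pass. The uniform init range and the
# use of scipy.special.expit are assumptions, not the original code:
W1 = np.random.uniform(-0.5, 0.5, (input_size, hidden_size))
W2 = np.random.uniform(-0.5, 0.5, (hidden_size, output_size))

def forward(X_batch):
    # Hidden activations, then output activations.
    a1 = special.expit(X_batch @ W1)
    return special.expit(a1 @ W2)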