def getDataSet():
    mn = MNIST(".")  # dir of files
    images, labels = mn.load_training()
    images = normalize_images(images)
    labels = vectorize_labels(labels)
    return np.array(images), np.array(labels)
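# The helpers normalize_images / vectorize_labels are not defined in this
# snippet; a minimal sketch of plausible implementations (an assumption, not
# the original code): scale pixels to [0, 1] and one-hot encode the labels.
def normalize_images(images):
    return [[pix / 255.0 for pix in img] for img in images]

def vectorize_labels(labels, n_classes=10):
    vecs = np.zeros((len(labels), n_classes))
    for i, l in enumerate(labels):
        vecs[i][l] = 1.0
    return vecs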
def main():
    nn = NeuronNetwork(input_size=784, output_size=10, hidden_layers=[15])
    # input = np.random.randn(784).reshape(784, 1)
    # dic = nn.prediction(input, print_result=True)

    # read data into variables
    # x_train[0 - 59999][0 - 783], labels_train[0 - 59999]
    mndata = MNIST('../data')
    x_train_in, labels_train = mndata.load_training()
    print('MNIST training data has been read')
    x_test_in, labels_test = mndata.load_testing()
    print('MNIST test data has been read')
    x_train, x_test = normalize_data(x_train_in, x_test_in)
    print('MNIST data has been normalized')

    trainer = Trainer(nn)
    # train(n_training_examples=60000, batch_size=200, n_epochs=20, learn_rate=1.5) = 0.872 accuracy
    # train(n_training_examples=60000, batch_size=200, n_epochs=40, learn_rate=1.5) = 0.906 accuracy
    trainer.train(x_train, labels_train, n_training_examples=60000,
                  batch_size=200, n_epochs=50, learn_rate=1.5)
    error_list, acc = trainer.test(x_test, labels_test, n_test_examples=1000)
    # print('error: {} ----> {}'.format(error_list[0], error_list[-1]))
    print('accuracy = {}'.format(acc))

    # testing with examples
    for i in range(10):
        vec, pred = nn.prediction(x_test[i])
        print('Image: {} ====> Prediction: {}'.format(labels_test[i], pred))
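# normalize_data is not shown in this snippet; a minimal sketch under the
# assumption that it rescales the raw 0-255 pixel lists to floats in [0, 1]:
def normalize_data(x_train_in, x_test_in):
    x_train = np.array(x_train_in, dtype=np.float64) / 255.0
    x_test = np.array(x_test_in, dtype=np.float64) / 255.0
    return x_train, x_test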
def imagesSetup(self, Dataset):
    self.Dataset = Dataset
    self.mndata = MNIST(self.dSetIndex[Dataset])
    if Dataset == 0:  # MNIST setup
        print('.', end='')
        images = self.mndata.load_training()
        # CONSIDER USING 'zip()' INSTEAD (see the sketch after this method)
        self.images = [np.array([np.array(images[0][x]), images[1][x]])
                       for x in range(len(images[0]))]
        print('.', end='')
        self.mndata = MNIST(self.dSetIndex[Dataset])
        imagesTe = self.mndata.load_testing()
        self.imagesTe = [np.array([np.array(imagesTe[0][x]), imagesTe[1][x]])
                         for x in range(len(imagesTe[0]))]
    elif Dataset == 1:  # EMNIST setup
        print('.', end='')
        images = self.mndata.load_training()
        self.images = [np.array([np.ravel(np.transpose([np.reshape(images[0][x], (28, 28))])), images[1][x]])
                       for x in range(len(images[0]))]
        print('.', end='')
        self.mndata = MNIST(self.dSetIndex[Dataset])
        imagesTe = self.mndata.load_testing()
        self.imagesTe = [np.array([np.ravel(np.transpose([np.reshape(imagesTe[0][x], (28, 28))])), imagesTe[1][x]])
                         for x in range(len(imagesTe[0]))]
        # EMNIST digit images are stored transposed, so each one is transposed back here.
        # The per-image approach is used because the pixel data must end up as an np.array;
        # applying np.transpose() to the whole set appeared to make every entry of 'images'
        # identical (likely a bug), and this version also benchmarked faster.
    print('.', end='')
    self.costArr = np.array([[np.zeros(self.n[-1]) for x in range(self.batchSize)]
                             for x in range(len(self.images) // self.batchSize)])
    self.cost = np.array([np.zeros(self.batchSize)
                          for x in range(len(self.images) // self.batchSize)])
    self.costArrTot = np.array([np.zeros(self.n[-1])
                                for x in range(len(self.images) // self.batchSize)])
    self.costArrTe = np.array([np.zeros(self.n[-1]) for x in range(len(self.imagesTe))])
    self.costTe = np.zeros(len(self.imagesTe))
    print(' Complete.\n')
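# A minimal sketch of the zip() refactor suggested above (an assumption about
# the intended change, not the original code): pair each image with its label
# directly instead of indexing both result lists by position.
images, labels = mndata.load_training()
paired = [np.array([np.array(img), lbl], dtype=object)
          for img, lbl in zip(images, labels)]
# dtype=object keeps the ragged (pixel-array, label) pair valid on recent NumPy.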
def get_natural_dataset_samples(num_of_samples):
    from loader import MNIST
    import random

    mndata = MNIST('MNIST_dataset')
    images, labels = mndata.load_training()
    selected_img = []
    selected_labels = []
    selected_idxs = random.sample(range(0, len(images)), num_of_samples)
    for idx in selected_idxs:
        # newPoint = [float(j) for j in images[idx]]
        # selected_img.append(newPoint)
        selected_img.append(images[idx])
        selected_labels.append(labels[idx])
    return selected_img, selected_labels
import numpy as np
from loader import MNIST
from nnCostFunction import nnCostFunction
from randInitializeWeights import randInitializeWeights
from computeNumericalGradient import unRolling
from predict import predict
from shuffle import shuffle

# Get data from MNIST
data = MNIST()
data.load_training()
data.load_testing()
x_train = data.train_images
y_train = data.train_labels
x_test = data.test_images
y_test = data.test_labels

x_train = np.reshape(x_train, (len(x_train), 784))
y_train = np.reshape(y_train, (len(y_train), 1))

# One-hot encode the training labels
y_train_fix = np.reshape(np.zeros(len(y_train) * 10), (len(y_train), 10))
for i in range(len(y_train)):
    for j in range(0, 10):
        if y_train[i] == j:
            y_train_fix[i][j] = 1

# Create validation / train splits
list_x_val = []
list_y_val = []
list_x_train = []
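# The nested loop above can be replaced with a vectorized one-hot encoding;
# an equivalent NumPy sketch:
y_train_fix = np.zeros((len(y_train), 10))
y_train_fix[np.arange(len(y_train)), y_train.ravel()] = 1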
from io import StringIO

import matplotlib.pyplot as plt
import pydotplus
from sklearn import tree

from loader import MNIST

mndata = MNIST('./Datasets')
trainingImages, trainingLabels = mndata.load_training()
testImages, testLabels = mndata.load_testing()

clf = tree.DecisionTreeClassifier()
clf = clf.fit(trainingImages[:1000], trainingLabels[:1000])
scores = clf.score(testImages, testLabels.tolist())
print("Accuracy: %f" % scores)

# Visualize the per-pixel feature importances as a 28x28 heat map
importances = clf.feature_importances_
importances = importances.reshape((28, 28))
plt.matshow(importances, cmap=plt.cm.hot)
plt.title("Pixel importances for decision tree")
plt.show()

# Export the fitted tree to a PDF via graphviz
dot_data = StringIO()
tree.export_graphviz(clf, out_file=dot_data)
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
graph.write_pdf("Dtree.pdf")
print("The Decision Tree was saved!")
def loadMnist(self):
    m = MNIST('./data')
    self.trvec, self.train_labels = m.load_training()
# The original definition line for this label-mapping helper is missing;
# the name below is a guess that matches the body.
def binarize_label(l):
    return -1 if l == digitA else (1 if l == digitB else 0)

def color_binarization(img):
    return numpy.array([(0 if pix < color_threshold else 1) for pix in img])

def do_train(w, imgs, classes):
    # Sum the logistic-factor-weighted terms over all samples, then negate
    # and average (as in the original).
    n = len(imgs)
    res = sum([(classes[i] * imgs[i]) / (1.0 + numpy.exp(numpy.dot(classes[i], w.T) * imgs[i]))
               for i in range(n)])
    return (-1.0 / n) * res

mndata = MNIST('data')
mndata.test_img_fname = 't10k-images.idx3-ubyte'
mndata.test_lbl_fname = 't10k-labels.idx1-ubyte'
mndata.train_img_fname = 'train-images.idx3-ubyte'
mndata.train_lbl_fname = 'train-labels.idx1-ubyte'

print("Params:")
print("Digits: " + str(digitA) + " -- -1, " + str(digitB) + " -- 1")
print("Iterations:", iterations)
print("Step (nu):", nu)
print("Loading data...")
mndata.load_training()
mndata.load_testing()
print("Training data count:", len(mndata.train_images))
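# A minimal sketch of how do_train might drive a gradient-descent loop
# (an assumption; the original loop is not shown, and train_imgs /
# train_classes are hypothetical names for the binarized data):
w = numpy.zeros(len(train_imgs[0]))
for _ in range(iterations):
    w = w - nu * do_train(w, train_imgs, train_classes)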
from algorithms.CrossValidator import validateHyperParameter
from algorithms.NearestCentroid.MyNearestCentroid import MyNearestCentroid
from algorithms.NearestCentroid.nc_classify import test_nc_classify, test_nc_classify_with_sklearn
from algorithms.NearestNeighbours.nearest_neighbour_classify import test_neigh_classify
from algorithms.NearestSubclass.MyNearestSubclassCentroid import MyNearestSubclassCentroid
from algorithms.NearestSubclass.nsc_classify import test_nsc_classify
from algorithms.PerceptronBP.PerceptronBPClassifier import PerceptronBPClassifier
from algorithms.PerceptronBP.perceptron_bp_test import test_perceptron_bp
from algorithms.PerceptronMSE.PerceptronMSEClassifier import PerceptronMSEClassifier
from algorithms.PerceptronMSE.perceptron_mse_test import test_perceptron_mse
from loader import MNIST
import numpy as np

mndata = MNIST('../samples/MNIST/')
trainingData, trainingLabels = mndata.load_training()
testData, testLabels = mndata.load_testing()
data = trainingData + testData
labels = trainingLabels + testLabels

# # ------- PCA ---------
# pca = PCA(n_components=2).fit(np.array(trainingData))
# trainingData = pca.transform(np.array(trainingData))
#
# pca = PCA(n_components=2).fit(np.array(testData))
# testData = pca.transform(np.array(testData))
#
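# If the PCA block above is re-enabled, sklearn's PCA must be imported, and
# the usual pattern is to fit the projection on the training data only and
# apply it to both splits (a sketch, not the original code):
from sklearn.decomposition import PCA

pca = PCA(n_components=2).fit(np.array(trainingData))
trainingData = pca.transform(np.array(trainingData))
testData = pca.transform(np.array(testData))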
from loader import MNIST

mndata = MNIST('../data/input/')
trn_img, trn_labels = mndata.load_training()
tst_img, tst_labels = mndata.load_testing()
# %reset
import numpy as np
from loader import MNIST
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import normalize
from display_network import *

mndata = MNIST('C:/Users/IT/Desktop/MALE_LamThanhTai_15110121/File Project/MNIST/')
mndata.load_testing()
X = mndata.test_images
X0 = np.asarray(X)[:1000, :] / 256.0
X = X0

# Cluster the first 1000 test images into K = 10 groups
K = 10
kmeans = KMeans(n_clusters=K).fit(X)
pred_label = kmeans.predict(X)
print(type(kmeans.cluster_centers_.T))
print(kmeans.cluster_centers_.T.shape)

# Render the cluster centroids as a single tiled image
A = display_network(kmeans.cluster_centers_.T, K, 1)
f1 = plt.imshow(A, interpolation='nearest', cmap="jet")
f1.axes.get_xaxis().set_visible(False)
f1.axes.get_yaxis().set_visible(False)
plt.show()
# plt.savefig('a1.png', bbox_inches='tight')

# a colormap and a normalization instance
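# display_network comes from an external helper module; a minimal sketch of
# an equivalent tiling routine (an assumption, not the original function):
def tile_centroids(centers_T, n_cols, pad=1):
    # centers_T: (784, K) array; returns one image tiling K 28x28 centroids.
    K = centers_T.shape[1]
    n_rows = (K + n_cols - 1) // n_cols
    canvas = np.ones((n_rows * (28 + pad) - pad, n_cols * (28 + pad) - pad))
    for k in range(K):
        r, c = divmod(k, n_cols)
        canvas[r * (28 + pad):r * (28 + pad) + 28,
               c * (28 + pad):c * (28 + pad) + 28] = centers_T[:, k].reshape(28, 28)
    return canvas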
'''
### VECTORIZE METHODS:
@vectorize(['float32(float32, float32)'], target='cuda'), where the signature
reads return(param_a, param_b), and so on.
'''

## Libraries
import numpy as np
from numpy import vectorize
from scipy import special  # for logistic function
import matplotlib.pyplot as plt
from loader import MNIST
from sklearn import preprocessing
# import scipy optimizer too??

##### 1. Import data #####
print('Loading datasets...')
PATH = '/home/wataru/Uni/4997/programming_hw/ZhuFnn/MNIST_data'
mndata = MNIST(PATH)
X, y = mndata.load_training()
X_test, y_test = mndata.load_testing()
X, y = np.array(X), np.array(y).reshape(-1, 1)  # X (60,000 x 784), y (60,000 x 1)
X_test, y_test = np.array(X_test), np.array(y_test).reshape(-1, 1)

##### 2. Set up parameters #####
m_train = X.shape[0]
m_test = X_test.shape[0]
input_size = X.shape[1]  # number of features on the input + 1 (bias)
hidden_size = 50
output_size = np.unique(y).shape[0]  # count the unique labels to get the number of outputs
lr = 3e-2      # learning rate
epochs = 5000  # num of epochs
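# The signature/target syntax in the docstring above matches numba's
# @vectorize (numpy's vectorize, imported above, takes no such arguments).
# A minimal sketch for the logistic function, assuming numba is installed;
# target='cpu' is shown here instead of 'cuda':
import math
from numba import vectorize as nb_vectorize

@nb_vectorize(['float32(float32)'], target='cpu')
def sigmoid_v(z):
    return 1.0 / (1.0 + math.exp(-z))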