Example #1
def getDataSet():
    mn = MNIST(".")  #dir of files
    images, labels = mn.load_training()

    images = normalize_images(images)
    labels = vectorize_labels(labels)
    return np.array(images), np.array(labels)
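The helpers normalize_images and vectorize_labels are not shown here. A minimal sketch of what they might look like, assuming 0-255 grayscale pixels and one-hot label targets (hypothetical implementations, not the original helpers):

def normalize_images(images):
    # scale pixel values from [0, 255] down to [0, 1]
    return [[px / 255.0 for px in img] for img in images]

def vectorize_labels(labels):
    # turn each digit label into a 10-element one-hot vector
    vectors = []
    for label in labels:
        vec = [0.0] * 10
        vec[label] = 1.0
        vectors.append(vec)
    return vectors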
Example #3
def main():
    nn = NeuronNetwork(input_size=784, output_size=10, hidden_layers=[15])
    #input = np.random.randn(784).reshape(784,1)
    #dic = nn.prediction(input, print_result=True)

    # read data into variables
    # x_train[0 - 59999][0 - 783], labels_train[0 - 59999]
    mndata = MNIST('../data')
    x_train_in, labels_train = mndata.load_training()
    print('MNIST training data has been read')
    x_test_in, labels_test = mndata.load_testing()
    print('MNIST test data has been read')
    x_train, x_test = normalize_data(x_train_in, x_test_in)
    print('MNIST data has been normalized')

    trainer = Trainer(nn)
    # train(n_training_examples=60000, batch_size=200, n_epochs=20, learn_rate=1.5) = 0.872 accuracy
    # train(n_training_examples=60000, batch_size=200, n_epochs=40, learn_rate=1.5) = 0.906 accuracy
    trainer.train(x_train,
                  labels_train,
                  n_training_examples=60000,
                  batch_size=200,
                  n_epochs=50,
                  learn_rate=1.5)
    error_list, acc = trainer.test(x_test, labels_test, n_test_examples=1000)

    #print ('error: {} ----> {}'.format(error_list[0], error_list[-1]))
    print('accuracy = {}'.format(acc))

    #testing with examples

    for i in range(10):
        vec, pred = nn.prediction(x_test[i])
        print('Image: {} ====> Prediction: {}'.format(labels_test[i], pred))
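normalize_data is not defined in this snippet. A plausible implementation, assuming raw 0-255 pixel lists for both splits (a sketch, not the original helper):

import numpy as np

def normalize_data(x_train, x_test):
    # scale both splits from [0, 255] into [0, 1]
    return np.array(x_train) / 255.0, np.array(x_test) / 255.0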
Example #4
def get_natural_dataset_samples(num_of_samples):
    from loader import MNIST

    import random
    mndata = MNIST('MNIST_dataset')
    images, labels = mndata.load_training()
    selected_img = []
    selected_labels = []
    selected_idxs = random.sample(range(0, len(images)), num_of_samples)
    for i in range(0, len(selected_idxs)):
        # newPoint = [float(j) for j in images[selected_idxs[i]]]
        # selected_img.append(newPoint)
        selected_img.append(images[selected_idxs[i]])
        selected_labels.append(labels[selected_idxs[i]])
    return selected_img, selected_labels
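A short usage sketch for this sampler (hypothetical call site):

sample_images, sample_labels = get_natural_dataset_samples(100)
print(len(sample_images), len(sample_labels))  # 100 100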
Example #5
from loader import MNIST
import numpy as np

# This is intended to be called from the directory above

mndata = MNIST('./data')
# Load a list with training images and training labels
training_ims, training_labels = mndata.load_training()
testing_ims, testing_labels = mndata.load_testing()

# Transform everything into arrays
training_ims = np.array(training_ims)
training_labels = np.array(training_labels)
Example #6
    rand = noise.gauss(rand, rnd_gauss)
    test_random[i] = serialize(rand)

filesave.as_text(test_clean, 'data/clean/test/file.my-obj')
filesave.as_text(test_gauss_5, 'data/noisy/test/gauss_5.my-obj')
filesave.as_text(test_gauss_10, 'data/noisy/test/gauss_10.my-obj')
filesave.as_text(test_gauss_15, 'data/noisy/test/gauss_15.my-obj')
filesave.as_text(test_snp_002, 'data/noisy/test/snp_002.my-obj')
filesave.as_text(test_snp_005, 'data/noisy/test/snp_005.my-obj')
filesave.as_text(test_snp_01, 'data/noisy/test/snp_01.my-obj')
filesave.as_text(test_5_005, 'data/noisy/test/gauss_5_snp_005.my-obj')
filesave.as_text(test_10_002, 'data/noisy/test/gauss_10_snp_002.my-obj')
filesave.as_text(test_15_01, 'data/noisy/test/gauss_15_snp_01.my-obj')
filesave.as_text(test_random, 'data/noisy/test/random.my-obj')

imgs_train, labels_train = mnist.load_training()

train_clean = np.empty(len(imgs_train), dtype=object)
train_gauss_5 = np.empty(len(imgs_train), dtype=object)
train_gauss_10 = np.empty(len(imgs_train), dtype=object)
train_gauss_15 = np.empty(len(imgs_train), dtype=object)
train_snp_002 = np.empty(len(imgs_train), dtype=object)
train_snp_005 = np.empty(len(imgs_train), dtype=object)
train_snp_01 = np.empty(len(imgs_train), dtype=object)
train_5_005 = np.empty(len(imgs_train), dtype=object)
train_10_002 = np.empty(len(imgs_train), dtype=object)
train_15_01 = np.empty(len(imgs_train), dtype=object)
train_random = np.empty(len(imgs_train), dtype=object)

for i in xrange(len(imgs_train)):
    im_arr = imgs_train[i]
Example #7
class variables:
    
    def setup(self,n,eta,batchSize,repetitions,Dataset):
        print('Setting up Variables', end='')
        self.n = np.array(n) #Layers
        self.eta = eta #learning rate (now independent on batchSize)
        self.batchSize = batchSize
        self.repetitions = repetitions
        self.Dataset = Dataset #DataSet Option
        
        
        self.randrange = 1
        
        self.dSetIndex = {0:'Database-MNIST',1:'Database-EMNIST'}
        
        self.w = np.array([np.zeros((self.n[x],self.n[x-1])) for x in range(1,len(self.n))]) #weights
        self.b = np.array([np.zeros((self.n[x],1)) for x in range(1,len(self.n))]) #biases
        self.nRow = np.array([np.zeros((self.n[x],1)) for x in range(len(self.n))]) #neuralRow
        self.zRow = np.array([np.zeros((self.n[x],1)) for x in range(1,len(self.n))]) #neuralRow pre-sigmoid
        
        self.delta = np.array([np.zeros((self.n[x],1)) for x in range(1,len(self.n))]) #error
        self.grad = np.array([ #gradient descent step
            np.array([np.zeros((self.n[x],1)) for x in range(1,len(self.n))]), #dC/dbias
            np.array([np.zeros((self.n[x],self.n[x-1])) for x in range(1,len(self.n))]) #dC/dweight
            ])
        self.aveCost = 0
        self.prevCost = 0
        
        self.images = None
        self.imagesTe = None
        
        
    def imagesSetup(self,Dataset):
        self.Dataset = Dataset
        self.mndata = MNIST(self.dSetIndex[Dataset])
        
        if Dataset == 0: #MNIST Setup
            print('.', end='')
            images = self.mndata.load_training()
            self.images = [np.array([np.array(images[0][x]),images[1][x]]) for x in range(len(images[0]))]
            #CONSIDER USING 'zip()' INSTEAD
            print('.', end='')
            self.mndata = MNIST(self.dSetIndex[Dataset])
            imagesTe = self.mndata.load_testing()
            self.imagesTe = [np.array([np.array(imagesTe[0][x]),imagesTe[1][x]]) for x in range(len(imagesTe[0]))]
        
        elif Dataset == 1: #EMNIST Setup
            print('.', end='')
            images = self.mndata.load_training()
            self.images = [np.array([np.ravel(np.transpose([np.reshape(images[0][x],(28,28))])),images[1][x]]) for x in range(len(images[0]))]
            print('.', end='')
            self.mndata = MNIST(self.dSetIndex[Dataset])
            imagesTe = self.mndata.load_testing()
            self.imagesTe = [np.array([np.ravel(np.transpose([np.reshape(imagesTe[0][x],(28,28))])),imagesTe[1][x]]) for x in range(len(imagesTe[0]))]
            #The EMNIST digit image matrices are stored transposed, so they have to be transposed back

        #This approach is used because the array containing the pixel data has to be an np.array
        #There appears to be a bug where 'np.transpose()' makes every entry of 'images' identical
        #It also tested faster than 'np.transpose()'
        print('.', end='')
        
        self.costArr = np.array([
            [np.zeros(self.n[-1]) for x in range(self.batchSize)]
            for x in range(len(self.images)//self.batchSize)
            ])
        self.cost = np.array([
            np.zeros(self.batchSize) for x in range(len(self.images)//self.batchSize)
            ])
        self.costArrTot = np.array([
            np.zeros(self.n[-1]) for x in range(len(self.images)//self.batchSize)
            ])
        
        self.costArrTe = np.array([np.zeros(self.n[-1]) for x in range(len(self.imagesTe))])
        self.costTe = np.zeros(len(self.imagesTe))
        print(' Complete.\n')
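The inline comment above suggests using zip(); a minimal sketch of that alternative for the MNIST branch (keeping the same list-of-[pixels, label] pairing; dtype=object is an assumption, added so the ragged pairs work on recent NumPy):

images, labels = self.mndata.load_training()
self.images = [np.array([np.array(img), lbl], dtype=object) for img, lbl in zip(images, labels)]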
Example #8
import numpy as np
from loader import MNIST
from nnCostFunction import nnCostFunction
from randInitializeWeights import randInitializeWeights
from computeNumericalGradient import unRolling
from predict import predict
from shuffle import shuffle
# Get data from Mnist
data = MNIST()
data.load_training()
data.load_testing()

x_train = data.train_images
y_train = data.train_labels

x_test = data.test_images
y_test = data.test_labels

x_train = np.reshape(x_train, (len(x_train), 784))
y_train = np.reshape(y_train, (len(y_train), 1))
y_train_fix = np.reshape(np.zeros(len(y_train)*10), (len(y_train), 10))

for i in range(len(y_train)):
    for j in range(0, 10):
        if y_train[i] == j:
            y_train_fix[i][j] = 1
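# Note: the nested loop above is equivalent to this vectorized one-hot
# encoding (a sketch producing the same y_train_fix):
#   y_train_fix = np.zeros((len(y_train), 10))
#   y_train_fix[np.arange(len(y_train)), y_train.ravel()] = 1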

# Create Validation, Train
list_x_val = []
list_y_val = []
list_x_train = []
Example #9
from sklearn import tree
import matplotlib.pyplot as plt
import StringIO
import pydotplus
from loader import MNIST

mndata = MNIST('./Datasets')
trainingImages, trainingLabels = mndata.load_training()
testImages, testLabels = mndata.load_testing()

clf = tree.DecisionTreeClassifier()
clf = clf.fit(trainingImages[:1000], trainingLabels[:1000])

scores = clf.score(testImages,testLabels.tolist())
print "Accuracy: %f " % scores

importances = clf.feature_importances_
importances = importances.reshape((28, 28))

plt.matshow(importances, cmap=plt.cm.hot)
plt.title("Pixel importances for decision tree")
plt.show()

dot_data = StringIO.StringIO()
tree.export_graphviz(clf, out_file=dot_data)
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
graph.write_pdf("Dtree.pdf")
print "The Decision Tree was saved!"


Example #10
def loadMnist(self):
    m = MNIST('./data')
    self.trvec, self.train_labels = m.load_training()
Example #11
    return (-1.0 / n) * res


mndata = MNIST('data')
mndata.test_img_fname = 't10k-images.idx3-ubyte'
mndata.test_lbl_fname = 't10k-labels.idx1-ubyte'
mndata.train_img_fname = 'train-images.idx3-ubyte'
mndata.train_lbl_fname = 'train-labels.idx1-ubyte'

print "Params: "
print "Digits: " + str(digitA) + " -- -1 " + str(digitB) + " -- 1"
print "Iterations: ", iterations
print "Step (nu): ", nu

print "Loading data..."
mndata.load_training()
mndata.load_testing()

print "Training data count:", len(mndata.train_images)
print "Testing data count:", len(mndata.test_images)

[(train_imgs, train_classes), (test_imgs, test_classes)] = [
    zip(*[(i, l) for (i, l) in zip(imgs, map(label_class, lbls)) if l == -1 or l == 1])
    for (imgs, lbls) in [(mndata.train_images, mndata.train_labels), (mndata.test_images, mndata.test_labels)]]

train_imgs = map(color_binarization, train_imgs)
test_imgs = map(color_binarization, test_imgs)

w = numpy.array([0.0 for _ in xrange(img_size)])

print "Training..."
Example #13
from loader import MNIST

mndata = MNIST("../data/input/")
trn_img, trn_labels = mndata.load_training()
tst_img, tst_labels = mndata.load_testing()
Example #14
from algorithms.NearestCentroid.MyNearestCentroid import MyNearestCentroid
from algorithms.NearestCentroid.nc_classify import test_nc_classify, test_nc_classify_with_sklearn
from algorithms.NearestNeighbours.nearest_neighbour_classify import test_neigh_classify
from algorithms.NearestSubclass.MyNearestSubclassCentroid import MyNearestSubclassCentroid
from algorithms.NearestSubclass.nsc_classify import test_nsc_classify
from algorithms.PerceptronBP.perceptron_bp_test import test_perceptron_bp
from algorithms.PerceptronMSE.PerceptronMSEClassifier import PerceptronMSEClassifier
from algorithms.PerceptronMSE.perceptron_mse_test import test_perceptron_mse
from loader import MNIST
import numpy as np

from algorithms.PerceptronBP.PerceptronBPClassifier import PerceptronBPClassifier

mndata = MNIST('../samples/MNIST/')

trainingData, trainingLabels = mndata.load_training()
testData, testLabels = mndata.load_testing()
data = trainingData + testData
labels = trainingLabels + testLabels



# # ------- PCA ---------
# pca = PCA(n_components=2).fit(np.array(trainingData))
# trainingData = pca.transform(np.array(trainingData))
#
# pca = PCA(n_components=2).fit(np.array(testData))
# testData = pca.transform(np.array(testData))
#
# pca = PCA(n_components=2).fit(np.array(data))
# data = pca.transform(np.array(data))
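A side note on the commented-out PCA block: fitting a separate PCA on each split projects the splits into different coordinate systems. The usual sklearn pattern is to fit on the training data only and reuse that transform (a sketch, assuming sklearn's PCA):

from sklearn.decomposition import PCA

pca = PCA(n_components=2).fit(np.array(trainingData))
trainingData = pca.transform(np.array(trainingData))
testData = pca.transform(np.array(testData))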
Example #16
### VECTORIZE METHODS: decorate with @vectorize(['float32(float32, float32)'], target='cuda'), where the signature reads return_type(param_a, param_b), and so on
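# For example, a minimal numba sketch of such a decorated function (assumes
# numba with CUDA support is installed; 'gpu_add' is a hypothetical name):
#   from numba import vectorize
#   @vectorize(['float32(float32, float32)'], target='cuda')
#   def gpu_add(a, b):
#       return a + b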

## Libraries
import numpy as np
from numpy import vectorize
from scipy import special  # for logistic function
import matplotlib.pyplot as plt
from loader import MNIST
from sklearn import preprocessing
# import scipy optimizer too??

##### 1. Import data #####
print('Loading datasets...')
PATH = '/home/wataru/Uni/4997/programming_hw/ZhuFnn/MNIST_data'
mndata = MNIST(PATH)
X, y = mndata.load_training()
X_test, y_test = mndata.load_testing()

X, y = np.array(X), np.array(y).reshape(-1, 1)  # X (60,000 x 784), y (60,000 x 1)
X_test, y_test = np.array(X_test), np.array(y_test).reshape(-1, 1)

##### 2. Set up parameters #####
m_train = X.shape[0]
m_test = X_test.shape[0]
input_size = X.shape[1]  # number of input features (784 pixels)
hidden_size = 50
output_size = np.unique(y).shape[0]  # number of distinct labels in y
lr = 3e-2  # learning rate
epochs = 5000  # num of epoch