Example #1
def prepare_data():
    # prepare data
    training_data = list(read(dataset='training', path='./'))
    testing_data = list(read(dataset='testing', path='./'))
    split = len(training_data)
    label, pixels = training_data[0]
    train_data = np.zeros(shape=(len(training_data), 1) + pixels.shape)
    train_label = np.zeros(shape=(len(training_data), 10))
    for n in range(len(training_data)):
        train_label[n, training_data[n][0]] = 1
        train_data[n, 0, :, :] = training_data[n][1] / 255.0

    Te_label, Te_pixels = testing_data[0]
    test_data = np.zeros(shape=(len(testing_data), 1) + Te_pixels.shape)
    test_label = np.zeros(shape=(len(testing_data), 10))
    for n in range(len(testing_data)):
        test_label[n, testing_data[n][0]] = 1
        test_data[n, 0, :, :] = testing_data[n][1] / 255.0

    # Downsample training data
    n_train_samples = 30000
    train_idxs = np.random.randint(0, split, n_train_samples)  # random_integers is deprecated; randint covers the same range
    train_data = train_data[train_idxs, ...]
    train_label = train_label[train_idxs, ...]

    return train_data, train_label, test_data, test_label
Example #2
def main():
    lb, im = mnist.read(dataset="training")
    lbt, imt = mnist.read(dataset="testing")
    im = im.reshape((len(im), len(im[0]) * len(im[0])))
    imt = imt.reshape((len(imt), len(imt[0]) * len(imt[0])))
    # astype() returns a copy, so the result must be assigned back
    im = im.astype(float)
    imt = imt.astype(float)
    # validation set
    imv = im[:500]
    lbv = lb[:500]
    imtt = im[500:]
    lbtt = lb[500:]
    tag = np.zeros(len(lbv), dtype=int)
    if READ:
        sigma = []
        mu = []
        for i in range(10):
            ts = np.loadtxt('sigma' + str(i) + '.out')
            tm = np.loadtxt('mu' + str(i) + '.out')
            sigma.append(ts)
            mu.append(tm)
        sigma = np.array(sigma)
        mu = np.array(mu)
        sigma_inv = []
        for i in range(len(sigma)):
            sigma_inv.append(np.linalg.inv(sigma[i]))
        sigma_inv = np.array(sigma_inv)
    else:
        data = zip(lbtt, imtt)
        datat = zip(lbv, imv)
        # group data into 10 groups
        data_grouped = [list(v) for l, v in groupby(sorted(data, key=lambda x: x[0]), lambda x: x[0])]
        sigma, mu = cov.cov(data_grouped, 10000)
        sigma = np.array(sigma)
        mu = np.array(mu)
        sigma_inv = []
        for i in range(len(sigma)):
            sigma_inv.append(np.linalg.inv(sigma[i]))
        sigma_inv = np.array(sigma_inv)
    #print np.linalg.slogdet(sigma[0])
    pi = class_prob(lb)
    cnt = 0
    for imt_data in imv:
        p = -sys.maxint
        ind = -1
        pj = class_gauss(imt_data, sigma, sigma_inv, mu)
        for j in range(len(pj)):
            temp = math.log(pi[j]) + pj[j]
            if p < temp:
                p = temp
                ind = j
        print lbv[cnt], ':', ind
        if lbv[cnt] == ind:
            tag[cnt] = 1
        cnt += 1

    print 'hit:', sum(tag)
    print 'total:', len(tag)
    print 'miss rate:', 1.0 - float(sum(tag)) / len(tag)
Example #3
    def trainNetwork(self):
        training_set = mnist.read(path="./mnist")
        vectorized_training_set = mnist.read(path="./mnist")

        self.network.train(vectorized_training_set, epochs=self.epochs, batchsize=1, eta=self.eta / 60000)

        self.Status_Label.setText("Training finished!")
        self.StatusLamp.setGreen()
Example #4
def main():
    hidden_dim = 200
    batch_size = 100
    print("hidden_dim: " + str(hidden_dim) + ", batch_size: " +
          str(batch_size))
    training_set = np.array(
        list(mnist.read(dataset="training", path="/home/ywang/mnist")))
    print("size of the training set: " + str(len(training_set)))
    testing_set = np.array(
        list(mnist.read(dataset="testing", path="/home/ywang/mnist")))
    print("size of the testing set: " + str(len(testing_set)))
    np.random.seed(0)
    p = 1.0
    backstitch_alpha = 0.3
    print("backstitch alpha: " + str(backstitch_alpha))
    ratio = 0.5
    training_subset = training_set[np.random.binomial(1, p, len(training_set))
                                   == 1]
    print("number of the actual training examples: " +
          str(len(training_subset)))
    # resize the images to make the number of input features 4 times smaller
    #training_images = np.stack([training_subset[i][0] for i in range(len(training_subset))])
    training_images = np.stack([
        ndimage.zoom(training_subset[i][0], ratio)
        for i in range(len(training_subset))
    ])
    print("image size is: " + str(training_images.shape[1]) + " by " +
          str(training_images.shape[2]))
    training_images = np.reshape(training_images,
                                 [training_images.shape[0], -1])
    training_labels = np.stack(
        [training_subset[i][1] for i in range(len(training_subset))])
    training_examples = (training_images, training_labels)

    #testing_images = np.stack([testing_set[i][0] for i in range(len(testing_set))])
    testing_images = np.stack([
        ndimage.zoom(testing_set[i][0], ratio) for i in range(len(testing_set))
    ])
    testing_images = np.reshape(testing_images, [testing_images.shape[0], -1])
    testing_labels = np.stack(
        [testing_set[i][1] for i in range(len(testing_set))])
    testing_examples = (testing_images, testing_labels)

    nnet = NN(num_layers=1,
              input_dim=training_images.shape[1],
              hidden_dim=hidden_dim,
              num_classes=10,
              batch_size=batch_size,
              test_examples=testing_examples,
              nonlin='Tanh',
              update='natural',
              alpha=backstitch_alpha)
    nnet.Train(training_examples)
Example #5
    def test_numerical_gradient_checking(self):
        label, image = next(mnist.read())
        ninput = [pixel / 255 for row in image for pixel in row]
        expected = [1 if i == label else 0 for i in range(10)]
        nnet = NeuralNetwork([784, 16, 16, 10])

        epsilon = 1e-5
        numgrad = [np.empty(wmatrix.shape) for wmatrix in nnet.weight]

        for k, wmatrix in enumerate(nnet.weight):
            for i, w in np.ndenumerate(wmatrix):
                wmatrix[i] = w - epsilon
                nnet.feedforward(ninput)
                a = nnet.get_error(expected)
                wmatrix[i] = w + epsilon
                nnet.feedforward(ninput)
                b = nnet.get_error(expected)
                numgrad[k][i] = (b - a) / (2 * epsilon)  # central difference; dividing by 2*epsilon, not multiplying
                wmatrix[i] = w
        error_gradient = nnet.get_error_gradient(expected)

        unit = lambda v: v / norm(v) if (v != 0).any() else np.zeros(v.shape)

        for k in range(len(nnet.weight)):
            ag = error_gradient[k]
            ng = numgrad[k]
            print(f"custom = {norm(unit(ag) - unit(ng))}")
            print(
                f"derived from cs231 = {norm(unit(ag) * norm(ng) - ng) / max(norm(ag), norm(ng))}"
            )
Example #6
    def learn_digits(self):
   
        train_digits = mnist.read("training")
        k_number = []
        k_label = []
        for i in range(5000):
            k = train_digits.next() 
            k_label.append(k[0])
            k_number.append(k[1])
        y = np.array(list(k_label))
        x = np.array(list(k_number))
        print y[0]
        print x[0]

        # Now we prepare train_data and test_data.
        train = x[:5000].reshape(-1,784).astype(np.float32)

        # Create labels for train and test data
        k = np.arange(10)
        train_labels = y[:5000].astype(np.int) 

        # Initiate kNN, train the data, then test it with test data for k=1
        knn = cv2.KNearest()
        knn.train(train,train_labels)
        
        number = self.edit_image(self.snap())
        number = number.reshape(-1, 784).astype(np.float32)
        nparray = np.array(number)
        ret2, result2, neighbours2, dist2 = knn.find_nearest(nparray,k=5)
        print result2
Example #7
def main():
    lb, im = mnist.read(dataset="training")
    lbt, imt = mnist.read(dataset="testing")
    tested = len(lbt)
    tag = np.zeros(tested,dtype=int)
    im = im.reshape((len(im),len(im[0])*len(im[0])))
    imt = imt.reshape((len(imt),len(imt[0])*len(imt[0])))
    #im = im[:100]
    #imt = imt[:100]
    # astype() returns a copy, so the result must be assigned back
    im = im.astype(float)
    imt = imt.astype(float)
    data = zip(lb, im)
    datat = zip(lbt, imt)
    # group data into 10 groups
    data_grouped = [list(v) for l,v in groupby(sorted(data, key=lambda x:x[0]), lambda x:x[0])]
    cov(data_grouped, 1.0)
Example #8
def load_mnist(dataset_type, n_samples):
    images, raw_targets = read(range(10), dataset_type)
    images = images.reshape(-1, 784)[:n_samples] / 255.0
    raw_targets = raw_targets[:n_samples].flatten()
    targets = numpy.zeros((n_samples, 10))
    targets[numpy.arange(n_samples), raw_targets] = 1.0  # one-hot encode the labels
    return images, targets
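
A quick sanity check of the shapes this helper returns (hypothetical usage; the sample count is invented):

images, targets = load_mnist('training', n_samples=1000)
print(images.shape, targets.shape)  # expected: (1000, 784) (1000, 10)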
Example #10
def run_mnist():
    # FIXME: running EM on MNIST has the problem that all data collapses to one class,
    # because the likelihood for that class is slightly higher than for all others.
    # Probably has to do with the variance being lower for one, from k-means,
    # and that being more important than closeness to the mean for such high-dimensional data?
    # Running it with 0 iterations (i.e. on k-means) works fine; then it finds different orientations of the digits.
    data_per_class = 20

    training_data = list(mnist.read("training"))
    dim_x, dim_y = np.shape(training_data[0][1])
    ones = [d[1] for d in training_data if d[0] == 1]
    fours = [d[1] for d in training_data if d[0] == 4]
    fives = [d[1] for d in training_data if d[0] == 5]

    ones = ones[:data_per_class]
    fours = fours[:data_per_class]
    fives = fives[:data_per_class]

    data = np.array(ones + fours + fives).reshape((-1, dim_x * dim_y))
    solver = EM(data=data, num_classes=3, num_nuisances=3)
    split_data, thetas = solver.fit(max_iter=1)

    for c, class_thetas in enumerate(thetas):
        for n, theta in enumerate(class_thetas):
            print(f"Prior: {theta.prior}, Var: {theta.variance}")
            mnist.show(thetas[c][n].mean.reshape(28, 28))
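
The failure mode in the FIXME above can be seen numerically: for an isotropic Gaussian in D dimensions the log-density is -D/2 * log(2*pi*sigma^2) - d^2/(2*sigma^2), so for large D a slightly smaller variance can outweigh a much larger distance to the mean. A toy sketch (all numbers invented):

import numpy as np

D = 784  # MNIST dimensionality

def log_density(dist_sq, sigma_sq):
    # log N(x; mu, sigma^2 I) for squared distance dist_sq to the mean
    return -0.5 * D * np.log(2 * np.pi * sigma_sq) - dist_sq / (2 * sigma_sq)

# A point close to a high-variance class mean ...
print(log_density(dist_sq=50.0, sigma_sq=0.8))   # ~ -664
# ... scores lower than a far point under a low-variance class:
print(log_density(dist_sq=200.0, sigma_sq=0.5))  # ~ -649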
Example #11
def main():
    lb, im = mnist.read(dataset="training")
    lbt, imt = mnist.read(dataset="testing")
    tested = len(lbt)
    if not KDTREE:
        tag = np.zeros(tested, dtype=int)
        im = im.reshape((len(im), len(im[0]) * len(im[0])))
        imt = imt.reshape((len(imt), len(imt[0]) * len(imt[0])))
        for i in range(tested):
            ind = knn_naive(imt[i], im, 10000)
            if lb[ind] == lbt[i]:
                tag[i] = 1
                print "Good."
            else:
                tag[i] = 0
                print "Oh no."
            print "Predicted", i, ":", lb[ind]
            print "Actual", i, ":", lbt[i]
        print "Hit:", np.sum(tag)
        print "Miss rate:", 1.0 - float(np.sum(tag)) / float(len(lbt))
    else:
        im = im.reshape((len(im), len(im[0]) * len(im[0])))
        imt = imt.reshape((len(imt), len(imt[0]) * len(imt[0])))
        if not PS:
            im, lb = randomselector(im, lb, 40000)
        else:
            import ps

            im, lb = ps.prototypeselector(im, lb, 10000)
            # im,lb = prototypeselector(im,lb,1000)
        print "Fitting trainning set..."
        neigh = KNeighborsClassifier(n_neighbors=1)
        neigh.fit(im, lb)
        print "Done!"
        tag = np.zeros(tested, dtype=int)
        for i in range(tested):
            tmp = neigh.predict(imt[i])
            if tmp[0] == lbt[i]:
                tag[i] = 1
                print "Good."
            else:
                tag[i] = 0
                print "Oh no."
            print "Predicted", i, ":", tmp
            print "Actual", i, ":", lbt[i]
        print "Hit:", np.sum(tag)
        print "Miss rate:", 1.0 - float(np.sum(tag)) / float(len(lbt))
Example #12
def read_mnist(partial=False):
    logger = logging.getLogger("mnist")

    digits1 = [0, 1, 2, 3, 4]
    digits2 = [5, 6, 7, 8, 9]

    # Train on a subset m1 of digit1 and m2 of digit2 of the 60k training data.
    if partial:
        m1 = 5000
        m2 = 5000
    else:
        m1 = 60000
        m2 = 60000

    # read training data
    logger.info("Reading training data ...")
    images, labels = mnist.read(digits1 + digits2,
                                dataset="training",
                                path=os.path.join("examples", "data"))
    logger.info("done.")

    def extract(images, labels):
        images = images / 256.0
        C1 = [k for k in xrange(len(labels)) if labels[k] in digits1]
        C2 = [k for k in xrange(len(labels)) if labels[k] in digits2]

        random.shuffle(C1)
        random.shuffle(C2)

        # Extract the random subsets together as a data matrix X (1 row per datapoint)
        train = C1[:m1] + C2[:m2]
        random.shuffle(train)
        X = array(images[train, :])
        d = array([2 * (k in digits1) - 1 for k in labels[train]])
        return (X, d)

    (X, d) = extract(images, labels)

    logger.info("Reading test data ...")
    timages, tlabels = mnist.read(digits1 + digits2,
                                  dataset="testing",
                                  path=os.path.join("examples", "data"))
    (Xt, dt) = extract(timages, tlabels)
    logger.info("done.")

    return (X, d, Xt, dt)
Example #13
def evaluate(path='.'):
    params = train(path)
    images, labels = mnist.read(range(10), 'testing', path)
    c = 0
    N = images.shape[0]
    for i in range(N):
        c += int(classify(images[i], params) == labels[i])
    return c / float(N)
Example #15
def train(path='.'):
    params = None
    for i in range(10):
        images, _ = mnist.read([i], 'training', path)
        size = images[0].shape
        if params is None:
            params = np.zeros((10, size[0], size[1]))
        params[i] = images.mean(axis=0)
    return params
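
evaluate() in Example #13 calls a classify() helper that the page does not show. A minimal sketch consistent with train()'s per-digit mean templates (an assumption, not necessarily the original repo's code):

import numpy as np

def classify(image, params):
    # Nearest mean-image: pick the digit whose template has the smallest L2 distance
    dists = ((params - image) ** 2).sum(axis=(1, 2))
    return int(np.argmin(dists))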
Example #16
def test_trained(params=None, head=100, tail=100):
    "Tests a network with params against first `head` and last `tail` examples"
    params = params if params is not None else load_params()
    nnet = NeuralNetwork(DLAYERS, params)
    mnist_db = list(mnist.read())
    print("[KNOWN]")
    test_and_report_against(nnet, mnist_db[:head])  # Training dataset
    print("[UNKNOWN]")
    test_and_report_against(nnet, mnist_db[-tail:])  # Unknown dataset
Example #19
def load_mnist_all(dataset):
    #load all the data

    images, labels = mnist.read(digits, dataset=dataset,  # 'digits' is presumably a module-level list of labels to load
                                path="./mnist/")
    # turn cvxopt matrices into a numpy 2d array and a list
    images = np.array(images)
    labels = list(labels)

    return normalize_mnist(images), labels
Example #20
def adaboost():
    X_train, y_train = read('train')
    X_test, y_test = read('test')
    X_train = X_train.reshape((X_train.shape[0], -1))
    X_test = X_test.reshape((X_test.shape[0], -1))

    bdt_discrete = AdaBoostClassifier(DecisionTreeClassifier(
        max_depth=10, min_samples_split=20, min_samples_leaf=5),
                                      n_estimators=500,
                                      learning_rate=0.5,
                                      algorithm='SAMME')

    bdt_discrete.fit(X_train, y_train)

    discrete_test_errors = []

    for discrete_train_predict in bdt_discrete.staged_predict(X_test):
        discrete_test_errors.append(
            1. - accuracy_score(discrete_train_predict, y_test))

    return bdt_discrete, discrete_test_errors
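
Since adaboost() returns the staged test errors, a natural follow-up is to locate the best ensemble size (a sketch, assuming matplotlib is available):

import numpy as np
import matplotlib.pyplot as plt

bdt, test_errors = adaboost()
best_n = int(np.argmin(test_errors)) + 1  # stages are 1-indexed
print("lowest test error {:.4f} at {} estimators".format(min(test_errors), best_n))

plt.plot(range(1, len(test_errors) + 1), test_errors)
plt.xlabel('number of estimators')
plt.ylabel('test error')
plt.show()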
Example #21
class mainWindow():
    times = 1
    timestart = time.clock()
    to_read = [1, 8]  #[0,1,2,3,4,5,6,7,8,9]
    train, labels = mnist.read(to_read)
    data = imresize(train[0], (400, 500))

    def __init__(self):
        self.root = Tkinter.Tk()
        self.frame = Tkinter.Frame(self.root, width=500, height=400)
        self.frame.pack()
        self.canvas = Tkinter.Canvas(self.frame, width=500, height=400)
        self.canvas.place(x=-2, y=-2)
        self.shakerato = False
        self.smokypixels = True
        self.max_rand = 255
        to_read = [1, 8]  # [0,1,2,3,4,5,6,7,8,9]
        self.train, self.labels = mnist.read(to_read)
        self.data = imresize(self.train[0], (400, 500))
        self.root.after(1, self.start)  # increase the delay to slow it down
        self.root.mainloop()  # mainloop() blocks, so it must come after all setup

    def change_img(self):
        this_p = numpy.random.randint(len(self.labels))
        self.train[this_p][self.labels[this_p] *
                           (28 / 10):(self.labels[this_p] + 1) * (28 / 10),
                           0:3] = 255
        self.data = imresize(numpy.fliplr(self.train[this_p]), (400, 500))

    def start(self):
        self.im=Image.fromstring('L', (self.data.shape[1],\
        self.data.shape[0]), self.data.astype('b').tostring())
        self.photo = ImageTk.PhotoImage(image=self.im)
        self.canvas.create_image(0, 0, image=self.photo, anchor=Tkinter.NW)
        self.root.update()
        self.times += 1
        #### HERE REGULATES TIME BETWEEN IMAGES
        if self.times % 25 == 0:
            #print "%.02f FPS"%(self.times/(time.clock()-self.timestart))
            self.change_img()
        self.root.after(25, self.start)
        #if(self.shakerato == True):
        #self.data=numpy.roll(self.data,numpy.random.choice(([-1,1])),numpy.random.choice(([-1,1])))
        #elif(self.smokypixels == True):
        ### THIS REGULATES PIXELS UPDATES
        tmp_data = self.data
        x, y = numpy.where(tmp_data <= 150)
        tmp_data[x, y] = 130
        x, y = numpy.where(tmp_data > 133)  #133
        for i in range(len(x) * 2):
            this_p = numpy.random.randint(len(x))
            self.data[x[this_p], y[this_p]] = numpy.random.randint(20)
Example #23
def save_mnist(dataset, dst):
    label_file = open(Path(dst, "label.txt"), "w")
    generator = read(dataset=dataset, path=mnist_source)

    count = [0 for i in range(10)]
    for label, image in generator:
        filename = "{}_{:04d}.png".format(label, count[label])

        label_file.write("{},{}\n".format(filename, label))
        imwrite(str(Path(dst, filename)), image)

        count[label] += 1

    label_file.close()
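
A hypothetical invocation (the destination paths are invented, and the directories must already exist since the function only creates label.txt):

save_mnist(dataset="training", dst="mnist_png/training")
save_mnist(dataset="testing", dst="mnist_png/testing")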
Example #25
def backpropagation_main():
    label, image = next(mnist.read())
    ninput = [pixel / 255 for row in image for pixel in row]
    expected = [1 if i == label else 0 for i in range(10)]

    nnet = NeuralNetwork(DLAYERS, params=None)
    # nnet = NeuralNetwork(DLAYERS, params=load_params())
    for i in range(1000000000000):
        guess = nnet.feedforward(ninput)
        cost = nnet.get_error(expected)
        print(f"[{i + 1}] cost = {cost}, guess = {guess}")
        try:
            nnet.backpropagate(expected)
        except KeyboardInterrupt:
            break
    guess = nnet.feedforward(ninput)
    cost = nnet.get_error(expected)
    print(f"[{i + 1}] cost = {cost}")
    save_params(nnet.params)
Example #26
def getTrainData(count, digit):
    # count is the number of training samples
    # digit is the specific number between 0 and 9 from the training dataset
    trainingData = np.zeros((count, 400), int)
    i = 0
    for img in read("training"):
        if i < count:
            if img[0] == digit:  #This is the label
                row, col = img[1].shape
                im = img[1]
                im = im.reshape(28, 28)
                im2 = cv2.resize(im, (20, 20), interpolation=cv2.INTER_CUBIC)
                im2 = im2.reshape(1, 400)
                trainingData[i, :] = im2

                i += 1

    trainingData = (trainingData > 125).astype(int)
    return trainingData
Example #27
def load_mnist(dataset, digitA, digitB):
    #use only the data points with labels A or B

    images, labels = mnist.read(digits, dataset=dataset,  # 'digits' is presumably a module-level list of labels to load
                                path="./mnist/")
    # turn cvxopt matrices into a numpy 2d array and a list
    images = np.array(images)
    labels = list(labels)

    new_images = []
    new_labels = []

    for i, label in enumerate(labels):
        if label == digitA or label == digitB:
            new_images.append(images[i, :])
            if label == digitA:
                new_labels.append(1.0)
            if label == digitB:
                new_labels.append(-1.0)

    return normalize_mnist(np.array(new_images)), new_labels
Example #28
import numpy as np
import mnist

# Load the raw MNIST
X_train, y_train = mnist.read(dataset='training')
X_test, y_test = mnist.read(dataset='testing')

# Subsample the data for more efficient code execution in this exercise
num_training = 6000
X_train = X_train[:num_training]
y_train = y_train[:num_training]

num_test = 500
X_test = X_test[:num_test]
y_test = y_test[:num_test]

# Reshape the image data into rows
# Datatype int allows you to subtract images (otherwise they are uint8 and would wrap around)
X_train = np.reshape(X_train, (X_train.shape[0], -1)).astype('int')
X_test = np.reshape(X_test, (X_test.shape[0], -1)).astype('int')

# As a sanity check, we print out the size of the training and test data.
print('Training data shape: ', X_train.shape)
print('Training labels shape: ', y_train.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)


###############################################################################
#                                                                             #
#          Implement the k Nearest Neighbors algorithm here                   #
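###############################################################################

The snippet stops at the banner above. A minimal L2 nearest-neighbor sketch that fits the placeholder, assuming the X_train, y_train, X_test, y_test arrays prepared earlier (a sketch, not the exercise's reference solution):

import numpy as np

def knn_predict(X_train, y_train, X_test, k=5):
    predictions = np.empty(len(X_test), dtype=y_train.dtype)
    for i, x in enumerate(X_test):
        # Squared L2 distance from this test image to every training image
        dists = np.sum((X_train - x) ** 2, axis=1)
        nearest = np.argsort(dists)[:k]
        # Majority vote among the k nearest training labels
        predictions[i] = np.bincount(y_train[nearest].astype(np.int64)).argmax()
    return predictions

accuracy = np.mean(knn_predict(X_train, y_train, X_test) == y_test)
print('kNN accuracy:', accuracy)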
Example #29
# Do we have a fancy GPU?
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Parameters
num_epochs = 100
num_classes = 10  # i.e. the digits
batch_size = 300  # how many samples to use per batch
learning_rate = 0.001

# Load MNIST data into plain Python lists first
X_train = []
X_test = []
Y_train = []
Y_test = []

for element in m.read("training"):
    Y_train.append(element[0])
    X_train.append(element[1])

for element in m.read("testing"):
    Y_test.append(element[0])
    X_test.append(element[1])

X_train = np.expand_dims(np.asarray(X_train, dtype=np.float32), axis=1)
X_test = np.expand_dims(np.asarray(X_test, dtype=np.float32), axis=1)
Y_train = np.asarray(Y_train, dtype=np.int64)
Y_test = np.asarray(Y_test, dtype=np.int64)

train_set = torch.utils.data.TensorDataset(torch.from_numpy(X_train),
                                           torch.from_numpy(Y_train))
test_set = torch.utils.data.TensorDataset(torch.from_numpy(X_test),
                                          torch.from_numpy(Y_test))
Example #30
from mnist import read, show, normalize
import numpy as np
import matplotlib.pyplot as plt

train = list(read('train'))
data = train[0]

label = data[0]
pixels = data[1]  # each record is a (label, pixels) tuple

pixels = np.array(pixels, dtype='uint8')
pixels = pixels.reshape((28, 28))

plt.title('Example of MNIST pattern')
plt.imshow(pixels, cmap='gray')
plt.show()
Example #31
        train_obj = softmaxObjective(train_x, train_y, theta)
        print('Iteration {}: Training set objective function: {} \r'.format(
            ct, train_obj))
        theta += -(alpha * gradient)
        assert theta.shape == (n, 10)
        ct += 1
    print ""
    print("Training set accuracy: {0:0.2f}%".format(
        100 * softmax_classifier_acuracy(theta, train_x, train_y)))
    print("Testing set accuracy:  {0:0.2f}%".format(
        100 * softmax_classifier_acuracy(theta, test_x, test_y)))


cwd = os.getcwd()
# "training" or "testing"
train_images = read("training", cwd)

n = 28 * 28
m = 1000  # number of training samples, max: 60000
train_y = np.empty([m, 1])
train_x = np.empty([m, n])
i = 0
for image in train_images:
    if i >= m: break
    train_x[i, :] = np.reshape(image[1], (1, n))
    train_y[i] = image[0]
    i += 1
train_x = train_x[:i, :]
train_y = train_y[:i]
mean_x = np.mean(train_x, axis=0)
std_x = np.std(train_x, axis=0) + 0.1
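
The snippet ends after computing the statistics; the +0.1 in std_x guards against division by zero on constant (all-black) pixels. The likely next step, standardizing the features, would be (an assumed continuation, not shown on the page):

train_x = (train_x - mean_x) / std_x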
Example #32
"""
Implements a Neural Network

"""
from vectorflux import VectorFlux
from mnist import read, show, normalize

from vectorflux.layers import Dense
from vectorflux.layers.Dropout import Dropout

train = list(read('train'))
test = list(read('test'))

print("Train size: {}".format(len(train)))
print("Test size: {}".format(len(test)))

# Normalization for values
test_x, test_y = normalize(test)
train_x, train_y = normalize(train)

vf = VectorFlux()
vf.add(Dense(800, activation='sigmoid', input_shape=784, optimizer='Momentum'))
vf.add(Dropout(0.5, input_shape=800))
vf.add(Dense(800, activation='sigmoid', input_shape=800, optimizer='ADAM'))
vf.add(Dense(10, activation='sigmoid', input_shape=800))

vf.train(x_train=train_x,
         y_train=train_y,
         x_test=test_x,
         y_test=test_y,
         epochs=100000,
Example #33
    def test(self):
        images, labels = mnist.read('testing')
        test(images, labels)
Example #34
def main():
    lbt, imt = mnist.read(dataset="testing")
    print lbt[4]
    mnist.show(imt[4])
Example #35
class GANeuralNetwork(Subject, NeuralNetwork):

    __mnist_db = list(mnist.read())

    _genome: List[float]
    _fitness: float

    def __init__(self, params):
        """
        Precondition: use set_layers_description() before any instantiation
        so dlayers is initialized
        """
        super().__init__(GANeuralNetwork.dlayers, params)
        self._genome = params
        self._fitness = None

    @classmethod
    def create_random(cls):
        return GANeuralNetwork(cls.get_random_params())

    @property
    def genome(self) -> List[float]:
        return self._genome

    @property
    def fitness(self) -> float:
        # Check against None explicitly: a fitness of exactly 0 is falsy but valid
        return self.batch_cost() if self._fitness is None else self._fitness

    def batch_cost(self, batch_size=10, random_samples=False):
        "Runs a random minibatch and returns average network cost"
        costs = [None] * batch_size
        db = (
            sample(GANeuralNetwork.__mnist_db, batch_size) if random_samples
            else GANeuralNetwork.__mnist_db[:batch_size]
        )
        for i, (label, image) in enumerate(db): # TODO: parallelize runs
            # Run network
            ninput = [pixel / 255 for row in image for pixel in row] # Normalized
            guess = self(self.genome, ninput)
            # Cost calculation
            expected = [1 if i == label else 0 for i in range(10)]
            costs[i] = sum((g - e)**2 for g, e in zip(guess, expected))
        cost = mean(costs)
        self._fitness = -cost
        # print(f"Average cost of {cost} after {batch_size} runs")
        return self._fitness

    # TODO: Think more about this and make it
    # Maybe a urand in [c +- d] range with c = (min + max) / 2, d = max - min
    @staticmethod
    def mutate(gen):
        return gen + randn()

    @classmethod
    def set_layers_description(cls, dlayers):
        """
        Override of NeuralNetwork method that makes it static
        dlayers will be used as a static attribute of GANeuralNetwork class
        """
        cls.dlayers = dlayers

    @classmethod
    def get_random_params(cls):
        return super().get_random_params(cls)
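
Hypothetical usage honoring the documented precondition (the layer sizes here are invented):

GANeuralNetwork.set_layers_description([784, 16, 16, 10])
net = GANeuralNetwork.create_random()
print(net.fitness)  # negative average cost over the first 10 MNIST samples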
Example #36
    def train(self):
        images, labels = mnist.read('training')
        train(images, labels)
Example #37
# @author: Bruce

import os
import mnist, svmcmpl, cvxopt, random

digits1 = [ 0 ]
digits2 = [ 1 ]

m1 = 4000; m2 = 4000

# read training data
dpath = os.path.join(os.getcwd(), "data", "mnist")
print(dpath)

images, labels = mnist.read(digits1 + digits2, dataset = "training", path = dpath )
images = images / 256.

C1 = [ k for k in range(len(labels)) if labels[k] in digits1 ]
C2 = [ k for k in range(len(labels)) if labels[k] in digits2 ]

random.seed()
random.shuffle(C1)
random.shuffle(C2)

train = C1[:m1] + C2[:m2]
random.shuffle(train)
X = images[train,:]
d = cvxopt.matrix([ 2*(k in digits1) - 1 for k in labels[train] ])

gamma = 4.0
Example #38
import sys
import numpy as np
import scipy.misc
import imageio
sys.path.append("modules")
import mnist
sys.path.append("extension")
import pyceptron

training_set = mnist.read(dataset="training", path="mnist")
testing_set = mnist.read(dataset="testing", path="mnist")

architecture = [784, 300, 10]
activation = "ReLU"
softmax = 1

network = pyceptron.Network(architecture,
                            activation=activation,
                            softmax=softmax)
network.load_state("states/784_300_10_sgd_ReLU_softmax_229.state")

bad_lbl = list()
bad_img = list()
print(len(testing_set))
# Calculate the training error rate
for i in range(len(testing_set)):
    sample = testing_set[i]
    prediction = network.predict(sample[0])
    prediction = np.argmax(prediction)
    expectation = np.argmax(sample[1])
    if not prediction == expectation:
Example #39
import mnist
import Image
import os
from pylab import imread, imshow, gray, mean
from numpy import array
from cvxopt import matrix
from scipy.misc import imsave

alphabets = ['a', 'b', 'c', 'A']


# read all the training data and labels
for i in alphabets:
  
  print 'Reading char=' + str(i) + ', ord=' + str(ord(i))
  images, labels = mnist.read([ord(i)])
  
  for j in range(0, images.size[0]):
    img_matrix_1d = images[j,:]
    img_matrix = matrix(img_matrix_1d, (28,28)).trans()
    img_array = array(img_matrix)
    savePath = './raw/training/'+str(ord(i))+'/'
    if not os.path.exists(savePath):
      os.makedirs(savePath)
    imsave(savePath + str(j)+'.png', img_array)

Example #40
import os
import mnist
import scipy
import prepare_data
from numpy import array, vstack, append
from scipy.misc import imsave
from sklearn import svm, metrics
from sklearn.cross_validation import LeaveOneOut
from sklearn.externals import joblib
from sklearn.multiclass import OneVsRestClassifier
# train the digits 0
alphabets  =[] #['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z'] #'a', 'd', 'e', 'c', 'u', 's', 'o', 'f', 't']
alphabets_ord = map(ord, alphabets)
ignoreList = ['0','1', '2', '3', '4', '5', '6', '7', '8', '9']
ignoreList = map(ord, ignoreList)
images, labels = mnist.read(alphabets_ord, ignoreList = ignoreList)

images = array(images)
labels = array(labels).reshape(1, len(labels))[0]

fonts = ['nothingyoucoulddo', 'comicsans', 'dakota', 'showhands', 'danielbd', 'danielbk', 'dandelion', 'daniel' ]
for font in fonts:
  font_images, font_labels = mnist.read(alphabets_ord, './../../data/', font + '_img.idx', font + '_label.idx', ignoreList=ignoreList)
  font_images = array(font_images)
  font_labels = array(font_labels).reshape(1, len(font_labels))[0]
  
  images = vstack((images, font_images))
  labels = append(labels, font_labels)

'''
images, labels = mnist.read(alphabets_ord, './../../data/', 'comicsans_img.idx', 'comicsans_label.idx')
Example #41
    def __init__(self, config, phase='train'):
        self.config = config
        self.imgs, self.lbls = mnist.read(dataset=phase,
                                          path=self.config.mnist_dir)
Example #42
import os
import mnist
from cv2 import imwrite

src = "raw_mnist"

def initialize_folders():
    for i in range(10):
        folder_name = os.path.join(src, str(i))
        os.makedirs(folder_name, exist_ok=True)

if __name__ == "__main__":
    initialize_folders()
    gen = mnist.read()
    counter = 0
    for label, img in gen:  # a bare while True/next() would raise StopIteration once exhausted
        dir_name = os.path.join(src, str(label))
        filename = os.path.join(dir_name, "{}_{:04d}.png".format(label, len(os.listdir(dir_name))))
        imwrite(filename, img)

        counter += 1
        if counter % 1000 == 0:
            print(counter)
Example #43
def main():
    f = 0.05
    th = 6.27344414456
    #f = 0.1
    #th = 9.25216525759
    #f = 0.15
    #th = 12.6067380436
    #f = 0.2
    #th = 14.6211445211
    lb, im = mnist.read(dataset="training")
    lbt, imt = mnist.read(dataset="testing")
    tested = len(lbt)
    tag = np.zeros(tested, dtype=int)
    im = im.reshape((len(im), len(im[0]) * len(im[0])))
    imt = imt.reshape((len(imt), len(imt[0]) * len(imt[0])))
    if READ:
        sigma = []
        mu = []
        for i in range(10):
            ts = np.loadtxt('sigma' + str(i) + '.out')
            tm = np.loadtxt('mu' + str(i) + '.out')
            sigma.append(ts)
            mu.append(tm)
        sigma = np.array(sigma)
        mu = np.array(mu)
        sigma_inv = []
        for i in range(len(sigma)):
            sigma_inv.append(np.linalg.inv(sigma[i]))
        sigma_inv = np.array(sigma_inv)
    else:
        data = zip(lb, im)
        datat = zip(lbt, imt)
        # group data into 10 groups
        data_grouped = [list(v) for l, v in groupby(sorted(data, key=lambda x: x[0]), lambda x: x[0])]
        sigma, mu = cov.cov(data_grouped, 10000)
        sigma = np.array(sigma)
        mu = np.array(mu)
        sigma_inv = []
        for i in range(len(sigma)):
            sigma_inv.append(np.linalg.inv(sigma[i]))
        sigma_inv = np.array(sigma_inv)
    #print np.linalg.slogdet(sigma[0])
    pi = class_prob(lb)
    cnt = 0
    ab = 0
    for imt_data in imt:
        p = -sys.maxint
        p2 = -sys.maxint
        ind = -1
        ind2 = -1
        pj = class_gauss(imt_data, sigma, sigma_inv, mu)
        for j in range(len(pj)):
            temp = math.log(pi[j]) + pj[j]
            if p < temp:
                p2 = p  # previous best becomes the runner-up
                ind2 = ind
                p = temp
                ind = j
            elif p2 < temp:
                p2 = temp
                ind2 = j
        if (p - p2) < th:  # margin between best and runner-up too small: abstain
            ab += 1
            print lbt[cnt], ':', ind, ':', p, '(abstain!)'
        else:
            print lbt[cnt], ':', ind, ':', p
            if lbt[cnt] == ind:
                tag[cnt] = 1
        cnt += 1

    print 'hit:', sum(tag)
    print 'total:', len(tag)
    print 'abstain:', ab
    print 'miss rate:', 1.0 - float(sum(tag)) / (len(tag) - ab)
Example #44
import argparse
import mnist
import naive_bayes as nb
import numpy as np

parser = argparse.ArgumentParser(description='It is a program for ML HW#2.')
parser.add_argument('train_img_path', help='file path of train img', type=str)
parser.add_argument('train_lbl_path', help='file path of train lbl', type=str)
parser.add_argument('test_img_path', help='file path of test img', type=str)
parser.add_argument('test_lbl_path', help='file path of test lbl', type=str)
parser.add_argument('mode', help='toggle option', type=int)
args = parser.parse_args()
print('train_img_path: {}'.format(args.train_img_path))
print('train_lbl_path: {}'.format(args.train_lbl_path))
print('test_img_path: {}'.format(args.test_img_path))
print('test_lbl_path: {}'.format(args.test_lbl_path))

train_img, train_lbl = mnist.read(args.train_img_path, args.train_lbl_path)
test_img, test_lbl = mnist.read(args.test_img_path, args.test_lbl_path)
print(train_img.shape)

nb.classify(train_img, train_lbl, test_img, test_lbl, args.mode)
Example #45
# Dense data
# y, x = [1, -1], [[1, 0, 1], [-1, 0, -1]]
# Sparse data
# y, x = [1, -1], [{1: 1, 3: 1}, {1: -1, 3: -1}]
# prob = svm_problem(y, x)
# param = svm_parameter('-t 0 -c 4 -b 1')
# m = svm_train(prob, param)


SUBSAMPLE = 10000

digits = [0,1,2,3,4,5,6,7,8,9]
DATAPATH = "./data/mnist/"

train_images, train_labels = mnist.read(digits, dataset='training', path=DATAPATH)
x = np.array(train_images).tolist() #svm requires a list
y = np.array(train_labels).T.astype(float).tolist()[0]

pprint(len(x))

x = x[:SUBSAMPLE]
y = y[:SUBSAMPLE]

#pprint(x)
#pprint(y)

#pprint(x[0])

prob  = svm_problem(y, x)
Example #46
    return fin


if __name__ == '__main__':
    config = Config()
    n_gpu = pytorch_utils.setgpu('6')
    net = ResNet_CAM()
    checkpoint = torch.load(root_path +
                            'checkpoints/020.ckpt')  # must be loaded before moving the net to CUDA
    net.load_state_dict(checkpoint['state_dict'])
    net = net.cuda()
    cudnn.benchmark = True
    net = DataParallel(net)
    net.eval()

    imgs, lbls = mnist.read(dataset='test', path=config.mnist_dir)
    idcs = np.random.randint(0, len(lbls), size=(3, ))
    img_pad = np.zeros([3, config.img_size, config.img_size], dtype=np.float32)
    classes = []
    for idx in idcs:
        img = imgs[idx].astype(np.float32)
        ih, iw = img.shape
        img = (img / 255. - config.mnist_mean) / config.mnist_std
        x, y = np.random.randint(0,
                                 config.img_size - ih, (2, ),
                                 dtype=np.int16)
        for k in range(3):
            img_pad[k, y:y + ih, x:x + iw] = img
        classes.append(lbls[idx])
    data = np.expand_dims(img_pad, 0).copy()
    data = torch.from_numpy(data)
Example #47
import mnist
import numpy as np
import matplotlib.pyplot as plt

plt.figure()
images = mnist.read()
for i in range(16):
    plt.subplot(4, 4, i + 1)
    im = next(images)[1]  # the generator yields (label, image) pairs; keep the image

    fig = plt.imshow(1 - im / 128, cmap='Greys')
    fig.axes.get_xaxis().set_visible(False)
    fig.axes.get_yaxis().set_visible(False)

plt.savefig('ex.png', bbox_inches='tight', pad_inches=0, dpi=300)
#fig.show()
Example #48
import mnist, Image, sys
from pylab import imread, imshow, gray, mean
from numpy import array
from cvxopt import matrix
from pprint import pprint
from scipy.misc import imsave
from sklearn.metrics import classification_report, precision_score, recall_score
from sklearn.cross_validation import LeaveOneOut, StratifiedKFold
from sklearn.grid_search import GridSearchCV
from sklearn.svm import SVC, LinearSVC

# train the digits 0
alphabets  = [] # ['a', 'b', 'c', 'd', 'e', 'A', 'B', 'C', 'D', 'E']
alphabets_ord = map(ord, alphabets)
images, labels = mnist.read(alphabets_ord)
images = array(images)
labels = array(labels).reshape(1, len(labels))[0]
n_samples = len(images)

# read the training data and labels
sys.stdout.write('Reading in training data and labels')
X = images
y = labels
sys.stdout.write(' ... Done!\n')

# split the data into two equal parts respecting label proportions
train, test = iter(StratifiedKFold(y, 2)).next()

#################################################

# set the tuning parameters
Example #49
from keras.models import Sequential
from keras.layers import Dense
import keras
import numpy as np
import mnist
import time

inp = 784

img, lbl = mnist.read(dataset="training", path="")

img = img[:1000]
lbl = lbl[:1000]

x = []
for u in img:
    v = []
    for l in u:
        v.extend(l/255.0)
    x.append(v)
x = np.array(x)

y = []
for a in lbl:
    v = [0.0]*10
    v[a] = 1.0
    y.append(v)
y = np.array(y)

model = Sequential()
model.add(Dense(100, input_dim=inp, activation='sigmoid'))
Example #50
import mnist, Image, sys
from pylab import imread, imshow, gray, mean
from numpy import array
from cvxopt import matrix
from scipy.misc import imsave
from sklearn import svm, metrics

# train the digits 0
digit = [1,2,3,4,5]

# read the training data and labels
sys.stdout.write('Reading in training data and labels')
train_images, train_labels = mnist.read(digit, "training", "./../../data")
sys.stdout.write(' ... Done!\n')

# read the test data and labels
sys.stdout.write('Reading in testing data and labels')
test_images, test_labels = mnist.read(digit, "testing", "./../../data")
sys.stdout.write(' ... Done!\n')

# convert to arrays
x_train = array(train_images)
y_train = array(train_labels).reshape(1, len(train_labels))[0]

x_test = array(test_images)
y_test = array(test_labels).reshape(1, len(test_labels))[0]

# create classifier
classifier = svm.LinearSVC()

# train the classifier
Example #51
import mnist
import Image
from pylab import imread, imshow, gray, mean
from numpy import array
from cvxopt import matrix
from scipy.misc import imsave

# read all the training data and labels
images, labels = mnist.read([0])

# get the first row of pixel data
img0_matrix_1d = images[0,:]
img0_matrix = matrix(img0_matrix_1d, (28,28))
img0_array = array(img0_matrix)

# save the image 
imsave('./raw/mnist_training_digit_4_data_0.png', img0_array)