Example 1
def main_train():
    trX, teX, trY, teY = mnist(onehot=True)

    X = T.fmatrix()
    Y = T.fmatrix()

    w_h = init_weights((784, 625))
    w_h2 = init_weights((625, 625))
    w_o = init_weights((625, 10))
    params = [w_h, w_h2, w_o]

    noise_h, noise_h2, noise_py_x = model(X, params, 0.2, 0.5)
    h, h2, py_x = model(X, params, 0., 0.)
    y_x = T.argmax(py_x, axis=1)

    cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y))
    updates = RMSprop(cost, params, lr=0.001)

    train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
    predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)

    for i in range(100):
        for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)):
            cost = train(trX[start:end], trY[start:end])
        print np.mean(np.argmax(teY, axis=1) == predict(teX))
        if i % 10 == 0:
            name = 'media/model/modnet-{0}.model'.format(str(i))
            save_model(name, params)
    name = 'media/model/modnet-final.model'
    save_model(name, params)
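Example 1 (and several later examples) calls an RMSprop helper that is not defined in the snippet. A minimal sketch, assuming the common Theano recipe of keeping one shared accumulator of squared gradients per parameter, could look like this:

import theano
import theano.tensor as T

def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6):
    # One shared accumulator per parameter holds a running average of g**2.
    grads = T.grad(cost=cost, wrt=params)
    updates = []
    for p, g in zip(params, grads):
        acc = theano.shared(p.get_value() * 0.)
        acc_new = rho * acc + (1 - rho) * g ** 2
        updates.append((acc, acc_new))
        updates.append((p, p - lr * g / T.sqrt(acc_new + epsilon)))
    return updates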
Example 2
def main_train():
    trX, teX, trY, teY = mnist(onehot=True)

    X = T.fmatrix()
    Y = T.fmatrix()

    w_h = init_weights((784, 625))
    w_o = init_weights((625, 10))

    params = [w_h, w_o]
    py_x = model(X, params)
    y_x = T.argmax(py_x, axis=1)

    cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))
    updates = sgd(cost, params)

    train = theano.function(inputs=[X, Y],
                            outputs=cost,
                            updates=updates,
                            allow_input_downcast=True)
    predict = theano.function(inputs=[X],
                              outputs=y_x,
                              allow_input_downcast=True)

    for i in range(100):
        for start, end in zip(range(0, len(trX), 128),
                              range(128, len(trX), 128)):
            cost = train(trX[start:end], trY[start:end])
        print np.mean(np.argmax(teY, axis=1) == predict(teX))
        if i % 10 == 0:
            name = 'media/model/net-{0}.model'.format(str(i))
            save_model(name, params)
    name = 'media/model/net-final.model'
    save_model(name, params)
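Example 2 relies on init_weights, model, and sgd helpers defined elsewhere in its project. A sketch consistent with the surrounding examples (small Gaussian shared weights, a sigmoid hidden layer feeding a softmax output, plain gradient-descent updates) might be:

import numpy as np
import theano
import theano.tensor as T

def init_weights(shape):
    # Small Gaussian initialization stored in a Theano shared variable.
    return theano.shared(np.asarray(np.random.randn(*shape) * 0.01,
                                    dtype=theano.config.floatX))

def model(X, params):
    w_h, w_o = params
    h = T.nnet.sigmoid(T.dot(X, w_h))       # hidden layer
    return T.nnet.softmax(T.dot(h, w_o))    # class probabilities

def sgd(cost, params, lr=0.05):
    grads = T.grad(cost=cost, wrt=params)
    return [(p, p - lr * g) for p, g in zip(params, grads)]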
Example 3
def run_regression(trials, batch_size):
    train_x, test_x, train_y, test_y = mnist(onehot=True)
    x_dim = len(train_x[0])
    y_dim = len(train_y[0])
    weight_vec = theano.shared(
        np.random.randn(x_dim, y_dim) * INITIAL_WEIGHT_MAX)
    offset = theano.shared(np.zeros(y_dim))
    input_data = T.fmatrix('input_data')
    label = T.fmatrix('label')
    softmax_output = T.nnet.softmax(T.dot(input_data, weight_vec) + offset)
    cost = T.mean(T.nnet.categorical_crossentropy(softmax_output, label))
    weight_grad, offset_grad = T.grad(cost=cost, wrt=[weight_vec, offset])
    updates = [[weight_vec, weight_vec - weight_grad * LEARNING_RATE],
               [offset, offset - offset_grad * LEARNING_RATE]]
    train_f = theano.function(inputs=[input_data, label],
                              outputs=cost,
                              updates=updates,
                              allow_input_downcast=True)
    predicted_label = T.argmax(softmax_output, axis=1)
    output_f = theano.function(inputs=[input_data],
                               outputs=predicted_label,
                               allow_input_downcast=True)

    for i in range(trials):
        for start, end in zip(range(0, len(train_x), batch_size),
                              range(batch_size, len(train_x), batch_size)):
            cost = train_f(train_x[start:end], train_y[start:end])
        print i, np.mean(np.argmax(test_y, axis=1) == output_f(test_x))
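INITIAL_WEIGHT_MAX and LEARNING_RATE are module-level constants that the snippet does not show; the values below are purely illustrative, together with a possible entry point:

INITIAL_WEIGHT_MAX = 0.01   # hypothetical value
LEARNING_RATE = 0.05        # hypothetical value

if __name__ == '__main__':
    run_regression(trials=100, batch_size=128)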
Example 4
def main_train():
    trX, teX, trY, teY = mnist(onehot=True)

    trX = trX.reshape(-1, 1, 28, 28)
    teX = teX.reshape(-1, 1, 28, 28)
    X = T.ftensor4()
    Y = T.fmatrix()

    w = init_weights((32, 1, 3, 3))
    w2 = init_weights((64, 32, 3, 3))
    w3 = init_weights((128, 64, 3, 3))
    w4 = init_weights((128 * 3 * 3, 625))
    w_o = init_weights((625, 10))
    params = [w, w2, w3, w4, w_o]

    noise_l1, noise_l2, noise_l3, noise_l4, noise_py_x = model(X, params, 0.2, 0.5)
    l1, l2, l3, l4, py_x = model(X, params, 0., 0.)
    y_x = T.argmax(py_x, axis=1)

    cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y))

    updates = RMSprop(cost, params, lr=0.001)

    train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
    predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)
    for i in range(100):
        for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)):
            cost = train(trX[start:end], trY[start:end])
        print np.mean(np.argmax(teY, axis=1) == predict(teX))
        if i % 10 == 0:
            name = 'media/model/conv-{0}.model'.format(str(i))
            save_model(name, params)
    name = 'media/model/conv-final.model'
    save_model(name, params)
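The save_model helper used by Examples 1, 2, and 4 is not shown either; a plausible sketch simply pickles the current values of the shared parameters:

import pickle

def save_model(filename, params):
    # Dump the numpy values of the shared variables to disk.
    with open(filename, 'wb') as f:
        pickle.dump([p.get_value() for p in params], f)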
Example 5
def main_loop():
    trX, teX, trY, teY = mnist(ntrain=ntrain,ntest=2000,onehot=True)
    print "before", trX[30][0:10]
    seq = mnist_with_noise([trX,trY],10)
    print "after", trX[30][0:10]
    X = T.fmatrix()
    Y = T.fmatrix()
    #grads = T.fvector()

    w_h = [init_weights((784, 625)), init_weights((625, 10))]

    py_x = model(X, w_h)
    y_x = T.argmax(py_x, axis=1)

    cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))
    params = w_h
    updates = sgd(cost, params)
    grads = T.grad(cost=cost,wrt=params)
    grad_for_norm = T.grad(cost=cost,wrt=params)

    train = theano.function(inputs=[X, Y], outputs=[cost,grads[0],grads[1]], updates=updates, allow_input_downcast=True)
    predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)
    get_grad = theano.function(inputs=[X,Y],outputs=[cost,grad_for_norm[0],grad_for_norm[1]], allow_input_downcast=True)

    mb_size = 128
    for i in range(2):
        grad_list = []
        for start, end in zip(range(0, len(trX), mb_size), range(mb_size, len(trX), mb_size)):
            cost, grads0, grads1 = train(trX[start:end], trY[start:end])
        print np.mean(np.argmax(teY, axis=1) == predict(teX))

    noisy_grads = []
    normal_grads = []
    noisy_cost = []
    normal_cost = []
    mb_size = 1
    n_predicts = 0
    for i in seq:
        cost,grad0,grad1 = get_grad(trX[i:i+1], trY[i:i+1])
        norm = np.linalg.norm(grad0) 
        if i < 0.1*ntrain:
            n_predicts += (np.argmax(trY[i:i+1], axis=1)==predict(trX[i:i+1]))
            noisy_grads.append(norm)
            noisy_cost.append(cost)
        else:
            normal_grads.append(norm)
            normal_cost.append(cost)


    print "noisy grad : mean,var - " ,np.mean(noisy_grads),np.var(noisy_grads)
    print "normal grad: mean,var - " ,np.mean(normal_grads),np.var(normal_grads)

    print "noisy cost : mean,var - " ,np.mean(noisy_cost),np.var(noisy_cost)
    print "normal cost: mean,var - " ,np.mean(normal_cost),np.var(normal_cost)

    print " noisy predicts out of 5000 -", n_predicts
    plt.plot(noisy_grads)
    plt.plot(normal_grads)

    plt.savefig('grad0.jpeg')
Example 6
def run_net(trials, batch_size):
    train_x, test_x, train_y, test_y = mnist(onehot=True)
    input_dim = len(train_x[0])
    output_dim = len(test_y[0])
    [w_h1, w_h2, weight_outputs] = build_weights([(input_dim, 625), (625, 625),
                                                  (625, output_dim)])
    X = T.fmatrix()  #symbolic variable for weight matrix
    Y = T.fmatrix()  #symbolic variable for output
    # outputs from the layers with dropout
    [dropout_h1, dropout_h2,
     dropout_net_output] = forward_prop([X, w_h1, w_h2, weight_outputs],
                                        [0.2, 0.5, 0.5, 0.5])
    # outputs from the layers without dropout
    [h1, h2, net_output] = forward_prop([X, w_h1, w_h2, weight_outputs],
                                        [0., 0., 0., 0.])
    # actual prediction
    predicted_label = T.argmax(net_output, axis=1)
    cost = T.mean(T.nnet.categorical_crossentropy(dropout_net_output, Y))
    updates = RMSprop(cost, [w_h1, w_h2, weight_outputs])
    net_train = function(inputs=[X, Y],
                         outputs=cost,
                         updates=updates,
                         allow_input_downcast=True)
    get_net_output = function(inputs=[X],
                              outputs=predicted_label,
                              allow_input_downcast=True)

    for trial in range(trials):
        for batch_start in range(0, len(train_x) - batch_size, batch_size):
            batch_end = batch_start + batch_size
            net_train(train_x[batch_start:batch_end],
                      train_y[batch_start:batch_end])
        print np.mean(np.argmax(test_y, axis=1) == get_net_output(test_x))
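forward_prop and its rectify/dropout building blocks are assumed by Example 6 but not shown. A sketch matching the call signature, where the first rate drops inputs and the following rates drop the hidden layers (the fourth rate is unused here), might be:

import theano
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams

srng = MRG_RandomStreams()

def rectify(X):
    return T.maximum(X, 0.)

def dropout(X, p=0.):
    # Scale by the retain probability so no rescaling is needed at test time.
    if p > 0:
        retain = 1 - p
        X = X * srng.binomial(X.shape, p=retain, dtype=theano.config.floatX)
        X = X / retain
    return X

def forward_prop(layers, drop_rates):
    X, w_h1, w_h2, w_out = layers
    d_in, d_h1, d_h2, _ = drop_rates
    h0 = dropout(X, d_in)
    h1 = dropout(rectify(T.dot(h0, w_h1)), d_h1)
    h2 = dropout(rectify(T.dot(h1, w_h2)), d_h2)
    return [h1, h2, T.nnet.softmax(T.dot(h2, w_out))]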
Example 7
File: ann.py Project: iverasp/ntnu
    def main(self):
        
        # Load training and test data
        training_x, test_x, training_y, test_y = load.mnist(onehot=True)
        
        # Symbolic variables
        x = tensor.fmatrix()
        y = tensor.fmatrix()
        
        # Initialize weights
        temp_weight = INPUT_SIZE
        weights = []
        for layer in self.layer_sizes:
            weight = self.init_weights((temp_weight, layer))
            weights.append(weight)
            temp_weight = layer
        weight = self.init_weights((temp_weight, OUTPUT_SIZE))
        weights.append(weight) 
        
        # Initialize model
        model_layer_noise = self.model2(x, weights, 0.2, 0.5)
        model_layer_values = self.model2(x, weights, 0., 0.)
        y_x = tensor.argmax(model_layer_values[-1], axis=1)
        
        # Initialize the update function
        cost = tensor.mean(tensor.nnet.categorical_crossentropy(
                                                model_layer_noise[-1], y))
        params = weights
        updates = self.RMSprop(cost, params) # SGD / RMSprop

        # Initialize core functionality
        train = theano.function(
                        inputs=[x, y],
                        outputs=cost, 
                        updates=updates,
                        allow_input_downcast=True)
        self.predict = theano.function(
                        inputs=[x], 
                        outputs=y_x, 
                        allow_input_downcast=True)
        
        # Training on mnist
        print("\nTRAINING...")
        for i in range(NUMBER_OF_RUNS):
            for start, end in zip(range(0, len(training_x), BATCH_SIZE),
                            range(BATCH_SIZE, len(training_x), BATCH_SIZE)):
                cost = train(training_x[start:end], training_y[start:end])
            print("Iteration ", i+1, "/", NUMBER_OF_RUNS, "(", 
                        numpy.mean(numpy.argmax(test_y, axis=1) 
                        == self.predict(test_x))*100, ")")
        
        # Testing
        print("\nTESTING ON: MNIST TRAINING SET...")
        print(numpy.mean(numpy.argmax(training_y, axis=1) == 
                self.predict(training_x))*100, "percent correct")
        print("\nTESTING ON: MNIST TEST SET...")
        print(numpy.mean(numpy.argmax(test_y, axis=1) 
                == self.predict(test_x))*100, "percent correct")
Example 8
def simple_nn():
    # mnist dataset, training + test
    trX, teX, trY, teY = mnist(onehot=True)

    X = T.fmatrix()
    Y = T.fmatrix()

    # init the weights
    w_h = init_weights((784, 625))
    w_h2 = init_weights((625, 625))
    w_o = init_weights((625, 10))

    noise_h, noise_h2, noise_py_x = model(
            X,
            w_h,
            w_h2,
            w_o,
            0.8,
            0.5
            )
    h, h2, py_x = model(
            X,
            w_h,
            w_h2,
            w_o,
            1.,
            1.
            )
    y_x = T.argmax(py_x, axis=1)

    cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y))
    params = [w_h, w_h2, w_o]
    updates = RMSprop(cost, params, lr=0.001)

    train = theano.function(
            inputs=[X, Y],
            outputs=cost,
            updates=updates,
            allow_input_downcast=True
            )
    predict = theano.function(
            inputs=[X],
            outputs=y_x,
            allow_input_downcast=True
            )

    f_out = open('res/res_dropout_nn', 'w')
    for i in range(100): #you can adjust this if training takes too long
        # train batches of 128 instances
        for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)):
            cost = train(trX[start:end], trY[start:end])
        print i
        correct = np.mean(np.argmax(teY, axis=1) == predict(teX))
        res = str(i) + " " + str(correct) + "\n"
        f_out.write( res )
    print 'Result dropout:', correct
Example 9
def question2data():
    trX, teX, trY, teY = mnist(ntrain=1000, ntest=250, onehot=False)
    # keep threes and fours only
    trX, trY = keep_threes_and_fours(trX, trY)
    teX, teY = keep_threes_and_fours(teX, teY)
    # converts 3 to 0, and 4 to 1 (two classes)
    trY = convert_three_and_four_to_zero_and_one(trY)
    teY = convert_three_and_four_to_zero_and_one(teY)
    # FIXME: WE do not one hot; we can use a single output for binary classification
    trX, trY = permute(trX, trY)
    teX, teY = permute(teX, teY)
    return trX, trY, teX, teY
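The filtering and relabeling helpers used by question2data are not part of the snippet; sketches inferred from their names (hypothetical implementations) could be:

import numpy as np

def keep_threes_and_fours(X, Y):
    mask = (Y == 3) | (Y == 4)
    return X[mask], Y[mask]

def convert_three_and_four_to_zero_and_one(Y):
    return (Y == 4).astype(int)   # 3 -> 0, 4 -> 1

def permute(X, Y):
    # Shuffle examples and labels with the same random permutation.
    idx = np.random.permutation(len(Y))
    return X[idx], Y[idx]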
Example 10
def question2data():
    trX, teX, trY, teY = mnist(ntrain=1000, ntest=250, onehot=False)
    # keep threes and fours only
    trX, trY = keep_threes_and_fours(trX, trY)
    teX, teY = keep_threes_and_fours(teX, teY)
    # converts 3 to 0, and 4 to 1 (two classes)
    trY = convert_three_and_four_to_zero_and_one(trY)
    teY = convert_three_and_four_to_zero_and_one(teY)
    # FIXME: WE do not one hot; we can use a single output for binary classification
    trX, trY = permute(trX, trY)
    teX, teY = permute(teX, teY)
    return trX, trY, teX, teY
Example 11
def mnist_example(epochs = 10, verbose = False, save = False):
	print "Initializing network"
	mnet = ModernNeuralNetwork([784,625,860,10])
	trX, teX, trY, teY = mnist(onehot=True)
	print "Creating Model"
	mnet.create_model_functions()
	print "Training Network"
	for i in range(epochs):
		for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)):
			cost = mnet.train(trX[start:end], trY[start:end])
		if verbose:
			print np.mean(np.argmax(teY, axis=1) == mnet.predict(teX))
	if save:
		mnet.save_weights("MNIST_Weights.save")
		print("Saved weights to \"MNIST_Weights.save\".")
Example 12
	def initiliaze(self):
		self.trX, self.teX, self.trY, self.teY = mnist(1000, 500, onehot=True)

		self.trX = self.trX.reshape(-1, 1, 28, 28)
		self.teX = self.teX.reshape(-1, 1, 28, 28)

		self.X = T.ftensor4()
		self.Y = T.fmatrix()

		self.w = self.init_weights((32, 1, 3, 3))
		self.w2 = self.init_weights((64, 32, 3, 3))
		self.w3 = self.init_weights((128, 64, 3, 3))
		self.w4 = self.init_weights((128 * 3 * 3, 625))
		self.w_o = self.init_weights((625, 10))
		self.w=np.array(self.w)
		print type(self.w)
Example 13
def export_wrong_samples(input_file, output_folder):
    trX, vlX, teX, trY, vlY, teY = mnist(onehot=False, ndim=2)
    with open(input_file, 'r') as f:
        train_ws, train_c, valid_ws, valid_c, test_ws, test_c = pickle.load(f)
    trX = np.asarray(trX[train_ws], dtype=np.uint8).reshape((-1, 28, 28))
    trY = trY[train_ws]
    vlX = np.asarray(vlX[valid_ws], dtype=np.uint8).reshape((-1, 28, 28))
    vlY = vlY[valid_ws]
    teX = np.asarray(teX[test_ws], dtype=np.uint8).reshape((-1, 28, 28))
    teY = teY[test_ws]
    export_image_array(trX, os.path.join(output_folder, 'train'), "")
    export_classification_info(trY, train_c, os.path.join(output_folder, 'train'), "")
    export_image_array(vlX, os.path.join(output_folder, 'valid'), "")
    export_classification_info(vlY, valid_c, os.path.join(output_folder, 'valid'), "")
    export_image_array(teX, os.path.join(output_folder, 'test'), "")
    export_classification_info(teY, test_c, os.path.join(output_folder, 'test'), "")
Example 14
def main():
    # Dataset; the data format is a 4D matrix (num samples, num feature maps, image rows, image cols)
    trX, teX, trY, teY = mnist(onehot=True)
    h1, hpiece1, h2, hpiece2 = 625, 5, 625, 5
    params = basicUtils.randomSearch(nIter=10)
    cvErrorList = []
    for param, num in zip(params, range(len(params))):
        lr, C = param
        print '*' * 40, num, 'parameters', param, '*' * 40
        maxout = CMaxoutmlp(28 * 28, h1, hpiece1, h2, hpiece2, 10, lr, C, 0.2, 0.5)
        cvError = maxout.cv(trX, trY)
        cvErrorList.append(copy(cvError))
    optIndex = np.argmin(cvErrorList, axis=0)
    lr, C = params[optIndex]
    print 'retraining', params[optIndex]
    maxout = CMaxoutmlp(28 * 28, h1, hpiece1, h2, hpiece2, 10, lr, C, 0.2, 0.5)
    maxout.trainmaxout(trX, teX, trY, teY)
Example 15
def main():
    f = open('output.txt', 'w')
    train = np.genfromtxt('data/train_data.txt')
    test = np.genfromtxt('data/test_data.txt')
    val = np.genfromtxt('data/val_data.txt')

    # Parse data for separating training labels and dataset
    n_feat = train[0].size
    train_data = train[:, :-1]
    print len(train_data)
    #print train_data[0]
    train_labels = train[:, n_feat - 1]
    print train_labels[0]
    test_data = test[:, :-1]
    test_labels = test[:, n_feat - 1]
    val_data = val[:, :-1]
    val_labels = val[:, n_feat - 1]
    trX, teX, trY, teY = mnist(onehot=False)
    print trY[0]
    train_data = trX[:1000]
    train_labels = trY[:1000]
    test_labels = teY
    test_data = teX
    SlistX = []
    SlistY = []
    SlistX.append(trX[0])
    SlistY.append(trY[0])
    # Print training + validation error for the classifier
    k_neighbors = [1]
    for k in k_neighbors:
        preds_train = knn_algorithm(SlistX, SlistY, train_data, k,
                                    train_labels)
        print len(SlistX)
        preds_val = knn_algorithm(train_data, train_labels, val_data, k)
        preds_test = knn_algorithm(train_data, train_labels, test_data, k)
        f.write("%s-neighbors: \n" % k)
        f.write("Training error: %s \n" %
                (calc_error(train_labels, preds_train)))
        f.write("Validation error: %s \n" %
                (calc_error(val_labels, preds_val)))
        f.write("Test error: %s \n" % (calc_error(test_labels, preds_test)))
        f.write("\n")
Example 16
def run_regression(trials, batch_size):
	train_x, test_x, train_y, test_y = mnist(onehot=True)
	x_dim = len(train_x[0])
	y_dim = len(train_y[0])
	weight_vec = theano.shared(np.random.randn(x_dim, y_dim) * INITIAL_WEIGHT_MAX)
	offset = theano.shared(np.zeros(y_dim))
	input_data = T.fmatrix('input_data')
	label = T.fmatrix('label')
	softmax_output = T.nnet.softmax(T.dot(input_data, weight_vec) + offset)
	cost = T.mean(T.nnet.categorical_crossentropy(softmax_output, label))
	weight_grad, offset_grad = T.grad(cost=cost, wrt=[weight_vec, offset])
	updates = [[weight_vec, weight_vec - weight_grad * LEARNING_RATE], [offset, offset - offset_grad * LEARNING_RATE]]
	train_f = theano.function(inputs=[input_data, label], outputs=cost, updates=updates, allow_input_downcast=True)
	predicted_label = T.argmax(softmax_output, axis=1)
	output_f = theano.function(inputs=[input_data], outputs=predicted_label, allow_input_downcast=True)

	for i in range(trials):
		for start, end in zip(range(0, len(train_x), batch_size), range(batch_size, len(train_x), batch_size)):
			cost = train_f(train_x[start:end], train_y[start:end])
		print i, np.mean(np.argmax(test_y, axis=1) == output_f(test_x))
Example 17
def run_net(num_iters, batch_size):
	train_input, test_input, train_output, test_output = mnist(onehot=True)

	# reshape image vectors into 4 tensor format for convolution
	train_input = train_input.reshape(-1, 1, 28, 28)
	test_input = test_input.reshape(-1, 1, 28, 28)

	input_sym = T.ftensor4()
	output_sym = T.fmatrix()

	w = init_weights((8, 1, 5, 5)) # 8 filters of size 5 X 5
	w2 = init_weights((8, 8, 3, 3)) # 8 filters of size 3 X 3
	w3 = init_weights((392, 625)) # fully connected layer
	w_o = init_weights((625, 10)) # fully connected output layer

	# accumulator variables for RMS prop
	acc_w = init_weights((8, 1, 5, 5), initial_max_val=0.0)
	acc_w2 = init_weights((8, 8, 3, 3), initial_max_val=0.0)
	acc_w3 = init_weights((392, 625), initial_max_val=0.0)
	acc_wo = init_weights((625, 10), initial_max_val=0.0)

	noise_act_1_pooled, noise_act_2_flattened, noise_l4, noise_net_output = \
		forward_prop(input_sym, w, w2, w3, w_o, 0.2, 0.5)
	act_1_pooled, act_2_flattened, l4, net_output = \
		forward_prop(input_sym, w, w2, w3, w_o, 0., 0.)
	prediction = T.argmax(net_output, axis=1)

	cost = T.mean(T.nnet.categorical_crossentropy(noise_net_output, output_sym))
	params = [w, w2, w3, w_o]
	accs = [acc_w, acc_w2, acc_w3, acc_wo]
	updates = rms_prop(cost, params, accs, lr=0.001)

	train = theano.function(inputs=[input_sym, output_sym], outputs=cost, updates=updates, \
		allow_input_downcast=True)
	predict = theano.function(inputs=[input_sym], outputs=prediction, allow_input_downcast=True)

	for i in range(num_iters):
		for batch_start in range(0, len(train_input) - batch_size, batch_size):
			cost = train(train_input[batch_start:batch_start + batch_size], \
				train_output[batch_start:batch_start + batch_size])
		print np.mean(np.argmax(test_output, axis=1) == predict(test_input))
Example 18
def read_data_sets(data_dir='/data/datasets/',
                  dtype=dtypes.float32,
                  reshape=True,
                  validation_size=1000):
   
    train_images, test_images, train_labels, test_labels  = mnist(data_dir)

    validation_images = train_images[:validation_size]
    validation_labels = train_labels[:validation_size]
    train_images = train_images[validation_size:]
    train_labels = train_labels[validation_size:]

    train = DataSet(train_images, train_labels, dtype=dtype, reshape=reshape)
    validation = DataSet(validation_images,
                       validation_labels,
                       dtype=dtype,
                       reshape=reshape)
                       
    test = DataSet(test_images, test_labels, dtype=dtype, reshape=reshape)

    return base.Datasets(train=train, validation=validation, test=test)
Example 19
def run_net(trials, batch_size):
	train_x, test_x, train_y, test_y = mnist(onehot=True)
	input_dim = len(train_x[0])
	output_dim = len(test_y[0])
	[w_h1, w_h2, weight_outputs] = build_weights([(input_dim, 625), (625, 625), (625, output_dim)])
	X = T.fmatrix() #symbolic variable for weight matrix
	Y = T.fmatrix() #symbolic variable for output
	# outputs from the layers with dropout
	[dropout_h1, dropout_h2, dropout_net_output] = forward_prop([X, w_h1, w_h2, weight_outputs], [0.2, 0.5, 0.5, 0.5])
	# outputs from the layers without dropout
	[h1, h2, net_output] = forward_prop([X, w_h1, w_h2, weight_outputs], [0., 0., 0., 0.])
	# actual prediction
	predicted_label = T.argmax(net_output, axis=1)
	cost = T.mean(T.nnet.categorical_crossentropy(dropout_net_output, Y))
	updates = RMSprop(cost, [w_h1, w_h2, weight_outputs])
	net_train = function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
	get_net_output = function(inputs=[X], outputs=predicted_label, allow_input_downcast=True)

	for trial in range(trials):
		for batch_start in range(0, len(train_x) - batch_size, batch_size):
			batch_end = batch_start + batch_size
			net_train(train_x[batch_start:batch_end], train_y[batch_start:batch_end])
		print np.mean(np.argmax(test_y, axis=1) == get_net_output(test_x))
Example 20
def compute_grads_and_weights_mnist(A_indices, segment, L_measurements,ntrain=50000,ntest=10000,mb_size=128,nhidden=625 ):
    trX, teX, trY, teY = mnist(ntrain=ntrain,ntest=ntest,onehot=True)
    seq = mnist_with_noise([trX,trY],0)
    X = T.fmatrix()
    Y = T.fmatrix()
    w_h = [init_weights((784, nhidden)), init_weights((nhidden, 10))]
    py_x = model(X, w_h)
    y_x = T.argmax(py_x, axis=1)

    cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))
    params = w_h
    updates = sgd(cost, params)
    grads = T.grad(cost=cost,wrt=params)
    grad_for_norm = T.grad(cost=cost,wrt=params)

    train = theano.function(inputs=[X, Y], outputs=[cost,grads[0],grads[1]], updates=updates, allow_input_downcast=True)
    predict = theano.function(inputs=[X], outputs=[y_x,py_x], allow_input_downcast=True)
    get_grad = theano.function(inputs=[X,Y],outputs=[cost,grad_for_norm[0],grad_for_norm[1]], allow_input_downcast=True)

    for i in range(1):
        for start, end in zip(range(0, len(trX), mb_size), range(mb_size, len(trX), mb_size)):
            cost,grads0,grads1 = train(trX[start:end], trY[start:end])
        y,p_y =  predict(teX)
        print np.mean(np.argmax(teY, axis=1) == y)
Example 21
    if sparsity_ind == False:
        cost = -T.mean(
            T.sum(target_y * T.log(pred_y) +
                  (1 - target_y) * T.log(1 - pred_y),
                  axis=1))
    else:
        cost = - T.mean(T.sum(target_y * T.log(pred_y) + (1 - target_y) * T.log(1 - pred_y), axis=1)) \
        + penalty*T.shape(h)[1]*(sparsity*T.log(sparsity) + (1-sparsity)*T.log(1-sparsity)) \
        - penalty*sparsity*T.sum(T.log(T.mean(h, axis=0)+1e-6)) \
        - penalty*(1-sparsity)*T.sum(T.log(1-T.mean(h, axis=0)+1e-6))

    return cost


trX, teX, trY, teY = mnist()  # use all data
#trX, trY = trX[:1000], trY[:1000]
#teX, teY = teX[:200], teY[:200]

x = T.fmatrix('x')
d = T.fmatrix('d')

rng = np.random.RandomState(123)
theano_rng = RandomStreams(rng.randint(2**30))

# Initialise weights and bias for auto-encoder
# Encoding layers
W1 = init_weights(28 * 28, 900)
b1 = init_bias(900)
W2 = init_weights(900, 625)
b2 = init_bias(625)
Example 22
    return np.asarray(X, dtype=theano.config.floatX)

def init_weights(shape):
    return theano.shared(floatX(np.random.randn(*shape) * 0.01))

def model(X, w):
    return T.nnet.softmax(T.dot(X, w))

start_time = time.time()

##################################################
data_folder = 'truncated_data' ###################
dimension = 331 ###################################
##################################################

trX, teX, trY, teY = mnist(onehot=True, datasets_dir=os.getcwd() + '/' + data_folder + '_' + str(dimension) + '/', dimension=dimension)

X = T.fmatrix()
Y = T.fmatrix()

w = init_weights((dimension * dimension, 2))  # shape entries must be integers

py_x = model(X, w)
y_pred = T.argmax(py_x, axis=1)

cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))
gradient = T.grad(cost=cost, wrt=w)
update = [[w, w - gradient * 0.0001]]

train = theano.function(inputs=[X, Y], outputs=cost, updates=update, allow_input_downcast=True)
predict = theano.function(inputs=[X], outputs=y_pred, allow_input_downcast=True)
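The snippet above compiles train and predict but omits the training loop; a typical mini-batch loop (epoch count and batch size are illustrative) would be:

for i in range(100):
    for start in range(0, len(trX), 128):
        cost = train(trX[start:start + 128], trY[start:start + 128])
    print(np.mean(np.argmax(teY, axis=1) == predict(teX)))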
Example 23
import numpy as np
from load import mnist

# takes care of conversions to make your stuff theano-friendly: float32 or float64
def floatX(X):
    return np.asarray(X, dtype=theano.config.floatX)

def init_weights(shape):
    return theano.shared(floatX(np.random.randn(*shape) * 0.01)) #initialiaze to a some gaussian

# our model in matrix format
def model(X, w):
    return T.nnet.softmax(T.dot(X, w))

# training matrices
trX, teX, trY, teY = mnist(onehot=True) #one hot encoding!

X = T.fmatrix()
Y = T.fmatrix()

w = init_weights((784, 10))

# probability of the labels, given the input
py_x = model(X, w)
y_pred = T.argmax(py_x, axis=1)

# categorical cross entropy is basically telling us to maximize the value that's true
cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y)) 
gradient = T.grad(cost=cost, wrt=w)
update = [[w, w - gradient * 0.05]]
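The snippet ends at the update rule; a typical continuation (a sketch mirroring the other examples, assuming theano is imported at the top of the file) compiles the functions and runs mini-batch gradient descent:

import theano

train = theano.function(inputs=[X, Y], outputs=cost, updates=update,
                        allow_input_downcast=True)
predict = theano.function(inputs=[X], outputs=y_pred, allow_input_downcast=True)

for i in range(100):
    for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)):
        cost = train(trX[start:end], trY[start:end])
    print("{0} {1}".format(i, np.mean(np.argmax(teY, axis=1) == predict(teX))))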
Example 24
	def initialize_mnist(self):
		self.trX, self.teX, self.trY, self.teY = mnist(onehot = True)
		self.trX = self.trX.reshape(-1, 1, 28, 28)
		self.teX = self.teX.reshape(-1, 1, 28, 28)
		self.set_weights("mnist")
Example 25
X_hat = model(X, weights_layer_1, weights_layer_y, bias_layer_1, bias_layer_y)

# Loss
L = - T.sum(X * T.log(X_hat) + (1 - X) * T.log(1 - X_hat), axis=1)
loss = T.mean(L)

# Parameter Updating
params = [weights_layer_y, bias_layer_y, weights_layer_1, bias_layer_1]
updates = sgd(loss, params, lr=learning_rate)

# Compiling
train = theano.function(inputs=[X], outputs=loss, updates=updates, allow_input_downcast=True, on_unused_input='warn')
predict = theano.function(inputs=[X], outputs=X_hat, allow_input_downcast=True)

# Load the data
trX, teX, trY, teY = mnist(scale_data=False)
n_batches_train = trX.shape[0] / batch_size


# Training
for i in range(epochs):
    cost_per_batch = np.zeros(n_batches_train)
    pred_cost_per_batch = np.zeros(n_batches_train)
    cost = []
    for start, end in zip(range(0, n_batches_train * batch_size, batch_size),
                          range(batch_size, n_batches_train * batch_size, batch_size)):
        cost.append(train(trX[start:end]))

    print "Epoch number {0}".format(i)
    print 'Mean Cost per Batch %s' % str(i), np.mean(cost)
Example 26
    lr = lr_0 * np.exp( -n / float(n_epochs_organizing_phase) )

    if lr < lr_min:
        return lr_min
    else:
        return lr

def init_neighborhood_size(map_shape):
    m, n = map_shape
    sigma_0 = np.sqrt(m**2 + n**2) / 2.
    return sigma_0

def init_timeconstant(n_epochs_organizing_phase, sigma_0):
    return float(n_epochs_organizing_phase) / np.log(sigma_0)

trX, teX, trY, teY = mnist(ntrain=60000, ntest=10000, onehot=False)
xmin_val = trX[0].min()
xmax_val = trX[0].max()

def remove_threes_and_fours(X, Y):
    """ Y: array-like, shape (n_examples,) """
    three_idxs = np.where(Y == 3)
    four_idxs = np.where(Y == 4)
    ia = np.indices(Y.shape)
    remaining_idxs = np.setxor1d(ia, np.concatenate((three_idxs[0], four_idxs[0])))
    return X[remaining_idxs], Y[remaining_idxs]

if raw_input('remove_classes 3 and 4? (y/n)') == 'y':
    trX, trY = remove_threes_and_fours(trX, trY)
    teX, teY = remove_threes_and_fours(teX, teY)
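The np.indices / np.setxor1d index arithmetic in remove_threes_and_fours can be written more directly with a boolean mask; an equivalent sketch:

def remove_threes_and_fours_masked(X, Y):
    # Keep every row whose label is neither 3 nor 4, preserving order.
    keep = (Y != 3) & (Y != 4)
    return X[keep], Y[keep]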
Example 27
def conv_nn():
    # mnist dataset, training + test
    trX, teX, trY, teY = mnist(onehot=True)

    trX = trX.reshape(-1, 1, 28, 28)
    teX = teX.reshape(-1, 1, 28, 28)

    X = T.ftensor4()
    Y = T.fmatrix()

    # init the weights
    # isotropic filters: (5,5)
    # anisotropic filters: (6,3) and (3,6)
    w_1 = init_weights((32, 1, 5, 5))
    w_2 = init_weights((64, 32, 5, 5))
    w_3 = init_weights((128, 64, 2, 2))
    #number of pixel in last conv layer:
    #for isotropic filter: num_filter * pix_per_filter = 128 * 9 = 1152
    #for anisotropic filter: num_filter * pix_per_filter = 128 * 8 = 1024
    w_h2 = init_weights((1152, 625 ))
    w_o = init_weights((625, 10))

    noise_out_1, noise_out_2, noise_out_3, noise_h2, noise_py_x = model_conv(
            X,
            w_1,
            w_2,
            w_3,
            w_h2,
            w_o,
            0.8,
            0.5
            )
    out_1, out_2, out_3, h2, py_x = model_conv(
            X,
            w_1,
            w_2,
            w_3,
            w_h2,
            w_o,
            1.,
            1.
            )
    y_x = T.argmax(py_x, axis=1)

    cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y))
    params = [w_1, w_2, w_3, w_h2, w_o]
    updates = RMSprop(cost, params, lr=0.001)

    train = theano.function(
            inputs=[X, Y],
            outputs=cost,
            updates=updates,
            allow_input_downcast=True
            )
    predict = theano.function(
            inputs=[X],
            outputs=y_x,
            allow_input_downcast=True
            )

    f_out = open('res/res_conv_aniso_nn', 'w')
    for i in range(100): #you can adjust this if training takes too long
        for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)):
            cost = train(trX[start:end], trY[start:end])
        print i
        correct = np.mean(np.argmax(teY, axis=1) == predict(teX))
        res = str(i) + " " + str(correct) + "\n"
        f_out.write( res )
    print 'Result Conv:', correct

    fweights = w_1.get_value()
    # save the filters of the first layer
    for i in range(fweights.shape[0]):
        fname = 'res/filters_aniso/filter_' + str(i)
        np.save(fname, fweights[i,0,:,:])
Example 28
        print("Sending index ", i2, "from server to worker", process)
        process += 1

    for process in range(1, size):
        comm.isend(-1, dest=process, tag=1)

    def floatX(X):
        return np.asarray(X, dtype=theano.config.floatX)

    def init_weights(shape):
        return theano.shared(floatX(np.random.randn(*shape) * 0.01))

    def model(X, w):
        return T.nnet.softmax(T.dot(X, w))

    trX, teX, trY, teY = mnist(onehot=True)
    X = T.fmatrix()
    Y = T.fmatrix()
    w = init_weights((784, 10))

    w0 = theano.shared(value=np.zeros((784, 10), dtype=theano.config.floatX))
    proc = 1

    w_tilde = w.copy()
    #broadcast w_tilde
    while proc < size:
        comm.isend(w_tilde, dest=proc, tag=2)
        proc += 1
    for proc in range(1, size):
        comm.isend(-1, dest=proc, tag=2)
Example 29
def main():
    trX, teX, trY, teY = mnist(onehot=True)

    trX = trX.reshape(-1, 1, 28, 28)
    teX = teX.reshape(-1, 1, 28, 28)

    dtype0 = 'float32'
    dtype1 = 'float64'

    X = T.ftensor4()
    Y = T.fmatrix()

    w = init_weights((32, 1, 3, 3), dtype0)
    w2 = init_weights((64, 32, 3, 3), dtype0)
    w3 = init_weights((128, 64, 3, 3), dtype0)
    w4 = init_weights((128 * 3 * 3, 625), dtype0)
    w_o = init_weights((625, 10), dtype0)

    trX, trY, X, Y = cast_4(trX, trY, X, Y, dtype0)

    noise_l1, noise_l2, noise_l3, noise_l4, noise_py_x = model(
        X, w, w2, w3, w4, w_o, 0.2, 0.5, dtype0)
    l1, l2, l3, l4, py_x = model(X, w, w2, w3, w4, w_o, 0., 0., dtype0)
    y_x = T.argmax(py_x, axis=1)

    cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y))
    params = [w, w2, w3, w4, w_o]
    updates = RMSprop(cost, params, dtype0, lr=0.001)
    train = theano.function(inputs=[X, Y],
                            outputs=cost,
                            updates=updates,
                            allow_input_downcast=True)
    predict = theano.function(inputs=[X],
                              outputs=y_x,
                              allow_input_downcast=True)

    # train_model with mini-batch training
    for i in range(1):
        start_time = time.time()
        for start, end in zip(range(0, len(trX), 128),
                              range(128, len(trX), 128)):
            print(start, ' ', end)
            cost = train(trX[start:end], trY[start:end])
            print("--- %s seconds ---" % (time.time() - start_time))
    pred_teX = predict(teX)
    print(np.mean(np.argmax(teY, axis=1) == pred_teX))

    w = theano.shared(value=np.asarray(w.eval(), dtype=dtype1),
                      name='w',
                      borrow=True)
    w2 = theano.shared(value=np.asarray(w2.eval(), dtype=dtype1),
                       name='w2',
                       borrow=True)
    w3 = theano.shared(value=np.asarray(w3.eval(), dtype=dtype1),
                       name='w3',
                       borrow=True)
    w4 = theano.shared(value=np.asarray(w4.eval(), dtype=dtype1),
                       name='w4',
                       borrow=True)
    w_o = theano.shared(value=np.asarray(w_o.eval(), dtype=dtype1),
                        name='w_o',
                        borrow=True)

    trX, trY, X, Y = cast_4(trX, trY, X, Y, dtype1)

    noise_l1 = T.cast(noise_l1, dtype=dtype1)
    noise_l2 = T.cast(noise_l2, dtype=dtype1)
    noise_l3 = T.cast(noise_l3, dtype=dtype1)
    noise_l4 = T.cast(noise_l4, dtype=dtype1)
    noise_py_x = T.cast(noise_py_x, dtype=dtype1)
    l1 = T.cast(l1, dtype=dtype1)
    l2 = T.cast(l2, dtype=dtype1)
    l3 = T.cast(l3, dtype=dtype1)
    l4 = T.cast(l4, dtype=dtype1)

    for i in range(1):
        start_time = time.time()
        for start, end in zip(range(0, len(trX), 128),
                              range(128, len(trX), 128)):
            print(start, ' ', end)
            cost = train(trX[start:end], trY[start:end])
            print("--- %s seconds ---" % (time.time() - start_time))
    pred_teX = predict(teX)
    print(np.mean(np.argmax(teY, axis=1) == pred_teX))
    print("Finished!")
Example 30
def model(X, w_h, w_o):
    h = T.nnet.sigmoid(T.dot(X, w_h))
    py_x = T.nnet.softmax(T.dot(h, w_o))
    return py_x


def sgd(cost, params, learning_rate=0.05):
    grads = T.grad(cost=cost, wrt=params)
    updates = []
    for p, g in zip(params, grads):
        updates.append([p, p - g * learning_rate])
    return updates


trainX, testX, trainY, testY = load.mnist(onehot=True)

X = T.fmatrix()
Y = T.fmatrix()

hidden_layer_size = 625
w_h = init_weights(shape=(trainX.shape[1], hidden_layer_size))
w_o = init_weights(shape=(hidden_layer_size, trainY.shape[1]))

py_x = model(X, w_h, w_o)
y_prediction = T.argmax(py_x, axis=1)

cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))
params = [w_h, w_o]
updates = sgd(cost, params)
Example 31
import numpy as np
from sklearn.linear_model import LogisticRegression
import load
import convnet


if __name__ == "__main__":
    print("\nLoading Data...")
    load_activations = convnet.ConvolutionalNeuralNetwork().load_data
    num_chunks = 20
    trAs = [load_activations("saved/trA{0:02d}.txt".format(i), (60000 / num_chunks, 625)) for i in range(num_chunks)]
    trA = np.concatenate(trAs)
    print("trA.shape: {0}".format(trA.shape))
    teA = load_activations("saved/teA.txt", (10000, 625))
    print("teA.shape: {0}".format(teA.shape))
    trX, teX, trY, teY = load.mnist(onehot=True)
    trC = np.argmax(trY, axis=1)
    print("trC.shape: {0}".format(trC.shape))
    teC = np.argmax(teY, axis=1)
    print("teC.shape: {0}".format(teC.shape))
    print("Done.")

    print("\nCreating Regression Model...")
    lr = LogisticRegression()
    lr.fit(trA, trC)
    print("Done.")

    print("\nAnalyzing Training Data...")
    predictions = lr.predict(trA)
    print("predictions.shape: {0}".format(predictions.shape))
    accuracy = np.mean(predictions == trC)
Example 32
File: AE.py Project: MaGold/AE
        w = np.swapaxes(w, 0, 1)
        w = w.reshape(w.shape[0], 1, dim, dim)
        print(dim)
        print(w.shape)
        Plots.plot_filters(w, 1, idx, "layer" + str(i+1))
    return

def plotter(samples, predictions, Ws, img_x, idx):
    plot_all_filters(Ws, idx)
    shp = (samples.shape[0], 1, img_x, img_x)
    samples = samples.reshape(shp)
    predictions = predictions.reshape(shp)
    Plots.plot_predictions_grid(samples, predictions, idx, shp)
    return

trX, trY, teX, teY, channels, img_x = mnist(onehot=True)
trX = trX.reshape(trX.shape[0], 784)
teX = teX.reshape(teX.shape[0], 784)
X = T.fmatrix()

layers = [784, 100, 10, 100, 784]
Ws = get_params(layers)

noise_out = model(X, Ws, 0.2, 0.5)
clean_out = model(X, Ws, 0., 0.)

noise_L = T.sum((X - noise_out)**2, axis=1)
noise_cost = noise_L.mean()

clean_L = T.sum((X - clean_out)**2, axis=1)
clean_cost = clean_L.mean()
Example 33
import theano.tensor as T
import numpy as np
import time

from load import mnist

def floatX(X):  # convert to correct dtype
    return np.asarray(X, dtype=theano.config.floatX)

def init_weights(shape):   # initialize model parameters
    return theano.shared(floatX(np.random.randn(*shape) * 0.001), borrow=True)

def model (X, w):   # model in matrix format
    return T.nnet.softmax(T.dot(X, w))

trX, teX, trY, teY = mnist(onehot=True)     # loading data matrices

X = T.fmatrix()
Y = T.fmatrix()

w = init_weights((784, 10)) # 784 = 28 * 28 size
py_x = model(X, w)
y_pred = T.argmax(py_x, axis=1) # probability outputs and maxima predictions

cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))     # classification metric to optimize
gradient = T.grad(cost=cost, wrt=w)
updates = [[w, w - 0.05 * gradient]]

train = theano.function(inputs=[X, Y], outputs=[cost, y_pred], updates=updates, allow_input_downcast=True)
predict = theano.function(inputs=[X], outputs=y_pred, allow_input_downcast=True)
Example 34
# 1 encoder, decoder and a softmax layer

def init_weights(n_visible, n_hidden):
    initial_W = np.asarray(
        np.random.uniform(
            low=-4 * np.sqrt(6. / (n_hidden + n_visible)),
            high=4 * np.sqrt(6. / (n_hidden + n_visible)),
            size=(n_visible, n_hidden)),
        dtype=theano.config.floatX)
    return theano.shared(value=initial_W, name='W', borrow=True)

def init_bias(n):
    return theano.shared(value=np.zeros(n,dtype=theano.config.floatX),borrow=True)

trX, teX, trY, teY = mnist()

trX, trY = trX[:12000], trY[:12000]
teX, teY = teX[:2000], teY[:2000]

x = T.fmatrix('x')  
d = T.fmatrix('d')


rng = np.random.RandomState(123)
theano_rng = RandomStreams(rng.randint(2 ** 30))

corruption_level=0.1
training_epochs = 25
learning_rate = 0.1
batch_size = 128
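The snippet sets up the random streams and denoising hyperparameters but stops before the model itself; a typical corruption-plus-encoding step for such a denoising autoencoder (a sketch; W1 and b1 are assumed to come from init_weights / init_bias above) would be:

# W1 = init_weights(28 * 28, 900); b1 = init_bias(900)  -- assumed, not shown in the snippet
tilde_x = theano_rng.binomial(size=x.shape, n=1, p=1 - corruption_level,
                              dtype=theano.config.floatX) * x
h = T.nnet.sigmoid(T.dot(tilde_x, W1) + b1)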
Example 35

#Momentum
#Decay parameter ??
def sgd_momentum(cost, params, lr=0.1, decay=0.0001, momentum=0.1):
    grads = T.grad(cost=cost, wrt=params)
    updates = []
    for p, g in zip(params, grads):
        v = theano.shared(p.get_value())
        v_new = momentum * v - (g + decay * p) * lr
        updates.append([p, p + v_new])
        updates.append([v, v_new])
    return updates


trX, teX, trY, teY = mnist()

trX, trY = trX[:12000], trY[:12000]
teX, teY = teX[:2000], teY[:2000]

x = T.fmatrix('x')
d = T.fmatrix('d')

rng = np.random.RandomState(123)
theano_rng = RandomStreams(rng.randint(2**30))

corruption_level = 0.1
training_epochs = 5
learning_rate = 0.1
batch_size = 128
beta = 0.5
Example 36
    def initialize_mnist(self):
        self.trX, self.teX, self.trY, self.teY = mnist(onehot=True)

        self.w_h = self.init_weights((784, 625))
        self.w_h2 = self.init_weights((625, 625))
        self.w_o = self.init_weights((625, 10))
Example 37
    data_nn = np.loadtxt('res/res_simple_nn')
    data_dropout = np.loadtxt('res/res_dropout_nn')
    data_prelu   = np.loadtxt('res/res_prelu_nn')
    data_conv = np.loadtxt('res/res_conv_nn')
    data_aniso = np.loadtxt('res/res_conv_aniso_nn')

    # isotropic_filter
    #filter0 = np.load('res/filter/filter_0.npy')
    #filter1 = np.load('res/filter/filter_15.npy')
    #filter2 = np.load('res/filter/filter_30.npy')

    # anisotropic filter
    filter0 = np.load('res/filters_aniso/filter_0.npy')
    filter1 = np.load('res/filters_aniso/filter_15.npy')
    filter2 = np.load('res/filters_aniso/filter_30.npy')

    trX, teX, trY, teY = mnist(ntrain = 10, ntest = 10)

    #plot_trainerror(data_nn,data_dropout,data_prelu,data_conv)
    #plot_single(data_aniso)
    plot_greyscale(filter0, 'filter0')
    im = trX[5].reshape( (28,28) )
    conv = convolve(im, filter0)
    plot_greyscale(conv, 'convolution')
    plot_greyscale(filter1, 'filter15')
    conv = convolve(im, filter1)
    plot_greyscale(conv, 'convolution')
    plot_greyscale(filter2, 'filter30')
    conv = convolve(im, filter2)
    plot_greyscale(conv, 'convolution')
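The convolve helper is not defined in this snippet; a plausible stand-in using SciPy's 2-D convolution (an assumption, not the project's own implementation):

from scipy.signal import convolve2d

def convolve(image, kernel):
    # 'valid' keeps only positions where the kernel fully overlaps the image.
    return convolve2d(image, kernel, mode='valid')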
Example 38
def model(X, w1, s1, theta1, t1, w2, s2, theta2, t2, D, G, p_drop_lista, p_drop_hidden):
    a1 = L.lista(X, w1, s1, theta1, t1)
    a1 = L.dropout(a1, p_drop_lista)

    y = T.dot(a1, G)
    y = L.dropout(y, p_drop_hidden)

    a2 = L.lista(y, w2, s2, theta2, t2)
    a2 = L.dropout(a2, p_drop_lista)

    x_o = T.dot(a2, D)
    return a1, y, a2, x_o


trX, teX, _, trY, teY, _ = mnist(onehot=False)

IM_SIZE = 28 * 28
Y_SIZE = 50
BATCH_SIZE = 256
t1 = 3
t2 = 3
X = T.fmatrix()
Y = T.fmatrix()
dict_size = IM_SIZE * 4

LOAD_OLD = True
if not LOAD_OLD:
    w1 = L.init_weights([IM_SIZE, dict_size])
    s1 = L.init_weights([dict_size, dict_size])
    theta1 = L.init_weights([dict_size])
Example 39
def main_loop():
    trX, teX, trY, teY = mnist(ntrain=ntrain,ntest=2000,onehot=True)
    print "before", trX[30][0:10]
    seq = mnist_with_noise([trX,trY],10)
    print "after", trX[30][0:10]
    X = T.fmatrix()
    Y = T.fmatrix()
    #grads = T.fvector()

    w_h = [init_weights((784, 625)), init_weights((625, 10))]

    py_x = model(X, w_h)
    y_x = T.argmax(py_x, axis=1)
    pre_softmax = get_pre_softmax_func(X, w_h)
    cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))
    params = w_h
    updates = sgd(cost, params)
    grads = T.grad(cost=cost,wrt=params)
    grad_for_norm = T.grad(cost=cost,wrt=params)

    train = theano.function(inputs=[X, Y], outputs=[cost,grads[0],grads[1]], updates=updates, allow_input_downcast=True)
    predict = theano.function(inputs=[X], outputs=[y_x,py_x], allow_input_downcast=True)
    get_grad = theano.function(inputs=[X,Y],outputs=[cost,grad_for_norm[0],grad_for_norm[1],pre_softmax], allow_input_downcast=True)
    #get_pre_softmax = theano.function([X],)
    mb_size = 128
    for i in range(1):
        grad_list = []
        for start, end in zip(range(0, len(trX), mb_size), range(mb_size, len(trX), mb_size)):
            cost,grads0,grads1 = train(trX[start:end], trY[start:end])
        y,p_y =  predict(teX)
        print np.mean(np.argmax(teY, axis=1) == y)

    noisy_grads = []
    normal_grads = []
    noisy_cost = []
    normal_cost = []
    mb_size = 1
    n_predicts = 0
    noisy_pre_softmax_norm = []
    normal_pre_softmax_norm = []
    for i in seq:
        cost,grad0,grad1,pre_soft = get_grad(trX[i:i+1], trY[i:i+1])
        norm = np.linalg.norm(grad0)
        y,py = predict(trX[i:i+1])
        if i < 0.1*ntrain:
            n_predicts += (np.argmax(trY[i:i+1], axis=1)==y)
            noisy_grads.append(norm)
            noisy_cost.append(cost)
            noisy_pre_softmax_norm.append(np.linalg.norm(pre_soft))


        else:
            normal_grads.append(norm)
            normal_cost.append(cost)
            normal_pre_softmax_norm.append(np.linalg.norm(pre_soft))



    print "noisy grad : mean,var - " ,np.mean(noisy_grads),np.var(noisy_grads)
    print "normal grad: mean,var - " ,np.mean(normal_grads),np.var(normal_grads)

    print "noisy cost : mean,var - " ,np.mean(noisy_cost),np.var(noisy_cost)
    print "normal cost: mean,var - " ,np.mean(normal_cost),np.var(normal_cost)

    print "noisy pre_softmax norm  : mean,var - " ,np.mean(noisy_pre_softmax_norm),np.var(noisy_pre_softmax_norm)
    print "normal pre softmax norm : mean,var - " ,np.mean(normal_pre_softmax_norm),np.var(normal_pre_softmax_norm)
    print " noisy predicts out of 5000 -", n_predicts
    plt.plot(noisy_grads)
    plt.plot(normal_grads)

    plt.savefig('grad0.jpeg')
Example 40
    def main(self):

        # Load training and test data
        training_x, test_x, training_y, test_y = load.mnist(onehot=True)

        # Symbolic variables
        x = tensor.fmatrix()
        y = tensor.fmatrix()

        # Initialize weights
        temp_weight = INPUT_SIZE
        weights = []
        for layer in self.layer_sizes:
            weight = self.init_weights((temp_weight, layer))
            weights.append(weight)
            temp_weight = layer
        weight = self.init_weights((temp_weight, OUTPUT_SIZE))
        weights.append(weight)

        # Initialize model
        model_layer_noise = self.model2(x, weights, 0.2, 0.5)
        model_layer_values = self.model2(x, weights, 0., 0.)
        y_x = tensor.argmax(model_layer_values[-1], axis=1)

        # Initialize the update function
        cost = tensor.mean(
            tensor.nnet.categorical_crossentropy(model_layer_noise[-1], y))
        params = weights
        updates = self.RMSprop(cost, params)  # SGD / RMSprop

        # Initialize core functionality
        train = theano.function(inputs=[x, y],
                                outputs=cost,
                                updates=updates,
                                allow_input_downcast=True)
        self.predict = theano.function(inputs=[x],
                                       outputs=y_x,
                                       allow_input_downcast=True)

        # Training on mnist
        print("\nTRAINING...")
        for i in range(NUMBER_OF_RUNS):
            for start, end in zip(
                    range(0, len(training_x), BATCH_SIZE),
                    range(BATCH_SIZE, len(training_x), BATCH_SIZE)):
                cost = train(training_x[start:end], training_y[start:end])
            print(
                "Iteration ", i + 1, "/", NUMBER_OF_RUNS, "(",
                numpy.mean(
                    numpy.argmax(test_y, axis=1) == self.predict(test_x)) *
                100, ")")

        # Testing
        print("\nTESTING ON: MNIST TRAINING SET...")
        print(
            numpy.mean(
                numpy.argmax(training_y, axis=1) == self.predict(training_x)) *
            100, "percent correct")
        print("\nTESTING ON: MNIST TEST SET...")
        print(
            numpy.mean(numpy.argmax(test_y, axis=1) == self.predict(test_x)) *
            100, "percent correct")
Example 41
reconstruction_mse = T.dot(reconstruction_dims.T, (X - X_hat)**2)/T.sum(T.neq(reconstruction_dims, 0.))
prediction_mse = T.dot(prediction_dims.T, (X - X_hat)**2)/T.sum(T.neq(prediction_dims, 0.))

# Parameter Updating
params = [weights_layer_1, weights_layer_mu, weights_layer_sig, weights_layer_2, weights_layer_y, bias_layer_1,
          bias_layer_mu, bias_layer_sig, bias_layer_2, bias_layer_y]
updates = RMSprop(-L, params, lr=learning_rate)

# Compiling
train = theano.function(inputs=[X, epsilon], outputs=[L, log_lik, D_KL], updates=updates, allow_input_downcast=True)
predict = theano.function(inputs=[X, epsilon], outputs=[model_input, X_hat, T.mean(reconstruction_mse),
                                                        T.mean(prediction_mse)], allow_input_downcast=True)
crop = theano.function(inputs=[X], outputs=crop_X, allow_input_downcast=True)

# Load the data
trX, teX, trY, teY = mnist(scale_data=False, add_noise=False, add_pattern=False)
n_batches_train = trX.shape[0] / batch_size



for i in range(epochs):
    L_per_batch = np.zeros(n_batches_train)
    log_lik_per_batch = np.zeros(n_batches_train)
    D_KL_per_batch = np.zeros(n_batches_train)
    batch_num = 0
    for start, end in zip(range(0, n_batches_train * batch_size, batch_size),
                          range(batch_size, n_batches_train * batch_size, batch_size)):
        e = np.random.normal(0, 1, (batch_size, n_z))
        batch_L, batch_log_lik, batch_D_KL = train(trX[start:end], e)

        L_per_batch[batch_num] = batch_L
Example 42
import load
import convnet

if __name__ == "__main__":
    print("\nLoading Data...")
    load_activations = convnet.ConvolutionalNeuralNetwork().load_data
    num_chunks = 20
    trAs = [
        load_activations("saved/trA{0:02d}.txt".format(i),
                         (60000 / num_chunks, 625)) for i in range(num_chunks)
    ]
    trA = np.concatenate(trAs)
    print("trA.shape: {0}".format(trA.shape))
    teA = load_activations("saved/teA.txt", (10000, 625))
    print("teA.shape: {0}".format(teA.shape))
    trX, teX, trY, teY = load.mnist(onehot=True)
    trC = np.argmax(trY, axis=1)
    print("trC.shape: {0}".format(trC.shape))
    teC = np.argmax(teY, axis=1)
    print("teC.shape: {0}".format(teC.shape))
    print("Done.")

    print("\nCreating Regression Model...")
    lr = LogisticRegression()
    lr.fit(trA, trC)
    print("Done.")

    print("\nAnalyzing Training Data...")
    predictions = lr.predict(trA)
    print("predictions.shape: {0}".format(predictions.shape))
    accuracy = np.mean(predictions == trC)
Example 43
# Compiling
train = theano.function(inputs=[X, epsilon],
                        outputs=[L, log_lik, D_KL],
                        updates=updates,
                        allow_input_downcast=True)
predict = theano.function(inputs=[X, epsilon],
                          outputs=[
                              model_input, X_hat,
                              T.mean(reconstruction_mse),
                              T.mean(prediction_mse)
                          ],
                          allow_input_downcast=True)

# Load the data
trX, teX, trY, teY = mnist(scale_data=False)
n_batches_train = trX.shape[0] / batch_size

for i in range(epochs):
    L_per_batch = np.zeros(n_batches_train)
    log_lik_per_batch = np.zeros(n_batches_train)
    D_KL_per_batch = np.zeros(n_batches_train)
    batch_num = 0
    for start, end in zip(
            range(0, n_batches_train * batch_size, batch_size),
            range(batch_size, n_batches_train * batch_size, batch_size)):
        e = np.random.normal(0, 1, (batch_size, n_z))
        batch_L, batch_log_lik, batch_D_KL = train(trX[start:end], e)

        L_per_batch[batch_num] = batch_L
        log_lik_per_batch[batch_num] = batch_log_lik
Example 44
from svdResNet import svdResNet
from load import mnist
import sys


def usageAndExit():
    print "Useage: python run_mlp.py [Net]\n	Net=svdMLP/MLP"
    sys.exit(0)


if len(sys.argv) < 2:
    usageAndExit()

netType = sys.argv[1]

trX, teX, trY, teY = mnist(onehot=False)

print "Training data: ", trX.shape, trY.shape
print "Test data: ", teX.shape, teY.shape

n_in = 28 * 28
n_out = 10
n_h = 128
n_r = 16
n_layers = 10
m = 0.1

num_epoch = 1  #50
batchsize = 1000
validation_int = 100
learning_rate = 0.0001
Example 45
    test_accuracy = test_acc / test_batches
    print("  " + prefix + " loss:\t\t{:.6f}\t{:.6f}".format(average_test_score, class_err))
    print("  " + prefix + " accuracy:\t{:.6f} %".format(
        test_accuracy * 100))
    return average_test_score, test_accuracy, wrong_samples, wrong_classification


# -----------------------LOAD IMAGES AND LABELS----------------------------#
print('Loading data')

# Load index of labeled images in train set
with open(os.path.join(DATA_PATH, 'labeled_index.pkl'), 'r') as f:
    labeled_idx = pickle.load(f)

# Load image and label of train, validation, test set
trX, vlX, teX, trY, vlY, teY = mnist(onehot=True, normalize_axes=None, ndim=2)
IM_SIZE = trX.shape[1]
# with open('../data/pca_model.pkl','r') as f:
#     pca = pickle.load(f)
# trX=pca.transform(trX)
# vlX=pca.transform(vlX)
# teX=pca.transform(teX)

# -----------------------SET PARAMETERS-------------------------#
losses_ratio = run_parameters.losses_ratio
supervised_cost_fun = run_parameters.supervised_cost_fun

# -----------------------CREATE RUN FUNCTIONS------------------#
# Creating the computation graph
print('Building computation graph')
input_var = T.fmatrix('input_var')
Example 46
import numpy as np

from load import mnist

trX, vlX, _, _, _, _ = mnist()
trX = np.concatenate((trX, vlX))
Example 47
    l2a = rectify(conv2d(l1, w2))
    l2 = max_pool_2d(l2a, (2, 2))
    l2 = dropout(l2, p_drop_conv)

    l3a = rectify(conv2d(l2, w3))
    l3b = max_pool_2d(l3a, (2, 2))
    l3 = T.flatten(l3b, outdim=2)
    l3 = dropout(l3, p_drop_conv)

    l4 = rectify(T.dot(l3, w4))
    l4 = dropout(l4, p_drop_hidden)

    pyx = softmax(T.dot(l4, w_o))
    return l1, l2, l3, l4, pyx

trX, teX, trY, teY = mnist(onehot=True)

trX = trX.reshape(-1, 1, 28, 28)
teX = teX.reshape(-1, 1, 28, 28)

X = T.ftensor4()
Y = T.fmatrix()

w = init_weights((32, 1, 3, 3))
w2 = init_weights((64, 32, 3, 3))
w3 = init_weights((128, 64, 3, 3))
w4 = init_weights((128 * 3 * 3, 625))
w_o = init_weights((625, 10))

noise_l1, noise_l2, noise_l3, noise_l4, noise_py_x = model(X, w, w2, w3, w4, 0.2, 0.5)
l1, l2, l3, l4, py_x = model(X, w, w2, w3, w4, 0., 0.)
Example 48
        return lr_min
    else:
        return lr


def init_neighborhood_size(map_shape):
    m, n = map_shape
    sigma_0 = np.sqrt(m**2 + n**2) / 2.
    return sigma_0


def init_timeconstant(n_epochs_organizing_phase, sigma_0):
    return float(n_epochs_organizing_phase) / np.log(sigma_0)


trX, teX, trY, teY = mnist(ntrain=60000, ntest=10000, onehot=False)
xmin_val = trX[0].min()
xmax_val = trX[0].max()


def remove_threes_and_fours(X, Y):
    """ Y: array-like, shape (n_examples,) """
    three_idxs = np.where(Y == 3)
    four_idxs = np.where(Y == 4)
    ia = np.indices(Y.shape)
    remaining_idxs = np.setxor1d(ia,
                                 np.concatenate((three_idxs[0], four_idxs[0])))
    return X[remaining_idxs], Y[remaining_idxs]


if raw_input('remove_classes 3 and 4? (y/n)') == 'y':