Example #1
	def trainNNModel(self, X_train, y_train):
		"""
		Trains a four-layer TensorFlow neural network: X->LINEAR->RELU->LINEAR->RELU->LINEAR->RELU->LINEAR->SOFTMAX.
		Args:
		X_train -- training set data
		y_train -- training set labels
		Returns:
		cost -- Final cost after training
		"""
		# ops.reset_default_graph()  # to be able to rerun the model without overwriting tf variables
		tf.set_random_seed(1)
		costs = []
		load = DataLoader()

		(n_x,m) = X_train.shape
		n_y = y_train.shape[0]

		# X,y = self.create_placeholders(n_x,n_y)

		y_pred,_ = self.forward_pass()
		cost = self.cross_entropy_loss(y_pred)
		# cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits = tf.transpose(y_pred), labels = tf.transpose(self.y)))

		optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(cost)

		seed = 1

		with tf.Session() as sess:
			sess.run(tf.global_variables_initializer())

			saver = tf.train.Saver()

			for epoch in range(self.epochs):
				epoch_cost = 0.
				n_mb = int(m/self.mbatchsz)
				seed += 1
				minibatches = load.create_batches(X_train,y_train,n_mb,seed,self.mbatchsz)

				for minibatch in minibatches:
					mbX,mby = minibatch
					_,mb_Cost = sess.run([optimizer,cost], feed_dict = {self.X: mbX, self.y: mby})
					epoch_cost += mb_Cost/n_mb

				print "Cost after epoch %i: %f" % (epoch, epoch_cost)
				costs.append(epoch_cost)

			finalCost = costs[-1]
			# Reuse the forward-pass output built above instead of rebuilding the graph.
			count = tf.equal(tf.argmax(y_pred), tf.argmax(self.y))
			accuracy = tf.reduce_mean(tf.cast(count, "float"))
			trainAcc = accuracy.eval({self.X: X_train, self.y: y_train})
			print("Final training cost after epoch %i: %f" % (self.epochs, finalCost))
			print("Train Accuracy for 4 layer neural network:", trainAcc)
			savepath = saver.save(sess,"./weights/weights.cpkt")

		return costs
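The cross_entropy_loss helper used above is not shown. A minimal sketch consistent with the commented-out loss line inside the method (the exact signature is an assumption):

	def cross_entropy_loss(self, y_pred):
		# Logits and labels arrive column-major as (n_classes, m); transpose to
		# the (m, n_classes) layout the TF op expects.
		return tf.reduce_mean(
			tf.nn.softmax_cross_entropy_with_logits_v2(
				logits=tf.transpose(y_pred), labels=tf.transpose(self.y)))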
Example #2
def train_model_gru(n_neurons,
                    n_steps=28,
                    n_inputs=28,
                    n_outputs=10,
                    learning_rate=0.0001,
                    n_epochs=100,
                    batch_size=32):
    X, y = create_placeholders(n_inputs, n_outputs, n_steps)
    # GRU cell
    current_input = tf.unstack(X, n_steps, 1)
    gru_cell_train = GRUCell_train(n_neurons, n_inputs)
    outputs, states = tf.nn.static_rnn(gru_cell_train,
                                       current_input,
                                       dtype=tf.float32)
    # fully-connected layer
    FC_W = tf.get_variable(
        "FC_W", [n_neurons, n_outputs],
        initializer=tf.contrib.layers.xavier_initializer(seed=42))
    FC_b = tf.get_variable("FC_b", [n_outputs],
                           initializer=tf.zeros_initializer())
    Z = tf.add(tf.matmul(outputs[-1], FC_W), FC_b)
    # optimization
    loss = compute_loss(Z, y)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)
    correct_prediction = tf.equal(tf.argmax(Z, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    init = tf.global_variables_initializer()
    # data-loading
    ld = DataLoader()
    X_train, Y_train = ld.load_data()
    Y_train = np.eye(10)[np.asarray(Y_train, dtype=np.int32)]
    X_Batched, Y_Batched = ld.create_batches(X_train, Y_train, batch_size)
    X_test, y_test = ld.load_data(mode='test')
    y_test = np.eye(10)[np.asarray(y_test, dtype=np.int32)]
    X_test = X_test.reshape((-1, n_steps, n_inputs))
    saver = tf.train.Saver()
    weight_filepath = "./weights/gru/hidden_unit" + str(
        n_neurons) + "/model.ckpt"
    with tf.Session() as sess:
        init.run()
        #training
        for epoch in range(n_epochs):
            for X_batch, y_batch in zip(X_Batched, Y_Batched):
                X_batch = X_batch.reshape((-1, n_steps, n_inputs))
                sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
            # Accuracy on the last minibatch of the epoch.
            acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
            print("Train accuracy after %s epochs: %s" %
                  (str(epoch + 1), str(acc_train * 100)))
        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        print("Test accuracy:  ", acc_test * 100)
        # Save parameters in memory
        saver.save(sess, weight_filepath)
    return acc_test
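Both this example and the LSTM variant below rely on helpers defined elsewhere. Minimal sketches, assuming they mirror the placeholder shapes and the explicit loss used in the standalone GRU example at the end of this page:

def create_placeholders(n_inputs, n_outputs, n_steps):
    # One slice per time step, one column per feature; labels are one-hot.
    X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
    y = tf.placeholder(tf.int32, [None, n_outputs])
    return X, y

def compute_loss(Z, y):
    # Softmax cross-entropy over the final fully-connected outputs.
    return tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=Z, labels=y))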
Example #3
    def trainRNNmodel(self, X_train, y_train):
        tf.set_random_seed(1)
        costs = []
        load = DataLoader()
        m = X_train.shape[0]

        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.logits,
                                                       labels=self.y))
        optimizer = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).minimize(cost)

        correct_pred = tf.equal(tf.argmax(self.pred, 1), tf.argmax(self.y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

        seed = 1

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()

            for epoch in range(self.epochs):
                epoch_cost = 0.
                n_mb = int(m / self.mbatchsz)
                seed += 1
                minibatches = load.create_batches(X_train, y_train, n_mb, seed,
                                                  self.mbatchsz)

                for minibatch in minibatches:
                    mbX, mby = minibatch
                    _, mb_Cost = sess.run([optimizer, cost],
                                          feed_dict={
                                              self.X: mbX,
                                              self.y: mby
                                          })
                    epoch_cost += mb_Cost / n_mb

                print "Cost after epoch %i: %f" % (epoch, epoch_cost)
                costs.append(epoch_cost)

            # Final cost and accuracy, measured on the last minibatch.
            finalCost, trainAcc = sess.run([cost, accuracy],
                                           feed_dict={
                                               self.X: mbX,
                                               self.y: mby
                                           })
            print("Final training cost after epoch %i: %f" % (self.epochs,
                                                              finalCost))
            print("Train Accuracy for", self.model,
                  "recurrent neural network with %i units:" % self.n_h,
                  trainAcc)
            savepath = saver.save(
                sess,
                "./weights/weights_" + self.model + str(self.n_h) + ".cpkt")

        return costs
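The method assumes the constructor has already built self.logits and self.pred. A plausible relationship between the two (an assumption; the constructor is not shown):

        # In the constructor, after the RNN forward pass produces self.logits:
        self.pred = tf.nn.softmax(self.logits)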
Example #4
epoch_seed = 1

layers_dims = [784, 40, 10]  # 2-layer model: one hidden layer of 40 units
learning_rate = 0.0001
regL2 = 0.0001
np.random.seed(100)
Net = NN(layers_dims, learning_rate, regL2)

costs = []
epochs = []
testaccs = []

for epoch in range(num_epochs):
    epoch_seed += 1
    epochcost = 0
    minibatches = load.create_batches(X_train, y_train, num_batches,
                                      epoch_seed, mbatch_sz)
    for minibatch in minibatches:
        mbX, mby = minibatch

        y_pred, cache = Net.forward_pass(mbX)
        # print y_pred
        # print mbX.shape,mby.shape,y_pred.shape

        epochcost += Net.cross_entropy_loss(mby, y_pred)

        Net.backward_pass(mbX, mby, cache)

    # if epoch%100 == 0:
    epochs.append(epoch)
    costs.append(epochcost / num_batches)
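The snippet assumes setup code that is not shown. A minimal sketch of the missing pieces, with illustrative (assumed) values and the column-major data layout used in Example #1:

load = DataLoader()
X_train, y_train = load.load_data()          # assumed loader API, as in Example #1
num_epochs = 200                             # illustrative value
mbatch_sz = 64                               # illustrative value
num_batches = X_train.shape[1] // mbatch_sz  # data assumed to be (features, m)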
Example #5
# argument parsing
parser = argparse.ArgumentParser()
parser.add_argument('--test', action='store_true', help='Run in test mode')
parser.add_argument('--train', action='store_true', help='Run in training mode')
parser.add_argument('--hidden_unit', action='store', type=int, help='Number of hidden units')
parser.add_argument('--model', action='store', help='Which model to use: LSTM or GRU')
args = parser.parse_args()

tf.set_random_seed(100)
np.random.seed(60)

no_hidden_units = args.hidden_unit
X = tf.placeholder(tf.float32, [None, vector_size, no_vectors])
Y = tf.placeholder(tf.int32, [None, no_classes])

# data-loading
data_loader = DataLoader()
x_train, y_train = data_loader.load_data()
y_train = np.eye(10)[np.asarray(y_train, dtype=np.int32)]
x_batch, y_batch = data_loader.create_batches(x_train, y_train, batch_size)	
x_test,y_test = data_loader.load_data(mode='test')
y_test = np.eye(10)[np.asarray(y_test, dtype=np.int32)]
x_test = x_test.reshape((-1, vector_size, no_vectors))


if args.train:
	_train_model(vector_size, no_vectors, no_classes, l_rate, epochs, batch_size)
elif args.test:
	test_model(vector_size, no_vectors, no_classes)
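A usage sketch for this script (the filename is an assumption):

python rnn_main.py --train --model GRU --hidden_unit 128
python rnn_main.py --test --model GRU --hidden_unit 128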
Example #6
def main():
    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    model_name = model_path + FLAGS.dataset
    dataloader = DataLoader(BATCH_SIZE)
    model = HACap(dataloader)
    saver = tf.train.Saver(tf.global_variables())
    sess.run(tf.global_variables_initializer())
    if FLAGS.test:
        saver.restore(sess, model_name)
        dataloader.create_batches(file_path='./dataset/',
                                  dataset_title=dataset_path[FLAGS.dataset],
                                  batch_size=BATCH_SIZE,
                                  data_type="train",
                                  skip=0)
        for i in range(dataloader.num_batches):
            print('batches {} / {}'.format(i, dataloader.num_batches))
            image_cap_tuple = dataloader.next_batch()
            caption = model.generate_caption(sess,
                                             image_cap_tuple,
                                             is_realized=False)
            image, _ = image_cap_tuple
            image_cap_tuple = image, caption
            draw_attention(
                image_cap_tuple,
                np.transpose(model.get_attention(sess, image_cap_tuple),
                             (1, 0, 2)), dataloader.idx2word, i)
        '''   
            draw_guidance(image_cap_tuple, 
                np.transpose(model.get_guidance(sess, image_cap_tuple), (1, 2, 0)))
        '''
        '''
            caption = model.generate_caption(sess, image_caption=image_cap_tuple)
            image, _ = image_cap_tuple
            handle_image_cap_tuple((image, caption))
        '''
    else:
        # Keyword arguments, matching the parameter order used in the test branch.
        dataloader.create_batches(file_path='./dataset/',
                                  dataset_title=dataset_path[FLAGS.dataset],
                                  batch_size=BATCH_SIZE,
                                  data_type="train",
                                  skip=None)
        print(dataloader.vocab_size)
        if not FLAGS.init:
            saver.restore(sess, model_name)
        else:
            if FLAGS.conti:
                saver.restore(sess, model_name)
            for pretrain_batch in range(10000):
                if pretrain_batch < 5 and (not FLAGS.conti):
                    for pretrain_d_batch in range(5):
                        print("pretraining discriminator epoch#%d-%d" %
                              (pretrain_batch, pretrain_d_batch))
                        dataloader.reset_pointer()
                        for batch_idx in range(dataloader.num_batches):
                            if batch_idx % 1000 == 0:
                                print(
                                    "pretraining discriminator epoch#%d-%d/%d"
                                    % (pretrain_d_batch, batch_idx,
                                       dataloader.num_batches))
                            batch = dataloader.next_batch()
                            if rdfloat() > skip_rate:
                                model.train_discriminator(sess, batch)
                for pretrain_g_batch in range(15):
                    print("pretraining generator epoch#%d-%d" %
                          (pretrain_batch, pretrain_g_batch))
                    dataloader.reset_pointer()
                    for batch_idx in range(dataloader.num_batches):
                        batch = dataloader.next_batch()
                        if rdfloat() > skip_rate:
                            model.train_via_MLE(sess, batch)
                        if batch_idx % 1000 == 0:
                            print("pretraining generator epoch#%d-%d/%d" %
                                  (pretrain_g_batch, batch_idx,
                                   dataloader.num_batches))
                            image_cap_tuple = batch
                            caption = model.generate_caption(
                                sess,
                                image_caption=image_cap_tuple,
                                is_train=0.0)
                            image, truth = image_cap_tuple
                            print("fake:")
                            handle_image_cap_tuple((image, caption))
                            print("real:")
                            handle_image_cap_tuple(
                                (image, model.ind_to_str(truth)))
                            print("real_alpha:")
                            print(
                                model.get_attention(sess, image_cap_tuple)[0])
                if FLAGS.save:
                    saver.save(sess, model_name)
        for adv_batch in range(10000000):
            print("adversarial training epoch#%d" % adv_batch)
            dataloader.reset_pointer()
            for batch_idx in range(dataloader.num_batches):
                batch = dataloader.next_batch()
                if rdfloat() > skip_rate:
                    for d_idx in range(2):
                        model.train_discriminator(sess, batch)
                    if adv_batch % 1 == 0:  # always true as written; teacher forcing runs every pass
                        for teacher_forcing_idx in range(5):
                            model.train_via_MLE(sess, batch)
                            if batch_idx % 1000 == 0:
                                image_cap_tuple = batch
                                print("adversarial training epoch#%d-%d/%d" %
                                      (adv_batch, batch_idx,
                                       dataloader.num_batches))
                                caption = model.generate_caption(
                                    sess, image_caption=image_cap_tuple)
                                image, truth = image_cap_tuple
                                print("fake:")
                                handle_image_cap_tuple((image, caption))
                                print("real:")
                                handle_image_cap_tuple(
                                    (image, model.ind_to_str(truth)))

                    for g_idx in range(1):
                        model.train_via_reinforce(sess, batch)
            if FLAGS.save and adv_batch % 5 == 0:
                saver.save(sess, model_name)
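main() relies on several module-level names that are not shown: FLAGS, model_path, dataset_path, BATCH_SIZE, skip_rate, rdfloat and handle_image_cap_tuple. A sketch of plausible definitions for the flags and the random-skip helper (all names and values here are assumptions):

import random
import tensorflow as tf

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('dataset', 'coco', 'Which dataset to train on')
tf.app.flags.DEFINE_boolean('test', False, 'Generate captions instead of training')
tf.app.flags.DEFINE_boolean('init', True, 'Start from scratch instead of restoring')
tf.app.flags.DEFINE_boolean('conti', False, 'Continue pretraining from a checkpoint')
tf.app.flags.DEFINE_boolean('save', True, 'Save checkpoints during training')

skip_rate = 0.0  # probability of skipping a batch; illustrative

def rdfloat():
    # Uniform float in [0, 1); a batch is used when rdfloat() > skip_rate.
    return random.random()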
Example #7
def train_model_lstm(n_neurons, n_steps=28, n_inputs=28, n_outputs=10, learning_rate=0.0001, n_epochs=100, batch_size=32):
	X, y = create_placeholders(n_inputs, n_outputs, n_steps)
	# lstm-cell
	current_input = tf.unstack(X, n_steps, 1)
	lstm_cell_train = LSTMCell(n_neurons, n_inputs)
	outputs, states = tf.nn.static_rnn(lstm_cell_train, current_input, dtype=tf.float32)
	# fully-connected layer
	FC_W = tf.get_variable("FC_W", [n_neurons, n_outputs], initializer = tf.contrib.layers.xavier_initializer(seed=42))
	FC_b = tf.get_variable("FC_b", [n_outputs], initializer = tf.zeros_initializer())
	Z = tf.add(tf.matmul(outputs[-1], FC_W), FC_b)
	# optimization
	loss = compute_loss(Z, y)
	optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
	training_op = optimizer.minimize(loss)
	correct_prediction = tf.equal(tf.argmax(Z, 1), tf.argmax(y, 1))
	accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

	init = tf.global_variables_initializer()
	# data-loading
	ld = DataLoader()
	X_train, Y_train = ld.load_data()
	X_val = X_train[55000:]
	y_val = Y_train[55000:]
	y_val = np.eye(10)[np.asarray(y_val, dtype=np.int32)]
	X_val = X_val.reshape((-1, n_steps, n_inputs))
	Y_train = np.eye(10)[np.asarray(Y_train, dtype=np.int32)]
	X_Batched, Y_Batched = ld.create_batches(X_train[:55000], Y_train[:55000], batch_size)
	X_test, y_test = ld.load_data(mode='test')
	y_test = np.eye(10)[np.asarray(y_test, dtype=np.int32)]
	X_test = X_test.reshape((-1, n_steps, n_inputs))
	# for validation
	best_validation_accuracy = 0.0
	last_improvement = 0	
	patience = 10
	# for saving weights
	saver = tf.train.Saver()
	weight_filepath = "./weights/lstm/hidden_unit" + str(n_neurons)+ "/model.ckpt"
	with tf.Session() as sess:
		init.run()
		#training
		stop = False
		for epoch in range(n_epochs):
			if not stop:
				for X_batch, y_batch in zip(X_Batched, Y_Batched):
					X_batch = X_batch.reshape((-1, n_steps, n_inputs))
					sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
				# Accuracy on the last minibatch of the epoch.
				acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
				print("Train accuracy after %s epochs: %s" % (str(epoch + 1), str(acc_train * 100)))
				acc_val = accuracy.eval(feed_dict={X: X_val, y: y_val})
				print("Validation Accuracy: " + str(acc_val * 100))
				if acc_val > best_validation_accuracy:
					last_improvement = epoch
					best_validation_accuracy = acc_val
					# Save parameters in memory
					saver.save(sess, weight_filepath)
				if epoch - last_improvement > patience:
					print("Early stopping ...")
					stop = True
		acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})				
		print("Test accuracy:  ", acc_test*100)
	return acc_test
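Note: after early stopping, the test accuracy above is computed with the weights from the final epoch rather than with the best-validation checkpoint saved by saver. A sketch of evaluating the saved checkpoint instead, inside the same session:

		saver.restore(sess, weight_filepath)  # reload the best-validation weights
		acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})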
Example #8
n_epochs = 200
Hidden_unit_dimension = 300  # Number of units in the hidden layer
output_dimension = 10  # Number of classes of the image

# Load the training and test Data
dl = DataLoader()
X, Y = dl.load_data('train')
X_test, Y_test = dl.load_data('test')
'''Implement mini-batch SGD here'''
# Create an object of the Neural Network Model
nn = NN(X.shape[1], Hidden_unit_dimension, output_dimension)

for current_epoch in range(n_epochs):
    for current_batch in range(0, X.shape[0], batch_size):

        # This loader returns one freshly shuffled minibatch per call.
        X_mini, y_mini = dl.create_batches(X, Y, batch_size, shuffle=True)
        num_examples = X_mini.shape[0]

        hidden_layer, probs = nn.forward(
            X_mini)  # Compute the hidden layer and softmax layer scores
        #print(probs.shape)

        loss = nn.compute_loss(probs, X_mini, y_mini, reg)  # Compute the loss

        if current_batch % (X.shape[0] - 1) == 0:  # triggers on the first batch of each epoch
            print("Epoch %d: loss %f" % (current_epoch + 1, loss))
        dW1, dW2, db1, db2 = nn.backward(hidden_layer, probs, X_mini, y_mini,
                                         reg)
        nn.update_parameters(dW1, dW2, db1, db2, step_size)
'''Test the training and test accuracy'''
nn.predict(X, Y, mode='train')
nn.predict(X_test, Y_test, mode='test')  # assumed counterpart call for the test split
Example #9
def gru_train(num_neurons,
              time_step=28,
              input_size=28,
              target_size=10,
              learning_rate=0.0001,
              epoch=100,
              batch_size=32):
    X = tf.placeholder(tf.float32, [None, time_step, input_size])
    y = tf.placeholder(tf.int32, [None, target_size])
    # GRU cell
    current_input = tf.unstack(X, time_step, 1)
    gru_cell_train = GRUCell_train(num_neurons, input_size)
    outputs, states = tf.nn.static_rnn(gru_cell_train,
                                       current_input,
                                       dtype=tf.float32)
    # fully-connected layer
    FC_W = tf.get_variable(
        "FC_W", [num_neurons, target_size],
        initializer=tf.contrib.layers.xavier_initializer(seed=42))
    FC_b = tf.get_variable("FC_b", [target_size],
                           initializer=tf.zeros_initializer())
    Z = tf.add(tf.matmul(outputs[-1], FC_W), FC_b)
    # optimization
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=Z, labels=y))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)
    correct_prediction = tf.equal(tf.argmax(Z, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    init = tf.global_variables_initializer()
    # data-loading
    ld = DataLoader()
    train_img, train_label = ld.load_data()
    train_label = np.eye(10)[np.asarray(train_label, dtype=np.int32)]
    minibatch_imged, minibatch_labeled = ld.create_batches(
        train_img, train_label, batch_size)
    test_img, test_label = ld.load_data(mode='test')
    test_label = np.eye(10)[np.asarray(test_label, dtype=np.int32)]
    test_img = test_img.reshape((-1, time_step, input_size))
    saver = tf.train.Saver()
    weight_filepath = "./weights/gru/hidden_unit" + str(
        num_neurons) + "/model.ckpt"
    with tf.Session() as sess:
        init.run()
        #training
        # Loop variable renamed so it does not shadow the `epoch` argument.
        for epoch_idx in range(epoch):
            for minibatch_img, minibatch_label in zip(minibatch_imged,
                                                      minibatch_labeled):
                minibatch_img = minibatch_img.reshape(
                    (-1, time_step, input_size))
                sess.run(training_op,
                         feed_dict={
                             X: minibatch_img,
                             y: minibatch_label
                         })
            # Accuracy on the last minibatch of the epoch.
            acc_train = accuracy.eval(feed_dict={
                X: minibatch_img,
                y: minibatch_label
            })
            print("Train accuracy after %s epochs: %s" %
                  (str(epoch_idx + 1), str(acc_train * 100)))
        acc_test = accuracy.eval(feed_dict={X: test_img, y: test_label})
        #print("Test accuracy:  ", acc_test*100)
        # Save parameters in memory
        saver.save(sess, weight_filepath)
    return acc_test
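A minimal usage sketch (the hidden-unit count is illustrative):

if __name__ == '__main__':
    test_acc = gru_train(num_neurons=128)
    print("Final test accuracy: %.2f%%" % (test_acc * 100))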