def trainNNModel(self, X_train, y_train):
    """
    Trains a four-layer tensorflow neural network:
    X->LINEAR->RELU->LINEAR->RELU->LINEAR->RELU->LINEAR->SOFTMAX.

    Args:
        X_train -- training set data
        y_train -- training set labels

    Returns:
        costs -- list of per-epoch training costs
    """
    # ops.reset_default_graph()  # to be able to rerun the model without overwriting tf variables
    tf.set_random_seed(1)
    costs = []
    load = DataLoader()
    (n_x, m) = X_train.shape
    n_y = y_train.shape[0]
    # X, y = self.create_placeholders(n_x, n_y)
    y_pred, _ = self.forward_pass()
    cost = self.cross_entropy_loss(y_pred)
    # cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
    #     logits=tf.transpose(y_pred), labels=tf.transpose(self.y)))
    optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(cost)
    seed = 1
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        for epoch in range(self.epochs):
            epoch_cost = 0.
            n_mb = int(m / self.mbatchsz)
            seed += 1
            minibatches = load.create_batches(X_train, y_train, n_mb, seed, self.mbatchsz)
            for minibatch in minibatches:
                mbX, mby = minibatch
                _, mb_Cost = sess.run([optimizer, cost], feed_dict={self.X: mbX, self.y: mby})
                epoch_cost += mb_Cost / n_mb
            print "Cost after epoch %i: %f" % (epoch, epoch_cost)
            costs.append(epoch_cost)
        finalCost = costs[-1]
        yp, _ = self.forward_pass()
        count = tf.equal(tf.argmax(yp), tf.argmax(self.y))
        accuracy = tf.reduce_mean(tf.cast(count, "float"))
        trainAcc = accuracy.eval({self.X: X_train, self.y: y_train})
        print "Final training cost after epoch %i: %f" % (self.epochs, finalCost)
        print "Train Accuracy for 4 layer neural network: ", trainAcc
        savepath = saver.save(sess, "./weights/weights.cpkt")
    return costs
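# The trainers above and below assume a DataLoader.create_batches(X, y, n_mb, seed,
# mbatch_sz) helper that shuffles examples (stored column-wise, per X_train.shape
# = (n_x, m)) and returns (mbX, mby) pairs. Its implementation is not shown here;
# a minimal, hypothetical sketch of that contract:
def create_batches_sketch(X, y, n_mb, seed, mbatch_sz):
    """Illustrative stand-in: shuffle column-wise examples, split into minibatches."""
    m = X.shape[1]
    rng = np.random.RandomState(seed)   # per-epoch seed gives a fresh shuffle each epoch
    perm = rng.permutation(m)
    X_shuf, y_shuf = X[:, perm], y[:, perm]
    return [(X_shuf[:, k * mbatch_sz:(k + 1) * mbatch_sz],
             y_shuf[:, k * mbatch_sz:(k + 1) * mbatch_sz]) for k in range(n_mb)]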
def train_model_gru(n_neurons, n_steps=28, n_inputs=28, n_outputs=10,
                    learning_rate=0.0001, n_epochs=100, batch_size=32):
    X, y = create_placeholders(n_inputs, n_outputs, n_steps)

    # gru-cell
    current_input = tf.unstack(X, n_steps, 1)
    gru_cell_train = GRUCell_train(n_neurons, n_inputs)
    outputs, states = tf.nn.static_rnn(gru_cell_train, current_input, dtype=tf.float32)

    # fully-connected layer
    FC_W = tf.get_variable("FC_W", [n_neurons, n_outputs],
                           initializer=tf.contrib.layers.xavier_initializer(seed=42))
    FC_b = tf.get_variable("FC_b", [n_outputs], initializer=tf.zeros_initializer())
    Z = tf.add(tf.matmul(outputs[-1], FC_W), FC_b)

    # optimization
    loss = compute_loss(Z, y)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)
    correct_prediction = tf.equal(tf.argmax(Z, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    init = tf.global_variables_initializer()

    # data-loading
    ld = DataLoader()
    X_train, Y_train = ld.load_data()
    Y_train = np.eye(10)[np.asarray(Y_train, dtype=np.int32)]
    X_Batched, Y_Batched = ld.create_batches(X_train, Y_train, batch_size)
    X_test, y_test = ld.load_data(mode='test')
    y_test = np.eye(10)[np.asarray(y_test, dtype=np.int32)]
    X_test = X_test.reshape((-1, n_steps, n_inputs))

    saver = tf.train.Saver()
    weight_filepath = "./weights/gru/hidden_unit" + str(n_neurons) + "/model.ckpt"

    with tf.Session() as sess:
        init.run()
        # training
        for epoch in range(n_epochs):
            for X_batch, y_batch in itertools.izip(X_Batched, Y_Batched):
                X_batch = X_batch.reshape((-1, n_steps, n_inputs))
                sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
            acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
            print("Train accuracy after %s epochs: %s" % (str(epoch + 1), str(acc_train * 100)))
        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        print("Test accuracy: ", acc_test * 100)
        # Save parameters in memory
        saver.save(sess, weight_filepath)
    return acc_test
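# create_placeholders is defined elsewhere; gru_train further below builds the same
# placeholders inline, so a sketch consistent with that code would be:
def create_placeholders(n_inputs, n_outputs, n_steps):
    # one n_steps-long sequence of n_inputs-dim rows per image, one-hot labels
    X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
    y = tf.placeholder(tf.int32, [None, n_outputs])
    return X, y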
def trainRNNmodel(self, X_train, y_train):
    tf.set_random_seed(1)
    costs = []
    load = DataLoader()
    m = X_train.shape[0]
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.logits, labels=self.y))
    optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(cost)
    correct_pred = tf.equal(tf.argmax(self.pred, 1), tf.argmax(self.y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    seed = 1
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        for epoch in range(self.epochs):
            epoch_cost = 0.
            n_mb = int(m / self.mbatchsz)
            seed += 1
            minibatches = load.create_batches(X_train, y_train, n_mb, seed, self.mbatchsz)
            for minibatch in minibatches:
                mbX, mby = minibatch
                _, mb_Cost = sess.run([optimizer, cost], feed_dict={self.X: mbX, self.y: mby})
                epoch_cost += mb_Cost / n_mb
            print "Cost after epoch %i: %f" % (epoch, epoch_cost)
            costs.append(epoch_cost)
        finalCost, trainAcc = sess.run([cost, accuracy], feed_dict={self.X: mbX, self.y: mby})
        print "Final training cost after epoch %i: %f" % (self.epochs, finalCost)
        print "Train Accuracy for", self.model, "recurrent neural network with %i units: " % (self.n_h), trainAcc
        savepath = saver.save(sess, "./weights/weights_" + self.model + str(self.n_h) + ".cpkt")
    return costs
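# A matching test pass would restore the checkpoint saved above. This is a
# hypothetical sketch, assuming the same graph (self.X, self.y, self.pred)
# has already been rebuilt before the session opens:
def testRNNmodel_sketch(self, X_test, y_test):
    correct_pred = tf.equal(tf.argmax(self.pred, 1), tf.argmax(self.y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    with tf.Session() as sess:
        saver = tf.train.Saver()
        saver.restore(sess, "./weights/weights_" + self.model + str(self.n_h) + ".cpkt")
        print "Test Accuracy:", accuracy.eval({self.X: X_test, self.y: y_test})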
epoch_seed = 1
layers_dims = [784, 40, 10]  # 2-layer model
learning_rate = 0.0001
regL2 = 0.0001
np.random.seed(100)
Net = NN(layers_dims, learning_rate, regL2)
costs = []
epochs = []
testaccs = []
for epoch in range(num_epochs):
    epoch_seed += 1
    epochcost = 0
    minibatches = load.create_batches(X_train, y_train, num_batches, epoch_seed, mbatch_sz)
    for minibatch in minibatches:
        mbX, mby = minibatch
        y_pred, cache = Net.forward_pass(mbX)
        # print y_pred
        # print mbX.shape, mby.shape, y_pred.shape
        epochcost += Net.cross_entropy_loss(mby, y_pred)
        Net.backward_pass(mbX, mby, cache)
    # if epoch % 100 == 0:
    epochs.append(epoch)
    costs.append(epochcost / num_batches)
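# Net.cross_entropy_loss belongs to the NN class and is not shown here. For a
# softmax output with one-hot labels it would reduce to the mean negative
# log-likelihood; a hypothetical numpy sketch (examples column-wise, as above):
def cross_entropy_loss_sketch(y_true, y_pred, eps=1e-12):
    m = y_true.shape[1]                          # number of examples in the minibatch
    return -np.sum(y_true * np.log(y_pred + eps)) / m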
# argument parsing
parser = argparse.ArgumentParser()
parser.add_argument('--test', action='store_true', help='To test the model')
parser.add_argument('--train', action='store_true', help='To train the model')
parser.add_argument('--hidden_unit', action='store', help='Number of hidden units', type=int)
parser.add_argument('--model', action='store', help='Which model to use - LSTM or GRU')
args = parser.parse_args()

tf.set_random_seed(100)
np.random.seed(60)

no_hidden_units = args.hidden_unit
X = tf.placeholder(tf.float32, [None, vector_size, no_vectors])
Y = tf.placeholder(tf.int32, [None, no_classes])

# data-loading
data_loader = DataLoader()
x_train, y_train = data_loader.load_data()
y_train = np.eye(10)[np.asarray(y_train, dtype=np.int32)]
x_batch, y_batch = data_loader.create_batches(x_train, y_train, batch_size)
x_test, y_test = data_loader.load_data(mode='test')
y_test = np.eye(10)[np.asarray(y_test, dtype=np.int32)]
x_test = x_test.reshape((-1, vector_size, no_vectors))

if args.train:
    _train_model(vector_size, no_vectors, no_classes, l_rate, epochs, batch_size)
elif args.test:
    test_model(vector_size, no_vectors, no_classes)
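# Example invocations for this driver script (assuming it lives in main.py):
#   python main.py --train --hidden_unit 128 --model LSTM
#   python main.py --test --hidden_unit 128 --model GRU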
def main():
    config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    model_name = model_path + FLAGS.dataset
    dataloader = DataLoader(BATCH_SIZE)
    model = HACap(dataloader)
    saver = tf.train.Saver(tf.global_variables())
    sess.run(tf.global_variables_initializer())
    if FLAGS.test:
        saver.restore(sess, model_name)
        dataloader.create_batches(file_path='./dataset/',
                                  dataset_title=dataset_path[FLAGS.dataset],
                                  batch_size=BATCH_SIZE,
                                  data_type="train",
                                  skip=0)
        for i in range(dataloader.num_batches):
            print('batches {} / {}'.format(i, dataloader.num_batches))
            image_cap_tuple = dataloader.next_batch()
            caption = model.generate_caption(sess, image_cap_tuple, is_realized=False)
            image, _ = image_cap_tuple
            image_cap_tuple = image, caption
            draw_attention(
                image_cap_tuple,
                np.transpose(model.get_attention(sess, image_cap_tuple), (1, 0, 2)),
                dataloader.idx2word, i)
            '''
            draw_guidance(image_cap_tuple,
                          np.transpose(model.get_guidance(sess, image_cap_tuple), (1, 2, 0)))
            '''
            '''
            caption = model.generate_caption(sess, image_caption=image_cap_tuple)
            image, _ = image_cap_tuple
            handle_image_cap_tuple((image, caption))
            '''
    else:
        dataloader.create_batches('./dataset/', BATCH_SIZE,
                                  dataset_path[FLAGS.dataset],
                                  data_type="train", skip=None)
        print(dataloader.vocab_size)
        if not FLAGS.init:
            saver.restore(sess, model_name)
        else:
            if FLAGS.conti:
                saver.restore(sess, model_name)
        for pretrain_batch in range(10000):
            if pretrain_batch < 5 and (not FLAGS.conti):
                for pretrain_d_batch in range(5):
                    print("pretraining discriminator epoch#%d-%d"
                          % (pretrain_batch, pretrain_d_batch))
                    dataloader.reset_pointer()
                    for batch_idx in range(dataloader.num_batches):
                        if batch_idx % 1000 == 0:
                            print("pretraining discriminator epoch#%d-%d/%d"
                                  % (pretrain_d_batch, batch_idx, dataloader.num_batches))
                        batch = dataloader.next_batch()
                        if rdfloat() > skip_rate:
                            model.train_discriminator(sess, batch)
            for pretrain_g_batch in range(15):
                print("pretraining generator epoch#%d-%d"
                      % (pretrain_batch, pretrain_g_batch))
                dataloader.reset_pointer()
                for batch_idx in range(dataloader.num_batches):
                    batch = dataloader.next_batch()
                    if rdfloat() > skip_rate:
                        model.train_via_MLE(sess, batch)
                    if batch_idx % 1000 == 0:
                        print("pretraining generator epoch#%d-%d/%d"
                              % (pretrain_g_batch, batch_idx, dataloader.num_batches))
                        image_cap_tuple = batch
                        caption = model.generate_caption(
                            sess, image_caption=image_cap_tuple, is_train=0.0)
                        image, truth = image_cap_tuple
                        print("fake:")
                        handle_image_cap_tuple((image, caption))
                        print("real:")
                        handle_image_cap_tuple((image, model.ind_to_str(truth)))
                        print("real_alpha:")
                        print(model.get_attention(sess, image_cap_tuple)[0])
                if FLAGS.save:
                    saver.save(sess, model_name)
        for adv_batch in range(10000000):
            print("adversarial training epoch#%d" % adv_batch)
            dataloader.reset_pointer()
            for batch_idx in range(dataloader.num_batches):
                batch = dataloader.next_batch()
                if rdfloat() > skip_rate:
                    for d_idx in range(2):
                        model.train_discriminator(sess, batch)
                    if adv_batch % 1 == 0:
                        for teacher_forcing_idx in range(5):
                            model.train_via_MLE(sess, batch)
                    if batch_idx % 1000 == 0:
                        image_cap_tuple = batch
                        print("adversarial training epoch#%d-%d/%d"
                              % (adv_batch, batch_idx, dataloader.num_batches))
                        caption = model.generate_caption(sess, image_caption=image_cap_tuple)
                        image, truth = image_cap_tuple
                        print("fake:")
                        handle_image_cap_tuple((image, caption))
                        print("real:")
                        handle_image_cap_tuple((image, model.ind_to_str(truth)))
                    for g_idx in range(1):
                        model.train_via_reinforce(sess, batch)
            if FLAGS.save and adv_batch % 5 == 0:
                saver.save(sess, model_name)
def train_model_lstm(n_neurons, n_steps=28, n_inputs=28, n_outputs=10,
                     learning_rate=0.0001, n_epochs=100, batch_size=32):
    X, y = create_placeholders(n_inputs, n_outputs, n_steps)

    # lstm-cell
    current_input = tf.unstack(X, n_steps, 1)
    lstm_cell_train = LSTMCell(n_neurons, n_inputs)
    outputs, states = tf.nn.static_rnn(lstm_cell_train, current_input, dtype=tf.float32)

    # fully-connected layer
    FC_W = tf.get_variable("FC_W", [n_neurons, n_outputs],
                           initializer=tf.contrib.layers.xavier_initializer(seed=42))
    FC_b = tf.get_variable("FC_b", [n_outputs], initializer=tf.zeros_initializer())
    Z = tf.add(tf.matmul(outputs[-1], FC_W), FC_b)

    # optimization
    loss = compute_loss(Z, y)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)
    correct_prediction = tf.equal(tf.argmax(Z, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    init = tf.global_variables_initializer()

    # data-loading
    ld = DataLoader()
    X_train, Y_train = ld.load_data()
    X_val = X_train[55000:]
    y_val = Y_train[55000:]
    y_val = np.eye(10)[np.asarray(y_val, dtype=np.int32)]
    X_val = X_val.reshape((-1, n_steps, n_inputs))
    Y_train = np.eye(10)[np.asarray(Y_train, dtype=np.int32)]
    X_Batched, Y_Batched = ld.create_batches(X_train[:55000], Y_train[:55000], batch_size)
    X_test, y_test = ld.load_data(mode='test')
    y_test = np.eye(10)[np.asarray(y_test, dtype=np.int32)]
    X_test = X_test.reshape((-1, n_steps, n_inputs))

    # for validation
    best_validation_accuracy = 0.0
    last_improvement = 0
    patience = 10

    # for saving weights
    saver = tf.train.Saver()
    weight_filepath = "./weights/lstm/hidden_unit" + str(n_neurons) + "/model.ckpt"

    with tf.Session() as sess:
        init.run()
        # training
        stop = False
        for epoch in range(n_epochs):
            if not stop:
                for X_batch, y_batch in itertools.izip(X_Batched, Y_Batched):
                    X_batch = X_batch.reshape((-1, n_steps, n_inputs))
                    sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
                acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
                print("Train accuracy after %s epochs: %s" % (str(epoch + 1), str(acc_train * 100)))
                acc_val = accuracy.eval(feed_dict={X: X_val, y: y_val})
                print 'Validation Accuracy: ' + str(acc_val * 100)
                if acc_val > best_validation_accuracy:
                    last_improvement = epoch
                    best_validation_accuracy = acc_val
                    # Save parameters in memory
                    saver.save(sess, weight_filepath)
                if epoch - last_improvement > patience:
                    print("Early stopping ...")
                    stop = True
        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        print("Test accuracy: ", acc_test * 100)
    return acc_test
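# compute_loss is defined elsewhere; gru_train below inlines the identical
# computation, so it presumably just wraps softmax cross-entropy over the logits:
def compute_loss(Z, y):
    return tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=Z, labels=y))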
n_epochs = 200
Hidden_unit_dimension = 300  # Number of units in the hidden layer
output_dimension = 10  # Number of classes of the image

# Load the training and test data
dl = DataLoader()
X, Y = dl.load_data('train')
X_test, Y_test = dl.load_data('test')

'''Implement mini-batch SGD here'''
# Create an object of the Neural Network Model
nn = NN(X.shape[1], Hidden_unit_dimension, output_dimension)
for current_epoch in range(n_epochs):
    for current_batch in range(0, X.shape[0], batch_size):
        X_mini, y_mini = dl.create_batches(X, Y, batch_size, shuffle=True)
        num_examples = X_mini.shape[0]
        # Compute the hidden layer and softmax layer scores
        hidden_layer, probs = nn.forward(X_mini)
        # print(probs.shape)
        loss = nn.compute_loss(probs, X_mini, y_mini, reg)  # Compute the loss
        if current_batch % (X.shape[0] - 1) == 0:
            print("Epoch %d: loss %f" % (current_epoch + 1, loss))
        dW1, dW2, db1, db2 = nn.backward(hidden_layer, probs, X_mini, y_mini, reg)
        nn.update_parameters(dW1, dW2, db1, db2, step_size)

'''Test the training and test accuracy'''
nn.predict(X, Y, mode='train')
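# Note: this script's dl.create_batches(X, Y, batch_size, shuffle=True) appears to
# draw one random minibatch per call (examples row-wise here), unlike the per-epoch
# batch lists used earlier; a hypothetical sketch of that sampling variant:
def create_batches_sampled(X, Y, batch_size, shuffle=True):
    idx = (np.random.choice(X.shape[0], batch_size, replace=False)
           if shuffle else np.arange(batch_size))
    return X[idx], Y[idx]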
def gru_train(num_neurons, time_step=28, input_size=28, target_size=10,
              learning_rate=0.0001, epoch=100, batch_size=32):
    X = tf.placeholder(tf.float32, [None, time_step, input_size])
    y = tf.placeholder(tf.int32, [None, target_size])

    # gru-cell
    current_input = tf.unstack(X, time_step, 1)
    gru_cell_train = GRUCell_train(num_neurons, input_size)
    outputs, states = tf.nn.static_rnn(gru_cell_train, current_input, dtype=tf.float32)

    # fully-connected layer
    FC_W = tf.get_variable("FC_W", [num_neurons, target_size],
                           initializer=tf.contrib.layers.xavier_initializer(seed=42))
    FC_b = tf.get_variable("FC_b", [target_size], initializer=tf.zeros_initializer())
    Z = tf.add(tf.matmul(outputs[-1], FC_W), FC_b)

    # optimization
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=Z, labels=y))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)
    correct_prediction = tf.equal(tf.argmax(Z, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    init = tf.global_variables_initializer()

    # data-loading
    ld = DataLoader()
    train_img, train_label = ld.load_data()
    train_label = np.eye(10)[np.asarray(train_label, dtype=np.int32)]
    minibatch_imged, minibatch_labeled = ld.create_batches(train_img, train_label, batch_size)
    test_img, test_label = ld.load_data(mode='test')
    test_label = np.eye(10)[np.asarray(test_label, dtype=np.int32)]
    test_img = test_img.reshape((-1, time_step, input_size))

    saver = tf.train.Saver()
    weight_filepath = "./weights/gru/hidden_unit" + str(num_neurons) + "/model.ckpt"

    with tf.Session() as sess:
        init.run()
        # training; epoch_idx avoids shadowing the `epoch` argument
        for epoch_idx in range(epoch):
            for minibatch_img, minibatch_label in itertools.izip(minibatch_imged, minibatch_labeled):
                minibatch_img = minibatch_img.reshape((-1, time_step, input_size))
                sess.run(training_op, feed_dict={X: minibatch_img, y: minibatch_label})
            acc_train = accuracy.eval(feed_dict={X: minibatch_img, y: minibatch_label})
            print("Train accuracy after %s epochs: %s" % (str(epoch_idx + 1), str(acc_train * 100)))
        acc_test = accuracy.eval(feed_dict={X: test_img, y: test_label})
        # print("Test accuracy: ", acc_test*100)
        # Save parameters in memory
        saver.save(sess, weight_filepath)
    return acc_test
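# Hypothetical driver: sweep GRU widths and compare held-out accuracy. gru_train
# builds its own graph, so the default graph is reset between runs:
for units in [32, 64, 128]:
    tf.reset_default_graph()
    print("GRU with %d units -> test accuracy %f" % (units, gru_train(units)))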