import time
from datetime import datetime

import tensorflow as tf

# VanillaRNN, LSTM, get_batch and the utils module are assumed to be provided
# by the accompanying assignment code.


def train(config):
    assert config.model_type in ('RNN', 'LSTM')

    # Set up the model that we are going to use
    if config.model_type == 'RNN':
        print("Initializing Vanilla RNN model...")
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size)
    else:
        print("Initializing LSTM model...")
        model = LSTM(config.input_length, config.input_dim,
                     config.num_hidden, config.num_classes,
                     config.batch_size)

    ###########################################################################
    # Implement code here.
    ###########################################################################

    # Load test data: roughly a third of the total number of training examples
    test_size = int(config.batch_size * config.train_steps / 3)
    x_test, y_test = get_batch(test_size, config.input_length)

    # Time-major inputs: the first T-1 digits of each palindrome, one-hot
    # encoded; the labels are the one-hot encoded final digit.
    input_placeholder = tf.placeholder(
        tf.float32, shape=(config.input_length - 1, None, config.input_dim))
    labels_placeholder = tf.placeholder(
        tf.int32, shape=(None, config.num_classes))

    logits = model.compute_logits(input_placeholder)
    logits_test = model.compute_logits_test(input_placeholder, test_size)

    # Define the optimizer
    optimizer = tf.train.RMSPropOptimizer(config.learning_rate)

    ###########################################################################
    # QUESTION: what happens here and why?
    # ANSWER: the gradients are rescaled so that their global norm does not
    # exceed config.max_norm_gradient (gradient clipping). Recurrent networks
    # backpropagate through many time steps, which can make gradients explode;
    # clipping bounds the update magnitude while preserving its direction.
    # (A numpy sketch of this rescaling follows the function.)
    ###########################################################################
    loss = model.compute_loss(logits, labels_placeholder)

    grads_and_vars = optimizer.compute_gradients(loss)
    grads, variables = zip(*grads_and_vars)
    grads_clipped, _ = tf.clip_by_global_norm(
        grads, clip_norm=config.max_norm_gradient)
    apply_gradients_op = optimizer.apply_gradients(
        zip(grads_clipped, variables))
    ###########################################################################

    accuracy = model.accuracy(logits_test, labels_placeholder)

    summary = tf.summary.merge_all()
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    sess = tf.Session()
    summary_writer = tf.summary.FileWriter(config.summary_path, sess.graph)
    sess.run(init)

    ###########################################################################

    for train_step in range(config.train_steps):

        # Only for time measurement of one step through the network
        t1 = time.time()

        # Load a batch of palindromes and run one optimization step
        x, y = get_batch(config.batch_size, config.input_length)
        feed_dict = {
            input_placeholder: x,
            labels_placeholder: y,
        }
        _, loss_value = sess.run([apply_gradients_op, loss],
                                 feed_dict=feed_dict)

        # Only for time measurement of one step through the network
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        # Print the training progress
        if train_step % config.print_every == 0:
            feed_dict = {
                input_placeholder: x_test,
                labels_placeholder: y_test,
            }
            accuracy_value = sess.run(accuracy, feed_dict=feed_dict)
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                  "Examples/Sec = {:.2f}, Accuracy = {:.2f}, Loss = {:.2f}".
                  format(datetime.now().strftime("%Y-%m-%d %H:%M"),
                         train_step, config.train_steps, config.batch_size,
                         examples_per_second, accuracy_value, loss_value))

            # Update the events file.
            summary_str = sess.run(summary, feed_dict=feed_dict)
            summary_writer.add_summary(summary_str, train_step)
            summary_writer.flush()
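###############################################################################
# Not part of the original listing: a small numpy illustration of what
# tf.clip_by_global_norm does in the QUESTION block above. The global norm is
# sqrt(sum_i ||g_i||^2) over all gradient tensors; whenever it exceeds
# clip_norm, every tensor is rescaled by clip_norm / global_norm. This is a
# sketch of the semantics, not TensorFlow's actual implementation.
###############################################################################
import numpy as np


def clip_by_global_norm_sketch(grads, clip_norm):
    """Rescale a list of gradient arrays so their global norm <= clip_norm."""
    global_norm = np.sqrt(sum(np.sum(g ** 2) for g in grads))
    if global_norm > clip_norm:
        grads = [g * (clip_norm / global_norm) for g in grads]
    return grads, global_norm


# Example: two gradient tensors with global norm sqrt(9 + 16) = 5.0, clipped
# to 1.0; the result keeps the same direction at a fifth of the magnitude.
clipped, norm = clip_by_global_norm_sketch(
    [np.array([3.0, 0.0]), np.array([0.0, 4.0])], clip_norm=1.0)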
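###############################################################################
# Not part of the original listing: a hypothetical sketch of the get_batch
# helper used above, assuming palindromes over the digits 0-9, time-major
# one-hot inputs of shape (input_length - 1, batch_size, input_dim) and
# one-hot labels of shape (batch_size, num_classes), matching the placeholder
# shapes in train(). The assignment's real helper may differ.
###############################################################################
def get_batch_sketch(batch_size, input_length, num_classes=10):
    # Draw the first half of each palindrome at random ...
    half = np.random.randint(
        0, num_classes, (batch_size, int(np.ceil(input_length / 2.0))))
    # ... and mirror it to obtain full palindromes of length input_length.
    palindromes = np.concatenate(
        (half, half[:, ::-1][:, int(input_length % 2):]), axis=1)
    # One-hot encode; the first T-1 digits are the input, the last the target.
    one_hot = np.eye(num_classes)[palindromes]   # (batch, T, num_classes)
    x = one_hot[:, :-1, :].transpose(1, 0, 2)    # (T-1, batch, input_dim)
    y = one_hot[:, -1, :]                        # (batch, num_classes)
    return x, y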
###############################################################################
# Alternative implementation of train(): feeds the raw integer digits instead
# of one-hot vectors and logs loss/accuracy scalars for TensorBoard.
###############################################################################
def train(config):
    assert config.model_type in ('RNN', 'LSTM')

    # Set up the model that we are going to use
    if config.model_type == 'RNN':
        print("Initializing Vanilla RNN model...")
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size)
    else:
        print("Initializing LSTM model...")
        model = LSTM(config.input_length, config.input_dim,
                     config.num_hidden, config.num_classes,
                     config.batch_size)

    ###########################################################################
    # Implement code here.
    ###########################################################################

    # Create placeholders: inputs hold the first T-1 digits of each
    # palindrome, labels the final digit (the class to predict).
    with tf.name_scope('input'):
        inputs = tf.placeholder(
            tf.int32, shape=[config.batch_size, config.input_length - 1],
            name='inputs')
        labels = tf.placeholder(
            tf.int32, shape=[config.batch_size], name='labels')
        test_inputs = tf.placeholder(
            tf.int32, shape=[config.batch_size, config.input_length - 1],
            name='test_inputs')
        test_labels = tf.placeholder(
            tf.int32, shape=[config.batch_size], name='test_labels')

    # Compute the logits
    with tf.name_scope('logits'):
        logits = model.compute_logits(inputs)

    # Compute the loss
    with tf.name_scope('loss'):
        loss = model.compute_loss(logits, labels)
        tf.summary.scalar('loss', loss)

    # Define the optimizer
    optimizer = tf.train.RMSPropOptimizer(config.learning_rate)

    ###########################################################################
    # QUESTION: what happens here and why?
    # ANSWER: the gradients are clipped to a maximum global norm
    # (config.max_norm_gradient) before being applied, i.e. gradient
    # clipping, in order to avoid exploding gradients.
    ###########################################################################
    global_step = tf.Variable(0, trainable=False, name='global_step')

    grads_and_vars = optimizer.compute_gradients(loss)
    grads, variables = zip(*grads_and_vars)
    grads_clipped, _ = tf.clip_by_global_norm(
        grads, clip_norm=config.max_norm_gradient)
    apply_gradients_op = optimizer.apply_gradients(
        zip(grads_clipped, variables), global_step=global_step)

    # Compute the accuracy on a separate test batch
    with tf.name_scope('accuracy'):
        with tf.name_scope('predictions'):
            predictions = model.compute_logits(test_inputs)
        with tf.name_scope('accuracy'):
            accuracy = model.accuracy(predictions, test_labels)
            tf.summary.scalar('accuracy', accuracy)

    merged = tf.summary.merge_all()
    test_writer = tf.summary.FileWriter(config.summary_path + '/test',
                                        graph=tf.get_default_graph())

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    ###########################################################################
    # Implement code here.
    ###########################################################################

    for train_step in range(config.train_steps + 1):

        # Only for time measurement of one step through the network
        t1 = time.time()

        batch = utils.generate_palindrome_batch(config.batch_size,
                                                config.input_length)
        # Take the first T-1 digits as the input ...
        batch_x = batch[:, 0:(config.input_length - 1)]
        # ... and the last digit as the label (the correct class)
        batch_y = batch[:, -1]

        sess.run(apply_gradients_op,
                 feed_dict={inputs: batch_x, labels: batch_y})

        # Only for time measurement of one step through the network
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        # Print the training progress
        if train_step % config.print_every == 0:
            # Create a fresh batch to test on
            batch_test = utils.generate_palindrome_batch(
                config.batch_size, config.input_length)
            batch_x_test = batch_test[:, 0:(config.input_length - 1)]
            batch_y_test = batch_test[:, -1]

            # The training batch is fed as well because the merged summary
            # also evaluates the loss node.
            loss_value, accuracy_value, summary = sess.run(
                [loss, accuracy, merged],
                feed_dict={
                    inputs: batch_x,
                    labels: batch_y,
                    test_inputs: batch_x_test,
                    test_labels: batch_y_test
                })
            test_writer.add_summary(summary, train_step)

            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                  "Examples/Sec = {:.2f}, Accuracy = {:.2f}, Loss = {:.2f}".
                  format(datetime.now().strftime("%Y-%m-%d %H:%M"),
                         train_step, config.train_steps, config.batch_size,
                         examples_per_second, accuracy_value, loss_value))

    test_writer.close()
    sess.close()
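###############################################################################
# Not part of the original listing: a minimal sketch of how train() might be
# invoked from the command line. The field names mirror the config attributes
# the function reads; all default values are illustrative assumptions, not
# the assignment's actual defaults.
###############################################################################
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_type', type=str, default='RNN',
                        choices=['RNN', 'LSTM'])
    parser.add_argument('--input_length', type=int, default=5,
                        help='Palindrome length T')
    parser.add_argument('--input_dim', type=int, default=10)
    parser.add_argument('--num_classes', type=int, default=10)
    parser.add_argument('--num_hidden', type=int, default=128)
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--learning_rate', type=float, default=0.001)
    parser.add_argument('--train_steps', type=int, default=2500)
    parser.add_argument('--max_norm_gradient', type=float, default=10.0)
    parser.add_argument('--print_every', type=int, default=100)
    parser.add_argument('--summary_path', type=str, default='./summaries')

    train(parser.parse_args())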