import time
from datetime import datetime

import numpy as np
import tensorflow as tf

# TextDataset and TextGenerationModel are assumed to come from the
# accompanying dataset and model modules. The one_hot and get_batch helpers
# used by this first variant are sketched right after it.


# First variant: time-major one-hot inputs, a fixed learning rate, and
# sampling conditioned on a seed phrase or a random character.
def train(config):
    # Initialize the text dataset
    dataset = TextDataset(config.txt_file)

    # Initialize the model
    model = TextGenerationModel(batch_size=config.batch_size,
                                seq_length=config.seq_length,
                                vocabulary_size=dataset.vocab_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers)

    ###########################################################################
    # Implement code here.
    ###########################################################################

    # Seed phrase that the model is asked to continue while training.
    phrase = 'in the year'

    # Placeholders: time-major one-hot inputs, integer targets, and two
    # sampling inputs (a single character and the full seed phrase).
    input_placeholder = tf.placeholder(
        tf.float32, [config.seq_length, config.batch_size, dataset.vocab_size])
    label_placeholder = tf.placeholder(
        tf.int32, [config.batch_size, config.seq_length])
    char_placeholder = tf.placeholder(tf.float32, [1, 1, dataset.vocab_size])
    phrase_placeholder = tf.placeholder(
        tf.float32, [len(phrase), 1, dataset.vocab_size])

    # One-hot encode the seed phrase once, up front.
    phrase_ids = dataset.convert_to_numbers(phrase)
    phrase_ = np.zeros((len(phrase_ids), dataset.vocab_size))
    for i, char in enumerate(phrase_ids):
        phrase_[i, :] = one_hot([char], dataset.vocab_size)

    # Feed dict used only when sampling; the training placeholders still have
    # to be fed, so they receive zeros.
    feed_dict_bonus = {
        input_placeholder:
            np.zeros((config.seq_length, config.batch_size,
                      dataset.vocab_size)),
        label_placeholder:
            np.zeros((config.batch_size, config.seq_length)),
        char_placeholder:
            np.zeros((1, 1, dataset.vocab_size)),
        phrase_placeholder:
            np.expand_dims(phrase_, 1)
    }

    # Compute logits
    logits_per_step = model._build_model(input_placeholder)

    # Define the optimizer
    optimizer = tf.train.RMSPropOptimizer(config.learning_rate,
                                          decay=config.learning_rate_decay)

    # Compute the loss, clip the gradients by global norm and apply them.
    loss = model._compute_loss(logits_per_step, label_placeholder)
    grads_and_vars = optimizer.compute_gradients(loss)
    grads, variables = zip(*grads_and_vars)
    grads_clipped, _ = tf.clip_by_global_norm(
        grads, clip_norm=config.max_norm_gradient)
    apply_gradients_op = optimizer.apply_gradients(
        zip(grads_clipped, variables))

    # Sampling ops: continue the seed phrase / a single character for 100 steps.
    predictions = model.get_predictions(phrase_placeholder, 100)
    predictions_char = model.get_predictions(char_placeholder, 100)

    summary = tf.summary.merge_all()
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    sess = tf.Session()
    # To resume from a checkpoint instead of training from scratch:
    # saver.restore(sess, "./checkpoints/model.ckpt")
    summary_writer = tf.summary.FileWriter(config.summary_path, sess.graph)
    sess.run(init)

    ###########################################################################
    # Implement code here.
    ###########################################################################

    for train_step in range(int(config.train_steps)):

        # Only for time measurement of step through network
        t1 = time.time()

        #######################################################################
        # Implement code here.
        #######################################################################

        # Load the next sequence batch.
        inputs, targets = get_batch(config.batch_size, config.seq_length,
                                    dataset)
        feed_dict = {
            input_placeholder: inputs,
            label_placeholder: targets,
            char_placeholder: np.zeros((1, 1, dataset.vocab_size)),
            phrase_placeholder: np.zeros((len(phrase), 1, dataset.vocab_size))
        }
        _, loss_value = sess.run([apply_gradients_op, loss],
                                 feed_dict=feed_dict)

        # Only for time measurement of step through network
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        # Output the training progress
        if train_step % config.print_every == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                  "Examples/Sec = {:.2f}, Loss = {:.2f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"),
                      train_step + 1, int(config.train_steps),
                      config.batch_size, examples_per_second, loss_value))

        # Periodically sample: continue the seed phrase and a random character.
        if train_step % config.sample_every == 0:
            random_char = np.random.randint(0, dataset.vocab_size)
            feed_dict_bonus[char_placeholder] = np.expand_dims(
                one_hot([random_char], dataset.vocab_size), 0)
            predic, predic_char = sess.run([predictions, predictions_char],
                                           feed_dict=feed_dict_bonus)
            print(phrase, dataset.convert_to_string(predic))
            print(dataset.convert_to_string([random_char]),
                  dataset.convert_to_string(predic_char), '\n')

        if train_step % config.checkpoint_every == 0:
            saver.save(sess, save_path='./checkpoints/model4.ckpt')
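# ---------------------------------------------------------------------------
# The variant above calls two helpers, one_hot and get_batch, that are not
# defined in this file. The sketch below is a minimal assumed implementation,
# not the original: it presumes dataset.batch(batch_size, seq_length) returns
# integer arrays of shape [batch_size, seq_length], which is also how the
# second variant below uses it.
# ---------------------------------------------------------------------------


def one_hot(indices, vocab_size):
    # Map a sequence of integer character ids to a [len(indices), vocab_size]
    # one-hot matrix.
    encoded = np.zeros((len(indices), vocab_size))
    encoded[np.arange(len(indices)), indices] = 1.0
    return encoded


def get_batch(batch_size, seq_length, dataset):
    # Draw a batch of integer sequences, one-hot encode the inputs and put
    # them in the time-major layout [seq_length, batch_size, vocab_size]
    # expected by input_placeholder; targets stay [batch_size, seq_length].
    inputs, targets = dataset.batch(batch_size, seq_length)
    inputs = np.asarray(inputs)
    encoded = np.zeros((seq_length, batch_size, dataset.vocab_size))
    for t in range(seq_length):
        encoded[t] = one_hot(inputs[:, t], dataset.vocab_size)
    return encoded, np.asarray(targets)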
# Second variant: integer inputs with an explicit LSTM state placeholder that
# is fed back in while sampling, plus dropout and an exponentially decaying
# learning rate. This definition shadows the first train() above.
def train(config):
    # Initialize the text dataset
    dataset = TextDataset(config.txt_file)

    # Initialize the model
    model = TextGenerationModel(batch_size=config.batch_size,
                                seq_length=config.seq_length,
                                vocabulary_size=dataset.vocab_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                dropout_keep_prob=config.dropout_keep_prob)

    ###########################################################################
    # Implement code here.
    ###########################################################################

    with tf.name_scope('input'):
        inputs = tf.placeholder(tf.int32,
                                shape=[config.batch_size, config.seq_length],
                                name='inputs')
        labels = tf.placeholder(tf.int32,
                                shape=[config.batch_size, config.seq_length],
                                name='labels')
        input_sample = tf.placeholder(tf.int32,
                                      shape=[config.batch_size, 1],
                                      name='input_sample')
        state = tf.placeholder(tf.float32, [
            config.lstm_num_layers, 2, config.batch_size,
            config.lstm_num_hidden
        ])

    # Repack the state placeholder into the LSTMStateTuple format the cell
    # expects, one (c, h) pair per layer.
    layer = tf.unstack(state, axis=0)
    rnn_tuple_state = tuple(
        tf.nn.rnn_cell.LSTMStateTuple(layer[i][0], layer[i][1])
        for i in range(config.lstm_num_layers))

    # Logits
    with tf.name_scope('logits'):
        logits, _ = model._build_model(inputs, rnn_tuple_state)

    # Loss
    with tf.name_scope('loss'):
        loss = model._compute_loss(logits, labels)
        tf.summary.scalar('loss', loss)

    # Generate text: a single-step graph whose final state is fed back in.
    with tf.name_scope('sample_logits'):
        sample_logits, final_state = model._build_model(
            input_sample, rnn_tuple_state)

    # Predictions
    with tf.name_scope('predictions'):
        predictions = model.predictions(sample_logits)

    global_step = tf.Variable(0, trainable=False, name='global_step')

    # Exponentially decaying learning rate.
    decaying_learning_rate = tf.train.exponential_decay(
        config.learning_rate, global_step, config.learning_rate_step,
        config.learning_rate_decay, name='decaying_eta')
    tf.add_to_collection('learning_rate', decaying_learning_rate)

    # Define the optimizer
    optimizer = tf.train.RMSPropOptimizer(decaying_learning_rate)

    # Compute, clip and apply the gradients for each variable.
    grads_and_vars = optimizer.compute_gradients(loss)
    grads, variables = zip(*grads_and_vars)
    grads_clipped, _ = tf.clip_by_global_norm(
        grads, clip_norm=config.max_norm_gradient)
    apply_gradients_op = optimizer.apply_gradients(
        zip(grads_clipped, variables), global_step=global_step)

    merged = tf.summary.merge_all()
    test_writer = tf.summary.FileWriter(config.summary_path + '/test',
                                        graph=tf.get_default_graph())

    # Initial zero state
    init_state = np.zeros((config.lstm_num_layers, 2, config.batch_size,
                           config.lstm_num_hidden))

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    ###########################################################################
    # Implement code here.
    ###########################################################################

    for train_step in range(int(config.train_steps)):

        # Only for time measurement of step through network
        t1 = time.time()

        #######################################################################
        # Implement code here.
        #######################################################################

        x_train, y_train = dataset.batch(config.batch_size, config.seq_length)

        # Train
        sess.run(apply_gradients_op,
                 feed_dict={
                     inputs: x_train,
                     labels: y_train,
                     state: init_state
                 })

        # Only for time measurement of step through network
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        # Output the training progress
        if train_step % config.print_every == 0:
            l, summary = sess.run([loss, merged],
                                  feed_dict={
                                      inputs: x_train,
                                      labels: y_train,
                                      state: init_state
                                  })
            test_writer.add_summary(summary, train_step)
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                  "Examples/Sec = {:.2f}, Loss = {}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"),
                      train_step + 1, int(config.train_steps),
                      config.batch_size, examples_per_second, l))

        if train_step % config.sample_every == 0:
            # Start each sampled sentence from a random character.
            sample_inputs = np.random.randint(0, dataset.vocab_size,
                                              size=config.batch_size)
            new_sample = np.reshape(sample_inputs,
                                    (sample_inputs.shape[0], 1))
            new_sentence = np.empty(
                [config.batch_size, config.seq_length], dtype=int)

            # Generate a sentence of seq_length characters, feeding each
            # step's LSTM state back into the next step (zero state first).
            current_state = init_state
            for i in range(config.seq_length):
                pred, current_state = sess.run([predictions, final_state],
                                               feed_dict={
                                                   input_sample: new_sample,
                                                   state: current_state
                                               })
                new_sample = pred.T
                new_sentence[:, i] = new_sample[:, 0]

            for idx, elem in enumerate(new_sentence):
                # The .encode('utf-8') can be skipped for nicer output; it
                # only avoids encoding errors when running on SURFsara.
                print('Sentence {}: {} {}'.format(
                    idx,
                    dataset.convert_to_string(sample_inputs)[idx].encode(
                        'utf-8'),
                    dataset.convert_to_string(elem).encode('utf-8')))

    test_writer.close()
    sess.close()
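# ---------------------------------------------------------------------------
# Minimal usage sketch (an assumption, not part of the original file): both
# train() variants read hyperparameters from a config object with the
# attribute names used above, so an argparse namespace is sufficient. The
# flag defaults below are illustrative guesses, not the original values.
# ---------------------------------------------------------------------------

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--txt_file', type=str, required=True,
                        help='Path to the training text file')
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--seq_length', type=int, default=30)
    parser.add_argument('--lstm_num_hidden', type=int, default=128)
    parser.add_argument('--lstm_num_layers', type=int, default=2)
    parser.add_argument('--dropout_keep_prob', type=float, default=1.0)
    parser.add_argument('--learning_rate', type=float, default=2e-3)
    parser.add_argument('--learning_rate_decay', type=float, default=0.96)
    parser.add_argument('--learning_rate_step', type=int, default=5000)
    parser.add_argument('--max_norm_gradient', type=float, default=5.0)
    parser.add_argument('--train_steps', type=float, default=1e6)
    parser.add_argument('--print_every', type=int, default=100)
    parser.add_argument('--sample_every', type=int, default=1000)
    parser.add_argument('--checkpoint_every', type=int, default=1000)
    parser.add_argument('--summary_path', type=str, default='./summaries')

    # The second train() definition shadows the first when both live in this
    # module; rename one of them to run the other variant.
    train(parser.parse_args())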