# Imports assumed by the snippets below (TensorFlow 1.x API); the snippets
# come from separate files, so the project helper module appears both as
# `helper` and under the alias `h`.
import time
import warnings
from distutils.version import LooseVersion

import numpy as np
import tensorflow as tf

import helper
h = helper  # alias used in this first snippet


def run(self, epochs=3, batch_size=512, rnn_size=128, num_layers=1,
        encoding_embedding_size=200, decoding_embedding_size=200,
        learning_rate=0.01, k_p=.75):
    max_target_sent_length = max([len(sent) for sent in self.source_int_text])

    # Build the training graph
    train_graph = tf.Graph()
    with train_graph.as_default():
        input_data, targets, lr, keep_prob = self.model_placeholders()
        sequence_length = tf.placeholder_with_default(
            max_target_sent_length, None, name='sequence_length')
        input_shape = tf.shape(input_data)

        train, infer = self.seq2seq_model(
            tf.reverse(input_data, [-1]), targets, keep_prob, batch_size,
            sequence_length, len(self.source_vocab_to_int),
            len(self.target_vocab_to_int), encoding_embedding_size,
            decoding_embedding_size, rnn_size, num_layers,
            self.target_vocab_to_int)
        tf.identity(infer, 'logits')

        with tf.name_scope('optimization'):
            # Loss, optimizer, and gradient clipping
            cost = tf.contrib.seq2seq.sequence_loss(
                train, targets, tf.ones([input_shape[0], sequence_length]))
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            gradients = optimizer.compute_gradients(cost)
            capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var)
                                for grad, var in gradients if grad is not None]
            train_op = optimizer.apply_gradients(capped_gradients)

    # Hold out the first batch as a validation set
    train_source = self.source_int_text[batch_size:]
    train_target = self.target_int_text[batch_size:]
    valid_source = h.pad_sentence_batch(self.source_int_text[:batch_size])
    valid_target = h.pad_sentence_batch(self.target_int_text[:batch_size])

    with tf.Session(graph=train_graph) as s:
        s.run(tf.global_variables_initializer())
        for e in range(1, epochs + 1):
            for idx, (source_batch, target_batch) in enumerate(
                    h.batch_data(train_source, train_target, batch_size)):
                _, loss = s.run(
                    [train_op, cost],
                    feed_dict={
                        input_data: source_batch,
                        targets: target_batch,
                        lr: learning_rate,
                        sequence_length: target_batch.shape[1],
                        keep_prob: k_p
                    })
                batch_train_logits = s.run(
                    infer, feed_dict={input_data: source_batch, keep_prob: 1.0})
                batch_valid_logits = s.run(
                    infer, feed_dict={input_data: valid_source, keep_prob: 1.0})
                train_acc = self.get_accuracy(target_batch, batch_train_logits)
                valid_acc = self.get_accuracy(np.array(valid_target),
                                              batch_valid_logits)
                print('Epoch {:>3} Batch {:>4}/{} - Train Accuracy: {:>6.3f}, '
                      'Validation Accuracy: {:>6.3f}, Loss: {:>6.3f}'.format(
                          e, idx, len(self.source_int_text) // batch_size,
                          train_acc, valid_acc, loss))
        saver = tf.train.Saver()
        saver.save(s, 'checkpoints/dev')
    h.save_params('checkpoints/dev')
    print('Done')
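# The training loops above and below call pad_sentence_batch, batch_data and
# save_params on the helper module, whose source is not included here. The
# following is only a minimal sketch of what the first two utilities might
# look like, inferred from how they are called; the pad id of 0 for '<PAD>'
# is an assumption.
def _pad_sentence_batch_sketch(sentence_batch, pad_id=0):
    """Pad every sentence in the batch to the length of the longest one."""
    max_len = max(len(sentence) for sentence in sentence_batch)
    return [sentence + [pad_id] * (max_len - len(sentence))
            for sentence in sentence_batch]


def _batch_data_sketch(source, target, batch_size):
    """Yield padded (source, target) batches as NumPy arrays."""
    for start in range(0, len(source) // batch_size * batch_size, batch_size):
        yield (np.array(_pad_sentence_batch_sketch(source[start:start + batch_size])),
               np.array(_pad_sentence_batch_sketch(target[start:start + batch_size])))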
    return np.mean(np.equal(target, np.argmax(logits, 2)))


train_source = source_int_text[batch_size:]
train_target = target_int_text[batch_size:]
valid_source = helper.pad_sentence_batch(source_int_text[:batch_size])
valid_target = helper.pad_sentence_batch(target_int_text[:batch_size])

with tf.Session(graph=train_graph) as sess:
    sess.run(tf.global_variables_initializer())
    for epoch_i in range(epochs):
        for batch_i, (source_batch, target_batch) in enumerate(
                helper.batch_data(train_source, train_target, batch_size)):
            start_time = time.time()
            _, loss = sess.run(
                [train_op, cost],
                {
                    input_data: source_batch,
                    targets: target_batch,
                    lr: learning_rate,
                    sequence_length: target_batch.shape[1],
                    keep_prob: keep_probability
                })
            batch_train_logits = sess.run(inference_logits, {
                input_data: source_batch,
                keep_prob: 1.0
            })
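# The fragment above opens with the final return statement of a padding-aware
# accuracy helper. For context, a self-contained version of that helper,
# mirroring the get_accuracy defined inside the train() snippet further below:
def get_accuracy(target, logits):
    """Pad targets and logits to a common length, then compare argmax ids."""
    max_seq = max(target.shape[1], logits.shape[1])
    if max_seq - target.shape[1]:
        target = np.pad(target, [(0, 0), (0, max_seq - target.shape[1])],
                        'constant')
    if max_seq - logits.shape[1]:
        logits = np.pad(logits, [(0, 0), (0, max_seq - logits.shape[1]), (0, 0)],
                        'constant')
    return np.mean(np.equal(target, np.argmax(logits, 2)))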
def train(experiment_name, method, model, epochs, input_batch_size,
          train_source, train_target, valid_source, valid_target,
          learning_rate, keep_probability, save_path, start_checkpoint,
          target_int_to_vocab, source_int_to_vocab, source_vocab_to_int,
          log_dir, graph_batch_size, max_hours, eval_model, eval_batch_size,
          reward_func, early_stopping=False):
    print("training...")

    # FIX
    global reward_function
    reward_function = reward_func

    tf.set_random_seed(1234)

    small_valid_source = np.array(helper.pad_sentence_batch(valid_source[:graph_batch_size]))
    small_valid_target = np.array(helper.pad_sentence_batch(valid_target[:graph_batch_size]))

    small_eval_size = 100
    if method == 'reinforce':
        small_eval_frequency = 10
        full_eval_frequency = 100
    else:
        small_eval_frequency = 1000
        full_eval_frequency = 'epoch'
    save_every_x_epoch = 1

    global_step = 0
    saver = tf.train.Saver(max_to_keep=None)
    train_writer = tf.summary.FileWriter(log_dir)
    log_file = log_dir + '/dev_eval.csv'
    log_eval(['step', 'acc', 'reward', 'reward_std', 'ci_low', 'ci_high'],
             log_file)  # log header

    start_time = time.time()
    best_valid_acc = 0.

    with tf.Session() as sess:
        if start_checkpoint is not None:
            saver.restore(sess, start_checkpoint)
            sess.run(tf.local_variables_initializer())
        else:
            sess.run(tf.global_variables_initializer())
            sess.run(tf.local_variables_initializer())

        for epoch_i in range(epochs):
            if max_hours is not None:
                hours, rem = divmod(time.time() - start_time, 3600)
                if hours >= max_hours:
                    break

            if full_eval_frequency == 'epoch':
                # Evaluate the full dev set
                eval_full(train_writer, eval_model, eval_batch_size, epoch_i, sess,
                          valid_source, valid_target, target_int_to_vocab,
                          source_int_to_vocab, log_file)

            for batch_i, (source_batch, target_batch) in enumerate(
                    helper.batch_data(train_source, train_target, input_batch_size)):
                if full_eval_frequency != 'epoch' and batch_i % full_eval_frequency == 0:
                    eval_full(train_writer, eval_model, eval_batch_size, global_step,
                              sess, valid_source, valid_target, target_int_to_vocab,
                              source_int_to_vocab, log_file)
                    saver.save(sess, save_path=save_path, global_step=global_step)

                if max_hours is not None:
                    hours, rem = divmod(time.time() - start_time, 3600)
                    if hours >= max_hours:
                        break

                if method == 'MLE':
                    _, loss = sess.run(
                        [model.mle_train_op, model.mle_cost],
                        {model.input_data: source_batch,
                         model.targets: target_batch,
                         model.lr: learning_rate,
                         model.sequence_length: target_batch.shape[1],
                         model.keep_prob: keep_probability})
                    probs = []  # fix for logging
                elif method == 'reinforce':
                    # Initialize the accumulated-gradients holder
                    sess.run(model.zero_ops)
                    for source, target in zip(source_batch, target_batch):
                        # Add the gradient for this sentence
                        avg_reward, loss, samples, probs = reinforce_on_sentence(
                            model, len(target), graph_batch_size, source, target,
                            learning_rate, keep_probability, sess,
                            target_int_to_vocab, source_int_to_vocab)
                    # Update based on the accumulated gradients
                    sess.run(model.update_batch, {model.lr: learning_rate})

                if batch_i % small_eval_frequency == 0:
                    valid_acc, avg_valid_reward, reward_std, reward_ci_low, \
                        reward_ci_high, _ = eval_validation_set(
                            valid_source, valid_target, eval_batch_size, eval_model,
                            sess, target_int_to_vocab, source_int_to_vocab,
                            small_eval_size)
                    curr_time = time.strftime("%H:%M:%S", time.localtime())
                    output = experiment_name + (
                        ': Epoch {:>3} Batch {:>4}/{} - Validation Accuracy: {:>6.3f}, '
                        'Loss: {:>6.3f}, Val Reward: {:>6.3f}, time: {}'.format(
                            epoch_i, batch_i, len(train_source) // input_batch_size,
                            valid_acc, loss, avg_valid_reward, curr_time))
                    print(output)

                    """
                    translate_sentence = 'safety is one of the crucial problem that many country and companies concern .'
                    translate_sentence = sentence_to_seq(translate_sentence, source_vocab_to_int)
                    translate_logits = sess.run(
                        model.inference_logits,
                        {model.input_data: [translate_sentence] * graph_batch_size,
                         model.keep_prob: 1.0,
                         model.sequence_length: len(translate_sentence)})[0]
                    print(np.argmax(translate_logits, 1))
                    print(' '.join(word for word in
                                   [target_int_to_vocab[i] for i in np.argmax(translate_logits, 1)]
                                   if word != '<PAD>' and word != '<EOS>').encode('utf-8'))
                    """

                    summary = sess.run(
                        eval_model.merged_batch_summaries,
                        {
                            eval_model.avg_small_dev_accuracy: valid_acc,
                            eval_model.avg_small_dev_reward: avg_valid_reward,
                            eval_model.train_loss: loss,
                            eval_model.probabilities: np.exp(probs),
                            eval_model.log_probabilities: probs
                        })
                    train_writer.add_summary(summary, global_step)

                global_step += 1

            if method == 'MLE' and early_stopping:
                valid_acc = eval_full(train_writer, eval_model, eval_batch_size,
                                      epoch_i, sess, valid_source, valid_target,
                                      target_int_to_vocab, source_int_to_vocab,
                                      log_file, False)
                if valid_acc > best_valid_acc:
                    best_valid_acc = valid_acc
                    saver.save(sess, save_path=save_path, global_step=epoch_i)
            elif method == 'MLE' and epoch_i % save_every_x_epoch == 0:
                saver.save(sess, save_path=save_path, global_step=epoch_i)

        # Evaluate the final model
        if full_eval_frequency == 'epoch':
            step = epochs
        else:
            step = global_step
        valid_acc = eval_full(train_writer, eval_model, eval_batch_size, step, sess,
                              valid_source, valid_target, target_int_to_vocab,
                              source_int_to_vocab, log_file)
        if method == 'MLE' and early_stopping:
            if valid_acc > best_valid_acc:
                saver.save(sess, save_path, global_step=step)
        else:
            # Save Model
            saver.save(sess, save_path, global_step=step)
        print('Model Trained and Saved')
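# The commented-out translation probe above relies on sentence_to_seq, which
# is not defined in this excerpt. A plausible sketch, assuming words are
# lower-cased and unknown words map to the '<UNK>' id (both assumptions):
def _sentence_to_seq_sketch(sentence, vocab_to_int):
    """Convert a raw sentence into a list of word ids."""
    unk_id = vocab_to_int.get('<UNK>', 0)
    return [vocab_to_int.get(word, unk_id) for word in sentence.lower().split()]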
def train_op(self, train_source, train_target, valid_source, valid_target,
             save_path='checkpoints/dev'):
    # self.train_graph = tf.Graph()
    # with self.train_graph.as_default():
    tf.reset_default_graph()

    # Create TF placeholders for input, target, learning_rate and keep_prob
    input_data = tf.placeholder(tf.int32, [None, None], name='input')
    target_data = tf.placeholder(tf.int32, [None, None], name='target')
    lrate = tf.placeholder(tf.float32, name='learning_rate')
    kprob = tf.placeholder(tf.float32, name='keep_prob')
    slength = tf.placeholder_with_default(self.max_source_sentence_length,
                                          None, name='sequence_length')

    train_logits, inference_logits = self.seq2seq_model(
        tf.reverse(input_data, [-1]), target_data, slength, kprob)
    tf.identity(inference_logits, 'logits')

    input_shape = tf.shape(input_data)
    with tf.name_scope("optimization"):
        # Loss function
        cost = tf.contrib.seq2seq.sequence_loss(
            train_logits, target_data, tf.ones([input_shape[0], slength]))
        # Optimizer
        optimizer = tf.train.AdamOptimizer(lrate)
        # Gradient Clipping
        gradients = optimizer.compute_gradients(cost)
        capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var)
                            for grad, var in gradients if grad is not None]
        train_op = optimizer.apply_gradients(capped_gradients)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        start_time = time.time()
        bsize = self.batch_size
        for epoch_i in range(self.epochs):
            for batch_i, (source_batch, target_batch) in enumerate(
                    helper.batch_data(train_source, train_target, bsize)):
                _, loss = sess.run(
                    [train_op, cost],
                    {
                        input_data: source_batch,
                        target_data: target_batch,
                        lrate: self.learning_rate,
                        slength: target_batch.shape[1],
                        kprob: self.keep_prob
                    })
                batch_train_logits = sess.run(inference_logits, {
                    input_data: source_batch,
                    kprob: 1.0
                })
                batch_valid_logits = sess.run(inference_logits, {
                    input_data: valid_source,
                    kprob: 1.0
                })
                train_acc = self.get_accuracy(target_batch, batch_train_logits)
                valid_acc = self.get_accuracy(np.array(valid_target),
                                              batch_valid_logits)
                if batch_i % 100 == 0:
                    print('Epoch {:>3} Batch {:>4}/{} - Train Accuracy: {:>6.3f}, '
                          'Validation Accuracy: {:>6.3f}, Loss: {:>6.3f}'.format(
                              epoch_i, batch_i, len(train_source) // bsize,
                              train_acc, valid_acc, loss))
        end_time = time.time()

        # Save Model
        saver = tf.train.Saver()
        saver.save(sess, save_path)
        print('Model Trained and Saved')
        print('Cost time: {} sec'.format(end_time - start_time))
def train():
    (source_int_text, target_int_text), (source_vocab_to_int, target_vocab_to_int), _ = \
        helper.load_preprocess()

    # Check TensorFlow version
    assert LooseVersion(tf.__version__) >= LooseVersion('1.0'), \
        'Please use TensorFlow version 1.0 or newer'
    print('TensorFlow Version: {}'.format(tf.__version__))

    # Check for a GPU
    if not tf.test.gpu_device_name():
        warnings.warn('No GPU found. Please use a GPU to train your neural network.')
    else:
        print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))

    # Hyperparameters
    epochs = 3
    # Batch Size
    batch_size = 128
    # RNN Size
    rnn_size = 256
    # Number of Layers
    num_layers = 2
    # Embedding Size
    encoding_embedding_size = 200
    decoding_embedding_size = 200
    # Learning Rate
    learning_rate = 0.001
    # Dropout Keep Probability
    keep_probability = 0.5

    # Build the Graph
    save_path = 'ckpt'
    max_target_sentence_length = max([len(sentence) for sentence in source_int_text])

    train_graph = tf.Graph()
    with train_graph.as_default():
        input_data, targets, lr, keep_prob = model_inputs()
        sequence_length = tf.placeholder_with_default(
            max_target_sentence_length, None, name='sequence_length')
        input_shape = tf.shape(input_data)

        train_logits, inference_logits = seq2seq_model(
            tf.reverse(input_data, [-1]), targets, keep_prob, batch_size,
            sequence_length, len(source_vocab_to_int), len(target_vocab_to_int),
            encoding_embedding_size, decoding_embedding_size, rnn_size,
            num_layers, target_vocab_to_int)
        tf.identity(inference_logits, 'logits')

        with tf.name_scope("optimization"):
            # Loss function
            cost = tf.contrib.seq2seq.sequence_loss(
                train_logits, targets, tf.ones([input_shape[0], sequence_length]))
            # Optimizer
            optimizer = tf.train.AdamOptimizer(lr)
            # Gradient Clipping
            gradients = optimizer.compute_gradients(cost)
            capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var)
                                for grad, var in gradients if grad is not None]
            train_op = optimizer.apply_gradients(capped_gradients)

    def get_accuracy(target, logits):
        """Calculate accuracy, padding target and logits to a common length."""
        max_seq = max(target.shape[1], logits.shape[1])
        if max_seq - target.shape[1]:
            target = np.pad(target,
                            [(0, 0), (0, max_seq - target.shape[1])],
                            'constant')
        if max_seq - logits.shape[1]:
            logits = np.pad(logits,
                            [(0, 0), (0, max_seq - logits.shape[1]), (0, 0)],
                            'constant')
        return np.mean(np.equal(target, np.argmax(logits, 2)))

    # Hold out the first batch as a validation set
    train_source = source_int_text[batch_size:]
    train_target = target_int_text[batch_size:]
    valid_source = helper.pad_sentence_batch(source_int_text[:batch_size])
    valid_target = helper.pad_sentence_batch(target_int_text[:batch_size])

    with tf.Session(graph=train_graph) as sess:
        sess.run(tf.global_variables_initializer())
        for epoch_i in range(epochs):
            for batch_i, (source_batch, target_batch) in enumerate(
                    helper.batch_data(train_source, train_target, batch_size)):
                start_time = time.time()
                _, loss = sess.run(
                    [train_op, cost],
                    {
                        input_data: source_batch,
                        targets: target_batch,
                        lr: learning_rate,
                        sequence_length: target_batch.shape[1],
                        keep_prob: keep_probability
                    })
                batch_train_logits = sess.run(inference_logits, {
                    input_data: source_batch,
                    keep_prob: 1.0
                })
                batch_valid_logits = sess.run(inference_logits, {
                    input_data: valid_source,
                    keep_prob: 1.0
                })
                train_acc = get_accuracy(target_batch, batch_train_logits)
                valid_acc = get_accuracy(np.array(valid_target), batch_valid_logits)
                end_time = time.time()
                print('Epoch {:>3} Batch {:>4}/{} - Train Accuracy: {:>6.3f}, '
                      'Validation Accuracy: {:>6.3f}, Loss: {:>6.3f}'.format(
                          epoch_i, batch_i, len(source_int_text) // batch_size,
                          train_acc, valid_acc, loss))

        # Save Model
        saver = tf.train.Saver()
        saver.save(sess, save_path)
        print('Model Trained and Saved')

    helper.save_params(save_path)
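# Every snippet names its inference output with tf.identity(..., 'logits')
# before saving a checkpoint, which allows the graph to be reloaded by tensor
# name for translation. A hedged sketch of that reload path follows: the
# 'input:0' and 'keep_prob:0' names assume the placeholders were created with
# those names (model_inputs() / model_placeholders() are not shown), and the
# batch replication mirrors the commented-out probe in the reinforce snippet.
def _translate_sketch(sentence_ids, load_path='ckpt', batch_size=128):
    """Reload a saved seq2seq checkpoint and decode one sentence."""
    loaded_graph = tf.Graph()
    with tf.Session(graph=loaded_graph) as sess:
        loader = tf.train.import_meta_graph(load_path + '.meta')
        loader.restore(sess, load_path)
        input_data = loaded_graph.get_tensor_by_name('input:0')
        logits = loaded_graph.get_tensor_by_name('logits:0')
        keep_prob = loaded_graph.get_tensor_by_name('keep_prob:0')
        translate_logits = sess.run(
            logits, {input_data: [sentence_ids] * batch_size, keep_prob: 1.0})[0]
        return np.argmax(translate_logits, 1)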