def testEmbeddingTiedRNNSeq2Seq(self):
  # Assumes the surrounding test module imports `tensorflow as tf` and the
  # pre-1.0 `rnn_cell`/`seq2seq` modules (e.g. from tensorflow.models.rnn).
  with self.test_session() as sess:
    with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
      enc_inp = [tf.constant(1, tf.int32, shape=[2]) for _ in xrange(2)]
      dec_inp = [tf.constant(i, tf.int32, shape=[2]) for i in xrange(3)]
      cell = rnn_cell.BasicLSTMCell(2)
      dec, mem = seq2seq.embedding_tied_rnn_seq2seq(enc_inp, dec_inp, cell, 5)
      # `tf.variables` is not part of the public API; use the top-level op.
      sess.run([tf.initialize_all_variables()])
      res = sess.run(dec)
      self.assertEqual(len(res), 3)
      self.assertEqual(res[0].shape, (2, 5))
      res = sess.run(mem)
      self.assertEqual(len(res), 4)
      self.assertEqual(res[0].shape, (2, 4))

      # Test externally provided output projection.
      w = tf.get_variable("proj_w", [2, 5])
      b = tf.get_variable("proj_b", [5])
      with tf.variable_scope("proj_seq2seq"):
        dec, _ = seq2seq.embedding_tied_rnn_seq2seq(
            enc_inp, dec_inp, cell, 5, output_projection=(w, b))
      sess.run([tf.initialize_all_variables()])
      res = sess.run(dec)
      self.assertEqual(len(res), 3)
      self.assertEqual(res[0].shape, (2, 2))

      # Test that previous-feeding model ignores inputs after the first.
      dec_inp2 = [tf.constant(0, tf.int32, shape=[2]) for _ in xrange(3)]
      tf.get_variable_scope().reuse_variables()
      d1, _ = seq2seq.embedding_tied_rnn_seq2seq(
          enc_inp, dec_inp, cell, 5, feed_previous=True)
      d2, _ = seq2seq.embedding_tied_rnn_seq2seq(
          enc_inp, dec_inp2, cell, 5, feed_previous=True)
      d3, _ = seq2seq.embedding_tied_rnn_seq2seq(
          enc_inp, dec_inp2, cell, 5, feed_previous=tf.constant(True))
      res1 = sess.run(d1)
      res2 = sess.run(d2)
      res3 = sess.run(d3)
      self.assertAllClose(res1, res2)
      self.assertAllClose(res1, res3)
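# For context: a minimal, self-contained sketch (not part of the test above)
# showing how the same embedding_tied_rnn_seq2seq API is typically driven for
# greedy decoding. The `tensorflow.models.rnn` import path and the names
# `num_symbols`/`batch_size` are illustrative assumptions, matching the
# pre-1.0 era of the code above.
import tensorflow as tf
from tensorflow.models.rnn import rnn_cell, seq2seq

num_symbols = 5  # tied input/output vocabulary size (illustrative)
batch_size = 2

with tf.variable_scope("decode_demo", initializer=tf.constant_initializer(0.5)):
  enc_inp = [tf.constant(1, tf.int32, shape=[batch_size]) for _ in xrange(2)]
  # With feed_previous=True only the first decoder input (the GO symbol)
  # is read; later steps consume the argmax of the previous output.
  dec_inp = [tf.constant(0, tf.int32, shape=[batch_size]) for _ in xrange(3)]
  cell = rnn_cell.BasicLSTMCell(2)
  dec, _ = seq2seq.embedding_tied_rnn_seq2seq(
      enc_inp, dec_inp, cell, num_symbols, feed_previous=True)
  # Greedy output symbols: argmax over the logits at each decoder step.
  symbols = [tf.argmax(logits, 1) for logits in dec]

with tf.Session() as sess:
  sess.run(tf.initialize_all_variables())
  print(sess.run(symbols))  # one [batch_size] int vector per decoder step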
print('Loaded {} training examples'.format(len(train_exs)))

# with tf.Session() as sess:
# sess = tf.InteractiveSession()
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

# Placeholders for each training batch. NOTE: the seq2seq library is
# "time-major": it expects a list with one [batch_size] tensor per time step
# (list length = sequence length), not one [seq_length] tensor per example,
# so the shapes below are [_batch_size] and the lists have _seq_length items.
seqs = [tf.placeholder(tf.int32, shape=[_batch_size])
        for _ in xrange(_seq_length)]  # currently unused
encoder_inputs = [tf.placeholder(tf.int32, shape=[_batch_size])
                  for _ in xrange(_seq_length)]
decoder_inputs = [tf.placeholder(tf.int32, shape=[_batch_size])
                  for _ in xrange(_seq_length)]
targets = [tf.placeholder(tf.int32, shape=[_batch_size])
           for _ in xrange(_seq_length)]
target_weights = [tf.ones(dtype=tf.float32, shape=[_batch_size])
                  for _ in xrange(_seq_length)]

# Set up the tied seq-to-seq LSTM with the given parameters.
single_cell = rnn_cell.BasicLSTMCell(_lstm_cell_dimension)
cell = rnn_cell.MultiRNNCell([single_cell] * _lstm_num_layers)
outputs, _ = seq2seq.embedding_tied_rnn_seq2seq(
    encoder_inputs, decoder_inputs, cell, _vocab_size_including_GO)
# The loss must compare the decoder logits against `targets`; passing
# `encoder_inputs` here was a bug (the `targets` placeholders went unused).
seqloss = seq2seq.sequence_loss_by_example(
    outputs, targets, target_weights, _vocab_size_including_GO)

summary_writer = tf.train.SummaryWriter(_train_log_dir, sess.graph_def)
global_step = tf.Variable(0, name='global_step', trainable=False)

# Set up the optimizer with gradient clipping.
params = tf.trainable_variables()
gradients = tf.gradients(seqloss, params)
optimizer = tf.train.GradientDescentOptimizer(_lstm_learn_rate)
clipped_gradients, norm = tf.clip_by_global_norm(gradients,
                                                 _lstm_max_grad_norm)
train_op = optimizer.apply_gradients(zip(clipped_gradients, params),
                                     global_step=global_step)

# Initialize variables only after the full graph (including the optimizer)
# has been built, so nothing is left uninitialized.
sess.run(tf.initialize_all_variables())
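# To make the wiring above concrete: a hedged sketch of one training step.
# `batch_enc`, `batch_dec`, and `batch_targets` are hypothetical time-major
# int32 arrays of shape [_seq_length][_batch_size]; producing them from
# train_exs (e.g. via some get_batch helper) is not shown in the original.
mean_loss = tf.reduce_mean(seqloss)  # scalar summary of the per-example loss

def train_step(sess, batch_enc, batch_dec, batch_targets):
  # Bind one [_batch_size] slice per time step to each placeholder list.
  feed = {}
  for t in xrange(_seq_length):
    feed[encoder_inputs[t]] = batch_enc[t]
    feed[decoder_inputs[t]] = batch_dec[t]
    feed[targets[t]] = batch_targets[t]
  _, step, loss = sess.run([train_op, global_step, mean_loss], feed_dict=feed)
  return step, loss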