def __load_model(self, num_layers):
    # Initial memory value for recurrence.
    self.prev_mem = tf.zeros((self.train_batch_size, self.memory_dim))

    # choose RNN/GRU/LSTM cell
    with tf.variable_scope("train_test", reuse=True):
        lstm = rnn_cell.LSTMCell(self.memory_dim)
        # Stack num_layers LSTM cells to form a stacked decoder
        self.cell = rnn_cell.MultiRNNCell([lstm] * num_layers)

    # embedding model
    if not self.attention:
        with tf.variable_scope("train_test"):
            self.dec_outputs, self.dec_memory = seq2seq.embedding_rnn_seq2seq(
                self.enc_inp, self.dec_inp, self.cell,
                self.vocab_size, self.vocab_size, self.seq_length)
        with tf.variable_scope("train_test", reuse=True):
            self.dec_outputs_tst, _ = seq2seq.embedding_rnn_seq2seq(
                self.enc_inp, self.dec_inp, self.cell,
                self.vocab_size, self.vocab_size, self.seq_length,
                feed_previous=True)
    else:
        with tf.variable_scope("train_test"):
            self.dec_outputs, self.dec_memory = seq2seq.embedding_attention_seq2seq(
                self.enc_inp, self.dec_inp, self.cell,
                self.vocab_size, self.vocab_size, self.seq_length)
        with tf.variable_scope("train_test", reuse=True):
            self.dec_outputs_tst, _ = seq2seq.embedding_attention_seq2seq(
                self.enc_inp, self.dec_inp, self.cell,
                self.vocab_size, self.vocab_size, self.seq_length,
                feed_previous=True)
def __load_model(self):
    # Initial memory value for recurrence.
    self.prev_mem = tf.zeros((self.train_batch_size, self.memory_dim))

    # choose RNN/GRU/LSTM cell
    with tf.variable_scope("train_test", reuse=True):
        self.cell = rnn_cell.LSTMCell(self.memory_dim)

    # embedding model
    if not self.attention:
        with tf.variable_scope("train_test"):
            self.dec_outputs, self.dec_memory = seq2seq.embedding_rnn_seq2seq(
                self.enc_inp, self.dec_inp, self.cell,
                self.vocab_size, self.vocab_size, self.seq_length)
        with tf.variable_scope("train_test", reuse=True):
            self.dec_outputs_tst, _ = seq2seq.embedding_rnn_seq2seq(
                self.enc_inp, self.dec_inp, self.cell,
                self.vocab_size, self.vocab_size, self.seq_length,
                feed_previous=True)
    else:
        with tf.variable_scope("train_test"):
            self.dec_outputs, self.dec_memory = seq2seq.embedding_attention_seq2seq(
                self.enc_inp, self.dec_inp, self.cell,
                self.vocab_size, self.vocab_size, self.seq_length)
        with tf.variable_scope("train_test", reuse=True):
            self.dec_outputs_tst, _ = seq2seq.embedding_attention_seq2seq(
                self.enc_inp, self.dec_inp, self.cell,
                self.vocab_size, self.vocab_size, self.seq_length,
                feed_previous=True)
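Both variants above build the graph twice in the same "train_test" variable scope: the first pass (with feed_previous left at its default of False) teacher-forces the decoder with dec_inp for training, while the reuse=True pass shares the same weights but, via feed_previous=True, feeds each decoder step its own previous prediction for testing. A minimal sketch of turning the test-time logits into token ids follows; the session plumbing is an assumption, not part of the examples:

# Each element of dec_outputs_tst is a [batch, vocab_size] logit tensor,
# one per decoder timestep; argmax over the vocab axis gives token ids.
predictions = [tf.argmax(logits, 1) for logits in self.dec_outputs_tst]
# ids = sess.run(predictions, feed_dict=...)  # one id array per timestep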
def __load_model(self, num_layers):
    # Initial memory value for recurrence.
    self.prev_mem = tf.zeros((self.train_batch_size, self.memory_dim))

    # choose RNN/GRU/LSTM cell
    with tf.variable_scope("forward"):
        fw_single_cell = rnn_cell.GRUCell(self.memory_dim)
        # Stack num_layers GRU cells to form a stacked decoder
        self.forward_cell = rnn_cell.MultiRNNCell([fw_single_cell] * num_layers)
    with tf.variable_scope("backward"):
        bw_single_cell = rnn_cell.GRUCell(self.memory_dim)
        # Stack num_layers GRU cells to form a stacked decoder
        self.backward_cell = rnn_cell.MultiRNNCell([bw_single_cell] * num_layers)

    # embedding model
    if not self.attention:
        with tf.variable_scope("forward"):
            self.dec_outputs_fwd, _ = seq2seq.embedding_rnn_seq2seq(
                self.enc_inp_fwd, self.dec_inp, self.forward_cell,
                self.vocab_size, self.vocab_size, self.seq_length)
        with tf.variable_scope("forward", reuse=True):
            self.dec_outputs_fwd_tst, _ = seq2seq.embedding_rnn_seq2seq(
                self.enc_inp_fwd, self.dec_inp, self.forward_cell,
                self.vocab_size, self.vocab_size, self.seq_length,
                feed_previous=True)
        with tf.variable_scope("backward"):
            self.dec_outputs_bwd, _ = seq2seq.embedding_rnn_seq2seq(
                self.enc_inp_bwd, self.dec_inp, self.backward_cell,
                self.vocab_size, self.vocab_size, self.seq_length)
        with tf.variable_scope("backward", reuse=True):
            self.dec_outputs_bwd_tst, _ = seq2seq.embedding_rnn_seq2seq(
                self.enc_inp_bwd, self.dec_inp, self.backward_cell,
                self.vocab_size, self.vocab_size, self.seq_length,
                feed_previous=True)
    else:
        with tf.variable_scope("forward"):
            self.dec_outputs_fwd, _ = seq2seq.embedding_attention_seq2seq(
                self.enc_inp_fwd, self.dec_inp, self.forward_cell,
                self.vocab_size, self.vocab_size, self.seq_length)
        with tf.variable_scope("forward", reuse=True):
            self.dec_outputs_fwd_tst, _ = seq2seq.embedding_attention_seq2seq(
                self.enc_inp_fwd, self.dec_inp, self.forward_cell,
                self.vocab_size, self.vocab_size, self.seq_length,
                feed_previous=True)
        with tf.variable_scope("backward"):
            self.dec_outputs_bwd, _ = seq2seq.embedding_attention_seq2seq(
                self.enc_inp_bwd, self.dec_inp, self.backward_cell,
                self.vocab_size, self.vocab_size, self.seq_length)
        with tf.variable_scope("backward", reuse=True):
            self.dec_outputs_bwd_tst, _ = seq2seq.embedding_attention_seq2seq(
                self.enc_inp_bwd, self.dec_inp, self.backward_cell,
                self.vocab_size, self.vocab_size, self.seq_length,
                feed_previous=True)
def __load_model(self):
    # Initial memory value for recurrence.
    self.prev_mem = tf.zeros((self.train_batch_size, self.memory_dim))

    # choose RNN/GRU/LSTM cell
    with tf.variable_scope("forward"):
        self.forward_cell = rnn_cell.LSTMCell(self.memory_dim)
    with tf.variable_scope("backward"):
        self.backward_cell = rnn_cell.LSTMCell(self.memory_dim)

    # embedding model
    if not self.attention:
        with tf.variable_scope("forward"):
            self.dec_outputs_fwd, _ = seq2seq.embedding_rnn_seq2seq(
                self.enc_inp_fwd, self.dec_inp, self.forward_cell,
                self.vocab_size, self.vocab_size, self.seq_length)
        with tf.variable_scope("forward", reuse=True):
            self.dec_outputs_fwd_tst, _ = seq2seq.embedding_rnn_seq2seq(
                self.enc_inp_fwd, self.dec_inp, self.forward_cell,
                self.vocab_size, self.vocab_size, self.seq_length,
                feed_previous=True)
        with tf.variable_scope("backward"):
            self.dec_outputs_bwd, _ = seq2seq.embedding_rnn_seq2seq(
                self.enc_inp_bwd, self.dec_inp, self.backward_cell,
                self.vocab_size, self.vocab_size, self.seq_length)
        with tf.variable_scope("backward", reuse=True):
            self.dec_outputs_bwd_tst, _ = seq2seq.embedding_rnn_seq2seq(
                self.enc_inp_bwd, self.dec_inp, self.backward_cell,
                self.vocab_size, self.vocab_size, self.seq_length,
                feed_previous=True)
    else:
        with tf.variable_scope("forward"):
            self.dec_outputs_fwd, _ = seq2seq.embedding_attention_seq2seq(
                self.enc_inp_fwd, self.dec_inp, self.forward_cell,
                self.vocab_size, self.vocab_size, self.seq_length)
        with tf.variable_scope("forward", reuse=True):
            self.dec_outputs_fwd_tst, _ = seq2seq.embedding_attention_seq2seq(
                self.enc_inp_fwd, self.dec_inp, self.forward_cell,
                self.vocab_size, self.vocab_size, self.seq_length,
                feed_previous=True)
        with tf.variable_scope("backward"):
            self.dec_outputs_bwd, _ = seq2seq.embedding_attention_seq2seq(
                self.enc_inp_bwd, self.dec_inp, self.backward_cell,
                self.vocab_size, self.vocab_size, self.seq_length)
        with tf.variable_scope("backward", reuse=True):
            self.dec_outputs_bwd_tst, _ = seq2seq.embedding_attention_seq2seq(
                self.enc_inp_bwd, self.dec_inp, self.backward_cell,
                self.vocab_size, self.vocab_size, self.seq_length,
                feed_previous=True)
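Neither bidirectional example shows how the forward and backward decoder outputs are merged downstream. One plausible scheme, given purely as a hypothetical illustration and not taken from the source, is to average the two logit lists step by step before decoding:

# Hypothetical merge: average forward and backward logits at each timestep,
# then take the argmax over the vocabulary axis to get predicted token ids.
combined_logits = [0.5 * (fwd + bwd) for fwd, bwd in
                   zip(self.dec_outputs_fwd_tst, self.dec_outputs_bwd_tst)]
predictions = [tf.argmax(logits, 1) for logits in combined_logits]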
def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
    return seq2seq.embedding_rnn_seq2seq(
        encoder_inputs, decoder_inputs, cell, vocab_size, vocab_size,
        output_projection=output_projection, feed_previous=do_decode)
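This wrapper closes over cell, vocab_size, and output_projection so the same builder can produce a teacher-forced training graph (do_decode=False) and a self-feeding decoding graph (do_decode=True). A sketch of how it might be wired up; the variable names and shapes below are illustrative assumptions, not from the source:

# Project the cell output back to vocabulary logits (assumed setup).
w = tf.get_variable("proj_w", [cell.output_size, vocab_size])
b = tf.get_variable("proj_b", [vocab_size])
output_projection = (w, b)

outputs, states = seq2seq_f(encoder_inputs, decoder_inputs, False)  # training graph
tf.get_variable_scope().reuse_variables()                           # share weights
dec_outputs, _ = seq2seq_f(encoder_inputs, decoder_inputs, True)    # decoding graph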
def testEmbeddingRNNSeq2Seq(self):
    with self.test_session() as sess:
        with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
            enc_inp = [tf.constant(1, tf.int32, shape=[2]) for i in xrange(2)]
            dec_inp = [tf.constant(i, tf.int32, shape=[2]) for i in xrange(3)]
            cell = rnn_cell.BasicLSTMCell(2)
            dec, mem = seq2seq.embedding_rnn_seq2seq(enc_inp, dec_inp, cell, 2, 5)
            sess.run([tf.initialize_all_variables()])
            res = sess.run(dec)
            self.assertEqual(len(res), 3)
            self.assertEqual(res[0].shape, (2, 5))
            res = sess.run(mem)
            self.assertEqual(len(res), 4)
            self.assertEqual(res[0].shape, (2, 4))

            # Test externally provided output projection.
            w = tf.get_variable("proj_w", [2, 5])
            b = tf.get_variable("proj_b", [5])
            with tf.variable_scope("proj_seq2seq"):
                dec, _ = seq2seq.embedding_rnn_seq2seq(
                    enc_inp, dec_inp, cell, 2, 5, output_projection=(w, b))
            sess.run([tf.initialize_all_variables()])
            res = sess.run(dec)
            self.assertEqual(len(res), 3)
            self.assertEqual(res[0].shape, (2, 2))

            # Test that previous-feeding model ignores inputs after the first.
            dec_inp2 = [tf.constant(0, tf.int32, shape=[2]) for _ in xrange(3)]
            tf.get_variable_scope().reuse_variables()
            d1, _ = seq2seq.embedding_rnn_seq2seq(enc_inp, dec_inp, cell, 2, 5,
                                                  feed_previous=True)
            d2, _ = seq2seq.embedding_rnn_seq2seq(enc_inp, dec_inp2, cell, 2, 5,
                                                  feed_previous=True)
            d3, _ = seq2seq.embedding_rnn_seq2seq(enc_inp, dec_inp2, cell, 2, 5,
                                                  feed_previous=tf.constant(True))
            res1 = sess.run(d1)
            res2 = sess.run(d2)
            res3 = sess.run(d3)
            self.assertAllClose(res1, res2)
            self.assertAllClose(res1, res3)
# TF 0.x-era imports (assumed for this snippet; later 0.x releases moved
# rnn_cell and seq2seq under tf.nn).
import tempfile

import numpy as np
import tensorflow as tf
from tensorflow.models.rnn import rnn_cell, seq2seq

embedding_dim = 50
memory_dim = 100

x_seq = [tf.placeholder(tf.int32, shape=(None,), name="x%i" % t)
         for t in range(seq_length)]
t_seq = [tf.placeholder(tf.int32, shape=(None,), name="t%i" % t)
         for t in range(seq_length)]
weights = [tf.ones_like(t_i, dtype=tf.float32) for t_i in t_seq]

# Decoder input: prepend a "GO" token and drop the final token of the encoder input.
dec_inp = [tf.zeros_like(x_seq[0], dtype=np.int32, name="GO")] + x_seq[:-1]

# Initial memory value for recurrence.
prev_mem = tf.zeros((batch_size, memory_dim))

# GRU
cell = rnn_cell.GRUCell(memory_dim)

dec_outputs, dec_memory = seq2seq.embedding_rnn_seq2seq(
    x_seq, dec_inp, cell, vocab_size, vocab_size)

loss = seq2seq.sequence_loss(dec_outputs, t_seq, weights, vocab_size)
tf.scalar_summary("loss", loss)

magnitude = tf.sqrt(tf.reduce_sum(tf.square(dec_memory[1])))
tf.scalar_summary("magnitude at t=1", magnitude)

summary_op = tf.merge_all_summaries()

learning_rate = 0.05
momentum = 0.9
optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)
train_op = optimizer.minimize(loss)

logdir = tempfile.mkdtemp()
print logdir
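With the graph above in place, one training step just feeds a batch into the x and t placeholders and runs the train op. A hedged sketch of the loop; gen_batch is a hypothetical helper returning seq_length-long lists of int32 arrays, and is not part of the original example:

sess = tf.Session()
sess.run(tf.initialize_all_variables())
summary_writer = tf.train.SummaryWriter(logdir, sess.graph_def)

for step in range(500):
    X, T = gen_batch(batch_size, seq_length, vocab_size)  # hypothetical helper
    feed_dict = {x_seq[t]: X[t] for t in range(seq_length)}
    feed_dict.update({t_seq[t]: T[t] for t in range(seq_length)})
    _, loss_val, summary = sess.run([train_op, loss, summary_op], feed_dict)
    summary_writer.add_summary(summary, step)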
def __init__(self):
    # Set up hyperparameters
    self.num_layers = 3
    self.layer_size = 256

    # Set up the core RNN cells of the tensor network
    single_cell = rnn_cell.BasicLSTMCell(self.layer_size)
    self.cell = rnn_cell.MultiRNNCell([single_cell] * self.num_layers)

    # Set up placeholders for the inputs and outputs.
    # Leave batch size unspecified as a None shape.
    # The input problem
    self.encoder_inputs = [
        tf.placeholder(tf.int32, shape=[None], name='encoder{0}'.format(i))
        for i in range(SOURCE_LEN)
    ]
    # The correct answers
    self.labels = [
        tf.placeholder(tf.int32, shape=[None], name='labels{0}'.format(i))
        for i in range(TARGET_LEN)
    ]
    # Each item is weighted equally, so weights are ones
    self.weights = [
        tf.ones_like(label, dtype=tf.float32) for label in self.labels
    ]

    # decoder_inputs holds the correct output from the previous timestep,
    # with a zero-hot "GO" token on the first one
    go_token = tf.zeros_like(self.labels[0], dtype=np.int32, name="GO")
    self.decoder_inputs = [go_token] + self.labels[:-1]

    # Construct the guts of the model.
    # This same model will be used for training and testing, so we
    # don't feed_previous.
    self.outputs, self.states = seq2seq.embedding_rnn_seq2seq(
        self.encoder_inputs, self.decoder_inputs, self.cell,
        len(SOURCE_VOCAB), len(TARGET_VOCAB), feed_previous=False)

    self.loss = seq2seq.sequence_loss(self.outputs, self.labels, self.weights)

    # Set up the ops we need for training
    if True:  # momentum
        learning_rate = 0.05
        momentum = 0.9
        self.optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)
        self.train_op = self.optimizer.minimize(self.loss)
    else:  # adam
        # Assumes a batch size of 100
        self.cost = tf.reduce_sum(self.loss) / TARGET_LEN / 100
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        # Clip gradients at 5.0
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), 5.0)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

    self.sess = tf.Session()
    self.sess.run(tf.initialize_all_variables())
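Because this model is built with feed_previous=False, both training and evaluation feed the ground-truth labels as decoder inputs. A companion training step might look like the following sketch; train_step and its batch arguments are hypothetical, not part of the source:

def train_step(self, encoder_batch, label_batch):
    # encoder_batch: SOURCE_LEN arrays of int32 ids, one array per timestep;
    # label_batch: TARGET_LEN arrays, likewise.
    feed_dict = {self.encoder_inputs[i]: encoder_batch[i]
                 for i in range(SOURCE_LEN)}
    feed_dict.update({self.labels[i]: label_batch[i]
                      for i in range(TARGET_LEN)})
    _, loss_val = self.sess.run([self.train_op, self.loss], feed_dict)
    return loss_val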