def bi_lstm_layer(in_layer, config, reuse=False, name='Bi_LSTM'):
    """Apply a bidirectional peephole LSTM to a 3-D input tensor.

    Args:
        in_layer: Tensor whose first three static dimensions are read as
            (batch_size, num_steps, input_size). They are converted with
            ``int``, so they must be statically known.
        config: Object exposing ``rnn_hidden_units`` and ``rnn_output_size``.
        reuse: Passed through to ``bi_rnn``.
        name: Passed to ``bi_rnn`` as its ``scope``.

    Returns:
        The per-step outputs returned by ``bi_rnn``, packed into a single
        tensor and transposed with ``perm=[1, 0, 2]``.
    """
    hidden_units = config.rnn_hidden_units
    projection_size = config.rnn_output_size

    static_shape = in_layer.get_shape()
    batch_size = int(static_shape[0])
    num_steps = int(static_shape[1])
    input_size = int(static_shape[2])

    # Both directions use identical settings and share one initializer.
    initializer = tf.random_uniform_initializer(-0.1, 0.1)
    cell_options = dict(use_peepholes=True,
                        num_proj=projection_size,
                        cell_clip=1.0,
                        initializer=initializer)
    lstm_cell_f = rnn_cell.LSTMCell(hidden_units, input_size, **cell_options)
    lstm_cell_b = rnn_cell.LSTMCell(hidden_units, input_size, **cell_options)

    # Only the forward cell's zero state is built; it is what bi_rnn
    # receives as ``initial_state``.
    initial_state_f = lstm_cell_f.zero_state(batch_size, tf.float32)

    # Split the input into num_steps pieces and reshape each piece to
    # (batch_size, input_size).
    inputs_list = []
    for step_slice in tf.split(1, num_steps, in_layer):
        inputs_list.append(tf.reshape(step_slice, [batch_size, input_size]))

    rnn_out, _ = bi_rnn(lstm_cell_f,
                        lstm_cell_b,
                        inputs_list,
                        initial_state=initial_state_f,
                        scope=name,
                        reuse=reuse)

    stacked = tf.pack(rnn_out)
    return tf.transpose(stacked, perm=[1, 0, 2])
def __init__(self, rnn_size, rnn_layer, batch_size, input_embedding_size,
             dim_image, dim_hidden, max_words_q, vocabulary_size,
             drop_out_rate):
    """Store the hyper-parameters and create the model's variables and cells.

    Every weight/bias is initialised uniformly in [-0.08, 0.08].

    NOTE(review): ``num_output`` is not a parameter of this method; it is
    expected to be defined in the enclosing module -- confirm.
    """
    self.rnn_size = rnn_size
    self.rnn_layer = rnn_layer
    self.batch_size = batch_size
    self.input_embedding_size = input_embedding_size
    self.dim_image = dim_image
    self.dim_hidden = dim_hidden
    self.max_words_q = max_words_q
    self.vocabulary_size = vocabulary_size
    self.drop_out_rate = drop_out_rate

    def uniform(shape):
        # Shared initial-value recipe for all variables below.
        return tf.random_uniform(shape, -0.08, 0.08)

    # Question embedding.
    self.embed_ques_W = tf.Variable(
        uniform([self.vocabulary_size, self.input_embedding_size]),
        name='embed_ques_W')

    # RNN encoder: two peephole LSTM cells, each wrapped with output
    # dropout, stacked into one multi-layer cell.
    self.lstm_1 = rnn_cell.LSTMCell(rnn_size, input_embedding_size,
                                    use_peepholes=True)
    self.lstm_dropout_1 = rnn_cell.DropoutWrapper(
        self.lstm_1, output_keep_prob=1 - self.drop_out_rate)
    self.lstm_2 = rnn_cell.LSTMCell(rnn_size, rnn_size, use_peepholes=True)
    self.lstm_dropout_2 = rnn_cell.DropoutWrapper(
        self.lstm_2, output_keep_prob=1 - self.drop_out_rate)
    self.stacked_lstm = rnn_cell.MultiRNNCell(
        [self.lstm_dropout_1, self.lstm_dropout_2])

    # State embedding.
    self.embed_state_W = tf.Variable(
        uniform([2 * rnn_size * rnn_layer, self.dim_hidden]),
        name='embed_state_W')
    self.embed_state_b = tf.Variable(
        uniform([self.dim_hidden]), name='embed_state_b')

    # Image embedding.
    self.embed_image_W = tf.Variable(
        uniform([dim_image, self.dim_hidden]), name='embed_image_W')
    self.embed_image_b = tf.Variable(
        uniform([dim_hidden]), name='embed_image_b')

    # Score embedding.
    self.embed_scor_W = tf.Variable(
        uniform([dim_hidden, num_output]), name='embed_scor_W')
    self.embed_scor_b = tf.Variable(
        uniform([num_output]), name='embed_scor_b')
def __load_model(self, num_layers):
    """Build forward and backward seq2seq graphs (train and test variants).

    For each direction a stacked LSTM cell is created, then the seq2seq
    builder is invoked twice under the same variable scope: once normally
    and once with ``reuse=True`` and ``feed_previous=True``.

    Args:
        num_layers: Number of times the single LSTM cell is repeated in each
            ``MultiRNNCell``.
    """
    # Initial memory value for recurrence.
    self.prev_mem = tf.zeros((self.train_batch_size, self.memory_dim))

    # One stacked LSTM cell per direction.
    with tf.variable_scope("forward"):
        fw_layer = rnn_cell.LSTMCell(self.memory_dim)
        self.forward_cell = rnn_cell.MultiRNNCell([fw_layer] * num_layers)
    with tf.variable_scope("backward"):
        bw_layer = rnn_cell.LSTMCell(self.memory_dim)
        self.backward_cell = rnn_cell.MultiRNNCell([bw_layer] * num_layers)

    # The attention and non-attention paths differ only in the builder used.
    if self.attention:
        build_seq2seq = seq2seq.embedding_attention_seq2seq
    else:
        build_seq2seq = seq2seq.embedding_rnn_seq2seq

    def build_pair(scope_name, enc_inp, cell):
        # Training graph first, then the weight-sharing test graph that
        # feeds the decoder its own previous output.
        with tf.variable_scope(scope_name):
            train_outputs, _ = build_seq2seq(
                enc_inp, self.dec_inp, cell,
                self.vocab_size, self.vocab_size, self.seq_length)
        with tf.variable_scope(scope_name, reuse=True):
            test_outputs, _ = build_seq2seq(
                enc_inp, self.dec_inp, cell,
                self.vocab_size, self.vocab_size, self.seq_length,
                feed_previous=True)
        return train_outputs, test_outputs

    self.dec_outputs_fwd, self.dec_outputs_fwd_tst = build_pair(
        "forward", self.enc_inp_fwd, self.forward_cell)
    self.dec_outputs_bwd, self.dec_outputs_bwd_tst = build_pair(
        "backward", self.enc_inp_bwd, self.backward_cell)
def _testShardNoShardEquivalentOutput(self, use_gpu):
    """Check that two LSTM cells differing only in sharding agree.

    Both cells use the same constant initializer; their outputs and states
    must match to within ``atol=1e-3``.
    """
    num_units, input_size, batch_size = 3, 5, 2
    num_proj, num_proj_shards, num_unit_shards = 4, 4, 2

    with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
        # The same placeholder object is repeated for all ten steps.
        inputs = 10 * [tf.placeholder(tf.float32)]
        initializer = tf.constant_initializer(0.001)

        # NOTE: despite the names, ``cell_noshard`` is the cell given the
        # shard arguments and ``cell_shard`` is the one without them.
        cell_noshard = rnn_cell.LSTMCell(
            num_units, input_size,
            num_proj=num_proj,
            use_peepholes=True,
            initializer=initializer,
            num_unit_shards=num_unit_shards,
            num_proj_shards=num_proj_shards)
        cell_shard = rnn_cell.LSTMCell(
            num_units, input_size,
            use_peepholes=True,
            initializer=initializer,
            num_proj=num_proj)

        with tf.variable_scope("noshard_scope"):
            outputs_noshard, states_noshard = rnn.rnn(
                cell_noshard, inputs, dtype=tf.float32)
        with tf.variable_scope("shard_scope"):
            outputs_shard, states_shard = rnn.rnn(
                cell_shard, inputs, dtype=tf.float32)

        self.assertEqual(len(outputs_noshard), len(inputs))
        self.assertEqual(len(outputs_noshard), len(outputs_shard))

        tf.initialize_all_variables().run()
        input_value = np.random.randn(batch_size, input_size)
        feeds = {placeholder: input_value for placeholder in inputs}

        values_noshard = sess.run(outputs_noshard, feed_dict=feeds)
        values_shard = sess.run(outputs_shard, feed_dict=feeds)
        state_values_noshard = sess.run(states_noshard, feed_dict=feeds)
        state_values_shard = sess.run(states_shard, feed_dict=feeds)

        self.assertEqual(len(values_noshard), len(values_shard))
        self.assertEqual(len(state_values_noshard), len(state_values_shard))

        output_pairs = zip(values_noshard, values_shard)
        for v_noshard, v_shard in output_pairs:
            self.assertAllClose(v_noshard, v_shard, atol=1e-3)

        state_pairs = zip(state_values_noshard, state_values_shard)
        for s_noshard, s_shard in state_pairs:
            self.assertAllClose(s_noshard, s_shard, atol=1e-3)
def __load_model(self):
    """Build the training and test seq2seq graphs under one shared scope.

    The training graph is built under the ``"train_test"`` scope; the test
    graph is built under the same scope with ``reuse=True`` and
    ``feed_previous=True``.
    """
    # Initial memory value for recurrence.
    self.prev_mem = tf.zeros((self.train_batch_size, self.memory_dim))

    # The recurrent cell used by both graphs.
    with tf.variable_scope("train_test", reuse=True):
        self.cell = rnn_cell.LSTMCell(self.memory_dim)

    # The attention and non-attention paths differ only in the builder used.
    if self.attention:
        build_seq2seq = seq2seq.embedding_attention_seq2seq
    else:
        build_seq2seq = seq2seq.embedding_rnn_seq2seq

    with tf.variable_scope("train_test"):
        self.dec_outputs, self.dec_memory = build_seq2seq(
            self.enc_inp, self.dec_inp, self.cell,
            self.vocab_size, self.vocab_size, self.seq_length)

    with tf.variable_scope("train_test", reuse=True):
        self.dec_outputs_tst, _ = build_seq2seq(
            self.enc_inp, self.dec_inp, self.cell,
            self.vocab_size, self.vocab_size, self.seq_length,
            feed_previous=True)
def _testDoubleInput(self, use_gpu):
    """Run a sharded, projected LSTM on float64 inputs and check the dtype."""
    num_units, input_size, batch_size = 3, 5, 2
    num_proj, num_proj_shards, num_unit_shards = 4, 4, 2

    with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
        initializer = tf.random_uniform_initializer(-1, 1, seed=self._seed)
        # The same float64 placeholder is repeated for all ten steps.
        inputs = 10 * [tf.placeholder(tf.float64)]

        cell = rnn_cell.LSTMCell(
            num_units,
            input_size=input_size,
            use_peepholes=True,
            num_proj=num_proj,
            num_unit_shards=num_unit_shards,
            num_proj_shards=num_proj_shards,
            initializer=initializer)

        start_state = cell.zero_state(batch_size, tf.float64)
        outputs, _ = rnn.rnn(cell, inputs, initial_state=start_state)
        self.assertEqual(len(outputs), len(inputs))

        tf.initialize_all_variables().run()

        random_batch = np.random.randn(batch_size, input_size)
        input_value = np.asarray(random_batch, dtype=np.float64)
        values = sess.run(outputs, feed_dict={inputs[0]: input_value})

        self.assertEqual(values[0].dtype, input_value.dtype)
def _testNoProjNoShardingSimpleStateSaver(self, use_gpu):
    """Check that ``state_saving_rnn`` saves the final LSTM state.

    The last state returned by the RNN must equal the value exposed by the
    state saver's ``saved_state``.
    """
    num_units, input_size, batch_size = 3, 5, 2

    with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
        initializer = tf.random_uniform_initializer(
            -0.01, 0.01, seed=self._seed)
        state_saver = TestStateSaver(batch_size, 2 * num_units)
        cell = rnn_cell.LSTMCell(
            num_units, input_size,
            use_peepholes=False,
            initializer=initializer)

        # The same placeholder is repeated for all ten steps.
        step_input = tf.placeholder(tf.float32,
                                    shape=(batch_size, input_size))
        inputs = 10 * [step_input]

        with tf.variable_scope("share_scope"):
            outputs, states = rnn.state_saving_rnn(
                cell, inputs,
                state_saver=state_saver,
                state_name="save_lstm")

        self.assertEqual(len(outputs), len(inputs))
        expected_shape = [batch_size, num_units]
        for out in outputs:
            self.assertEqual(out.get_shape().as_list(), expected_shape)

        tf.initialize_all_variables().run()
        input_value = np.random.randn(batch_size, input_size)

        fetches = [states[-1], state_saver.saved_state]
        last_state_value, saved_state_value = sess.run(
            fetches, feed_dict={inputs[0]: input_value})
        self.assertAllEqual(last_state_value, saved_state_value)
def _testProjSharding(self, use_gpu):
    """Smoke-test an LSTM with projection and unit/projection sharding."""
    num_units, input_size, batch_size = 3, 5, 2
    num_proj, num_proj_shards, num_unit_shards = 4, 4, 2

    with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
        initializer = tf.random_uniform_initializer(
            -0.01, 0.01, seed=self._seed)

        # The same placeholder (unknown batch size) is repeated ten times.
        step_input = tf.placeholder(tf.float32, shape=(None, input_size))
        inputs = 10 * [step_input]

        cell = rnn_cell.LSTMCell(
            num_units,
            input_size=input_size,
            use_peepholes=True,
            num_proj=num_proj,
            num_unit_shards=num_unit_shards,
            num_proj_shards=num_proj_shards,
            initializer=initializer)

        outputs, _ = rnn.rnn(cell, inputs, dtype=tf.float32)
        self.assertEqual(len(outputs), len(inputs))

        tf.initialize_all_variables().run()
        input_value = np.random.randn(batch_size, input_size)
        # Only checks that evaluation succeeds; values are not inspected.
        sess.run(outputs, feed_dict={inputs[0]: input_value})
def __init__(self, dim_image, n_words, dim_hidden, batch_size, n_lstm_steps,
             drop_out_rate, bias_init_vector=None):
    """Store hyper-parameters and create the model's variables and LSTM cell.

    Weight matrices are initialised uniformly in [-0.1, 0.1]; biases are
    zero, except ``embed_word_b``, which takes ``bias_init_vector`` (cast to
    float32) when one is supplied.
    """
    self.dim_image = dim_image
    self.n_words = n_words
    self.dim_hidden = dim_hidden
    self.batch_size = batch_size
    self.n_lstm_steps = n_lstm_steps
    self.drop_out_rate = drop_out_rate

    def uniform(shape):
        # Shared initial-value recipe for the weight matrices.
        return tf.random_uniform(shape, -0.1, 0.1)

    # NOTE(review): assumes only the word-embedding table is placed under
    # the CPU device scope -- confirm against the intended placement.
    with tf.device("/cpu:0"):
        self.Wemb = tf.Variable(uniform([n_words, dim_hidden]), name='Wemb')

    # Peephole LSTM with output dropout.
    self.lstm3 = rnn_cell.LSTMCell(self.dim_hidden, 2 * self.dim_hidden,
                                   use_peepholes=True)
    self.lstm3_dropout = rnn_cell.DropoutWrapper(
        self.lstm3, output_keep_prob=1 - self.drop_out_rate)

    # Image encoding.
    self.encode_image_W = tf.Variable(uniform([dim_image, dim_hidden]),
                                      name='encode_image_W')
    self.encode_image_b = tf.Variable(tf.zeros([dim_hidden]),
                                      name='encode_image_b')

    # Attention parameters.
    self.embed_att_w = tf.Variable(uniform([dim_hidden, 1]),
                                   name='embed_att_w')
    self.embed_att_Wa = tf.Variable(uniform([dim_hidden, dim_hidden]),
                                    name='embed_att_Wa')
    self.embed_att_Ua = tf.Variable(uniform([dim_hidden, dim_hidden]),
                                    name='embed_att_Ua')
    self.embed_att_ba = tf.Variable(tf.zeros([dim_hidden]),
                                    name='embed_att_ba')

    # Word output layer.
    self.embed_word_W = tf.Variable(uniform([dim_hidden, n_words]),
                                    name='embed_word_W')
    if bias_init_vector is None:
        word_bias_init = tf.zeros([n_words])
    else:
        word_bias_init = bias_init_vector.astype(np.float32)
    self.embed_word_b = tf.Variable(word_bias_init, name='embed_word_b')

    self.embed_nn_Wp = tf.Variable(uniform([3 * dim_hidden, dim_hidden]),
                                   name='embed_nn_Wp')
    self.embed_nn_bp = tf.Variable(tf.zeros([dim_hidden]),
                                   name='embed_nn_bp')
def testSharingWeightsWithDifferentNamescope(self):
    """Check that weights are shared across different name scopes.

    Two RNNs are built under different ``tf.name_scope`` blocks but the same
    variable scope (the second with ``reuse=True``); their outputs must be
    equal.
    """
    num_units, input_size, batch_size = 3, 5, 2
    num_proj = 4

    with self.test_session(graph=tf.Graph()) as sess:
        initializer = tf.random_uniform_initializer(-1, 1, seed=self._seed)

        # The same placeholder is repeated for all ten steps.
        step_input = tf.placeholder(tf.float32, shape=(None, input_size))
        inputs = 10 * [step_input]

        cell = rnn_cell.LSTMCell(
            num_units, input_size,
            use_peepholes=True,
            num_proj=num_proj,
            initializer=initializer)

        with tf.name_scope("scope0"):
            with tf.variable_scope("share_scope"):
                outputs0, _ = rnn.rnn(cell, inputs, dtype=tf.float32)

        with tf.name_scope("scope1"):
            with tf.variable_scope("share_scope", reuse=True):
                outputs1, _ = rnn.rnn(cell, inputs, dtype=tf.float32)

        tf.initialize_all_variables().run()
        input_value = np.random.randn(batch_size, input_size)

        # Evaluate both RNNs in a single run, then split the results.
        output_values = sess.run(outputs0 + outputs1,
                                 feed_dict={inputs[0]: input_value})
        outputs0_values = output_values[:10]
        outputs1_values = output_values[10:]

        self.assertEqual(len(outputs0_values), len(outputs1_values))
        for first, second in zip(outputs0_values, outputs1_values):
            self.assertAllEqual(first, second)
def _testCellClipping(self, use_gpu):
    """Check that ``cell_clip=0.0`` forces every LSTM output to zero."""
    num_units, input_size, batch_size = 3, 5, 2

    with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
        initializer = tf.random_uniform_initializer(
            -0.01, 0.01, seed=self._seed)
        cell = rnn_cell.LSTMCell(
            num_units, input_size,
            use_peepholes=True,
            cell_clip=0.0,
            initializer=initializer)

        # The same placeholder is repeated for all ten steps.
        step_input = tf.placeholder(tf.float32,
                                    shape=(batch_size, input_size))
        inputs = 10 * [step_input]

        outputs, _ = rnn.rnn(cell, inputs, dtype=tf.float32)
        self.assertEqual(len(outputs), len(inputs))

        expected_shape = [batch_size, num_units]
        for out in outputs:
            self.assertEqual(out.get_shape().as_list(), expected_shape)

        tf.initialize_all_variables().run()
        input_value = np.random.randn(batch_size, input_size)
        values = sess.run(outputs, feed_dict={inputs[0]: input_value})

        for value in values:
            # If cell c is clipped to 0, tanh(c) = 0, hence m == 0.
            self.assertAllEqual(value, np.zeros((batch_size, num_units)))
def prediction(self):
    """Build the bidirectional-LSTM prediction graph.

    Returns:
        Softmax probabilities reshaped to ``[-1, max_length, num_classes]``,
        where both sizes come from the static shape of ``self.target``.
    """
    def make_direction_cell():
        # One LSTM cell with output dropout.
        cell = rnn_cell.LSTMCell(self._num_hidden)
        return rnn_cell.DropoutWrapper(cell, output_keep_prob=self.dropout)

    fw_cell = make_direction_cell()
    bw_cell = make_direction_cell()
    if self._num_layers > 1:
        fw_cell = rnn_cell.MultiRNNCell([fw_cell] * self._num_layers)
        bw_cell = rnn_cell.MultiRNNCell([bw_cell] * self._num_layers)

    # Swap the first two axes of the data and unpack it into a list, which
    # is the input form given to bidirectional_rnn.
    step_inputs = tf.unpack(tf.transpose(self.data, perm=[1, 0, 2]))
    output, _, _ = rnn.bidirectional_rnn(
        fw_cell, bw_cell, step_inputs,
        dtype=tf.float32,
        sequence_length=self.length)

    max_length = int(self.target.get_shape()[1])
    num_classes = int(self.target.get_shape()[2])

    # The output layer takes 2 * num_hidden features (both directions).
    feature_size = 2 * self._num_hidden
    weight, bias = self._weight_and_bias(feature_size, num_classes)

    # Flatten so the same weights are applied to every row.
    stacked = tf.transpose(tf.pack(output), perm=[1, 0, 2])
    flat_output = tf.reshape(stacked, [-1, feature_size])
    probabilities = tf.nn.softmax(tf.matmul(flat_output, weight) + bias)
    return tf.reshape(probabilities, [-1, max_length, num_classes])
def __init__(self, params, emb_mat):
    """Build the embedding projection stack and the multi-layer RNN cell.

    Args:
        params: Configuration object. Reads ``vocab_size``, ``hidden_size``,
            ``rnn_num_layers``, ``word_size``, ``emb_num_layers``, ``lstm``,
            ``train`` and ``keep_prob``; depending on ``lstm`` also
            ``forget_bias`` or ``cell_clip``.
        emb_mat: Initial embedding matrix, passed through
            ``params.emb_num_layers`` tanh(linear) layers.

    Raises:
        ValueError: If ``params.lstm`` is not one of ``'basic'``,
            ``'regular'`` or ``'gru'``.
    """
    self.params = params
    V, d, L, e = (params.vocab_size, params.hidden_size,
                  params.rnn_num_layers, params.word_size)

    # Every embedding layer has width d; computed once and reused below.
    emb_hidden_sizes = [d for _ in range(params.emb_num_layers)]

    prev_size = e
    for layer_idx, cur_hidden_size in enumerate(emb_hidden_sizes):
        with tf.variable_scope("emb_%d" % layer_idx):
            emb_mat = tf.tanh(
                my.nn.linear([V, prev_size], cur_hidden_size, emb_mat))
        prev_size = cur_hidden_size
    self.emb_mat = emb_mat

    self.emb_hidden_sizes = emb_hidden_sizes
    # With no embedding layers the RNN consumes raw word vectors of size e.
    self.input_size = emb_hidden_sizes[-1] if emb_hidden_sizes else e

    if params.lstm == 'basic':
        self.first_cell = my.rnn_cell.BasicLSTMCell(
            d, input_size=self.input_size, forget_bias=params.forget_bias)
        self.second_cell = my.rnn_cell.BasicLSTMCell(
            d, forget_bias=params.forget_bias)
    elif params.lstm == 'regular':
        self.first_cell = rnn_cell.LSTMCell(
            d, self.input_size, cell_clip=params.cell_clip)
        self.second_cell = rnn_cell.LSTMCell(
            d, d, cell_clip=params.cell_clip)
    elif params.lstm == 'gru':
        self.first_cell = rnn_cell.GRUCell(d, input_size=self.input_size)
        self.second_cell = rnn_cell.GRUCell(d)
    else:
        # ValueError is still an Exception, so existing handlers keep
        # working, but the failure now says what was wrong.
        raise ValueError(
            "Unknown lstm type %r; expected 'basic', 'regular' or 'gru'"
            % (params.lstm,))

    # Dropout is applied to the first cell only, and only while training.
    if params.train and params.keep_prob < 1.0:
        self.first_cell = tf.nn.rnn_cell.DropoutWrapper(
            self.first_cell,
            input_keep_prob=params.keep_prob,
            output_keep_prob=params.keep_prob)

    # The first cell takes the embedding input; the same second-cell object
    # is repeated for the remaining L - 1 layers.
    self.cell = rnn_cell.MultiRNNCell(
        [self.first_cell] + [self.second_cell] * (L - 1))
    self.scope = tf.get_variable_scope()
    self.used = False
def _testDoubleInputWithDropoutAndDynamicCalculation(self, use_gpu):
    """Smoke test for using LSTM with doubles, dropout, dynamic calculation."""
    num_units, input_size, batch_size = 3, 5, 2
    num_proj, num_proj_shards, num_unit_shards = 4, 4, 2

    with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
        sequence_length = tf.placeholder(tf.int64)
        initializer = tf.random_uniform_initializer(
            -0.01, 0.01, seed=self._seed)
        # The same float64 placeholder is repeated for all ten steps.
        inputs = 10 * [tf.placeholder(tf.float64)]

        cell = rnn_cell.LSTMCell(
            num_units,
            input_size=input_size,
            use_peepholes=True,
            num_proj=num_proj,
            num_unit_shards=num_unit_shards,
            num_proj_shards=num_proj_shards,
            initializer=initializer)
        dropout_cell = rnn_cell.DropoutWrapper(cell, 0.5, seed=0)

        start_state = cell.zero_state(batch_size, tf.float64)
        outputs, states = rnn.rnn(
            dropout_cell, inputs,
            sequence_length=sequence_length,
            initial_state=start_state)

        self.assertEqual(len(outputs), len(inputs))
        self.assertEqual(len(outputs), len(states))

        tf.initialize_all_variables().run(
            feed_dict={sequence_length: [2, 3]})

        random_batch = np.random.randn(batch_size, input_size)
        input_value = np.asarray(random_batch, dtype=np.float64)

        values = sess.run(
            outputs,
            feed_dict={inputs[0]: input_value, sequence_length: [2, 3]})
        state_values = sess.run(
            states,
            feed_dict={inputs[0]: input_value, sequence_length: [2, 3]})

        self.assertEqual(values[0].dtype, input_value.dtype)
        self.assertEqual(state_values[0].dtype, input_value.dtype)
def prediction(self):
    """Build the stacked-LSTM prediction graph.

    Returns:
        Softmax probabilities reshaped to ``[-1, max_length, num_classes]``,
        where both sizes come from the static shape of ``self.target``.
    """
    # Recurrent network: LSTM -> output dropout -> stacked num_layers times.
    base_cell = rnn_cell.LSTMCell(self._num_hidden)
    dropout_cell = rnn_cell.DropoutWrapper(
        base_cell, output_keep_prob=self.dropout)
    stacked_cell = rnn_cell.MultiRNNCell([dropout_cell] * self._num_layers)
    output, _ = rnn.dynamic_rnn(stacked_cell, self.data, dtype=tf.float32)

    # Softmax layer.
    max_length = int(self.target.get_shape()[1])
    num_classes = int(self.target.get_shape()[2])
    weight, bias = self._weight_and_bias(self._num_hidden, num_classes)

    # Flatten to apply the same weights to all time steps.
    flat_output = tf.reshape(output, [-1, self._num_hidden])
    logits = tf.matmul(flat_output, weight) + bias
    probabilities = tf.nn.softmax(logits)
    return tf.reshape(probabilities, [-1, max_length, num_classes])
def testSharingWeightsWithReuse(self):
    """Check variable sharing with ``reuse=True`` and non-sharing otherwise.

    RNNs built under the same variable scope (second with ``reuse=True``)
    must produce equal outputs; an RNN under a different scope must differ.
    """
    num_units, input_size, batch_size = 3, 5, 2
    num_proj = 4

    with self.test_session(graph=tf.Graph()) as sess:
        initializer = tf.random_uniform_initializer(-1, 1, seed=self._seed)

        # The same placeholder is repeated for all ten steps.
        step_input = tf.placeholder(tf.float32, shape=(None, input_size))
        inputs = 10 * [step_input]

        cell = rnn_cell.LSTMCell(
            num_units, input_size,
            use_peepholes=True,
            num_proj=num_proj,
            initializer=initializer)

        with tf.variable_scope("share_scope"):
            outputs0, _ = rnn.rnn(cell, inputs, dtype=tf.float32)
        with tf.variable_scope("share_scope", reuse=True):
            outputs1, _ = rnn.rnn(cell, inputs, dtype=tf.float32)
        with tf.variable_scope("diff_scope"):
            outputs2, _ = rnn.rnn(cell, inputs, dtype=tf.float32)

        tf.initialize_all_variables().run()
        input_value = np.random.randn(batch_size, input_size)

        # Evaluate all three RNNs in a single run, then split the results.
        output_values = sess.run(outputs0 + outputs1 + outputs2,
                                 feed_dict={inputs[0]: input_value})
        outputs0_values = output_values[:10]
        outputs1_values = output_values[10:20]
        outputs2_values = output_values[20:]

        self.assertEqual(len(outputs0_values), len(outputs1_values))
        self.assertEqual(len(outputs0_values), len(outputs2_values))

        triples = zip(outputs0_values, outputs1_values, outputs2_values)
        for base, shared, separate in triples:
            # Same weights used by both RNNs so outputs should be the same.
            self.assertAllEqual(base, shared)
            # Different weights used so outputs should be different.
            self.assertTrue(np.linalg.norm(base - separate) > 1e-6)
def _testNoProjNoSharding(self, use_gpu):
    """Smoke-test a plain LSTM (no projection, no sharding) and its shapes."""
    num_units, input_size, batch_size = 3, 5, 2

    with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
        initializer = tf.random_uniform_initializer(
            -0.01, 0.01, seed=self._seed)
        cell = rnn_cell.LSTMCell(num_units, input_size,
                                 initializer=initializer)

        # The same placeholder is repeated for all ten steps.
        step_input = tf.placeholder(tf.float32,
                                    shape=(batch_size, input_size))
        inputs = 10 * [step_input]

        outputs, _ = rnn.rnn(cell, inputs, dtype=tf.float32)
        self.assertEqual(len(outputs), len(inputs))

        expected_shape = [batch_size, num_units]
        for out in outputs:
            self.assertEqual(out.get_shape().as_list(), expected_shape)

        tf.initialize_all_variables().run()
        input_value = np.random.randn(batch_size, input_size)
        # Only checks that evaluation succeeds; values are not inspected.
        sess.run(outputs, feed_dict={inputs[0]: input_value})
def __init__(self, embedding_mat, non_static, lstm_type, hidden_unit,
             sequence_length, max_pool_size, num_classes, embedding_size,
             filter_sizes, num_filters, l2_reg_lambda=0.0):
    """Build the graph: embedding -> conv/max-pool -> RNN -> softmax scores.

    Args:
        embedding_mat: Initial embedding table.
        non_static: If true the embedding table is a trainable variable,
            otherwise a constant.
        lstm_type: ``"gru"``, ``"basic"``, or anything else for a peephole
            ``LSTMCell``.
        hidden_unit: Number of units of the recurrent cell.
        sequence_length: Width of the ``input_x`` placeholder.
        max_pool_size: Pooling window and stride along the sequence axis.
        num_classes: Number of output classes.
        embedding_size: Embedding width.
        filter_sizes: Iterable of convolution filter heights.
        num_filters: Number of filters per filter size.
        l2_reg_lambda: Weight of the L2 penalty on the output layer.
    """
    # Placeholders for input, output and dropout.
    self.input_x = tf.placeholder(tf.int32, [None, sequence_length],
                                  name="input_x")
    self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                  name="input_y")
    self.dropout_keep_prob = tf.placeholder(tf.float32,
                                            name="dropout_keep_prob")
    self.batch_size = tf.placeholder(tf.int32)
    self.pad = tf.placeholder(tf.float32, [None, 1, embedding_size, 1],
                              name="pad")
    self.real_len = tf.placeholder(tf.int32, [None], name="real_len")

    # Keeping track of L2 regularization loss (optional).
    l2_loss = tf.constant(0.0)

    # Embedding lookup, extended to 4-D because tf.nn.conv2d requires it.
    with tf.device('/cpu:0'), tf.name_scope("embedding"):
        if not non_static:
            embedding_W = tf.constant(embedding_mat, name="W")
        else:
            embedding_W = tf.Variable(embedding_mat, name="W")
        self.embedded_chars = tf.nn.embedding_lookup(embedding_W,
                                                     self.input_x)
        emb = tf.expand_dims(self.embedded_chars, -1)

    # CNN: one conv + max-pool branch per filter size.
    pooled_concat = []
    # Number of steps left after pooling with stride max_pool_size.
    reduced = np.int32(np.ceil((sequence_length) * 1.0 / max_pool_size))
    for filter_size in filter_sizes:
        with tf.name_scope("conv-maxpool-%s" % filter_size):
            # Pad before and after so that the VALID convolution output
            # keeps the sequence length.
            num_prio = (filter_size - 1) // 2
            num_post = (filter_size - 1) - num_prio
            pad_prio = tf.concat(1, [self.pad] * num_prio)
            pad_post = tf.concat(1, [self.pad] * num_post)
            emb_pad = tf.concat(1, [pad_prio, emb, pad_post])

            # Convolution layer.
            filter_shape = [filter_size, embedding_size, 1, num_filters]
            filter_W = tf.Variable(
                tf.truncated_normal(filter_shape, stddev=0.1), name="W")
            filter_b = tf.Variable(
                tf.constant(0.1, shape=[num_filters]), name="b")
            conv = tf.nn.conv2d(emb_pad, filter_W, strides=[1, 1, 1, 1],
                                padding="VALID", name="conv")

            # Apply nonlinearity.
            h = tf.nn.relu(tf.nn.bias_add(conv, filter_b), name="relu")

            # Max-pooling over the outputs.
            pooled = tf.nn.max_pool(h,
                                    ksize=[1, max_pool_size, 1, 1],
                                    strides=[1, max_pool_size, 1, 1],
                                    padding='SAME',
                                    name="pool")
            pooled = tf.reshape(pooled, [-1, reduced, num_filters])
            pooled_concat.append(pooled)

    pooled_concat = tf.concat(2, pooled_concat)
    pooled_concat = tf.nn.dropout(pooled_concat, self.dropout_keep_prob)

    # Recurrent cell.
    if lstm_type == "gru":
        lstm_cell = rnn_cell.GRUCell(num_units=hidden_unit,
                                     input_size=embedding_size)
    elif lstm_type == "basic":
        lstm_cell = rnn_cell.BasicLSTMCell(num_units=hidden_unit,
                                           input_size=embedding_size)
    else:
        lstm_cell = rnn_cell.LSTMCell(num_units=hidden_unit,
                                      input_size=embedding_size,
                                      use_peepholes=True)
    lstm_cell = rnn_cell.DropoutWrapper(
        lstm_cell, output_keep_prob=self.dropout_keep_prob)

    self._initial_state = lstm_cell.zero_state(self.batch_size, tf.float32)
    inputs = [
        tf.squeeze(input_, [1])
        for input_ in tf.split(1, reduced, pooled_concat)
    ]
    outputs, state = rnn.rnn(lstm_cell, inputs,
                             initial_state=self._initial_state,
                             sequence_length=self.real_len)

    # Select, per example, the output at its last real step: keep the
    # running value where real_len < i + 1, otherwise take outputs[i].
    output = outputs[0]
    with tf.variable_scope("Output"):
        tf.get_variable_scope().reuse_variables()
        one = tf.ones([1, hidden_unit], tf.float32)
        for i in range(1, len(outputs)):
            ind = self.real_len < (i + 1)
            ind = tf.to_float(ind)
            ind = tf.expand_dims(ind, -1)
            mat = tf.matmul(ind, one)
            output = tf.add(tf.mul(output, mat),
                            tf.mul(outputs[i], 1.0 - mat))

    # Final (unnormalized) scores and predictions.
    with tf.name_scope("output"):
        self.W = tf.Variable(
            tf.truncated_normal([hidden_unit, num_classes], stddev=0.1),
            name="W")
        output_b = tf.Variable(tf.constant(0.1, shape=[num_classes]),
                               name="b")
        # Regularize the output projection itself. A bare ``W`` here would
        # refer to the last convolution filter (or the embedding table when
        # filter_sizes is empty), not to this layer's weights.
        l2_loss += tf.nn.l2_loss(self.W)
        l2_loss += tf.nn.l2_loss(output_b)
        self.scores = tf.nn.xw_plus_b(output, self.W, output_b,
                                      name="scores")
        self.predictions = tf.argmax(self.scores, 1, name="predictions")

    # Mean cross-entropy loss plus the weighted L2 penalty.
    with tf.name_scope("loss"):
        losses = tf.nn.softmax_cross_entropy_with_logits(self.scores,
                                                         self.input_y)
        self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

    # Accuracy.
    with tf.name_scope("accuracy"):
        correct_predictions = tf.equal(self.predictions,
                                       tf.argmax(self.input_y, 1))
        self.accuracy = tf.reduce_mean(
            tf.cast(correct_predictions, "float"), name="accuracy")
def __init__(self, vocab_size, sequence_length, num_units,
             max_gradient_norm, batch_size, learning_rate,
             learning_rate_decay_factor):
    """Build a basic seq2seq model with placeholder inputs and an SGD update.

    Creates per-step encoder/decoder/weight placeholders, the seq2seq graph,
    a sequence loss, a gradient-clipped SGD update op and a saver.
    """
    self.vocab_size = vocab_size
    self.sequence_length = sequence_length
    self.batch_size = batch_size

    # Non-trainable learning rate with an op that multiplies it by the
    # decay factor.
    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
    self.learning_rate_decay_op = self.learning_rate.assign(
        self.learning_rate * learning_rate_decay_factor)
    self.global_step = tf.Variable(0, trainable=False)

    # Output projection from cell outputs to vocabulary logits.
    w = training.utils.gaussian_weights_variable(
        [num_units, self.vocab_size])
    b = tf.Variable(tf.zeros([self.vocab_size]))

    lstm_cell = rnn_cell.LSTMCell(num_units, vocab_size)

    step_shape = (batch_size, self.vocab_size)
    self.encoder_inputs = []
    self.decoder_inputs = []
    self.target_weights = []
    for _ in range(sequence_length):
        self.encoder_inputs.append(
            tf.placeholder(tf.float32, shape=step_shape))
        self.decoder_inputs.append(
            tf.placeholder(tf.float32, shape=step_shape))
        self.target_weights.append(
            tf.placeholder(tf.float32, shape=(batch_size, )))

    # The decoder gets one extra step (it starts with the GO symbol and the
    # targets are shifted by one). The matching extra weight is a constant
    # array of ones; it is sliced off before the loss below.
    self.decoder_inputs.append(
        tf.placeholder(tf.float32, shape=step_shape))
    self.target_weights.append(np.ones((batch_size, )))

    # Targets used by the sequence loss must be integer indices.
    targets = []
    for shifted_input in self.decoder_inputs[1:]:
        targets.append(tf.cast(tf.argmax(shifted_input, 1), dtype=tf.int32))

    outputs, self.state = seq2seq.basic_rnn_seq2seq(
        self.encoder_inputs, self.decoder_inputs, lstm_cell)
    self.logits = [tf.nn.xw_plus_b(o, w, b) for o in outputs]

    self.loss = seq2seq.sequence_loss(
        self.logits[:self.sequence_length],
        targets,
        self.target_weights[:self.sequence_length],
        self.vocab_size)

    # Plain SGD with global-norm gradient clipping.
    params = tf.trainable_variables()
    opt = tf.train.GradientDescentOptimizer(self.learning_rate)
    gradients = tf.gradients(self.loss, params)
    clipped_gradients, self.gradient_norms = tf.clip_by_global_norm(
        gradients, max_gradient_norm)
    self.updates = opt.apply_gradients(zip(clipped_gradients, params),
                                       global_step=self.global_step)

    self.saver = tf.train.Saver(tf.all_variables())
def __init__(self, is_training=False, hidden_units=128, num_layers=1,
             input_sequence_len=20, output_sequence_len=10,
             num_input_symbols=20, num_output_symbols=20,
             weight_amplitude=0.08, batch_size=32, peep=False):
    """Build a seq2seq model with a softmax head and, optionally, training ops.

    The decoder receives ``output_sequence_len + 1`` inputs. When
    ``is_training`` is false the decoder is built with
    ``feed_previous=True`` and no loss or train op is created.
    """
    self.encoder_inputs = [
        tf.placeholder(tf.float32,
                       shape=(None, num_input_symbols),
                       name="encoder_{0}".format(step))
        for step in range(input_sequence_len)
    ]
    self.decoder_inputs = [
        tf.placeholder(tf.float32,
                       shape=(None, num_output_symbols),
                       name="decoder_{0}".format(step))
        for step in range(output_sequence_len + 1)
    ]

    def random_uniform():
        # Fresh initializer in [-weight_amplitude, weight_amplitude].
        return tf.random_uniform_initializer(-weight_amplitude,
                                             weight_amplitude)

    if num_layers > 1:
        # The first layer consumes input symbols; every later layer
        # consumes the hidden state of the layer below.
        layer_input_sizes = ([num_input_symbols] +
                             [hidden_units] * (num_layers - 1))
        cells = [
            rnn_cell.LSTMCell(hidden_units,
                              use_peepholes=peep,
                              input_size=layer_input_size,
                              initializer=random_uniform())
            for layer_input_size in layer_input_sizes
        ]
        self.cell = rnn_cell.MultiRNNCell(cells)
    else:
        self.cell = rnn_cell.LSTMCell(hidden_units,
                                      use_peepholes=peep,
                                      initializer=random_uniform())

    self.w_softmax = tf.get_variable(
        'w_softmax',
        shape=(hidden_units, num_output_symbols),
        initializer=random_uniform())
    self.b_softmax = tf.get_variable(
        'b_softmax',
        shape=(num_output_symbols, ),
        initializer=random_uniform())

    # decoder_outputs is a list of per-step tensors, each projected to
    # output-symbol logits below.
    decoder_outputs, _ = self._init_seq2seq(
        self.encoder_inputs, self.decoder_inputs, self.cell,
        feed_previous=not is_training)

    output_logits = []
    for decoder_output in decoder_outputs:
        output_logits.append(
            tf.matmul(decoder_output, self.w_softmax) + self.b_softmax)
    self.output_probs = [tf.nn.softmax(logit) for logit in output_logits]

    # Loss and training op exist only for a training model.
    if is_training:
        # Targets are the decoder inputs shifted by one step.
        self.targets = self.decoder_inputs[1:]
        losses = [
            tf.nn.softmax_cross_entropy_with_logits(logit, target)
            for logit, target in zip(output_logits, self.targets)
        ]
        loss = tf.reduce_sum(tf.add_n(losses))
        self.cost = loss / output_sequence_len / batch_size

        self.learning_rate = tf.Variable(DEFAULT_LEARNING_RATE,
                                         trainable=False)
        train_vars = tf.trainable_variables()
        grads = tf.gradients(self.cost, train_vars)
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.train_op = optimizer.apply_gradients(zip(grads, train_vars))
def __init__(self, config):
    """Build a two-level bidirectional LSTM model with a sigmoid output.

    The first bidirectional RNN ("rnn1") runs over the ``word_len`` axis of
    the embedded input; its final forward/backward states are concatenated
    and fed, partitioned into ``sent_len`` groups, to the second
    bidirectional RNN ("rnn2"). A linear layer on the final states of the
    second RNN produces one logit per example.

    Args:
        config: Object providing ``sent_len``, ``word_len``, ``batch_size``,
            ``vocab_size``, ``embed_size``, ``keep_prob1``, ``keep_prob2``,
            ``num_layers1``, ``num_layers2``, ``state_size1``,
            ``state_size2`` and ``max_grad_norm``.
    """
    sent_len = self.sent_len = config.sent_len
    word_len = config.word_len
    batch_size = config.batch_size
    vocab_size = config.vocab_size
    embed_size = config.embed_size
    keep_prob1 = config.keep_prob1
    keep_prob2 = config.keep_prob2
    num_layers1 = config.num_layers1
    num_layers2 = config.num_layers2
    state_size1 = config.state_size1
    state_size2 = config.state_size2

    self.input_data = tf.placeholder(tf.int32,
                                     [batch_size * sent_len, word_len])
    self.lengths = tf.placeholder(tf.int64, [batch_size])
    self.wordlengths = tf.placeholder(tf.int64, [batch_size * sent_len])
    self.targets = tf.placeholder(tf.float32, [batch_size, 1])

    def stacked_pair(state_size, input_size, num_layers, dropout_kwargs):
        # Build forward/backward LSTM cells, optionally wrap both with
        # dropout, and repeat each num_layers times in a MultiRNNCell.
        fw_cell = rnn_cell.LSTMCell(state_size, input_size=input_size)
        bw_cell = rnn_cell.LSTMCell(state_size, input_size=input_size)
        if dropout_kwargs:
            fw_cell = rnn_cell.DropoutWrapper(fw_cell, **dropout_kwargs)
            bw_cell = rnn_cell.DropoutWrapper(bw_cell, **dropout_kwargs)
        return (rnn_cell.MultiRNNCell([fw_cell] * num_layers),
                rnn_cell.MultiRNNCell([bw_cell] * num_layers))

    # The embedding lookup is pinned to the CPU.
    with tf.device("/cpu:0"):
        embedding = tf.get_variable("embedding", [vocab_size, embed_size])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)

    # RNN 1: encode each row of input_data into a fixed-size vector.
    with tf.variable_scope("rnn1", reuse=None):
        # Dropout only on the inputs here, so it is not applied twice.
        dropout1 = {}
        if keep_prob1 < 1:
            dropout1 = {"input_keep_prob": keep_prob1}
        cell_1, backcell_1 = stacked_pair(state_size1, embed_size,
                                          num_layers1, dropout1)

        rnn_splits = [
            tf.squeeze(input_, [1])
            for input_ in tf.split(1, word_len, inputs)
        ]

        # Run the bidirectional RNN; only the final states are used.
        _, last_fw_state1, last_bw_state1 = rnn.bidirectional_rnn(
            cell_1, backcell_1, rnn_splits,
            sequence_length=self.wordlengths,
            dtype=tf.float32)
        tok_embeds = tf.concat(1, [last_fw_state1, last_bw_state1])

    # RNN 2: runs over the sent_len axis of the token encodings.
    with tf.variable_scope("rnn2", reuse=None):
        # Dropout on both the inputs and the outputs of this level.
        dropout2 = {}
        if keep_prob2 < 1:
            dropout2 = {"input_keep_prob": keep_prob2,
                        "output_keep_prob": keep_prob2}
        cell_2, backcell_2 = stacked_pair(state_size2, state_size1 * 4,
                                          num_layers2, dropout2)

        # tok_embeds has batch_size * sent_len rows; turn it into a list of
        # sent_len tensors by assigning rows to partitions in the repeating
        # pattern [0, 1, ..., sent_len - 1, 0, 1, ...].
        partition_ids = list(range(sent_len)) * batch_size
        rnn_inputs2 = tf.dynamic_partition(tok_embeds, partition_ids,
                                           sent_len)

        _, last_fw_state2, last_bw_state2 = rnn.bidirectional_rnn(
            cell_2, backcell_2, rnn_inputs2,
            sequence_length=self.lengths,
            dtype=tf.float32)
        sent_embed = tf.concat(1, [last_fw_state2, last_bw_state2])

    # NOTE(review): assumes the logits are computed inside the "linear"
    # scope together with its variables -- confirm.
    with tf.variable_scope("linear", reuse=None):
        w = tf.get_variable("w", [state_size2 * 4, 1])
        b = tf.get_variable("b", [1])
        raw_logits = tf.matmul(sent_embed, w) + b

    self.probabilities = tf.sigmoid(raw_logits)
    self.cost = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(raw_logits, self.targets))

    # Gradients use aggregation_method=2 and are clipped by global norm.
    tvars = tf.trainable_variables()
    # Created with value 0.0; the Adam optimizer below is constructed with
    # its defaults and does not take this variable.
    self.lr = tf.Variable(0.0, trainable=False)
    optimizer = tf.train.AdamOptimizer()
    grads_and_vars = optimizer.compute_gradients(
        self.cost, tvars, aggregation_method=2)
    grads, _vars = zip(*grads_and_vars)
    grads, self.grad_norm = tf.clip_by_global_norm(grads,
                                                   config.max_grad_norm)
    self.train_op = optimizer.apply_gradients(zip(grads, _vars))