def __init__(self,
             memory,
             memory_sequence_length=None,
             cell=None,
             cell_dropout_mode=None,
             vocab_size=None,
             output_layer=None,
             cell_input_fn=None,
             hparams=None):
    AttentionRNNDecoder.__init__(
        self, memory, memory_sequence_length, cell, cell_dropout_mode,
        vocab_size, output_layer, cell_input_fn, hparams)
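# A minimal usage sketch (illustrative, not from the original code),
# assuming the constructor above belongs to a hypothetical subclass named
# `CustomAttentionRNNDecoder`; the placeholder shapes are arbitrary.
#
#     encoder_output = tf.random_uniform([8, 10, 64])   # [batch, time, dim]
#     encoder_seq_length = tf.fill([8], 10)             # [batch]
#     decoder = CustomAttentionRNNDecoder(
#         memory=encoder_output,
#         memory_sequence_length=encoder_seq_length,
#         vocab_size=1000)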
def test_beam_search_cell(self):
    """Tests :meth:`texar.tf.modules.AttentionRNNDecoder._get_beam_search_cell`.
    """
    seq_length = np.random.randint(
        self._max_time, size=[self._batch_size]) + 1
    encoder_values_length = tf.constant(seq_length)
    hparams = {
        "attention": {
            "kwargs": {
                "num_units": self._attention_dim,
                "probability_fn": "sparsemax"
            }
        }
    }
    decoder = AttentionRNNDecoder(
        memory=self._encoder_output,
        memory_sequence_length=encoder_values_length,
        vocab_size=self._vocab_size,
        hparams=hparams)

    helper_train = get_helper(
        decoder.hparams.helper_train.type,
        inputs=self._inputs,
        sequence_length=[self._max_time] * self._batch_size,
        **decoder.hparams.helper_train.kwargs.todict())

    _, _, _ = decoder(helper=helper_train)

    # 4+1 trainable variables: cell-kernel, cell-bias, fc-weight,
    # fc-bias, and memory_layer. For LuongAttention, only the memory
    # layer is transformed, so `num_units` must match the expected
    # query depth.
    self.assertEqual(len(decoder.trainable_variables), 5)

    beam_width = 3
    beam_cell = decoder._get_beam_search_cell(beam_width)
    cell_input = tf.random_uniform(
        [self._batch_size * beam_width, self._emb_dim])
    cell_state = beam_cell.zero_state(
        self._batch_size * beam_width, tf.float32)
    _ = beam_cell(cell_input, cell_state)

    # Check that beam_cell shares variables with the decoder cell.
    for tvar in beam_cell.trainable_variables:
        self.assertTrue(tvar in decoder.trainable_variables)
def test_decode_train(self):
    """Tests decoding in training mode.
    """
    seq_length = np.random.randint(
        self._max_time, size=[self._batch_size]) + 1
    encoder_values_length = tf.constant(seq_length)
    hparams = {
        "attention": {
            "kwargs": {
                "num_units": self._attention_dim,
                # Note: it appears that using sparsemax on TF-CPU
                # requires `memory_sequence_length` to equal max_time.
                # "probability_fn": "sparsemax"
            }
        }
    }
    decoder = AttentionRNNDecoder(
        memory=self._encoder_output,
        memory_sequence_length=encoder_values_length,
        vocab_size=self._vocab_size,
        hparams=hparams)

    helper_train = get_helper(
        decoder.hparams.helper_train.type,
        inputs=self._inputs,
        sequence_length=[self._max_time] * self._batch_size,
        **decoder.hparams.helper_train.kwargs.todict())

    outputs, final_state, sequence_lengths = decoder(helper=helper_train)

    # 4+1 trainable variables: cell-kernel, cell-bias, fc-weight,
    # fc-bias, and memory_layer. For LuongAttention, only the memory
    # layer is transformed, so `num_units` must match the expected
    # query depth.
    self.assertEqual(len(decoder.trainable_variables), 5)

    cell_dim = decoder.hparams.rnn_cell.kwargs.num_units
    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        outputs_, final_state_, sequence_lengths_ = sess.run(
            [outputs, final_state, sequence_lengths],
            feed_dict={context.global_mode(): tf.estimator.ModeKeys.TRAIN})
        self.assertIsInstance(outputs_, AttentionRNNDecoderOutput)
        self.assertEqual(
            outputs_.logits.shape,
            (self._batch_size, self._max_time, self._vocab_size))
        self.assertEqual(
            outputs_.sample_id.shape, (self._batch_size, self._max_time))
        self.assertEqual(
            final_state_.cell_state[0].shape, (self._batch_size, cell_dim))
        np.testing.assert_array_equal(
            sequence_lengths_, [self._max_time] * self._batch_size)
def initialize(self, name=None):
    # `AttentionRNNDecoder.initialize` returns
    # (finished, initial_inputs, initial_state); convert to a list so
    # the state slot can be replaced below (tuples are immutable).
    init = list(AttentionRNNDecoder.initialize(self, name))

    batch_size = tf.shape(init[0])[0]
    # `decoded_ids` can be initialized with arbitrary values because it
    # is overwritten during decoding; 60 is the maximum decoding length
    # assumed here.
    initial_decoded_ids = tf.ones((batch_size, 60), dtype=tf.int32)

    initial_rnn_state = init[2]
    initial_state = [initial_decoded_ids, initial_rnn_state]
    init[2] = initial_state

    return init
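# A companion sketch (an assumption, not part of the original code) of a
# matching `step` override that fills the `decoded_ids` slot created by
# `initialize` above. It follows the (outputs, next_state, next_inputs,
# finished) step contract that AttentionRNNDecoder implements; the
# one-hot update keeps the state's static shape fixed across decoding
# loop iterations.
def step(self, time, inputs, state, name=None):
    decoded_ids, rnn_state = state
    outputs, next_rnn_state, next_inputs, finished = \
        AttentionRNNDecoder.step(self, time, inputs, rnn_state, name)
    # Write the sample id emitted at this step into position `time`.
    time_mask = tf.one_hot(time, depth=60, dtype=tf.int32)  # [60]
    decoded_ids = (decoded_ids * (1 - time_mask) +
                   tf.expand_dims(outputs.sample_id, 1) * time_mask)
    return outputs, [decoded_ids, next_rnn_state], next_inputs, finished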
def test_decode_infer(self):
    """Tests decoding in inference mode.
    """
    seq_length = np.random.randint(
        self._max_time, size=[self._batch_size]) + 1
    encoder_values_length = tf.constant(seq_length)
    hparams = {
        "attention": {
            "kwargs": {
                "num_units": 256,
            }
        }
    }
    decoder = AttentionRNNDecoder(
        vocab_size=self._vocab_size,
        memory=self._encoder_output,
        memory_sequence_length=encoder_values_length,
        hparams=hparams)

    helper_infer = get_helper(
        decoder.hparams.helper_infer.type,
        embedding=self._embedding,
        start_tokens=[1] * self._batch_size,
        end_token=2,
        **decoder.hparams.helper_infer.kwargs.todict())

    outputs, final_state, sequence_lengths = decoder(helper=helper_infer)

    # 4+1 trainable variables: cell-kernel, cell-bias, fc-weight,
    # fc-bias, and memory_layer. For LuongAttention, only the memory
    # layer is transformed, so `num_units` must match the expected
    # query depth.
    self.assertEqual(len(decoder.trainable_variables), 5)

    cell_dim = decoder.hparams.rnn_cell.kwargs.num_units
    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        outputs_, final_state_, sequence_lengths_ = sess.run(
            [outputs, final_state, sequence_lengths],
            feed_dict={
                context.global_mode(): tf.estimator.ModeKeys.PREDICT
            })
        self.assertIsInstance(outputs_, AttentionRNNDecoderOutput)
        max_length = max(sequence_lengths_)
        self.assertEqual(
            outputs_.logits.shape,
            (self._batch_size, max_length, self._vocab_size))
        self.assertEqual(
            outputs_.sample_id.shape, (self._batch_size, max_length))
        self.assertEqual(
            final_state_.cell_state[0].shape, (self._batch_size, cell_dim))
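# Standard TF test entry point (assumed; the original excerpt does not
# show the module footer).
if __name__ == "__main__":
    tf.test.main()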