def test_gru_decode_train(self):
    r"""Runs the GRU-configured decoder with its training helper and checks
    the trainable-variable count and the decoded outputs.
    """
    # Random memory lengths in [1, max_time], one per batch entry.
    memory_lengths = torch.tensor(
        np.random.randint(self._max_time, size=[self._batch_size]) + 1)

    gru_decoder = AttentionRNNDecoder(
        encoder_output_size=64,
        vocab_size=self._vocab_size,
        input_size=self._emb_dim + 64,
        hparams=self._hparams_gru)

    # Target lengths are all set to `max_time`.
    target_lengths = torch.tensor([self._max_time] * self._batch_size)
    train_helper = gru_decoder.create_helper()

    outputs, final_state, sequence_lengths = gru_decoder(
        memory=self._encoder_output,
        memory_sequence_length=memory_lengths,
        helper=train_helper,
        inputs=self._inputs,
        sequence_length=target_lengths)

    self.assertEqual(len(gru_decoder.trainable_variables), 7)
    self._test_outputs(
        gru_decoder, outputs, final_state, sequence_lengths)
def test_multicell_decode_infer(self):
    r"""Runs the multi-layer decoder in eval mode with an inference helper
    and checks the trainable-variable count and the decoded outputs.
    """
    # Random memory lengths in [1, max_time], one per batch entry.
    memory_lengths = torch.tensor(
        np.random.randint(self._max_time, size=[self._batch_size]) + 1)

    multi_decoder = AttentionRNNDecoder(
        encoder_output_size=64,
        vocab_size=self._vocab_size,
        input_size=self._emb_dim + 64,
        hparams=self._hparams_multicell)
    multi_decoder.eval()

    # Every batch entry starts from token id 1; token id 2 ends decoding.
    first_tokens = torch.tensor([1] * self._batch_size)
    infer_helper = multi_decoder.create_helper(
        embedding=self._embedding,
        start_tokens=first_tokens,
        end_token=2)

    outputs, final_state, sequence_lengths = multi_decoder(
        memory=self._encoder_output,
        memory_sequence_length=memory_lengths,
        helper=infer_helper)

    self.assertEqual(len(multi_decoder.trainable_variables), 15)
    self._test_outputs(
        multi_decoder, outputs, final_state, sequence_lengths,
        test_mode=True, is_multi=True)
def test_decode_train(self):
    r"""Runs training-mode decoding once for every hparams entry in
    ``self._test_hparams`` and checks the decoded outputs.
    """
    # Random memory lengths in [1, max_time], shared by all configurations.
    memory_lengths = torch.tensor(
        np.random.randint(self._max_time, size=[self._batch_size]) + 1)

    # The (cell_type, is_multi) keys are not needed here, only the hparams.
    for config in self._test_hparams.values():
        decoder = AttentionRNNDecoder(
            encoder_output_size=64,
            token_embedder=self._embedder,
            vocab_size=self._vocab_size,
            input_size=self._emb_dim,
            hparams=config)

        # Target lengths are all set to `max_time`.
        target_lengths = torch.tensor(
            [self._max_time] * self._batch_size)
        train_helper = decoder.create_helper()

        outputs, final_state, sequence_lengths = decoder(
            memory=self._encoder_output,
            memory_sequence_length=memory_lengths,
            helper=train_helper,
            inputs=self._inputs,
            sequence_length=target_lengths)

        self._test_outputs(
            decoder, outputs, final_state, sequence_lengths)
def test_decode_infer(self):
    r"""Runs inference-mode decoding once for every hparams entry in
    ``self._test_hparams`` and checks the decoded outputs.
    """
    # Random memory lengths in [1, max_time], shared by all configurations.
    memory_lengths = torch.tensor(
        np.random.randint(self._max_time, size=[self._batch_size]) + 1)

    # The (cell_type, is_multi) keys are not needed here, only the hparams.
    for config in self._test_hparams.values():
        decoder = AttentionRNNDecoder(
            encoder_output_size=64,
            token_embedder=self._embedder,
            vocab_size=self._vocab_size,
            input_size=self._emb_dim,
            hparams=config)
        decoder.eval()

        # Every batch entry starts from token id 1; token id 2 ends decoding.
        first_tokens = torch.tensor([1] * self._batch_size)
        infer_helper = decoder.create_helper(
            start_tokens=first_tokens, end_token=2)

        outputs, final_state, sequence_lengths = decoder(
            memory=self._encoder_output,
            memory_sequence_length=memory_lengths,
            helper=infer_helper)

        self._test_outputs(
            decoder, outputs, final_state, sequence_lengths,
            test_mode=True)
def __init__(self, memory, memory_sequence_length=None, cell=None,
             cell_dropout_mode=None, vocab_size=None, output_layer=None,
             cell_input_fn=None, hparams=None):
    """Constructs the decoder by handing every argument, unchanged and in
    the same positional order, to :meth:`AttentionRNNDecoder.__init__`.
    """
    forwarded = (
        memory,
        memory_sequence_length,
        cell,
        cell_dropout_mode,
        vocab_size,
        output_layer,
        cell_input_fn,
        hparams,
    )
    AttentionRNNDecoder.__init__(self, *forwarded)
def setUp(self):
    r"""Creates the size constants, random inputs, embedder, encoder output
    and the ``(cell_type, is_multi) -> hparams`` table used by the tests.
    """
    self._vocab_size = 10
    self._max_time = 16
    self._batch_size = 8
    self._emb_dim = 20
    self._attention_dim = 256

    # Random token ids in [0, vocab_size) of shape (batch_size, max_time).
    self._inputs = torch.randint(
        self._vocab_size, size=(self._batch_size, self._max_time))
    init_embedding = torch.rand(
        self._vocab_size, self._emb_dim, dtype=torch.float)
    self._embedder = WordEmbedder(init_value=init_embedding)
    self._encoder_output = torch.rand(
        self._batch_size, self._max_time, 64)

    def build_hparams(rnn_cell):
        # Pairs the given cell config with the shared attention config and
        # wraps it in HParams against the decoder's default hparams.
        config = {
            "rnn_cell": rnn_cell,
            "attention": {
                "kwargs": {
                    "num_units": self._attention_dim
                },
            }
        }
        return HParams(config, AttentionRNNDecoder.default_hparams())

    # (cell_type, is_multi) -> hparams
    self._test_hparams = {
        (cell_type, False): build_hparams({
            'type': cell_type,
            'kwargs': {
                'num_units': 256,
            },
        })
        for cell_type in ["RNNCell", "LSTMCell", "GRUCell"]
    }

    # One extra entry: a three-layer LSTM.
    self._test_hparams[("LSTMCell", True)] = build_hparams({
        'type': 'LSTMCell',
        'kwargs': {
            'num_units': 256,
        },
        'num_layers': 3,
    })
def test_beam_search_cell(self):
    """Tests :meth:`texar.modules.AttentionRNNDecoder._get_beam_search_cell`
    """
    # Random memory lengths in [1, max_time], one per batch entry.
    memory_lengths = tf.constant(
        np.random.randint(self._max_time, size=[self._batch_size]) + 1)

    attention_kwargs = {
        "num_units": self._attention_dim,
        "probability_fn": "sparsemax"
    }
    decoder = AttentionRNNDecoder(
        memory=self._encoder_output,
        memory_sequence_length=memory_lengths,
        vocab_size=self._vocab_size,
        hparams={"attention": {"kwargs": attention_kwargs}})

    train_cfg = decoder.hparams.helper_train
    train_helper = get_helper(
        train_cfg.type,
        inputs=self._inputs,
        sequence_length=[self._max_time] * self._batch_size,
        **train_cfg.kwargs.todict())

    # Run the decoder once; the three results themselves are not inspected.
    _, _, _ = decoder(helper=train_helper)

    # 4+1 trainable variables: cell-kernel, cell-bias,
    # fc-weight, fc-bias, and
    # memory_layer: For LuongAttention, we only transform the memory layer;
    # thus num_units *must* match the expected query depth.
    self.assertEqual(len(decoder.trainable_variables), 5)

    beam_width = 3
    beam_cell = decoder._get_beam_search_cell(beam_width)

    # The beam cell is fed inputs of batch size `batch_size * beam_width`.
    tiled_batch = self._batch_size * beam_width
    step_input = tf.random_uniform([tiled_batch, self._emb_dim])
    step_state = beam_cell.zero_state(tiled_batch, tf.float32)
    _ = beam_cell(step_input, step_state)

    # Test if beam_cell is sharing variables with decoder cell.
    for beam_var in beam_cell.trainable_variables:
        self.assertTrue(beam_var in decoder.trainable_variables)
def test_decode_train(self):
    """Tests decoding in training mode.
    """
    # Random memory lengths in [1, max_time], one per batch entry.
    memory_lengths = tf.constant(
        np.random.randint(self._max_time, size=[self._batch_size]) + 1)

    attention_kwargs = {
        "num_units": self._attention_dim,
        # Note: to use sparsemax in TF-CPU, it looks
        # `memory_sequence_length` must equal max_time.
        #"probability_fn": "sparsemax"
    }
    decoder = AttentionRNNDecoder(
        memory=self._encoder_output,
        memory_sequence_length=memory_lengths,
        vocab_size=self._vocab_size,
        hparams={"attention": {"kwargs": attention_kwargs}})

    train_cfg = decoder.hparams.helper_train
    full_lengths = [self._max_time] * self._batch_size
    train_helper = get_helper(
        train_cfg.type,
        inputs=self._inputs,
        sequence_length=full_lengths,
        **train_cfg.kwargs.todict())

    outputs, final_state, sequence_lengths = decoder(helper=train_helper)

    # 4+1 trainable variables: cell-kernel, cell-bias,
    # fc-weight, fc-bias, and
    # memory_layer: For LuongAttention, we only transform the memory layer;
    # thus num_units *must* match the expected query depth.
    self.assertEqual(len(decoder.trainable_variables), 5)

    cell_dim = decoder.hparams.rnn_cell.kwargs.num_units
    fetches = [outputs, final_state, sequence_lengths]
    train_feed = {context.global_mode(): tf.estimator.ModeKeys.TRAIN}

    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        out_val, state_val, lengths_val = sess.run(
            fetches, feed_dict=train_feed)

        self.assertIsInstance(out_val, AttentionRNNDecoderOutput)
        self.assertEqual(
            out_val.logits.shape,
            (self._batch_size, self._max_time, self._vocab_size))
        self.assertEqual(
            out_val.sample_id.shape,
            (self._batch_size, self._max_time))
        self.assertEqual(
            state_val.cell_state[0].shape,
            (self._batch_size, cell_dim))
        # The returned lengths must equal the lengths given to the helper.
        np.testing.assert_array_equal(
            lengths_val, [self._max_time] * self._batch_size)
def initialize(self, name=None):
    """Extends the parent's initial values so that the state slot (index 2)
    becomes ``[decoded_ids, rnn_state]``.

    Calls :meth:`AttentionRNNDecoder.initialize`, replaces element 2 of its
    result in place, and returns that same object.
    """
    base_init = AttentionRNNDecoder.initialize(self, name)

    # Batch size is read from the leading dimension of the first element.
    num_examples = tf.shape(base_init[0])[0]

    # decoded_ids can be initialized as any arbitrary value
    # because it will be assigned later in decoding; here it is an int32
    # tensor of ones with a fixed second dimension of 60.
    placeholder_ids = tf.ones((num_examples, 60), dtype=tf.int32)

    # NOTE(review): item assignment assumes the parent returns a mutable
    # sequence (e.g. a list) -- confirm against the parent implementation.
    base_init[2] = [placeholder_ids, base_init[2]]
    return base_init
def test_decode_infer(self):
    """Tests decoding in inference mode.

    Builds a decoder over the encoder output, decodes with the inference
    helper configured in the decoder's hparams, and checks the number of
    trainable variables and the shapes of the evaluated results.
    """
    # Random memory lengths in [1, max_time], one per batch entry.
    seq_length = np.random.randint(
        self._max_time, size=[self._batch_size]) + 1
    encoder_values_length = tf.constant(seq_length)

    hparams = {
        "attention": {
            "kwargs": {
                "num_units": 256,
            }
        }
    }
    decoder = AttentionRNNDecoder(
        vocab_size=self._vocab_size,
        memory=self._encoder_output,
        memory_sequence_length=encoder_values_length,
        hparams=hparams)

    # Take both the type and the extra kwargs from the *inference* helper
    # hparams, so the kwargs always belong to the helper being built.
    helper_infer_hparams = decoder.hparams.helper_infer
    helper_infer = get_helper(
        helper_infer_hparams.type,
        embedding=self._embedding,
        start_tokens=[1] * self._batch_size,
        end_token=2,
        **helper_infer_hparams.kwargs.todict())

    outputs, final_state, sequence_lengths = decoder(helper=helper_infer)

    # 4+1 trainable variables: cell-kernel, cell-bias,
    # fc-weight, fc-bias, and
    # memory_layer: For LuongAttention, we only transform the memory layer;
    # thus num_units *must* match the expected query depth.
    self.assertEqual(len(decoder.trainable_variables), 5)

    cell_dim = decoder.hparams.rnn_cell.kwargs.num_units

    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        outputs_, final_state_, sequence_lengths_ = sess.run(
            [outputs, final_state, sequence_lengths],
            feed_dict={
                context.global_mode(): tf.estimator.ModeKeys.PREDICT
            })

        self.assertIsInstance(outputs_, AttentionRNNDecoderOutput)

        # The time dimension is checked against the longest decoded
        # sequence in the batch.
        max_length = max(sequence_lengths_)
        self.assertEqual(
            outputs_.logits.shape,
            (self._batch_size, max_length, self._vocab_size))
        self.assertEqual(
            outputs_.sample_id.shape,
            (self._batch_size, max_length))
        self.assertEqual(
            final_state_.cell_state[0].shape,
            (self._batch_size, cell_dim))
def setUp(self):
    r"""Creates the size constants, random inputs, embedding table, encoder
    output and the four decoder hparams (RNN, LSTM, GRU and three-layer RNN)
    used by the tests.
    """
    self._vocab_size = 10
    self._max_time = 16
    self._batch_size = 8
    self._emb_dim = 20
    self._attention_dim = 256

    self._inputs = torch.rand(
        self._batch_size, self._max_time, self._emb_dim,
        dtype=torch.float32)
    self._embedding = torch.rand(
        self._vocab_size, self._emb_dim, dtype=torch.float32)
    self._encoder_output = torch.rand(
        self._batch_size, self._max_time, 64)

    def build_hparams(cell_type, num_layers=None):
        # Builds one decoder config; 'num_layers' is only written when a
        # value is given, so single-layer configs omit the key entirely.
        rnn_cell = {
            'type': cell_type,
            'kwargs': {
                'num_units': 256,
            },
        }
        if num_layers is not None:
            rnn_cell['num_layers'] = num_layers
        config = {
            "rnn_cell": rnn_cell,
            "attention": {
                "kwargs": {
                    "num_units": self._attention_dim
                },
            }
        }
        return HParams(config, AttentionRNNDecoder.default_hparams())

    self._hparams_rnn = build_hparams('RNNCell')
    self._hparams_lstm = build_hparams('LSTMCell')
    self._hparams_gru = build_hparams('GRUCell')
    self._hparams_multicell = build_hparams('RNNCell', num_layers=3)