def test_recurrent_beam1(self):
    # beam=1 and greedy should return the same result
    batch_size = 2
    max_output_length = 3
    src_mask, emb, decoder, encoder_output, encoder_hidden = self._build(
        batch_size=batch_size)

    greedy_output, _ = recurrent_greedy(
        src_mask=src_mask, embed=emb, bos_index=self.bos_index,
        eos_index=self.eos_index, max_output_length=max_output_length,
        decoder=decoder, encoder_output=encoder_output,
        encoder_hidden=encoder_hidden)

    beam_size = 1
    alpha = 1.0
    output, _ = beam_search(
        size=beam_size, eos_index=self.eos_index, pad_index=self.pad_index,
        src_mask=src_mask, embed=emb, bos_index=self.bos_index, n_best=1,
        max_output_length=max_output_length, decoder=decoder, alpha=alpha,
        encoder_output=encoder_output, encoder_hidden=encoder_hidden)

    np.testing.assert_array_equal(greedy_output, output)
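# Sketch (not part of the tests above): the `alpha` passed to beam_search is
# assumed to be the exponent of a GNMT-style length penalty, i.e. hypothesis
# scores are normalized by ((5 + length) / 6) ** alpha, so alpha=1.0 favours
# longer hypotheses and alpha=0.0 switches the normalization off.
def length_penalty(length: int, alpha: float) -> float:
    # GNMT-style length penalty (Wu et al., 2016); illustrative helper only
    return ((5.0 + length) / 6.0) ** alpha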
def test_transformer_beam7(self):
    batch_size = 2
    beam_size = 7
    alpha = 1.
    max_output_length = 3
    src_mask, embed, decoder, encoder_output, \
        encoder_hidden = self._build(batch_size=batch_size)

    output, attention_scores = beam_search(
        size=beam_size, eos_index=self.eos_index, pad_index=self.pad_index,
        src_mask=src_mask, embed=embed, bos_index=self.bos_index, n_best=1,
        max_output_length=max_output_length, decoder=decoder, alpha=alpha,
        encoder_output=encoder_output, encoder_hidden=encoder_hidden)

    # Transformer beam search doesn't return attention scores
    self.assertIsNone(attention_scores)

    # batch x time: the model produces EOS immediately,
    # so everything after it gets cut off
    self.assertEqual(output.shape, (batch_size, 1))
    np.testing.assert_equal(output, [[3], [3]])
def test_transformer_beam1(self):
    batch_size = 2
    beam_size = 1
    alpha = 1.
    max_output_length = 3
    src_mask, embed, decoder, encoder_output, \
        encoder_hidden = self._build(batch_size=batch_size)

    output, attention_scores = beam_search(
        size=beam_size, eos_index=self.eos_index, pad_index=self.pad_index,
        src_mask=src_mask, embed=embed, bos_index=self.bos_index,
        max_output_length=max_output_length, decoder=decoder, alpha=alpha,
        encoder_output=encoder_output, encoder_hidden=encoder_hidden)

    # Transformer beam search doesn't return attention scores
    self.assertIsNone(attention_scores)

    # batch x time
    self.assertEqual(output.shape, (batch_size, max_output_length))
    np.testing.assert_equal(output, [[5, 5, 5], [5, 5, 5]])

    # now compare to greedy, they should be the same for beam=1
    greedy_output, _ = transformer_greedy(
        src_mask=src_mask, embed=embed, bos_index=self.bos_index,
        max_output_length=max_output_length, decoder=decoder,
        encoder_output=encoder_output, encoder_hidden=encoder_hidden)
    np.testing.assert_equal(output, greedy_output)
def test_recurrent_beam1(self):
    # beam=1 and greedy should return the same result
    batch_size = 2
    max_output_length = 3
    src_mask, model, encoder_output, encoder_hidden = self._build(
        batch_size=batch_size)

    greedy_output, _ = recurrent_greedy(
        src_mask=src_mask, max_output_length=max_output_length,
        model=model, encoder_output=encoder_output,
        encoder_hidden=encoder_hidden)

    beam_size = 1
    alpha = 1.0
    output, _ = beam_search(
        size=beam_size, src_mask=src_mask, n_best=1,
        max_output_length=max_output_length, model=model, alpha=alpha,
        encoder_output=encoder_output, encoder_hidden=encoder_hidden)

    np.testing.assert_array_equal(greedy_output, output)
def run_batch(self, batch: Batch, max_output_length: int, beam_size: int,
              beam_alpha: float) -> (np.array, np.array):
    """
    Get outputs and attention scores for a given batch

    :param batch: batch to generate hypotheses for
    :param max_output_length: maximum length of hypotheses
    :param beam_size: size of the beam for beam search; if < 2, use greedy
    :param beam_alpha: alpha value for beam search
    :return:
        stacked_output: hypotheses for batch,
        stacked_attention_scores: attention scores for batch
    """
    encoder_output, encoder_hidden = self.encode(batch.src,
                                                 batch.src_lengths,
                                                 batch.src_mask,
                                                 self.encoder)

    if self.encoder_2:
        # encode the previous source with the second encoder and fuse
        # both encoder outputs in the final combination layer
        encoder_output_2, encoder_hidden_2 = self.encode(
            src=batch.src_prev,
            src_length=batch.src_prev_lengths,
            src_mask=batch.src_prev_mask,
            encoder=self.encoder_2)
        x = self.last_layer(encoder_output, batch.src_mask,
                            encoder_output_2, batch.src_prev_mask)
        encoder_output, encoder_hidden = self.last_layer_norm(x), None

    # if maximum output length is not globally specified, adapt to src len
    if max_output_length is None:
        max_output_length = int(max(batch.src_lengths.cpu().numpy()) * 1.5)

    # greedy decoding
    if beam_size < 2:
        stacked_output, stacked_attention_scores = greedy(
            encoder_hidden=encoder_hidden,
            encoder_output=encoder_output,
            eos_index=self.eos_index,
            src_mask=batch.src_mask,
            embed=self.trg_embed,
            bos_index=self.bos_index,
            decoder=self.decoder,
            max_output_length=max_output_length)
        # batch, time, max_src_length
    else:  # beam search
        stacked_output, stacked_attention_scores = beam_search(
            size=beam_size,
            encoder_output=encoder_output,
            encoder_hidden=encoder_hidden,
            src_mask=batch.src_mask,
            embed=self.trg_embed,
            max_output_length=max_output_length,
            alpha=beam_alpha,
            eos_index=self.eos_index,
            pad_index=self.pad_index,
            bos_index=self.bos_index,
            decoder=self.decoder)

    return stacked_output, stacked_attention_scores
def run_batch(self, batch: Batch, max_output_length: int, beam_size: int,
              beam_alpha: float, return_logp: bool = False) \
        -> (np.array, np.array, Optional[np.array]):
    """
    Get outputs and attention scores for a given batch

    :param batch: batch to generate hypotheses for
    :param max_output_length: maximum length of hypotheses
    :param beam_size: size of the beam for beam search; if 0, use greedy
    :param beam_alpha: alpha value for beam search
    :param return_logp: keep track of log probabilities as well
    :return:
        - stacked_output: hypotheses for batch,
        - stacked_attention_scores: attention scores for batch,
        - log_probs: log probabilities for batch hypotheses
    """
    encoder_output, encoder_hidden = self.encode(batch.src,
                                                 batch.src_lengths,
                                                 batch.src_mask)

    # if maximum output length is not globally specified, adapt to src len
    if max_output_length is None:
        max_output_length = int(max(batch.src_lengths.cpu().numpy()) * 1.5)

    # greedy decoding
    if beam_size == 0:
        stacked_output, stacked_attention_scores, logprobs = greedy(
            encoder_hidden=encoder_hidden,
            encoder_output=encoder_output,
            src_mask=batch.src_mask,
            embed=self.trg_embed,
            bos_index=self.bos_index,
            decoder=self.decoder,
            max_output_length=max_output_length,
            eos_index=self.eos_index,
            return_logp=return_logp)
        # batch, time, max_src_length
    else:  # beam size > 0
        stacked_output, stacked_attention_scores, logprobs = beam_search(
            size=beam_size,
            encoder_output=encoder_output,
            encoder_hidden=encoder_hidden,
            src_mask=batch.src_mask,
            embed=self.trg_embed,
            max_output_length=max_output_length,
            alpha=beam_alpha,
            eos_index=self.eos_index,
            pad_index=self.pad_index,
            bos_index=self.bos_index,
            decoder=self.decoder,
            return_logp=return_logp)

    return stacked_output, stacked_attention_scores, logprobs
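# Hedged usage sketch (not from the original code): how this run_batch variant
# might be called during validation. `model` and `batch` stand for a trained
# model instance and a Batch from the data iterator; both names are assumed.
hypotheses, attention_scores, log_probs = model.run_batch(
    batch=batch,
    max_output_length=None,  # None -> 1.5 * longest source length in the batch
    beam_size=5,             # 0 selects greedy decoding
    beam_alpha=1.0,          # length penalty exponent for beam search
    return_logp=True)        # also return hypothesis log probabilities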
def test_recurrent_beam7(self):
    batch_size = 2
    max_output_length = 3
    src_mask, model, encoder_output, encoder_hidden = self._build(
        batch_size=batch_size)

    beam_size = 7
    alpha = 1.0
    output, _ = beam_search(
        size=beam_size, src_mask=src_mask, n_best=1,
        max_output_length=max_output_length, model=model, alpha=alpha,
        encoder_output=encoder_output, encoder_hidden=encoder_hidden)

    self.assertEqual(output.shape, (2, 1))
    np.testing.assert_array_equal(output, [[3], [3]])
def run_batch(self, batch: Batch, max_output_length: int, beam_size: int,
              beam_alpha: float):
    """
    Get outputs and attention scores for a given batch

    :param batch: batch to generate hypotheses for
    :param max_output_length: maximum length of hypotheses
    :param beam_size: size of the beam for beam search; if 0, use greedy
    :param beam_alpha: alpha value for beam search
    :return: stacked_output (hypotheses for batch),
        stacked_attention_scores (attention scores for batch)
    """
    encoder_output, encoder_hidden = self.encode(batch.src,
                                                 batch.src_lengths,
                                                 batch.src_mask)

    # if maximum output length is not globally specified, adapt to src len
    if max_output_length is None:
        max_output_length = int(max(batch.src_lengths.cpu().numpy()) * 1.5)

    # greedy decoding
    if beam_size == 0:
        stacked_output, stacked_attention_scores = greedy(
            encoder_hidden=encoder_hidden,
            encoder_output=encoder_output,
            src_mask=batch.src_mask,
            embed=self.trg_embed,
            bos_index=self.bos_index,
            decoder=self.decoder,
            max_output_length=max_output_length)
        # batch, time, max_src_length
    else:  # beam search
        stacked_output, stacked_attention_scores = beam_search(
            size=beam_size,
            encoder_output=encoder_output,
            encoder_hidden=encoder_hidden,
            src_mask=batch.src_mask,
            embed=self.trg_embed,
            max_output_length=max_output_length,
            alpha=beam_alpha,
            eos_index=self.eos_index,
            pad_index=self.pad_index,
            bos_index=self.bos_index,
            decoder=self.decoder)

    return stacked_output, stacked_attention_scores
def test_recurrent_beam7(self):
    batch_size = 2
    max_output_length = 3
    src_mask, emb, decoder, encoder_output, encoder_hidden = self._build(
        batch_size=batch_size)

    beam_size = 7
    alpha = 1.0
    output, _ = beam_search(
        size=beam_size, eos_index=self.eos_index, pad_index=self.pad_index,
        src_mask=src_mask, embed=emb, bos_index=self.bos_index, n_best=1,
        max_output_length=max_output_length, decoder=decoder, alpha=alpha,
        encoder_output=encoder_output, encoder_hidden=encoder_hidden)

    self.assertEqual(output.shape, (2, 1))
    np.testing.assert_array_equal(output, [[3], [3]])
def run_batch(self, batch: Batch, max_output_length: int, beam_size: int,
              beam_alpha: float) -> (np.array, np.array, np.array):
    """
    Get outputs and attention scores for a given batch

    :param batch: batch to generate hypotheses for
    :param max_output_length: maximum length of hypotheses
    :param beam_size: size of the beam for beam search; if 0, use greedy
    :param beam_alpha: alpha value for beam search
    :return:
        stacked_output: hypotheses for batch,
        stacked_attention_scores: attention scores for batch,
        stacked_kb_att_scores: knowledgebase attention scores for batch
    """
    encoder_output, encoder_hidden = self.encode(batch.src,
                                                 batch.src_lengths,
                                                 batch.src_mask)

    # if maximum output length is not globally specified, adapt to src len
    if max_output_length is None:
        max_output_length = int(max(batch.src_lengths.cpu().numpy()) * 1.5)

    if hasattr(batch, "kbsrc"):
        # B x KB x EMB; B x KB; B x KB
        kb_keys, kb_values, kb_values_embed, kb_trv, kb_mask = \
            self.preprocess_batch_kb(batch, kbattdims=self.kb_att_dims)
        if kb_keys is None:
            knowledgebase = None
        else:
            knowledgebase = (kb_keys, kb_values, kb_values_embed, kb_mask)
    else:
        knowledgebase = None

    # greedy decoding
    if beam_size == 0:
        stacked_output, stacked_attention_scores, stacked_kb_att_scores, _ = \
            greedy(
                encoder_hidden=encoder_hidden,
                encoder_output=encoder_output,
                src_mask=batch.src_mask,
                embed=self.trg_embed,
                bos_index=self.bos_index,
                decoder=self.decoder,
                generator=self.generator,
                max_output_length=max_output_length,
                knowledgebase=knowledgebase)
        # batch, time, max_src_length
    else:  # beam search
        stacked_output, stacked_attention_scores, stacked_kb_att_scores = \
            beam_search(
                decoder=self.decoder,
                generator=self.generator,
                size=beam_size,
                encoder_output=encoder_output,
                encoder_hidden=encoder_hidden,
                src_mask=batch.src_mask,
                embed=self.trg_embed,
                max_output_length=max_output_length,
                alpha=beam_alpha,
                eos_index=self.eos_index,
                pad_index=self.pad_index,
                bos_index=self.bos_index,
                knowledgebase=knowledgebase)

    if knowledgebase is not None and self.do_postproc:
        with self.Timer("postprocessing hypotheses"):
            # replace KB value tokens with actual values in hypotheses, e.g.
            # ['your', '@event', 'is', 'at', '@meeting_time']
            #   => ['your', 'conference', 'is', 'at', '7pm']
            # assert kb_values.shape[1] == 1, kb_values.shape
            stacked_output = self.postprocess_batch_hypotheses(
                stacked_output, stacked_kb_att_scores, kb_values, kb_trv)

        print(f"proc_batch: Hypotheses: "
              f"{self.trv_vocab.arrays_to_sentences(stacked_output)}")
    else:
        print(f"proc_batch: Hypotheses: "
              f"{self.trg_vocab.arrays_to_sentences(stacked_output)}")

    return stacked_output, stacked_attention_scores, stacked_kb_att_scores
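# Illustrative sketch only (the real logic lives in postprocess_batch_hypotheses):
# the idea of the postprocessing step above is to replace canonical KB value
# placeholders such as '@meeting_time' in a hypothesis with the KB entry the
# decoder attended to most strongly at that step. All names below are
# hypothetical and simplified to a single hypothesis.
def replace_kb_placeholders(hypothesis, kb_att_scores, kb_true_values):
    out = []
    for step, token in enumerate(hypothesis):
        if token.startswith("@"):
            # pick the KB entry with the highest attention score at this step
            best_entry = int(kb_att_scores[step].argmax())
            out.append(kb_true_values[best_entry])
        else:
            out.append(token)
    return out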