Example #1
0
    def test_recurrent_beam1(self):
        """Beam search with beam=1 must reproduce greedy decoding exactly."""
        batch_size = 2
        max_output_length = 3
        src_mask, emb, decoder, encoder_output, encoder_hidden = self._build(
            batch_size=batch_size)

        # Greedy reference output.
        greedy_output, _ = recurrent_greedy(
            src_mask=src_mask,
            embed=emb,
            bos_index=self.bos_index,
            eos_index=self.eos_index,
            max_output_length=max_output_length,
            decoder=decoder,
            encoder_output=encoder_output,
            encoder_hidden=encoder_hidden)

        # Degenerate beam search: one hypothesis, length penalty alpha=1.0.
        beam_output, _ = beam_search(
            size=1,
            n_best=1,
            alpha=1.0,
            eos_index=self.eos_index,
            pad_index=self.pad_index,
            bos_index=self.bos_index,
            src_mask=src_mask,
            embed=emb,
            max_output_length=max_output_length,
            decoder=decoder,
            encoder_output=encoder_output,
            encoder_hidden=encoder_hidden)
        np.testing.assert_array_equal(greedy_output, beam_output)
Example #2
0
 def test_transformer_beam7(self):
     """Beam of 7 on the toy transformer: both hypotheses are immediate EOS."""
     batch_size = 2
     max_output_length = 3
     src_mask, embed, decoder, encoder_output, \
         encoder_hidden = self._build(batch_size=batch_size)
     output, attention_scores = beam_search(
         size=7,
         n_best=1,
         alpha=1.,
         eos_index=self.eos_index,
         pad_index=self.pad_index,
         bos_index=self.bos_index,
         src_mask=src_mask,
         embed=embed,
         max_output_length=max_output_length,
         decoder=decoder,
         encoder_output=encoder_output,
         encoder_hidden=encoder_hidden)
     # Transformer beam search does not track attention scores.
     self.assertIsNone(attention_scores)
     # Shape is batch x time; EOS is produced right away, so everything
     # after the first step gets cut off.
     self.assertEqual(output.shape, (batch_size, 1))
     np.testing.assert_equal(output, [[3], [3]])
Example #3
0
    def test_transformer_beam1(self):
        """Transformer beam search with beam=1 must equal greedy decoding."""
        batch_size = 2
        max_output_length = 3
        src_mask, embed, decoder, encoder_output, \
            encoder_hidden = self._build(batch_size=batch_size)
        output, attention_scores = beam_search(
            size=1,
            alpha=1.,
            eos_index=self.eos_index,
            pad_index=self.pad_index,
            bos_index=self.bos_index,
            src_mask=src_mask,
            embed=embed,
            max_output_length=max_output_length,
            decoder=decoder,
            encoder_output=encoder_output,
            encoder_hidden=encoder_hidden)
        # Transformer beam search does not return attention scores.
        self.assertIsNone(attention_scores)
        # Shape is batch x time; the toy model keeps emitting token 5.
        self.assertEqual(output.shape, (batch_size, max_output_length))
        np.testing.assert_equal(output, [[5, 5, 5], [5, 5, 5]])

        # Greedy decoding must coincide with the degenerate beam.
        greedy_output, _ = transformer_greedy(
            src_mask=src_mask,
            embed=embed,
            bos_index=self.bos_index,
            max_output_length=max_output_length,
            decoder=decoder,
            encoder_output=encoder_output,
            encoder_hidden=encoder_hidden)
        np.testing.assert_equal(output, greedy_output)
Example #4
0
    def test_recurrent_beam1(self):
        """With beam=1, beam search must match greedy decoding exactly."""
        batch_size = 2
        max_output_length = 3
        src_mask, model, encoder_output, encoder_hidden = self._build(
            batch_size=batch_size)

        # Greedy reference output.
        greedy_output, _ = recurrent_greedy(
            src_mask=src_mask,
            max_output_length=max_output_length,
            model=model,
            encoder_output=encoder_output,
            encoder_hidden=encoder_hidden)

        # Degenerate beam search: one hypothesis, alpha=1.0 length penalty.
        beam_output, _ = beam_search(
            size=1,
            n_best=1,
            alpha=1.0,
            src_mask=src_mask,
            max_output_length=max_output_length,
            model=model,
            encoder_output=encoder_output,
            encoder_hidden=encoder_hidden)
        np.testing.assert_array_equal(greedy_output, beam_output)
Example #5
0
    def run_batch(self, batch: Batch, max_output_length: int, beam_size: int,
                  beam_alpha: float) -> (np.array, np.array):
        """
        Decode a batch of source sentences into hypotheses.

        :param batch: batch to generate hypotheses for
        :param max_output_length: maximum length of hypotheses; if None it is
            derived as 1.5x the longest source length in the batch
        :param beam_size: beam width; values below 2 fall back to greedy
        :param beam_alpha: length-penalty alpha for beam search
        :return: stacked_output: hypotheses for batch,
            stacked_attention_scores: attention scores for batch
        """
        encoder_output, encoder_hidden = self.encode(batch.src,
                                                     batch.src_lengths,
                                                     batch.src_mask,
                                                     self.encoder)
        if self.encoder_2:
            # Second (context) encoder: fuse both encodings via the last
            # layer and re-normalize. The recurrent hidden state is dropped
            # since the fused representation replaces it.
            encoder_output_2, encoder_hidden_2 = self.encode(
                src=batch.src_prev,
                src_length=batch.src_prev_lengths,
                src_mask=batch.src_prev_mask,
                encoder=self.encoder_2)
            fused = self.last_layer(encoder_output, batch.src_mask,
                                    encoder_output_2, batch.src_prev_mask)
            encoder_output = self.last_layer_norm(fused)
            encoder_hidden = None

        # Without a global cap, scale the budget with the longest source.
        if max_output_length is None:
            max_output_length = int(max(batch.src_lengths.cpu().numpy()) * 1.5)

        if beam_size < 2:
            # Greedy decoding; attention is batch x time x max_src_length.
            stacked_output, stacked_attention_scores = greedy(
                encoder_hidden=encoder_hidden,
                encoder_output=encoder_output,
                eos_index=self.eos_index,
                src_mask=batch.src_mask,
                embed=self.trg_embed,
                bos_index=self.bos_index,
                decoder=self.decoder,
                max_output_length=max_output_length)
        else:
            # Beam search decoding.
            stacked_output, stacked_attention_scores = beam_search(
                size=beam_size,
                encoder_output=encoder_output,
                encoder_hidden=encoder_hidden,
                src_mask=batch.src_mask,
                embed=self.trg_embed,
                max_output_length=max_output_length,
                alpha=beam_alpha,
                eos_index=self.eos_index,
                pad_index=self.pad_index,
                bos_index=self.bos_index,
                decoder=self.decoder)

        return stacked_output, stacked_attention_scores
Example #6
0
    def run_batch(self, batch: Batch, max_output_length: int, beam_size: int,
                  beam_alpha: float, return_logp: bool = False) \
            -> (np.array, np.array, Optional[np.array]):
        """
        Decode a batch and optionally track hypothesis log-probabilities.

        :param batch: batch to generate hypotheses for
        :param max_output_length: maximum length of hypotheses; if None it is
            derived as 1.5x the longest source length in the batch
        :param beam_size: size of the beam for beam search, if 0 use greedy
        :param beam_alpha: length-penalty alpha for beam search
        :param return_logp: keep track of log probabilities as well
        :return:
            - stacked_output: hypotheses for batch,
            - stacked_attention_scores: attention scores for batch
            - logprobs: log probabilities for batch hypotheses
        """
        encoder_output, encoder_hidden = self.encode(
            batch.src, batch.src_lengths, batch.src_mask)

        # Without a global cap, scale the budget with the longest source.
        if max_output_length is None:
            max_output_length = int(max(batch.src_lengths.cpu().numpy()) * 1.5)

        if beam_size == 0:
            # Greedy decoding; attention is batch x time x max_src_length.
            stacked_output, stacked_attention_scores, logprobs = greedy(
                encoder_hidden=encoder_hidden,
                encoder_output=encoder_output,
                src_mask=batch.src_mask,
                embed=self.trg_embed,
                bos_index=self.bos_index,
                decoder=self.decoder,
                max_output_length=max_output_length,
                eos_index=self.eos_index,
                return_logp=return_logp)
        else:
            # Beam search decoding (beam_size > 0).
            stacked_output, stacked_attention_scores, logprobs = beam_search(
                size=beam_size,
                encoder_output=encoder_output,
                encoder_hidden=encoder_hidden,
                src_mask=batch.src_mask,
                embed=self.trg_embed,
                max_output_length=max_output_length,
                alpha=beam_alpha,
                eos_index=self.eos_index,
                pad_index=self.pad_index,
                bos_index=self.bos_index,
                decoder=self.decoder,
                return_logp=return_logp)

        return stacked_output, stacked_attention_scores, logprobs
Example #7
0
    def test_recurrent_beam7(self):
        """Beam of 7 on the toy recurrent model: EOS fires immediately."""
        batch_size = 2
        max_output_length = 3
        src_mask, model, encoder_output, encoder_hidden = self._build(
            batch_size=batch_size)

        output, _ = beam_search(size=7,
                                n_best=1,
                                alpha=1.0,
                                src_mask=src_mask,
                                max_output_length=max_output_length,
                                model=model,
                                encoder_output=encoder_output,
                                encoder_hidden=encoder_hidden)

        # Both hypotheses consist of a single EOS token (index 3).
        self.assertEqual(output.shape, (2, 1))
        np.testing.assert_array_equal(output, [[3], [3]])
Example #8
0
    def run_batch(self, batch: Batch, max_output_length: int, beam_size: int,
                  beam_alpha: float):
        """
        Decode a batch of source sentences into hypotheses.

        :param batch: batch to generate hypotheses for
        :param max_output_length: maximum length of hypotheses; if None it is
            derived as 1.5x the longest source length in the batch
        :param beam_size: size of the beam for beam search, if 0 use greedy
        :param beam_alpha: length-penalty alpha for beam search
        :return: stacked hypotheses and stacked attention scores
        """
        encoder_output, encoder_hidden = self.encode(
            batch.src, batch.src_lengths, batch.src_mask)

        # Without a global cap, scale the budget with the longest source.
        if max_output_length is None:
            max_output_length = int(max(batch.src_lengths.cpu().numpy()) * 1.5)

        if beam_size == 0:
            # Greedy decoding; attention is batch x time x max_src_length.
            stacked_output, stacked_attention_scores = greedy(
                encoder_hidden=encoder_hidden,
                encoder_output=encoder_output,
                src_mask=batch.src_mask,
                embed=self.trg_embed,
                bos_index=self.bos_index,
                decoder=self.decoder,
                max_output_length=max_output_length)
        else:
            # Beam search decoding (beam_size > 0).
            stacked_output, stacked_attention_scores = beam_search(
                size=beam_size,
                encoder_output=encoder_output,
                encoder_hidden=encoder_hidden,
                src_mask=batch.src_mask,
                embed=self.trg_embed,
                max_output_length=max_output_length,
                alpha=beam_alpha,
                eos_index=self.eos_index,
                pad_index=self.pad_index,
                bos_index=self.bos_index,
                decoder=self.decoder)

        return stacked_output, stacked_attention_scores
Example #9
0
    def test_recurrent_beam7(self):
        """Beam of 7 on the toy recurrent decoder: EOS fires immediately."""
        batch_size = 2
        max_output_length = 3
        src_mask, emb, decoder, encoder_output, encoder_hidden = self._build(
            batch_size=batch_size)

        output, _ = beam_search(size=7,
                                n_best=1,
                                alpha=1.0,
                                eos_index=self.eos_index,
                                pad_index=self.pad_index,
                                bos_index=self.bos_index,
                                src_mask=src_mask,
                                embed=emb,
                                max_output_length=max_output_length,
                                decoder=decoder,
                                encoder_output=encoder_output,
                                encoder_hidden=encoder_hidden)

        # Both hypotheses are a single EOS token (index 3).
        self.assertEqual(output.shape, (2, 1))
        np.testing.assert_array_equal(output, [[3], [3]])
Example #10
0
    def run_batch(self, batch: Batch, max_output_length: int, beam_size: int,
                  beam_alpha: float) -> (np.array, np.array, np.array):
        """
        Get outputs and attention scores for a given batch.

        :param batch: batch to generate hypotheses for
        :param max_output_length: maximum length of hypotheses; if None it is
            derived as 1.5x the longest source length in the batch
        :param beam_size: size of the beam for beam search, if 0 use greedy
        :param beam_alpha: alpha value (length penalty) for beam search
        :return:
            stacked_output: hypotheses for batch,
            stacked_attention_scores: attention scores for batch,
            stacked_kb_att_scores: knowledgebase attention scores for batch
        """
        encoder_output, encoder_hidden = self.encode(batch.src,
                                                     batch.src_lengths,
                                                     batch.src_mask)

        # if maximum output length is not globally specified, adapt to src len
        if max_output_length is None:
            max_output_length = int(max(batch.src_lengths.cpu().numpy()) * 1.5)

        if hasattr(batch, "kbsrc"):
            # B x KB x EMB; B x KB; B x KB
            kb_keys, kb_values, kb_values_embed, kb_trv, kb_mask = \
                self.preprocess_batch_kb(batch, kbattdims=self.kb_att_dims)
            if kb_keys is None:
                knowledgebase = None
            else:
                knowledgebase = (kb_keys, kb_values, kb_values_embed, kb_mask)
        else:
            knowledgebase = None

        # greedy decoding
        if beam_size == 0:
            stacked_output, stacked_attention_scores, stacked_kb_att_scores, _ = greedy(
                encoder_hidden=encoder_hidden,
                encoder_output=encoder_output,
                src_mask=batch.src_mask,
                embed=self.trg_embed,
                bos_index=self.bos_index,
                decoder=self.decoder,
                generator=self.generator,
                max_output_length=max_output_length,
                knowledgebase=knowledgebase)
            # batch, time, max_src_length
        else:  # beam size
            stacked_output, stacked_attention_scores, stacked_kb_att_scores = \
                beam_search(
                    decoder=self.decoder,
                    generator=self.generator,
                    size=beam_size,
                    encoder_output=encoder_output,
                    encoder_hidden=encoder_hidden,
                    src_mask=batch.src_mask,
                    embed=self.trg_embed,
                    max_output_length=max_output_length,
                    alpha=beam_alpha,
                    eos_index=self.eos_index,
                    pad_index=self.pad_index,
                    bos_index=self.bos_index,
                    knowledgebase=knowledgebase)

        # Identity check (PEP 8): 'is not None' instead of '!= None'.
        if knowledgebase is not None and self.do_postproc:
            with self.Timer("postprocessing hypotheses"):
                # replace kb value tokens with actual values in hypotheses, e.g.
                # ['your','@event','is','at','@meeting_time']
                #   => ['your', 'conference', 'is', 'at', '7pm']
                stacked_output = self.postprocess_batch_hypotheses(
                    stacked_output, stacked_kb_att_scores, kb_values, kb_trv)

            print(
                f"proc_batch: Hypotheses: {self.trv_vocab.arrays_to_sentences(stacked_output)}"
            )
        else:
            print(
                f"proc_batch: Hypotheses: {self.trg_vocab.arrays_to_sentences(stacked_output)}"
            )

        return stacked_output, stacked_attention_scores, stacked_kb_att_scores