Example #1
0
    def assert_single_loss_equals_batch_loss(self, model, batch_size=5):
        """Test that summed per-sentence losses equal the batched loss.

        Source sentences are truncated to the length of the shortest one
        (with the final token forced to ``Vocab.ES``); the target side is
        left untruncated and padded with ``Vocab.ES``, with a mask marking
        padded positions so they do not contribute to the batched loss.

        Args:
          model: translator model exposing ``calc_loss(src, trg)``.
          batch_size: number of sentences from the training corpus to test.
        """
        # BUG FIX: the parameter was previously shadowed by a hard-coded
        # ``batch_size = 5`` here, so callers could never change it.
        src_sents = self.training_corpus.train_src_data[:batch_size]
        src_min = min(len(x) for x in src_sents)
        src_sents_trunc = [s[:src_min] for s in src_sents]
        for single_sent in src_sents_trunc:
            # Force the last token to be the end-of-sentence marker.
            single_sent[src_min - 1] = Vocab.ES
        trg_sents = self.training_corpus.train_trg_data[:batch_size]
        trg_max = max(len(x) for x in trg_sents)
        # Mask entries of 1.0 mark padded target positions (ignored by loss).
        trg_masks = Mask(np.zeros([batch_size, trg_max]))
        for i in range(batch_size):
            for j in range(len(trg_sents[i]), trg_max):
                trg_masks.np_arr[i, j] = 1.0
        trg_sents_padded = [list(s) + [Vocab.ES] * (trg_max - len(s))
                            for s in trg_sents]

        # Sum per-sentence losses, each computed on a fresh computation graph.
        single_loss = 0.0
        for sent_id in range(batch_size):
            dy.renew_cg()
            train_loss = model.calc_loss(src=src_sents_trunc[sent_id],
                                         trg=trg_sents[sent_id]).value()
            single_loss += train_loss

        dy.renew_cg()

        batched_loss = model.calc_loss(src=mark_as_batch(src_sents_trunc),
                                       trg=mark_as_batch(
                                           trg_sents_padded,
                                           trg_masks)).value()
        self.assertAlmostEqual(single_loss, sum(batched_loss), places=4)
Example #2
0
 def pad(self, outputs):
     """Pad every output sequence to the longest length in the batch.

     Shorter sequences are extended with a shared zero vector; a mask row
     gets 1s over the padded tail positions. Returns the column-concatenated
     sequences stacked into a batch, plus the mask (or None if nothing
     needed padding).
     """
     longest = max(len(seq) for seq in outputs)
     # Shared zero vector matching the dimensionality of the first output.
     pad_vec = dy.vecInput(outputs[0][0].dim()[0][0])
     mask_rows = numpy.zeros((len(outputs), longest), dtype=int)
     padded = []
     any_padding = False
     for seq, row in zip(outputs, mask_rows):
         missing = longest - len(seq)
         if missing > 0:
             seq.extend(pad_vec for _ in range(missing))
             row[-missing:] = 1
             any_padding = True
         padded.append(dy.concatenate_cols(seq))
     return dy.concatenate_to_batch(padded), (Mask(mask_rows) if any_padding else None)
Example #3
0
  def assert_single_loss_equals_batch_loss(self, model, pad_src_to_multiple=1):
    """
    Tests whether single loss equals batch loss.
    Here we don't truncate the target side and use masking.
    """
    batch_size = 5
    src_sents = self.src_data[:batch_size]
    shortest_src = min(s.sent_len() for s in src_sents)
    # Truncate all sources to the shortest length, end with Vocab.ES, then
    # pad with Vocab.ES until the length is a multiple of the requested value.
    src_sents_trunc = [s.words[:shortest_src] for s in src_sents]
    for sent in src_sents_trunc:
      sent[shortest_src - 1] = Vocab.ES
      while len(sent) % pad_src_to_multiple != 0:
        sent.append(Vocab.ES)
    trg_sents = sorted(self.trg_data[:batch_size], key=lambda x: x.sent_len(), reverse=True)
    trg_max = max(s.sent_len() for s in trg_sents)
    # Mask rows carry 1.0 over the padded tail of each target sentence.
    np_arr = np.zeros([batch_size, trg_max])
    for row, sent in enumerate(trg_sents):
      np_arr[row, sent.sent_len():] = 1.0
    trg_masks = Mask(np_arr)
    trg_sents_padded = [list(s) + [Vocab.ES] * (trg_max - s.sent_len()) for s in trg_sents]

    src_sents_trunc = [SimpleSentenceInput(s) for s in src_sents_trunc]
    trg_sents_padded = [SimpleSentenceInput(s) for s in trg_sents_padded]

    # Accumulate per-sentence losses, each on its own computation graph.
    single_loss = 0.0
    for i in range(batch_size):
      dy.renew_cg()
      single_loss += model.calc_loss(src=src_sents_trunc[i],
                                     trg=trg_sents[i],
                                     loss_calculator=AutoRegressiveMLELoss()).value()

    dy.renew_cg()

    batched_loss = model.calc_loss(src=mark_as_batch(src_sents_trunc),
                                   trg=mark_as_batch(trg_sents_padded, trg_masks),
                                   loss_calculator=AutoRegressiveMLELoss()).value()
    self.assertAlmostEqual(single_loss, np.sum(batched_loss), places=4)
Example #4
0
    def generate(self,
                 src: Batch,
                 idx: Sequence[int],
                 search_strategy: SearchStrategy,
                 forced_trg_ids: Batch = None):
        """Generate output hypotheses for a batch of exactly one sentence.

        Args:
          src: source batch; must have batch_size() == 1 (batched decoding
            is not implemented for this translator).
          idx: sentence ids aligned with ``src`` (same length as the batch).
          search_strategy: search strategy used to produce candidate outputs.
          forced_trg_ids: optional target ids for forced decoding.

        Returns:
          A list of outputs: a single ``TextOutput`` when the search yields
          one hypothesis, otherwise one ``NbestOutput`` per hypothesis.
        """
        if src.batch_size() != 1:
            raise NotImplementedError(
                "batched decoding not implemented for DefaultTranslator. "
                "Specify inference batcher with batch size 1.")
        assert src.batch_size() == len(
            idx), f"src: {src.batch_size()}, idx: {len(idx)}"
        # Generating outputs
        self.start_sent(src)
        outputs = []
        cur_forced_trg = None
        sent = src[0]
        sent_mask = None
        # Re-wrap the single sentence (and its mask row, if any) as a batch
        # of one for the encoder.
        if src.mask: sent_mask = Mask(np_arr=src.mask.np_arr[0:1])
        sent_batch = mark_as_batch([sent], mask=sent_mask)
        # TODO MBR can be implemented here. It takes only the first result from the encoder
        # To further implement MBR, we need to handle the generation considering multiple encoder output.
        initial_state = self._encode_src(sent_batch)[0]
        if forced_trg_ids is not None: cur_forced_trg = forced_trg_ids[0]
        search_outputs = search_strategy.generate_output(
            self,
            initial_state,
            src_length=[sent.sent_len()],
            forced_trg_ids=cur_forced_trg)
        # Best-scoring hypothesis first.
        sorted_outputs = sorted(search_outputs,
                                key=lambda x: x.score[0],
                                reverse=True)
        assert len(sorted_outputs) >= 1
        for curr_output in sorted_outputs:
            output_actions = [x for x in curr_output.word_ids[0]]
            attentions = [x for x in curr_output.attentions[0]]
            score = curr_output.score[0]
            # Single hypothesis -> plain TextOutput; multiple -> wrap each
            # in an NbestOutput tagged with the sentence id.
            if len(sorted_outputs) == 1:
                outputs.append(
                    TextOutput(actions=output_actions,
                               vocab=getattr(self.trg_reader, "vocab", None),
                               score=score))
            else:
                outputs.append(
                    NbestOutput(TextOutput(actions=output_actions,
                                           vocab=getattr(
                                               self.trg_reader, "vocab", None),
                                           score=score),
                                nbest_id=idx[0]))
        if self.compute_report:
            # NOTE(review): ``attentions`` is the loop variable left over from
            # the final iteration above, so only the last (lowest-scoring)
            # hypothesis's attentions are reported — confirm this is intended.
            attentions = np.concatenate([x.npvalue() for x in attentions],
                                        axis=1)
            self.add_sent_for_report({
                "idx":
                idx[0],
                "attentions":
                attentions,
                "src":
                sent,
                "src_vocab":
                getattr(self.src_reader, "vocab", None),
                "trg_vocab":
                getattr(self.trg_reader, "vocab", None),
                "output":
                outputs[0]
            })

        return outputs