def assert_single_loss_equals_batch_loss(self, model, batch_size=5):
  """
  Tests whether single loss equals batch loss. Here we don't truncate the target
  side and use masking instead.
  """
  src_sents = self.training_corpus.train_src_data[:batch_size]
  src_min = min([len(x) for x in src_sents])
  # Truncate all source sentences to the same length so they can be batched
  # without source-side masking, and make sure each still ends in </s>.
  src_sents_trunc = [s[:src_min] for s in src_sents]
  for single_sent in src_sents_trunc:
    single_sent[src_min - 1] = Vocab.ES
  trg_sents = self.training_corpus.train_trg_data[:batch_size]
  trg_max = max([len(x) for x in trg_sents])
  # Target-side mask: 1.0 marks padded positions beyond each sentence's length.
  trg_masks = Mask(np.zeros([batch_size, trg_max]))
  for i in range(batch_size):
    for j in range(len(trg_sents[i]), trg_max):
      trg_masks.np_arr[i, j] = 1.0
  trg_sents_padded = [[w for w in s] + [Vocab.ES] * (trg_max - len(s)) for s in trg_sents]

  # Sum of per-sentence losses, one computation graph per sentence.
  single_loss = 0.0
  for sent_id in range(batch_size):
    dy.renew_cg()
    train_loss = model.calc_loss(src=src_sents_trunc[sent_id],
                                 trg=trg_sents[sent_id]).value()
    single_loss += train_loss

  # Loss over the whole masked batch in a single computation graph.
  dy.renew_cg()
  batched_loss = model.calc_loss(src=mark_as_batch(src_sents_trunc),
                                 trg=mark_as_batch(trg_sents_padded, trg_masks)).value()
  self.assertAlmostEqual(single_loss, sum(batched_loss), places=4)
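# The test above relies on the convention that a Mask entry of 1.0 marks a
# padded target position whose per-token loss must be dropped. Below is a
# minimal, numpy-only sketch of that arithmetic (a hypothetical helper, not
# part of the codebase): summing each sentence's token losses directly should
# match summing the masked batch rows.
import numpy as np

def masked_batch_loss(token_losses, mask):
  # token_losses: [batch, trg_max] per-token losses, including padded positions
  # mask: [batch, trg_max], 1.0 where padded, 0.0 where real tokens
  return (token_losses * (1.0 - mask)).sum(axis=1)

lens = [3, 5, 2]
trg_max = max(lens)
mask = np.zeros((len(lens), trg_max))
for i, l in enumerate(lens):
  mask[i, l:] = 1.0
token_losses = np.random.rand(len(lens), trg_max)
per_sent = [token_losses[i, :l].sum() for i, l in enumerate(lens)]
assert np.allclose(masked_batch_loss(token_losses, mask), per_sent)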
def pad(self, outputs):
  # Pad all sequences to the length of the longest one by appending zero
  # vectors, then combine them into a single batched expression. Returns the
  # batched expression and a Mask (1 = padded position), or None if no
  # padding was needed.
  max_col = max(len(xs) for xs in outputs)
  P0 = dy.vecInput(outputs[0][0].dim()[0][0])  # zero vector used as padding
  masks = numpy.zeros((len(outputs), max_col), dtype=int)
  ret = []
  modified = False
  for xs, mask in zip(outputs, masks):
    deficit = max_col - len(xs)
    if deficit > 0:
      xs.extend([P0 for _ in range(deficit)])
      mask[-deficit:] = 1
      modified = True
    ret.append(dy.concatenate_cols(xs))
  mask = Mask(masks) if modified else None
  return dy.concatenate_to_batch(ret), mask
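# Usage sketch for pad() above, assuming DyNet and the Mask class from this
# codebase are importable and pad() is defined at module level as shown (it
# never touches self, so None suffices for that argument here). Two sequences
# of 2-dimensional vectors, lengths 2 and 1:
import dynet as dy

dy.renew_cg()
seq_a = [dy.inputVector([1.0, 2.0]), dy.inputVector([3.0, 4.0])]
seq_b = [dy.inputVector([5.0, 6.0])]
padded, mask = pad(None, [seq_a, seq_b])
# seq_b was extended in place with the zero vector P0, and the two sequences
# were combined into one batched expression of 2 columns with batch size 2:
assert padded.dim() == ((2, 2), 2)
# mask.np_arr == [[0, 0],
#                 [0, 1]]   # the 1 marks seq_b's padded final position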
def assert_single_loss_equals_batch_loss(self, model, pad_src_to_multiple=1):
  """
  Tests whether single loss equals batch loss. Here we don't truncate the target
  side and use masking instead.
  """
  batch_size = 5
  src_sents = self.src_data[:batch_size]
  src_min = min([x.sent_len() for x in src_sents])
  # Truncate all source sentences to the same length, terminate them with </s>,
  # and pad them up to a multiple of pad_src_to_multiple if requested.
  src_sents_trunc = [s.words[:src_min] for s in src_sents]
  for single_sent in src_sents_trunc:
    single_sent[src_min - 1] = Vocab.ES
    while len(single_sent) % pad_src_to_multiple != 0:
      single_sent.append(Vocab.ES)

  trg_sents = sorted(self.trg_data[:batch_size], key=lambda x: x.sent_len(), reverse=True)
  trg_max = max([x.sent_len() for x in trg_sents])
  # Target-side mask: 1.0 marks padded positions.
  np_arr = np.zeros([batch_size, trg_max])
  for i in range(batch_size):
    for j in range(trg_sents[i].sent_len(), trg_max):
      np_arr[i, j] = 1.0
  trg_masks = Mask(np_arr)
  trg_sents_padded = [[w for w in s] + [Vocab.ES] * (trg_max - s.sent_len()) for s in trg_sents]

  src_sents_trunc = [SimpleSentenceInput(s) for s in src_sents_trunc]
  trg_sents_padded = [SimpleSentenceInput(s) for s in trg_sents_padded]

  single_loss = 0.0
  for sent_id in range(batch_size):
    dy.renew_cg()
    train_loss = model.calc_loss(src=src_sents_trunc[sent_id],
                                 trg=trg_sents[sent_id],
                                 loss_calculator=AutoRegressiveMLELoss()).value()
    single_loss += train_loss

  dy.renew_cg()
  batched_loss = model.calc_loss(src=mark_as_batch(src_sents_trunc),
                                 trg=mark_as_batch(trg_sents_padded, trg_masks),
                                 loss_calculator=AutoRegressiveMLELoss()).value()
  self.assertAlmostEqual(single_loss, np.sum(batched_loss), places=4)
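# Minimal, pure-Python sketch of the pad_src_to_multiple logic used above
# (a hypothetical free function; in the test it is done inline). Padding
# source lengths up to a multiple is useful e.g. for encoders that downsample
# their input by a fixed factor.
def pad_to_multiple(words, multiple, pad_id):
  # Append pad_id until the length is divisible by `multiple`.
  while len(words) % multiple != 0:
    words.append(pad_id)
  return words

assert pad_to_multiple([7, 8, 9, 1], 4, 1) == [7, 8, 9, 1]
assert pad_to_multiple([7, 8, 9, 1, 1], 4, 1) == [7, 8, 9, 1, 1, 1, 1, 1]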
def generate(self, src: Batch, idx: Sequence[int], search_strategy: SearchStrategy,
             forced_trg_ids: Batch = None):
  if src.batch_size() != 1:
    raise NotImplementedError("batched decoding not implemented for DefaultTranslator. "
                              "Specify inference batcher with batch size 1.")
  assert src.batch_size() == len(idx), f"src: {src.batch_size()}, idx: {len(idx)}"
  # Generating outputs
  self.start_sent(src)
  outputs = []
  cur_forced_trg = None
  sent = src[0]
  sent_mask = None
  if src.mask:
    sent_mask = Mask(np_arr=src.mask.np_arr[0:1])
  sent_batch = mark_as_batch([sent], mask=sent_mask)
  # TODO: MBR could be implemented here. Currently only the first result from
  # the encoder is used; implementing MBR would require generation to consider
  # multiple encoder outputs.
  initial_state = self._encode_src(sent_batch)[0]
  if forced_trg_ids is not None:
    cur_forced_trg = forced_trg_ids[0]
  search_outputs = search_strategy.generate_output(self, initial_state,
                                                   src_length=[sent.sent_len()],
                                                   forced_trg_ids=cur_forced_trg)
  sorted_outputs = sorted(search_outputs, key=lambda x: x.score[0], reverse=True)
  assert len(sorted_outputs) >= 1
  for curr_output in sorted_outputs:
    output_actions = [x for x in curr_output.word_ids[0]]
    attentions = [x for x in curr_output.attentions[0]]
    score = curr_output.score[0]
    if len(sorted_outputs) == 1:
      outputs.append(TextOutput(actions=output_actions,
                                vocab=getattr(self.trg_reader, "vocab", None),
                                score=score))
    else:
      outputs.append(NbestOutput(TextOutput(actions=output_actions,
                                            vocab=getattr(self.trg_reader, "vocab", None),
                                            score=score),
                                 nbest_id=idx[0]))
  if self.compute_report:
    attentions = np.concatenate([x.npvalue() for x in attentions], axis=1)
    self.add_sent_for_report({"idx": idx[0],
                              "attentions": attentions,
                              "src": sent,
                              "src_vocab": getattr(self.src_reader, "vocab", None),
                              "trg_vocab": getattr(self.trg_reader, "vocab", None),
                              "output": outputs[0]})
  return outputs
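# Self-contained sketch (with a hypothetical stand-in type) of the hypothesis
# selection above: search outputs carry per-batch-element score lists and are
# sorted by their first score, best (highest log-probability) first, so that
# outputs[0] is the top hypothesis.
from collections import namedtuple

SearchOutput = namedtuple("SearchOutput", ["word_ids", "score"])
hyps = [SearchOutput(word_ids=[[4, 7, 2]], score=[-3.1]),
        SearchOutput(word_ids=[[4, 9, 2]], score=[-2.4]),
        SearchOutput(word_ids=[[5, 7, 2]], score=[-5.0])]
sorted_hyps = sorted(hyps, key=lambda x: x.score[0], reverse=True)
assert sorted_hyps[0].score[0] == -2.4  # least negative log-prob ranks first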