def test_beam_advance_against_known_reference(self):
    scorer = GNMTGlobalScorer(0.7, 0., "avg", "none")  # alpha, beta, length_penalty, coverage_penalty
    beam = BeamSearch(
        self.BEAM_SZ, self.BATCH_SZ,
        0, 1, 2,  # pad, bos, eos token indices
        self.N_BEST, torch.device("cpu"), scorer,
        0, 30,  # min_length, max_length
        False, 0, set(),
        torch.randint(0, 30, (self.BATCH_SZ,)),  # memory_lengths
        False)
    expected_beam_scores = self.init_step(beam, 1.)
    expected_beam_scores = self.first_step(beam, expected_beam_scores, 3)
    expected_beam_scores = self.second_step(beam, expected_beam_scores, 4)
    self.third_step(beam, expected_beam_scores, 5)
# Assumed context: this is a method of a larger decoder model class;
# BeamSearch/GNMTGlobalScorer come from onmt.translate, tile from
# onmt.utils.misc, log_softmax from torch.nn.functional, and pad_sequence
# from torch.nn.utils.rnn.
def forward_dev_beam_search(self, encoder_output: torch.Tensor, pad_mask):
    batch_size = encoder_output.size(1)
    self.state["cache"] = None
    memory_lengths = pad_mask.ne(pad_token_index).sum(dim=0)

    # Replicate the decoder state, encoder output and source tensors
    # beam_size times so each hypothesis gets its own copy.
    self.map_state(lambda state, dim: tile(state, self.beam_size, dim=dim))
    encoder_output = tile(encoder_output, self.beam_size, dim=1)
    pad_mask = tile(pad_mask, self.beam_size, dim=1)
    memory_lengths = tile(memory_lengths, self.beam_size, dim=0)

    # TODO:
    # - fix attn (?)
    # - use coverage_penalty="summary" or "wu" and beta=0.2 (or not)
    # - use length_penalty="wu" and alpha=0.2 (or not)
    beam = BeamSearch(beam_size=self.beam_size,
                      n_best=1,
                      batch_size=batch_size,
                      mb_device=default_device,
                      global_scorer=GNMTGlobalScorer(alpha=0, beta=0,
                                                     coverage_penalty="none",
                                                     length_penalty="avg"),
                      pad=pad_token_index,
                      eos=eos_token_index,
                      bos=bos_token_index,
                      min_length=1,
                      max_length=100,
                      return_attention=False,
                      stepwise_penalty=False,
                      block_ngram_repeat=0,
                      exclusion_tokens=set(),
                      memory_lengths=memory_lengths,
                      ratio=-1)

    for i in range(self.max_seq_out_len):
        inp = beam.current_predictions.view(1, -1)
        out, attn = self.forward_step(src=pad_mask, tgt=inp,
                                      memory_bank=encoder_output,
                                      step=i)       # 1 x batch*beam x hidden
        out = self.linear(out)                      # 1 x batch*beam x vocab_out
        out = log_softmax(out, dim=2)               # 1 x batch*beam x vocab_out
        out = out.squeeze(0)                        # batch*beam x vocab_out
        # attn = attn.squeeze(0)                    # batch*beam x vocab_out
        # out = out.view(batch_size, self.beam_size, -1)    # batch x beam x vocab_out
        # attn = attn.view(batch_size, self.beam_size, -1)
        # TODO: fix attn (?)

        beam.advance(out, attn)
        any_beam_is_finished = beam.is_finished.any()
        if any_beam_is_finished:
            beam.update_finished()
            if beam.done:
                break

        select_indices = beam.current_origin
        if any_beam_is_finished:
            # Reorder states to keep only the surviving beams.
            encoder_output = encoder_output.index_select(1, select_indices)
            pad_mask = pad_mask.index_select(1, select_indices)
            memory_lengths = memory_lengths.index_select(0, select_indices)
        self.map_state(lambda state, dim: state.index_select(dim, select_indices))

    # Keep the single best hypothesis per batch element and pad to a tensor.
    outputs = beam.predictions
    outputs = [x[0] for x in outputs]
    outputs = pad_sequence(outputs, batch_first=True)
    return [outputs]
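# A minimal usage sketch (hypothetical: `model`, the shapes, and pad id 0 are
# assumptions, not part of the code above). Note that despite its name,
# `pad_mask` is used as the raw source token tensor: the method derives
# memory_lengths via pad_mask.ne(pad_token_index) and feeds it to forward_step
# as `src`.
import torch

src_len, batch_size, hidden = 12, 4, 512
encoder_output = torch.randn(src_len, batch_size, hidden)  # src_len x batch x hidden
src_tokens = torch.randint(3, 100, (src_len, batch_size))  # token ids; 0 assumed pad
# predictions = model.forward_dev_beam_search(encoder_output, src_tokens)[0]
# -> batch x max_len tensor holding the best hypothesis for each example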
def test_beam_advance_against_known_reference(self):
    scorer = GNMTGlobalScorer(0.7, 0., "avg", "none")
    beam = Beam(self.BEAM_SZ, 0, 1, self.EOS_IDX,
                n_best=self.N_BEST, exclusion_tokens=set(), min_length=0,
                global_scorer=scorer, block_ngram_repeat=0)
    expected_beam_scores = self.init_step(beam)
    expected_beam_scores = self.first_step(beam, expected_beam_scores, 3)
    expected_beam_scores = self.second_step(beam, expected_beam_scores, 4)
    self.third_step(beam, expected_beam_scores, 5)