Example #1
 def test_diverse_beam_search(self):
     search_strategy = search.DiverseBeamSearch(self.tgt_dict,
                                                num_groups=2,
                                                diversity_strength=0.)
     generator = SequenceGenerator(
         [self.model],
         self.tgt_dict,
         beam_size=2,
         search_strategy=search_strategy,
     )
     sample = {
         'net_input': {
             'src_tokens': self.src_tokens,
             'src_lengths': self.src_lengths
         }
     }
     hypos = generator.forward(sample)
     eos, w1, w2 = self.eos, self.w1, self.w2
     # sentence 1, beam 1
     self.assertHypoTokens(hypos[0][0], [w1, w1, eos])
     self.assertHypoScore(hypos[0][0], [0.9, 0.6, 1.0])
     # sentence 1, beam 2
     self.assertHypoTokens(hypos[0][1], [w1, w1, eos])
     self.assertHypoScore(hypos[0][1], [0.9, 0.6, 1.0])
     # sentence 2, beam 1
     self.assertHypoTokens(hypos[1][0], [w1, w2, eos])
     self.assertHypoScore(hypos[1][0], [0.7, 0.4, 0.9])
     # sentence 2, beam 2
     self.assertHypoTokens(hypos[1][1], [w1, w2, eos])
     self.assertHypoScore(hypos[1][1], [0.7, 0.4, 0.9])
Example #2
 def test_diverse_beam_search(self):
     generator = SequenceGenerator(
         self.tgt_dict,
         beam_size=2,
         diverse_beam_groups=2,
         diverse_beam_strength=0.0,
     )
     sample = {
         "net_input": {
             "src_tokens": self.src_tokens,
             "src_lengths": self.src_lengths,
         }
     }
     hypos = generator.generate([self.model], sample)
     eos, w1, w2 = self.eos, self.w1, self.w2
     # sentence 1, beam 1
     self.assertHypoTokens(hypos[0][0], [w1, w1, eos])
     self.assertHypoScore(hypos[0][0], [0.9, 0.6, 1.0])
     # sentence 1, beam 2
     self.assertHypoTokens(hypos[0][1], [w1, w1, eos])
     self.assertHypoScore(hypos[0][1], [0.9, 0.6, 1.0])
     # sentence 2, beam 1
     self.assertHypoTokens(hypos[1][0], [w1, w2, eos])
     self.assertHypoScore(hypos[1][0], [0.7, 0.4, 0.9])
     # sentence 2, beam 2
     self.assertHypoTokens(hypos[1][1], [w1, w2, eos])
     self.assertHypoScore(hypos[1][1], [0.7, 0.4, 0.9])
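Both snippets above return the same structure: hypos[i][j] is the j-th best hypothesis for the i-th input sentence, a dict carrying 'tokens', 'score', 'positional_scores', and 'alignment'. A minimal sketch of turning the best hypotheses into strings, assuming a loaded target dictionary and fairseq's conventional '@@ ' BPE marker:

 def best_hypo_strings(hypos, tgt_dict, remove_bpe='@@ '):
     # hypos: list (one per sentence) of hypothesis dicts sorted by score,
     # as returned by SequenceGenerator.generate()/forward()
     return [
         tgt_dict.string(sent_hypos[0]['tokens'].int().cpu(), remove_bpe)
         for sent_hypos in hypos
     ]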
Example #3
    def generate(self, src_sents):
        self._model.split_to_gpus(n_gpus=1)
        self._model.eval()

        src_text = src_sents[0]
        generator = SequenceGenerator(tgt_dict=self._model.dictionary,
                                      max_len_b=200,
                                      min_len=50,
                                      beam_size=10,
                                      len_penalty=2.,
                                      no_repeat_ngram_size=3)

        src_tokens = self._model.encode(src_text)
        if src_tokens.shape[0] > SRC_MAX_LEN:
            src_tokens = src_tokens[-SRC_MAX_LEN:]

        outputs = generator.generate(
            models=[self._model.model],
            sample={
                'net_input': {
                    'src_tokens': src_tokens.unsqueeze(0).to('cuda'),
                    'src_lengths': torch.tensor([len(src_tokens)]).to('cuda')
                }
            },
            bos_token=self._model.dictionary.bos())

        return [self._model.decode(outputs[0][0]['tokens'].cpu())]
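The method above returns only the single best decode; outputs[0] actually holds all beam_size candidates for the one input sentence, sorted by score. A hedged sketch of an n-best variant under the same fixture assumptions:

    def generate_nbest(self, outputs, n=3):
        # outputs[0] is the candidate list for the first (and only) sentence
        return [self._model.decode(hypo['tokens'].cpu())
                for hypo in outputs[0][:n]]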
Example #4
 def test_diverse_beam_search(self):
     search_strategy = search.DiverseSiblingsSearch(self.tgt_dict,
                                                    diversity_rate=0.5)
     generator = SequenceGenerator([self.model],
                                   self.tgt_dict,
                                   beam_size=2,
                                   search_strategy=search_strategy)
     sample = {
         "net_input": {
             "src_tokens": self.src_tokens,
             "src_lengths": self.src_lengths,
         }
     }
     hypos = generator.forward(sample)
     eos, w1, w2 = self.eos, self.w1, self.w2
     # sentence 1, beam 1
     self.assertHypoTokens(hypos[0][0], [w1, w1, eos])
     self.assertHypoScore(hypos[0][0], [0.9, 0.6, 1.0], [0, 1, 1], 0.5)
     # sentence 1, beam 2
     self.assertHypoTokens(hypos[0][1], [w1, w2, eos])
     self.assertHypoScore(hypos[0][1], [0.9, 0.4, 1.0], [0, 2, 1], 0.5)
     # sentence 2, beam 1
     self.assertHypoTokens(hypos[1][0], [w1, w2, eos])
     self.assertHypoScore(hypos[1][0], [0.7, 0.4, 0.9], [0, 1, 1], 0.5)
     # sentence 2, beam 2
     self.assertHypoTokens(hypos[1][1], [w1, w1, eos])
     self.assertHypoScore(hypos[1][1], [0.7, 0.35, 0.9], [0, 2, 1], 0.5)
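In diverse siblings search (Li & Jurafsky, 2016), the k-th ranked continuation of each hypothesis is penalized by k times the diversity rate before candidates compete across beams; the extra assertHypoScore arguments above are the per-step sibling ranks and that rate. A standalone sketch of the penalty, offered as an illustration rather than the library's internals:

 import torch

 def sibling_penalized(lprobs, diversity_rate=0.5):
     # sort continuations per hypothesis, then down-weight the k-th best
     # one by k * diversity_rate (rank 1 = best sibling)
     scores, indices = torch.sort(lprobs, descending=True, dim=-1)
     ranks = torch.arange(1, lprobs.size(-1) + 1, dtype=lprobs.dtype)
     return scores - diversity_rate * ranks, indices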
Example #5
 def test_diverse_beam_search(self):
     generator = SequenceGenerator(
         [self.model],
         self.tgt_dict,
         beam_size=2,
         diverse_beam_groups=2,
         diverse_beam_strength=0.,
     )
     encoder_input = {
         'src_tokens': self.src_tokens,
         'src_lengths': self.src_lengths
     }
     hypos = generator.generate(encoder_input)
     eos, w1, w2 = self.eos, self.w1, self.w2
     # sentence 1, beam 1
     self.assertHypoTokens(hypos[0][0], [w1, w1, eos])
     self.assertHypoScore(hypos[0][0], [0.9, 0.6, 1.0])
     # sentence 1, beam 2
     self.assertHypoTokens(hypos[0][1], [w1, w1, eos])
     self.assertHypoScore(hypos[0][1], [0.9, 0.6, 1.0])
     # sentence 2, beam 1
     self.assertHypoTokens(hypos[1][0], [w1, w2, eos])
     self.assertHypoScore(hypos[1][0], [0.7, 0.4, 0.9])
     # sentence 2, beam 2
     self.assertHypoTokens(hypos[1][1], [w1, w2, eos])
     self.assertHypoScore(hypos[1][1], [0.7, 0.4, 0.9])
Example #6
    def __init__(self, args):
        task, model, model_args = self.load_model(args)
        use_cuda = torch.cuda.is_available() and not args.cpu
        tgt_dict = task.target_dictionary

        generator = SequenceGenerator(
            [model],
            tgt_dict,
            beam_size=args.beam,
            stop_early=(not args.no_early_stop),
            normalize_scores=(not args.unnormalized),
            len_penalty=args.lenpen,
            unk_penalty=args.unkpen,
            sampling=args.sampling,
            sampling_topk=args.sampling_topk,
            sampling_temperature=args.sampling_temperature,
            minlen=args.min_len,
        )

        if use_cuda:
            generator.cuda()

        self.generator = generator
        self.task = task
        self.model = model
        self.use_cuda = use_cuda
        self.args = args
        self.model_args = model_args
Example #7
    def _generate_hypotheses(self, model, sample):
        # initialize generator
        if self.args.seq_sampling:
            search_strategy = search.Sampling(self.target_dictionary)
        else:
            search_strategy = search.BeamSearch(self.target_dictionary)
        if self._generator is None:
            self._generator = SequenceGenerator(
                tgt_dict=self.target_dictionary,
                models=[model],
                beam_size=self.args.seq_beam,
                max_len_a=self.args.seq_max_len_a,
                max_len_b=self.args.seq_max_len_b,
                unk_penalty=self.args.seq_unkpen,
                search_strategy=search_strategy)

        # generate hypotheses
        sample['hypos'] = self._generator.generate(
            [model],
            sample,
        )

        # add reference to the set of hypotheses
        if self.args.seq_keep_reference:
            self.add_reference_to_hypotheses(sample)
Example #8
 def test_topp_sampling_search_low_prob(self):
     # Given a probability threshold low enough for top-p sampling, we
     # expect only the top-1 token to be sampled, which always yields the
     # same output.
     low_sampling_topp = self.min_top1_prob / 2.0
     search_strategy = search.Sampling(self.tgt_dict,
                                       sampling_topp=low_sampling_topp)
     generator = SequenceGenerator([self.model],
                                   self.tgt_dict,
                                   beam_size=2,
                                   search_strategy=search_strategy)
     sample = {
         'net_input': {
             'src_tokens': self.src_tokens,
             'src_lengths': self.src_lengths
         }
     }
     hypos = generator.forward(sample)
     eos, w1 = self.eos, self.w1
     # sentence 1, beam 1
     self.assertHypoTokens(hypos[0][0], [w1, w1, eos])
     self.assertHypoScore(hypos[0][0], [1.0, 0.4, 1.0])
     # sentence 1, beam 2
     self.assertHypoTokens(hypos[0][1], [w1, w1, eos])
     self.assertHypoScore(hypos[0][1], [1.0, 0.4, 1.0])
     # sentence 2, beam 1
     self.assertHypoTokens(hypos[1][0], [w1, w1, eos])
     self.assertHypoScore(hypos[1][0], [1.0, 0.4, 1.0])
     # sentence 2, beam 2
     self.assertHypoTokens(hypos[1][1], [w1, w1, eos])
     self.assertHypoScore(hypos[1][1], [1.0, 0.4, 1.0])
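search.Sampling also supports top-k truncation via sampling_topk; a hedged sketch of the analogous top-k test under the same fixtures (not part of the original suite):

 def test_topk_sampling_search(self):
     # restrict sampling to the 2 most probable tokens instead of a
     # nucleus probability mass
     search_strategy = search.Sampling(self.tgt_dict, sampling_topk=2)
     generator = SequenceGenerator([self.model],
                                   self.tgt_dict,
                                   beam_size=2,
                                   search_strategy=search_strategy)
     sample = {
         'net_input': {
             'src_tokens': self.src_tokens,
             'src_lengths': self.src_lengths
         }
     }
     hypos = generator.forward(sample)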
Example #9
def setup_model(args):
    import_user_module(args)

    if args.buffer_size < 1:
        args.buffer_size = 1
    if args.max_tokens is None and args.max_sentences is None:
        args.max_sentences = 1

    assert not args.sampling or args.nbest == args.beam, \
        '--sampling requires --nbest to be equal to --beam'
    assert not args.max_sentences or args.max_sentences <= args.buffer_size, \
        '--max-sentences/--batch-size cannot be larger than --buffer-size'

    logger.info('fairseq args: {}'.format(args))

    # Setup task, e.g., translation
    task = tasks.setup_task(args)

    # Load ensemble
    logger.info('| loading model(s) from {}'.format(args.path))
    models, _model_args = utils.load_ensemble_for_inference(
        args.path.split(':'),
        task,
        model_arg_overrides=eval(args.model_overrides),
    )

    # Set dictionary
    tgt_dict = task.target_dictionary

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args.no_beamable_mm else args.beam,
            need_attn=args.print_alignment,
        )

    translator = SequenceGenerator(
        models,
        tgt_dict,
        beam_size=args.beam,
        minlen=args.min_len,
        stop_early=(not args.no_early_stop),
        normalize_scores=(not args.unnormalized),
        len_penalty=args.lenpen,
        unk_penalty=args.unkpen,
        sampling=args.sampling,
        sampling_topk=args.sampling_topk,
        sampling_temperature=args.sampling_temperature,
        diverse_beam_groups=args.diverse_beam_groups,
        diverse_beam_strength=args.diverse_beam_strength,
        match_source_len=args.match_source_len,
        no_repeat_ngram_size=args.no_repeat_ngram_size,
    )

    if torch.cuda.is_available() and not args.cpu:
        translator.cuda()

    logger.info('model has been read successfully!')
    return models, task, tgt_dict, translator
Example #10
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.generator = self.load_pretrained_generator(args[0].generator_path)
        if not args[0].cpu:
            self.generator.cuda()

        self.passed_iters = 0
        self.sequence_generator = SequenceGenerator(self.target_dictionary, beam_size=1)
Example #11
def main():
    parser = options.get_parser('Generation')
    parser.add_argument('--path', metavar='FILE', required=True, action='append',
                        help='path(s) to model file(s)')
    options.add_dataset_args(parser)
    options.add_generation_args(parser)

    args = parser.parse_args()
    print(args)

    use_cuda = torch.cuda.is_available() and not args.cpu

    # Load ensemble
    print('| loading model(s) from {}'.format(', '.join(args.path)))
    models, model_args = utils.load_ensemble_for_inference(args.path, data_dir=args.data)
    src_dict, dst_dict = models[0].src_dict, models[0].dst_dict

    print('| [{}] dictionary: {} types'.format(model_args.source_lang, len(src_dict)))
    print('| [{}] dictionary: {} types'.format(model_args.target_lang, len(dst_dict)))

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args.no_beamable_mm else args.beam)

    # Initialize generator
    translator = SequenceGenerator(
        models, beam_size=args.beam, stop_early=(not args.no_early_stop),
        normalize_scores=(not args.unnormalized), len_penalty=args.lenpen,
        unk_penalty=args.unkpen)
    if use_cuda:
        translator.cuda()

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args.replace_unk)

    print('| Type the input sentence and press return:')
    for src_str in sys.stdin:
        src_str = src_str.strip()
        src_tokens = tokenizer.Tokenizer.tokenize(src_str, src_dict, add_if_not_exist=False).long()
        if use_cuda:
            src_tokens = src_tokens.cuda()
        translations = translator.generate(Variable(src_tokens.view(1, -1)))
        hypos = translations[0]
        print('O\t{}'.format(src_str))

        # Process top predictions
        for hypo in hypos[:min(len(hypos), args.nbest)]:
            hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                hypo_tokens=hypo['tokens'].int().cpu(),
                src_str=src_str,
                alignment=hypo['alignment'].int().cpu(),
                align_dict=align_dict,
                dst_dict=dst_dict,
                remove_bpe=args.remove_bpe)
            print('H\t{}\t{}'.format(hypo['score'], hypo_str))
            print('A\t{}'.format(' '.join(map(str, alignment))))
Example #12
 def test_encoder_with_different_output_len(self):
     args = self.model.encoder.args
     task = test_utils.TestTranslationTask.setup_task(args, self.tgt_dict, self.tgt_dict)
     reshaping_model = test_utils.TestReshapingModel.build_model(args, task)
     generator = SequenceGenerator([reshaping_model], self.tgt_dict, beam_size=2, max_len_b=2)
     hypos = generator.forward(self.sample)
     for sent in [0, 1]:
         for beam in [0, 1]:
             assert hypos[sent][beam]['attention'] is not None
Example #13
def main(args):
    print(args)

    use_cuda = torch.cuda.is_available() and not args.cpu

    # Load ensemble
    print('| loading model(s) from {}'.format(', '.join(args.path)))
    models, model_args = utils.load_ensemble_for_inference(args.path, data_dir=args.data)
    src_dict, dst_dict = models[0].src_dict, models[0].dst_dict

    print('| [{}] dictionary: {} types'.format(model_args.source_lang, len(src_dict)))
    print('| [{}] dictionary: {} types'.format(model_args.target_lang, len(dst_dict)))

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args.no_beamable_mm else args.beam,
        )

    # Initialize generator
    translator = SequenceGenerator(
        models, beam_size=args.beam, stop_early=(not args.no_early_stop),
        normalize_scores=(not args.unnormalized), len_penalty=args.lenpen,
        unk_penalty=args.unkpen)
    if use_cuda:
        translator.cuda()

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args.replace_unk)

    print('| Type the input sentence and press return:')
    for src_str in sys.stdin:
        src_str = src_str.strip()
        src_tokens = tokenizer.Tokenizer.tokenize(src_str, src_dict, add_if_not_exist=False).long()
        if use_cuda:
            src_tokens = src_tokens.cuda()
        src_lengths = src_tokens.new([src_tokens.numel()])
        translations = translator.generate(
            Variable(src_tokens.view(1, -1)),
            Variable(src_lengths.view(-1)),
        )
        hypos = translations[0]
        print('O\t{}'.format(src_str))

        # Process top predictions
        for hypo in hypos[:min(len(hypos), args.nbest)]:
            hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                hypo_tokens=hypo['tokens'].int().cpu(),
                src_str=src_str,
                alignment=hypo['alignment'].int().cpu(),
                align_dict=align_dict,
                dst_dict=dst_dict,
                remove_bpe=args.remove_bpe,
            )
            print('H\t{}\t{}'.format(hypo['score'], hypo_str))
            print('A\t{}'.format(' '.join(map(str, alignment))))
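Examples #11 and #13 predate PyTorch 0.4, where torch.autograd.Variable was still required; in modern PyTorch it is a deprecated no-op and plain tensors can be passed directly. A sketch of the same call without the wrapper, assuming the legacy generate() signature used above:

    translations = translator.generate(
        src_tokens.view(1, -1),   # no Variable(...) wrapper needed
        src_lengths.view(-1),
    )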
Example #14
    def test_topp_sampling_search_high_prob(self):
        # Given a probability threshold high enough for top-p sampling,
        # either of the top-2 tokens could be sampled, so outputs can
        # differ across runs.
        high_sampling_topp = (self.min_top1_prob + self.min_top2_prob) / 2.0
        search_strategy = search.Sampling(
            self.tgt_dict, sampling_topp=high_sampling_topp
        )
        generator = SequenceGenerator(
            [self.model], self.tgt_dict, beam_size=2, search_strategy=search_strategy
        )
        sample = {
            "net_input": {
                "src_tokens": self.src_tokens,
                "src_lengths": self.src_lengths,
            }
        }
        hypos = generator.forward(sample)
        eos, w1, w2 = self.eos, self.w1, self.w2
        # sentence 1, beam 1
        self.assertTrue(
            self.hypoTokens(hypos[0][0], [w1, w1, eos])
            or self.hypoTokens(hypos[0][0], [w1, w2, eos])
        )
        self.assertTrue(
            self.hypoScore(hypos[0][0], [1.0, 0.4, 1.0])
            or self.hypoScore(hypos[0][0], [1.0, 0.35, 1.0])
        )

        # sentence 1, beam 2
        self.assertTrue(
            self.hypoTokens(hypos[0][1], [w1, w1, eos])
            or self.hypoTokens(hypos[0][1], [w1, w2, eos])
        )
        self.assertTrue(
            self.hypoScore(hypos[0][1], [1.0, 0.4, 1.0])
            or self.hypoScore(hypos[0][1], [1.0, 0.35, 1.0])
        )

        # sentence 2, beam 1
        self.assertTrue(
            self.hypoTokens(hypos[1][0], [w1, w1, eos])
            or self.hypoTokens(hypos[1][0], [w1, w2, eos])
        )
        self.assertTrue(
            self.hypoScore(hypos[1][0], [1.0, 0.4, 1.0])
            or self.hypoScore(hypos[1][0], [1.0, 0.35, 1.0])
        )

        # sentence 2, beam 2
        self.assertTrue(
            self.hypoTokens(hypos[1][1], [w1, w1, eos])
            or self.hypoTokens(hypos[1][1], [w1, w2, eos])
        )
        self.assertTrue(
            self.hypoScore(hypos[1][1], [1.0, 0.4, 1.0])
            or self.hypoScore(hypos[1][1], [1.0, 0.35, 1.0])
        )
Example #15
    def __init__(self, args, task):
        super().__init__(args, task)

        self.source_dictionary = task.source_dictionary
        self.target_dictionary = task.target_dictionary
        self.discriminator = self.load_pretrained_discriminator(args.discriminator_path)
        if not args.cpu:
            self.discriminator.cuda()

        self.beam_generator = SequenceGenerator(task.target_dictionary, beam_size=5)
        self.greedy_generator = SequenceGenerator(task.target_dictionary, beam_size=1)
        self.gamma = args.gamma
Example #16
 def test_generation_with_additional_input(self):
     args = self.model.encoder.args
     task = test_utils.TestTranslationTask.setup_task(args, self.tgt_dict, self.tgt_dict)
     add_input_model = test_utils.TestAdditionalInputModel.build_model(args, task)
     generator = SequenceGenerator([add_input_model], self.tgt_dict, beam_size=2)
     sample = self.sample.copy()
     sample['net_input']['fancy_other_input'] = sample['net_input']['src_tokens']
     hypos = generator.forward(sample)
     eos, w1, w2 = self.tgt_dict.eos(), self.w1, self.w2
     # sentence 1, beam 1
     self.assertHypoTokens(hypos[0][0], [w1, eos])
     self.assertHypoScore(hypos[0][0], [0.9, 1.0])
Example #17
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.sequence_generator = SequenceGenerator(self.target_dictionary,
                                                    beam_size=1)

        self.discriminator_optimizer = None
        self.discriminator_loss = DiscriminatorCriterion(args[0], self)
        self.discriminator_steps = args[0].discriminator_steps
        self.ignore_mask = args[0].ignore_mask
        self.update_discr_every = args[0].update_discr_every
        self._step_counter = 0
        self.args = args[0]
Example #18
 def build_generator(self, args):
     if getattr(args, 'score_reference', False):
         from fairseq.sequence_scorer import SequenceScorer
         return SequenceScorer(self.target_dictionary)
     else:
         from fairseq.sequence_generator import SequenceGenerator
         return SequenceGenerator(
             self.target_dictionary,
             beam_size=getattr(args, 'beam', 5),
             max_len_a=getattr(args, 'max_len_a', 0),
             max_len_b=getattr(args, 'max_len_b', 200),
             min_len=getattr(args, 'min_len', 1),
             stop_early=(not getattr(args, 'no_early_stop', False)),
             normalize_scores=(not getattr(args, 'unnormalized', False)),
             len_penalty=getattr(args, 'lenpen', 1),
             unk_penalty=getattr(args, 'unkpen', 0),
             sampling=getattr(args, 'sampling', False),
             sampling_topk=getattr(args, 'sampling_topk', -1),
             sampling_topp=getattr(args, 'sampling_topp', -1.0),
             temperature=getattr(args, 'temperature', 1.),
             diverse_beam_groups=getattr(args, 'diverse_beam_groups', -1),
             diverse_beam_strength=getattr(args, 'diverse_beam_strength',
                                           0.5),
             match_source_len=getattr(args, 'match_source_len', False),
             no_repeat_ngram_size=getattr(args, 'no_repeat_ngram_size', 0),
         )
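This hook is what fairseq's generation entry points consume; a minimal usage sketch, assuming a configured task, parsed args, a loaded model, and a prepared sample batch:

 generator = task.build_generator(args)
 hypos = generator.generate([model], sample)   # models are passed at call time
 best_tokens = hypos[0][0]['tokens']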
Example #19
 def test_with_normalization(self):
     generator = SequenceGenerator([self.model], self.tgt_dict)
     hypos = generator.generate(self.src_tokens, self.src_lengths, beam_size=2)
     eos, w1, w2 = self.eos, self.w1, self.w2
     # sentence 1, beam 1
     self.assertHypoTokens(hypos[0][0], [w1, eos])
     self.assertHypoScore(hypos[0][0], [0.9, 1.0])
     # sentence 1, beam 2
     self.assertHypoTokens(hypos[0][1], [w2, w1, w2, eos])
     self.assertHypoScore(hypos[0][1], [0.1, 0.9, 0.9, 1.0])
     # sentence 2, beam 1
     self.assertHypoTokens(hypos[1][0], [w1, w2, w1, eos])
     self.assertHypoScore(hypos[1][0], [0.7, 0.4, 0.4, 1.0])
     # sentence 2, beam 2
     self.assertHypoTokens(hypos[1][1], [w1, w2, eos])
     self.assertHypoScore(hypos[1][1], [0.7, 0.4, 0.6])
Example #20
 def test_quantized_ensemble_sequence_generator(self):
     model = torch.quantization.quantize_dynamic(
         self.transformer_model, {torch.nn.Linear}, dtype=torch.qint8, inplace=True
     )
     generator = SequenceGenerator([model], self.task.tgt_dict, beam_size=2)
     scripted_model = torch.jit.script(generator)
     self._test_save_and_load(scripted_model)
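_test_save_and_load comes from the fairseq test suite; the round trip it performs looks roughly like this sketch (the helper name is an assumption):

 import io
 import torch

 def save_and_load(scripted_module):
     # serialize the scripted generator to an in-memory buffer and restore it
     buf = io.BytesIO()
     torch.jit.save(scripted_module, buf)
     buf.seek(0)
     return torch.jit.load(buf)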
Example #21
 def build_generator(self, models, args):
     tgt_lang = self.args.target_lang if self.args.common_eos is None else self.args.common_eos
     if getattr(args, 'score_reference', False):
         from fairseq.sequence_scorer import SequenceScorer
         return SequenceScorer(
             self.target_dictionary,
             eos=self.tgt_dict.index('[{}]'.format(tgt_lang))
         )
     else:
         from fairseq.sequence_generator import SequenceGenerator
         return SequenceGenerator(
             models,
             self.target_dictionary,
             beam_size=getattr(args, 'beam', 5),
             max_len_a=getattr(args, 'max_len_a', 0),
             max_len_b=getattr(args, 'max_len_b', 200),
             min_len=getattr(args, 'min_len', 1),
             normalize_scores=(not getattr(args, 'unnormalized', False)),
             len_penalty=getattr(args, 'lenpen', 1),
             unk_penalty=getattr(args, 'unkpen', 0),
             temperature=getattr(args, 'temperature', 1.),
             match_source_len=getattr(args, 'match_source_len', False),
             no_repeat_ngram_size=getattr(args, 'no_repeat_ngram_size', 0),
             eos=self.tgt_dict.index('[{}]'.format(tgt_lang))
         )
Example #22
 def test_with_normalization(self):
     generator = SequenceGenerator([self.model])
     hypos = generator.generate(self.src_tokens, self.src_lengths, beam_size=2)
     eos, w1, w2 = self.eos, self.w1, self.w2
     # sentence 1, beam 1
     self.assertHypoTokens(hypos[0][0], [w1, eos])
     self.assertHypoScore(hypos[0][0], [0.9, 1.0])
     # sentence 1, beam 2
     self.assertHypoTokens(hypos[0][1], [w2, w1, w2, eos])
     self.assertHypoScore(hypos[0][1], [0.1, 0.9, 0.9, 1.0])
     # sentence 2, beam 1
     self.assertHypoTokens(hypos[1][0], [w1, w2, w1, eos])
     self.assertHypoScore(hypos[1][0], [0.7, 0.4, 0.4, 1.0])
     # sentence 2, beam 2
     self.assertHypoTokens(hypos[1][1], [w1, w2, eos])
     self.assertHypoScore(hypos[1][1], [0.7, 0.4, 0.6])
Example #23
 def test_no_stop_early(self):
     generator = SequenceGenerator([self.model], self.tgt_dict, stop_early=False)
     hypos = generator.generate(self.encoder_input, beam_size=2)
     eos, w1, w2 = self.tgt_dict.eos(), self.w1, self.w2
     # sentence 1, beam 1
     self.assertHypoTokens(hypos[0][0], [w1, eos])
     self.assertHypoScore(hypos[0][0], [0.9, 1.0])
     # sentence 1, beam 2
     self.assertHypoTokens(hypos[0][1], [w2, w1, w2, eos])
     self.assertHypoScore(hypos[0][1], [0.1, 0.9, 0.9, 1.0])
     # sentence 2, beam 1
     self.assertHypoTokens(hypos[1][0], [w2, w2, w2, w2, eos])
     self.assertHypoScore(hypos[1][0], [0.3, 0.9, 0.99, 0.4, 1.0])
     # sentence 2, beam 2
     self.assertHypoTokens(hypos[1][1], [w1, w2, w1, eos])
     self.assertHypoScore(hypos[1][1], [0.7, 0.4, 0.4, 1.0])
Example #24
 def test_prefix_beam_search(self):
     search_strategy = search.BeamSearch(self.tgt_dict)
     generator = SequenceGenerator(
         [self.model],
         self.tgt_dict,
         beam_size=self.beam_size,
         search_strategy=search_strategy,
     )
     sample = {
         "net_input": {
             "src_tokens": self.tokens,
             "src_lengths": self.token_lengths,
         }
     }
     # make sure the test sample doesn't trigger any assertions
     generator.forward(sample, prefix_tokens=self.tokens[:, :-1])
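prefix_tokens is a (batch, prefix_len) LongTensor that pins the first decoded positions, so beam search only explores continuations of the given prefix. A hedged sketch of forcing a one-token prefix under the same fixtures:

 prefix = self.tokens[:, :1]   # first target token of each sentence
 hypos = generator.forward(sample, prefix_tokens=prefix)
 for i, sent_hypos in enumerate(hypos):
     # every hypothesis should start with the forced token
     assert sent_hypos[0]['tokens'][0] == prefix[i, 0]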
Example #25
 def test_ensemble_sequence_generator(self):
     model = self.transformer_model
     generator = SequenceGenerator(
         [model], self.task.tgt_dict, beam_size=2, no_repeat_ngram_size=2
     )
     scripted_model = torch.jit.script(generator)
     self._test_save_and_load(scripted_model)
Example #26
 def test_maxlen(self):
     generator = SequenceGenerator(self.tgt_dict, beam_size=2, max_len_b=2)
     hypos = generator.generate([self.model], self.sample)
     eos, w1, w2 = self.tgt_dict.eos(), self.w1, self.w2
     # sentence 1, beam 1
     self.assertHypoTokens(hypos[0][0], [w1, eos])
     self.assertHypoScore(hypos[0][0], [0.9, 1.0])
     # sentence 1, beam 2
     self.assertHypoTokens(hypos[0][1], [w2, w2, eos])
     self.assertHypoScore(hypos[0][1], [0.1, 0.1, 0.6])
     # sentence 2, beam 1
     self.assertHypoTokens(hypos[1][0], [w1, w2, eos])
     self.assertHypoScore(hypos[1][0], [0.7, 0.4, 0.6])
     # sentence 2, beam 2
     self.assertHypoTokens(hypos[1][1], [w2, w2, eos])
     self.assertHypoScore(hypos[1][1], [0.3, 0.9, 0.01])
Example #27
 def build_generator(self, args):
     if args.score_reference:
         args.score_reference = False
         print('| --score-reference is not applicable to speech recognition,'
             ' ignoring it.')
     from fairseq.sequence_generator import SequenceGenerator
     return SequenceGenerator(
         self.target_dictionary,
         beam_size=getattr(args, 'beam', 5),
         max_len_a=getattr(args, 'max_len_a', 0),
         max_len_b=getattr(args, 'max_len_b', 200),
         min_len=getattr(args, 'min_len', 1),
         normalize_scores=(not getattr(args, 'unnormalized', False)),
         len_penalty=getattr(args, 'lenpen', 1),
         unk_penalty=getattr(args, 'unkpen', 0),
         sampling=getattr(args, 'sampling', False),
         sampling_topk=getattr(args, 'sampling_topk', -1),
         sampling_topp=getattr(args, 'sampling_topp', -1.0),
         temperature=getattr(args, 'temperature', 1.),
         diverse_beam_groups=getattr(args, 'diverse_beam_groups', -1),
         diverse_beam_strength=getattr(args, 'diverse_beam_strength', 0.5),
         match_source_len=getattr(args, 'match_source_len', False),
         no_repeat_ngram_size=getattr(args, 'no_repeat_ngram_size', 0),
         coverage_weight=getattr(args, 'coverage_weight', 0.0),
         eos_factor=getattr(args, 'eos_factor', None),
     )
Example #28
 def test_no_stop_early(self):
     generator = SequenceGenerator([self.model], self.tgt_dict, stop_early=False)
     hypos = generator.generate(self.src_tokens, self.src_lengths, beam_size=2)
     eos, w1, w2 = self.eos, self.w1, self.w2
     # sentence 1, beam 1
     self.assertHypoTokens(hypos[0][0], [w1, eos])
     self.assertHypoScore(hypos[0][0], [0.9, 1.0])
     # sentence 1, beam 2
     self.assertHypoTokens(hypos[0][1], [w2, w1, w2, eos])
     self.assertHypoScore(hypos[0][1], [0.1, 0.9, 0.9, 1.0])
     # sentence 2, beam 1
     self.assertHypoTokens(hypos[1][0], [w2, w2, w2, w2, eos])
     self.assertHypoScore(hypos[1][0], [0.3, 0.9, 0.99, 0.4, 1.0])
     # sentence 2, beam 2
     self.assertHypoTokens(hypos[1][1], [w1, w2, w1, eos])
     self.assertHypoScore(hypos[1][1], [0.7, 0.4, 0.4, 1.0])
Example #29
 def build_generator(self, args):
     if args.score_reference:
         from fairseq.sequence_scorer import SequenceScorer
         return SequenceScorer(self.target_dictionary)
     else:
         from fairseq.sequence_generator import SequenceGenerator
         return SequenceGenerator(
             self.target_dictionary,
             beam_size=args.beam,
             max_len_a=args.max_len_a,
             max_len_b=args.max_len_b,
             min_len=args.min_len,
             stop_early=(not args.no_early_stop),
             normalize_scores=(not args.unnormalized),
             len_penalty=args.lenpen,
             unk_penalty=args.unkpen,
             sampling=args.sampling,
             sampling_topk=args.sampling_topk,
             sampling_temperature=args.sampling_temperature,
             diverse_beam_groups=args.diverse_beam_groups,
             diverse_beam_strength=args.diverse_beam_strength,
             match_source_len=args.match_source_len,
             no_repeat_ngram_size=args.no_repeat_ngram_size,
             copy_ext_dict=args.copy_ext_dict,
         )
Example #30
 def test_with_normalization(self):
     generator = SequenceGenerator([self.model], self.tgt_dict, beam_size=2)
     hypos = generator.forward(self.sample)
     eos, w1, w2 = self.tgt_dict.eos(), self.w1, self.w2
     # sentence 1, beam 1
     self.assertHypoTokens(hypos[0][0], [w1, eos])
     self.assertHypoScore(hypos[0][0], [0.9, 1.0])
     # sentence 1, beam 2
     self.assertHypoTokens(hypos[0][1], [w2, w1, w2, eos])
     self.assertHypoScore(hypos[0][1], [0.1, 0.9, 0.9, 1.0])
     # sentence 2, beam 1
     self.assertHypoTokens(hypos[1][0], [w1, w2, w1, eos])
     self.assertHypoScore(hypos[1][0], [0.7, 0.4, 0.4, 1.0])
     # sentence 2, beam 2
     self.assertHypoTokens(hypos[1][1], [w1, w2, eos])
     self.assertHypoScore(hypos[1][1], [0.7, 0.4, 0.6])
Example #31
    def build_model(self, args):
        from fairseq import models
        model = models.build_model(args, self)
        if not isinstance(model, FairseqMultiModel):
            raise ValueError('SemisupervisedTranslationTask requires a FairseqMultiModel architecture')

        # create a SequenceGenerator for each model that has a backtranslation dependency on it
        self.sequence_generators = {}
        if (self.lambda_otf_bt > 0.0 or self.lambda_otf_bt_steps is not None) and self.training:
            for lang_pair in self.lang_pairs:
                src, tgt = lang_pair.split('-')
                key = '{}-{}'.format(tgt, src)
                self.sequence_generators[key] = SequenceGenerator(
                    tgt_dict=self.dicts[src],
                    beam_size=args.bt_beam_size,
                    max_len_a=args.bt_max_len_a,
                    max_len_b=args.bt_max_len_b,
                )
                decoder_lang_tok_idx = self.get_decoder_langtok(src)

                def backtranslate_fn(
                    sample, model=model.models[key],
                    bos_token=decoder_lang_tok_idx,
                    sequence_generator=self.sequence_generators[key],
                ):
                    return sequence_generator.generate(
                        [model],
                        sample,
                        bos_token=bos_token,
                    )
                self.backtranslators[lang_pair] = backtranslate_fn

        return model
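One detail worth noting in backtranslate_fn above: the default arguments (model=..., bos_token=..., sequence_generator=...) bind the current loop values at definition time. Without them, Python's late-binding closures would leave every language pair pointing at the last iteration's model, as this small illustration shows:

 fns = [lambda: key for key in ['de-en', 'fr-en']]
 print([f() for f in fns])                # ['fr-en', 'fr-en'] (late binding)
 fns = [lambda k=key: k for key in ['de-en', 'fr-en']]
 print([f() for f in fns])                # ['de-en', 'fr-en'] (bound early)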
Example #32
def score_test(args, model, dataset, subset, beam, cuda_device):
    """Evaluate the model on the test set and return the BLEU scorer."""

    translator = SequenceGenerator([model], dataset.dst_dict, beam_size=beam)
    if torch.cuda.is_available():
        translator.cuda()

    scorer = bleu.Scorer(dataset.dst_dict.pad(), dataset.dst_dict.eos(),
                         dataset.dst_dict.unk())
    itr = dataset.dataloader(subset,
                             batch_size=4,
                             max_positions=args.max_positions)
    for _, _, ref, hypos in translator.generate_batched_itr(
            itr, cuda_device=cuda_device):
        scorer.add(ref.int().cpu(), hypos[0]['tokens'].int().cpu())
    return scorer
Example #33
    def build_generator(self, models, args, **unused):
        if getattr(args, "score_reference", False):
            from fairseq.sequence_scorer import SequenceScorer

            return SequenceScorer(
                self.target_dictionary,
                eos=self.tgt_dict.index("[{}]".format(self.args.target_lang)),
            )
        else:
            from fairseq.sequence_generator import SequenceGenerator

            return SequenceGenerator(
                models,
                self.target_dictionary,
                beam_size=getattr(args, "beam", 5),
                max_len_a=getattr(args, "max_len_a", 0),
                max_len_b=getattr(args, "max_len_b", 200),
                min_len=getattr(args, "min_len", 1),
                normalize_scores=(not getattr(args, "unnormalized", False)),
                len_penalty=getattr(args, "lenpen", 1),
                unk_penalty=getattr(args, "unkpen", 0),
                temperature=getattr(args, "temperature", 1.0),
                match_source_len=getattr(args, "match_source_len", False),
                no_repeat_ngram_size=getattr(args, "no_repeat_ngram_size", 0),
                eos=self.tgt_dict.index("[{}]".format(self.args.target_lang)),
            )
Example #34
 def test_with_lenpen_favoring_long_hypos(self):
     lenpen = 5.0
     generator = SequenceGenerator([self.model], self.tgt_dict, len_penalty=lenpen)
     hypos = generator.generate(self.src_tokens, self.src_lengths, beam_size=2)
     eos, w1, w2 = self.eos, self.w1, self.w2
     # sentence 1, beam 1
     self.assertHypoTokens(hypos[0][0], [w2, w1, w2, eos])
     self.assertHypoScore(hypos[0][0], [0.1, 0.9, 0.9, 1.0], lenpen=lenpen)
     # sentence 1, beam 2
     self.assertHypoTokens(hypos[0][1], [w1, eos])
     self.assertHypoScore(hypos[0][1], [0.9, 1.0], lenpen=lenpen)
     # sentence 2, beam 1
     self.assertHypoTokens(hypos[1][0], [w1, w2, w1, eos])
     self.assertHypoScore(hypos[1][0], [0.7, 0.4, 0.4, 1.0], lenpen=lenpen)
     # sentence 2, beam 2
     self.assertHypoTokens(hypos[1][1], [w1, w2, eos])
     self.assertHypoScore(hypos[1][1], [0.7, 0.4, 0.6], lenpen=lenpen)
Example #35
 def _generate(self, opt, src_tokens):
     translator = SequenceGenerator(
         [self.trainer.get_model()],
         self.fairseq_dict,
         beam_size=opt.beam,
         stop_early=(not opt.no_early_stop),
         normalize_scores=(not opt.unnormalized),
         len_penalty=opt.lenpen)
     translator.cuda()
     tokens = src_tokens
     translations = translator.generate(
         Variable(tokens), Variable(self._positions_for_tokens(tokens)))
     results = [t[0] for t in translations]
     output_lines = [[] for _ in range(len(results))]
     for i in range(len(results)):
         output_lines[i] = ' '.join(self.fairseq_dict[idx]
                                    for idx in results[i]['tokens'][:-1])
     return output_lines
Example #36
 def test_without_normalization(self):
     # Sentence 1: unchanged from the normalized case
     # Sentence 2: beams swap order
     generator = SequenceGenerator([self.model], self.tgt_dict, normalize_scores=False)
     hypos = generator.generate(self.src_tokens, self.src_lengths, beam_size=2)
     eos, w1, w2 = self.eos, self.w1, self.w2
     # sentence 1, beam 1
     self.assertHypoTokens(hypos[0][0], [w1, eos])
     self.assertHypoScore(hypos[0][0], [0.9, 1.0], normalized=False)
     # sentence 1, beam 2
     self.assertHypoTokens(hypos[0][1], [w2, w1, w2, eos])
     self.assertHypoScore(hypos[0][1], [0.1, 0.9, 0.9, 1.0], normalized=False)
     # sentence 2, beam 1
     self.assertHypoTokens(hypos[1][0], [w1, w2, eos])
     self.assertHypoScore(hypos[1][0], [0.7, 0.4, 0.6], normalized=False)
     # sentence 2, beam 2
     self.assertHypoTokens(hypos[1][1], [w1, w2, w1, eos])
     self.assertHypoScore(hypos[1][1], [0.7, 0.4, 0.4, 1.0], normalized=False)
Example #37
 def test_diverse_beam_search(self):
     generator = SequenceGenerator(
         [self.model], self.tgt_dict,
         beam_size=2, diverse_beam_groups=2, diverse_beam_strength=0.,
     )
     hypos = generator.generate(self.src_tokens, self.src_lengths)
     eos, w1, w2 = self.eos, self.w1, self.w2
     # sentence 1, beam 1
     self.assertHypoTokens(hypos[0][0], [w1, w1, eos])
     self.assertHypoScore(hypos[0][0], [0.9, 0.6, 1.0])
     # sentence 1, beam 2
     self.assertHypoTokens(hypos[0][1], [w1, w1, eos])
     self.assertHypoScore(hypos[0][1], [0.9, 0.6, 1.0])
     # sentence 2, beam 1
     self.assertHypoTokens(hypos[1][0], [w1, w2, eos])
     self.assertHypoScore(hypos[1][0], [0.7, 0.4, 0.9])
     # sentence 2, beam 2
     self.assertHypoTokens(hypos[1][1], [w1, w2, eos])
     self.assertHypoScore(hypos[1][1], [0.7, 0.4, 0.9])
Example #38
def main(args):
    if args.buffer_size < 1:
        args.buffer_size = 1
    if args.max_tokens is None and args.max_sentences is None:
        args.max_sentences = 1

    assert not args.sampling or args.nbest == args.beam, \
        '--sampling requires --nbest to be equal to --beam'
    assert not args.max_sentences or args.max_sentences <= args.buffer_size, \
        '--max-sentences/--batch-size cannot be larger than --buffer-size'

    print(args)

    use_cuda = torch.cuda.is_available() and not args.cpu

    # Setup task, e.g., translation
    task = tasks.setup_task(args)

    # Load ensemble
    print('| loading model(s) from {}'.format(args.path))
    model_paths = args.path.split(':')
    models, model_args = utils.load_ensemble_for_inference(model_paths, task, model_arg_overrides=eval(args.model_overrides))

    # Set dictionaries
    tgt_dict = task.target_dictionary

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args.no_beamable_mm else args.beam,
            need_attn=args.print_alignment,
        )
        if args.fp16:
            model.half()

    # Initialize generator
    translator = SequenceGenerator(
        models, tgt_dict, beam_size=args.beam, minlen=args.min_len,
        stop_early=(not args.no_early_stop), normalize_scores=(not args.unnormalized),
        len_penalty=args.lenpen, unk_penalty=args.unkpen,
        sampling=args.sampling, sampling_topk=args.sampling_topk, sampling_temperature=args.sampling_temperature,
        diverse_beam_groups=args.diverse_beam_groups, diverse_beam_strength=args.diverse_beam_strength,
    )

    if use_cuda:
        translator.cuda()

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args.replace_unk)

    def make_result(src_str, hypos):
        result = Translation(
            src_str='O\t{}'.format(src_str),
            hypos=[],
            pos_scores=[],
            alignments=[],
        )

        # Process top predictions
        for hypo in hypos[:min(len(hypos), args.nbest)]:
            hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                hypo_tokens=hypo['tokens'].int().cpu(),
                src_str=src_str,
                alignment=hypo['alignment'].int().cpu() if hypo['alignment'] is not None else None,
                align_dict=align_dict,
                tgt_dict=tgt_dict,
                remove_bpe=args.remove_bpe,
            )
            result.hypos.append('H\t{}\t{}'.format(hypo['score'], hypo_str))
            result.pos_scores.append('P\t{}'.format(
                ' '.join(map(
                    lambda x: '{:.4f}'.format(x),
                    hypo['positional_scores'].tolist(),
                ))
            ))
            result.alignments.append(
                'A\t{}'.format(' '.join(map(lambda x: str(utils.item(x)), alignment)))
                if args.print_alignment else None
            )
        return result

    def process_batch(batch):
        tokens = batch.tokens
        lengths = batch.lengths

        if use_cuda:
            tokens = tokens.cuda()
            lengths = lengths.cuda()

        translations = translator.generate(
            tokens,
            lengths,
            maxlen=int(args.max_len_a * tokens.size(1) + args.max_len_b),
        )

        return [make_result(batch.srcs[i], t) for i, t in enumerate(translations)]

    max_positions = utils.resolve_max_positions(
        task.max_positions(),
        *[model.max_positions() for model in models]
    )

    if args.buffer_size > 1:
        print('| Sentence buffer size:', args.buffer_size)
    print('| Type the input sentence and press return:')
    for inputs in buffered_read(args.buffer_size):
        indices = []
        results = []
        for batch, batch_indices in make_batches(inputs, args, task, max_positions):
            indices.extend(batch_indices)
            results += process_batch(batch)

        for i in np.argsort(indices):
            result = results[i]
            print(result.src_str)
            for hypo, pos_scores, align in zip(result.hypos, result.pos_scores, result.alignments):
                print(hypo)
                print(pos_scores)
                if align is not None:
                    print(align)
Example #39
def model_fn(model_dir):
    
    model_name = 'checkpoint_best.pt'
    model_path = os.path.join(model_dir, model_name)

    logger.info('Loading the model')
    with open(model_path, 'rb') as f:
        model_info = torch.load(f, map_location=torch.device('cpu'))

    # Will be overridden by the model_info['args'] - need to keep for pre-trained models
    parser = options.get_generation_parser(interactive=True)
    # get args for FairSeq by converting the hyperparameters as if they were command-line arguments
    argv_copy = copy.deepcopy(sys.argv)
    # replace the command-line arguments with the ones the parser expects
    sys.argv[1:] = ['--path', model_path, model_dir]
    args = options.parse_args_and_arch(parser)
    # restore previous command-line args
    sys.argv = argv_copy
    
    saved_args = model_info['args']
    for key, value in vars(saved_args).items():
        setattr(args, key, value)

    args.data = [model_dir]
    print(args)

    # Setup task, e.g., translation
    task = tasks.setup_task(args)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    logger.info('Current device: {}'.format(device))

    model_paths = [os.path.join(model_dir, model_name)]
    models, model_args = utils.load_ensemble_for_inference(model_paths, task, model_arg_overrides={})

    # Set dictionaries
    tgt_dict = task.target_dictionary

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args.no_beamable_mm else args.beam,
            need_attn=args.print_alignment,
        )
        if args.fp16:
            model.half()

    # Initialize generator
    translator = SequenceGenerator(
        models, tgt_dict, beam_size=args.beam, minlen=args.min_len,
        stop_early=(not args.no_early_stop), normalize_scores=(not args.unnormalized),
        len_penalty=args.lenpen, unk_penalty=args.unkpen,
        sampling=args.sampling, sampling_topk=args.sampling_topk, sampling_temperature=args.sampling_temperature,
        diverse_beam_groups=args.diverse_beam_groups, diverse_beam_strength=args.diverse_beam_strength,
    )

    if device.type == 'cuda':
        translator.cuda()

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    # align_dict = utils.load_align_dict(args.replace_unk)
    align_dict = utils.load_align_dict(None)


    max_positions = utils.resolve_max_positions(
        task.max_positions(),
        *[model.max_positions() for model in models]
    )

    return dict(
        translator=translator,
        task=task,
        max_positions=max_positions,
        align_dict=align_dict,
        tgt_dict=tgt_dict,
        args=args,
        device=device,
    )
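model_fn above packages everything a serving handler needs; a hedged companion predict_fn in the same SageMaker style (the single-sentence batching and tokenizer call are assumptions, not part of the original):

def predict_fn(input_data, model_artifacts):
    translator = model_artifacts['translator']
    task = model_artifacts['task']
    args = model_artifacts['args']
    tgt_dict = model_artifacts['tgt_dict']

    # tokenize one source sentence with the task's source dictionary
    src_tokens = tokenizer.Tokenizer.tokenize(
        input_data, task.source_dictionary, add_if_not_exist=False).long()
    if model_artifacts['device'].type == 'cuda':
        src_tokens = src_tokens.cuda()
    src_lengths = src_tokens.new([src_tokens.numel()])

    translations = translator.generate(
        src_tokens.view(1, -1), src_lengths.view(-1),
        maxlen=int(args.max_len_a * src_tokens.numel() + args.max_len_b))
    best = translations[0][0]
    return tgt_dict.string(best['tokens'].int().cpu(), args.remove_bpe)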
Example #40
def main():
    parser = options.get_parser('Generation')
    parser.add_argument('--path', metavar='FILE', required=True, action='append',
                        help='path(s) to model file(s)')
    dataset_args = options.add_dataset_args(parser)
    dataset_args.add_argument('--batch-size', default=32, type=int, metavar='N',
                              help='batch size')
    dataset_args.add_argument('--gen-subset', default='test', metavar='SPLIT',
                              help='data subset to generate (train, valid, test)')
    options.add_generation_args(parser)

    args = parser.parse_args()
    if args.no_progress_bar and args.log_format is None:
        args.log_format = 'none'
    print(args)

    use_cuda = torch.cuda.is_available() and not args.cpu

    # Load dataset
    if args.replace_unk is None:
        dataset = data.load_dataset(args.data, [args.gen_subset], args.source_lang, args.target_lang)
    else:
        dataset = data.load_raw_text_dataset(args.data, [args.gen_subset], args.source_lang, args.target_lang)
    if args.source_lang is None or args.target_lang is None:
        # record inferred languages in args
        args.source_lang, args.target_lang = dataset.src, dataset.dst

    # Load ensemble
    print('| loading model(s) from {}'.format(', '.join(args.path)))
    models, _ = utils.load_ensemble_for_inference(args.path, dataset.src_dict, dataset.dst_dict)

    print('| [{}] dictionary: {} types'.format(dataset.src, len(dataset.src_dict)))
    print('| [{}] dictionary: {} types'.format(dataset.dst, len(dataset.dst_dict)))
    print('| {} {} {} examples'.format(args.data, args.gen_subset, len(dataset.splits[args.gen_subset])))

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args.no_beamable_mm else args.beam)

    # Initialize generator
    translator = SequenceGenerator(
        models, beam_size=args.beam, stop_early=(not args.no_early_stop),
        normalize_scores=(not args.unnormalized), len_penalty=args.lenpen,
        unk_penalty=args.unkpen)
    if use_cuda:
        translator.cuda()

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args.replace_unk)

    # Generate and compute BLEU score
    scorer = bleu.Scorer(dataset.dst_dict.pad(), dataset.dst_dict.eos(), dataset.dst_dict.unk())
    max_positions = min(model.max_encoder_positions() for model in models)
    itr = dataset.eval_dataloader(
        args.gen_subset, max_sentences=args.batch_size, max_positions=max_positions,
        skip_invalid_size_inputs_valid_test=args.skip_invalid_size_inputs_valid_test)
    num_sentences = 0
    with utils.build_progress_bar(args, itr) as t:
        wps_meter = TimeMeter()
        gen_timer = StopwatchMeter()
        translations = translator.generate_batched_itr(
            t, maxlen_a=args.max_len_a, maxlen_b=args.max_len_b,
            cuda_device=0 if use_cuda else None, timer=gen_timer)
        for sample_id, src_tokens, target_tokens, hypos in translations:
            # Process input and ground truth
            target_tokens = target_tokens.int().cpu()
            # Either retrieve the original sentences or regenerate them from tokens.
            if align_dict is not None:
                src_str = dataset.splits[args.gen_subset].src.get_original_text(sample_id)
                target_str = dataset.splits[args.gen_subset].dst.get_original_text(sample_id)
            else:
                src_str = dataset.src_dict.string(src_tokens, args.remove_bpe)
                target_str = dataset.dst_dict.string(target_tokens, args.remove_bpe, escape_unk=True)

            if not args.quiet:
                print('S-{}\t{}'.format(sample_id, src_str))
                print('T-{}\t{}'.format(sample_id, target_str))

            # Process top predictions
            for i, hypo in enumerate(hypos[:min(len(hypos), args.nbest)]):
                hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                    hypo_tokens=hypo['tokens'].int().cpu(),
                    src_str=src_str,
                    alignment=hypo['alignment'].int().cpu(),
                    align_dict=align_dict,
                    dst_dict=dataset.dst_dict,
                    remove_bpe=args.remove_bpe)

                if not args.quiet:
                    print('H-{}\t{}\t{}'.format(sample_id, hypo['score'], hypo_str))
                    print('A-{}\t{}'.format(sample_id, ' '.join(map(str, alignment))))

                # Score only the top hypothesis
                if i == 0:
                    if align_dict is not None or args.remove_bpe is not None:
                        # Convert back to tokens for evaluation with unk replacement and/or without BPE
                        target_tokens = tokenizer.Tokenizer.tokenize(target_str,
                                                                     dataset.dst_dict,
                                                                     add_if_not_exist=True)
                    scorer.add(target_tokens, hypo_tokens)

            wps_meter.update(src_tokens.size(0))
            t.log({'wps': round(wps_meter.avg)})
            num_sentences += 1

    print('| Translated {} sentences ({} tokens) in {:.1f}s ({:.2f} tokens/s)'.format(
        num_sentences, gen_timer.n, gen_timer.sum, 1. / gen_timer.avg))
    print('| Generate {} with beam={}: {}'.format(args.gen_subset, args.beam, scorer.result_string()))