Exemple #1
0
    def build_dataset_for_inference(self,
                                    src_tokens,
                                    src_lengths,
                                    constraints=None):
        """Build the dataset that is handed to the model at inference time.

        The source tokens are wrapped in a ``ListDataset`` and then in a
        ``LanguagePairDataset`` built from the source data, the lengths and
        ``self.source_dictionary`` only. The result is then routed through one
        of two ``self.data_manager`` hooks, selected by
        ``self.args.lang_tok_replacing_bos_eos``; both hooks are given
        language-token specs read from ``self.args.langtoks["main"]``.

        Args:
            src_tokens: source token sequences, forwarded to ``ListDataset``.
            src_lengths: passed as the sizes argument to both ``ListDataset``
                and ``LanguagePairDataset``.
            constraints: must be ``None``; any other value is rejected.

        Returns:
            The value returned by ``data_manager.alter_dataset_langtok`` when
            ``lang_tok_replacing_bos_eos`` is truthy; otherwise the
            ``LanguagePairDataset`` whose ``src`` attribute has been replaced
            by the result of ``data_manager.src_dataset_tranform_func``.

        Raises:
            NotImplementedError: if ``constraints`` is not ``None``.
        """
        if constraints is not None:
            raise NotImplementedError(
                "Constrained decoding with the multilingual_translation task is not supported"
            )

        wrapped_tokens = ListDataset(src_tokens, src_lengths)
        pair_dataset = LanguagePairDataset(
            wrapped_tokens,
            src_lengths,
            self.source_dictionary,
        )
        src_spec, tgt_spec = self.args.langtoks["main"]

        if not self.args.lang_tok_replacing_bos_eos:
            # Only the source side is transformed in this mode; the pair
            # dataset object itself is kept and mutated in place.
            pair_dataset.src = self.data_manager.src_dataset_tranform_func(
                self.args.source_lang,
                self.args.target_lang,
                dataset=pair_dataset.src,
                spec=src_spec,
            )
            return pair_dataset

        # Here the data manager returns the dataset to use, which is handed
        # back to the caller instead of the one built above.
        return self.data_manager.alter_dataset_langtok(
            pair_dataset,
            src_eos=self.source_dictionary.eos(),
            src_lang=self.args.source_lang,
            tgt_eos=self.target_dictionary.eos(),
            tgt_lang=self.args.target_lang,
            src_langtok_spec=src_spec,
            tgt_langtok_spec=tgt_spec,
        )
Exemple #2
0
 def build_dataset_for_inference(self, src_tokens, src_lengths, constraints=None):
     """Build the dataset that is handed to the model at inference time.

     The source tokens are wrapped in a ``ListDataset`` and then in a
     ``LanguagePairDataset`` built from the source data, the lengths and
     ``self.source_dictionary``. The result is then routed through one of
     two ``self.data_manager`` hooks, selected by
     ``self.args.lang_tok_replacing_bos_eos``; both hooks are given
     language-token specs read from ``self.args.langtoks['main']``.

     Args:
         src_tokens: source token sequences, forwarded to ``ListDataset``.
         src_lengths: passed as the sizes argument to both ``ListDataset``
             and ``LanguagePairDataset``.
         constraints: accepted so that callers which always pass this
             keyword do not fail with a ``TypeError``; must be ``None``
             because this method has no constrained-decoding support.

     Returns:
         The value returned by ``data_manager.alter_dataset_langtok`` when
         ``lang_tok_replacing_bos_eos`` is truthy; otherwise the
         ``LanguagePairDataset`` whose ``src`` attribute has been replaced
         by the result of ``data_manager.src_dataset_tranform_func``.

     Raises:
         NotImplementedError: if ``constraints`` is not ``None``.
     """
     # Fail loudly instead of silently ignoring decoding constraints.
     if constraints is not None:
         raise NotImplementedError(
             "Constrained decoding with the multilingual_translation task is not supported"
         )

     src_data = ListDataset(src_tokens, src_lengths)
     dataset = LanguagePairDataset(src_data, src_lengths, self.source_dictionary)
     src_langtok_spec, tgt_langtok_spec = self.args.langtoks['main']
     if self.args.lang_tok_replacing_bos_eos:
         # The data manager returns the dataset to use in this mode.
         dataset = self.data_manager.alter_dataset_langtok(
             dataset,
             src_eos=self.source_dictionary.eos(),
             src_lang=self.args.source_lang,
             tgt_eos=self.target_dictionary.eos(),
             tgt_lang=self.args.target_lang,
             src_langtok_spec=src_langtok_spec,
             tgt_langtok_spec=tgt_langtok_spec,
         )
     else:
         # Only the source side is transformed; the pair dataset is mutated
         # in place.
         dataset.src = self.data_manager.src_dataset_tranform_func(
             self.args.source_lang,
             self.args.target_lang,
             dataset=dataset.src,
             spec=src_langtok_spec,
         )
     return dataset