def build_dataset_for_inference(self, src_tokens, src_lengths, constraints=None): if constraints is not None: raise NotImplementedError( "Constrained decoding with the multilingual_translation task is not supported" ) src_data = ListDataset(src_tokens, src_lengths) dataset = LanguagePairDataset(src_data, src_lengths, self.source_dictionary) src_langtok_spec, tgt_langtok_spec = self.args.langtoks["main"] if self.args.lang_tok_replacing_bos_eos: dataset = self.data_manager.alter_dataset_langtok( dataset, src_eos=self.source_dictionary.eos(), src_lang=self.args.source_lang, tgt_eos=self.target_dictionary.eos(), tgt_lang=self.args.target_lang, src_langtok_spec=src_langtok_spec, tgt_langtok_spec=tgt_langtok_spec, ) else: dataset.src = self.data_manager.src_dataset_tranform_func( self.args.source_lang, self.args.target_lang, dataset=dataset.src, spec=src_langtok_spec, ) return dataset
def build_dataset_for_inference(self, src_tokens, src_lengths): src_data = ListDataset(src_tokens, src_lengths) dataset = LanguagePairDataset(src_data, src_lengths, self.source_dictionary) src_langtok_spec, tgt_langtok_spec = self.args.langtoks['main'] if self.args.lang_tok_replacing_bos_eos: dataset = self.data_manager.alter_dataset_langtok( dataset, src_eos=self.source_dictionary.eos(), src_lang=self.args.source_lang, tgt_eos=self.target_dictionary.eos(), tgt_lang=self.args.target_lang, src_langtok_spec=src_langtok_spec, tgt_langtok_spec=tgt_langtok_spec, ) else: dataset.src = self.data_manager.src_dataset_tranform_func( self.args.source_lang, self.args.target_lang, dataset=dataset.src, spec=src_langtok_spec, ) return dataset