def _forced_decode(self, generator: 'models.GeneratorModel', src_file: str, ref_file: str,
                   batcher: Optional[batchers.Batcher] = None,
                   max_src_len: Optional[int] = None,
                   assert_scores: Optional[Sequence[numbers.Real]] = None) -> None:
  """
  Perform forced decoding.

  Args:
    generator: generator model to use
    src_file: a file of src-side inputs to generate outputs for
    ref_file: path of file with reference translations
              (NOTE(review): currently never read by this method; kept for interface compatibility)
    batcher: necessary with some cases of input pre-processing such as padding or truncation
    max_src_len: if given, skip inputs that are too long
    assert_scores: if given, raise exception if the scores for generated outputs don't match the given scores
  """
  src_in = generator.src_reader.read_sents(src_file)
  # If we have an "assert scores" list use it, otherwise yield None infinitely
  # (iter(callable, sentinel) with an unreachable sentinel is an endless None stream).
  assert_in = assert_scores if assert_scores else iter(lambda: None, 1)
  # Reporting is commenced if there are some defined reporters
  is_reporting = self.reporter is not None
  event_trigger.set_reporting(is_reporting)
  # Force-decode the inputs batch by batch.
  # (The unused `ref_batch` local from the original implementation has been removed.)
  src_batch, assert_batch = [], []
  for curr_sent_i, (src_line, assert_line) in islice(enumerate(zip(src_in, assert_in)),
                                                     self.max_num_sents):
    src_batch.append(src_line)
    assert_batch.append(assert_line)
    if len(src_batch) == batcher.batch_size:
      self._forced_decode_one_batch(generator, batcher, src_batch, assert_batch, max_src_len)
      src_batch, assert_batch = [], []
  # Flush the final, possibly partial, batch
  if len(src_batch) != 0:
    self._forced_decode_one_batch(generator, batcher, src_batch, assert_batch, max_src_len)
  # Finishing up
  try:
    if is_reporting:
      self._conclude_report()
  finally:
    # Always switch reporting back off, even if _conclude_report raises
    event_trigger.set_reporting(False)
def _generate_output(self, generator: 'models.GeneratorModel', src_file: str, trg_file: str,
                     batcher: Optional[batchers.Batcher] = None,
                     max_src_len: Optional[int] = None,
                     forced_ref_file: Optional[str] = None,
                     assert_scores: Optional[Sequence[numbers.Real]] = None) -> None:
  """
  Generate outputs and write them to file.

  Args:
    generator: generator model to use
    src_file: a file of src-side inputs to generate outputs for
    trg_file: file to write outputs to
    batcher: necessary with some cases of input pre-processing such as padding or truncation
    max_src_len: if given, skip inputs that are too long
    forced_ref_file: if given, perform forced decoding with the given file of trg-side inputs
    assert_scores: if given, raise exception if the scores for generated outputs don't match the given scores
  """
  src_in = generator.src_reader.read_sents(src_file)
  # If we have a reference file read it, otherwise yield None infinitely
  forced_ref_in = generator.trg_reader.read_sents(forced_ref_file) if forced_ref_file else iter(lambda: None, 1)
  assert_in = assert_scores if assert_scores else iter(lambda: None, 1)
  # Reporting is commenced if there are some defined reporters
  is_reporting = self.reporter is not None
  event_trigger.set_reporting(is_reporting)
  # Saving the translated output to a trg file
  with open(trg_file, 'wt', encoding='utf-8') as fp:
    src_batch, ref_batch, assert_batch = [], [], []
    # BUG FIX: the original limit check read the undefined name `cur_sent_i`
    # (the loop variable is `curr_sent_i`), raising NameError whenever
    # max_num_sents was set. Using islice caps the sentence count the same
    # way the sibling methods do; islice(it, None) iterates everything.
    for curr_sent_i, (src_line, ref_line, assert_line) in islice(
        enumerate(zip(src_in, forced_ref_in, assert_in)), self.max_num_sents):
      src_batch.append(src_line)
      ref_batch.append(ref_line)
      assert_batch.append(assert_line)
      if len(src_batch) == batcher.batch_size:
        self._generate_one_batch(generator, batcher, src_batch, ref_batch, assert_batch, max_src_len, fp)
        src_batch, ref_batch, assert_batch = [], [], []
    # Flush the final, possibly partial, batch
    if len(src_batch) != 0:
      self._generate_one_batch(generator, batcher, src_batch, ref_batch, assert_batch, max_src_len, fp)
  # Finishing up
  try:
    if is_reporting:
      self._conclude_report()
  finally:
    # Always switch reporting back off, even if _conclude_report raises
    event_trigger.set_reporting(False)
def _generate_output(self, generator: 'models.GeneratorModel', src_file: str, trg_file: str,
                     batcher: Optional[batchers.Batcher] = None,
                     max_src_len: Optional[int] = None) -> None:
  """
  Generate outputs and write them to file.

  Args:
    generator: generator model to use
    src_file: a file of src-side inputs to generate outputs for
    trg_file: file to write outputs to
    batcher: necessary with some cases of input pre-processing such as padding or truncation
    max_src_len: if given, skip inputs that are too long
  """
  src_in = generator.src_reader.read_sents(src_file)
  # Turn reporting on only when at least one reporter is configured
  is_reporting = self.reporter is not None
  event_trigger.set_reporting(is_reporting)
  # Translate the inputs batch by batch, writing results to the target file
  with open(trg_file, 'wt', encoding='utf-8') as out_stream:
    pending = []
    for src_sent in islice(src_in, self.max_num_sents):
      pending.append(src_sent)
      if len(pending) != batcher.batch_size:
        continue
      self._generate_one_batch(generator, batcher, pending, max_src_len, out_stream)
      pending = []
      if settings.PRETEND:
        # In PRETEND mode one full batch is enough; stop early
        break
    if pending:
      # Flush the final partial batch
      self._generate_one_batch(generator, batcher, pending, max_src_len, out_stream)
  # Finishing up
  try:
    if is_reporting:
      self._conclude_report()
  finally:
    # Reporting is done in _generate_output only
    event_trigger.set_reporting(False)