def write_hypos(self, all_hypos, sen_indices=None):
    """Writes the first-best hypothesis of each sentence in
    ``all_hypos`` to ``path``, one (decoded) sentence per line.

    Args:
        all_hypos (list): List of hypothesis lists; only the first
            (best) hypothesis of each list is written.
        sen_indices (list): Unused in this handler; kept for interface
            compatibility with other output handlers.
    """
    if self.f is not None:
        # File already open (streaming mode): append and flush after
        # each sentence so partial output survives a crash.
        for hypos in all_hypos:
            self.f.write(io_utils.decode(hypos[0].trgt_sentence))
            self.f.write("\n")
            self.f.flush()
    else:
        # One-shot mode: open, write everything, close via ``with``.
        with codecs.open(self.path, "w", encoding='utf-8') as f:
            for hypos in all_hypos:
                f.write(io_utils.decode(hypos[0].trgt_sentence))
                f.write("\n")
                # Bug fix: was ``self.f.flush()``, but ``self.f`` is
                # None in this branch -> AttributeError. Flush the
                # local file object instead.
                f.flush()
def write_hypos(self, all_hypos, sen_indices=None):
    """Writes the hypotheses in ``all_hypos`` to ``path`` """
    # Lazily open the per-rank output files on first use.
    if not self.f:
        self.open_file()
    n_files = len(self.f)
    for hypos in all_hypos:
        # Pad the hypothesis list in place by repeating the last
        # entry so every open file receives exactly one line.
        while len(hypos) < n_files:
            hypos.append(hypos[-1])
        # One hypothesis per file, flushed immediately.
        for out_file, hypo in zip(self.f, hypos):
            out_file.write(io_utils.decode(hypo.trgt_sentence))
            out_file.write("\n")
            out_file.flush()
def write_hypos(self, all_hypos, sen_indices):
    """Writes the hypotheses in ``all_hypos`` to ``path`` in a
    Moses-style n-best format::

        <sen_idx> ||| <sentence> ||| <name>= <score> ... ||| <total>

    Args:
        all_hypos (list): List of hypothesis lists, one list per
            source sentence.
        sen_indices (list): Sentence indices corresponding to the
            entries of ``all_hypos``; written as the first field.
    """
    with codecs.open(self.path, "w", encoding='utf-8') as f:
        n_predictors = len(self.predictor_names)
        for idx, hypos in zip(sen_indices, all_hypos):
            for hypo in hypos:
                # Per-predictor scores are the sums over the
                # per-token score breakdowns.
                f.write("%d ||| %s ||| %s ||| %f" %
                        (idx,
                         io_utils.decode(hypo.trgt_sentence),
                         ' '.join("%s= %f" % (
                             self.predictor_names[i],
                             sum([s[i][0] for s in hypo.score_breakdown]))
                                  for i in range(n_predictors)),
                         hypo.total_score))
                f.write("\n")
            # Removed dead ``idx += 1``: ``idx`` is rebound by the
            # ``zip`` on every iteration, so the increment had no
            # effect.
def do_decode(decoder, output_handlers, src_sentences,
              trgt_sentences=None, num_log=1):
    """This method contains the main decoding loop. It iterates through
    ``src_sentences`` and applies ``decoder.decode()`` to each of them.
    At the end, it calls the output handlers to create output files.

    Args:
        decoder (Decoder): Current decoder instance
        output_handlers (list): List of output handlers, see
                                ``create_output_handlers()``
        src_sentences (list): A list of strings. The strings are the
                              source sentences with word indices to
                              translate (e.g. '1 123 432 2')
        trgt_sentences (list): Optional target sentences; when given,
                               forwarded to ``decoder.decode()`` as a
                               constraint (e.g. for forced decoding).
        num_log (int): Number of best hypotheses to log per sentence.
    """
    if not decoder.has_predictor():
        logging.fatal("Terminated due to an error in the "
                      "predictor configuration.")
        return
    all_hypos = []
    text_output_handler = _get_text_output_handler(output_handlers)
    if text_output_handler:
        # Text output is streamed sentence-by-sentence; open up front.
        text_output_handler.open_file()
    score_output_handler = _get_score_output_handler(output_handlers)
    start_time = time.time()
    logging.info("Start time: %s" % start_time)
    sen_indices = []
    diversity_metrics = []
    not_full = 0
    for sen_idx in get_sentence_indices(args.range, src_sentences):
        decoder.set_current_sen_id(sen_idx)
        try:
            src = "0" if src_sentences is False else src_sentences[sen_idx]
            # Guard against pathologically long inputs.
            if len(src.split()) > 1000:
                print("Skipping ID", str(sen_idx), ". Too long...")
                continue
            src_print = io_utils.src_sentence(src)
            logging.info("Next sentence (ID: %d): %s"
                         % (sen_idx + 1, src_print))
            src = io_utils.encode(src)
            start_hypo_time = time.time()
            decoder.apply_predictor_count = 0
            if trgt_sentences:
                hypos = decoder.decode(
                    src, io_utils.encode_trg(trgt_sentences[sen_idx]))
            else:
                hypos = decoder.decode(src)
            if not hypos:
                # Fall back to a dummy hypothesis so downstream output
                # stays aligned with the input sentences.
                logging.error("No translation found for ID %d!"
                              % (sen_idx + 1))
                logging.info("Stats (ID: %d): score=<not-found> "
                             "num_expansions=%d "
                             "time=%.2f"
                             % (sen_idx + 1,
                                decoder.apply_predictor_count,
                                time.time() - start_hypo_time))
                hypos = [_generate_dummy_hypo()]
            hypos = _postprocess_complete_hypos(hypos)
            for logged_hypo in hypos[:num_log]:
                logging.info("Decoded (ID: %d): %s"
                             % (sen_idx + 1,
                                io_utils.decode(logged_hypo.trgt_sentence)))
                logging.info("Stats (ID: %d): score=%f "
                             "num_expansions=%d "
                             "time=%.2f "
                             "perplexity=%.2f"
                             % (sen_idx + 1,
                                logged_hypo.total_score,
                                decoder.apply_predictor_count,
                                time.time() - start_hypo_time,
                                utils.perplexity(logged_hypo.score_breakdown)))
                if score_output_handler:
                    try:
                        score_output_handler.write_score(
                            logged_hypo.score_breakdown)
                    except IOError as e:
                        # Bug fix: ``%d`` cannot format an exception
                        # type; use ``%s`` (matches the handler at the
                        # end of this function).
                        logging.error(
                            "I/O error %s occurred when creating output "
                            "files: %s" % (sys.exc_info()[0], e))
            if decoder.nbest > 1:
                diversity_score = utils.ngram_diversity(
                    [io_utils.decode(h.trgt_sentence) for h in hypos])
                logging.info("Diversity: score=%f " % (diversity_score))
                diversity_metrics.append(diversity_score)
                # NOTE(review): placement inside the nbest>1 branch is
                # inferred from the flat source — confirm. With nbest=1
                # the counter could never trip anyway.
                if len(hypos) < decoder.nbest:
                    not_full += 1
            all_hypos.append(hypos)
            sen_indices.append(sen_idx)
            try:
                # Write text output as we go
                if text_output_handler:
                    text_output_handler.write_hypos([hypos])
            except IOError as e:
                # Bug fix: ``%d`` -> ``%s`` for the exception type.
                logging.error(
                    "I/O error %s occurred when creating output files: %s"
                    % (sys.exc_info()[0], e))
        except ValueError as e:
            logging.error("Number format error at sentence id %d: %s, "
                          "Stack trace: %s"
                          % (sen_idx + 1, e, traceback.format_exc()))
        except AttributeError as e:
            logging.fatal("Attribute error at sentence id %d: %s. This often "
                          "indicates an error in the predictor configuration "
                          "which could not be detected in initialisation. "
                          "Stack trace: %s"
                          % (sen_idx + 1, e, traceback.format_exc()))
        except Exception as e:
            logging.error("An unexpected %s error has occurred at sentence id "
                          "%d: %s, Stack trace: %s"
                          % (sys.exc_info()[0], sen_idx + 1, e,
                             traceback.format_exc()))
            try:
                # Write text output as we go: emit a dummy hypothesis so
                # the streamed output stays aligned with the input.
                if text_output_handler:
                    hypos = [_generate_dummy_hypo()]
                    text_output_handler.write_hypos([hypos])
            except IOError as e:
                # Bug fix: ``%d`` -> ``%s`` for the exception type.
                logging.error(
                    "I/O error %s occurred when creating output files: %s"
                    % (sys.exc_info()[0], e))
    logging.info("Decoding finished. Time: %.2f" % (time.time() - start_time))
    if decoder.nbest > 1:
        print(diversity_metrics)
    print("Total not full:", str(not_full))
    try:
        # Remaining (non-streamed) handlers write everything at the end;
        # the text handler only needs closing.
        for output_handler in output_handlers:
            if output_handler == text_output_handler:
                output_handler.close_file()
            else:
                output_handler.write_hypos(all_hypos, sen_indices)
    except IOError as e:
        logging.error("I/O error %s occurred when creating output files: %s"
                      % (sys.exc_info()[0], e))