Example #1
0
 def write_hypos(self, all_hypos, sen_indices=None):
     """Write the best hypothesis of each sentence to the output file.

     If ``self.f`` is already an open file object, append to it and
     flush after every sentence. Otherwise open ``self.path`` fresh
     and write all sentences there.

     Args:
         all_hypos (list): List of hypothesis lists, one per sentence.
             Only the first (best) hypothesis of each list is written.
         sen_indices (list): Unused; kept for interface compatibility
             with the other output handlers.
     """
     if self.f is not None:
         for hypos in all_hypos:
             self.f.write(io_utils.decode(hypos[0].trgt_sentence))
             self.f.write("\n")
             self.f.flush()
     else:
         with codecs.open(self.path, "w", encoding='utf-8') as f:
             for hypos in all_hypos:
                 f.write(io_utils.decode(hypos[0].trgt_sentence))
                 f.write("\n")
                 # Bug fix: flush the local handle. The original called
                 # self.f.flush() here, but self.f is None in this
                 # branch, which raised AttributeError on first write.
                 f.flush()
Example #2
0
 def write_hypos(self, all_hypos, sen_indices=None):
     """Write one hypothesis per output file for every sentence.

     Output files are opened lazily on first use. For each sentence,
     the hypothesis list is padded in place (repeating its last entry)
     until there is one hypothesis per open file; the i-th hypothesis
     is then written and flushed to the i-th file.

     Args:
         all_hypos (list): List of hypothesis lists, one per sentence.
         sen_indices (list): Unused; kept for interface compatibility.
     """
     if not self.f:
         self.open_file()
     n_files = len(self.f)
     for hypos in all_hypos:
         # Pad in place so every file receives a hypothesis; the
         # mutation is intentional and matches the original behavior.
         while len(hypos) < n_files:
             hypos.append(hypos[-1])
         for out_file, hypo in zip(self.f, hypos):
             out_file.write(io_utils.decode(hypo.trgt_sentence))
             out_file.write("\n")
             out_file.flush()
Example #3
0
 def write_hypos(self, all_hypos, sen_indices):
     """Write all n-best hypotheses in Moses-style n-best format.

     Each output line has the form
     ``<idx> ||| <sentence> ||| <per-predictor scores> ||| <total>``
     where ``<idx>`` is the sentence index from ``sen_indices`` and the
     per-predictor score is the sum over the hypothesis' score
     breakdown for that predictor.

     Args:
         all_hypos (list): List of hypothesis lists, one per sentence.
         sen_indices (list): Sentence indices aligned with
             ``all_hypos``.
     """
     with codecs.open(self.path, "w", encoding='utf-8') as f:
         n_predictors = len(self.predictor_names)
         for idx, hypos in zip(sen_indices, all_hypos):
             for hypo in hypos:
                 f.write(
                     "%d ||| %s ||| %s ||| %f" %
                     (idx, io_utils.decode(hypo.trgt_sentence), ' '.join(
                         "%s= %f" %
                         (self.predictor_names[i],
                          sum([s[i][0] for s in hypo.score_breakdown]))
                         for i in range(n_predictors)), hypo.total_score))
                 f.write("\n")
             # NOTE: the original code incremented idx at this point,
             # but idx is rebound by the zip on every iteration, so the
             # increment was dead code and has been removed.
Example #4
0
def do_decode(decoder,
              output_handlers,
              src_sentences,
              trgt_sentences=None,
              num_log=1):
    """This method contains the main decoding loop. It iterates through
    ``src_sentences`` and applies ``decoder.decode()`` to each of them.
    At the end, it calls the output handlers to create output files.

    Args:
        decoder (Decoder):  Current decoder instance
        output_handlers (list):  List of output handlers, see
                                 ``create_output_handlers()``
        src_sentences (list):  A list of strings. The strings are the
                               source sentences with word indices to
                               translate (e.g. '1 123 432 2')
        trgt_sentences (list):  Optional target sentences; when given,
                                they are passed to ``decoder.decode()``
                                as constraints/references.
        num_log (int):  Number of best hypotheses to log per sentence.
    """
    if not decoder.has_predictor():
        logging.fatal("Terminated due to an error in the "
                      "predictor configuration.")
        return
    all_hypos = []
    # The text handler streams output sentence-by-sentence; all other
    # handlers receive the full hypothesis list at the end.
    text_output_handler = _get_text_output_handler(output_handlers)
    if text_output_handler:
        text_output_handler.open_file()
    score_output_handler = _get_score_output_handler(output_handlers)

    start_time = time.time()
    logging.info("Start time: %s" % start_time)
    sen_indices = []
    diversity_metrics = []
    not_full = 0  # sentences whose n-best list came back short

    for sen_idx in get_sentence_indices(args.range, src_sentences):
        decoder.set_current_sen_id(sen_idx)
        try:
            src = "0" if src_sentences is False else src_sentences[sen_idx]
            # Guard against pathologically long inputs.
            if len(src.split()) > 1000:
                print("Skipping ID", str(sen_idx), ". Too long...")
                continue
            src_print = io_utils.src_sentence(src)
            logging.info("Next sentence (ID: %d): %s" %
                         (sen_idx + 1, src_print))
            src = io_utils.encode(src)
            start_hypo_time = time.time()
            decoder.apply_predictor_count = 0
            if trgt_sentences:
                hypos = decoder.decode(
                    src, io_utils.encode_trg(trgt_sentences[sen_idx]))
            else:
                hypos = decoder.decode(src)
            if not hypos:
                logging.error("No translation found for ID %d!" %
                              (sen_idx + 1))
                logging.info("Stats (ID: %d): score=<not-found> "
                             "num_expansions=%d "
                             "time=%.2f" %
                             (sen_idx + 1, decoder.apply_predictor_count,
                              time.time() - start_hypo_time))
                # Emit a placeholder so downstream output stays aligned
                # with the sentence indices.
                hypos = [_generate_dummy_hypo()]

            hypos = _postprocess_complete_hypos(hypos)
            for logged_hypo in hypos[:num_log]:
                logging.info(
                    "Decoded (ID: %d): %s" %
                    (sen_idx + 1, io_utils.decode(logged_hypo.trgt_sentence)))
                logging.info("Stats (ID: %d): score=%f "
                             "num_expansions=%d "
                             "time=%.2f "
                             "perplexity=%.2f" %
                             (sen_idx + 1, logged_hypo.total_score,
                              decoder.apply_predictor_count,
                              time.time() - start_hypo_time,
                              utils.perplexity(logged_hypo.score_breakdown)))

            # NOTE(review): this writes the breakdown of the last hypo
            # logged above (the best one when num_log == 1).
            if score_output_handler:
                try:
                    score_output_handler.write_score(
                        logged_hypo.score_breakdown)
                except IOError as e:
                    # Bug fix: sys.exc_info()[0] is an exception *type*;
                    # formatting it with %d raised TypeError inside the
                    # handler. Use %s (as the final handler below does).
                    logging.error(
                        "I/O error %s occurred when creating output files: %s"
                        % (sys.exc_info()[0], e))

            if decoder.nbest > 1:
                diversity_score = utils.ngram_diversity(
                    [io_utils.decode(h.trgt_sentence) for h in hypos])
                logging.info("Diversity: score=%f " % (diversity_score))
                diversity_metrics.append(diversity_score)

                if len(hypos) < decoder.nbest:
                    not_full += 1

            all_hypos.append(hypos)
            sen_indices.append(sen_idx)
            try:
                # Write text output as we go
                if text_output_handler:
                    text_output_handler.write_hypos([hypos])
            except IOError as e:
                # Bug fix: %d -> %s for the exception type (see above).
                logging.error(
                    "I/O error %s occurred when creating output files: %s" %
                    (sys.exc_info()[0], e))
        except ValueError as e:
            logging.error("Number format error at sentence id %d: %s, "
                          "Stack trace: %s" %
                          (sen_idx + 1, e, traceback.format_exc()))
        except AttributeError as e:
            logging.fatal("Attribute error at sentence id %d: %s. This often "
                          "indicates an error in the predictor configuration "
                          "which could not be detected in initialisation. "
                          "Stack trace: %s" %
                          (sen_idx + 1, e, traceback.format_exc()))
        except Exception as e:
            # Top-level per-sentence boundary: log and emit a dummy
            # hypothesis so the streamed text output stays aligned.
            logging.error(
                "An unexpected %s error has occurred at sentence id "
                "%d: %s, Stack trace: %s" %
                (sys.exc_info()[0], sen_idx + 1, e, traceback.format_exc()))
            try:
                # Write text output as we go
                if text_output_handler:
                    hypos = [_generate_dummy_hypo()]
                    text_output_handler.write_hypos([hypos])
            except IOError as e:
                # Bug fix: %d -> %s for the exception type (see above).
                logging.error(
                    "I/O error %s occurred when creating output files: %s" %
                    (sys.exc_info()[0], e))

    logging.info("Decoding finished. Time: %.2f" % (time.time() - start_time))
    if decoder.nbest > 1:
        print(diversity_metrics)
    print("Total not full:", str(not_full))
    try:
        # Text output was streamed during the loop; only close it here.
        # Every other handler gets the accumulated hypotheses now.
        for output_handler in output_handlers:
            if output_handler == text_output_handler:
                output_handler.close_file()
            else:
                output_handler.write_hypos(all_hypos, sen_indices)
    except IOError as e:
        logging.error("I/O error %s occurred when creating output files: %s" %
                      (sys.exc_info()[0], e))