def testBleuWrapperWithUnicodeLineSeparator(self):
     """Check bleu_wrapper on data where one sentence contains U+2028.

     The second sentence of the first list holds a Unicode LINE SEPARATOR
     character; the BLEU score for the generated file pair is expected to be
     0.2638 within an absolute tolerance of 1e-3.
     """
     first_sentences = [u"a b a c", u"e f \u2028 d"]
     second_sentences = [u"a b a z", u"y f g d k l m"]
     hyp_path, ref_path = self._generate_test_data(
         "unicode-linesep", first_sentences, second_sentences)
     # bleu_wrapper takes the reference file first, then the hypothesis file.
     score = bleu_hook.bleu_wrapper(ref_path, hyp_path)
     expected_score = 0.2638
     self.assertAllClose(score, expected_score, atol=1e-03)
 def testBleuWrapperWithUnicodeLineSeparator(self):
   """bleu_wrapper yields ~0.2638 when a sentence contains U+2028."""
   # Two sentence lists handed to the data generator; the first list contains
   # a Unicode LINE SEPARATOR inside its second sentence.
   sentence_lists = ([u"a b a c", u"e f \u2028 d"],
                     [u"a b a z", u"y f g d k l m"])
   hyp_file, ref_file = self._generate_test_data(
       "unicode-linesep", *sentence_lists)
   computed = bleu_hook.bleu_wrapper(ref_file, hyp_file)
   self.assertAllClose(computed, 0.2638, atol=1e-03)
 def testBleuWrapper(self):
     """Check bleu_wrapper on the plain ("standard") two-sentence data set.

     The score for the generated file pair is expected to be 0.3436 within
     an absolute tolerance of 1e-3.
     """
     data_files = self._generate_test_data(
         "standard",
         [u"a b a c", u"e f g d"],
         [u"a b a z", u"y f g d k l m"])
     hyp_path, ref_path = data_files
     # Reference file goes first, hypothesis file second.
     score = bleu_hook.bleu_wrapper(ref_path, hyp_path)
     expected_score = 0.3436
     self.assertAllClose(score, expected_score, atol=1e-03)
Beispiel #4
0
def compute_bleu_summaries(hook_args):
    """Compute BLEU core summaries using the decoder output.

    Args:
      hook_args: DecodeHookArgs namedtuple. Only its ``decode_hparams`` and
        ``estimator`` fields are read here.

    Returns:
      None when ``decode_hparams.decode_reference`` or
      ``decode_hparams.decode_to_file`` is None. Otherwise a list holding a
      single tf.Summary value tagged "BLEU" whose value is the BLEU score
      multiplied by 100.

    Side effects:
      Logs the score. When ``decode_hparams.mlperf_mode`` is set, emits the
      EVAL_TARGET / EVAL_ACCURACY / EVAL_STOP mlperf log entries. Sets the
      ``mlperf_success`` hparam to True once the score reaches
      ``decode_hparams.mlperf_threshold``.
    """
    decode_hparams = hook_args.decode_hparams

    # Nothing to score: return before touching the estimator so this no-op
    # path does not depend on a readable global step.
    if (decode_hparams.decode_reference is None
            or decode_hparams.decode_to_file is None):
        return None

    estimator = hook_args.estimator
    current_step = estimator.get_variable_value(tf.GraphKeys.GLOBAL_STEP)
    iterations_per_loop = getattr(decode_hparams, "iterations_per_loop", None)
    if current_step and iterations_per_loop:
        # np.asscalar was deprecated in NumPy 1.16 and later removed;
        # ndarray.item() is its documented replacement.
        current_epoch = np.asarray(current_step).item() // iterations_per_loop
    else:
        current_epoch = 0

    values = []
    bleu = 100 * bleu_hook.bleu_wrapper(decode_hparams.decode_reference,
                                        decode_hparams.decode_to_file)
    values.append(tf.Summary.Value(tag="BLEU", simple_value=bleu))
    tf.logging.info("%s: BLEU = %6.2f" % (decode_hparams.decode_to_file, bleu))
    if decode_hparams.mlperf_mode:
        mlperf_log.transformer_print(key=mlperf_log.EVAL_TARGET,
                                     value=decode_hparams.mlperf_threshold)
        # The reported epoch is one behind the computed one, floored at 0.
        mlperf_log.transformer_print(key=mlperf_log.EVAL_ACCURACY,
                                     value={
                                         "epoch": max(current_epoch - 1, 0),
                                         "value": bleu
                                     })
        mlperf_log.transformer_print(key=mlperf_log.EVAL_STOP)

    # The success flag is set regardless of mlperf_mode.
    if bleu >= decode_hparams.mlperf_threshold:
        decode_hparams.set_hparam("mlperf_success", True)

    return values
Beispiel #5
0
def compute_bleu_summaries(hook_args):
  """Build the BLEU summary for the files named in the decode hparams.

  Args:
    hook_args: DecodeHookArgs namedtuple; only `decode_hparams` is read.

  Returns:
    None if `decode_reference` or `decode_to_file` is None, otherwise a list
    with one tf.Summary value tagged "BLEU" (the score multiplied by 100).
  """
  hparams = hook_args.decode_hparams

  # Both file names are required; give up as soon as one is missing.
  reference_file = hparams.decode_reference
  if reference_file is None:
    return None
  translation_file = hparams.decode_to_file
  if translation_file is None:
    return None

  score = 100 * bleu_hook.bleu_wrapper(reference_file, translation_file)
  summaries = [tf.Summary.Value(tag="BLEU", simple_value=score)]
  tf.logging.info("%s: BLEU = %6.2f" % (translation_file, score))
  return summaries
Beispiel #6
0
def compute_bleu_summaries(hook_args):
    """Score the decoded file against its reference and report BLEU.

    Args:
      hook_args: DecodeHookArgs namedtuple; `decode_hparams` and `hparams`
        are the fields read here.

    Returns:
      None if `decode_reference` or `decode_to_file` is None, otherwise a
      list with one tf.Summary value tagged "BLEU" (score multiplied by 100).

    Side effects:
      Logs the score; in mlperf mode also emits the EVAL_TARGET,
      EVAL_ACCURACY and EVAL_STOP mlperf log entries. Sets the
      `mlperf_success` hparam once the score reaches `mlperf_threshold`.
    """
    decode_hparams = hook_args.decode_hparams

    reference_file = decode_hparams.decode_reference
    if reference_file is None:
        return None
    translation_file = decode_hparams.decode_to_file
    if translation_file is None:
        return None

    score = 100 * bleu_hook.bleu_wrapper(reference_file, translation_file)
    summaries = [tf.Summary.Value(tag="BLEU", simple_value=score)]
    tf.logging.info("%s: BLEU = %6.2f" % (translation_file, score))

    if hook_args.hparams.mlperf_mode:
        step = decode_hparams.mlperf_decode_step
        mlperf_log.transformer_print(
            key=mlperf_log.EVAL_TARGET,
            value=decode_hparams.mlperf_threshold)
        # Reported epoch is one behind the step-derived epoch, floored at 0.
        epoch = max(step // decode_hparams.iterations_per_loop - 1, 0)
        accuracy = {"epoch": epoch, "value": score}
        mlperf_log.transformer_print(
            key=mlperf_log.EVAL_ACCURACY, value=accuracy)
        mlperf_log.transformer_print(key=mlperf_log.EVAL_STOP)

    # Checked whether or not mlperf mode is active.
    if score >= decode_hparams.mlperf_threshold:
        decode_hparams.set_hparam("mlperf_success", True)

    return summaries
Beispiel #7
0
def compute_bleu_summaries(hook_args):
  """Compute the BLEU summary for the decoder output.

  Args:
    hook_args: DecodeHookArgs namedtuple; `decode_hparams` and `hparams` are
      the fields read here.

  Returns:
    None when either `decode_reference` or `decode_to_file` is None;
    otherwise a single-element list with the tf.Summary value tagged "BLEU"
    (the score multiplied by 100).

  Side effects:
    Logs the score, emits mlperf EVAL_* log entries when
    `hook_args.hparams.mlperf_mode` is set, and sets the `mlperf_success`
    hparam once the score reaches `mlperf_threshold`.
  """
  dec_hp = hook_args.decode_hparams

  have_both_files = not (dec_hp.decode_reference is None or
                         dec_hp.decode_to_file is None)
  if not have_both_files:
    return None

  bleu_score = 100 * bleu_hook.bleu_wrapper(
      dec_hp.decode_reference, dec_hp.decode_to_file)
  result = [tf.Summary.Value(tag="BLEU", simple_value=bleu_score)]
  tf.logging.info("%s: BLEU = %6.2f" % (dec_hp.decode_to_file, bleu_score))

  if hook_args.hparams.mlperf_mode:
    decode_step = dec_hp.mlperf_decode_step
    mlperf_log.transformer_print(
        key=mlperf_log.EVAL_TARGET, value=dec_hp.mlperf_threshold)
    # Epoch index is derived from the decode step, shifted back by one and
    # clamped so it never goes negative.
    epoch_index = max(decode_step // dec_hp.iterations_per_loop - 1, 0)
    mlperf_log.transformer_print(
        key=mlperf_log.EVAL_ACCURACY,
        value={"epoch": epoch_index, "value": bleu_score})
    mlperf_log.transformer_print(key=mlperf_log.EVAL_STOP)

  # Evaluated independently of mlperf_mode.
  if bleu_score >= dec_hp.mlperf_threshold:
    dec_hp.set_hparam("mlperf_success", True)

  return result
Beispiel #8
0
                epoch_i % hparams['log_bleu_every'] == 0:
        start = time.time()
        so_file = open(hparams['pairs_path'] + 'dev_input.txt')
        results_prediction = estimator.predict(
            input_fn=lambda: i_input_fn(so_file, hparams))
        pred_tmp_file = open(hparams['checkpoints_path'] + \
                    ('dev_prediction_%i.txt' % epoch_i), 'w')
        for j, r in enumerate(results_prediction):
            if j % 1000 == 0:
                print('Predicting...', j)
            sent_vec = r['classes']
            if 1 in sent_vec:
                sent_vec = sent_vec[:list(sent_vec).index(1)]
            sent = output_encoder.decode(sent_vec)
            pred_tmp_file.write("%s\n" % sent)
        pred_tmp_file.close()
        print('Predictions took', time.time() - start, 'sec')
        bleu = bleu_hook.bleu_wrapper(
            ref_filename=hparams['pairs_path'] + 'dev_output.txt',
            hyp_filename=hparams['checkpoints_path'] +
            ('dev_prediction_%i.txt' % epoch_i)) * 100
        print('\033[32;1mBLEU = %f\033[0m' % bleu)
        summary = tf.Summary(value=[
            tf.Summary.Value(tag='BLEU_predictions', simple_value=bleu)
        ])
        summary_writer.add_summary(summary, epoch_i)
        summary_writer.flush()

print(eval_results)
print('Training finished')
 def testBleuWrapper(self):
   hyp_filename, ref_filename = self._generate_test_data(
       "standard", [u"a b a c", u"e f g d"], [u"a b a z", u"y f g d k l m"])
   bleu = bleu_hook.bleu_wrapper(ref_filename, hyp_filename)
   actual_bleu = 0.3436
   self.assertAllClose(bleu, actual_bleu, atol=1e-03)
Beispiel #10
0
def main(_):
  """Report BLEU for one translation file or for each file in a directory.

  Single-file mode (--translation): prints the requested BLEU variant(s)
  against --reference and returns.

  Directory mode (--translations_dir): waits up to --wait_minutes for the
  directory to appear, then evaluates every file yielded by
  bleu_hook.stepfiles_iterator, writing one summary event per file to
  --event_dir. The last evaluated step is persisted in
  "last_evaluated_step.txt" inside --event_dir; with --min_steps=-1 the run
  resumes from that step (or from 0 if the file does not exist).

  Args:
    _: unused positional argument.

  Raises:
    ValueError: if both or neither of --translation / --translations_dir are
      given, or if the translations directory does not appear in time.
  """
  tf.logging.set_verbosity(tf.logging.INFO)

  # (summary tag, case_sensitive) for each requested variant. Uncased is
  # always evaluated before cased.
  variants = [
      ("BLEU_" + name, case_sensitive)
      for name, case_sensitive in (("uncased", False), ("cased", True))
      if FLAGS.bleu_variant in (name, "both")
  ]

  if FLAGS.translation:
    if FLAGS.translations_dir:
      raise ValueError(
          "Cannot specify both --translation and --translations_dir.")
    for tag, case_sensitive in variants:
      bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, FLAGS.translation,
                                          case_sensitive=case_sensitive)
      print("%s = %6.2f" % (tag, bleu))
    return

  if not FLAGS.translations_dir:
    raise ValueError(
        "Either --translation or --translations_dir must be specified.")
  transl_dir = os.path.expanduser(FLAGS.translations_dir)
  if not os.path.exists(transl_dir):
    exit_time = time.time() + FLAGS.wait_minutes * 60
    tf.logging.info("Translation dir %s does not exist, waiting till %s.",
                    transl_dir, time.asctime(time.localtime(exit_time)))
    while not os.path.exists(transl_dir):
      time.sleep(10)
      if time.time() > exit_time:
        raise ValueError("Translation dir %s does not exist" % transl_dir)

  last_step_file = os.path.join(FLAGS.event_dir, "last_evaluated_step.txt")
  if FLAGS.min_steps == -1:
    if tf.gfile.Exists(last_step_file):
      # Read through tf.gfile as well, so the existence check and the read
      # agree on which filesystems are supported for event_dir.
      with tf.gfile.Open(last_step_file) as ls_file:
        FLAGS.min_steps = int(ls_file.read())
    else:
      FLAGS.min_steps = 0
  if FLAGS.report_zero is None:
    FLAGS.report_zero = FLAGS.min_steps == 0

  writer = tf.summary.FileWriter(FLAGS.event_dir)
  for transl_file in bleu_hook.stepfiles_iterator(
      transl_dir, FLAGS.wait_minutes, FLAGS.min_steps, path_suffix=""):
    # report_zero handling must be inside the for-loop,
    # so we are sure the transl_dir is already created.
    if FLAGS.report_zero:
      all_files = (os.path.join(transl_dir, f) for f in os.listdir(transl_dir))
      # The zero point is stamped with the mtime of the oldest file.
      start_time = min(
          os.path.getmtime(f) for f in all_files if os.path.isfile(f))
      values = [
          tf.Summary.Value(tag=tag + FLAGS.tag_suffix, simple_value=0)
          for tag, _ in variants
      ]
      writer.add_event(tf.summary.Event(summary=tf.Summary(value=values),
                                        wall_time=start_time, step=0))
      FLAGS.report_zero = False

    filename = transl_file.filename
    tf.logging.info("Evaluating " + filename)
    values = []
    for tag, case_sensitive in variants:
      bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, filename,
                                          case_sensitive=case_sensitive)
      values.append(tf.Summary.Value(tag=tag + FLAGS.tag_suffix,
                                     simple_value=bleu))
      tf.logging.info("%s: %s = %6.2f" % (filename, tag, bleu))
    writer.add_event(tf.summary.Event(
        summary=tf.Summary(value=values),
        wall_time=transl_file.mtime, step=transl_file.steps))
    writer.flush()
    # Persist progress after every file so an interrupted run can resume.
    with tf.gfile.Open(last_step_file, "w") as ls_file:
      ls_file.write(str(transl_file.steps) + "\n")
Beispiel #11
0
            sent_vec = r['classes']
            if 1 in sent_vec:
                sent_vec = sent_vec[:list(sent_vec).index(1)]
            if 0 in sent_vec:
                sent_vec = sent_vec[:list(sent_vec).index(0)]
            sent = output_encoder.decode(sent_vec)
            predictions.append(sent)
        file_path = exp_path + ('predictions/prediction_%s.txt' %
                                corpus_info['evals'][k]['name'])
        print(file_path)
        pred_tmp_file = open(file_path, 'w')
        for s in predictions:
            pred_tmp_file.write("%s\n" % s)
        pred_tmp_file.close()
        print("Comparing %s and %s" %
              (corpus_info['eval_target_path'], file_path))
        bleu = bleu_hook.bleu_wrapper(
            ref_filename=corpus_info['eval_target_path'],
            hyp_filename=file_path) * 100
        print('\033[32;1mBLEU %s = %f\033[0m' %
              (corpus_info['evals'][k]['name'], bleu))
        summary = tf.Summary(value=[
            tf.Summary.Value(tag=('BLEU_%s' % corpus_info['evals'][k]['name']),
                             simple_value=bleu)
        ])
        summary_writer.add_summary(summary, i)
        summary_writer.flush()
    #break

print("END")
Beispiel #12
0
def main(_):
    """Report BLEU for one translation file or for each file in a directory.

    Single-file mode (--translation): prints the requested BLEU variant(s)
    against --reference and returns.

    Directory mode (--translations_dir): waits up to --wait_minutes for the
    directory to appear, then evaluates every file yielded by
    bleu_hook.stepfiles_iterator and writes one summary event per file to
    --event_dir. Progress is stored in "last_evaluated_step.txt" inside
    --event_dir; with --min_steps=-1 evaluation resumes from the stored step
    (or from 0 when that file does not exist).

    Args:
      _: unused positional argument.

    Raises:
      ValueError: if both or neither of --translation / --translations_dir
        are given, or if the translations directory does not appear in time.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    # Requested variants as (summary tag, case_sensitive), uncased first.
    variants = []
    if FLAGS.bleu_variant in ("uncased", "both"):
        variants.append(("BLEU_uncased", False))
    if FLAGS.bleu_variant in ("cased", "both"):
        variants.append(("BLEU_cased", True))

    if FLAGS.translation:
        if FLAGS.translations_dir:
            raise ValueError(
                "Cannot specify both --translation and --translations_dir.")
        for tag, case_sensitive in variants:
            bleu = 100 * bleu_hook.bleu_wrapper(
                FLAGS.reference, FLAGS.translation,
                case_sensitive=case_sensitive)
            print("%s = %6.2f" % (tag, bleu))
        return

    if not FLAGS.translations_dir:
        raise ValueError(
            "Either --translation or --translations_dir must be specified.")
    transl_dir = os.path.expanduser(FLAGS.translations_dir)
    if not os.path.exists(transl_dir):
        exit_time = time.time() + FLAGS.wait_minutes * 60
        tf.logging.info("Translation dir %s does not exist, waiting till %s.",
                        transl_dir, time.asctime(time.localtime(exit_time)))
        while not os.path.exists(transl_dir):
            time.sleep(10)
            if time.time() > exit_time:
                raise ValueError("Translation dir %s does not exist" %
                                 transl_dir)

    last_step_file = os.path.join(FLAGS.event_dir, "last_evaluated_step.txt")
    if FLAGS.min_steps == -1:
        if tf.gfile.Exists(last_step_file):
            # Use tf.gfile for the read too, so it supports the same
            # filesystems as the existence check above.
            with tf.gfile.Open(last_step_file) as ls_file:
                FLAGS.min_steps = int(ls_file.read())
        else:
            FLAGS.min_steps = 0
    if FLAGS.report_zero is None:
        FLAGS.report_zero = FLAGS.min_steps == 0

    writer = tf.summary.FileWriter(FLAGS.event_dir)
    for transl_file in bleu_hook.stepfiles_iterator(transl_dir,
                                                    FLAGS.wait_minutes,
                                                    FLAGS.min_steps,
                                                    path_suffix=""):
        # report_zero handling must be inside the for-loop,
        # so we are sure the transl_dir is already created.
        if FLAGS.report_zero:
            all_files = (os.path.join(transl_dir, f)
                         for f in os.listdir(transl_dir))
            # The zero point is stamped with the mtime of the oldest file.
            start_time = min(
                os.path.getmtime(f) for f in all_files if os.path.isfile(f))
            values = [
                tf.Summary.Value(tag=tag + FLAGS.tag_suffix, simple_value=0)
                for tag, _ in variants
            ]
            writer.add_event(
                tf.summary.Event(summary=tf.Summary(value=values),
                                 wall_time=start_time,
                                 step=0))
            FLAGS.report_zero = False

        filename = transl_file.filename
        tf.logging.info("Evaluating " + filename)
        values = []
        for tag, case_sensitive in variants:
            bleu = 100 * bleu_hook.bleu_wrapper(
                FLAGS.reference, filename, case_sensitive=case_sensitive)
            values.append(
                tf.Summary.Value(tag=tag + FLAGS.tag_suffix,
                                 simple_value=bleu))
            tf.logging.info("%s: %s = %6.2f" % (filename, tag, bleu))
        writer.add_event(
            tf.summary.Event(summary=tf.Summary(value=values),
                             wall_time=transl_file.mtime,
                             step=transl_file.steps))
        writer.flush()
        # Persist progress after every file so an interrupted run can resume.
        with tf.gfile.Open(last_step_file, "w") as ls_file:
            ls_file.write(str(transl_file.steps) + "\n")