Example #1
def main(_):
    if FLAGS.seqio_summaries:
        # With SeqIO summaries, each subdirectory of summary_dir holds the
        # event files for a single task.
        subdirs = tf.io.gfile.listdir(FLAGS.summary_dir)
        summary_dirs = [os.path.join(FLAGS.summary_dir, d) for d in subdirs]
    else:
        summary_dirs = [FLAGS.summary_dir]

    # Merge the metric values from every summary directory into a single dict.
    scores = None
    for d in summary_dirs:
        events = eval_utils.parse_events_files(d, FLAGS.seqio_summaries)
        if FLAGS.perplexity_eval:
            task_metrics = events
        else:
            task_metrics = eval_utils.get_eval_metric_values(
                events,
                task_name=os.path.basename(d)
                if FLAGS.seqio_summaries else None)
        if scores:
            scores.update(task_metrics)
        else:
            scores = task_metrics

    if not scores:
        logging.info("No evaluation events found in %s", FLAGS.summary_dir)
        return

    # Convert to a DataFrame, add the averaged GLUE scores, and write the CSV.
    df = eval_utils.scores_to_df(scores)
    df = eval_utils.compute_avg_glue(df)
    df = eval_utils.sort_columns(df)
    eval_utils.log_csv(df, output_file=FLAGS.out_file)
Example #2
def main(_):
    events = eval_utils.parse_events_files(FLAGS.summary_dir)
    scores = eval_utils.get_eval_metric_values(events)
    if not scores:
        logging.info("No evaluation events found in %s", FLAGS.summary_dir)
        return
    scores = eval_utils.compute_avg_glue(scores)
    eval_utils.log_csv(scores, output_file=FLAGS.out_file)
Example #3
def main(_):
    events = eval_utils.parse_events_files(FLAGS.summary_dir)
    if FLAGS.perplexity_eval:
        scores = events
    else:
        scores = eval_utils.get_eval_metric_values(events)
    if not scores:
        logging.info("No evaluation events found in %s", FLAGS.summary_dir)
        return
    df = eval_utils.scores_to_df(scores)
    df = eval_utils.compute_avg_glue(df)
    df = eval_utils.sort_columns(df)
    eval_utils.log_csv(df, output_file=FLAGS.out_file)
Example #4
  def test_log_csv(self):
    # Unknown metric keys should be rejected.
    with self.assertRaises(ValueError):
      eval_utils.log_csv({"foo_task/unknown_metric": [(10, 30.)]})
    metric_keys = list(eval_utils.METRIC_NAMES.keys())
    metric_names = list(eval_utils.METRIC_NAMES.values())
    # (step, value) pairs for the first three known metrics.
    scores = {
        metric_keys[0]: [(20, 1.), (30, 2.)],
        metric_keys[1]: [(10, 3.)],
        metric_keys[2]: [(10, 4.)],
    }
    output_file = os.path.join(self.create_tempdir().full_path, "results.csv")
    eval_utils.log_csv(scores, output_file=output_file)
    with tf.gfile.Open(output_file) as f:
      output = f.read()
    # The CSV ends with each metric's maximum value and the step at which it
    # was reached.
    expected = """step,{},{},{}
10,,3.000,4.000
20,1.000,,
30,2.000,,
max,2.000,3.000,4.000
step,30,10,10""".format(*[m.name for m in metric_names[:3]])
    self.assertEqual(output, expected)
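
The test above (like the variant in Example #5 below) indexes into eval_utils.METRIC_NAMES and reads a .name attribute off its values. A minimal stand-in with that shape, for illustration only; the real mapping in eval_utils is larger and its actual entries are not shown in these snippets:

import collections

# Hypothetical stand-in for eval_utils.METRIC_NAMES: an ordered mapping from a
# "<task>/<metric>" key to an object exposing a .name attribute, which is all
# these tests rely on. The keys and display names here are made up.
Metric = collections.namedtuple("Metric", ["name"])

METRIC_NAMES = collections.OrderedDict([
    ("example_task_a/accuracy", Metric("Example task A accuracy")),
    ("example_task_b/f1", Metric("Example task B F1")),
    ("example_task_c/exact_match", Metric("Example task C exact match")),
])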
Example #5
  def test_log_csv(self):
    metric_names = list(eval_utils.METRIC_NAMES.values())
    # Metric values indexed by step; NaN marks steps where a metric was not
    # evaluated and shows up as an empty cell in the CSV.
    df = pd.DataFrame(
        collections.OrderedDict([
            (metric_names[0].name, [np.nan, 1., 2.]),
            (metric_names[1].name, [3., np.nan, np.nan]),
            (metric_names[2].name, [4., np.nan, np.nan]),
        ]),
        index=[10, 20, 30],
    )
    df.index.name = "step"
    output_file = os.path.join(self.create_tempdir().full_path, "results.csv")
    eval_utils.log_csv(df, output_file=output_file)
    with tf.io.gfile.GFile(output_file) as f:
      output = f.read()
    expected = """step,{},{},{}
10,,3.000,4.000
20,1.000,,
30,2.000,,
max,2.000,3.000,4.000
step,30,10,10""".format(*[m.name for m in metric_names[:3]])
    self.assertEqual(output, expected)
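
The main(_) examples above depend on absl flags (summary_dir, out_file, perplexity_eval, seqio_summaries) and on imports that the snippets do not show. Below is a minimal sketch of how such a script could be wired up; the eval_utils import path, flag defaults, and help strings are assumptions rather than part of the original code.

# Minimal sketch (not one of the examples above) of the flag wiring that the
# main(_) functions assume. The eval_utils import path and the flag
# defaults/help strings are assumptions.
from absl import app
from absl import flags
from absl import logging

from t5.evaluation import eval_utils  # assumed import path

FLAGS = flags.FLAGS

flags.DEFINE_string("summary_dir", None, "Directory containing event files.")
flags.DEFINE_string("out_file", None, "Path of the CSV file to write.")
flags.DEFINE_bool("perplexity_eval", False,
                  "If set, pass raw event values through unchanged.")
flags.DEFINE_bool("seqio_summaries", False,
                  "If set, each subdirectory of summary_dir holds one task.")


def main(_):
    # Same flow as Example #3 above.
    events = eval_utils.parse_events_files(FLAGS.summary_dir)
    if FLAGS.perplexity_eval:
        scores = events
    else:
        scores = eval_utils.get_eval_metric_values(events)
    if not scores:
        logging.info("No evaluation events found in %s", FLAGS.summary_dir)
        return
    df = eval_utils.scores_to_df(scores)
    df = eval_utils.compute_avg_glue(df)
    df = eval_utils.sort_columns(df)
    eval_utils.log_csv(df, output_file=FLAGS.out_file)


if __name__ == "__main__":
    app.run(main)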