Example #1
    def testReadSummaries(self):
        event_dir = self.get_temp_dir()
        summary_writer = tf.summary.create_file_writer(event_dir)
        with summary_writer.as_default():
            tf.summary.scalar("values/a", 1, step=0)
            tf.summary.scalar("values/b", 2, step=0)
            tf.summary.scalar("values/a", 3, step=5)
            tf.summary.scalar("values/b", 4, step=5)
            tf.summary.scalar("values/a", 5, step=10)
            tf.summary.scalar("values/b", 6, step=10)
            summary_writer.flush()
        summaries = misc.read_summaries(event_dir)
        self.assertLen(summaries, 3)
        steps, values = zip(*summaries)
        self.assertListEqual(list(steps), [0, 5, 10])
        values = list(values)
        self.assertDictEqual(values[0], {"values/a": 1, "values/b": 2})
        self.assertDictEqual(values[1], {"values/a": 3, "values/b": 4})
        self.assertDictEqual(values[2], {"values/a": 5, "values/b": 6})
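A minimal sketch of what misc.read_summaries could look like for the behaviour tested above, assuming it scans the TensorFlow event files under event_dir and groups scalar values by step; the real helper lives in opennmt.utils.misc and may differ in detail:

import collections
import os

import tensorflow as tf


def read_summaries(event_dir):
    """Returns a list of (step, {tag: value}) pairs sorted by step (sketch)."""
    summaries = collections.defaultdict(dict)
    pattern = os.path.join(event_dir, "events.out.tfevents.*")
    for event_file in tf.io.gfile.glob(pattern):
        for event in tf.compat.v1.train.summary_iterator(event_file):
            if not event.HasField("summary"):
                continue  # Skip file-version and other non-summary events.
            for value in event.summary.value:
                # TF2 tf.summary.scalar stores the scalar in the tensor field.
                summaries[event.step][value.tag] = tf.make_ndarray(value.tensor).item()
    return sorted(summaries.items())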
Example #2
    def __init__(
        self,
        model,
        features_file,
        labels_file,
        batch_size,
        batch_type="examples",
        length_bucket_width=None,
        scorers=None,
        save_predictions=False,
        early_stopping=None,
        model_dir=None,
        export_on_best=None,
        exporter=None,
        max_exports_to_keep=5,
    ):
        """Initializes the evaluator.

        Args:
          model: A :class:`opennmt.models.Model` to evaluate.
          features_file: Path to the evaluation features.
          labels_file: Path to the evaluation labels.
          batch_size: The evaluation batch size.
          batch_type: The batching strategy to use: can be "examples" or "tokens".
          length_bucket_width: The width of the length buckets to select batch
            candidates from (for efficiency). Set ``None`` to not constrain batch
            formation.
          scorers: A list of scorers, callables taking the paths to the reference and
            the hypothesis and returning one or more scores.
          save_predictions: Whether to save evaluation predictions to a file. This is
            forced to ``True`` when :obj:`scorers` is set.
          early_stopping: An ``EarlyStopping`` instance.
          model_dir: The active model directory.
          export_on_best: Export a model when this evaluation metric has the
            best value so far.
          exporter: A :class:`opennmt.utils.Exporter` instance to export the model.
            Defaults to :class:`opennmt.utils.SavedModelExporter`.
          max_exports_to_keep: Maximum number of exports to keep. Older exports will
            be garbage collected. Set to ``None`` to keep all exports.

        Raises:
          ValueError: If :obj:`save_predictions` is set but the model is not compatible.
          ValueError: If :obj:`save_predictions` is set but :obj:`model_dir` is ``None``.
          ValueError: If :obj:`export_on_best` is set but :obj:`model_dir` is ``None``.
          ValueError: If the :obj:`early_stopping` configuration is invalid.
        """
        if model_dir is not None:
            export_dir = os.path.join(model_dir, "export")
            eval_dir = os.path.join(model_dir, "eval")
        else:
            if save_predictions:
                raise ValueError(
                    "Saving evaluation predictions requires model_dir to be set"
                )
            if export_on_best is not None:
                raise ValueError(
                    "Exporting models requires model_dir to be set")
            export_dir = None
            eval_dir = None

        if scorers is None:
            scorers = []
        if scorers:
            save_predictions = True
        if save_predictions:
            if model.unsupervised:
                raise ValueError(
                    "This model does not support saving evaluation predictions"
                )
            if not tf.io.gfile.exists(eval_dir):
                tf.io.gfile.makedirs(eval_dir)
        self._model = model
        self._labels_file = labels_file
        self._save_predictions = save_predictions
        self._scorers = scorers
        self._eval_dir = eval_dir
        self._metrics_history = []
        if eval_dir is not None:
            self._summary_writer = tf.summary.create_file_writer(eval_dir)
            summaries = misc.read_summaries(eval_dir)
            for step, values in summaries:
                metrics = misc.extract_prefixed_keys(values,
                                                     _SUMMARIES_SCOPE + "/")
                self._metrics_history.append((step, metrics))
        else:
            self._summary_writer = tf.summary.create_noop_writer()
        dataset = model.examples_inputter.make_evaluation_dataset(
            features_file,
            labels_file,
            batch_size,
            batch_type=batch_type,
            length_bucket_width=length_bucket_width,
            num_threads=1,
            prefetch_buffer_size=1,
        )

        self._eval_fn = tf.function(model.evaluate,
                                    input_signature=dataset.element_spec)
        self._dataset = dataset

        self._metrics_name = {"loss", "perplexity"}
        for scorer in self._scorers:
            self._metrics_name.update(scorer.scores_name)
        model_metrics = self._model.get_metrics()
        if model_metrics:
            self._metrics_name.update(set(model_metrics.keys()))

        if early_stopping is not None:
            if early_stopping.metric not in self._metrics_name:
                raise ValueError(
                    "Invalid early stopping metric '%s', expected one in %s" %
                    (early_stopping.metric, str(self._metrics_name)))
            if early_stopping.steps <= 0:
                raise ValueError("Early stopping steps should greater than 0")
        self._early_stopping = early_stopping

        self._export_on_best = export_on_best
        self._exporter = exporter
        self._export_dir = export_dir
        self._max_exports_to_keep = max_exports_to_keep
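The metrics-history restoration above filters the summary values with misc.extract_prefixed_keys before appending them to self._metrics_history. A plausible minimal reimplementation of that helper, shown only to make the contract explicit (the real one lives in opennmt.utils.misc):

def extract_prefixed_keys(dictionary, prefix):
    # Keep the entries whose key starts with the prefix, with the prefix stripped.
    return {
        key[len(prefix):]: value
        for key, value in dictionary.items()
        if key.startswith(prefix)
    }

# For instance, with the tags written in Example #1:
#   extract_prefixed_keys({"values/a": 1, "other": 2}, "values/")  ->  {"a": 1}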
Example #3
  def __init__(self,
               model,
               features_file,
               labels_file,
               batch_size,
               scorers=None,
               save_predictions=False,
               early_stopping=None,
               eval_dir=None):
    """Initializes the evaluator.

    Args:
      model: A :class:`opennmt.models.model.Model` to evaluate.
      features_file: Path to the evaluation features.
      labels_file: Path to the evaluation labels.
      batch_size: The evaluation batch size.
      scorers: A list of scorers, callables taking the paths to the reference and
        the hypothesis and returning one or more scores.
      save_predictions: Whether to save evaluation predictions to a file. This is
        forced to ``True`` when :obj:`scorers` is set.
      early_stopping: An ``EarlyStopping`` instance.
      eval_dir: Directory where predictions can be saved.

    Raises:
      ValueError: If predictions should be saved but the model is not compatible.
      ValueError: If predictions should be saved but :obj:`eval_dir` is ``None``.
      ValueError: If the :obj:`early_stopping` configuration is invalid.
    """
    if scorers is None:
      scorers = []
    if scorers:
      save_predictions = True
    if save_predictions:
      if model.unsupervised:
        raise ValueError("This model does not support saving evaluation predictions")
      if eval_dir is None:
        raise ValueError("Saving evaluation predictions requires eval_dir to be set")
      if not tf.io.gfile.exists(eval_dir):
        tf.io.gfile.makedirs(eval_dir)
    self._model = model
    self._labels_file = labels_file
    self._save_predictions = save_predictions
    self._scorers = scorers
    self._eval_dir = eval_dir
    self._metrics_history = []
    if eval_dir is not None:
      self._summary_writer = tf.summary.create_file_writer(eval_dir)
      summaries = misc.read_summaries(eval_dir)
      for step, values in summaries:
        metrics = misc.extract_prefixed_keys(values, _SUMMARIES_SCOPE + "/")
        self._metrics_history.append((step, metrics))
    else:
      self._summary_writer = tf.summary.create_noop_writer()
    dataset = model.examples_inputter.make_evaluation_dataset(
        features_file,
        labels_file,
        batch_size,
        num_threads=1,
        prefetch_buffer_size=1)

    @dataset_lib.function_on_next(dataset)
    def _eval(next_fn):
      source, target = next_fn()
      outputs, predictions = model(source, labels=target)
      loss = model.compute_loss(outputs, target, training=False)
      return loss, predictions, target

    self._eval = _eval

    self._metrics_name = {"loss", "perplexity"}
    for scorer in self._scorers:
      self._metrics_name.update(scorer.scores_name)
    model_metrics = self._model.get_metrics()
    if model_metrics:
      self._metrics_name.update(set(six.iterkeys(model_metrics)))

    if early_stopping is not None:
      if early_stopping.metric not in self._metrics_name:
        raise ValueError("Invalid early stopping metric '%s', expected one in %s" % (
            early_stopping.metric, str(self._metrics_name)))
      if early_stopping.steps <= 0:
        raise ValueError("Early stopping steps should greater than 0")
    self._early_stopping = early_stopping
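Both constructors only read early_stopping.metric and early_stopping.steps, so any small value object exposing those attributes fits. A hedged sketch of such a configuration object (the exact field names, in particular min_improvement, are assumptions rather than the library's definition):

import collections

# Assumed shape of the early stopping configuration; only .metric and .steps
# are accessed by the __init__ methods above.
EarlyStopping = collections.namedtuple(
    "EarlyStopping", ("metric", "min_improvement", "steps"))

early_stopping = EarlyStopping(metric="loss", min_improvement=0.01, steps=4)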