def testReadSummaries(self):
    event_dir = self.get_temp_dir()
    summary_writer = tf.summary.create_file_writer(event_dir)
    with summary_writer.as_default():
        tf.summary.scalar("values/a", 1, step=0)
        tf.summary.scalar("values/b", 2, step=0)
        tf.summary.scalar("values/a", 3, step=5)
        tf.summary.scalar("values/b", 4, step=5)
        tf.summary.scalar("values/a", 5, step=10)
        tf.summary.scalar("values/b", 6, step=10)
        summary_writer.flush()
    summaries = misc.read_summaries(event_dir)
    self.assertLen(summaries, 3)
    steps, values = zip(*summaries)
    self.assertListEqual(list(steps), [0, 5, 10])
    values = list(values)
    self.assertDictEqual(values[0], {"values/a": 1, "values/b": 2})
    self.assertDictEqual(values[1], {"values/a": 3, "values/b": 4})
    self.assertDictEqual(values[2], {"values/a": 5, "values/b": 6})
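# --- Illustration (not part of the original test) -----------------------------
# A sketch of the behaviour the test expects from misc.read_summaries: scalar
# events grouped by step and returned in step order. This is not necessarily how
# the helper is implemented; it assumes the directory only contains event files
# written by tf.summary as above.
import collections
import glob
import os

import tensorflow as tf


def read_summaries_sketch(event_dir):
    values_per_step = collections.defaultdict(dict)
    for event_file in glob.glob(os.path.join(event_dir, "events.out.tfevents.*")):
        for event in tf.compat.v1.train.summary_iterator(event_file):
            for value in event.summary.value:
                # tf.summary.scalar stores the scalar in the tensor field.
                values_per_step[event.step][value.tag] = float(
                    tf.make_ndarray(value.tensor)
                )
    return sorted(values_per_step.items())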
def __init__(
    self,
    model,
    features_file,
    labels_file,
    batch_size,
    batch_type="examples",
    length_bucket_width=None,
    scorers=None,
    save_predictions=False,
    early_stopping=None,
    model_dir=None,
    export_on_best=None,
    exporter=None,
    max_exports_to_keep=5,
):
    """Initializes the evaluator.

    Args:
      model: A :class:`opennmt.models.Model` to evaluate.
      features_file: Path to the evaluation features.
      labels_file: Path to the evaluation labels.
      batch_size: The evaluation batch size.
      batch_type: The batching strategy to use: can be "examples" or "tokens".
      length_bucket_width: The width of the length buckets to select batch
        candidates from (for efficiency). Set ``None`` to not constrain batch
        formation.
      scorers: A list of scorers, callables taking the path to the reference and
        the hypothesis and returning one or more scores.
      save_predictions: Save evaluation predictions to a file. This is ``True``
        when :obj:`scorers` is set.
      early_stopping: An ``EarlyStopping`` instance.
      model_dir: The active model directory.
      export_on_best: Export a model when this evaluation metric has the best
        value so far.
      exporter: A :class:`opennmt.utils.Exporter` instance to export the model.
        Defaults to :class:`opennmt.utils.SavedModelExporter`.
      max_exports_to_keep: Maximum number of exports to keep. Older exports will
        be garbage collected. Set to ``None`` to keep all exports.

    Raises:
      ValueError: If :obj:`save_predictions` is set but the model is not
        compatible.
      ValueError: If :obj:`save_predictions` is set but :obj:`model_dir` is
        ``None``.
      ValueError: If :obj:`export_on_best` is set but :obj:`model_dir` is
        ``None``.
      ValueError: If the :obj:`early_stopping` configuration is invalid.
    """
    if model_dir is not None:
        export_dir = os.path.join(model_dir, "export")
        eval_dir = os.path.join(model_dir, "eval")
    else:
        if save_predictions:
            raise ValueError(
                "Saving evaluation predictions requires model_dir to be set"
            )
        if export_on_best is not None:
            raise ValueError("Exporting models requires model_dir to be set")
        export_dir = None
        eval_dir = None

    if scorers is None:
        scorers = []
    if scorers:
        save_predictions = True
    if save_predictions:
        if model.unsupervised:
            raise ValueError(
                "This model does not support saving evaluation predictions"
            )
        if not tf.io.gfile.exists(eval_dir):
            tf.io.gfile.makedirs(eval_dir)
    self._model = model
    self._labels_file = labels_file
    self._save_predictions = save_predictions
    self._scorers = scorers
    self._eval_dir = eval_dir
    self._metrics_history = []
    if eval_dir is not None:
        self._summary_writer = tf.summary.create_file_writer(eval_dir)
        summaries = misc.read_summaries(eval_dir)
        for step, values in summaries:
            metrics = misc.extract_prefixed_keys(values, _SUMMARIES_SCOPE + "/")
            self._metrics_history.append((step, metrics))
    else:
        self._summary_writer = tf.summary.create_noop_writer()
    dataset = model.examples_inputter.make_evaluation_dataset(
        features_file,
        labels_file,
        batch_size,
        batch_type=batch_type,
        length_bucket_width=length_bucket_width,
        num_threads=1,
        prefetch_buffer_size=1,
    )

    self._eval_fn = tf.function(model.evaluate, input_signature=dataset.element_spec)
    self._dataset = dataset

    self._metrics_name = {"loss", "perplexity"}
    for scorer in self._scorers:
        self._metrics_name.update(scorer.scores_name)
    model_metrics = self._model.get_metrics()
    if model_metrics:
        self._metrics_name.update(set(model_metrics.keys()))

    if early_stopping is not None:
        if early_stopping.metric not in self._metrics_name:
            raise ValueError(
                "Invalid early stopping metric '%s', expected one in %s"
                % (early_stopping.metric, str(self._metrics_name))
            )
        if early_stopping.steps <= 0:
            raise ValueError("Early stopping steps should be greater than 0")
    self._early_stopping = early_stopping

    self._export_on_best = export_on_best
    self._exporter = exporter
    self._export_dir = export_dir
    self._max_exports_to_keep = max_exports_to_keep
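# --- Usage sketch (not part of the original module) ----------------------------
# A minimal scorer satisfying the interface the constructor relies on: it is
# callable on the reference and hypothesis paths and exposes a ``scores_name``
# set. The scorer name, file paths, return format, and the ``Evaluator`` class
# name in the commented call are illustrative assumptions, not taken from the
# source.
class WordCountRatioScorer:
    scores_name = {"word_count_ratio"}

    def __call__(self, ref_path, hyp_path):
        def _count_words(path):
            with tf.io.gfile.GFile(path) as f:
                return sum(len(line.split()) for line in f)

        return {"word_count_ratio": _count_words(hyp_path) / _count_words(ref_path)}


# evaluator = Evaluator(
#     model,                               # an opennmt.models.Model instance
#     features_file="data/valid.src",      # hypothetical paths
#     labels_file="data/valid.tgt",
#     batch_size=32,
#     scorers=[WordCountRatioScorer()],    # implies save_predictions=True
#     model_dir="run/baseline",            # required because predictions are saved
# )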
def __init__(self, model, features_file, labels_file, batch_size,
             scorers=None, save_predictions=False, early_stopping=None,
             eval_dir=None):
    """Initializes the evaluator.

    Args:
      model: A :class:`opennmt.models.model.Model` to evaluate.
      features_file: Path to the evaluation features.
      labels_file: Path to the evaluation labels.
      batch_size: The evaluation batch size.
      scorers: A list of scorers, callables taking the path to the reference and
        the hypothesis and returning one or more scores.
      save_predictions: Save evaluation predictions to a file. This is ``True``
        when :obj:`scorers` is set.
      early_stopping: An ``EarlyStopping`` instance.
      eval_dir: Directory where predictions can be saved.

    Raises:
      ValueError: If predictions should be saved but the model is not compatible.
      ValueError: If predictions should be saved but :obj:`eval_dir` is ``None``.
      ValueError: If the :obj:`early_stopping` configuration is invalid.
    """
    if scorers is None:
        scorers = []
    if scorers:
        save_predictions = True
    if save_predictions:
        if model.unsupervised:
            raise ValueError("This model does not support saving evaluation predictions")
        if eval_dir is None:
            raise ValueError("Saving evaluation predictions requires eval_dir to be set")
        if not tf.io.gfile.exists(eval_dir):
            tf.io.gfile.makedirs(eval_dir)
    self._model = model
    self._labels_file = labels_file
    self._save_predictions = save_predictions
    self._scorers = scorers
    self._eval_dir = eval_dir
    self._metrics_history = []
    if eval_dir is not None:
        self._summary_writer = tf.summary.create_file_writer(eval_dir)
        summaries = misc.read_summaries(eval_dir)
        for step, values in summaries:
            metrics = misc.extract_prefixed_keys(values, _SUMMARIES_SCOPE + "/")
            self._metrics_history.append((step, metrics))
    else:
        self._summary_writer = tf.summary.create_noop_writer()
    dataset = model.examples_inputter.make_evaluation_dataset(
        features_file,
        labels_file,
        batch_size,
        num_threads=1,
        prefetch_buffer_size=1)

    @dataset_lib.function_on_next(dataset)
    def _eval(next_fn):
        source, target = next_fn()
        outputs, predictions = model(source, labels=target)
        loss = model.compute_loss(outputs, target, training=False)
        return loss, predictions, target

    self._eval = _eval

    self._metrics_name = {"loss", "perplexity"}
    for scorer in self._scorers:
        self._metrics_name.update(scorer.scores_name)
    model_metrics = self._model.get_metrics()
    if model_metrics:
        self._metrics_name.update(set(six.iterkeys(model_metrics)))

    if early_stopping is not None:
        if early_stopping.metric not in self._metrics_name:
            raise ValueError("Invalid early stopping metric '%s', expected one in %s" % (
                early_stopping.metric, str(self._metrics_name)))
        if early_stopping.steps <= 0:
            raise ValueError("Early stopping steps should be greater than 0")
    self._early_stopping = early_stopping
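# --- Illustration (not part of the original module) ----------------------------
# The early-stopping validation in both constructors only requires an object
# exposing ``metric`` and ``steps``; the namedtuple below is a stand-in for the
# package's own EarlyStopping type, whose full field list is not shown here.
import collections

EarlyStoppingStub = collections.namedtuple("EarlyStoppingStub", ["metric", "steps"])

# Passes both checks: "loss" is always in self._metrics_name and steps > 0.
early_stopping = EarlyStoppingStub(metric="loss", steps=4)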