Example #1
 def make_features(self, element=None, features=None, training=None):
     if self.combine_features:
         if features is None:
             features = {}
         for i, inputter in enumerate(self.inputters):
             prefix = "inputter_%d_" % i
             sub_features = misc.extract_prefixed_keys(features, prefix)
             if not sub_features:
                 # Also try to read the format produced by the serving features.
                 sub_features = misc.extract_suffixed_keys(
                     features, "_%d" % i)
             sub_features = inputter.make_features(
                 element=element[i] if element is not None else None,
                 features=sub_features,
                 training=training)
             for key, value in sub_features.items():
                 features["%s%s" % (prefix, key)] = value
         return features
     else:
         if features is None:
             features = [{} for _ in self.inputters]
         else:
             features = list(features)
         for i, inputter in enumerate(self.inputters):
             features[i] = inputter.make_features(
                 element=element[i] if element is not None else None,
                 features=features[i],
                 training=training)
         return tuple(features)
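For orientation, a rough sketch of the structure this method returns for two parallel inputters; the sub-feature keys ("ids", "length") are illustrative placeholders only, since the actual keys depend on each sub-inputter:

 # combine_features=True: one flat dict with prefixed keys.
 features = {
     "inputter_0_ids": ...,      # sub-features from self.inputters[0]
     "inputter_0_length": ...,
     "inputter_1_ids": ...,      # sub-features from self.inputters[1]
     "inputter_1_length": ...,
 }
 # combine_features=False: a tuple of per-inputter dicts instead,
 # e.g. ({"ids": ..., "length": ...}, {"ids": ..., "length": ...})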
Example #2
 def _transform_data(self, data, mode):
   transformed = []
   for i, inputter in enumerate(self.inputters):
     with tf.variable_scope("inputter_{}".format(i)):
       sub_data = extract_prefixed_keys(data, "inputter_{}_".format(i))
       transformed.append(inputter._transform_data(sub_data, mode))  # pylint: disable=protected-access
   if self.reducer is not None:
     transformed = self.reducer(transformed)
   return transformed
Example #3
 def get_length(self, data):
     lengths = []
     for i, inputter in enumerate(self.inputters):
         sub_data = extract_prefixed_keys(data, "inputter_{}_".format(i))
         lengths.append(inputter.get_length(sub_data))
     if self.reducer is None:
         return lengths
     else:
         # With a reducer, the parallel inputs are expected to be aligned and
         # share the same length, so the first length is returned.
         return lengths[0]
Example #4
 def call(self, features, training=None):
   transformed = []
   for i, inputter in enumerate(self.inputters):
     if self.combine_features:
       sub_features = misc.extract_prefixed_keys(features, "inputter_{}_".format(i))
     else:
       sub_features = features[i]
     transformed.append(inputter(sub_features, training=training))
   if self.reducer is not None:
     transformed = self.reducer(transformed)
   return transformed
Example #5
 def get_length(self, features):
   lengths = []
   for i, inputter in enumerate(self.inputters):
     if self.combine_features:
       sub_features = extract_prefixed_keys(features, "inputter_{}_".format(i))
     else:
       sub_features = features[i]
     lengths.append(inputter.get_length(sub_features))
   if self.reducer is None:
     return lengths
   else:
     return lengths[0]
Example #6
 def make_inputs(self, features, training=None):
     transformed = []
     for i, inputter in enumerate(self.inputters):
         with tf.variable_scope("inputter_{}".format(i)):
             if self.combine_features:
                 sub_features = extract_prefixed_keys(
                     features, "inputter_{}_".format(i))
             else:
                 sub_features = features[i]
             transformed.append(
                 inputter.make_inputs(sub_features, training=training))
     if self.reducer is not None:
         transformed = self.reducer(transformed)
     return transformed
Example #7
 def make_inputs(self, features, training=None):
   if not self.built:
     self.build()
   transformed = []
   for i, (inputter, scope) in enumerate(zip(self.inputters, self._get_scopes())):
     with compat.tf_compat(v1="variable_scope")(scope):
       if self.combine_features:
         sub_features = extract_prefixed_keys(features, "inputter_{}_".format(i))
       else:
         sub_features = features[i]
       transformed.append(inputter.make_inputs(sub_features, training=training))
   if self.reducer is not None:
     transformed = self.reducer(transformed)
   return transformed
Example #8
 def get_padded_shapes(self, element_spec, maximum_length=None):
     if maximum_length is None:
         maximum_length = [None for _ in self.inputters]
     elif (not isinstance(maximum_length, (list, tuple))
           or len(maximum_length) != len(self.inputters)):
         raise ValueError(
             "A maximum length should be set for each parallel inputter")
     if self.combine_features:
         shapes = {}
         for i, (inputter, length) in enumerate(zip(self.inputters, maximum_length)):
             prefix = "inputter_%d_" % i
             spec = misc.extract_prefixed_keys(element_spec, prefix)
             sub_shapes = inputter.get_padded_shapes(spec, maximum_length=length)
             for key, value in sub_shapes.items():
                 shapes["%s%s" % (prefix, key)] = value
         return shapes
     else:
         return type(element_spec)(
             inputter.get_padded_shapes(spec, maximum_length=length)
             for inputter, spec, length in zip(
                 self.inputters, element_spec, maximum_length))
Example #9
    def __init__(
        self,
        model,
        features_file,
        labels_file,
        batch_size,
        batch_type="examples",
        length_bucket_width=None,
        scorers=None,
        save_predictions=False,
        early_stopping=None,
        model_dir=None,
        export_on_best=None,
        exporter=None,
        max_exports_to_keep=5,
    ):
        """Initializes the evaluator.

        Args:
          model: A :class:`opennmt.models.Model` to evaluate.
          features_file: Path to the evaluation features.
          labels_file: Path to the evaluation labels.
          batch_size: The evaluation batch size.
          batch_type: The batching strategy to use: can be "examples" or "tokens".
          length_bucket_width: The width of the length buckets to select batch
            candidates from (for efficiency). Set ``None`` to not constrain batch
            formation.
          scorers: A list of scorers, callables taking the path to the reference
            and the path to the hypothesis and returning one or more scores.
          save_predictions: Save evaluation predictions to a file. This is ``True``
            when :obj:`scorers` is set.
          early_stopping: An ``EarlyStopping`` instance.
          model_dir: The active model directory.
          export_on_best: Export a model when this evaluation metric has the
            best value so far.
          exporter: A :class:`opennmt.utils.Exporter` instance to export the model.
            Defaults to :class:`opennmt.utils.SavedModelExporter`.
          max_exports_to_keep: Maximum number of exports to keep. Older exports will
            be garbage collected. Set to ``None`` to keep all exports.

        Raises:
          ValueError: If :obj:`save_predictions` is set but the model is not compatible.
          ValueError: If :obj:`save_predictions` is set but :obj:`model_dir` is ``None``.
          ValueError: If :obj:`export_on_best` is set but :obj:`model_dir` is ``None``.
          ValueError: If the :obj:`early_stopping` configuration is invalid.
        """
        if model_dir is not None:
            export_dir = os.path.join(model_dir, "export")
            eval_dir = os.path.join(model_dir, "eval")
        else:
            if save_predictions:
                raise ValueError(
                    "Saving evaluation predictions requires model_dir to be set"
                )
            if export_on_best is not None:
                raise ValueError(
                    "Exporting models requires model_dir to be set")
            export_dir = None
            eval_dir = None

        if scorers is None:
            scorers = []
        if scorers:
            save_predictions = True
        if save_predictions:
            if model.unsupervised:
                raise ValueError(
                    "This model does not support saving evaluation predictions"
                )
            if not tf.io.gfile.exists(eval_dir):
                tf.io.gfile.makedirs(eval_dir)
        self._model = model
        self._labels_file = labels_file
        self._save_predictions = save_predictions
        self._scorers = scorers
        self._eval_dir = eval_dir
        self._metrics_history = []
        if eval_dir is not None:
            self._summary_writer = tf.summary.create_file_writer(eval_dir)
            summaries = misc.read_summaries(eval_dir)
            for step, values in summaries:
                metrics = misc.extract_prefixed_keys(values,
                                                     _SUMMARIES_SCOPE + "/")
                self._metrics_history.append((step, metrics))
        else:
            self._summary_writer = tf.summary.create_noop_writer()
        dataset = model.examples_inputter.make_evaluation_dataset(
            features_file,
            labels_file,
            batch_size,
            batch_type=batch_type,
            length_bucket_width=length_bucket_width,
            num_threads=1,
            prefetch_buffer_size=1,
        )

        self._eval_fn = tf.function(model.evaluate,
                                    input_signature=dataset.element_spec)
        self._dataset = dataset

        self._metrics_name = {"loss", "perplexity"}
        for scorer in self._scorers:
            self._metrics_name.update(scorer.scores_name)
        model_metrics = self._model.get_metrics()
        if model_metrics:
            self._metrics_name.update(set(model_metrics.keys()))

        if early_stopping is not None:
            if early_stopping.metric not in self._metrics_name:
                raise ValueError(
                    "Invalid early stopping metric '%s', expected one in %s" %
                    (early_stopping.metric, str(self._metrics_name)))
            if early_stopping.steps <= 0:
                raise ValueError("Early stopping steps should greater than 0")
        self._early_stopping = early_stopping

        self._export_on_best = export_on_best
        self._exporter = exporter
        self._export_dir = export_dir
        self._max_exports_to_keep = max_exports_to_keep
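A hypothetical way this evaluator could be constructed (assuming the enclosing class is named Evaluator; the file paths, batch size, and scorer are placeholders, and scorer objects are callables that also expose a scores_name attribute, as used above):

 evaluator = Evaluator(
     model,
     features_file="valid.src",
     labels_file="valid.tgt",
     batch_size=32,
     batch_type="examples",
     scorers=[my_scorer],     # callable(reference_path, hypothesis_path) -> score(s)
     model_dir="run",         # required here because scorers forces save_predictions
     export_on_best="loss",
 )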
Example #10
  def __init__(self,
               model,
               features_file,
               labels_file,
               batch_size,
               scorers=None,
               save_predictions=False,
               early_stopping=None,
               eval_dir=None):
    """Initializes the evaluator.

    Args:
      model: A :class:`opennmt.models.model.Model` to evaluate.
      features_file: Path to the evaluation features.
      labels_file: Path to the evaluation labels.
      batch_size: The evaluation batch size.
      scorers: A list of scorers, callables taking the path to the reference
        and the path to the hypothesis and returning one or more scores.
      save_predictions: Save evaluation predictions to a file. This is ``True``
        when :obj:`scorers` is set.
      early_stopping: An ``EarlyStopping`` instance.
      eval_dir: Directory where predictions can be saved.

    Raises:
      ValueError: If predictions should be saved but the model is not compatible.
      ValueError: If predictions should be saved but :obj:`eval_dir` is ``None``.
      ValueError: If the :obj:`early_stopping` configuration is invalid.
    """
    if scorers is None:
      scorers = []
    if scorers:
      save_predictions = True
    if save_predictions:
      if model.unsupervised:
        raise ValueError("This model does not support saving evaluation predictions")
      if eval_dir is None:
        raise ValueError("Saving evaluation predictions requires eval_dir to be set")
      if not tf.io.gfile.exists(eval_dir):
        tf.io.gfile.makedirs(eval_dir)
    self._model = model
    self._labels_file = labels_file
    self._save_predictions = save_predictions
    self._scorers = scorers
    self._eval_dir = eval_dir
    self._metrics_history = []
    if eval_dir is not None:
      self._summary_writer = tf.summary.create_file_writer(eval_dir)
      summaries = misc.read_summaries(eval_dir)
      for step, values in summaries:
        metrics = misc.extract_prefixed_keys(values, _SUMMARIES_SCOPE + "/")
        self._metrics_history.append((step, metrics))
    else:
      self._summary_writer = tf.summary.create_noop_writer()
    dataset = model.examples_inputter.make_evaluation_dataset(
        features_file,
        labels_file,
        batch_size,
        num_threads=1,
        prefetch_buffer_size=1)

    @dataset_lib.function_on_next(dataset)
    def _eval(next_fn):
      source, target = next_fn()
      outputs, predictions = model(source, labels=target)
      loss = model.compute_loss(outputs, target, training=False)
      return loss, predictions, target

    self._eval = _eval

    self._metrics_name = {"loss", "perplexity"}
    for scorer in self._scorers:
      self._metrics_name.update(scorer.scores_name)
    model_metrics = self._model.get_metrics()
    if model_metrics:
      self._metrics_name.update(set(six.iterkeys(model_metrics)))

    if early_stopping is not None:
      if early_stopping.metric not in self._metrics_name:
        raise ValueError("Invalid early stopping metric '%s', expected one in %s" % (
            early_stopping.metric, str(self._metrics_name)))
      if early_stopping.steps <= 0:
        raise ValueError("Early stopping steps should greater than 0")
    self._early_stopping = early_stopping
Example #11
 def _index_features(self, features, index):
   if self.combine_features:
     return misc.extract_prefixed_keys(features, "inputter_{}_".format(index))
   else:
     return features[index]
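Most of the examples above depend on a prefix-extraction helper. A minimal sketch of what such a helper could look like, assuming it only filters dictionary entries by key prefix and strips the prefix (the actual opennmt.utils.misc implementation may differ):

 def extract_prefixed_keys(dictionary, prefix):
     """Returns the entries of `dictionary` whose key starts with `prefix`, prefix removed."""
     return {
         key[len(prefix):]: value
         for key, value in dictionary.items()
         if key.startswith(prefix)
     }

 def extract_suffixed_keys(dictionary, suffix):
     """Returns the entries of `dictionary` whose key ends with `suffix`, suffix removed."""
     return {
         key[:-len(suffix)]: value
         for key, value in dictionary.items()
         if key.endswith(suffix)
     }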