def make_features(self, element=None, features=None, training=None):
    if self.combine_features:
        if features is None:
            features = {}
        for i, inputter in enumerate(self.inputters):
            prefix = "inputter_%d_" % i
            sub_features = misc.extract_prefixed_keys(features, prefix)
            if not sub_features:
                # Also try to read the format produced by the serving features.
                sub_features = misc.extract_suffixed_keys(features, "_%d" % i)
            sub_features = inputter.make_features(
                element=element[i] if element is not None else None,
                features=sub_features,
                training=training)
            for key, value in sub_features.items():
                features["%s%s" % (prefix, key)] = value
        return features
    else:
        if features is None:
            features = [{} for _ in self.inputters]
        else:
            features = list(features)
        for i, inputter in enumerate(self.inputters):
            features[i] = inputter.make_features(
                element=element[i] if element is not None else None,
                features=features[i],
                training=training)
        return tuple(features)
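# Hedged illustration (not part of this module): how the "inputter_%d_" prefix
# convention above plays out when two inputters are combined. ParallelInputter,
# WordEmbedder, and ConcatReducer are the public OpenNMT-tf names as I understand
# them; treat this as a sketch rather than a verified snippet from this repository.
import opennmt

inputter = opennmt.inputters.ParallelInputter(
    [
        opennmt.inputters.WordEmbedder(embedding_size=512),  # word ids
        opennmt.inputters.WordEmbedder(embedding_size=16),   # an extra word-level feature
    ],
    reducer=opennmt.layers.ConcatReducer(),
)
# With combine_features enabled (the default), make_features() returns a single
# dict whose keys carry the index prefix, e.g. "inputter_0_ids",
# "inputter_0_length", "inputter_1_ids", ...; with it disabled, it returns a
# tuple with one feature dict per sub-inputter.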
def _transform_data(self, data, mode):
    # Transform each sub-input in its own variable scope, then merge the
    # results with the reducer when one is configured.
    transformed = []
    for i, inputter in enumerate(self.inputters):
        with tf.variable_scope("inputter_{}".format(i)):
            sub_data = extract_prefixed_keys(data, "inputter_{}_".format(i))
            transformed.append(inputter._transform_data(sub_data, mode))  # pylint: disable=protected-access
    if self.reducer is not None:
        transformed = self.reducer(transformed)
    return transformed
def get_length(self, data):
    lengths = []
    for i, inputter in enumerate(self.inputters):
        sub_data = extract_prefixed_keys(data, "inputter_{}_".format(i))
        lengths.append(inputter.get_length(sub_data))
    if self.reducer is None:
        return lengths
    else:
        return lengths[0]
def call(self, features, training=None):
    transformed = []
    for i, inputter in enumerate(self.inputters):
        if self.combine_features:
            sub_features = misc.extract_prefixed_keys(features, "inputter_{}_".format(i))
        else:
            sub_features = features[i]
        transformed.append(inputter(sub_features, training=training))
    if self.reducer is not None:
        transformed = self.reducer(transformed)
    return transformed
def get_length(self, features):
    lengths = []
    for i, inputter in enumerate(self.inputters):
        if self.combine_features:
            sub_features = extract_prefixed_keys(features, "inputter_{}_".format(i))
        else:
            sub_features = features[i]
        lengths.append(inputter.get_length(sub_features))
    if self.reducer is None:
        return lengths
    else:
        return lengths[0]
def make_inputs(self, features, training=None):
    transformed = []
    for i, inputter in enumerate(self.inputters):
        with tf.variable_scope("inputter_{}".format(i)):
            if self.combine_features:
                sub_features = extract_prefixed_keys(features, "inputter_{}_".format(i))
            else:
                sub_features = features[i]
            transformed.append(inputter.make_inputs(sub_features, training=training))
    if self.reducer is not None:
        transformed = self.reducer(transformed)
    return transformed
def make_inputs(self, features, training=None):
    if not self.built:
        self.build()
    transformed = []
    for i, (inputter, scope) in enumerate(zip(self.inputters, self._get_scopes())):
        with compat.tf_compat(v1="variable_scope")(scope):
            if self.combine_features:
                sub_features = extract_prefixed_keys(features, "inputter_{}_".format(i))
            else:
                sub_features = features[i]
            transformed.append(inputter.make_inputs(sub_features, training=training))
    if self.reducer is not None:
        transformed = self.reducer(transformed)
    return transformed
def get_padded_shapes(self, element_spec, maximum_length=None):
    if maximum_length is None:
        maximum_length = [None for _ in self.inputters]
    elif not isinstance(maximum_length, (list, tuple)) or len(maximum_length) != len(self.inputters):
        raise ValueError("A maximum length should be set for each parallel inputter")
    if self.combine_features:
        shapes = {}
        for i, (inputter, length) in enumerate(zip(self.inputters, maximum_length)):
            prefix = "inputter_%d_" % i
            spec = misc.extract_prefixed_keys(element_spec, prefix)
            sub_shapes = inputter.get_padded_shapes(spec, maximum_length=length)
            for key, value in sub_shapes.items():
                shapes["%s%s" % (prefix, key)] = value
        return shapes
    else:
        return type(element_spec)(
            inputter.get_padded_shapes(spec, maximum_length=length)
            for inputter, spec, length in zip(self.inputters, element_spec, maximum_length))
def __init__(
    self,
    model,
    features_file,
    labels_file,
    batch_size,
    batch_type="examples",
    length_bucket_width=None,
    scorers=None,
    save_predictions=False,
    early_stopping=None,
    model_dir=None,
    export_on_best=None,
    exporter=None,
    max_exports_to_keep=5,
):
    """Initializes the evaluator.

    Args:
      model: A :class:`opennmt.models.Model` to evaluate.
      features_file: Path to the evaluation features.
      labels_file: Path to the evaluation labels.
      batch_size: The evaluation batch size.
      batch_type: The batching strategy to use: can be "examples" or "tokens".
      length_bucket_width: The width of the length buckets to select batch
        candidates from (for efficiency). Set ``None`` to not constrain batch
        formation.
      scorers: A list of scorers: callables taking the path to the reference
        and the hypothesis and returning one or more scores.
      save_predictions: Save evaluation predictions to a file. This is
        ``True`` when :obj:`scorers` is set.
      early_stopping: An ``EarlyStopping`` instance.
      model_dir: The active model directory.
      export_on_best: Export a model when this evaluation metric has the best
        value so far.
      exporter: A :class:`opennmt.utils.Exporter` instance to export the model.
        Defaults to :class:`opennmt.utils.SavedModelExporter`.
      max_exports_to_keep: Maximum number of exports to keep. Older exports
        will be garbage collected. Set to ``None`` to keep all exports.

    Raises:
      ValueError: If :obj:`save_predictions` is set but the model is not
        compatible.
      ValueError: If :obj:`save_predictions` is set but :obj:`model_dir` is
        ``None``.
      ValueError: If :obj:`export_on_best` is set but :obj:`model_dir` is
        ``None``.
      ValueError: If the :obj:`early_stopping` configuration is invalid.
    """
    if model_dir is not None:
        export_dir = os.path.join(model_dir, "export")
        eval_dir = os.path.join(model_dir, "eval")
    else:
        if save_predictions:
            raise ValueError("Saving evaluation predictions requires model_dir to be set")
        if export_on_best is not None:
            raise ValueError("Exporting models requires model_dir to be set")
        export_dir = None
        eval_dir = None

    if scorers is None:
        scorers = []
    if scorers:
        save_predictions = True
    if save_predictions:
        if model.unsupervised:
            raise ValueError("This model does not support saving evaluation predictions")
        if not tf.io.gfile.exists(eval_dir):
            tf.io.gfile.makedirs(eval_dir)

    self._model = model
    self._labels_file = labels_file
    self._save_predictions = save_predictions
    self._scorers = scorers
    self._eval_dir = eval_dir
    self._metrics_history = []
    if eval_dir is not None:
        self._summary_writer = tf.summary.create_file_writer(eval_dir)
        summaries = misc.read_summaries(eval_dir)
        for step, values in summaries:
            metrics = misc.extract_prefixed_keys(values, _SUMMARIES_SCOPE + "/")
            self._metrics_history.append((step, metrics))
    else:
        self._summary_writer = tf.summary.create_noop_writer()

    dataset = model.examples_inputter.make_evaluation_dataset(
        features_file,
        labels_file,
        batch_size,
        batch_type=batch_type,
        length_bucket_width=length_bucket_width,
        num_threads=1,
        prefetch_buffer_size=1,
    )

    self._eval_fn = tf.function(model.evaluate, input_signature=dataset.element_spec)
    self._dataset = dataset

    self._metrics_name = {"loss", "perplexity"}
    for scorer in self._scorers:
        self._metrics_name.update(scorer.scores_name)
    model_metrics = self._model.get_metrics()
    if model_metrics:
        self._metrics_name.update(set(model_metrics.keys()))

    if early_stopping is not None:
        if early_stopping.metric not in self._metrics_name:
            raise ValueError(
                "Invalid early stopping metric '%s', expected one in %s"
                % (early_stopping.metric, str(self._metrics_name)))
        if early_stopping.steps <= 0:
            raise ValueError("Early stopping steps should be greater than 0")
    self._early_stopping = early_stopping

    self._export_on_best = export_on_best
    self._exporter = exporter
    self._export_dir = export_dir
    self._max_exports_to_keep = max_exports_to_keep
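# Hedged usage sketch for the constructor above. The Evaluator and EarlyStopping
# names are assumed to follow the opennmt.evaluation module layout, and
# TransformerBase is assumed to be available from the model catalog; all file
# paths and the run directory are placeholders, not values from this repository.
import opennmt
from opennmt import evaluation

model = opennmt.models.TransformerBase()
# The model must be initialized with its data configuration (vocabularies, etc.)
# before the evaluator can build the evaluation dataset.
model.initialize({
    "source_vocabulary": "src-vocab.txt",  # placeholder paths
    "target_vocabulary": "tgt-vocab.txt",
})

evaluator = evaluation.Evaluator(
    model,
    features_file="src-val.txt",
    labels_file="tgt-val.txt",
    batch_size=32,
    early_stopping=evaluation.EarlyStopping(metric="loss", min_improvement=0.01, steps=4),
    model_dir="run/",        # gives the evaluator its eval/ and export/ subdirectories
    export_on_best="loss",   # export whenever the loss reaches a new best value
)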
def __init__(self,
             model,
             features_file,
             labels_file,
             batch_size,
             scorers=None,
             save_predictions=False,
             early_stopping=None,
             eval_dir=None):
    """Initializes the evaluator.

    Args:
      model: A :class:`opennmt.models.model.Model` to evaluate.
      features_file: Path to the evaluation features.
      labels_file: Path to the evaluation labels.
      batch_size: The evaluation batch size.
      scorers: A list of scorers: callables taking the path to the reference
        and the hypothesis and returning one or more scores.
      save_predictions: Save evaluation predictions to a file. This is
        ``True`` when :obj:`scorers` is set.
      early_stopping: An ``EarlyStopping`` instance.
      eval_dir: Directory where predictions can be saved.

    Raises:
      ValueError: If predictions should be saved but the model is not compatible.
      ValueError: If predictions should be saved but :obj:`eval_dir` is ``None``.
      ValueError: If the :obj:`early_stopping` configuration is invalid.
    """
    if scorers is None:
        scorers = []
    if scorers:
        save_predictions = True
    if save_predictions:
        if model.unsupervised:
            raise ValueError("This model does not support saving evaluation predictions")
        if eval_dir is None:
            raise ValueError("Saving evaluation predictions requires eval_dir to be set")
        if not tf.io.gfile.exists(eval_dir):
            tf.io.gfile.makedirs(eval_dir)

    self._model = model
    self._labels_file = labels_file
    self._save_predictions = save_predictions
    self._scorers = scorers
    self._eval_dir = eval_dir
    self._metrics_history = []
    if eval_dir is not None:
        self._summary_writer = tf.summary.create_file_writer(eval_dir)
        summaries = misc.read_summaries(eval_dir)
        for step, values in summaries:
            metrics = misc.extract_prefixed_keys(values, _SUMMARIES_SCOPE + "/")
            self._metrics_history.append((step, metrics))
    else:
        self._summary_writer = tf.summary.create_noop_writer()

    dataset = model.examples_inputter.make_evaluation_dataset(
        features_file,
        labels_file,
        batch_size,
        num_threads=1,
        prefetch_buffer_size=1)

    @dataset_lib.function_on_next(dataset)
    def _eval(next_fn):
        source, target = next_fn()
        outputs, predictions = model(source, labels=target)
        loss = model.compute_loss(outputs, target, training=False)
        return loss, predictions, target

    self._eval = _eval

    self._metrics_name = {"loss", "perplexity"}
    for scorer in self._scorers:
        self._metrics_name.update(scorer.scores_name)
    model_metrics = self._model.get_metrics()
    if model_metrics:
        self._metrics_name.update(set(six.iterkeys(model_metrics)))

    if early_stopping is not None:
        if early_stopping.metric not in self._metrics_name:
            raise ValueError("Invalid early stopping metric '%s', expected one in %s" % (
                early_stopping.metric, str(self._metrics_name)))
        if early_stopping.steps <= 0:
            raise ValueError("Early stopping steps should be greater than 0")
    self._early_stopping = early_stopping
def _index_features(self, features, index):
    if self.combine_features:
        return misc.extract_prefixed_keys(features, "inputter_{}_".format(index))
    else:
        return features[index]