Beispiel #1
0
def test_postprocessing(
    task_name,
    raw_data,
    predict_output=None,
    score_output=None,
    feature_encoder=feature_converters.EncDecFeatureConverter()):
    """Run a task's evaluation with canned model outputs.

    `raw_data` is injected for `task_name` through `DataInjector`, an
    `Evaluator` is built for that task, and `Evaluator.evaluate()` is called
    with predict/score callables that ignore their inputs and simply hand
    back `predict_output` and `score_output`. Because the evaluator reads the
    task's data, the task's preprocessing presumably runs as well.

    Args:
      task_name: A SeqIO task name.
      raw_data: A string-keyed dict of string-keyed dicts. The top-level dict
        should be keyed by dataset splits, and the second-level dict should
        hold the dataset data.
      predict_output: Returned verbatim by the predict callable; per the
        original documentation, a list of (int, [value]) tuples representing
        the model predictions. Optional.
      score_output: Returned verbatim by the score callable; per the original
        documentation, a list of (int, [value]) tuples representing the
        output of the model scoring code. Optional.
      feature_encoder: Feature converter handed to the `Evaluator`. The
        default is a single `EncDecFeatureConverter()` instance created once,
        when this function is defined, and therefore shared across calls.

    Returns:
      The value of `Evaluator.evaluate()`, unchanged. Per the original
      documentation this is a (metrics, predicted_tokens, scores) triple, so
      callers usually write `metrics, _, _ = test_postprocessing(...)`.
    """

    class _CannedPredictFn(evaluation.PredictFnCallable):
        # Both arguments are accepted only to satisfy the callable interface.
        def __call__(self,
                     dataset: tf.data.Dataset = None,
                     model_feature_lengths: Mapping[str, int] = None):
            return predict_output

    # NOTE(review): derives from PredictFnCallable, exactly as the original
    # did; confirm whether a score-specific base was intended.
    class _CannedScoreFn(evaluation.PredictFnCallable):
        def __call__(
            self,
            dataset: tf.data.Dataset = None,
            model_feature_lengths: Mapping[str, int] = None,
        ):
            return score_output

    # The evaluator is both built and run while the injected data is active.
    with DataInjector(task_name, raw_data):
        evaluator = evaluation.Evaluator(
            task_name,
            feature_converter=feature_encoder,
        )
        fake_predict = _CannedPredictFn()
        fake_score = _CannedScoreFn()
        return evaluator.evaluate(
            compute_metrics=True,
            predict_fn=fake_predict,
            score_fn=fake_score,
        )
def test_postprocessing(
    task_name: str,
    raw_data: Mapping[str, Any],
    target_feature_name: str = "targets",
    predict_output: Optional[Sequence[str]] = None,
    score_output: Optional[Sequence[float]] = None,
    feature_encoder: feature_converters.FeatureConverter = feature_converters
    .EncDecFeatureConverter(pack=False)) -> Mapping[str, Any]:
  """Compute a task's metrics from canned predictions and scores.

  `raw_data` is injected for `task_name` through `DataInjector`, an
  `Evaluator` is built for that task, and `Evaluator.evaluate()` is called
  with stand-in predict/score callables. The predict callable encodes each
  string in `predict_output` with the vocabulary of the task's
  `target_feature_name` output feature and pairs it with its position; the
  score callable pairs each entry of `score_output` with its position. Either
  callable returns an empty list when its corresponding argument is None.

  Args:
    task_name: A SeqIO task name.
    raw_data: A string-keyed dict of string-keyed dicts. The top-level dict
      should be keyed by dataset splits, and the second-level dict should hold
      the dataset data.
    target_feature_name: Output feature whose vocabulary is used to encode
      `predict_output`. Defaults to 'targets'.
    predict_output: Strings representing model predictions for the raw_data.
      Optional; per the original documentation, only used when the task
      specifies metric_fns.
    score_output: Floats representing the score of the raw_data. Optional;
      per the original documentation, only used when the task specifies
      score_metric_fns.
    feature_encoder: Feature converter handed to the `Evaluator`. The default
      is a single `EncDecFeatureConverter(pack=False)` instance created once,
      when this function is defined, and therefore shared across calls.

  Returns:
    The `task_name` entry of the resolved metrics, i.e.
    `evaluate(...)[0].result()[task_name]`: a mapping from metric name to
    value.
  """

  class _EncodedPredictFn(evaluation.PredictFnCallable):

    # Both arguments are accepted only to satisfy the callable interface.
    def __call__(self,
                 dataset: tf.data.Dataset = None,
                 model_feature_lengths: Mapping[str, int] = None):
      if predict_output is None:
        return []
      # Resolved at call time, i.e. while the injected data is active.
      provider = dataset_providers.get_mixture_or_task(task_name)
      indexed_tokens = []
      for position, text in enumerate(predict_output):
        # Looked up per item so that an empty `predict_output` never touches
        # the feature table.
        vocabulary = provider.output_features[target_feature_name].vocabulary
        indexed_tokens.append((position, vocabulary.encode(text)))
      return indexed_tokens

  # NOTE(review): derives from PredictFnCallable, exactly as the original
  # did; confirm whether a score-specific base was intended.
  class _IndexedScoreFn(evaluation.PredictFnCallable):

    def __call__(
        self,
        dataset: tf.data.Dataset = None,
        model_feature_lengths: Mapping[str, int] = None,
    ):
      return [] if score_output is None else list(enumerate(score_output))

  # Everything up to reading the result happens while the data is injected.
  with DataInjector(task_name, raw_data):
    evaluator = evaluation.Evaluator(
        task_name,
        feature_converter=feature_encoder,
    )
    outputs = evaluator.evaluate(
        compute_metrics=True,
        predict_fn=_EncodedPredictFn(),
        score_fn=_IndexedScoreFn(),
    )
    # The first output is resolved via `.result()` before indexing by task.
    all_metrics = outputs[0].result()
    return all_metrics[task_name]