def test_postprocessing(
    task_name,
    raw_data,
    predict_output=None,
    score_output=None,
    feature_encoder=feature_converters.EncDecFeatureConverter()):
    """Run a task's postprocessing and metrics against canned model outputs.

    `raw_data` is injected into the task named `task_name` (via
    `DataInjector`), an `evaluation.Evaluator` is built for that task, and
    `Evaluator.evaluate()` is called with stub `predict_fn` / `score_fn`
    callables that simply hand back `predict_output` and `score_output`.
    Because `evaluate()` reads the task data, the task's preprocessing code is
    exercised as well.

    Typical usage is `metrics, _, _ = test_postprocessing(...)`, since the
    second and third returned items should just mirror the `predict_output`
    and `score_output` that were passed in.

    Args:
      task_name: A SeqIO task name.
      raw_data: A string-keyed dict of string-keyed dicts. The outer dict is
        keyed by dataset split; each inner dict holds that split's data.
      predict_output: Optional list of (int, [value]) tuples standing in for
        the model predictions.
      score_output: Optional list of (int, [value]) tuples standing in for
        the output of the model scoring code.
      feature_encoder: Optional feature converter passed to the Evaluator.
        Defaults to an `EncDecFeatureConverter()` instance; that default is
        created once, when the function is defined, and reused by every call.

    Returns:
      The unmodified return value of `Evaluator.evaluate()`, which callers
      unpack as:
        metrics: a mapping from task name to computed metrics.
        predicted_tokens: a mapping from task name to the output tokens from
          `predict_fn`, for tasks that have `predict_metric_fns`.
        scores: a mapping from task name to the output scores from
          `score_fn`, for tasks that define score-based metric functions.
    """

    # Stub predict_fn: ignores its arguments and replays `predict_output`.
    class PredictCallable(evaluation.PredictFnCallable):

        def __call__(
            self,
            dataset: tf.data.Dataset = None,
            model_feature_lengths: Mapping[str, int] = None,
        ):
            return predict_output

    # Stub score_fn: ignores its arguments and replays `score_output`.
    # NOTE(review): this derives from PredictFnCallable rather than a
    # score-specific base -- confirm that is intended.
    class ScoreCallable(evaluation.PredictFnCallable):

        def __call__(
            self,
            dataset: tf.data.Dataset = None,
            model_feature_lengths: Mapping[str, int] = None,
        ):
            return score_output

    # Everything that touches the task data runs while the injector is active.
    with DataInjector(task_name, raw_data):
        evaluator = evaluation.Evaluator(
            task_name, feature_converter=feature_encoder)
        predict_fn = PredictCallable()
        score_fn = ScoreCallable()
        return evaluator.evaluate(
            compute_metrics=True,
            predict_fn=predict_fn,
            score_fn=score_fn,
        )
def test_postprocessing(
    task_name: str,
    raw_data: Mapping[str, Any],
    target_feature_name: str = "targets",
    predict_output: Optional[Sequence[str]] = None,
    score_output: Optional[Sequence[float]] = None,
    feature_encoder: feature_converters.FeatureConverter = (
        feature_converters.EncDecFeatureConverter(pack=False)),
) -> Mapping[str, Any]:
    """Compute a task's metrics from canned predictions and scores.

    `raw_data` is injected into the task named `task_name` (via
    `DataInjector`), an `evaluation.Evaluator` is built for that task, and
    `Evaluator.evaluate()` is called with stub `predict_fn` / `score_fn`
    callables derived from `predict_output` and `score_output`. Because
    `evaluate()` reads the task data, the task's preprocessing code is
    exercised as well.

    Only the metrics for `task_name` are returned: the first item of the
    `evaluate()` output is resolved with `.result()` and then indexed by
    `task_name`.

    Args:
      task_name: A SeqIO task name.
      raw_data: A string-keyed dict of string-keyed dicts. The outer dict is
        keyed by dataset split; each inner dict holds that split's data.
      target_feature_name: Name of the output feature whose vocabulary is
        used to encode `predict_output`. Defaults to 'targets'.
      predict_output: Optional list of strings representing the model
        predictions for `raw_data`; only used when the task specifies
        metric_fns. Each string is encoded with the target vocabulary and
        paired with its position in the list. When None, the stub predict_fn
        returns an empty list.
      score_output: Optional list of floats representing the scores for
        `raw_data`; only used when the task specifies score_metric_fns. Each
        score is paired with its position in the list. When None, the stub
        score_fn returns an empty list.
      feature_encoder: Feature converter passed to the Evaluator. Defaults to
        an `EncDecFeatureConverter(pack=False)` instance; that default is
        created once, when the function is defined, and reused by every call.

    Returns:
      metrics: a mapping from metric name to values.
    """

    # Stub predict_fn: ignores its arguments and emits (index, token ids).
    class PredictCallable(evaluation.PredictFnCallable):

        def __call__(
            self,
            dataset: tf.data.Dataset = None,
            model_feature_lengths: Mapping[str, int] = None,
        ):
            if predict_output is None:
                return []
            task = dataset_providers.get_mixture_or_task(task_name)
            # The vocabulary is looked up per item, so an empty
            # `predict_output` never touches `target_feature_name`.
            return [
                (
                    index,
                    task.output_features[target_feature_name]
                    .vocabulary.encode(text),
                )
                for index, text in enumerate(predict_output)
            ]

    # Stub score_fn: ignores its arguments and emits (index, score).
    # NOTE(review): this derives from PredictFnCallable rather than a
    # score-specific base -- confirm that is intended.
    class ScoreCallable(evaluation.PredictFnCallable):

        def __call__(
            self,
            dataset: tf.data.Dataset = None,
            model_feature_lengths: Mapping[str, int] = None,
        ):
            return [] if score_output is None else list(enumerate(score_output))

    # Evaluation and result resolution both run while the injector is active.
    with DataInjector(task_name, raw_data):
        evaluator = evaluation.Evaluator(
            task_name, feature_converter=feature_encoder)
        evaluate_outputs = evaluator.evaluate(
            compute_metrics=True,
            predict_fn=PredictCallable(),
            score_fn=ScoreCallable(),
        )
        all_metrics = evaluate_outputs[0]
        return all_metrics.result()[task_name]