Example 1
    def evaluate(
        self,
        pipeline: List[base_component.BaseComponent],
        examples: types.Channel,
        model: types.Channel,
        eval_config: tfma.EvalConfig = None,
    ) -> None:
        """Adds a benchmark subgraph to the benchmark suite's workflow DAG.

        Automatically appends a TFX Evaluator component to the given DAG in
        order to evaluate the given `model` on `examples`. The Evaluator uses
        TensorFlow Model Analysis (TFMA) to compute the desired metrics, which
        are then stored in MLMD.

        Args:
          pipeline: List of TFX components of the workflow DAG to benchmark.
          examples: A `standard_artifacts.Examples` Channel, usually produced
            by an ExampleGen component. Input to the benchmark Evaluator. The
            examples under the 'eval' key are used as the test dataset.
          model: A `standard_artifacts.Model` Channel, usually produced by a
            Trainer component. Input to the benchmark Evaluator.
          eval_config: A TFMA `EvalConfig` for customizing the TFMA evaluation.
            Required when `model` was produced from `tf.keras.Model#save`.
        """

        # Strip common parts of benchmark names from benchmark ID.
        benchmark_name = self._benchmark.id()
        if benchmark_name in self._seen_benchmarks:
            raise ValueError(
                "evaluate was already called once for this benchmark. "
                "Consider creating a sub-benchmark instead.")
        self._seen_benchmarks.add(benchmark_name)

        # Automatically add an Evaluator component to evaluate the produced model on
        # the test set.
        # TODO(b/146611976): Include a Model-agnostic Evaluator which computes
        # metrics according to task type.
        evaluator = tfx.Evaluator(
            examples=examples, model=model, eval_config=eval_config)
        self._result.pipelines.append(
            _BenchmarkPipeline(benchmark_name, pipeline, evaluator))
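
A minimal usage sketch for the `evaluate` method above, assuming a NitroML-style benchmark suite. The benchmark subclass, component arguments, and file paths are illustrative assumptions, not code from the source.

# Sketch only: a hypothetical benchmark that wires upstream components into
# evaluate(). All paths and trainer arguments below are placeholders.
import nitroml
from tfx import components as tfx
from tfx.proto import trainer_pb2


class MyBenchmark(nitroml.Benchmark):

    def benchmark(self):
        # Hypothetical upstream components producing examples and a model.
        example_gen = tfx.CsvExampleGen(input_base='/path/to/csv_data')
        trainer = tfx.Trainer(
            examples=example_gen.outputs['examples'],
            module_file='/path/to/trainer_module.py',
            train_args=trainer_pb2.TrainArgs(num_steps=100),
            eval_args=trainer_pb2.EvalArgs(num_steps=50))

        # Appends a benchmark Evaluator to the DAG; results land in MLMD.
        self.evaluate(
            pipeline=[example_gen, trainer],
            examples=example_gen.outputs['examples'],
            model=trainer.outputs['model'])
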
Example 2
def create_pipeline_components(
    pipeline_root: Text,
    transform_module: Text,
    trainer_module: Text,
    bigquery_query: Text = '',
    csv_input_location: Text = '',
) -> List[base_node.BaseNode]:
    """Creates components for a simple Chicago Taxi TFX pipeline for testing.

    Args:
      pipeline_root: The root of the pipeline output.
      transform_module: The location of the transform module file.
      trainer_module: The location of the trainer module file.
      bigquery_query: The query to get input data from BigQuery. If not empty,
        BigQueryExampleGen will be used.
      csv_input_location: The location of the input data directory. Used by
        CsvExampleGen when `bigquery_query` is empty.

    Returns:
      A list of TFX components that constitutes an end-to-end test pipeline.
    """

    if bool(bigquery_query) == bool(csv_input_location):
        raise ValueError(
            'Exactly one example gen is expected. '
            'Please provide either bigquery_query or csv_input_location.')

    if bigquery_query:
        example_gen = big_query_example_gen_component.BigQueryExampleGen(
            query=bigquery_query)
    else:
        example_gen = components.CsvExampleGen(input_base=csv_input_location)

    statistics_gen = components.StatisticsGen(
        examples=example_gen.outputs['examples'])
    schema_gen = components.SchemaGen(
        statistics=statistics_gen.outputs['statistics'],
        infer_feature_shape=False)
    example_validator = components.ExampleValidator(
        statistics=statistics_gen.outputs['statistics'],
        schema=schema_gen.outputs['schema'])
    transform = components.Transform(
        examples=example_gen.outputs['examples'],
        schema=schema_gen.outputs['schema'],
        module_file=transform_module)
    latest_model_resolver = resolver.Resolver(
        strategy_class=latest_artifacts_resolver.LatestArtifactsResolver,
        model=channel.Channel(type=standard_artifacts.Model)).with_id(
            'Resolver.latest_model_resolver')
    trainer = components.Trainer(
        custom_executor_spec=executor_spec.ExecutorClassSpec(Executor),
        transformed_examples=transform.outputs['transformed_examples'],
        schema=schema_gen.outputs['schema'],
        base_model=latest_model_resolver.outputs['model'],
        transform_graph=transform.outputs['transform_graph'],
        train_args=trainer_pb2.TrainArgs(num_steps=10),
        eval_args=trainer_pb2.EvalArgs(num_steps=5),
        module_file=trainer_module,
    )
    # Get the latest blessed model for model validation.
    model_resolver = resolver.Resolver(
        strategy_class=(
            latest_blessed_model_resolver.LatestBlessedModelResolver),
        model=channel.Channel(type=standard_artifacts.Model),
        model_blessing=channel.Channel(
            type=standard_artifacts.ModelBlessing)).with_id(
                'Resolver.latest_blessed_model_resolver')
    # Set the TFMA config for Model Evaluation and Validation.
    eval_config = tfma.EvalConfig(
        model_specs=[tfma.ModelSpec(signature_name='eval')],
        metrics_specs=[
            tfma.MetricsSpec(
                metrics=[tfma.MetricConfig(class_name='ExampleCount')],
                thresholds={
                    'binary_accuracy':
                    tfma.MetricThreshold(
                        value_threshold=tfma.GenericValueThreshold(
                            lower_bound={'value': 0.5}),
                        change_threshold=tfma.GenericChangeThreshold(
                            direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                            absolute={'value': -1e-10}))
                })
        ],
        slicing_specs=[
            tfma.SlicingSpec(),
            tfma.SlicingSpec(feature_keys=['trip_start_hour'])
        ])
    evaluator = components.Evaluator(
        examples=example_gen.outputs['examples'],
        model=trainer.outputs['model'],
        baseline_model=model_resolver.outputs['model'],
        eval_config=eval_config)

    pusher = components.Pusher(
        model=trainer.outputs['model'],
        model_blessing=evaluator.outputs['blessing'],
        push_destination=pusher_pb2.PushDestination(
            filesystem=pusher_pb2.PushDestination.Filesystem(
                base_directory=os.path.join(pipeline_root, 'model_serving'))))

    return [
        example_gen, statistics_gen, schema_gen, example_validator, transform,
        latest_model_resolver, trainer, model_resolver, evaluator, pusher
    ]
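
A minimal sketch of running the components returned above with a local TFX orchestrator. The runner choice, pipeline name, metadata path, and module/data locations are illustrative assumptions.

# Sketch only: assembles the returned components into a TFX pipeline and runs
# it locally. All paths and names below are placeholders.
import os

from tfx.orchestration import metadata
from tfx.orchestration import pipeline as pipeline_lib
from tfx.orchestration.local.local_dag_runner import LocalDagRunner

pipeline_root = '/tmp/chicago_taxi_test'
taxi_components = create_pipeline_components(
    pipeline_root=pipeline_root,
    transform_module='/path/to/taxi_utils.py',
    trainer_module='/path/to/taxi_utils.py',
    csv_input_location='/path/to/csv_data')

LocalDagRunner().run(
    pipeline_lib.Pipeline(
        pipeline_name='chicago_taxi_test',
        pipeline_root=pipeline_root,
        components=taxi_components,
        metadata_connection_config=metadata.sqlite_metadata_connection_config(
            os.path.join(pipeline_root, 'metadata.db'))))
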
Example 3
    def make_evaluation(
        self,
        benchmark_name: str,
        model: Optional[types.Channel] = None,
        predictions: Optional[types.Channel] = None,
        benchmark_run: int = 1,
        runs_per_benchmark: int = 1,
        **kwargs,
    ) -> List[base_component.BaseComponent]:
        """Returns a list of components for evaluating the model.

        Evaluates the model on the 'test' split using the TFX Evaluator, and
        publishes results to MLMD for analysis from the results Colab notebook.

        Args:
          benchmark_name: Unique name of the benchmark, used for publishing
            evaluation results.
          model: A `standard_artifacts.Model` Channel, usually produced by a
            Trainer component. Exactly one of `model` and `predictions` must be
            set.
          predictions: A `standard_artifacts.Examples` Channel containing
            predictions for `test_examples` in a features column defined by
            `prediction_column_key`. This is generally used for evaluating a
            model that cannot be exported as a `tf.SavedModel`, using its
            predictions on the test set instead. Exactly one of `model` and
            `predictions` must be set.
          benchmark_run: Index of the benchmark run, used when
            runs_per_benchmark > 1.
          runs_per_benchmark: Total number of benchmark runs. Used for
            computing means and variance across benchmark runs.
          **kwargs: Additional kwargs to pass to `Task#make_evaluation`.

        Returns:
          A list containing the Evaluator and BenchmarkResultPublisher
          components for this evaluation.

        Raises:
          ValueError: If both `model` and `predictions` are specified, or if
            neither is specified.
        """

        del kwargs  # Unused.
        if not model and not predictions:
            raise ValueError(
                'At least one of `model` or `predictions` should be specified')
        if model and predictions:
            raise ValueError(
                'Only one of `model` or `predictions` should be specified')
        metrics_specs = tfma.metrics.specs_from_metrics(
            metrics=self.evaluation_metrics)
        if model:
            evaluator = tfx.Evaluator(
                examples=self.test_examples,
                model=model,
                eval_config=tfma.EvalConfig(
                    model_specs=[
                        tfma.ModelSpec(
                            label_key=self.label_key, model_type='tf_generic'),
                    ],
                    metrics_specs=metrics_specs,
                    slicing_specs=[]),
                example_splits=self.test_example_splits,
                instance_name='model')

            publisher = BenchmarkResultPublisher(benchmark_name,
                                                 evaluator.outputs.evaluation,
                                                 run=benchmark_run,
                                                 num_runs=runs_per_benchmark,
                                                 instance_name='model')
        else:
            evaluator = tfx.Evaluator(
                examples=predictions,
                eval_config=tfma.EvalConfig(
                    model_specs=[
                        tfma.ModelSpec(
                            label_key=self.label_key,
                            prediction_key=self.prediction_column_key),
                    ],
                    metrics_specs=metrics_specs,
                    slicing_specs=[]),
                example_splits=self.test_example_splits,
                instance_name='predictions')

            publisher = BenchmarkResultPublisher(benchmark_name,
                                                 evaluator.outputs.evaluation,
                                                 run=benchmark_run,
                                                 num_runs=runs_per_benchmark,
                                                 instance_name='predictions')

        return [evaluator, publisher]
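
A minimal usage sketch for `make_evaluation` above. Here `task` stands for an object exposing this method (for example, a benchmark task wrapping a dataset), and `example_gen` and `trainer` are hypothetical upstream components; the benchmark name and run counts are illustrative.

# Sketch only: `task`, `example_gen`, and `trainer` are assumed to exist and
# are not defined in the source excerpt.
evaluation = task.make_evaluation(
    benchmark_name='my_benchmark.keras_trainer',
    model=trainer.outputs['model'],
    benchmark_run=2,
    runs_per_benchmark=5)

# The returned Evaluator and BenchmarkResultPublisher are appended to the
# rest of the benchmark's components before the pipeline is run.
pipeline_components = [example_gen, trainer] + evaluation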