Example No. 1
    def testVerifyEvaluatorRaisesValueError(self):
        extractors = [
            extractor.Extractor(stage_name='ExtractorThatExists',
                                ptransform=None)
        ]
        evaluator.verify_evaluator(
            evaluator.Evaluator(stage_name='EvaluatorWithoutError',
                                run_after='ExtractorThatExists',
                                ptransform=None), extractors)

        with self.assertRaises(ValueError):
            evaluator.verify_evaluator(
                evaluator.Evaluator(stage_name='EvaluatorWithError',
                                    run_after='ExtractorThatDoesNotExist',
                                    ptransform=None), extractors)
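
The rule exercised by this test is simple: an Evaluator declares the extraction stage it must run after, and verification fails when no Extractor with that stage_name was provided. Below is a minimal, self-contained sketch of that check (not the TFMA implementation; verify_evaluator_sketch and its arguments are hypothetical names used only for illustration).

# Hypothetical helper, not part of TFMA: restates the check the test expects.
def verify_evaluator_sketch(evaluator_stage_name, run_after,
                            extractor_stage_names):
    # run_after must name one of the provided extractor stages.
    if run_after and run_after not in extractor_stage_names:
        raise ValueError(
            'Evaluator %r requires run_after stage %r, but no Extractor with '
            'that stage_name was given.' % (evaluator_stage_name, run_after))


# Mirrors the test above: the first call succeeds, the second raises ValueError.
verify_evaluator_sketch('EvaluatorWithoutError', 'ExtractorThatExists',
                        ['ExtractorThatExists'])
try:
    verify_evaluator_sketch('EvaluatorWithError', 'ExtractorThatDoesNotExist',
                            ['ExtractorThatExists'])
except ValueError:
    pass  # expected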
Example No. 2
def ExtractEvaluateAndWriteResults(  # pylint: disable=invalid-name
        examples,
        eval_shared_model,
        output_path,
        display_only_data_location=None,
        slice_spec=None,
        desired_batch_size=None,
        extractors=None,
        evaluators=None,
        writers=None,
        write_config=True,
        num_bootstrap_samples=1):
    """PTransform for performing extraction, evaluation, and writing results.

  Users who want to construct their own Beam pipelines instead of using the
  lightweight run_model_analysis functions should use this PTransform.

  Example usage:
    eval_shared_model = tfma.default_eval_shared_model(
        eval_saved_model_path=model_location,
        add_metrics_callbacks=[...],
        example_weight_key=example_weight_key)
    with beam.Pipeline(runner=...) as p:
      _ = (p
           | 'ReadData' >> beam.io.ReadFromTFRecord(data_location)
           | 'ExtractEvaluateAndWriteResults' >>
           tfma.ExtractEvaluateAndWriteResults(
               eval_shared_model=eval_shared_model,
               output_path=output_path,
               display_only_data_location=data_location,
               slice_spec=slice_spec,
               ...))
    result = tfma.load_eval_result(output_path=output_path)
    tfma.view.render_slicing_metrics(result)

  Note that the exact serialization format is an internal implementation detail
  and subject to change. Users should only use the TFMA functions to write and
  read the results.

  Args:
    examples: PCollection of input examples. Can be any format the model accepts
      (e.g. string containing CSV row, TensorFlow.Example, etc).
    eval_shared_model: Shared model parameters for EvalSavedModel including any
      additional metrics (see EvalSharedModel for more information on how to
      configure additional metrics).
    output_path: Path to output metrics and plots results.
    display_only_data_location: Optional path indicating where the examples were
      read from. This is used only for display purposes - data will not actually
      be read from this path.
    slice_spec: Optional list of SingleSliceSpec specifying the slices to slice
      the data into. If None, defaults to the overall slice.
    desired_batch_size: Optional batch size for batching in Predict and
      Aggregate.
    extractors: Optional list of Extractors to apply to Extracts. Typically
      these will be added by calling the default_extractors function. If no
      extractors are provided, default_extractors (non-materialized) will be
      used.
    evaluators: Optional list of Evaluators for evaluating Extracts. Typically
      these will be added by calling the default_evaluators function. If no
      evaluators are provided, default_evaluators will be used.
    writers: Optional list of Writers for writing Evaluation output. Typically
      these will be added by calling the default_writers function. If no writers
      are provided, default_writers will be used.
    write_config: True to write the config along with the results.
    num_bootstrap_samples: Optional, set to at least 20 in order to calculate
      metrics with confidence intervals.

  Raises:
    ValueError: If matching Extractor not found for an Evaluator.

  Returns:
    PDone.
  """
    if not extractors:
        extractors = default_extractors(eval_shared_model=eval_shared_model,
                                        slice_spec=slice_spec,
                                        desired_batch_size=desired_batch_size,
                                        materialize=False)

    if not evaluators:
        evaluators = default_evaluators(
            eval_shared_model=eval_shared_model,
            desired_batch_size=desired_batch_size,
            num_bootstrap_samples=num_bootstrap_samples)

    for v in evaluators:
        evaluator.verify_evaluator(v, extractors)

    if not writers:
        writers = default_writers(output_path=output_path)

    data_location = '<user provided PCollection>'
    if display_only_data_location is not None:
        data_location = display_only_data_location

    example_weight_metric_key = metric_keys.EXAMPLE_COUNT
    if eval_shared_model.example_weight_key:
        example_weight_metric_key = metric_keys.EXAMPLE_WEIGHT

    eval_config = EvalConfig(
        model_location=eval_shared_model.model_path,
        data_location=data_location,
        slice_spec=slice_spec,
        example_weight_metric_key=example_weight_metric_key,
        num_bootstrap_samples=num_bootstrap_samples)

    # pylint: disable=no-value-for-parameter
    _ = (examples
         | 'InputsToExtracts' >> InputsToExtracts()
         | 'ExtractAndEvaluate' >> ExtractAndEvaluate(extractors=extractors,
                                                      evaluators=evaluators)
         | 'WriteResults' >> WriteResults(writers=writers))

    if write_config:
        _ = examples.pipeline | WriteEvalConfig(eval_config, output_path)
    # pylint: enable=no-value-for-parameter

    return beam.pvalue.PDone(examples.pipeline)
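
For reference, the slice_spec argument in this older signature is a list of SingleSliceSpec objects. A hedged sketch of how it might be built is shown below, assuming tfma.slicer.SingleSliceSpec accepts the columns keyword as in the TFMA releases this signature comes from; 'trip_start_hour' is a hypothetical feature name used only for illustration.

import tensorflow_model_analysis as tfma

# 'trip_start_hour' is a hypothetical column name for illustration.
slice_spec = [
    tfma.slicer.SingleSliceSpec(),  # the overall (unsliced) metrics
    tfma.slicer.SingleSliceSpec(columns=['trip_start_hour']),  # per-value slices
]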
Example No. 3
def ExtractEvaluateAndWriteResults(  # pylint: disable=invalid-name
    examples: beam.pvalue.PCollection,
    eval_shared_model: Optional[Union[types.EvalSharedModel,
                                      Dict[Text,
                                           types.EvalSharedModel]]] = None,
    eval_config: config.EvalConfig = None,
    extractors: Optional[List[extractor.Extractor]] = None,
    evaluators: Optional[List[evaluator.Evaluator]] = None,
    writers: Optional[List[writer.Writer]] = None,
    output_path: Optional[Text] = None,
    display_only_data_location: Optional[Text] = None,
    display_only_file_format: Optional[Text] = None,
    slice_spec: Optional[List[slicer.SingleSliceSpec]] = None,
    write_config: Optional[bool] = True,
    compute_confidence_intervals: Optional[bool] = False,
    k_anonymization_count: int = 1,
    desired_batch_size: Optional[int] = None,
    random_seed_for_testing: Optional[int] = None) -> beam.pvalue.PDone:
  """PTransform for performing extraction, evaluation, and writing results.

  Users who want to construct their own Beam pipelines instead of using the
  lightweight run_model_analysis functions should use this PTransform.

  Example usage:
    eval_config = tfma.EvalConfig(slicing_specs=[...], metrics_specs=[...])
    eval_shared_model = tfma.default_eval_shared_model(
        eval_saved_model_path=model_location, eval_config=eval_config)
    with beam.Pipeline(runner=...) as p:
      _ = (p
           | 'ReadData' >> beam.io.ReadFromTFRecord(data_location)
           | 'ExtractEvaluateAndWriteResults' >>
           tfma.ExtractEvaluateAndWriteResults(
               eval_shared_model=eval_shared_model,
               eval_config=eval_config,
               ...))
    result = tfma.load_eval_result(output_path=output_path)
    tfma.view.render_slicing_metrics(result)

  Note that the exact serialization format is an internal implementation detail
  and subject to change. Users should only use the TFMA functions to write and
  read the results.

  Args:
    examples: PCollection of input examples. Can be any format the model accepts
      (e.g. string containing CSV row, TensorFlow.Example, etc).
    eval_shared_model: Optional shared model (single-model evaluation) or dict
      of shared models keyed by model name (multi-model evaluation). Only
      required if needed by default extractors, evaluators, or writers and for
      display purposes of the model path.
    eval_config: Eval config.
    extractors: Optional list of Extractors to apply to Extracts. Typically
      these will be added by calling the default_extractors function. If no
      extractors are provided, default_extractors (non-materialized) will be
      used.
    evaluators: Optional list of Evaluators for evaluating Extracts. Typically
      these will be added by calling the default_evaluators function. If no
      evaluators are provided, default_evaluators will be used.
    writers: Optional list of Writers for writing Evaluation output. Typically
      these will be added by calling the default_writers function. If no writers
      are provided, default_writers will be used.
    output_path: Path to output metrics and plots results.
    display_only_data_location: Optional path indicating where the examples were
      read from. This is used only for display purposes - data will not actually
      be read from this path.
    display_only_file_format: Optional format of the examples. This is used only
      for display purposes.
    slice_spec: Deprecated (use EvalConfig).
    write_config: Deprecated (use EvalConfig).
    compute_confidence_intervals: Deprecated (use EvalConfig).
    k_anonymization_count: Deprecated (use EvalConfig).
    desired_batch_size: Optional batch size for batching in Predict.
    random_seed_for_testing: Provide for deterministic tests only.

  Raises:
    ValueError: If EvalConfig invalid or matching Extractor not found for an
      Evaluator.

  Returns:
    PDone.
  """
  eval_shared_models = eval_shared_model
  if not isinstance(eval_shared_model, dict):
    eval_shared_models = {'': eval_shared_model}

  if eval_config is None:
    model_specs = []
    for model_name, shared_model in eval_shared_models.items():
      example_weight_key = shared_model.example_weight_key
      example_weight_keys = {}
      if example_weight_key and isinstance(example_weight_key, dict):
        example_weight_keys = example_weight_key
        example_weight_key = ''
      model_specs.append(
          config.ModelSpec(
              name=model_name,
              example_weight_key=example_weight_key,
              example_weight_keys=example_weight_keys))
    slicing_specs = None
    if slice_spec:
      slicing_specs = [s.to_proto() for s in slice_spec]
    options = config.Options()
    options.compute_confidence_intervals.value = compute_confidence_intervals
    options.k_anonymization_count.value = k_anonymization_count
    if not write_config:
      options.disabled_outputs.values.append(_EVAL_CONFIG_FILE)
    eval_config = config.EvalConfig(
        model_specs=model_specs, slicing_specs=slicing_specs, options=options)
  else:
    eval_config = config.update_eval_config_with_defaults(eval_config)

  config.verify_eval_config(eval_config)

  if not extractors:
    extractors = default_extractors(
        eval_config=eval_config,
        eval_shared_model=eval_shared_model,
        materialize=False,
        desired_batch_size=desired_batch_size)

  if not evaluators:
    evaluators = default_evaluators(
        eval_config=eval_config,
        eval_shared_model=eval_shared_model,
        random_seed_for_testing=random_seed_for_testing)

  for v in evaluators:
    evaluator.verify_evaluator(v, extractors)

  if not writers:
    writers = default_writers(
        output_path=output_path, eval_shared_model=eval_shared_model)

  # pylint: disable=no-value-for-parameter
  _ = (
      examples
      | 'InputsToExtracts' >> InputsToExtracts()
      | 'ExtractAndEvaluate' >> ExtractAndEvaluate(
          extractors=extractors, evaluators=evaluators)
      | 'WriteResults' >> WriteResults(writers=writers))

  if _EVAL_CONFIG_FILE not in eval_config.options.disabled_outputs.values:
    data_location = '<user provided PCollection>'
    if display_only_data_location is not None:
      data_location = display_only_data_location
    file_format = '<unknown>'
    if display_only_file_format is not None:
      file_format = display_only_file_format
    model_locations = {}
    for k, v in eval_shared_models.items():
      model_locations[k] = ('<unknown>' if v is None or v.model_path is None
                            else v.model_path)
    _ = (
        examples.pipeline
        | WriteEvalConfig(eval_config, output_path, data_location, file_format,
                          model_locations))
  # pylint: enable=no-value-for-parameter

  return beam.pvalue.PDone(examples.pipeline)
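
A hedged sketch of the eval_config consumed by this newer signature: it uses the tfma.EvalConfig proto with ModelSpec and SlicingSpec messages, and 'label' / 'trip_start_hour' are hypothetical keys used only for illustration.

import tensorflow_model_analysis as tfma

# 'label' and 'trip_start_hour' are hypothetical keys for illustration.
eval_config = tfma.EvalConfig(
    model_specs=[tfma.ModelSpec(label_key='label')],
    slicing_specs=[
        tfma.SlicingSpec(),  # the overall (unsliced) metrics
        tfma.SlicingSpec(feature_keys=['trip_start_hour']),
    ])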
Example No. 4
def ExtractEvaluateAndWriteResults(  # pylint: disable=invalid-name
        examples: beam.pvalue.PCollection,
        eval_shared_model: Optional[types.EvalSharedModel] = None,
        eval_shared_models: Optional[List[types.EvalSharedModel]] = None,
        eval_config: config.EvalConfig = None,
        extractors: Optional[List[extractor.Extractor]] = None,
        evaluators: Optional[List[evaluator.Evaluator]] = None,
        writers: Optional[List[writer.Writer]] = None,
        output_path: Optional[Text] = None,
        display_only_data_location: Optional[Text] = None,
        slice_spec: Optional[List[slicer.SingleSliceSpec]] = None,
        desired_batch_size: Optional[int] = None,
        write_config: Optional[bool] = True,
        compute_confidence_intervals: Optional[bool] = False,
        k_anonymization_count: int = 1) -> beam.pvalue.PDone:
    """PTransform for performing extraction, evaluation, and writing results.

  Users who want to construct their own Beam pipelines instead of using the
  lightweight run_model_analysis functions should use this PTransform.

  Example usage:
    eval_config = tfma.EvalConfig(
        input_data_specs=[tfma.InputDataSpec(location=data_location)],
        model_specs=[tfma.ModelSpec(location=model_location)],
        output_data_specs=[tfma.OutputDataSpec(default_location=output_path)],
        slicing_specs=[...],
        metrics_specs=[...])
    eval_shared_model = tfma.default_eval_shared_model(
        eval_saved_model_path=model_location,
        add_metrics_callbacks=[...])
    with beam.Pipeline(runner=...) as p:
      _ = (p
           | 'ReadData' >> beam.io.ReadFromTFRecord(data_location)
           | 'ExtractEvaluateAndWriteResults' >>
           tfma.ExtractEvaluateAndWriteResults(
               eval_config=eval_config,
               eval_shared_models=[eval_shared_model],
               ...))
    result = tfma.load_eval_result(output_path=output_path)
    tfma.view.render_slicing_metrics(result)

  Note that the exact serialization format is an internal implementation detail
  and subject to change. Users should only use the TFMA functions to write and
  read the results.

  Args:
    examples: PCollection of input examples. Can be any format the model accepts
      (e.g. string containing CSV row, TensorFlow.Example, etc).
    eval_shared_model: Shared model (single-model evaluation).
    eval_shared_models: Shared models (multi-model evaluation).
    eval_config: Eval config.
    extractors: Optional list of Extractors to apply to Extracts. Typically
      these will be added by calling the default_extractors function. If no
      extractors are provided, default_extractors (non-materialized) will be
      used.
    evaluators: Optional list of Evaluators for evaluating Extracts. Typically
      these will be added by calling the default_evaluators function. If no
      evaluators are provided, default_evaluators will be used.
    writers: Optional list of Writers for writing Evaluation output. Typically
      these will be added by calling the default_writers function. If no writers
      are provided, default_writers will be used.
    output_path: Deprecated (use EvalConfig).
    display_only_data_location: Deprecated (use EvalConfig).
    slice_spec: Deprecated (use EvalConfig).
    desired_batch_size: Deprecated (use EvalConfig).
    write_config: Deprecated (use EvalConfig).
    compute_confidence_intervals: Deprecated (use EvalConfig).
    k_anonymization_count: Deprecated (use EvalConfig).

  Raises:
    ValueError: If matching Extractor not found for an Evaluator.

  Returns:
    PDone.
  """
    if eval_shared_model is not None:
        eval_shared_models = [eval_shared_model]

    if eval_config is None:
        data_location = '<user provided PCollection>'
        if display_only_data_location is not None:
            data_location = display_only_data_location
        disabled_outputs = None
        if not write_config:
            disabled_outputs = [_EVAL_CONFIG_FILE]
        model_specs = []
        for m in eval_shared_models:
            example_weight_key = m.example_weight_key
            example_weight_keys = {}
            if example_weight_key and isinstance(example_weight_key, dict):
                example_weight_keys = example_weight_key
                example_weight_key = ''
            model_specs.append(
                config.ModelSpec(location=m.model_path,
                                 example_weight_key=example_weight_key,
                                 example_weight_keys=example_weight_keys))
        slicing_specs = None
        if slice_spec:
            slicing_specs = [s.to_proto() for s in slice_spec]
        options = config.Options()
        options.compute_confidence_intervals.value = compute_confidence_intervals
        options.k_anonymization_count.value = k_anonymization_count
        if desired_batch_size:
            options.desired_batch_size.value = desired_batch_size
        eval_config = config.EvalConfig(
            input_data_specs=[config.InputDataSpec(location=data_location)],
            model_specs=model_specs,
            output_data_specs=[
                config.OutputDataSpec(default_location=output_path,
                                      disabled_outputs=disabled_outputs)
            ],
            slicing_specs=slicing_specs,
            options=options)

    if not extractors:
        extractors = default_extractors(eval_config=eval_config,
                                        eval_shared_models=eval_shared_models,
                                        materialize=False)

    if not evaluators:
        evaluators = default_evaluators(eval_config=eval_config,
                                        eval_shared_models=eval_shared_models)

    for v in evaluators:
        evaluator.verify_evaluator(v, extractors)

    if not writers:
        writers = default_writers(eval_config=eval_config,
                                  eval_shared_models=eval_shared_models)

    # pylint: disable=no-value-for-parameter
    _ = (examples
         | 'InputsToExtracts' >> InputsToExtracts()
         | 'ExtractAndEvaluate' >> ExtractAndEvaluate(extractors=extractors,
                                                      evaluators=evaluators)
         | 'WriteResults' >> WriteResults(writers=writers))

    # TODO(b/141016373): Add support for multiple models.
    if _EVAL_CONFIG_FILE not in eval_config.output_data_specs[
            0].disabled_outputs:
        _ = examples.pipeline | WriteEvalConfig(eval_config)
    # pylint: enable=no-value-for-parameter

    return beam.pvalue.PDone(examples.pipeline)
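
One detail shared by the last two examples is how a model's example_weight_key is folded into the generated ModelSpec: a plain string stays in example_weight_key, while a dict (one weight key per model output) is moved to example_weight_keys. The standalone restatement below uses the hypothetical helper name normalize_example_weight_key for illustration.

# Hypothetical standalone helper restating the normalization used above.
def normalize_example_weight_key(example_weight_key):
    # A dict maps output names to weight keys (multi-output model); a plain
    # string is a single weight key. Only one of the two fields is populated.
    example_weight_keys = {}
    if example_weight_key and isinstance(example_weight_key, dict):
        example_weight_keys = example_weight_key
        example_weight_key = ''
    return example_weight_key, example_weight_keys


assert normalize_example_weight_key('weight') == ('weight', {})
assert normalize_example_weight_key({'output_1': 'w1'}) == ('', {'output_1': 'w1'})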