Example #1
  def testUpdateConfigWithoutBaselineModelWhenModelNameProvided(self):
    eval_config_pbtxt = """
      model_specs { name: "candidate" }
      model_specs { name: "baseline" is_baseline: true }
      metrics_specs {
        metrics { class_name: "WeightedExampleCount" }
        model_names: "candidate"
      }
    """
    eval_config = text_format.Parse(eval_config_pbtxt, config.EvalConfig())

    expected_eval_config_pbtxt = """
      model_specs { name: "candidate" }
      model_specs { name: "baseline" is_baseline: true }
      metrics_specs {
        metrics { class_name: "WeightedExampleCount" }
        model_names: ["candidate"]
      }
    """
    expected_eval_config = text_format.Parse(expected_eval_config_pbtxt,
                                             config.EvalConfig())

    got_eval_config = config.update_eval_config_with_defaults(
        eval_config, has_baseline=True)
    self.assertProtoEquals(got_eval_config, expected_eval_config)

  def testUpdateConfigWithDefaultsSingleModel(self):
    eval_config_pbtxt = """
      model_specs { name: "model1" }
      metrics_specs {
        metrics { class_name: "WeightedExampleCount" }
      }
      metrics_specs {
        metrics { class_name: "MeanLabel" }
        model_names: ["model1"]
      }
    """
    eval_config = text_format.Parse(eval_config_pbtxt, config.EvalConfig())

    expected_eval_config_pbtxt = """
      model_specs { name: "" }
      metrics_specs {
        metrics { class_name: "WeightedExampleCount" }
        model_names: [""]
      }
      metrics_specs {
        metrics { class_name: "MeanLabel" }
        model_names: [""]
      }
    """
    expected_eval_config = text_format.Parse(expected_eval_config_pbtxt,
                                             config.EvalConfig())

    got_eval_config = config.update_eval_config_with_defaults(eval_config)
    self.assertProtoEquals(got_eval_config, expected_eval_config)
Example #3
def _update_eval_config_with_defaults(
    eval_config: config.EvalConfig,
    eval_shared_model: Optional[types.MaybeMultipleEvalSharedModels]
) -> config.EvalConfig:
  """Returns updated eval config with default values."""
  eval_shared_models = model_util.verify_and_update_eval_shared_models(
      eval_shared_model)
  maybe_add_baseline = eval_shared_models and len(eval_shared_models) == 2

  return config.update_eval_config_with_defaults(
      eval_config, maybe_add_baseline=maybe_add_baseline)
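
The private wrapper above only decides whether a baseline slot may be added: when exactly two shared models are supplied (candidate plus baseline), it forwards maybe_add_baseline=True to the public helper. A minimal sketch of the equivalent public-API call, assuming the import paths used by these snippets (e.g. `from tensorflow_model_analysis import config`; exact module layout may differ by TFMA version) and hypothetical placeholder model objects:

# Sketch only: equivalent public-API call for the two-model case.
from google.protobuf import text_format
from tensorflow_model_analysis import config  # Import path may vary by version.

# Hypothetical stand-ins for the candidate/baseline EvalSharedModels; the
# wrapper above only counts how many there are.
eval_shared_models = {'candidate': object(), 'baseline': object()}

eval_config = text_format.Parse(
    'model_specs { label_key: "my_label" }', config.EvalConfig())

# With two shared models, this is what _update_eval_config_with_defaults()
# reduces to (see also the AutomaticallyAddsBaselineModel test further down).
eval_config = config.update_eval_config_with_defaults(
    eval_config, maybe_add_baseline=len(eval_shared_models) == 2)
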
def default_evaluators(  # pylint: disable=invalid-name
    eval_shared_model: Optional[Union[types.EvalSharedModel,
                                      Dict[Text,
                                           types.EvalSharedModel]]] = None,
    eval_config: config.EvalConfig = None,
    compute_confidence_intervals: Optional[bool] = False,
    k_anonymization_count: int = 1,
    desired_batch_size: Optional[int] = None,
    serialize: bool = False,
    random_seed_for_testing: Optional[int] = None) -> List[
        evaluator.Evaluator]:
    """Returns the default evaluators for use in ExtractAndEvaluate.

  Args:
    eval_shared_model: Optional shared model (single-model evaluation) or dict
      of shared models keyed by model name (multi-model evaluation). Only
      required if there are metrics to be computed in-graph using the model.
    eval_config: Eval config.
    compute_confidence_intervals: Deprecated (use eval_config).
    k_anonymization_count: Deprecated (use eval_config).
    desired_batch_size: Optional batch size for batching in combiner.
    serialize: Deprecated.
    random_seed_for_testing: Provide for deterministic tests only.
  """
  disabled_outputs = []
  if eval_config:
    eval_config = config.update_eval_config_with_defaults(eval_config)
    disabled_outputs = eval_config.options.disabled_outputs.values
  if (constants.METRICS_KEY in disabled_outputs
      and constants.PLOTS_KEY in disabled_outputs):
    return []
  if _is_legacy_eval(eval_shared_model, eval_config):
    # Backwards compatibility for previous add_metrics_callbacks implementation.
    if eval_config is not None:
      if eval_config.options.HasField('compute_confidence_intervals'):
        compute_confidence_intervals = (
            eval_config.options.compute_confidence_intervals.value)
      if eval_config.options.HasField('k_anonymization_count'):
        k_anonymization_count = eval_config.options.k_anonymization_count.value
    return [
        metrics_and_plots_evaluator.MetricsAndPlotsEvaluator(
            eval_shared_model,
            compute_confidence_intervals=compute_confidence_intervals,
            k_anonymization_count=k_anonymization_count,
            desired_batch_size=desired_batch_size,
            serialize=serialize,
            random_seed_for_testing=random_seed_for_testing)
    ]
  else:
    return [
        metrics_and_plots_evaluator_v2.MetricsAndPlotsEvaluator(
            eval_config=eval_config, eval_shared_model=eval_shared_model)
    ]
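
As a usage note, the second branch is the one taken for an EvalConfig-driven (v2) evaluation. A minimal sketch of calling the default_evaluators function shown above, assuming the same imports as the other snippets; eval_shared_model is left as a placeholder for the result of tfma.default_eval_shared_model(...):

# Sketch only: obtain evaluators for a config-driven (v2) evaluation.
from google.protobuf import text_format
from tensorflow_model_analysis import config  # Import path may vary by version.

eval_config = text_format.Parse(
    """
      model_specs { label_key: "my_label" }
      metrics_specs { metrics { class_name: "WeightedExampleCount" } }
    """, config.EvalConfig())

eval_shared_model = None  # Placeholder; normally tfma.default_eval_shared_model(...).
evaluators = default_evaluators(
    eval_config=eval_config, eval_shared_model=eval_shared_model)
# Given the branching above, this is expected to yield the v2
# MetricsAndPlotsEvaluator rather than the legacy one.
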
  def testUpdateConfigWithDefaultsDoesNotAutomaticallyAddBaselineModel(self):
    eval_config_pbtxt = """
      model_specs { name: "model1" }
      model_specs { name: "model2" is_baseline: true }
      metrics_specs {
        metrics { class_name: "WeightedExampleCount" }
      }
    """
    eval_config = text_format.Parse(eval_config_pbtxt, config.EvalConfig())

    expected_eval_config_pbtxt = """
      model_specs { name: "model1" }
      model_specs { name: "model2" is_baseline: true }
      metrics_specs {
        metrics { class_name: "WeightedExampleCount" }
        model_names: ["model1", "model2"]
      }
    """
    expected_eval_config = text_format.Parse(expected_eval_config_pbtxt,
                                             config.EvalConfig())

    got_eval_config = config.update_eval_config_with_defaults(eval_config)
    self.assertProtoEquals(got_eval_config, expected_eval_config)

  def testUpdateConfigWithDefaultsAutomaticallyAddsBaselineModel(self):
    eval_config_pbtxt = """
      model_specs { label_key: "my_label" }
      metrics_specs {
        metrics { class_name: "WeightedExampleCount" }
      }
    """
    eval_config = text_format.Parse(eval_config_pbtxt, config.EvalConfig())

    expected_eval_config_pbtxt = """
      model_specs { name: "candidate" label_key: "my_label" }
      model_specs { name: "baseline" label_key: "my_label" is_baseline: true }
      metrics_specs {
        metrics { class_name: "WeightedExampleCount" }
        model_names: ["candidate", "baseline"]
      }
    """
    expected_eval_config = text_format.Parse(expected_eval_config_pbtxt,
                                             config.EvalConfig())

    got_eval_config = config.update_eval_config_with_defaults(
        eval_config, maybe_add_baseline=True)
    self.assertProtoEquals(got_eval_config, expected_eval_config)

  def testUpdateConfigWithDefaultsRemoveBaselineModel(self):
    eval_config_pbtxt = """
      model_specs { name: "candidate" }
      model_specs { name: "baseline" is_baseline: true }
      metrics_specs {
        metrics {
          class_name: "MeanLabel"
          threshold {
            value_threshold {
              lower_bound { value: 0.9 }
            }
            change_threshold {
              direction: HIGHER_IS_BETTER
              absolute { value: -1e-10 }
            }
          }
        }
        thresholds {
          key: "my_metric"
          value {
            value_threshold {
              lower_bound { value: 0.9 }
            }
            change_threshold {
              direction: HIGHER_IS_BETTER
              absolute { value: -1e-10 }
            }
          }
        }
      }
    """
    eval_config = text_format.Parse(eval_config_pbtxt, config.EvalConfig())

    expected_eval_config_pbtxt = """
      model_specs {}
      metrics_specs {
        metrics {
          class_name: "MeanLabel"
          threshold {
            value_threshold {
              lower_bound { value: 0.9 }
            }
          }
        }
        thresholds {
          key: "my_metric"
          value {
            value_threshold {
              lower_bound { value: 0.9 }
            }
          }
        }
        model_names: [""]
      }
    """
    expected_eval_config = text_format.Parse(expected_eval_config_pbtxt,
                                             config.EvalConfig())

    got_eval_config = config.update_eval_config_with_defaults(
        eval_config, maybe_remove_baseline=True)
    self.assertProtoEquals(got_eval_config, expected_eval_config)
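
Taken together, the tests above exercise the keyword arguments of update_eval_config_with_defaults that control baseline handling. A compact summary sketch, assuming the same imports the tests use (exact module paths may vary by TFMA version):

from google.protobuf import text_format
from tensorflow_model_analysis import config  # Import path may vary by version.

single_model_config = text_format.Parse(
    'model_specs { label_key: "my_label" }', config.EvalConfig())
candidate_baseline_config = text_format.Parse(
    'model_specs { name: "candidate" } '
    'model_specs { name: "baseline" is_baseline: true }',
    config.EvalConfig())

# Defaults only: model names are propagated into every metrics_specs entry.
updated = config.update_eval_config_with_defaults(single_model_config)

# Single spec that may be promoted to a candidate/baseline pair.
updated = config.update_eval_config_with_defaults(
    single_model_config, maybe_add_baseline=True)

# A baseline is expected; an existing baseline spec is kept, not duplicated.
updated = config.update_eval_config_with_defaults(
    candidate_baseline_config, has_baseline=True)

# The baseline spec is stripped and change_thresholds that need it are dropped.
updated = config.update_eval_config_with_defaults(
    candidate_baseline_config, maybe_remove_baseline=True)
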
Example #8
def ExtractEvaluateAndWriteResults(  # pylint: disable=invalid-name
    examples: beam.pvalue.PCollection,
    eval_shared_model: Optional[Union[types.EvalSharedModel,
                                      Dict[Text,
                                           types.EvalSharedModel]]] = None,
    eval_config: config.EvalConfig = None,
    extractors: Optional[List[extractor.Extractor]] = None,
    evaluators: Optional[List[evaluator.Evaluator]] = None,
    writers: Optional[List[writer.Writer]] = None,
    output_path: Optional[Text] = None,
    display_only_data_location: Optional[Text] = None,
    display_only_file_format: Optional[Text] = None,
    slice_spec: Optional[List[slicer.SingleSliceSpec]] = None,
    write_config: Optional[bool] = True,
    compute_confidence_intervals: Optional[bool] = False,
    k_anonymization_count: int = 1,
    desired_batch_size: Optional[int] = None,
    random_seed_for_testing: Optional[int] = None) -> beam.pvalue.PDone:
  """PTransform for performing extraction, evaluation, and writing results.

  Users who want to construct their own Beam pipelines instead of using the
  lightweight run_model_analysis functions should use this PTransform.

  Example usage:
    eval_config = tfma.EvalConfig(slicing_specs=[...], metrics_specs=[...])
    eval_shared_model = tfma.default_eval_shared_model(
        eval_saved_model_path=model_location, eval_config=eval_config)
    with beam.Pipeline(runner=...) as p:
      _ = (p
           | 'ReadData' >> beam.io.ReadFromTFRecord(data_location)
           | 'ExtractEvaluateAndWriteResults' >>
           tfma.ExtractEvaluateAndWriteResults(
               eval_shared_model=eval_shared_model,
               eval_config=eval_config,
               ...))
    result = tfma.load_eval_result(output_path=output_path)
    tfma.view.render_slicing_metrics(result)

  Note that the exact serialization format is an internal implementation detail
  and subject to change. Users should only use the TFMA functions to write and
  read the results.

  Args:
    examples: PCollection of input examples. Can be any format the model accepts
      (e.g. string containing CSV row, TensorFlow.Example, etc).
    eval_shared_model: Optional shared model (single-model evaluation) or dict
      of shared models keyed by model name (multi-model evaluation). Only
      required if needed by default extractors, evaluators, or writers and for
      display purposes of the model path.
    eval_config: Eval config.
    extractors: Optional list of Extractors to apply to Extracts. Typically
      these will be added by calling the default_extractors function. If no
      extractors are provided, default_extractors (non-materialized) will be
      used.
    evaluators: Optional list of Evaluators for evaluating Extracts. Typically
      these will be added by calling the default_evaluators function. If no
      evaluators are provided, default_evaluators will be used.
    writers: Optional list of Writers for writing Evaluation output. Typically
      these will be added by calling the default_writers function. If no writers
      are provided, default_writers will be used.
    output_path: Path to output metrics and plots results.
    display_only_data_location: Optional path indicating where the examples were
      read from. This is used only for display purposes - data will not actually
      be read from this path.
    display_only_file_format: Optional format of the examples. This is used only
      for display purposes.
    slice_spec: Deprecated (use EvalConfig).
    write_config: Deprecated (use EvalConfig).
    compute_confidence_intervals: Deprecated (use EvalConfig).
    k_anonymization_count: Deprecated (use EvalConfig).
    desired_batch_size: Optional batch size for batching in Predict.
    random_seed_for_testing: Provide for deterministic tests only.

  Raises:
    ValueError: If EvalConfig invalid or matching Extractor not found for an
      Evaluator.

  Returns:
    PDone.
  """
  eval_shared_models = eval_shared_model
  if not isinstance(eval_shared_model, dict):
    eval_shared_models = {'': eval_shared_model}

  if eval_config is None:
    model_specs = []
    for model_name, shared_model in eval_shared_models.items():
      example_weight_key = shared_model.example_weight_key
      example_weight_keys = {}
      if example_weight_key and isinstance(example_weight_key, dict):
        example_weight_keys = example_weight_key
        example_weight_key = ''
      model_specs.append(
          config.ModelSpec(
              name=model_name,
              example_weight_key=example_weight_key,
              example_weight_keys=example_weight_keys))
    slicing_specs = None
    if slice_spec:
      slicing_specs = [s.to_proto() for s in slice_spec]
    options = config.Options()
    options.compute_confidence_intervals.value = compute_confidence_intervals
    options.k_anonymization_count.value = k_anonymization_count
    if not write_config:
      options.disabled_outputs.values.append(_EVAL_CONFIG_FILE)
    eval_config = config.EvalConfig(
        model_specs=model_specs, slicing_specs=slicing_specs, options=options)
  else:
    eval_config = config.update_eval_config_with_defaults(eval_config)

  config.verify_eval_config(eval_config)

  if not extractors:
    extractors = default_extractors(
        eval_config=eval_config,
        eval_shared_model=eval_shared_model,
        materialize=False,
        desired_batch_size=desired_batch_size)

  if not evaluators:
    evaluators = default_evaluators(
        eval_config=eval_config,
        eval_shared_model=eval_shared_model,
        random_seed_for_testing=random_seed_for_testing)

  for v in evaluators:
    evaluator.verify_evaluator(v, extractors)

  if not writers:
    writers = default_writers(
        output_path=output_path, eval_shared_model=eval_shared_model)

  # pylint: disable=no-value-for-parameter
  _ = (
      examples
      | 'InputsToExtracts' >> InputsToExtracts()
      | 'ExtractAndEvaluate' >> ExtractAndEvaluate(
          extractors=extractors, evaluators=evaluators)
      | 'WriteResults' >> WriteResults(writers=writers))

  if _EVAL_CONFIG_FILE not in eval_config.options.disabled_outputs.values:
    data_location = '<user provided PCollection>'
    if display_only_data_location is not None:
      data_location = display_only_data_location
    file_format = '<unknown>'
    if display_only_file_format is not None:
      file_format = display_only_file_format
    model_locations = {}
    for k, v in eval_shared_models.items():
      model_locations[k] = ('<unknown>' if v is None or v.model_path is None
                            else v.model_path)
    _ = (
        examples.pipeline
        | WriteEvalConfig(eval_config, output_path, data_location, file_format,
                          model_locations))
  # pylint: enable=no-value-for-parameter

  return beam.pvalue.PDone(examples.pipeline)
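
When extractors, evaluators, or writers are omitted, the PTransform above fills them in from the default factories, so passing them explicitly is equivalent. A sketch of that explicit form, assuming eval_config, eval_shared_model, output_path, and the examples PCollection are already defined as in the docstring example:

# Sketch only: explicit equivalent of the defaults resolved inside
# ExtractEvaluateAndWriteResults. All names below are assumed to be defined
# as in the docstring example above.
eval_config = config.update_eval_config_with_defaults(eval_config)
extractors = default_extractors(
    eval_config=eval_config,
    eval_shared_model=eval_shared_model,
    materialize=False)
evaluators = default_evaluators(
    eval_config=eval_config, eval_shared_model=eval_shared_model)
writers = default_writers(
    output_path=output_path, eval_shared_model=eval_shared_model)

_ = (
    examples
    | 'InputsToExtracts' >> InputsToExtracts()
    | 'ExtractAndEvaluate' >> ExtractAndEvaluate(
        extractors=extractors, evaluators=evaluators)
    | 'WriteResults' >> WriteResults(writers=writers))
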
Example #9
def default_extractors(  # pylint: disable=invalid-name
    eval_shared_model: Union[types.EvalSharedModel,
                             Dict[Text, types.EvalSharedModel]] = None,
    eval_config: config.EvalConfig = None,
    slice_spec: Optional[List[slicer.SingleSliceSpec]] = None,
    desired_batch_size: Optional[int] = None,
    materialize: Optional[bool] = True) -> List[extractor.Extractor]:
  """Returns the default extractors for use in ExtractAndEvaluate.

  Args:
    eval_shared_model: Shared model (single-model evaluation) or dict of shared
      models keyed by model name (multi-model evaluation). Required unless the
      predictions are provided alongside of the features (i.e. model-agnostic
      evaluations).
    eval_config: Eval config.
    slice_spec: Deprecated (use EvalConfig).
    desired_batch_size: Optional batch size for batching in Predict.
    materialize: True to have extractors create materialized output.

  Raises:
    NotImplementedError: If eval_config contains mixed serving and eval models.
  """
  if eval_config is not None:
    eval_config = config.update_eval_config_with_defaults(eval_config)
    slice_spec = [
        slicer.SingleSliceSpec(spec=spec) for spec in eval_config.slicing_specs
    ]
  if _is_legacy_eval(eval_shared_model, eval_config):
    # Backwards compatibility for previous add_metrics_callbacks implementation.
    return [
        predict_extractor.PredictExtractor(
            eval_shared_model, desired_batch_size, materialize=materialize),
        slice_key_extractor.SliceKeyExtractor(
            slice_spec, materialize=materialize)
    ]
  elif eval_shared_model:
    model_types = model_util.get_model_types(eval_config)
    if not model_types.issubset(constants.VALID_MODEL_TYPES):
      raise NotImplementedError(
          'model type must be one of: {}. evalconfig={}'.format(
              str(constants.VALID_MODEL_TYPES), eval_config))
    if model_types == set([constants.TF_LITE]):
      return [
          input_extractor.InputExtractor(eval_config=eval_config),
          tflite_predict_extractor.TFLitePredictExtractor(
              eval_config=eval_config,
              eval_shared_model=eval_shared_model,
              desired_batch_size=desired_batch_size),
          slice_key_extractor.SliceKeyExtractor(
              slice_spec, materialize=materialize)
      ]
    elif constants.TF_LITE in model_types:
      raise NotImplementedError(
          'support for mixing tf_lite and non-tf_lite models is not '
          'implemented: eval_config={}'.format(eval_config))

    elif (eval_config and all(s.signature_name == eval_constants.EVAL_TAG
                              for s in eval_config.model_specs)):
      return [
          predict_extractor.PredictExtractor(
              eval_shared_model,
              desired_batch_size,
              materialize=materialize,
              eval_config=eval_config),
          slice_key_extractor.SliceKeyExtractor(
              slice_spec, materialize=materialize)
      ]
    elif (eval_config and any(s.signature_name == eval_constants.EVAL_TAG
                              for s in eval_config.model_specs)):
      raise NotImplementedError(
          'support for mixing eval and non-eval models is not implemented: '
          'eval_config={}'.format(eval_config))
    else:
      return [
          input_extractor.InputExtractor(eval_config=eval_config),
          predict_extractor_v2.PredictExtractor(
              eval_config=eval_config,
              eval_shared_model=eval_shared_model,
              desired_batch_size=desired_batch_size),
          slice_key_extractor.SliceKeyExtractor(
              slice_spec, materialize=materialize)
      ]
  else:
    return [
        input_extractor.InputExtractor(eval_config=eval_config),
        slice_key_extractor.SliceKeyExtractor(
            slice_spec, materialize=materialize)
    ]
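
For a plain serving model (no EvalSavedModel signature and no TFLite flavor), the last branch under `elif eval_shared_model:` is taken. A minimal usage sketch of the default_extractors function shown above, with eval_shared_model again standing in for the result of tfma.default_eval_shared_model(...) and eval_config parsed as in the earlier snippets:

# Sketch only: default extractor chain for a non-legacy serving model.
extractors = default_extractors(
    eval_config=eval_config,
    eval_shared_model=eval_shared_model,
    materialize=False)
# Per the branch above, the returned chain in this case is:
#   input_extractor.InputExtractor
#   predict_extractor_v2.PredictExtractor
#   slice_key_extractor.SliceKeyExtractor
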