# ---- Example #1 ----
    def testSerializeDeserializeToFile(self):
        """Writes metrics, plots and config to disk and reads them back."""
        metric_key = (('fruit', 'pear'), ('animal', 'duck'))
        metric_values = {
            'alpha': np.array([0.1]),
            'bravo': np.array([0.2]),
            'charlie': np.float32(0.3)
        }
        # After the round trip, arrays come back as plain lists and
        # np.float32 as a plain float.
        want_metrics = {
            'alpha': [0.1],
            'bravo': [0.2],
            'charlie': 0.3,
        }
        plot_key = (('fruit', 'peach'), ('animal', 'cow'))
        plot_values = {
            'alpha': np.array([0.5, 0.6, 0.7]),
            'bravo': np.array([0.6, 0.7, 0.8]),
            'charlie': np.float32(0.7)
        }
        want_plots = {
            'alpha': [0.5, 0.6, 0.7],
            'bravo': [0.6, 0.7, 0.8],
            'charlie': 0.7,
        }
        slice_specs = [
            slicer.SingleSliceSpec(features=[('age', 5), ('gender', 'f')],
                                   columns=['country']),
            slicer.SingleSliceSpec(features=[('age', 6), ('gender', 'm')],
                                   columns=['interest']),
        ]
        eval_config = api_types.EvalConfig(
            model_location='/path/to/model',
            data_location='/path/to/data',
            slice_spec=slice_specs,
            example_weight_metric_key='key')

        output_path = self._getTempDir()
        with beam.Pipeline() as pipeline:
            metrics_pcoll = (
                pipeline
                | 'CreateMetrics' >> beam.Create([(metric_key, metric_values)]))
            plots_pcoll = (
                pipeline
                | 'CreatePlots' >> beam.Create([(plot_key, plot_values)]))

            _ = ((metrics_pcoll, plots_pcoll)
                 | 'WriteMetricsPlotsAndConfig' >>
                 serialization.WriteMetricsPlotsAndConfig(
                     output_path=output_path, eval_config=eval_config))

        # Pipeline has finished (context manager exited); read everything back.
        got_metrics, got_plots = serialization.load_plots_and_metrics(
            output_path)
        self.assertSliceMetricsListEqual([(metric_key, want_metrics)],
                                         got_metrics)
        self.assertSliceMetricsListEqual([(plot_key, want_plots)], got_plots)

        self.assertEqual(eval_config,
                         serialization.load_eval_config(output_path))
# ---- Example #2 ----
    def _makeEvalResults(self):
        """Returns an EvalResults fixture with two results, model-centric."""

        def _config(data_location, model_location):
            # Only the two location fields differ between the results.
            return api_types.EvalConfig(
                example_weight_metric_key=None,
                slice_spec=None,
                data_location=data_location,
                model_location=model_location)

        first = api_types.EvalResult(
            slicing_metrics=self._makeTestData(),
            plots=None,
            config=_config(self.data_location_1, self.model_location_1))
        second = api_types.EvalResult(
            slicing_metrics=[self.result_c2],
            plots=None,
            config=_config(self.full_data_location_2,
                           self.full_model_location_2))
        return api_types.EvalResults([first, second],
                                     constants.MODEL_CENTRIC_MODE)
# ---- Example #3 ----
 def testSerializeDeserializeEvalConfig(self):
     """An EvalConfig survives a serialize/deserialize round trip intact."""
     slice_specs = [
         slicer.SingleSliceSpec(features=[('age', 5), ('gender', 'f')],
                                columns=['country']),
         slicer.SingleSliceSpec(features=[('age', 6), ('gender', 'm')],
                                columns=['interest']),
     ]
     eval_config = api_types.EvalConfig(
         model_location='/path/to/model',
         data_location='/path/to/data',
         slice_spec=slice_specs,
         example_weight_metric_key='key')
     # Deserialization yields a dict keyed by _EVAL_CONFIG_KEY.
     round_tripped = serialization._deserialize_eval_config_raw(
         serialization._serialize_eval_config(eval_config))
     self.assertEqual(eval_config,
                      round_tripped[serialization._EVAL_CONFIG_KEY])
# ---- Example #4 ----
def EvaluateAndWriteResults(  # pylint: disable=invalid-name
    examples,
    eval_saved_model_path,
    output_path,
    display_only_data_location = None,
    slice_spec = None,
    example_weight_key = None,
    add_metrics_callbacks = None,  # pylint: disable=bad-whitespace
    desired_batch_size = None,
):
  """Public API version of evaluate.Evaluate that handles example weights.

  Users who want to construct their own Beam pipelines instead of using the
  lightweight run_model_analysis functions should use this PTransform.

  Example usage:

    with beam.Pipeline(runner=...) as p:
      _ = (p
           | 'ReadData' >> beam.io.ReadFromTFRecord(data_location)
           | 'EvaluateAndWriteResults' >> tfma.EvaluateAndWriteResults(
               eval_saved_model_path=model_location,
               output_path=output_path,
               display_only_data_location=data_location,
               slice_spec=slice_spec,
               example_weight_key=example_weight_key,
               ...))
    result = tfma.load_eval_result(output_path=output_path)
    tfma.view.render_slicing_metrics(result)

  Note that the exact serialization format is an internal implementation detail
  and subject to change. Users should only use the TFMA functions to write and
  read the results.

  Args:
    examples: PCollection of input examples. Can be any format the model accepts
      (e.g. string containing CSV row, TensorFlow.Example, etc).
    eval_saved_model_path: Path to EvalSavedModel. This directory should contain
      the saved_model.pb file.
    output_path: Path to output metrics and plots results.
    display_only_data_location: Optional path indicating where the examples
      were read from. This is used only for display purposes - data will not
      actually be read from this path.
    slice_spec: Optional list of SingleSliceSpec specifying the slices to slice
      the data into. If None, defaults to the overall slice.
    example_weight_key: The key of the example weight column. If None, weight
      will be 1 for each example.
    add_metrics_callbacks: Optional list of callbacks for adding additional
      metrics to the graph. The names of the metrics added by the callbacks
      should not conflict with existing metrics, or metrics added by other
      callbacks. See below for more details about what each callback should do.
      The caller's list is not modified.
    desired_batch_size: Optional batch size for batching in Predict and
      Aggregate.

  Returns:
    PDone.
  """

  # Work on a copy: appending to the caller's list below would mutate their
  # argument in place (aliasing bug) and grow it on every call with the same
  # list object.
  if add_metrics_callbacks is None:
    add_metrics_callbacks = []
  else:
    add_metrics_callbacks = list(add_metrics_callbacks)

  # Always compute example weight and example count.
  # pytype: disable=module-attr
  example_count_callback = post_export_metrics.example_count()
  example_weight_metric_key = metric_keys.EXAMPLE_COUNT
  add_metrics_callbacks.append(example_count_callback)
  if example_weight_key:
    example_weight_metric_key = metric_keys.EXAMPLE_WEIGHT
    example_weight_callback = post_export_metrics.example_weight(
        example_weight_key)
    add_metrics_callbacks.append(example_weight_callback)
  # pytype: enable=module-attr

  metrics, plots = examples | 'Evaluate' >> evaluate.Evaluate(
      eval_saved_model_path=eval_saved_model_path,
      add_metrics_callbacks=add_metrics_callbacks,
      slice_spec=slice_spec,
      desired_batch_size=desired_batch_size)

  # The real data location is unknown when the caller hands us a PCollection;
  # display_only_data_location overrides this placeholder for display.
  data_location = '<user provided PCollection>'
  if display_only_data_location is not None:
    data_location = display_only_data_location

  eval_config = api_types.EvalConfig(
      model_location=eval_saved_model_path,
      data_location=data_location,
      slice_spec=slice_spec,
      example_weight_metric_key=example_weight_metric_key)

  _ = ((metrics, plots)
       | 'SerializeMetricsAndPlots' >> serialization.SerializeMetricsAndPlots(
           post_export_metrics=add_metrics_callbacks)
       |
       'WriteMetricsPlotsAndConfig' >> serialization.WriteMetricsPlotsAndConfig(
           output_path=output_path, eval_config=eval_config))

  return beam.pvalue.PDone(examples.pipeline)
# ---- Example #5 ----
    def testSerializeDeserializeToFile(self):
        """Round-trips serialized metrics/plots protos and the EvalConfig.

        Writes pre-serialized MetricsForSlice/PlotsForSlice protos through
        WriteMetricsPlotsAndConfig and verifies the load_* helpers return
        matching slice keys, metrics, plots and config.
        """
        metrics_slice_key = _make_slice_key('fruit', 'pear', 'animal', 'duck')
        # The slice_key embedded in this proto must mirror metrics_slice_key
        # above for the list-equality assertion at the end to hold.
        metrics_for_slice = text_format.Parse(
            """
        slice_key {
          single_slice_keys {
            column: "fruit"
            bytes_value: "pear"
          }
          single_slice_keys {
            column: "animal"
            bytes_value: "duck"
          }
        }
        metrics {
          key: "accuracy"
          value {
            double_value {
              value: 0.8
            }
          }
        }
        metrics {
          key: "example_weight"
          value {
            double_value {
              value: 10.0
            }
          }
        }
        metrics {
          key: "auc"
          value {
            bounded_value {
              lower_bound {
                value: 0.1
              }
              upper_bound {
                value: 0.3
              }
              value {
                value: 0.2
              }
            }
          }
        }
        metrics {
          key: "auprc"
          value {
            bounded_value {
              lower_bound {
                value: 0.05
              }
              upper_bound {
                value: 0.17
              }
              value {
                value: 0.1
              }
            }
          }
        }""", metrics_for_slice_pb2.MetricsForSlice())
        # Plots proto; its embedded slice_key matches plots_slice_key below.
        plots_for_slice = text_format.Parse(
            """
        slice_key {
          single_slice_keys {
            column: "fruit"
            bytes_value: "peach"
          }
          single_slice_keys {
            column: "animal"
            bytes_value: "cow"
          }
        }
        plot_data {
          calibration_histogram_buckets {
            buckets {
              lower_threshold_inclusive: -inf
              upper_threshold_exclusive: 0.0
              num_weighted_examples { value: 0.0 }
              total_weighted_label { value: 0.0 }
              total_weighted_refined_prediction { value: 0.0 }
            }
            buckets {
              lower_threshold_inclusive: 0.0
              upper_threshold_exclusive: 0.5
              num_weighted_examples { value: 1.0 }
              total_weighted_label { value: 1.0 }
              total_weighted_refined_prediction { value: 0.3 }
            }
            buckets {
              lower_threshold_inclusive: 0.5
              upper_threshold_exclusive: 1.0
              num_weighted_examples { value: 1.0 }
              total_weighted_label { value: 0.0 }
              total_weighted_refined_prediction { value: 0.7 }
            }
            buckets {
              lower_threshold_inclusive: 1.0
              upper_threshold_exclusive: inf
              num_weighted_examples { value: 0.0 }
              total_weighted_label { value: 0.0 }
              total_weighted_refined_prediction { value: 0.0 }
            }
          }
        }""", metrics_for_slice_pb2.PlotsForSlice())
        plots_slice_key = _make_slice_key('fruit', 'peach', 'animal', 'cow')
        # The config is written alongside the metrics/plots and read back below.
        eval_config = api_types.EvalConfig(
            model_location='/path/to/model',
            data_location='/path/to/data',
            slice_spec=[
                slicer.SingleSliceSpec(features=[('age', 5), ('gender', 'f')],
                                       columns=['country']),
                slicer.SingleSliceSpec(features=[('age', 6), ('gender', 'm')],
                                       columns=['interest'])
            ],
            example_weight_metric_key='key')

        output_path = self._getTempDir()
        with beam.Pipeline() as pipeline:
            # Feed the already-serialized proto bytes to the write transform.
            metrics = (pipeline
                       | 'CreateMetrics' >> beam.Create(
                           [metrics_for_slice.SerializeToString()]))
            plots = (pipeline
                     | 'CreatePlots' >> beam.Create(
                         [plots_for_slice.SerializeToString()]))

            _ = ((metrics, plots)
                 | 'WriteMetricsPlotsAndConfig' >>
                 serialization.WriteMetricsPlotsAndConfig(
                     output_path=output_path, eval_config=eval_config))

        # Pipeline has run (context manager exited); now read everything back.
        metrics, plots = serialization.load_plots_and_metrics(output_path)
        self.assertSliceMetricsListEqual(
            [(metrics_slice_key, metrics_for_slice.metrics)], metrics)
        self.assertSlicePlotsListEqual(
            [(plots_slice_key, plots_for_slice.plot_data)], plots)
        got_eval_config = serialization.load_eval_config(output_path)
        self.assertEqual(eval_config, got_eval_config)
def ExtractEvaluateAndWriteResults(  # pylint: disable=invalid-name
    examples,
    eval_shared_model,
    output_path,
    display_only_data_location=None,
    slice_spec=None,
    desired_batch_size=None,
    extractors=None,
    fanout=16,
):
    """Public API version of evaluate.Evaluate that handles example weights.

    Users who want to construct their own Beam pipelines instead of using the
    lightweight run_model_analysis functions should use this PTransform.

    Example usage:
      eval_shared_model = tfma.default_eval_shared_model(
          eval_saved_model_path=model_location,
          add_metrics_callbacks=[...],
          example_weight_key=example_weight_key)
      with beam.Pipeline(runner=...) as p:
        _ = (p
             | 'ReadData' >> beam.io.ReadFromTFRecord(data_location)
             | 'ExtractEvaluateAndWriteResults' >>
             tfma.ExtractEvaluateAndWriteResults(
                 eval_shared_model=eval_shared_model,
                 output_path=output_path,
                 display_only_data_location=data_location,
                 slice_spec=slice_spec,
                 ...))
      result = tfma.load_eval_result(output_path=output_path)
      tfma.view.render_slicing_metrics(result)

    Note that the exact serialization format is an internal implementation
    detail and subject to change. Users should only use the TFMA functions to
    write and read the results.

    Args:
      examples: PCollection of input examples. Can be any format the model
        accepts (e.g. string containing CSV row, TensorFlow.Example, etc).
      eval_shared_model: Shared model parameters for EvalSavedModel including
        any additional metrics (see EvalSharedModel for more information on how
        to configure additional metrics).
      output_path: Path to output metrics and plots results.
      display_only_data_location: Optional path indicating where the examples
        were read from. This is used only for display purposes - data will not
        actually be read from this path.
      slice_spec: Optional list of SingleSliceSpec specifying the slices to
        slice the data into. If None, defaults to the overall slice.
      desired_batch_size: Optional batch size for batching in Predict and
        Aggregate.
      extractors: Optional list of Extractors to apply to ExampleAndExtracts.
        If provided, the extracts MUST contain a FeaturesPredictionsLabels
        extract with key 'fpl' and a list of SliceKeyType extracts with key
        'slice_keys'. Typically these will be added by calling the
        default_extractors function. If no extractors are provided,
        default_extractors (non-materialized) will be used.
      fanout: Forwarded to evaluate.Evaluate. Presumably controls the fan-out
        of the intermediate combine stage; confirm semantics against
        evaluate.Evaluate's documentation.

    Raises:
      ValueError: If PredictExtractor or SliceKeyExtractor is not present in
        extractors.

    Returns:
      PDone.
    """
    # `if not extractors` treats an explicit empty list the same as None:
    # both fall back to the default (non-materialized) extractors.
    if not extractors:
        extractors = default_extractors(eval_shared_model=eval_shared_model,
                                        slice_spec=slice_spec,
                                        desired_batch_size=desired_batch_size,
                                        materialize=False)

    metrics, plots = (
        examples
        | 'ToExampleAndExtracts' >> evaluate.ToExampleAndExtracts()
        | 'Extract' >> evaluate.Extract(extractors=extractors)
        |
        'Evaluate' >> evaluate.Evaluate(eval_shared_model=eval_shared_model,
                                        desired_batch_size=desired_batch_size,
                                        fanout=fanout))

    # The real data location is unknown when the caller hands us a PCollection;
    # display_only_data_location overrides this placeholder for display.
    data_location = '<user provided PCollection>'
    if display_only_data_location is not None:
        data_location = display_only_data_location

    # Record which example-weight metric applies so viewers know what to show.
    example_weight_metric_key = metric_keys.EXAMPLE_COUNT
    if eval_shared_model.example_weight_key:
        example_weight_metric_key = metric_keys.EXAMPLE_WEIGHT

    eval_config = api_types.EvalConfig(
        model_location=eval_shared_model.model_path,
        data_location=data_location,
        slice_spec=slice_spec,
        example_weight_metric_key=example_weight_metric_key)

    _ = ((metrics, plots)
         |
         'SerializeMetricsAndPlots' >> serialization.SerializeMetricsAndPlots(
             post_export_metrics=eval_shared_model.add_metrics_callbacks)
         | 'WriteMetricsPlotsAndConfig' >>
         serialization.WriteMetricsPlotsAndConfig(output_path=output_path,
                                                  eval_config=eval_config))

    return beam.pvalue.PDone(examples.pipeline)