def _runTestWithCustomCheck(self,
                             examples,
                             eval_export_dir,
                             metrics,
                             custom_metrics_check=None,
                             custom_plots_check=None):
   # make sure we are doing some checks
   self.assertTrue(custom_metrics_check is not None or
                   custom_plots_check is not None)
   serialized_examples = [ex.SerializeToString() for ex in examples]
   eval_shared_model = types.EvalSharedModel(
       model_path=eval_export_dir, add_metrics_callbacks=metrics)
   extractors = model_eval_lib.default_extractors(
       eval_shared_model=eval_shared_model)
   with beam.Pipeline() as pipeline:
     metrics, plots = (
         pipeline
         | 'Create' >> beam.Create(serialized_examples)
         | 'ToExampleAndExtracts' >> evaluate.ToExampleAndExtracts()
         | 'Extract' >> evaluate.Extract(extractors=extractors)
          | 'Evaluate' >> evaluate.Evaluate(
              eval_shared_model=eval_shared_model))
     if custom_metrics_check is not None:
       util.assert_that(metrics, custom_metrics_check, label='metrics')
     if custom_plots_check is not None:
       util.assert_that(plots, custom_plots_check, label='plot')
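
A minimal sketch of how this helper might be used. The test name, estimator fixture, example values, and expected count are illustrative assumptions modeled on the other examples in this listing, not part of the original:

def testExampleCountViaCustomCheck(self):  # hypothetical, for illustration only
  # Assumed fixture: the fixed_prediction_estimator used elsewhere in this
  # listing; any exported EvalSavedModel would do.
  temp_eval_export_dir = self._getEvalExportDir()
  _, eval_export_dir = (
      fixed_prediction_estimator.simple_fixed_prediction_estimator(
          None, temp_eval_export_dir))
  examples = [
      self._makeExample(prediction=0.0, label=1.0),
      self._makeExample(prediction=1.0, label=1.0),
  ]

  def check_metrics(got):
    try:
      self.assertEqual(1, len(got), 'got: %s' % got)
      (slice_key, value) = got[0]
      self.assertEqual((), slice_key)
      self.assertDictElementsAlmostEqual(
          got_values_dict=value,
          expected_values_dict={metric_keys.EXAMPLE_COUNT: 2.0})
    except AssertionError as err:
      raise util.BeamAssertException(err)

  self._runTestWithCustomCheck(
      examples,
      eval_export_dir,
      [post_export_metrics.example_count()],
      custom_metrics_check=check_metrics)
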
Example #2
  def testEvaluateWithPlots(self):
    temp_eval_export_dir = self._getEvalExportDir()
    _, eval_export_dir = (
        fixed_prediction_estimator.simple_fixed_prediction_estimator(
            None, temp_eval_export_dir))

    with beam.Pipeline() as pipeline:
      example1 = self._makeExample(prediction=0.0, label=1.0)
      example2 = self._makeExample(prediction=0.7, label=0.0)
      example3 = self._makeExample(prediction=0.8, label=1.0)
      example4 = self._makeExample(prediction=1.0, label=1.0)

      metrics, plots = (
          pipeline
          | beam.Create([
              example1.SerializeToString(),
              example2.SerializeToString(),
              example3.SerializeToString(),
              example4.SerializeToString()
          ])
          | evaluate.Evaluate(
              eval_saved_model_path=eval_export_dir,
              add_metrics_callbacks=[
                  post_export_metrics.example_count(),
                  post_export_metrics.auc_plots()
              ]))

      def check_metrics(got):
        try:
          self.assertEqual(1, len(got), 'got: %s' % got)
          (slice_key, value) = got[0]
          self.assertEqual((), slice_key)
          self.assertDictElementsAlmostEqual(
              got_values_dict=value,
              expected_values_dict={
                  metric_keys.EXAMPLE_COUNT: 4.0,
              })
        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(metrics, check_metrics, label='metrics')

      def check_plots(got):
        try:
          self.assertEqual(1, len(got), 'got: %s' % got)
          (slice_key, value) = got[0]
          self.assertEqual((), slice_key)
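          # Assumed row layout for each AUC plot matrix entry:
          # [fn, tn, fp, tp, precision, recall]; with only the 1.0 prediction
          # counted as positive this gives [2, 1, 0, 1, 1.0, 1/3].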
          self.assertDictMatrixRowsAlmostEqual(
              got_values_dict=value,
              expected_values_dict={
                  metric_keys.AUC_PLOTS_MATRICES: [(8001, [
                      2, 1, 0, 1, 1.0 / 1.0, 1.0 / 3.0
                  ])],
              })
        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(plots, check_plots, label='plots')
Example #3
    def testEvaluateNoSlicingAddPostExportAndCustomMetrics(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = linear_classifier.simple_linear_classifier(
            None, temp_eval_export_dir)

        with beam.Pipeline() as pipeline:
            example1 = self._makeExample(age=3.0,
                                         language='english',
                                         label=1.0)
            example2 = self._makeExample(age=3.0,
                                         language='chinese',
                                         label=0.0)
            example3 = self._makeExample(age=4.0,
                                         language='english',
                                         label=1.0)
            example4 = self._makeExample(age=5.0,
                                         language='chinese',
                                         label=0.0)

            metrics, plots = (pipeline
                              | beam.Create([
                                  example1.SerializeToString(),
                                  example2.SerializeToString(),
                                  example3.SerializeToString(),
                                  example4.SerializeToString()
                              ])
                              | evaluate.Evaluate(
                                  eval_saved_model_path=eval_export_dir,
                                  add_metrics_callbacks=[
                                      _addExampleCountMetricCallback,
                                      post_export_metrics.example_count(),
                                      post_export_metrics.example_weight(
                                          example_weight_key='age')
                                  ]))

            def check_result(got):
                try:
                    self.assertEqual(1, len(got), 'got: %s' % got)
                    (slice_key, value) = got[0]
                    self.assertEqual((), slice_key)
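                    # EXAMPLE_WEIGHT uses 'age' as the weight key, so the
                    # expected value is the sum of ages: 3 + 3 + 4 + 5 = 15.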
                    self.assertDictElementsAlmostEqual(
                        got_values_dict=value,
                        expected_values_dict={
                            'accuracy': 1.0,
                            'label/mean': 0.5,
                            'my_mean_age': 3.75,
                            'my_mean_age_times_label': 1.75,
                            'added_example_count': 4.0,
                            metric_keys.EXAMPLE_COUNT: 4.0,
                            metric_keys.EXAMPLE_WEIGHT: 15.0
                        })
                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(metrics, check_result, label='metrics')
            util.assert_that(plots, util.is_empty(), label='plots')
Example #4
    def testEvaluateNoSlicing(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = linear_classifier.simple_linear_classifier(
            None, temp_eval_export_dir)
        eval_shared_model = types.EvalSharedModel(
            model_path=eval_export_dir,
            add_metrics_callbacks=[_addExampleCountMetricCallback])
        extractors = [
            predict_extractor.PredictExtractor(eval_shared_model),
            slice_key_extractor.SliceKeyExtractor()
        ]

        with beam.Pipeline() as pipeline:
            example1 = self._makeExample(age=3.0,
                                         language='english',
                                         label=1.0)
            example2 = self._makeExample(age=3.0,
                                         language='chinese',
                                         label=0.0)
            example3 = self._makeExample(age=4.0,
                                         language='english',
                                         label=1.0)
            example4 = self._makeExample(age=5.0,
                                         language='chinese',
                                         label=0.0)

            metrics, _ = (
                pipeline
                | 'Create' >> beam.Create([
                    example1.SerializeToString(),
                    example2.SerializeToString(),
                    example3.SerializeToString(),
                    example4.SerializeToString()
                ])
                | 'ToExampleAndExtracts' >> evaluate.ToExampleAndExtracts()
                | 'Extract' >> evaluate.Extract(extractors=extractors)
                | 'Evaluate' >>
                evaluate.Evaluate(eval_shared_model=eval_shared_model))

            def check_result(got):
                try:
                    self.assertEqual(1, len(got), 'got: %s' % got)
                    (slice_key, value) = got[0]
                    self.assertEqual((), slice_key)
                    self.assertDictElementsAlmostEqual(
                        value, {
                            'accuracy': 1.0,
                            'label/mean': 0.5,
                            'my_mean_age': 3.75,
                            'my_mean_age_times_label': 1.75,
                            'added_example_count': 4.0
                        })
                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(metrics, check_result)
Example #5
    def assertMetricsComputedWithBeamAre(self, eval_saved_model_path,
                                         serialized_examples,
                                         expected_metrics):
        """Checks metrics computed using Beam.

    Metrics will be computed over all examples, without any slicing. If you
    want to provide your own PCollection (e.g. read a large number of examples
    from a file), if you want to check metrics over certain slices, or if you
    want to add additional post-export metrics, use the more general
    assertGeneralMetricsComputedWithBeamAre.

    Example usage:
      self.assertMetricsComputedWithBeamAre(
        eval_saved_model_path=path,
        serialized_examples=[self.makeExample(age=5, label=1.0),
                             self.makeExample(age=10, label=0.0)],
        expected_metrics={'average_loss': 0.1})

    Args:
      eval_saved_model_path: Path to the directory containing the
        EvalSavedModel.
      serialized_examples: List of serialized example bytes.
      expected_metrics: Dictionary of expected metric values.
    """
        def check_metrics(got):
            """Check metrics callback."""
            try:
                self.assertEqual(
                    1, len(got),
                    'expecting metrics for exactly one slice, but got %d '
                    'slices instead. metrics were: %s' % (len(got), got))
                (slice_key, value) = got[0]
                self.assertEqual((), slice_key)
                self.assertDictElementsWithinBounds(
                    got_values_dict=value,
                    expected_values_dict=expected_metrics)
            except AssertionError as err:
                raise beam_util.BeamAssertException(err)

        eval_shared_model = types.EvalSharedModel(
            model_path=eval_saved_model_path)
        extractors = model_eval_lib.default_extractors(
            eval_shared_model=eval_shared_model)

        with beam.Pipeline() as pipeline:
            metrics, _ = (
                pipeline
                | 'CreateExamples' >> beam.Create(serialized_examples)
                | 'ToExampleAndExtracts' >> evaluate.ToExampleAndExtracts()
                | 'Extract' >> evaluate.Extract(extractors=extractors)
                | 'Evaluate' >>
                evaluate.Evaluate(eval_shared_model=eval_shared_model))

            beam_util.assert_that(metrics, check_metrics)
Example #6
    def testEvaluateNoSlicingAddPostExportAndCustomMetricsUnsupervisedModel(
            self):
        # Mainly for testing that the ExampleCount post export metric works with
        # unsupervised models.
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = (fixed_prediction_estimator_no_labels.
                              simple_fixed_prediction_estimator_no_labels(
                                  None, temp_eval_export_dir))
        eval_shared_model = types.EvalSharedModel(
            model_path=eval_export_dir,
            add_metrics_callbacks=[
                post_export_metrics.example_count(),
                post_export_metrics.example_weight(
                    example_weight_key='prediction')
            ])
        extractors = [
            predict_extractor.PredictExtractor(eval_shared_model),
            slice_key_extractor.SliceKeyExtractor()
        ]

        with beam.Pipeline() as pipeline:
            example1 = self._makeExample(prediction=1.0)
            example2 = self._makeExample(prediction=2.0)

            metrics, plots = (
                pipeline
                | 'Create' >> beam.Create([
                    example1.SerializeToString(),
                    example2.SerializeToString(),
                ])
                | 'ToExampleAndExtracts' >> evaluate.ToExampleAndExtracts()
                | 'Extract' >> evaluate.Extract(extractors=extractors)
                | 'Evaluate' >>
                evaluate.Evaluate(eval_shared_model=eval_shared_model))

            def check_result(got):
                try:
                    self.assertEqual(1, len(got), 'got: %s' % got)
                    (slice_key, value) = got[0]
                    self.assertEqual((), slice_key)
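                    # example_weight_key is 'prediction', so EXAMPLE_WEIGHT is
                    # the sum of the predictions: 1.0 + 2.0 = 3.0.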
                    self.assertDictElementsAlmostEqual(
                        got_values_dict=value,
                        expected_values_dict={
                            'average_loss': 2.5,
                            metric_keys.EXAMPLE_COUNT: 2.0,
                            metric_keys.EXAMPLE_WEIGHT: 3.0
                        })
                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(metrics, check_result, label='metrics')
            util.assert_that(plots, util.is_empty(), label='plots')
Example #7
 def _runTestWithCustomCheck(self,
                             examples,
                             eval_export_dir,
                             metrics,
                             custom_metrics_check=None,
                             custom_plots_check=None):
     # make sure we are doing some checks
     self.assertTrue(custom_metrics_check is not None
                     or custom_plots_check is not None)
     serialized_examples = [ex.SerializeToString() for ex in examples]
     with beam.Pipeline() as pipeline:
         metrics, plots = (pipeline
                           | beam.Create(serialized_examples)
                           | evaluate.Evaluate(
                               eval_saved_model_path=eval_export_dir,
                               add_metrics_callbacks=metrics))
         if custom_metrics_check is not None:
             util.assert_that(metrics,
                              custom_metrics_check,
                              label='metrics')
         if custom_plots_check is not None:
             util.assert_that(plots, custom_plots_check, label='plot')
Example #8
    def assertGeneralMetricsComputedWithBeamAre(self, eval_saved_model_path,
                                                examples_pcollection,
                                                slice_spec,
                                                add_metrics_callbacks,
                                                expected_slice_metrics):
        """Checks metrics computed using Beam.

    A more general version of assertMetricsComputedWithBeamAre. Note that the
    caller is responsible for setting up and running the Beam pipeline.

    Example usage:
      def add_metrics(features, predictions, labels):
        metric_ops = {
          'mse': tf.metrics.mean_squared_error(labels, predictions['logits']),
          'mae': tf.metrics.mean_absolute_error(labels, predictions['logits']),
        }
        return metric_ops

      with beam.Pipeline() as pipeline:
        expected_slice_metrics = {
            (): {
              'mae': 0.1,
              'mse': 0.2,
              tfma.post_export_metrics.metric_keys.AUC:
                tfma.test.BoundedValue(lower_bound=0.5)
            },
            (('age', 10),): {
              'mae': 0.2,
              'mse': 0.3,
              tfma.post_export_metrics.metric_keys.AUC:
                tfma.test.BoundedValue(lower_bound=0.5)
            },
        }
        examples = pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(path)
        self.assertGeneralMetricsComputedWithBeamAre(
          eval_saved_model_path=path,
          examples_pcollection=examples,
          slice_spec=[tfma.SingleSliceSpec(),
                      tfma.SingleSliceSpec(columns=['age'])],
          add_metrics_callbacks=[
            add_metrics, tfma.post_export_metrics.post_export_metrics.auc()],
          expected_slice_metrics=expected_slice_metrics)

    Args:
      eval_saved_model_path: Path to the directory containing the
        EvalSavedModel.
      examples_pcollection: A PCollection of serialized example bytes.
      slice_spec: List of slice specifications.
      add_metrics_callbacks: Callbacks for adding additional metrics.
      expected_slice_metrics: Dictionary of dictionaries describing the expected
        metrics for each slice. The outer dictionary maps slice keys to the
        expected metrics for that slice.
    """
        def check_metrics(got):
            """Check metrics callback."""
            try:
                slices = {}
                for slice_key, value in got:
                    slices[slice_key] = value
                self.assertItemsEqual(slices.keys(),
                                      expected_slice_metrics.keys())
                for slice_key, expected_metrics in expected_slice_metrics.items(
                ):
                    self.assertDictElementsWithinBounds(
                        got_values_dict=slices[slice_key],
                        expected_values_dict=expected_metrics)
            except AssertionError as err:
                raise beam_util.BeamAssertException(err)

        metrics, _ = (examples_pcollection
                      | 'Evaluate' >> evaluate.Evaluate(
                          eval_saved_model_path=eval_saved_model_path,
                          slice_spec=slice_spec,
                          add_metrics_callbacks=add_metrics_callbacks))

        beam_util.assert_that(metrics, check_metrics)
Example #9
def EvaluateAndWriteResults(  # pylint: disable=invalid-name
    examples,
    eval_saved_model_path,
    output_path,
    display_only_data_location = None,
    slice_spec = None,
    example_weight_key = None,
    add_metrics_callbacks = None,  # pylint: disable=bad-whitespace
    desired_batch_size = None,
):
  """Public API version of evaluate.Evaluate that handles example weights.

  Users who want to construct their own Beam pipelines instead of using the
  lightweight run_model_analysis functions should use this PTransform.

  Example usage:

    with beam.Pipeline(runner=...) as p:
      _ = (p
           | 'ReadData' >> beam.io.ReadFromTFRecord(data_location)
           | 'EvaluateAndWriteResults' >> tfma.EvaluateAndWriteResults(
               eval_saved_model_path=model_location,
               output_path=output_path,
               display_only_data_location=data_location,
               slice_spec=slice_spec,
               example_weight_key=example_weight_key,
               ...))
    result = tfma.load_eval_result(output_path=output_path)
    tfma.view.render_slicing_metrics(result)

  Note that the exact serialization format is an internal implementation detail
  and subject to change. Users should only use the TFMA functions to write and
  read the results.

  Args:
    examples: PCollection of input examples. Can be any format the model accepts
      (e.g. string containing CSV row, TensorFlow.Example, etc).
    eval_saved_model_path: Path to EvalSavedModel. This directory should contain
      the saved_model.pb file.
    output_path: Path to output metrics and plots results.
    display_only_data_location: Optional path indicating where the examples
      were read from. This is used only for display purposes - data will not
      actually be read from this path.
    slice_spec: Optional list of SingleSliceSpec specifying the slices to slice
      the data into. If None, defaults to the overall slice.
    example_weight_key: The key of the example weight column. If None, weight
      will be 1 for each example.
    add_metrics_callbacks: Optional list of callbacks for adding additional
      metrics to the graph. The names of the metrics added by the callbacks
      should not conflict with existing metrics, or metrics added by other
      callbacks. See below for more details about what each callback should do.
    desired_batch_size: Optional batch size for batching in Predict and
      Aggregate.

  Returns:
    PDone.
  """

  if add_metrics_callbacks is None:
    add_metrics_callbacks = []

  # Always compute example weight and example count.
  # pytype: disable=module-attr
  example_count_callback = post_export_metrics.example_count()
  example_weight_metric_key = metric_keys.EXAMPLE_COUNT
  add_metrics_callbacks.append(example_count_callback)
  if example_weight_key:
    example_weight_metric_key = metric_keys.EXAMPLE_WEIGHT
    example_weight_callback = post_export_metrics.example_weight(
        example_weight_key)
    add_metrics_callbacks.append(example_weight_callback)
  # pytype: enable=module-attr

  metrics, plots = examples | 'Evaluate' >> evaluate.Evaluate(
      eval_saved_model_path=eval_saved_model_path,
      add_metrics_callbacks=add_metrics_callbacks,
      slice_spec=slice_spec,
      desired_batch_size=desired_batch_size)

  data_location = '<user provided PCollection>'
  if display_only_data_location is not None:
    data_location = display_only_data_location

  eval_config = api_types.EvalConfig(
      model_location=eval_saved_model_path,
      data_location=data_location,
      slice_spec=slice_spec,
      example_weight_metric_key=example_weight_metric_key)

  _ = ((metrics, plots)
       | 'SerializeMetricsAndPlots' >> serialization.SerializeMetricsAndPlots(
           post_export_metrics=add_metrics_callbacks)
       | 'WriteMetricsPlotsAndConfig' >>
       serialization.WriteMetricsPlotsAndConfig(
           output_path=output_path, eval_config=eval_config))

  return beam.pvalue.PDone(examples.pipeline)
Example #10
    def testEvaluateWithSlicingAndDifferentBatchSizes(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = linear_classifier.simple_linear_classifier(
            None, temp_eval_export_dir)

        for batch_size in [1, 2, 4, 8]:

            with beam.Pipeline() as pipeline:
                example1 = self._makeExample(age=3.0,
                                             language='english',
                                             label=1.0,
                                             slice_key='first_slice')
                example2 = self._makeExample(age=3.0,
                                             language='chinese',
                                             label=0.0,
                                             slice_key='first_slice')
                example3 = self._makeExample(age=4.0,
                                             language='english',
                                             label=0.0,
                                             slice_key='second_slice')
                example4 = self._makeExample(age=5.0,
                                             language='chinese',
                                             label=1.0,
                                             slice_key='second_slice')
                example5 = self._makeExample(age=5.0,
                                             language='chinese',
                                             label=1.0,
                                             slice_key='second_slice')

                metrics, plots = (
                    pipeline
                    | beam.Create([
                        example1.SerializeToString(),
                        example2.SerializeToString(),
                        example3.SerializeToString(),
                        example4.SerializeToString(),
                        example5.SerializeToString(),
                    ])
                    | evaluate.Evaluate(
                        eval_saved_model_path=eval_export_dir,
                        add_metrics_callbacks=[_addExampleCountMetricCallback],
                        slice_spec=[
                            slicer.SingleSliceSpec(),
                            slicer.SingleSliceSpec(columns=['slice_key'])
                        ],
                        desired_batch_size=batch_size))

                def check_result(got):
                    try:
                        self.assertEqual(3, len(got), 'got: %s' % got)
                        slices = {}
                        for slice_key, value in got:
                            slices[slice_key] = value
                        overall_slice = ()
                        first_slice = (('slice_key', 'first_slice'), )
                        second_slice = (('slice_key', 'second_slice'), )
                        self.assertItemsEqual(
                            slices.keys(),
                            [overall_slice, first_slice, second_slice])
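                        # Expected values follow from the five examples above,
                        # e.g. overall my_mean_age = (3+3+4+5+5)/5 = 4.0 and
                        # my_mean_age_times_label = (3+0+0+5+5)/5 = 2.6.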
                        self.assertDictElementsAlmostEqual(
                            slices[overall_slice], {
                                'accuracy': 0.4,
                                'label/mean': 0.6,
                                'my_mean_age': 4.0,
                                'my_mean_age_times_label': 2.6,
                                'added_example_count': 5.0
                            })
                        self.assertDictElementsAlmostEqual(
                            slices[first_slice], {
                                'accuracy': 1.0,
                                'label/mean': 0.5,
                                'my_mean_age': 3.0,
                                'my_mean_age_times_label': 1.5,
                                'added_example_count': 2.0
                            })
                        self.assertDictElementsAlmostEqual(
                            slices[second_slice], {
                                'accuracy': 0.0,
                                'label/mean': 2.0 / 3.0,
                                'my_mean_age': 14.0 / 3.0,
                                'my_mean_age_times_label': 10.0 / 3.0,
                                'added_example_count': 3.0
                            })

                    except AssertionError as err:
                        # This function is redefined every iteration, so it will have the
                        # right value of batch_size.
                        raise util.BeamAssertException(
                            'batch_size = %d, error: %s' % (batch_size, err))  # pylint: disable=cell-var-from-loop

                util.assert_that(metrics, check_result, label='metrics')
                util.assert_that(plots, util.is_empty(), label='plots')
Example #11
def ExtractEvaluateAndWriteResults(  # pylint: disable=invalid-name
    examples,
    eval_shared_model,
    output_path,
    display_only_data_location=None,
    slice_spec=None,
    desired_batch_size=None,
    extractors=None,
    fanout=16,
):
    """Public API version of evaluate.Evaluate that handles example weights.

  Users who want to construct their own Beam pipelines instead of using the
  lightweight run_model_analysis functions should use this PTransform.

  Example usage:
    eval_shared_model = tfma.default_eval_shared_model(
        eval_saved_model_path=model_location,
        add_metrics_callbacks=[...],
        example_weight_key=example_weight_key)
    with beam.Pipeline(runner=...) as p:
      _ = (p
           | 'ReadData' >> beam.io.ReadFromTFRecord(data_location)
           | 'ExtractEvaluateAndWriteResults' >>
           tfma.ExtractEvaluateAndWriteResults(
               eval_shared_model=eval_shared_model,
               output_path=output_path,
               display_only_data_location=data_location,
               slice_spec=slice_spec,
               ...))
    result = tfma.load_eval_result(output_path=output_path)
    tfma.view.render_slicing_metrics(result)

  Note that the exact serialization format is an internal implementation detail
  and subject to change. Users should only use the TFMA functions to write and
  read the results.

  Args:
    examples: PCollection of input examples. Can be any format the model accepts
      (e.g. string containing CSV row, TensorFlow.Example, etc).
    eval_shared_model: Shared model parameters for EvalSavedModel including any
      additional metrics (see EvalSharedModel for more information on how to
      configure additional metrics).
    output_path: Path to output metrics and plots results.
    display_only_data_location: Optional path indicating where the examples were
      read from. This is used only for display purposes - data will not actually
      be read from this path.
    slice_spec: Optional list of SingleSliceSpec specifying the slices to slice
      the data into. If None, defaults to the overall slice.
    desired_batch_size: Optional batch size for batching in Predict and
      Aggregate.
    extractors: Optional list of Extractors to apply to ExampleAndExtracts. If
      provided, the extracts MUST contain a FeaturesPredictionsLabels extract
      with key 'fpl' and a list of SliceKeyType extracts with key 'slice_keys'.
      Typically these will be added by calling the default_extractors function.
      If no extractors are provided, default_extractors (non-materialized) will
      be used.
    fanout: Fanout to use when computing metrics (passed through to
      evaluate.Evaluate).

  Raises:
    ValueError: If PredictExtractor or SliceKeyExtractor is not present in
      extractors.

  Returns:
    PDone.
  """
    if not extractors:
        extractors = default_extractors(eval_shared_model=eval_shared_model,
                                        slice_spec=slice_spec,
                                        desired_batch_size=desired_batch_size,
                                        materialize=False)

    metrics, plots = (
        examples
        | 'ToExampleAndExtracts' >> evaluate.ToExampleAndExtracts()
        | 'Extract' >> evaluate.Extract(extractors=extractors)
        | 'Evaluate' >> evaluate.Evaluate(
            eval_shared_model=eval_shared_model,
            desired_batch_size=desired_batch_size,
            fanout=fanout))

    data_location = '<user provided PCollection>'
    if display_only_data_location is not None:
        data_location = display_only_data_location

    example_weight_metric_key = metric_keys.EXAMPLE_COUNT
    if eval_shared_model.example_weight_key:
        example_weight_metric_key = metric_keys.EXAMPLE_WEIGHT

    eval_config = api_types.EvalConfig(
        model_location=eval_shared_model.model_path,
        data_location=data_location,
        slice_spec=slice_spec,
        example_weight_metric_key=example_weight_metric_key)

    _ = ((metrics, plots)
         | 'SerializeMetricsAndPlots' >>
         serialization.SerializeMetricsAndPlots(
             post_export_metrics=eval_shared_model.add_metrics_callbacks)
         | 'WriteMetricsPlotsAndConfig' >>
         serialization.WriteMetricsPlotsAndConfig(output_path=output_path,
                                                  eval_config=eval_config))

    return beam.pvalue.PDone(examples.pipeline)
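
A hedged sketch of calling this PTransform with an explicit extractors list instead of relying on default_extractors; model_location, data_location, and output_path are placeholder variables, and the extractor list mirrors the tests earlier in this listing:

# Illustrative only: model_location, data_location and output_path are assumed
# to be defined by the caller.
eval_shared_model = types.EvalSharedModel(
    model_path=model_location,
    add_metrics_callbacks=[post_export_metrics.example_count()])
extractors = [
    predict_extractor.PredictExtractor(eval_shared_model),
    slice_key_extractor.SliceKeyExtractor()
]
with beam.Pipeline() as pipeline:
    _ = (pipeline
         | 'ReadData' >> beam.io.ReadFromTFRecord(data_location)
         | 'ExtractEvaluateAndWriteResults' >>
         ExtractEvaluateAndWriteResults(
             eval_shared_model=eval_shared_model,
             output_path=output_path,
             extractors=extractors))
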
Example #12
    def testEvaluateNoSlicingAddPostExportAndCustomMetrics(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = linear_classifier.simple_linear_classifier(
            None, temp_eval_export_dir)
        eval_shared_model = types.EvalSharedModel(
            model_path=eval_export_dir,
            add_metrics_callbacks=[
                _addExampleCountMetricCallback,
                # Note that since everything runs in-process this doesn't
                # actually test that the py_func can be correctly recreated
                # on workers in a distributed context.
                _addPyFuncMetricCallback,
                post_export_metrics.example_count(),
                post_export_metrics.example_weight(example_weight_key='age')
            ])
        extractors = [
            predict_extractor.PredictExtractor(eval_shared_model),
            slice_key_extractor.SliceKeyExtractor()
        ]

        with beam.Pipeline() as pipeline:
            example1 = self._makeExample(age=3.0,
                                         language='english',
                                         label=1.0)
            example2 = self._makeExample(age=3.0,
                                         language='chinese',
                                         label=0.0)
            example3 = self._makeExample(age=4.0,
                                         language='english',
                                         label=1.0)
            example4 = self._makeExample(age=5.0,
                                         language='chinese',
                                         label=0.0)

            metrics, plots = (
                pipeline
                | 'Create' >> beam.Create([
                    example1.SerializeToString(),
                    example2.SerializeToString(),
                    example3.SerializeToString(),
                    example4.SerializeToString()
                ])
                | 'ToExampleAndExtracts' >> evaluate.ToExampleAndExtracts()
                | 'Extract' >> evaluate.Extract(extractors=extractors)
                | 'Evaluate' >>
                evaluate.Evaluate(eval_shared_model=eval_shared_model))

            def check_result(got):
                try:
                    self.assertEqual(1, len(got), 'got: %s' % got)
                    (slice_key, value) = got[0]
                    self.assertEqual((), slice_key)
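                    # 'py_func_label_sum' is expected to equal the sum of the
                    # labels above: 1 + 0 + 1 + 0 = 2.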
                    self.assertDictElementsAlmostEqual(
                        got_values_dict=value,
                        expected_values_dict={
                            'accuracy': 1.0,
                            'label/mean': 0.5,
                            'my_mean_age': 3.75,
                            'my_mean_age_times_label': 1.75,
                            'added_example_count': 4.0,
                            'py_func_label_sum': 2.0,
                            metric_keys.EXAMPLE_COUNT: 4.0,
                            metric_keys.EXAMPLE_WEIGHT: 15.0
                        })
                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(metrics, check_result, label='metrics')
            util.assert_that(plots, util.is_empty(), label='plots')