Example #1
  def testSliceOneSlice(self):
    with beam.Pipeline() as pipeline:
      fpls = create_fpls()
      metrics = (
          pipeline
          | 'CreateTestInput' >> beam.Create(fpls)
          | 'WrapFpls' >> beam.Map(wrap_fpl)
          | 'ExtractSlices' >> slice_key_extractor._ExtractSliceKeys([
              slicer.SingleSliceSpec(),
              slicer.SingleSliceSpec(columns=['gender'])
          ])
          | 'FanoutSlices' >> slicer.FanoutSlices())

      def check_result(got):
        try:
          self.assertEqual(4, len(got), 'got: %s' % got)
          expected_result = [
              ((), wrap_fpl(fpls[0])),
              ((), wrap_fpl(fpls[1])),
              ((('gender', 'f'),), wrap_fpl(fpls[0])),
              ((('gender', 'm'),), wrap_fpl(fpls[1])),
          ]
          self.assertEqual(
              sorted(got, key=lambda x: x[0]),
              sorted(expected_result, key=lambda x: x[0]))
        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(metrics, check_result)
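These tests rely on the create_fpls and wrap_fpl fixtures, which are not shown here. Below is a minimal sketch of what they might look like, assuming the tfma types and constants modules and the {'node': ...} feature encoding used in FPLs; the real fixtures may carry more features:

import numpy as np
from tensorflow_model_analysis import constants
from tensorflow_model_analysis import types


def make_features_dict(features_dict):
  # Wrap raw values the way FPL feature dicts are keyed (an assumption).
  return {k: {'node': np.array(v)} for k, v in features_dict.items()}


def create_fpls():
  # Two FPLs whose 'gender' and 'interest' values line up with the
  # expected slices in the tests above.
  fpl1 = types.FeaturesPredictionsLabels(
      input_ref=0,
      features=make_features_dict({'gender': ['f'], 'interest': ['cars']}),
      predictions=make_features_dict({'kb': [1]}),
      labels=make_features_dict({'ad_risk_score': [0]}))
  fpl2 = types.FeaturesPredictionsLabels(
      input_ref=1,
      features=make_features_dict(
          {'gender': ['m'], 'interest': ['cars', 'movies']}),
      predictions=make_features_dict({'kb': [1]}),
      labels=make_features_dict({'ad_risk_score': [0]}))
  return [fpl1, fpl2]


def wrap_fpl(fpl):
  # An extract carrying the FPL under both the input and FPL keys.
  return {
      constants.INPUT_KEY: fpl,
      constants.FEATURES_PREDICTIONS_LABELS_KEY: fpl,
  }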
Example #2
  def testSliceDefaultSlice(self):
    with beam.Pipeline() as pipeline:
      fpls = create_fpls()

      metrics = (
          pipeline
          | 'CreateTestInput' >> beam.Create(fpls)
          | 'WrapFpls' >> beam.Map(wrap_fpl)
          | 'ExtractSlices' >> slice_key_extractor._ExtractSliceKeys(
              [slicer.SingleSliceSpec()])
          | 'FanoutSlices' >> slicer.FanoutSlices())

      def check_result(got):
        try:
          self.assertEqual(2, len(got), 'got: %s' % got)
          expected_result = [
              ((), wrap_fpl(fpls[0])),
              ((), wrap_fpl(fpls[1])),
          ]
          self.assertEqual(len(got), len(expected_result))
          self.assertTrue(
              got[0] == expected_result[0] and got[1] == expected_result[1] or
              got[1] == expected_result[0] and got[0] == expected_result[1])
        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(metrics, check_result)
Example #3
  def testSliceOnMetaFeature(self):
    # We want to make sure that slicing on the newly added meta feature
    # works, so we pull in the slicer here.
    with beam.Pipeline() as pipeline:
      fpls = create_fpls()
      metrics = (
          pipeline
          | 'CreateTestInput' >> beam.Create(fpls)
          | 'WrapFpls' >> beam.Map(wrap_fpl)
          | 'ExtractInterestsNum' >>
          meta_feature_extractor.ExtractMetaFeature(get_num_interests)
          | 'ExtractSlices' >> slice_key_extractor._ExtractSliceKeys([
              slicer.SingleSliceSpec(),
              slicer.SingleSliceSpec(columns=['num_interests'])
          ])
          | 'FanoutSlices' >> slicer.FanoutSlices())

      def check_result(got):
        try:
          self.assertEqual(4, len(got), 'got: %s' % got)
          expected_slice_keys = [
              (),
              (),
              (('num_interests', 1),),
              (('num_interests', 2),),
          ]
          self.assertEqual(
              sorted(slice_key for slice_key, _ in got),
              sorted(expected_slice_keys))
        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(metrics, check_result)
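This test additionally assumes a get_num_interests meta-feature function. A plausible sketch, assuming the meta_feature_extractor module exposes a get_feature_value helper (an assumption; otherwise read fpl.features directly):

def get_num_interests(fpl):
  # Derive a 'num_interests' meta feature from the number of 'interest'
  # values on the FPL; the helper name is an assumption.
  interests = meta_feature_extractor.get_feature_value(fpl, 'interest')
  return {'num_interests': len(interests)}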
Example #4
def ComputeMetricsAndPlots(  # pylint: disable=invalid-name
    extracts: beam.pvalue.PCollection,
    eval_shared_model: types.EvalSharedModel,
    desired_batch_size: Optional[int] = None,
    num_bootstrap_samples: Optional[int] = 1,
    random_seed_for_testing: Optional[int] = None
) -> Tuple[beam.pvalue.DoOutputsTuple, beam.pvalue.PCollection]:
    """Computes metrics and plots using the EvalSavedModel.

  Args:
    extracts: PCollection of Extracts. The extracts MUST contain a
      FeaturesPredictionsLabels extract keyed by
      tfma.FEATURE_PREDICTIONS_LABELS_KEY and a list of SliceKeyType extracts
      keyed by tfma.SLICE_KEY_TYPES_KEY. Typically these will be added by
      calling the default_extractors function.
    eval_shared_model: Shared model parameters for EvalSavedModel including any
      additional metrics (see EvalSharedModel for more information on how to
      configure additional metrics).
    desired_batch_size: Optional batch size for batching in Aggregate.
    num_bootstrap_samples: Set to value > 1 to run metrics analysis over
      multiple bootstrap samples and compute uncertainty intervals.
    random_seed_for_testing: Provide for deterministic tests only.

  Returns:
    Tuple of Tuple[PCollection of (slice key, metrics),
    PCollection of (slice key, plot metrics)] and
    PCollection of (slice_key and its example count).
  """

    _ = (extracts.pipeline
         | counter_util.IncrementMetricsComputationCounters(
             eval_shared_model.add_metrics_callbacks))

    # pylint: disable=no-value-for-parameter
    slices = (
        extracts

        # Input: one example at a time, with slice keys in extracts.
        # Output: one fpl example per slice key (notice that the example turns
        #         into n logical examples, references to which are replicated once
        #         per applicable slice key).
        | 'FanoutSlices' >> slicer.FanoutSlices())

    slices_count = (slices
                    | 'ExtractSliceKeys' >> beam.Keys()
                    | 'CountPerSliceKey' >> beam.combiners.Count.PerElement())

    aggregated_metrics = (
        slices
        # Metrics are computed per slice key.
        # Output: Multi-outputs, a dict of slice key to computed metrics, and
        # plots if applicable.
        | 'ComputePerSliceMetrics' >> aggregate.ComputePerSliceMetrics(
            eval_shared_model=eval_shared_model,
            desired_batch_size=desired_batch_size,
            num_bootstrap_samples=num_bootstrap_samples,
            random_seed_for_testing=random_seed_for_testing))
    return (aggregated_metrics, slices_count)
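A minimal usage sketch for this transform, with the model path and input data as placeholders; the final extract stage stands in for running tfma.default_extractors, which adds the FPL and slice-key extracts that ComputeMetricsAndPlots requires:

import apache_beam as beam
import tensorflow_model_analysis as tfma

eval_shared_model = tfma.default_eval_shared_model(
    eval_saved_model_path='/path/to/eval_saved_model')  # placeholder path

with beam.Pipeline() as pipeline:
  extracts = (
      pipeline
      | 'ReadExamples' >> beam.Create(serialized_examples)  # placeholder data
      | 'InputsToExtracts' >> tfma.InputsToExtracts()
      # Placeholder stage: run the default extractors here so that the
      # FPL and slice-key extracts exist downstream.
      | 'ExtractFplsAndSliceKeys' >> extract_fpls_and_slice_keys)
  metrics_and_plots, slices_count = ComputeMetricsAndPlots(
      extracts, eval_shared_model, num_bootstrap_samples=20)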
Example #5
def ComputeMetricsAndPlots(  # pylint: disable=invalid-name
    extracts,
    eval_shared_model,
    desired_batch_size=None,
    num_bootstrap_samples=1,
    random_seed=None,
):
  """Computes metrics and plots using the EvalSavedModel.

  Args:
    extracts: PCollection of Extracts. The extracts MUST contain a
      FeaturesPredictionsLabels extract keyed by
      tfma.FEATURE_PREDICTIONS_LABELS_KEY and a list of SliceKeyType extracts
      keyed by tfma.SLICE_KEY_TYPES_KEY. Typically these will be added by
      calling the default_extractors function.
    eval_shared_model: Shared model parameters for EvalSavedModel including any
      additional metrics (see EvalSharedModel for more information on how to
      configure additional metrics).
    desired_batch_size: Optional batch size for batching in Aggregate.
    num_bootstrap_samples: Set to a value > 1 to run the metrics analysis over
      multiple bootstrap samples and compute uncertainty intervals.
    random_seed: Provide for deterministic tests only.

  Returns:
    DoOutputsTuple. The tuple entries are
    PCollection of (slice key, metrics) and
    PCollection of (slice key, plot metrics).
  """
  # pylint: disable=no-value-for-parameter
  return (
      extracts

      # Input: one example at a time, with slice keys in extracts.
      # Output: one fpl example per slice key (notice that the example turns
      #         into n, replicated once per applicable slice key)
      | 'FanoutSlices' >> slicer.FanoutSlices()

      # Each slice key lands on one shard where metrics are computed for all
      # examples in that shard -- the "map" and "reduce" parts of the
      # computation happen within this shard.
      # Output: Multi-outputs, a dict of slice key to computed metrics, and
      # plots if applicable.
      | 'ComputePerSliceMetrics' >> aggregate.ComputePerSliceMetrics(
          eval_shared_model=eval_shared_model,
          desired_batch_size=desired_batch_size,
          num_bootstrap_samples=num_bootstrap_samples,
          random_seed=random_seed))
Example #6
def ComputeMetricsAndPlots(  # pylint: disable=invalid-name
    extracts: beam.pvalue.PCollection,
    eval_shared_model: types.EvalSharedModel,
    desired_batch_size: Optional[int] = None,
    compute_confidence_intervals: Optional[bool] = False,
    random_seed_for_testing: Optional[int] = None
) -> Tuple[beam.pvalue.DoOutputsTuple, beam.pvalue.PCollection]:
  """Computes metrics and plots using the EvalSavedModel.

  Args:
    extracts: PCollection of Extracts. The extracts MUST contain a
      FeaturesPredictionsLabels extract keyed by
      tfma.FEATURE_PREDICTIONS_LABELS_KEY and a list of SliceKeyType extracts
      keyed by tfma.SLICE_KEY_TYPES_KEY. Typically these will be added by
      calling the default_extractors function.
    eval_shared_model: Shared model parameters for EvalSavedModel including any
      additional metrics (see EvalSharedModel for more information on how to
      configure additional metrics).
    desired_batch_size: Optional batch size for batching in Aggregate.
    compute_confidence_intervals: Set to True to run metrics analysis over
      multiple bootstrap samples and compute uncertainty intervals.
    random_seed_for_testing: Provide for deterministic tests only.

  Returns:
    A tuple of a DoOutputsTuple holding a PCollection of (slice key, metrics)
    and a PCollection of (slice key, plot metrics), and a PCollection of
    (slice key, example count).
  """
  # pylint: disable=no-value-for-parameter

  _ = (
      extracts.pipeline
      | counter_util.IncrementMetricsComputationCounters(
          eval_shared_model.add_metrics_callbacks))

  slices = (
      extracts
      # Downstream computation only cares about FPLs, so we prune before fanout.
      # Note that fanout itself will prune the slice keys.
      # TODO(b/130032676, b/111353165): Prune FPLs to contain only the necessary
      # set for the calculation of post_export_metrics if possible.
      | 'PruneExtracts' >> extractor.Filter(include=[
          constants.FEATURES_PREDICTIONS_LABELS_KEY,
          constants.SLICE_KEY_TYPES_KEY,
          constants.INPUT_KEY,
      ])
      # Input: one example at a time, with slice keys in extracts.
      # Output: one fpl example per slice key (notice that the example turns
      #         into n logical examples, references to which are replicated once
      #         per applicable slice key).
      | 'FanoutSlices' >> slicer.FanoutSlices())

  slices_count = (
      slices
      | 'ExtractSliceKeys' >> beam.Keys()
      | 'CountPerSliceKey' >> beam.combiners.Count.PerElement())

  aggregated_metrics = (
      slices
      # Metrics are computed per slice key.
      # Output: Multi-outputs, a dict of slice key to computed metrics, and
      # plots if applicable.
      | 'ComputePerSliceMetrics' >>
      poisson_bootstrap.ComputeWithConfidenceIntervals(
          aggregate.ComputePerSliceMetrics,
          num_bootstrap_samples=(poisson_bootstrap.DEFAULT_NUM_BOOTSTRAP_SAMPLES
                                 if compute_confidence_intervals else 1),
          random_seed_for_testing=random_seed_for_testing,
          eval_shared_model=eval_shared_model,
          desired_batch_size=desired_batch_size)
      | 'SeparateMetricsAndPlots' >> beam.ParDo(
          _SeparateMetricsAndPlotsFn()).with_outputs(
              _SeparateMetricsAndPlotsFn.OUTPUT_TAG_PLOTS,
              main=_SeparateMetricsAndPlotsFn.OUTPUT_TAG_METRICS))

  return (aggregated_metrics, slices_count)
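Given the output tags above, the returned DoOutputsTuple can be indexed per output. A short usage sketch, assuming extracts and eval_shared_model are already built as in the earlier examples:

metrics_and_plots, slices_count = ComputeMetricsAndPlots(
    extracts, eval_shared_model, compute_confidence_intervals=True)
# Index the multi-output by tag to get separate PCollections.
metrics = metrics_and_plots[_SeparateMetricsAndPlotsFn.OUTPUT_TAG_METRICS]
plots = metrics_and_plots[_SeparateMetricsAndPlotsFn.OUTPUT_TAG_PLOTS]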