def testSliceOneSlice(self):
  with beam.Pipeline() as pipeline:
    fpls = create_fpls()
    metrics = (
        pipeline
        | 'CreateTestInput' >> beam.Create(fpls)
        | 'WrapFpls' >> beam.Map(wrap_fpl)
        | 'ExtractSlices' >> slice_key_extractor._ExtractSliceKeys([
            slicer.SingleSliceSpec(),
            slicer.SingleSliceSpec(columns=['gender'])
        ])
        | 'FanoutSlices' >> slicer.FanoutSlices())

    def check_result(got):
      try:
        self.assertEqual(4, len(got), 'got: %s' % got)
        expected_result = [
            ((), wrap_fpl(fpls[0])),
            ((), wrap_fpl(fpls[1])),
            ((('gender', 'f'),), wrap_fpl(fpls[0])),
            ((('gender', 'm'),), wrap_fpl(fpls[1])),
        ]
        self.assertEqual(
            sorted(got, key=lambda x: x[0]),
            sorted(expected_result, key=lambda x: x[0]))
      except AssertionError as err:
        raise util.BeamAssertException(err)

    util.assert_that(metrics, check_result)


def testSliceDefaultSlice(self):
  with beam.Pipeline() as pipeline:
    fpls = create_fpls()
    metrics = (
        pipeline
        | 'CreateTestInput' >> beam.Create(fpls)
        | 'WrapFpls' >> beam.Map(wrap_fpl)
        | 'ExtractSlices' >> slice_key_extractor._ExtractSliceKeys(
            [slicer.SingleSliceSpec()])
        | 'FanoutSlices' >> slicer.FanoutSlices())

    def check_result(got):
      try:
        self.assertEqual(2, len(got), 'got: %s' % got)
        expected_result = [
            ((), wrap_fpl(fpls[0])),
            ((), wrap_fpl(fpls[1])),
        ]
        self.assertEqual(len(got), len(expected_result))
        self.assertTrue(
            got[0] == expected_result[0] and got[1] == expected_result[1] or
            got[1] == expected_result[0] and got[0] == expected_result[1])
      except AssertionError as err:
        raise util.BeamAssertException(err)

    util.assert_that(metrics, check_result)


def testSliceOnMetaFeature(self):
  # We want to make sure that slicing on the newly added meta feature works,
  # so we pull the slicing logic in here.
  with beam.Pipeline() as pipeline:
    fpls = create_fpls()
    metrics = (
        pipeline
        | 'CreateTestInput' >> beam.Create(fpls)
        | 'WrapFpls' >> beam.Map(wrap_fpl)
        | 'ExtractInterestsNum' >>
        meta_feature_extractor.ExtractMetaFeature(get_num_interests)
        | 'ExtractSlices' >> slice_key_extractor._ExtractSliceKeys([
            slicer.SingleSliceSpec(),
            slicer.SingleSliceSpec(columns=['num_interests'])
        ])
        | 'FanoutSlices' >> slicer.FanoutSlices())

    def check_result(got):
      try:
        self.assertEqual(4, len(got), 'got: %s' % got)
        expected_slice_keys = [
            (),
            (),
            (('num_interests', 1),),
            (('num_interests', 2),),
        ]
        self.assertEqual(
            sorted(slice_key for slice_key, _ in got),
            sorted(expected_slice_keys))
      except AssertionError as err:
        raise util.BeamAssertException(err)

    util.assert_that(metrics, check_result)
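

# The tests above rely on helpers defined elsewhere in this test module:
# create_fpls, wrap_fpl, and get_num_interests. A minimal sketch of what they
# might look like follows, assuming the module's existing imports (numpy as
# np, plus tfma's types, encoding, and constants). The exact feature values
# and the meta-feature signature are assumptions for illustration, not the
# canonical definitions.


def create_fpls():
  """Returns two FeaturesPredictionsLabels, one per gender (sketch only)."""
  fpl1 = types.FeaturesPredictionsLabels(
      input_ref=0,
      features={
          'gender': {encoding.NODE_SUFFIX: np.array(['f'])},
          'interests': {encoding.NODE_SUFFIX: np.array(['cars'])},
      },
      predictions={},
      labels={})
  fpl2 = types.FeaturesPredictionsLabels(
      input_ref=1,
      features={
          'gender': {encoding.NODE_SUFFIX: np.array(['m'])},
          'interests': {encoding.NODE_SUFFIX: np.array(['cars', 'movies'])},
      },
      predictions={},
      labels={})
  return [fpl1, fpl2]


def wrap_fpl(fpl):
  """Wraps an FPL into the extracts dict format used by the extractors."""
  return {
      constants.INPUT_KEY: fpl,
      constants.FEATURES_PREDICTIONS_LABELS_KEY: fpl,
  }


def get_num_interests(fpl):
  """Returns a 'num_interests' meta feature (signature is an assumption)."""
  interests = fpl.features['interests'][encoding.NODE_SUFFIX]
  return {'num_interests': len(interests)}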


def ComputeMetricsAndPlots(  # pylint: disable=invalid-name
    extracts: beam.pvalue.PCollection,
    eval_shared_model: types.EvalSharedModel,
    desired_batch_size: Optional[int] = None,
    num_bootstrap_samples: Optional[int] = 1,
    random_seed_for_testing: Optional[int] = None
) -> Tuple[beam.pvalue.DoOutputsTuple, beam.pvalue.PCollection]:
  """Computes metrics and plots using the EvalSavedModel.

  Args:
    extracts: PCollection of Extracts. The extracts MUST contain a
      FeaturesPredictionsLabels extract keyed by
      tfma.FEATURE_PREDICTIONS_LABELS_KEY and a list of SliceKeyType extracts
      keyed by tfma.SLICE_KEY_TYPES_KEY. Typically these will be added by
      calling the default_extractors function.
    eval_shared_model: Shared model parameters for EvalSavedModel including
      any additional metrics (see EvalSharedModel for more information on how
      to configure additional metrics).
    desired_batch_size: Optional batch size for batching in Aggregate.
    num_bootstrap_samples: Set to a value > 1 to run metrics analysis over
      multiple bootstrap samples and compute uncertainty intervals.
    random_seed_for_testing: Provide for deterministic tests only.

  Returns:
    Tuple of Tuple[PCollection of (slice key, metrics),
    PCollection of (slice key, plot metrics)] and
    PCollection of (slice_key and its example count).
  """
  _ = (
      extracts.pipeline
      | counter_util.IncrementMetricsComputationCounters(
          eval_shared_model.add_metrics_callbacks))

  # pylint: disable=no-value-for-parameter
  slices = (
      extracts
      # Input: one example at a time, with slice keys in extracts.
      # Output: one fpl example per slice key (notice that the example turns
      #   into n logical examples, references to which are replicated once
      #   per applicable slice key).
      | 'FanoutSlices' >> slicer.FanoutSlices())

  slices_count = (
      slices
      | 'ExtractSliceKeys' >> beam.Keys()
      | 'CountPerSliceKey' >> beam.combiners.Count.PerElement())

  aggregated_metrics = (
      slices
      # Metrics are computed per slice key.
      # Output: Multi-outputs, a dict of slice key to computed metrics, and
      #   plots if applicable.
      | 'ComputePerSliceMetrics' >> aggregate.ComputePerSliceMetrics(
          eval_shared_model=eval_shared_model,
          desired_batch_size=desired_batch_size,
          num_bootstrap_samples=num_bootstrap_samples,
          random_seed_for_testing=random_seed_for_testing))

  return (aggregated_metrics, slices_count)
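

# A hypothetical sketch showing how the per-slice example counts returned
# above might be materialized. `_write_slice_counts_sketch` and `output_path`
# are placeholder names, not part of the original module; beam.io.WriteToText
# and slicer.stringify_slice_key are existing Beam / slicer utilities.


def _write_slice_counts_sketch(slices_count, output_path):
  return (
      slices_count
      # Render each (slice key, count) pair as a human-readable line.
      | 'FormatCounts' >> beam.Map(
          lambda kv: '%s: %d' % (slicer.stringify_slice_key(kv[0]), kv[1]))
      | 'WriteCounts' >> beam.io.WriteToText(output_path))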


def ComputeMetricsAndPlots(  # pylint: disable=invalid-name
    extracts,
    eval_shared_model,
    desired_batch_size=None,
    num_bootstrap_samples=1,
    random_seed=None):
  """Computes metrics and plots using the EvalSavedModel.

  Args:
    extracts: PCollection of Extracts. The extracts MUST contain a
      FeaturesPredictionsLabels extract keyed by
      tfma.FEATURE_PREDICTIONS_LABELS_KEY and a list of SliceKeyType extracts
      keyed by tfma.SLICE_KEY_TYPES_KEY. Typically these will be added by
      calling the default_extractors function.
    eval_shared_model: Shared model parameters for EvalSavedModel including
      any additional metrics (see EvalSharedModel for more information on how
      to configure additional metrics).
    desired_batch_size: Optional batch size for batching in Aggregate.
    num_bootstrap_samples: Set to a value > 1 to run metrics analysis over
      multiple bootstrap samples and compute uncertainty intervals.
    random_seed: Provide for deterministic tests only.

  Returns:
    DoOutputsTuple. The tuple entries are
    PCollection of (slice key, metrics) and
    PCollection of (slice key, plot metrics).
  """
  # pylint: disable=no-value-for-parameter
  return (
      extracts
      # Input: one example at a time, with slice keys in extracts.
      # Output: one fpl example per slice key (notice that the example turns
      #   into n, replicated once per applicable slice key)
      | 'FanoutSlices' >> slicer.FanoutSlices()
      # Each slice key lands on one shard where metrics are computed for all
      # examples in that shard -- the "map" and "reduce" parts of the
      # computation happen within this shard.
      # Output: Multi-outputs, a dict of slice key to computed metrics, and
      #   plots if applicable.
      | 'ComputePerSliceMetrics' >> aggregate.ComputePerSliceMetrics(
          eval_shared_model=eval_shared_model,
          desired_batch_size=desired_batch_size,
          num_bootstrap_samples=num_bootstrap_samples,
          random_seed=random_seed))


def ComputeMetricsAndPlots(  # pylint: disable=invalid-name
    extracts: beam.pvalue.PCollection,
    eval_shared_model: types.EvalSharedModel,
    desired_batch_size: Optional[int] = None,
    compute_confidence_intervals: Optional[bool] = False,
    random_seed_for_testing: Optional[int] = None
) -> Tuple[beam.pvalue.DoOutputsTuple, beam.pvalue.PCollection]:
  """Computes metrics and plots using the EvalSavedModel.

  Args:
    extracts: PCollection of Extracts. The extracts MUST contain a
      FeaturesPredictionsLabels extract keyed by
      tfma.FEATURE_PREDICTIONS_LABELS_KEY and a list of SliceKeyType extracts
      keyed by tfma.SLICE_KEY_TYPES_KEY. Typically these will be added by
      calling the default_extractors function.
    eval_shared_model: Shared model parameters for EvalSavedModel including
      any additional metrics (see EvalSharedModel for more information on how
      to configure additional metrics).
    desired_batch_size: Optional batch size for batching in Aggregate.
    compute_confidence_intervals: Set to True to run metrics analysis over
      multiple bootstrap samples and compute uncertainty intervals.
    random_seed_for_testing: Provide for deterministic tests only.

  Returns:
    Tuple of Tuple[PCollection of (slice key, metrics),
    PCollection of (slice key, plot metrics)] and
    PCollection of (slice_key and its example count).
  """
  # pylint: disable=no-value-for-parameter
  _ = (
      extracts.pipeline
      | counter_util.IncrementMetricsComputationCounters(
          eval_shared_model.add_metrics_callbacks))

  slices = (
      extracts
      # Downstream computation only cares about FPLs, so we prune before
      # fanout. Note that fanout itself will prune the slice keys.
      # TODO(b/130032676, b/111353165): Prune FPLs to contain only the
      #   necessary set for the calculation of post_export_metrics if
      #   possible.
      | 'PruneExtracts' >> extractor.Filter(include=[
          constants.FEATURES_PREDICTIONS_LABELS_KEY,
          constants.SLICE_KEY_TYPES_KEY,
          constants.INPUT_KEY,
      ])
      # Input: one example at a time, with slice keys in extracts.
      # Output: one fpl example per slice key (notice that the example turns
      #   into n logical examples, references to which are replicated once
      #   per applicable slice key).
      | 'FanoutSlices' >> slicer.FanoutSlices())

  slices_count = (
      slices
      | 'ExtractSliceKeys' >> beam.Keys()
      | 'CountPerSliceKey' >> beam.combiners.Count.PerElement())

  aggregated_metrics = (
      slices
      # Metrics are computed per slice key.
      # Output: Multi-outputs, a dict of slice key to computed metrics, and
      #   plots if applicable.
      | 'ComputePerSliceMetrics' >>
      poisson_bootstrap.ComputeWithConfidenceIntervals(
          aggregate.ComputePerSliceMetrics,
          num_bootstrap_samples=(
              poisson_bootstrap.DEFAULT_NUM_BOOTSTRAP_SAMPLES
              if compute_confidence_intervals else 1),
          random_seed_for_testing=random_seed_for_testing,
          eval_shared_model=eval_shared_model,
          desired_batch_size=desired_batch_size)
      | 'SeparateMetricsAndPlots' >> beam.ParDo(
          _SeparateMetricsAndPlotsFn()).with_outputs(
              _SeparateMetricsAndPlotsFn.OUTPUT_TAG_PLOTS,
              main=_SeparateMetricsAndPlotsFn.OUTPUT_TAG_METRICS))

  return (aggregated_metrics, slices_count)
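

# A minimal usage sketch for the transform above. It assumes the function is
# wrapped as a Beam PTransform via @beam.ptransform_fn (which the
# `no-value-for-parameter` pylint disable suggests); `_example_usage_sketch`,
# `extracts_pcoll`, and `shared_model` are hypothetical names, not part of
# the original module.


def _example_usage_sketch(extracts_pcoll, shared_model):
  (metrics_and_plots, slices_count) = (
      extracts_pcoll
      | 'ComputeMetricsAndPlots' >> ComputeMetricsAndPlots(
          eval_shared_model=shared_model,
          compute_confidence_intervals=True))
  # The DoOutputsTuple can be indexed by the output tags declared in
  # _SeparateMetricsAndPlotsFn to split metrics from plots.
  metrics = metrics_and_plots[_SeparateMetricsAndPlotsFn.OUTPUT_TAG_METRICS]
  plots = metrics_and_plots[_SeparateMetricsAndPlotsFn.OUTPUT_TAG_PLOTS]
  return metrics, plots, slices_count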