def assertMetricsComputedWithBeamAre(self, eval_saved_model_path,
                                     serialized_examples, expected_metrics,
                                     add_metrics_callbacks=None):
  """Checks metrics computed using Beam.

  Metrics will be computed over all examples, without any slicing. If you
  want to provide your own PCollection (e.g. read a large number of examples
  from a file), if you want to check metrics over certain slices, or if you
  want to add additional post-export metrics, use the more general
  assertGeneralMetricsComputedWithBeamAre.

  Example usage:
    self.assertMetricsComputedWithBeamAre(
      eval_saved_model_path=path,
      serialized_examples=[self.makeExample(age=5, label=1.0),
                           self.makeExample(age=10, label=0.0)],
      expected_metrics={'average_loss': 0.1})

  Args:
    eval_saved_model_path: Path to the directory containing the
      EvalSavedModel.
    serialized_examples: List of serialized example bytes.
    expected_metrics: Dictionary of expected metric values.
    add_metrics_callbacks: Optional. Callbacks for adding additional metrics.
  """

  def check_metrics(got):
    """Check metrics callback."""
    try:
      self.assertEqual(
          1, len(got), 'expecting metrics for exactly one slice, but got %d '
          'slices instead. metrics were: %s' % (len(got), got))
      (slice_key, value) = got[0]
      self.assertEqual((), slice_key)
      self.assertDictElementsWithinBounds(
          got_values_dict=value, expected_values_dict=expected_metrics)
    except AssertionError as err:
      raise beam_util.BeamAssertException(err)

  eval_shared_model = model_eval_lib.default_eval_shared_model(
      eval_saved_model_path=eval_saved_model_path,
      add_metrics_callbacks=add_metrics_callbacks)
  extractors = model_eval_lib.default_extractors(
      eval_shared_model=eval_shared_model)

  with beam.Pipeline() as pipeline:
    # pylint: disable=no-value-for-parameter
    metrics, _ = (
        pipeline
        | 'CreateExamples' >> beam.Create(serialized_examples)
        | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
        | 'Extract' >> Extract(extractors=extractors)
        | 'ComputeMetricsAndPlots' >>
        metrics_and_plots_evaluator.ComputeMetricsAndPlots(
            eval_shared_model=eval_shared_model))
    # pylint: enable=no-value-for-parameter

    beam_util.assert_that(metrics, check_metrics)
def _runTestWithCustomCheck(self,
                            examples,
                            eval_export_dir,
                            metrics,
                            custom_metrics_check=None,
                            custom_plots_check=None):
  """Evaluates the examples with Beam and applies the given custom checks."""
  # make sure we are doing some checks
  self.assertTrue(custom_metrics_check is not None or
                  custom_plots_check is not None)
  serialized_examples = [ex.SerializeToString() for ex in examples]
  eval_shared_model = types.EvalSharedModel(
      model_path=eval_export_dir, add_metrics_callbacks=metrics)
  extractors = model_eval_lib.default_extractors(
      eval_shared_model=eval_shared_model)
  with beam.Pipeline() as pipeline:
    metrics, plots = (
        pipeline
        | 'Create' >> beam.Create(serialized_examples)
        | 'ToExampleAndExtracts' >> evaluate.ToExampleAndExtracts()
        | 'Extract' >> evaluate.Extract(extractors=extractors)
        | 'Evaluate' >> evaluate.Evaluate(eval_shared_model=eval_shared_model))
    if custom_metrics_check is not None:
      util.assert_that(metrics, custom_metrics_check, label='metrics')
    if custom_plots_check is not None:
      util.assert_that(plots, custom_plots_check, label='plot')
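# Illustrative usage sketch (not part of the original code): a test built on
# _runTestWithCustomCheck above constructs a few examples, points at an
# exported EvalSavedModel, and verifies the computed metrics via a custom
# check callback. The callback receives a list of (slice_key, metrics_dict)
# tuples; with no slicing there is exactly one entry, keyed by the empty
# tuple. `export_test_eval_saved_model` is a hypothetical helper, the example
# features are placeholders, and `util` is assumed to be
# apache_beam.testing.util.
def testAverageLossWithCustomCheck(self):
  eval_export_dir = self.export_test_eval_saved_model()  # hypothetical helper
  examples = [
      self.makeExample(age=5, label=1.0),
      self.makeExample(age=10, label=0.0),
  ]

  def check_metrics(got):
    try:
      self.assertEqual(1, len(got))
      (slice_key, value) = got[0]
      self.assertEqual((), slice_key)
      # 'average_loss' mirrors the docstring examples above; it is assumed to
      # be one of the metrics emitted by the exported model.
      self.assertIn('average_loss', value)
    except AssertionError as err:
      raise util.BeamAssertException(err)

  self._runTestWithCustomCheck(
      examples, eval_export_dir, metrics=[],
      custom_metrics_check=check_metrics)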
def _runTestWithCustomCheck(self,
                            examples,
                            eval_export_dir,
                            metrics_callbacks,
                            slice_spec=None,
                            custom_metrics_check=None,
                            custom_plots_check=None,
                            custom_result_check=None):
  """Evaluates the examples, optionally sliced, and applies custom checks."""
  # make sure we are doing some checks
  self.assertTrue(custom_metrics_check is not None or
                  custom_plots_check is not None or
                  custom_result_check is not None)
  serialized_examples = [ex.SerializeToString() for ex in examples]
  slicing_specs = None
  if slice_spec:
    slicing_specs = [s.to_proto() for s in slice_spec]
  eval_config = config.EvalConfig(slicing_specs=slicing_specs)
  eval_shared_model = self.createTestEvalSharedModel(
      eval_saved_model_path=eval_export_dir,
      add_metrics_callbacks=metrics_callbacks)
  extractors = model_eval_lib.default_extractors(
      eval_config=eval_config, eval_shared_model=eval_shared_model)
  tfx_io = raw_tf_record.RawBeamRecordTFXIO(
      physical_format='inmemory',
      raw_record_column_name=constants.ARROW_INPUT_COLUMN,
      telemetry_descriptors=['TFMATest'])
  with beam.Pipeline() as pipeline:
    (metrics, plots), _ = (
        pipeline
        | 'Create' >> beam.Create(serialized_examples)
        | 'BatchExamples' >> tfx_io.BeamSource()
        | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
        | 'Extract' >> tfma_unit.Extract(extractors=extractors)  # pylint: disable=no-value-for-parameter
        | 'ComputeMetricsAndPlots' >>
        legacy_metrics_and_plots_evaluator._ComputeMetricsAndPlots(  # pylint: disable=protected-access
            eval_shared_model=eval_shared_model,
            compute_confidence_intervals=self.compute_confidence_intervals,
            random_seed_for_testing=self.deterministic_test_seed))
    if custom_metrics_check is not None:
      util.assert_that(metrics, custom_metrics_check, label='metrics')
    if custom_plots_check is not None:
      util.assert_that(plots, custom_plots_check, label='plot')
  result = pipeline.run()
  if custom_result_check is not None:
    custom_result_check(result)
def _runTestWithCustomCheck(self,
                            examples,
                            eval_export_dir,
                            metrics_callbacks,
                            slice_spec=None,
                            custom_metrics_check=None,
                            custom_plots_check=None,
                            custom_result_check=None):
  """Evaluates the examples, optionally sliced, and applies custom checks."""
  # make sure we are doing some checks
  self.assertTrue(custom_metrics_check is not None or
                  custom_plots_check is not None or
                  custom_result_check is not None)
  serialized_examples = [ex.SerializeToString() for ex in examples]
  slicing_specs = None
  if slice_spec:
    slicing_specs = [s.to_proto() for s in slice_spec]
  eval_config = config.EvalConfig(
      input_data_specs=[config.InputDataSpec()],
      model_specs=[config.ModelSpec(location=eval_export_dir)],
      output_data_specs=[config.OutputDataSpec()],
      slicing_specs=slicing_specs)
  eval_shared_model = self.createTestEvalSharedModel(
      eval_saved_model_path=eval_export_dir,
      add_metrics_callbacks=metrics_callbacks)
  extractors = model_eval_lib.default_extractors(
      eval_config=eval_config, eval_shared_models=[eval_shared_model])
  with beam.Pipeline() as pipeline:
    (metrics, plots), _ = (
        pipeline
        | 'Create' >> beam.Create(serialized_examples)
        | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
        | 'Extract' >> tfma_unit.Extract(extractors=extractors)  # pylint: disable=no-value-for-parameter
        | 'ComputeMetricsAndPlots' >>
        metrics_and_plots_evaluator.ComputeMetricsAndPlots(
            eval_shared_model=eval_shared_model,
            compute_confidence_intervals=self.compute_confidence_intervals,
            random_seed_for_testing=self.deterministic_test_seed))
    if custom_metrics_check is not None:
      util.assert_that(metrics, custom_metrics_check, label='metrics')
    if custom_plots_check is not None:
      util.assert_that(plots, custom_plots_check, label='plot')
  result = pipeline.run()
  if custom_result_check is not None:
    custom_result_check(result)
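# Illustrative sliced-usage sketch (not part of the original code): passing a
# slice_spec to the helper above yields one metrics entry per slice key, so
# the custom check can index the results by slice. The exported model, the
# example features, and the `slicer` module reference are assumptions used
# only for illustration; `util` is assumed to be apache_beam.testing.util.
def testMetricsSlicedByAgeWithCustomCheck(self):
  eval_export_dir = self.export_test_eval_saved_model()  # hypothetical helper
  examples = [
      self.makeExample(age=5, label=1.0),
      self.makeExample(age=10, label=0.0),
  ]

  def check_metrics(got):
    try:
      slices = dict(got)
      # Expect the overall slice plus one slice per distinct age value; the
      # exact key form (('age', 5),) assumes age is read as an integer.
      self.assertIn((), slices)
      self.assertIn((('age', 5),), slices)
      self.assertIn((('age', 10),), slices)
    except AssertionError as err:
      raise util.BeamAssertException(err)

  self._runTestWithCustomCheck(
      examples,
      eval_export_dir,
      metrics_callbacks=[],
      slice_spec=[
          slicer.SingleSliceSpec(),
          slicer.SingleSliceSpec(columns=['age']),
      ],
      custom_metrics_check=check_metrics)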
def assertGeneralMetricsComputedWithBeamAre(
    self, eval_saved_model_path: Text,
    examples_pcollection: beam.pvalue.PCollection,
    slice_spec: List[slicer.SingleSliceSpec],
    add_metrics_callbacks: List[types.AddMetricsCallbackType],
    expected_slice_metrics: Dict[Any, Dict[Text, Any]]):
  """Checks metrics computed using Beam.

  A more general version of assertMetricsComputedWithBeamAre. Note that the
  caller is responsible for setting up and running the Beam pipeline.

  Example usage:
    def add_metrics(features, predictions, labels):
      metric_ops = {
        'mse': tf.metrics.mean_squared_error(labels, predictions['logits']),
        'mae': tf.metrics.mean_absolute_error(labels, predictions['logits']),
      }
      return metric_ops

    with beam.Pipeline() as pipeline:
      expected_slice_metrics = {
        (): {
          'mae': 0.1,
          'mse': 0.2,
          tfma.post_export_metrics.metric_keys.AUC:
            tfma.test.BoundedValue(lower_bound=0.5)
        },
        (('age', 10),): {
          'mae': 0.2,
          'mse': 0.3,
          tfma.post_export_metrics.metric_keys.AUC:
            tfma.test.BoundedValue(lower_bound=0.5)
        },
      }
      examples = pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(path)
      self.assertGeneralMetricsComputedWithBeamAre(
        eval_saved_model_path=path,
        examples_pcollection=examples,
        slice_spec=[tfma.slicer.SingleSliceSpec(),
                    tfma.slicer.SingleSliceSpec(columns=['age'])],
        add_metrics_callbacks=[
          add_metrics, tfma.post_export_metrics.auc()],
        expected_slice_metrics=expected_slice_metrics)

  Args:
    eval_saved_model_path: Path to the directory containing the
      EvalSavedModel.
    examples_pcollection: A PCollection of serialized example bytes.
    slice_spec: List of slice specifications.
    add_metrics_callbacks: Callbacks for adding additional metrics.
    expected_slice_metrics: Dictionary of dictionaries describing the expected
      metrics for each slice. The outer dictionary maps slice keys to the
      expected metrics for that slice.
  """

  def check_metrics(got):
    """Check metrics callback."""
    try:
      slices = {}
      for slice_key, value in got:
        slices[slice_key] = value
      self.assertItemsEqual(
          list(slices.keys()), list(expected_slice_metrics.keys()))
      for slice_key, expected_metrics in expected_slice_metrics.items():
        self.assertDictElementsWithinBounds(
            got_values_dict=slices[slice_key],
            expected_values_dict=expected_metrics)
    except AssertionError as err:
      raise beam_util.BeamAssertException(err)

  slicing_specs = None
  if slice_spec:
    slicing_specs = [s.to_proto() for s in slice_spec]
  eval_config = config.EvalConfig(slicing_specs=slicing_specs)
  eval_shared_model = self.createTestEvalSharedModel(
      eval_saved_model_path=eval_saved_model_path,
      add_metrics_callbacks=add_metrics_callbacks)
  extractors = model_eval_lib.default_extractors(
      eval_config=eval_config, eval_shared_model=eval_shared_model)

  # pylint: disable=no-value-for-parameter
  (metrics, _), _ = (
      examples_pcollection
      | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
      | 'Extract' >> Extract(extractors=extractors)
      | 'ComputeMetricsAndPlots' >>
      legacy_metrics_and_plots_evaluator.ComputeMetricsAndPlots(
          eval_shared_model=eval_shared_model))
  # pylint: enable=no-value-for-parameter

  beam_util.assert_that(metrics, check_metrics)
def assertMetricsComputedWithBeamAre(
    self,
    eval_saved_model_path: str,
    serialized_examples: List[bytes],
    expected_metrics: Dict[str, Any],
    add_metrics_callbacks: Optional[List[
        types.AddMetricsCallbackType]] = None):
  """Checks metrics computed using Beam.

  Metrics will be computed over all examples, without any slicing. If you
  want to provide your own PCollection (e.g. read a large number of examples
  from a file), if you want to check metrics over certain slices, or if you
  want to add additional post-export metrics, use the more general
  assertGeneralMetricsComputedWithBeamAre.

  Example usage:
    self.assertMetricsComputedWithBeamAre(
      eval_saved_model_path=path,
      serialized_examples=[self.makeExample(age=5, label=1.0),
                           self.makeExample(age=10, label=0.0)],
      expected_metrics={'average_loss': 0.1})

  Args:
    eval_saved_model_path: Path to the directory containing the
      EvalSavedModel.
    serialized_examples: List of serialized example bytes.
    expected_metrics: Dictionary of expected metric values.
    add_metrics_callbacks: Optional. Callbacks for adding additional metrics.
  """

  def check_metrics(got):
    """Check metrics callback."""
    try:
      self.assertEqual(
          1, len(got), 'expecting metrics for exactly one slice, but got %d '
          'slices instead. metrics were: %s' % (len(got), got))
      (slice_key, value) = got[0]
      self.assertEqual((), slice_key)
      self.assertDictElementsWithinBounds(
          got_values_dict=value, expected_values_dict=expected_metrics)
    except AssertionError as err:
      raise beam_util.BeamAssertException(err)

  eval_config = config_pb2.EvalConfig()
  eval_shared_model = model_eval_lib.default_eval_shared_model(
      eval_saved_model_path=eval_saved_model_path,
      add_metrics_callbacks=add_metrics_callbacks)
  extractors = model_eval_lib.default_extractors(
      eval_config=eval_config, eval_shared_model=eval_shared_model)
  tfx_io = raw_tf_record.RawBeamRecordTFXIO(
      physical_format='inmemory',
      raw_record_column_name=constants.ARROW_INPUT_COLUMN,
      telemetry_descriptors=['TFMATest'])

  with beam.Pipeline() as pipeline:
    # pylint: disable=no-value-for-parameter
    (metrics, _), _ = (
        pipeline
        | 'CreateExamples' >> beam.Create(serialized_examples)
        | 'BatchExamples' >> tfx_io.BeamSource()
        | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
        | 'Extract' >> Extract(extractors=extractors)
        | 'ComputeMetricsAndPlots' >>
        legacy_metrics_and_plots_evaluator._ComputeMetricsAndPlots(  # pylint: disable=protected-access
            eval_shared_model=eval_shared_model))
    # pylint: enable=no-value-for-parameter

    beam_util.assert_that(metrics, check_metrics)