def testMultiModelPredict(self):
  """Tests PredictExtractor with two models evaluated side by side."""
  temp_eval_export_dir = self._getEvalExportDir()
  _, model1_dir = linear_classifier.simple_linear_classifier(
      None, temp_eval_export_dir)
  model1 = model_eval_lib.default_eval_shared_model(
      eval_saved_model_path=model1_dir)
  _, model2_dir = linear_classifier.simple_linear_classifier(
      None, temp_eval_export_dir)
  model2 = model_eval_lib.default_eval_shared_model(
      eval_saved_model_path=model2_dir)
  eval_shared_model = {'model1': model1, 'model2': model2}
  eval_config = config.EvalConfig(model_specs=[
      config.ModelSpec(name='model1', example_weight_key='age'),
      config.ModelSpec(name='model2', example_weight_key='age')
  ])
  tfx_io = raw_tf_record.RawBeamRecordTFXIO(
      physical_format='inmemory',
      raw_record_column_name=constants.ARROW_INPUT_COLUMN,
      telemetry_descriptors=['TFMATest'])
  extractor = predict_extractor.PredictExtractor(
      eval_shared_model, eval_config=eval_config)

  with beam.Pipeline() as pipeline:
    examples = [
        self._makeExample(age=3.0, language='english', label=1.0),
        self._makeExample(age=3.0, language='chinese', label=0.0),
        self._makeExample(age=4.0, language='english', label=1.0),
        self._makeExample(age=5.0, language='chinese', label=0.0),
    ]
    serialized_examples = [e.SerializeToString() for e in examples]

    predict_extracts = (
        pipeline
        | beam.Create(serialized_examples, reshuffle=False)
        | 'BatchExamples' >> tfx_io.BeamSource(batch_size=2)
        | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
        | 'Predict' >> extractor.ptransform)

    def check_result(got):
      try:
        self.assertLen(got, 2)
        for item in got:
          # Each batched extract should carry features, labels, predictions
          # (keyed by model name) and example weights.
          self.assertIn(constants.FEATURES_KEY, item)
          for feature in ('language', 'age'):
            for features_dict in item[constants.FEATURES_KEY]:
              self.assertIn(feature, features_dict)
          self.assertIn(constants.LABELS_KEY, item)
          self.assertIn(constants.PREDICTIONS_KEY, item)
          for model in ('model1', 'model2'):
            for predictions_dict in item[constants.PREDICTIONS_KEY]:
              self.assertIn(model, predictions_dict)
          self.assertIn(constants.EXAMPLE_WEIGHTS_KEY, item)
          # The example weight should equal the 'age' feature it was keyed on.
          for i in range(len(item[constants.FEATURES_KEY])):
            self.assertAlmostEqual(item[constants.FEATURES_KEY][i]['age'],
                                   item[constants.EXAMPLE_WEIGHTS_KEY][i])
      except AssertionError as err:
        raise util.BeamAssertException(err)

    util.assert_that(predict_extracts, check_result)
def _runTestWithCustomCheck(self,
                            examples,
                            eval_export_dir,
                            metrics_callbacks,
                            slice_spec=None,
                            custom_metrics_check=None,
                            custom_plots_check=None,
                            custom_result_check=None):
  """Runs an evaluation over the given examples and applies custom checks."""
  # make sure we are doing some checks
  self.assertTrue(custom_metrics_check is not None or
                  custom_plots_check is not None or
                  custom_result_check is not None)
  serialized_examples = [ex.SerializeToString() for ex in examples]
  slicing_specs = None
  if slice_spec:
    slicing_specs = [s.to_proto() for s in slice_spec]
  eval_config = config.EvalConfig(slicing_specs=slicing_specs)
  eval_shared_model = self.createTestEvalSharedModel(
      eval_saved_model_path=eval_export_dir,
      add_metrics_callbacks=metrics_callbacks)
  extractors = model_eval_lib.default_extractors(
      eval_config=eval_config, eval_shared_model=eval_shared_model)
  tfx_io = raw_tf_record.RawBeamRecordTFXIO(
      physical_format='inmemory',
      raw_record_column_name=constants.ARROW_INPUT_COLUMN,
      telemetry_descriptors=['TFMATest'])
  with beam.Pipeline() as pipeline:
    (metrics, plots), _ = (
        pipeline
        | 'Create' >> beam.Create(serialized_examples)
        | 'BatchExamples' >> tfx_io.BeamSource()
        | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
        | 'Extract' >> tfma_unit.Extract(extractors=extractors)  # pylint: disable=no-value-for-parameter
        | 'ComputeMetricsAndPlots' >>
        legacy_metrics_and_plots_evaluator._ComputeMetricsAndPlots(  # pylint: disable=protected-access
            eval_shared_model=eval_shared_model,
            compute_confidence_intervals=self.compute_confidence_intervals,
            random_seed_for_testing=self.deterministic_test_seed))

    if custom_metrics_check is not None:
      util.assert_that(metrics, custom_metrics_check, label='metrics')
    if custom_plots_check is not None:
      util.assert_that(plots, custom_plots_check, label='plot')

  result = pipeline.run()
  if custom_result_check is not None:
    custom_result_check(result)
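# A hypothetical illustration of how the helper above might be invoked from a
# test case. The example proto values, eval_export_dir fixture, and
# check_metrics callback are assumptions for illustration, not part of the
# original module:
#
#   self._runTestWithCustomCheck(
#       examples=[self._makeExample(age=3.0, label=1.0)],
#       eval_export_dir=eval_export_dir,
#       metrics_callbacks=[tfma.post_export_metrics.example_count()],
#       custom_metrics_check=check_metrics)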
def testE2E(self):
  column_name = "raw_record"
  tfxio = raw_tf_record.RawBeamRecordTFXIO(
      physical_format="inmem",
      raw_record_column_name=column_name,
      telemetry_descriptors=["some", "component"])

  def _AssertFn(record_batches):
    self.assertLen(record_batches, 1)
    record_batch = record_batches[0]
    self.assertTrue(record_batch.schema.equals(tfxio.ArrowSchema()))
    tensor_adapter = tfxio.TensorAdapter()
    tensors = tensor_adapter.ToBatchTensors(record_batch)
    self.assertLen(tensors, 1)
    self.assertIn(column_name, tensors)

  with beam.Pipeline() as p:
    record_batch_pcoll = (
        p
        | "CreateInMemRecords" >> beam.Create(_RAW_RECORDS)
        | "BeamSource" >> tfxio.BeamSource(batch_size=len(_RAW_RECORDS)))
    beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
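# Note: the test above reads a module-level _RAW_RECORDS fixture that is not
# part of this excerpt. A minimal sketch of what such a fixture could look
# like (hypothetical values, shown only for context):
#
#   _RAW_RECORDS = [b"record one", b"record two", b"record three"]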
def assertGeneralMetricsComputedWithBeamAre(
    self, eval_saved_model_path: str,
    examples_pcollection: beam.pvalue.PCollection,
    slice_spec: List[slicer.SingleSliceSpec],
    add_metrics_callbacks: List[types.AddMetricsCallbackType],
    expected_slice_metrics: Dict[Any, Dict[str, Any]]):
  """Checks metrics computed using Beam.

  A more general version of assertMetricsComputedWithBeamAre. Note that the
  caller is responsible for setting up and running the Beam pipeline.

  Example usage:

    def add_metrics(features, predictions, labels):
      metric_ops = {
          'mse': tf.metrics.mean_squared_error(labels, predictions['logits']),
          'mae': tf.metrics.mean_absolute_error(labels, predictions['logits']),
      }
      return metric_ops

    with beam.Pipeline() as pipeline:
      expected_slice_metrics = {
          (): {
              'mae': 0.1,
              'mse': 0.2,
              tfma.post_export_metrics.metric_keys.AUC:
                  tfma.test.BoundedValue(lower_bound=0.5)
          },
          (('age', 10),): {
              'mae': 0.2,
              'mse': 0.3,
              tfma.post_export_metrics.metric_keys.AUC:
                  tfma.test.BoundedValue(lower_bound=0.5)
          },
      }
      examples = pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(path)
      self.assertGeneralMetricsComputedWithBeamAre(
          eval_saved_model_path=path,
          examples_pcollection=examples,
          slice_spec=[tfma.slicer.SingleSliceSpec(),
                      tfma.slicer.SingleSliceSpec(columns=['age'])],
          add_metrics_callbacks=[
              add_metrics, tfma.post_export_metrics.auc()],
          expected_slice_metrics=expected_slice_metrics)

  Args:
    eval_saved_model_path: Path to the directory containing the
      EvalSavedModel.
    examples_pcollection: A PCollection of serialized example bytes.
    slice_spec: List of slice specifications.
    add_metrics_callbacks: Callbacks for adding additional metrics.
    expected_slice_metrics: Dictionary of dictionaries describing the expected
      metrics for each slice. The outer dictionary maps slice keys to the
      expected metrics for that slice.
  """

  def check_metrics(got):
    """Check metrics callback."""
    try:
      slices = {}
      for slice_key, value in got:
        slices[slice_key] = value
      self.assertCountEqual(
          list(slices.keys()), list(expected_slice_metrics.keys()))
      for slice_key, expected_metrics in expected_slice_metrics.items():
        self.assertDictElementsWithinBounds(
            got_values_dict=slices[slice_key],
            expected_values_dict=expected_metrics)
    except AssertionError as err:
      raise beam_util.BeamAssertException(err)

  slicing_specs = None
  if slice_spec:
    slicing_specs = [s.to_proto() for s in slice_spec]
  eval_config = config_pb2.EvalConfig(slicing_specs=slicing_specs)
  eval_shared_model = self.createTestEvalSharedModel(
      eval_saved_model_path=eval_saved_model_path,
      add_metrics_callbacks=add_metrics_callbacks)
  extractors = model_eval_lib.default_extractors(
      eval_config=eval_config, eval_shared_model=eval_shared_model)
  tfx_io = raw_tf_record.RawBeamRecordTFXIO(
      physical_format='inmemory',
      raw_record_column_name=constants.ARROW_INPUT_COLUMN,
      telemetry_descriptors=['TFMATest'])

  # pylint: disable=no-value-for-parameter
  (metrics, _), _ = (
      examples_pcollection
      | 'BatchExamples' >> tfx_io.BeamSource()
      | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
      | 'Extract' >> Extract(extractors=extractors)
      | 'ComputeMetricsAndPlots' >>
      legacy_metrics_and_plots_evaluator._ComputeMetricsAndPlots(  # pylint: disable=protected-access
          eval_shared_model=eval_shared_model))
  # pylint: enable=no-value-for-parameter

  beam_util.assert_that(metrics, check_metrics)
def assertMetricsComputedWithBeamAre(
    self,
    eval_saved_model_path: str,
    serialized_examples: List[bytes],
    expected_metrics: Dict[str, Any],
    add_metrics_callbacks: Optional[List[
        types.AddMetricsCallbackType]] = None):
  """Checks metrics computed using Beam.

  Metrics will be computed over all examples, without any slicing. If you want
  to provide your own PCollection (e.g. read a large number of examples from a
  file), if you want to check metrics over certain slices, or if you want to
  add additional post-export metrics, use the more general
  assertGeneralMetricsComputedWithBeamAre.

  Example usage:

    self.assertMetricsComputedWithBeamAre(
        eval_saved_model_path=path,
        serialized_examples=[self.makeExample(age=5, label=1.0),
                             self.makeExample(age=10, label=0.0)],
        expected_metrics={'average_loss': 0.1})

  Args:
    eval_saved_model_path: Path to the directory containing the
      EvalSavedModel.
    serialized_examples: List of serialized example bytes.
    expected_metrics: Dictionary of expected metric values.
    add_metrics_callbacks: Optional. Callbacks for adding additional metrics.
  """

  def check_metrics(got):
    """Check metrics callback."""
    try:
      self.assertEqual(
          1, len(got), 'expecting metrics for exactly one slice, but got %d '
          'slices instead. metrics were: %s' % (len(got), got))
      (slice_key, value) = got[0]
      self.assertEqual((), slice_key)
      self.assertDictElementsWithinBounds(
          got_values_dict=value, expected_values_dict=expected_metrics)
    except AssertionError as err:
      raise beam_util.BeamAssertException(err)

  eval_config = config_pb2.EvalConfig()
  eval_shared_model = model_eval_lib.default_eval_shared_model(
      eval_saved_model_path=eval_saved_model_path,
      add_metrics_callbacks=add_metrics_callbacks)
  extractors = model_eval_lib.default_extractors(
      eval_config=eval_config, eval_shared_model=eval_shared_model)
  tfx_io = raw_tf_record.RawBeamRecordTFXIO(
      physical_format='inmemory',
      raw_record_column_name=constants.ARROW_INPUT_COLUMN,
      telemetry_descriptors=['TFMATest'])

  with beam.Pipeline() as pipeline:
    # pylint: disable=no-value-for-parameter
    (metrics, _), _ = (
        pipeline
        | 'CreateExamples' >> beam.Create(serialized_examples)
        | 'BatchExamples' >> tfx_io.BeamSource()
        | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
        | 'Extract' >> Extract(extractors=extractors)
        | 'ComputeMetricsAndPlots' >>
        legacy_metrics_and_plots_evaluator._ComputeMetricsAndPlots(  # pylint: disable=protected-access
            eval_shared_model=eval_shared_model))
    # pylint: enable=no-value-for-parameter

    beam_util.assert_that(metrics, check_metrics)
def _write_tfma(self,
                tfma_path: str,
                output_file_format: str,
                store: Optional[mlmd.MetadataStore] = None):
  """Runs a TFMA pipeline, writes results to tfma_path, and registers them.

  If a MetadataStore is provided, the written evaluation is also recorded as a
  ModelEvaluation artifact in MLMD.
  """
  _, eval_saved_model_path = (
      fixed_prediction_estimator.simple_fixed_prediction_estimator(
          export_path=None,
          eval_export_path=os.path.join(self.tmpdir, 'eval_export_dir')))
  eval_config = tfma.EvalConfig(model_specs=[tfma.ModelSpec()])
  eval_shared_model = self.createTestEvalSharedModel(
      eval_saved_model_path=eval_saved_model_path,
      add_metrics_callbacks=[
          tfma.post_export_metrics.example_count(),
          tfma.post_export_metrics
          .calibration_plot_and_prediction_histogram(num_buckets=2)
      ])
  extractors = [
      tfma.extractors.legacy_predict_extractor.PredictExtractor(
          eval_shared_model, eval_config=eval_config),
      tfma.extractors.unbatch_extractor.UnbatchExtractor(),
      tfma.extractors.slice_key_extractor.SliceKeyExtractor()
  ]
  evaluators = [
      tfma.evaluators.legacy_metrics_and_plots_evaluator
      .MetricsAndPlotsEvaluator(eval_shared_model)
  ]
  writers = [
      tfma.writers.MetricsPlotsAndValidationsWriter(
          output_paths={
              'metrics': os.path.join(tfma_path, 'metrics'),
              'plots': os.path.join(tfma_path, 'plots')
          },
          output_file_format=output_file_format,
          eval_config=eval_config,
          add_metrics_callbacks=eval_shared_model.add_metrics_callbacks)
  ]
  tfx_io = raw_tf_record.RawBeamRecordTFXIO(
      physical_format='inmemory',
      raw_record_column_name='__raw_record__',
      telemetry_descriptors=['TFMATest'])

  with beam.Pipeline() as pipeline:
    example1 = self._makeExample(prediction=0.0, label=1.0)
    example2 = self._makeExample(prediction=1.0, label=1.0)
    _ = (
        pipeline
        | 'Create' >> beam.Create([
            example1.SerializeToString(),
            example2.SerializeToString(),
        ])
        | 'BatchExamples' >> tfx_io.BeamSource()
        | 'ExtractEvaluateAndWriteResults' >>
        tfma.ExtractEvaluateAndWriteResults(
            eval_config=eval_config,
            eval_shared_model=eval_shared_model,
            extractors=extractors,
            evaluators=evaluators,
            writers=writers))

  if store:
    eval_type = metadata_store_pb2.ArtifactType()
    eval_type.name = standard_artifacts.ModelEvaluation.TYPE_NAME
    eval_type_id = store.put_artifact_type(eval_type)
    artifact = metadata_store_pb2.Artifact()
    artifact.uri = tfma_path
    artifact.type_id = eval_type_id
    store.put_artifacts([artifact])
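# A hypothetical call to the helper above from a test case, assuming a
# temporary output directory and an in-memory MLMD store fixture named
# `self._store` (both are assumptions, not part of the original module):
#
#   tfma_path = os.path.join(self.tmpdir, 'tfma')
#   self._write_tfma(tfma_path, output_file_format='', store=self._store)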