def testEvaluateExistingMetricsWithExportedCustomMetricsDNN(self):
  temp_eval_export_dir = self._getEvalExportDir()
  _, eval_export_dir = dnn_classifier.simple_dnn_classifier(
      None, temp_eval_export_dir)
  eval_saved_model = load.EvalSavedModel(eval_export_dir)

  example1 = self._makeExample(age=3.0, language='english', label=1.0)
  features_predictions_labels = self.predict_injective_single_example(
      eval_saved_model, example1.SerializeToString())
  eval_saved_model.perform_metrics_update(features_predictions_labels)

  example2 = self._makeExample(age=2.0, language='chinese', label=0.0)
  features_predictions_labels = self.predict_injective_single_example(
      eval_saved_model, example2.SerializeToString())
  eval_saved_model.perform_metrics_update(features_predictions_labels)

  metric_values = eval_saved_model.get_metric_values()
  self.assertDictElementsAlmostEqual(
      metric_values,
      {
          # We don't check accuracy and AUC here because they vary from run
          # to run due to DNN initialization.
          'my_mean_age': 2.5,
          'my_mean_label': 0.5,
          'my_mean_age_times_label': 1.5,
      })
  self.assertIn('my_mean_prediction', metric_values)
  self.assertIn('prediction/mean', metric_values)
  self.assertAlmostEqual(
      metric_values['prediction/mean'],
      metric_values['my_mean_prediction'],
      places=5)
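
# A minimal standalone sketch (plain Python, no TFMA dependency; the helper
# name is illustrative only) of where the expected values above come from:
# each custom metric is a simple mean over the two examples.
def _expected_custom_metrics_sketch():
  ages = [3.0, 2.0]
  labels = [1.0, 0.0]
  n = len(ages)
  return {
      'my_mean_age': sum(ages) / n,  # (3.0 + 2.0) / 2 = 2.5
      'my_mean_label': sum(labels) / n,  # (1.0 + 0.0) / 2 = 0.5
      # (3.0 * 1.0 + 2.0 * 0.0) / 2 = 1.5
      'my_mean_age_times_label': sum(a * l for a, l in zip(ages, labels)) / n,
  }
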
def testPredictExtractorWithBinaryClassificationModel(self):
  temp_export_dir = self._getExportDir()
  export_dir, _ = dnn_classifier.simple_dnn_classifier(
      temp_export_dir, None, n_classes=2)

  eval_config = config.EvalConfig(
      model_specs=[config.ModelSpec(location=export_dir)])
  eval_shared_model = self.createTestEvalSharedModel(
      eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])
  predict_extractor = predict_extractor_v2.PredictExtractor(
      eval_config=eval_config, eval_shared_models=[eval_shared_model])

  examples = [
      self._makeExample(age=1.0, language='english', label=0),
      self._makeExample(age=2.0, language='chinese', label=1),
      self._makeExample(age=3.0, language='chinese', label=0),
  ]

  with beam.Pipeline() as pipeline:
    # pylint: disable=no-value-for-parameter
    result = (
        pipeline
        | 'Create' >> beam.Create([e.SerializeToString() for e in examples])
        | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
        | predict_extractor.stage_name >> predict_extractor.ptransform)
    # pylint: enable=no-value-for-parameter

    def check_result(got):
      try:
        self.assertLen(got, 3)
        # We can't verify the actual predictions, but we can verify the keys.
        for item in got:
          self.assertIn(constants.PREDICTIONS_KEY, item)
          for pred_key in ('logistic', 'probabilities', 'all_classes'):
            self.assertIn(pred_key, item[constants.PREDICTIONS_KEY])
      except AssertionError as err:
        raise util.BeamAssertException(err)

    util.assert_that(result, check_result, label='result')
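
# A hedged sketch (plain Python, hypothetical values) of the shape of one
# prediction dict that the key assertions above rely on; a real binary
# classification head also emits keys such as 'logits' and 'class_ids'.
_example_binary_prediction = {
    'logistic': [0.7],  # sigmoid output in [0, 1]
    'probabilities': [0.3, 0.7],  # per-class probabilities
    'all_classes': [b'0', b'1'],  # class name strings
}
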
def testPostExportMetricsDNNClassifier(self):
  temp_eval_export_dir = self._getEvalExportDir()
  _, eval_export_dir = dnn_classifier.simple_dnn_classifier(
      None, temp_eval_export_dir)
  examples = [
      self._makeExample(age=3.0, language='english', label=1.0),
      self._makeExample(age=3.0, language='chinese', label=0.0),
      self._makeExample(age=4.0, language='english', label=1.0),
      self._makeExample(age=5.0, language='chinese', label=0.0),
  ]
  expected_values_dict = {
      metric_keys.EXAMPLE_COUNT: 4.0,
      metric_keys.EXAMPLE_WEIGHT: 15.0,
  }
  self._runTest(examples, eval_export_dir, [
      post_export_metrics.example_count(),
      post_export_metrics.example_weight('age')
  ], expected_values_dict)
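
# A tiny standalone sketch (plain Python) of the expected EXAMPLE_WEIGHT
# above: example_weight('age') sums the 'age' feature over the examples.
def _expected_example_weight_sketch():
  ages = [3.0, 3.0, 4.0, 5.0]
  return sum(ages)  # 3.0 + 3.0 + 4.0 + 5.0 = 15.0
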
def testPredictExtractorWithMultiClassModel(self):
  temp_export_dir = self._getExportDir()
  export_dir, _ = dnn_classifier.simple_dnn_classifier(
      temp_export_dir, None, n_classes=3)

  eval_config = config.EvalConfig(
      model_specs=[config.ModelSpec(location=export_dir)])
  eval_shared_model = self.createTestEvalSharedModel(
      eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])
  predict_extractor = predict_extractor_v2.PredictExtractor(
      eval_config=eval_config, eval_shared_models=[eval_shared_model])

  examples = [
      self._makeExample(age=1.0, language='english', label=0),
      self._makeExample(age=2.0, language='chinese', label=1),
      self._makeExample(age=3.0, language='english', label=2),
      self._makeExample(age=4.0, language='chinese', label=1),
  ]

  with beam.Pipeline() as pipeline:
    # pylint: disable=no-value-for-parameter
    result = (
        pipeline
        | 'Create' >> beam.Create([e.SerializeToString() for e in examples])
        | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
        | predict_extractor.stage_name >> predict_extractor.ptransform)
    # pylint: enable=no-value-for-parameter

    def check_result(got):
      try:
        self.assertLen(got, 4)
        # We can't verify the actual predictions, but we can verify the keys.
        for item in got:
          self.assertIn(constants.PREDICTIONS_KEY, item)
          for pred_key in ('probabilities', 'all_classes'):
            self.assertIn(pred_key, item[constants.PREDICTIONS_KEY])
      except AssertionError as err:
        raise util.BeamAssertException(err)

    util.assert_that(result, check_result, label='result')
def testBatchedPredictExtractorWithMultiClassModel(self):
  temp_export_dir = self._getExportDir()
  export_dir, _ = dnn_classifier.simple_dnn_classifier(
      temp_export_dir, None, n_classes=3)

  eval_config = config.EvalConfig(model_specs=[config.ModelSpec()])
  eval_shared_model = self.createTestEvalSharedModel(
      eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])
  schema = text_format.Parse(
      """
      feature {
        name: "age"
        type: FLOAT
      }
      feature {
        name: "language"
        type: BYTES
      }
      feature {
        name: "label"
        type: INT
      }
      """, schema_pb2.Schema())
  tfx_io = test_util.InMemoryTFExampleRecord(
      schema=schema, raw_record_column_name=constants.BATCHED_INPUT_KEY)
  tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
      arrow_schema=tfx_io.ArrowSchema(),
      tensor_representations=tfx_io.TensorRepresentations())
  input_extractor = batched_input_extractor.BatchedInputExtractor(eval_config)
  predict_extractor = batched_predict_extractor_v2.BatchedPredictExtractor(
      eval_config=eval_config,
      eval_shared_model=eval_shared_model,
      tensor_adapter_config=tensor_adapter_config)

  examples = [
      self._makeExample(age=1.0, language='english', label=0),
      self._makeExample(age=2.0, language='chinese', label=1),
      self._makeExample(age=3.0, language='english', label=2),
      self._makeExample(age=4.0, language='chinese', label=1),
  ]

  with beam.Pipeline() as pipeline:
    # pylint: disable=no-value-for-parameter
    result = (
        pipeline
        | 'Create' >> beam.Create(
            [e.SerializeToString() for e in examples], reshuffle=False)
        | 'BatchExamples' >> tfx_io.BeamSource(batch_size=4)
        | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
        | input_extractor.stage_name >> input_extractor.ptransform
        | predict_extractor.stage_name >> predict_extractor.ptransform)
    # pylint: enable=no-value-for-parameter

    def check_result(got):
      try:
        # All four examples fit in a single batch (batch_size=4), so we
        # expect exactly one batched extract.
        self.assertLen(got, 1)
        # We can't verify the actual predictions, but we can verify the keys.
        for item in got:
          self.assertIn(constants.BATCHED_PREDICTIONS_KEY, item)
          for pred in item[constants.BATCHED_PREDICTIONS_KEY]:
            for pred_key in ('probabilities', 'all_classes'):
              self.assertIn(pred_key, pred)
      except AssertionError as err:
        raise util.BeamAssertException(err)

    util.assert_that(result, check_result, label='result')
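
# A hedged, standalone sketch (plain Python, hypothetical values) of the
# single batched extract that check_result above expects: one dict whose
# batched-predictions entry holds a per-example prediction dict for each of
# the four examples in the batch. The key string below is an assumption made
# for illustration; the test itself uses constants.BATCHED_PREDICTIONS_KEY.
_ASSUMED_BATCHED_PREDICTIONS_KEY = 'batched_predictions'
_example_batched_extract = {
    _ASSUMED_BATCHED_PREDICTIONS_KEY: [{
        'probabilities': [0.2, 0.5, 0.3],
        'all_classes': [b'0', b'1', b'2'],
    } for _ in range(4)],
}
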
def testEvaluateWithMultiClassModel(self):
  n_classes = 3
  temp_export_dir = self._getExportDir()
  _, export_dir = dnn_classifier.simple_dnn_classifier(
      None, temp_export_dir, n_classes=n_classes)

  # Add example_count and weighted_example_count.
  eval_config = config.EvalConfig(
      model_specs=[
          config.ModelSpec(
              location=export_dir,
              label_key='label',
              example_weight_key='age')
      ],
      slicing_specs=[config.SlicingSpec()],
      metrics_specs=metric_specs.specs_from_metrics(
          [calibration.MeanLabel('mean_label')],
          binarize=config.BinarizationOptions(class_ids=range(n_classes))))
  eval_shared_model = self.createTestEvalSharedModel(
      eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])
  slice_spec = [
      slicer.SingleSliceSpec(spec=s) for s in eval_config.slicing_specs
  ]
  extractors = [
      input_extractor.InputExtractor(eval_config=eval_config),
      predict_extractor_v2.PredictExtractor(
          eval_config=eval_config, eval_shared_models=[eval_shared_model]),
      slice_key_extractor.SliceKeyExtractor(slice_spec=slice_spec)
  ]
  evaluators = [
      metrics_and_plots_evaluator_v2.MetricsAndPlotsEvaluator(
          eval_config=eval_config, eval_shared_models=[eval_shared_model])
  ]

  examples = [
      self._makeExample(age=1.0, language='english', label=0),
      self._makeExample(age=2.0, language='chinese', label=1),
      self._makeExample(age=3.0, language='english', label=2),
      self._makeExample(age=4.0, language='chinese', label=1),
  ]

  with beam.Pipeline() as pipeline:
    # pylint: disable=no-value-for-parameter
    metrics = (
        pipeline
        | 'Create' >> beam.Create([e.SerializeToString() for e in examples])
        | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
        | 'ExtractAndEvaluate' >> model_eval_lib.ExtractAndEvaluate(
            extractors=extractors, evaluators=evaluators))
    # pylint: enable=no-value-for-parameter

    def check_metrics(got):
      try:
        self.assertLen(got, 1)
        got_slice_key, got_metrics = got[0]
        example_count_key = metric_types.MetricKey(name='example_count')
        weighted_example_count_key = metric_types.MetricKey(
            name='weighted_example_count')
        label_key_class_0 = metric_types.MetricKey(
            name='mean_label', sub_key=metric_types.SubKey(class_id=0))
        label_key_class_1 = metric_types.MetricKey(
            name='mean_label', sub_key=metric_types.SubKey(class_id=1))
        label_key_class_2 = metric_types.MetricKey(
            name='mean_label', sub_key=metric_types.SubKey(class_id=2))
        self.assertEqual(got_slice_key, ())
        self.assertDictElementsAlmostEqual(
            got_metrics, {
                example_count_key: 4,
                weighted_example_count_key: (1.0 + 2.0 + 3.0 + 4.0),
                label_key_class_0: (1 * 1.0 + 0 * 2.0 + 0 * 3.0 + 0 * 4.0) /
                                   (1.0 + 2.0 + 3.0 + 4.0),
                label_key_class_1: (0 * 1.0 + 1 * 2.0 + 0 * 3.0 + 1 * 4.0) /
                                   (1.0 + 2.0 + 3.0 + 4.0),
                label_key_class_2: (0 * 1.0 + 0 * 2.0 + 1 * 3.0 + 0 * 4.0) /
                                   (1.0 + 2.0 + 3.0 + 4.0)
            })
      except AssertionError as err:
        raise util.BeamAssertException(err)

    util.assert_that(
        metrics[constants.METRICS_KEY], check_metrics, label='metrics')
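
# A self-contained sketch (plain Python) of the per-class weighted mean
# label asserted above: with binarization by class_id, each class c gets
# sum(weight_i for examples with label == c) / sum(weight_i), where the
# weights are the 'age' values.
def _per_class_weighted_mean_label_sketch():
  weights = [1.0, 2.0, 3.0, 4.0]
  labels = [0, 1, 2, 1]
  total = sum(weights)  # 10.0, also the expected weighted_example_count
  return {
      c: sum(w for w, l in zip(weights, labels) if l == c) / total
      for c in range(3)
  }  # {0: 0.1, 1: 0.6, 2: 0.3}
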
def testEvaluateWithBinaryClassificationModel(self):
  n_classes = 2
  temp_export_dir = self._getExportDir()
  _, export_dir = dnn_classifier.simple_dnn_classifier(
      None, temp_export_dir, n_classes=n_classes)

  # Add mean_label, example_count, weighted_example_count, calibration_plot.
  eval_config = config.EvalConfig(
      model_specs=[
          config.ModelSpec(
              location=export_dir,
              label_key='label',
              example_weight_key='age')
      ],
      slicing_specs=[config.SlicingSpec()],
      metrics_specs=metric_specs.specs_from_metrics([
          calibration.MeanLabel('mean_label'),
          calibration_plot.CalibrationPlot(
              name='calibration_plot', num_buckets=10)
      ]))
  eval_shared_model = self.createTestEvalSharedModel(
      eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])
  slice_spec = [
      slicer.SingleSliceSpec(spec=s) for s in eval_config.slicing_specs
  ]
  extractors = [
      input_extractor.InputExtractor(eval_config=eval_config),
      predict_extractor_v2.PredictExtractor(
          eval_config=eval_config, eval_shared_models=[eval_shared_model]),
      slice_key_extractor.SliceKeyExtractor(slice_spec=slice_spec)
  ]
  evaluators = [
      metrics_and_plots_evaluator_v2.MetricsAndPlotsEvaluator(
          eval_config=eval_config, eval_shared_models=[eval_shared_model])
  ]

  examples = [
      self._makeExample(age=1.0, language='english', label=0.0),
      self._makeExample(age=2.0, language='chinese', label=1.0),
      self._makeExample(age=3.0, language='chinese', label=0.0),
  ]

  with beam.Pipeline() as pipeline:
    # pylint: disable=no-value-for-parameter
    metrics_and_plots = (
        pipeline
        | 'Create' >> beam.Create([e.SerializeToString() for e in examples])
        | 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
        | 'ExtractAndEvaluate' >> model_eval_lib.ExtractAndEvaluate(
            extractors=extractors, evaluators=evaluators))
    # pylint: enable=no-value-for-parameter

    def check_metrics(got):
      try:
        self.assertLen(got, 1)
        got_slice_key, got_metrics = got[0]
        self.assertEqual(got_slice_key, ())
        example_count_key = metric_types.MetricKey(name='example_count')
        weighted_example_count_key = metric_types.MetricKey(
            name='weighted_example_count')
        label_key = metric_types.MetricKey(name='mean_label')
        self.assertDictElementsAlmostEqual(
            got_metrics, {
                example_count_key: 3,
                weighted_example_count_key: (1.0 + 2.0 + 3.0),
                label_key: (0 * 1.0 + 1 * 2.0 + 0 * 3.0) / (1.0 + 2.0 + 3.0),
            })
      except AssertionError as err:
        raise util.BeamAssertException(err)

    def check_plots(got):
      try:
        self.assertLen(got, 1)
        got_slice_key, got_plots = got[0]
        self.assertEqual(got_slice_key, ())
        plot_key = metric_types.PlotKey('calibration_plot')
        self.assertIn(plot_key, got_plots)
        # 10 buckets + 2 for edge cases.
        self.assertLen(got_plots[plot_key].buckets, 12)
      except AssertionError as err:
        raise util.BeamAssertException(err)

    util.assert_that(
        metrics_and_plots[constants.METRICS_KEY],
        check_metrics,
        label='metrics')
    util.assert_that(
        metrics_and_plots[constants.PLOTS_KEY], check_plots, label='plots')
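
# A brief standalone sketch (plain Python) of the bucket count asserted in
# check_plots above: the plot is configured with num_buckets=10 over [0, 1],
# and the test's comment notes two extra buckets for edge cases, which we
# read as underflow/overflow buckets for values outside [0, 1].
def _calibration_plot_bucket_count_sketch(num_buckets=10):
  boundaries = [i / num_buckets for i in range(num_buckets + 1)]  # 0.0..1.0
  interior = len(boundaries) - 1  # 10 buckets within [0, 1]
  return interior + 2  # plus the two edge-case buckets -> 12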