Example No. 1
    def testUpdateConfigWithDefaultsMultiModel(self):
        eval_config_pbtxt = """
      model_specs { name: "model1" }
      model_specs { name: "model2" }
      metrics_specs {
        metrics { class_name: "WeightedExampleCount" }
      }
      metrics_specs {
        metrics { class_name: "MeanLabel" }
        model_names: ["model1"]
      }
    """
        eval_config = text_format.Parse(eval_config_pbtxt,
                                        config_pb2.EvalConfig())

        expected_eval_config_pbtxt = """
      model_specs { name: "model1" }
      model_specs { name: "model2" }
      metrics_specs {
        metrics { class_name: "WeightedExampleCount" }
        model_names: ["model1", "model2"]
      }
      metrics_specs {
        metrics { class_name: "MeanLabel" }
        model_names: ["model1"]
      }
    """
        expected_eval_config = text_format.Parse(expected_eval_config_pbtxt,
                                                 config_pb2.EvalConfig())

        got_eval_config = config_util.update_eval_config_with_defaults(
            eval_config)
        self.assertProtoEquals(got_eval_config, expected_eval_config)
Example No. 2
    def testUpdateConfigWithoutBaselineModelWhenModelNameProvided(self):
        eval_config_pbtxt = """
      model_specs { name: "candidate" }
      model_specs { name: "baseline" is_baseline: true }
      metrics_specs {
        metrics { class_name: "WeightedExampleCount" }
        model_names: "candidate"
      }
    """
        eval_config = text_format.Parse(eval_config_pbtxt,
                                        config_pb2.EvalConfig())

        expected_eval_config_pbtxt = """
      model_specs { name: "candidate" }
      model_specs { name: "baseline" is_baseline: true }
      metrics_specs {
        metrics { class_name: "WeightedExampleCount" }
        model_names: ["candidate"]
      }
    """
        expected_eval_config = text_format.Parse(expected_eval_config_pbtxt,
                                                 config_pb2.EvalConfig())

        got_eval_config = config_util.update_eval_config_with_defaults(
            eval_config, has_baseline=True)
        self.assertProtoEquals(got_eval_config, expected_eval_config)
Example No. 3
    def testUpdateConfigWithDefaultsDoesNotAutomaticallyAddBaselineModel(self):
        eval_config_pbtxt = """
      model_specs { name: "model1" }
      model_specs { name: "model2" is_baseline: true }
      metrics_specs {
        metrics { class_name: "WeightedExampleCount" }
      }
    """
        eval_config = text_format.Parse(eval_config_pbtxt,
                                        config_pb2.EvalConfig())

        expected_eval_config_pbtxt = """
      model_specs { name: "model1" }
      model_specs { name: "model2" is_baseline: true }
      metrics_specs {
        metrics { class_name: "WeightedExampleCount" }
        model_names: ["model1", "model2"]
      }
    """
        expected_eval_config = text_format.Parse(expected_eval_config_pbtxt,
                                                 config_pb2.EvalConfig())

        got_eval_config = config_util.update_eval_config_with_defaults(
            eval_config, has_baseline=True)
        self.assertProtoEquals(got_eval_config, expected_eval_config)
Example No. 4
    def testHasChangeThreshold(self):
        eval_config = text_format.Parse(
            """
      metrics_specs {
        metrics {
          class_name: "MeanLabel"
          threshold {
            change_threshold {
              direction: HIGHER_IS_BETTER
              absolute { value: 0.1 }
            }
          }
        }
      }
    """, config_pb2.EvalConfig())

        self.assertTrue(config_util.has_change_threshold(eval_config))

        eval_config = text_format.Parse(
            """
      metrics_specs {
        thresholds {
          key: "my_metric"
          value {
            change_threshold {
              direction: HIGHER_IS_BETTER
              absolute { value: 0.1 }
            }
          }
        }
      }
    """, config_pb2.EvalConfig())

        self.assertTrue(config_util.has_change_threshold(eval_config))

        eval_config = text_format.Parse(
            """
      metrics_specs {
        metrics {
          class_name: "MeanLabel"
          threshold {
            value_threshold {
              lower_bound { value: 0.9 }
            }
          }
        }
      }
    """, config_pb2.EvalConfig())

        self.assertFalse(config_util.has_change_threshold(eval_config))
Example No. 5
    def test_features_extractor_no_features(self):
        model_spec = config_pb2.ModelSpec()
        eval_config = config_pb2.EvalConfig(model_specs=[model_spec])
        feature_extractor = features_extractor.FeaturesExtractor(eval_config)
        tfx_io = tf_example_record.TFExampleBeamRecord(
            raw_record_column_name=constants.ARROW_INPUT_COLUMN,
            physical_format='inmem',
            telemetry_descriptors=['testing'])

        with beam.Pipeline() as pipeline:
            result = (
                pipeline | 'Create' >> beam.Create([b''] * 3)
                | 'DecodeToRecordBatch' >> tfx_io.BeamSource(batch_size=3)
                |
                'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
                | feature_extractor.stage_name >> feature_extractor.ptransform)

            def check_result(got):
                self.assertLen(got, 1)
                self.assertIn(constants.FEATURES_KEY, got[0])
                self.assertEmpty(got[0][constants.FEATURES_KEY])
                self.assertIn(constants.INPUT_KEY, got[0])
                self.assertLen(got[0][constants.INPUT_KEY], 3)

            util.assert_that(result, check_result, label='CheckResult')
Example No. 6
 def testSerializeDeserializeEvalConfig(self):
     output_path = self._getTempDir()
     options = config_pb2.Options()
     options.compute_confidence_intervals.value = False
     options.min_slice_size.value = 1
     eval_config = config_pb2.EvalConfig(slicing_specs=[
         config_pb2.SlicingSpec(feature_keys=['country'],
                                feature_values={
                                    'age': '5',
                                    'gender': 'f'
                                }),
         config_pb2.SlicingSpec(feature_keys=['interest'],
                                feature_values={
                                    'age': '6',
                                    'gender': 'm'
                                })
     ],
                                         options=options)
     data_location = '/path/to/data'
     file_format = 'tfrecords'
     model_location = '/path/to/model'
     with tf.io.gfile.GFile(os.path.join(output_path, 'eval_config.json'),
                            'w') as f:
         f.write(
             eval_config_writer._serialize_eval_run(eval_config,
                                                    data_location,
                                                    file_format,
                                                    {'': model_location}))
     got_eval_config, got_data_location, got_file_format, got_model_locations = (
         eval_config_writer.load_eval_run(output_path))
     self.assertEqual(eval_config, got_eval_config)
     self.assertEqual(data_location, got_data_location)
     self.assertEqual(file_format, got_file_format)
     self.assertEqual({'': model_location}, got_model_locations)
Example No. 7
 def testMetricKeysToSkipForConfidenceIntervals(self):
     metrics_specs = [
         config_pb2.MetricsSpec(metrics=[
             config_pb2.MetricConfig(
                 class_name='ExampleCount',
                 config=json.dumps({'name': 'example_count'}),
                 threshold=config_pb2.MetricThreshold(
                     value_threshold=config_pb2.GenericValueThreshold())),
             config_pb2.MetricConfig(
                 class_name='MeanLabel',
                 config=json.dumps({'name': 'mean_label'}),
                 threshold=config_pb2.MetricThreshold(
                     change_threshold=config_pb2.GenericChangeThreshold())),
             config_pb2.MetricConfig(
                 class_name='MeanSquaredError',
                 config=json.dumps({'name': 'mse'}),
                 threshold=config_pb2.MetricThreshold(
                     change_threshold=config_pb2.GenericChangeThreshold()))
         ],
                                model_names=['model_name1', 'model_name2'],
                                output_names=[
                                    'output_name1', 'output_name2'
                                ]),
     ]
     metrics_specs += metric_specs.specs_from_metrics(
         [tf.keras.metrics.MeanSquaredError('mse')],
         model_names=['model_name1', 'model_name2'])
     keys = metric_specs.metric_keys_to_skip_for_confidence_intervals(
         metrics_specs, eval_config=config_pb2.EvalConfig())
     self.assertLen(keys, 8)
     self.assertIn(
         metric_types.MetricKey(name='example_count',
                                model_name='model_name1',
                                output_name='output_name1'), keys)
     self.assertIn(
         metric_types.MetricKey(name='example_count',
                                model_name='model_name1',
                                output_name='output_name2'), keys)
     self.assertIn(
         metric_types.MetricKey(name='example_count',
                                model_name='model_name2',
                                output_name='output_name1'), keys)
     self.assertIn(
         metric_types.MetricKey(name='example_count',
                                model_name='model_name2',
                                output_name='output_name2'), keys)
     self.assertIn(
         metric_types.MetricKey(name='example_count',
                                model_name='model_name1'), keys)
     self.assertIn(
         metric_types.MetricKey(name='weighted_example_count',
                                model_name='model_name1',
                                example_weighted=True), keys)
     self.assertIn(
         metric_types.MetricKey(name='example_count',
                                model_name='model_name2'), keys)
     self.assertIn(
         metric_types.MetricKey(name='weighted_example_count',
                                model_name='model_name2',
                                example_weighted=True), keys)
Example No. 8
    def testSliceKeys(self, model_names, extracts, slice_specs,
                      expected_slices):
        eval_config = config_pb2.EvalConfig(model_specs=[
            config_pb2.ModelSpec(name=name) for name in model_names
        ])
        with beam.Pipeline() as pipeline:
            slice_keys_extracts = (
                pipeline
                | 'CreateTestInput' >> beam.Create(extracts)
                | 'ExtractSlices' >> slice_key_extractor.ExtractSliceKeys(
                    slice_spec=slice_specs, eval_config=eval_config))

            def check_result(got):
                try:
                    self.assertLen(got, 2)
                    got_results = []
                    for item in got:
                        self.assertIn(constants.SLICE_KEY_TYPES_KEY, item)
                        got_results.append(
                            sorted(item[constants.SLICE_KEY_TYPES_KEY]))
                    self.assertCountEqual(got_results, expected_slices)
                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(slice_keys_extracts, check_result)
Example No. 9
 def testGetModelAndOutputNamesEmptyPredictions(self):
     eval_config = config_pb2.EvalConfig(
         model_specs=[config_pb2.ModelSpec()])
     self.assertEmpty(
         util.StandardExtracts({
             constants.PREDICTIONS_KEY: {}
         }).get_model_and_output_names(eval_config))
Example No. 10
    def _assert_test(self,
                     num_buckets,
                     baseline_examples,
                     comparison_examples,
                     lift_metric_value,
                     ignore_out_of_bound_examples=False):
        eval_config = config_pb2.EvalConfig(
            cross_slicing_specs=[config_pb2.CrossSlicingSpec()])
        computations = lift.Lift(
            num_buckets=num_buckets,
            ignore_out_of_bound_examples=ignore_out_of_bound_examples
        ).computations(eval_config=eval_config)
        histogram = computations[0]
        lift_metrics = computations[1]

        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            baseline_result = (
                pipeline
                | 'CreateB' >> beam.Create(baseline_examples)
                | 'ProcessB' >> beam.Map(metric_util.to_standard_metric_inputs)
                | 'AddSliceB' >> beam.Map(lambda x: ((), x))
                | 'ComputeHistogramB' >> beam.CombinePerKey(histogram.combiner)
            )  # pyformat: ignore

            comparison_result = (
                pipeline
                | 'CreateC' >> beam.Create(comparison_examples)
                | 'ProcessC' >> beam.Map(metric_util.to_standard_metric_inputs)
                | 'AddSliceC' >> beam.Map(lambda x: (('slice'), x))
                | 'ComputeHistogramC' >> beam.CombinePerKey(histogram.combiner)
            )  # pyformat: ignore

            # pylint: enable=no-value-for-parameter

            merged_result = ((baseline_result, comparison_result)
                             | 'MergePCollections' >> beam.Flatten())

            def check_result(got):
                try:
                    self.assertLen(got, 2)
                    slice_1, metric_1 = got[0]
                    slice_2, metric_2 = got[1]
                    lift_value = None
                    if not slice_1:
                        lift_value = lift_metrics.cross_slice_comparison(
                            metric_1, metric_2)
                    else:
                        lift_value = lift_metrics.cross_slice_comparison(
                            metric_2, metric_1)

                    self.assertDictElementsAlmostEqual(
                        lift_value, {
                            metric_types.MetricKey(name=f'lift@{num_buckets}'):
                            lift_metric_value,
                        })
                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(merged_result, check_result, label='result')
Example No. 11
    def testStandardMetricInputsWithCustomLabelKeys(self):
        example = metric_types.StandardMetricInputs(
            labels={
                'custom_label': np.array([2]),
                'other_label': np.array([0])
            },
            predictions={'custom_prediction': np.array([0, 0.5, 0.3, 0.9])},
            example_weights=np.array([1.0]))
        eval_config = config_pb2.EvalConfig(model_specs=[
            config_pb2.ModelSpec(label_key='custom_label',
                                 prediction_key='custom_prediction')
        ])
        iterator = metric_util.to_label_prediction_example_weight(
            example, eval_config=eval_config)

        for expected_label, expected_prediction in zip((0.0, 0.0, 1.0, 0.0),
                                                       (0.0, 0.5, 0.3, 0.9)):
            got_label, got_pred, got_example_weight = next(iterator)
            self.assertAllClose(got_label,
                                np.array([expected_label]),
                                atol=0,
                                rtol=0)
            self.assertAllClose(got_pred,
                                np.array([expected_prediction]),
                                atol=0,
                                rtol=0)
            self.assertAllClose(got_example_weight,
                                np.array([1.0]),
                                atol=0,
                                rtol=0)
Example No. 12
    def testUpdateConfigWithDefaultsNoBaselineModelNonRubberstamp(self):
        eval_config_pbtxt = """
      model_specs { name: "" }
      metrics_specs {
        metrics {
          class_name: "MeanLabel"
          per_slice_thresholds {
            slicing_specs: {}
            threshold {
              value_threshold {
                lower_bound { value: 0.9 }
              }
              change_threshold {
                direction: HIGHER_IS_BETTER
                absolute { value: -1e-10 }
              }
            }
          }
        }
      }
    """

        eval_config = text_format.Parse(eval_config_pbtxt,
                                        config_pb2.EvalConfig())

        with self.assertRaises(RuntimeError):
            config_util.update_eval_config_with_defaults(eval_config,
                                                         has_baseline=False,
                                                         rubber_stamp=False)
Example No. 13
    def testModelSignaturesDoFn(self, save_as_keras, signature_names,
                                default_signature_names, prefer_dict_outputs,
                                use_schema, expected_num_outputs):
        export_path = self.createModelWithMultipleDenseInputs(save_as_keras)
        eval_shared_models = {}
        model_specs = []
        for sigs in signature_names.values():
            for model_name in sigs:
                if model_name not in eval_shared_models:
                    eval_shared_models[
                        model_name] = self.createTestEvalSharedModel(
                            eval_saved_model_path=export_path,
                            model_name=model_name,
                            tags=[tf.saved_model.SERVING])
                    model_specs.append(config_pb2.ModelSpec(name=model_name))
        eval_config = config_pb2.EvalConfig(model_specs=model_specs)
        schema = self.createDenseInputsSchema() if use_schema else None
        tfx_io = tf_example_record.TFExampleBeamRecord(
            physical_format='text',
            schema=schema,
            raw_record_column_name=constants.ARROW_INPUT_COLUMN)

        examples = [
            self._makeExample(input_1=1.0, input_2=2.0),
            self._makeExample(input_1=3.0, input_2=4.0),
            self._makeExample(input_1=5.0, input_2=6.0),
        ]

        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            result = (pipeline
                      | 'Create' >> beam.Create(
                          [e.SerializeToString() for e in examples])
                      | 'BatchExamples' >> tfx_io.BeamSource(batch_size=3)
                      | 'ToExtracts' >> beam.Map(_record_batch_to_extracts)
                      | 'ModelSignatures' >> beam.ParDo(
                          model_util.ModelSignaturesDoFn(
                              eval_config=eval_config,
                              eval_shared_models=eval_shared_models,
                              signature_names=signature_names,
                              default_signature_names=default_signature_names,
                              prefer_dict_outputs=prefer_dict_outputs)))

            # pylint: enable=no-value-for-parameter

            def check_result(got):
                try:
                    self.assertLen(got, 1)
                    for key in signature_names:
                        self.assertIn(key, got[0])
                        if prefer_dict_outputs:
                            self.assertIsInstance(got[0][key], dict)
                            self.assertEqual(tfma_util.batch_size(got[0][key]),
                                             expected_num_outputs)

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(result, check_result, label='result')
Example No. 14
  def testBatchSizeLimit(self):
    temp_export_dir = self._getExportDir()
    _, export_dir = batch_size_limited_classifier.simple_batch_size_limited_classifier(
        None, temp_export_dir)
    eval_shared_model = self.createTestEvalSharedModel(
        eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])
    eval_config = config_pb2.EvalConfig(model_specs=[config_pb2.ModelSpec()])
    schema = text_format.Parse(
        """
        feature {
          name: "classes"
          type: BYTES
        }
        feature {
          name: "scores"
          type: FLOAT
        }
        feature {
          name: "labels"
          type: BYTES
        }
        """, schema_pb2.Schema())
    tfx_io = test_util.InMemoryTFExampleRecord(
        schema=schema, raw_record_column_name=constants.ARROW_INPUT_COLUMN)
    tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
        arrow_schema=tfx_io.ArrowSchema(),
        tensor_representations=tfx_io.TensorRepresentations())
    feature_extractor = features_extractor.FeaturesExtractor(
        eval_config=eval_config,
        tensor_representations=tensor_adapter_config.tensor_representations)
    prediction_extractor = predictions_extractor.PredictionsExtractor(
        eval_config=eval_config, eval_shared_model=eval_shared_model)

    examples = []
    for _ in range(4):
      examples.append(
          self._makeExample(classes='first', scores=0.0, labels='third'))

    with beam.Pipeline() as pipeline:
      predict_extracts = (
          pipeline
          | 'Create' >> beam.Create([e.SerializeToString() for e in examples],
                                    reshuffle=False)
          | 'BatchExamples' >> tfx_io.BeamSource(batch_size=1)
          | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
          | feature_extractor.stage_name >> feature_extractor.ptransform
          | prediction_extractor.stage_name >> prediction_extractor.ptransform)

      def check_result(got):
        try:
          self.assertLen(got, 4)
          # We can't verify the actual predictions, but we can verify the keys.
          for item in got:
            self.assertIn(constants.PREDICTIONS_KEY, item)

        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(predict_extracts, check_result, label='result')
Example No. 15
 def _makeEvalResults(self):
     result_a = view_types.EvalResult(slicing_metrics=self._makeTestData(),
                                      plots=None,
                                      attributions=None,
                                      config=config_pb2.EvalConfig(),
                                      data_location=self.data_location_1,
                                      file_format='tfrecords',
                                      model_location=self.model_location_1)
     result_b = view_types.EvalResult(
         slicing_metrics=[self.result_c2],
         plots=None,
         attributions=None,
         config=config_pb2.EvalConfig(),
         data_location=self.full_data_location_2,
         file_format='tfrecords',
         model_location=self.full_model_location_2)
     return view_types.EvalResults([result_a, result_b],
                                   constants.MODEL_CENTRIC_MODE)
Example No. 16
    def testCustomTFMetricWithPadding(self, example_indices, expected):
        computation = tf_metric_wrapper.tf_metric_computations(
            [
                _CustomMetric(name='custom_label', update_y_pred=False),
                _CustomMetric(name='custom_pred', update_y_pred=True),
            ],
            eval_config=config_pb2.EvalConfig(model_specs=[
                config_pb2.ModelSpec(padding_options=config_pb2.PaddingOptions(
                    label_int_padding=-1,
                    prediction_float_padding=-1.0,
                ))
            ]),
            example_weighted=True)[0]

        examples = [{
            'labels': np.array([1], dtype=np.int64),
            'predictions': np.array([0.1, 0.2, 0.3, 0.0]),
            'example_weights': np.array([1.0])
        }, {
            'labels': np.array([1, 2], dtype=np.int64),
            'predictions': np.array([0.1, 0.2, 0.0]),
            'example_weights': np.array([1.0])
        }, {
            'labels': np.array([1, 2, 3], dtype=np.int64),
            'predictions': np.array([0.1, 0.2, 0.3]),
            'example_weights': np.array([2.0])
        }]

        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            result = (
                pipeline
                |
                'Create' >> beam.Create([examples[i] for i in example_indices])
                | 'Process' >> beam.Map(metric_util.to_standard_metric_inputs)
                | 'AddSlice' >> beam.Map(lambda x: ((), x))
                | 'Combine' >> beam.CombinePerKey(computation.combiner))

            # pylint: enable=no-value-for-parameter

            def check_result(got):
                try:
                    self.assertLen(got, 1)
                    got_slice_key, got_metrics = got[0]
                    self.assertEqual(got_slice_key, ())

                    custom_label_key = metric_types.MetricKey(
                        name='custom_label', example_weighted=True)
                    custom_pred_key = metric_types.MetricKey(
                        name='custom_pred', example_weighted=True)
                    self.assertDictElementsAlmostEqual(got_metrics, expected)

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(result, check_result, label='result')
Example No. 17
 def testGetModelAndOutputNamesMultiOutput(self):
     eval_config = config_pb2.EvalConfig(
         model_specs=[config_pb2.ModelSpec()])
     self.assertEqual([(None, 'output1'), (None, 'output2')],
                      util.StandardExtracts({
                          constants.PREDICTIONS_KEY: {
                              'output1': np.array([]),
                              'output2': np.array([])
                          }
                      }).get_model_and_output_names(eval_config))
Example No. 18
  def testSqlSliceKeyExtractorWithMultipleSchema(self):
    eval_config = config_pb2.EvalConfig(slicing_specs=[
        config_pb2.SlicingSpec(slice_keys_sql="""
        SELECT
          STRUCT(fixed_string)
        FROM
          example.fixed_string,
          example.fixed_int
        WHERE fixed_int = 1
        """)
    ])
    slice_key_extractor = sql_slice_key_extractor.SqlSliceKeyExtractor(
        eval_config)

    record_batch_1 = pa.RecordBatch.from_arrays([
        pa.array([[1], [1], [2]], type=pa.list_(pa.int64())),
        pa.array([[1.0], [1.0], [2.0]], type=pa.list_(pa.float64())),
        pa.array([['fixed_string1'], ['fixed_string2'], ['fixed_string3']],
                 type=pa.list_(pa.string())),
    ], ['fixed_int', 'fixed_float', 'fixed_string'])
    record_batch_2 = pa.RecordBatch.from_arrays([
        pa.array([[1], [1], [2]], type=pa.list_(pa.int64())),
        pa.array([[1.0], [1.0], [2.0]], type=pa.list_(pa.float64())),
        pa.array([['fixed_string1'], ['fixed_string2'], ['fixed_string3']],
                 type=pa.list_(pa.string())),
        pa.array([['extra_field1'], ['extra_field2'], ['extra_field3']],
                 type=pa.list_(pa.string())),
    ], ['fixed_int', 'fixed_float', 'fixed_string', 'extra_field'])

    with beam.Pipeline() as pipeline:
      # pylint: disable=no-value-for-parameter
      result = (
          pipeline
          | 'Create' >> beam.Create([record_batch_1, record_batch_2],
                                    reshuffle=False)
          | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
          | slice_key_extractor.stage_name >> slice_key_extractor.ptransform)

      # pylint: enable=no-value-for-parameter

      def check_result(got):
        try:
          self.assertLen(got, 2)
          self.assertEqual(got[0][constants.SLICE_KEY_TYPES_KEY],
                           [[(('fixed_string', 'fixed_string1'),)],
                            [(('fixed_string', 'fixed_string2'),)], []])
          self.assertEqual(got[1][constants.SLICE_KEY_TYPES_KEY],
                           [[(('fixed_string', 'fixed_string1'),)],
                            [(('fixed_string', 'fixed_string2'),)], []])

        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(result, check_result)
Example No. 19
    def testSqlSliceKeyExtractorWithEmptySqlConfig(self):
        eval_config = config_pb2.EvalConfig()
        feature_extractor = features_extractor.FeaturesExtractor(
            eval_config=eval_config)
        slice_key_extractor = sql_slice_key_extractor.SqlSliceKeyExtractor(
            eval_config)

        tfx_io = tf_example_record.TFExampleBeamRecord(
            physical_format='inmem',
            telemetry_descriptors=['test', 'component'],
            schema=_SCHEMA,
            raw_record_column_name=constants.ARROW_INPUT_COLUMN)
        examples = [
            self._makeExample(fixed_int=1,
                              fixed_float=1.0,
                              fixed_string='fixed_string1'),
            self._makeExample(fixed_int=1,
                              fixed_float=1.0,
                              fixed_string='fixed_string2'),
            self._makeExample(fixed_int=2,
                              fixed_float=0.0,
                              fixed_string='fixed_string3')
        ]

        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            result = (
                pipeline
                | 'Create' >> beam.Create(
                    [e.SerializeToString() for e in examples], reshuffle=False)
                | 'BatchExamples' >> tfx_io.BeamSource(batch_size=3)
                |
                'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
                | feature_extractor.stage_name >> feature_extractor.ptransform
                | slice_key_extractor.stage_name >>
                slice_key_extractor.ptransform)

            # pylint: enable=no-value-for-parameter

            def check_result(got):
                try:
                    self.assertLen(got, 1)
                    np.testing.assert_equal(
                        got[0][constants.SLICE_KEY_TYPES_KEY],
                        types.VarLenTensorValue.from_dense_rows(
                            [np.array([]),
                             np.array([]),
                             np.array([])]))

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(result, check_result)
Example No. 20
def get_evaluation_config(
    problem_type: constants.ProblemType,
    evaluation_column_specs: EvaluationColumnSpecs,
    slice_features: List[List[ColumnSpec]],
    class_names: Optional[List[Text]] = None,
    positive_class_names: Optional[List[Text]] = None,
    top_k_list: Optional[List[int]] = None
) -> model_evaluation_pb2.EvaluationConfig:
  """Build a Model Evaluation Configuration.

  Args:
    problem_type: One of the ProblemType enum.
    evaluation_column_specs: column specs necessary for parsing evaluation data.
    slice_features: List of slice specs, each a list of keys to slice. The
      default slice over all values will automatically be added.
    class_names: For classification-type problems, a list of string names for
      classes.
    positive_class_names: For classification-type problems, a list of string
      names for classes to be treated as positively valued.
    top_k_list: For classification-type problems, if specified, a list of top-k
      aggregations.

  Returns:
    An EvaluationConfig.
  """
  tfma_eval_config = config_pb2.EvalConfig()

  tfma_eval_config.model_specs.append(
      config_pb2.ModelSpec(
          prediction_key=evaluation_column_specs.predicted_score_column_spec
          .as_string(),
          prediction_keys=None,
          label_key=evaluation_column_specs.ground_truth_column_spec.as_string(
          ),
          label_keys=None))

  metric_specs = _get_metric_specs(problem_type, class_names,
                                   positive_class_names, top_k_list)
  assert metric_specs, 'At least one metric_spec must be defined %r' % metric_specs
  tfma_eval_config.metrics_specs.extend(metric_specs)

  slicing_specs = _get_tfma_slicing_specs(slice_features)
  assert slicing_specs, 'At least one slicing_spec must be defined %r' % slicing_specs
  tfma_eval_config.slicing_specs.extend(slicing_specs)

  adapter = tfma_adapter.TFMAToME(
      class_name_list=class_names,
      predicted_label_column_spec=evaluation_column_specs
      .predicted_label_column_spec,
      predicted_label_id_column_spec=evaluation_column_specs
      .predicted_label_id_column_spec)
  return adapter.eval_config(tfma_eval_config)
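
The docstring above describes the builder's inputs; the sketch below shows one hypothetical call. It assumes the EvaluationColumnSpecs and ColumnSpec constructors accept the fields and values shown and that constants.ProblemType has a multiclass member; those details are assumptions for illustration and do not come from the listing itself.

# Hypothetical usage sketch (not taken from the source above): the
# ColumnSpec / EvaluationColumnSpecs constructor arguments and the
# ProblemType member are assumed names chosen for illustration only.
column_specs = EvaluationColumnSpecs(
    ground_truth_column_spec=ColumnSpec('label'),
    predicted_score_column_spec=ColumnSpec('score'),
    predicted_label_column_spec=ColumnSpec('predicted_label'),
    predicted_label_id_column_spec=ColumnSpec('predicted_label_id'))

evaluation_config = get_evaluation_config(
    problem_type=constants.ProblemType.MULTICLASS,  # assumed enum member
    evaluation_column_specs=column_specs,
    # Each inner list is one slice; the overall slice is added automatically.
    slice_features=[[ColumnSpec('country')]],
    class_names=['cat', 'dog'],
    positive_class_names=['dog'],
    top_k_list=[1, 2])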
Example No. 21
 def testGetModelAndOutputNamesMultiModel(self):
     eval_config = config_pb2.EvalConfig(model_specs=[
         config_pb2.ModelSpec(name=constants.BASELINE_KEY),
         config_pb2.ModelSpec(name=constants.CANDIDATE_KEY)
     ])
     self.assertEqual([(constants.BASELINE_KEY, None),
                       (constants.CANDIDATE_KEY, None)],
                      util.StandardExtracts({
                          constants.PREDICTIONS_KEY: {
                              constants.BASELINE_KEY: np.array([]),
                              constants.CANDIDATE_KEY: np.array([])
                          }
                      }).get_model_and_output_names(eval_config))
Example No. 22
  def testSqlSliceKeyExtractor(self):
    eval_config = config_pb2.EvalConfig(slicing_specs=[
        config_pb2.SlicingSpec(slice_keys_sql="""
        SELECT
          STRUCT(fixed_string)
        FROM
          example.fixed_string,
          example.fixed_int
        WHERE fixed_int = 1
        """)
    ])
    slice_key_extractor = sql_slice_key_extractor.SqlSliceKeyExtractor(
        eval_config)

    tfx_io = tf_example_record.TFExampleBeamRecord(
        physical_format='inmem',
        telemetry_descriptors=['test', 'component'],
        schema=_SCHEMA,
        raw_record_column_name=constants.ARROW_INPUT_COLUMN)
    examples = [
        self._makeExample(
            fixed_int=1, fixed_float=1.0, fixed_string='fixed_string1'),
        self._makeExample(
            fixed_int=1, fixed_float=1.0, fixed_string='fixed_string2'),
        self._makeExample(
            fixed_int=2, fixed_float=0.0, fixed_string='fixed_string3')
    ]

    with beam.Pipeline() as pipeline:
      # pylint: disable=no-value-for-parameter
      result = (
          pipeline
          | 'Create' >> beam.Create([e.SerializeToString() for e in examples],
                                    reshuffle=False)
          | 'BatchExamples' >> tfx_io.BeamSource(batch_size=3)
          | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
          | slice_key_extractor.stage_name >> slice_key_extractor.ptransform)

      # pylint: enable=no-value-for-parameter

      def check_result(got):
        try:
          self.assertLen(got, 1)
          self.assertEqual(got[0][constants.SLICE_KEY_TYPES_KEY],
                           [[(('fixed_string', 'fixed_string1'),)],
                            [(('fixed_string', 'fixed_string2'),)], []])

        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(result, check_result)
Example No. 23
    def _runTestWithCustomCheck(self,
                                examples,
                                eval_export_dir,
                                metrics_callbacks,
                                slice_spec=None,
                                custom_metrics_check=None,
                                custom_plots_check=None,
                                custom_result_check=None):
        # make sure we are doing some checks
        self.assertTrue(custom_metrics_check is not None
                        or custom_plots_check is not None
                        or custom_result_check is not None)
        serialized_examples = [ex.SerializeToString() for ex in examples]
        slicing_specs = None
        if slice_spec:
            slicing_specs = [s.to_proto() for s in slice_spec]
        eval_config = config_pb2.EvalConfig(slicing_specs=slicing_specs)
        eval_shared_model = self.createTestEvalSharedModel(
            eval_saved_model_path=eval_export_dir,
            add_metrics_callbacks=metrics_callbacks)
        extractors = model_eval_lib.default_extractors(
            eval_config=eval_config, eval_shared_model=eval_shared_model)
        tfx_io = raw_tf_record.RawBeamRecordTFXIO(
            physical_format='inmemory',
            raw_record_column_name=constants.ARROW_INPUT_COLUMN,
            telemetry_descriptors=['TFMATest'])
        with beam.Pipeline() as pipeline:
            (metrics, plots), _ = (
                pipeline
                | 'Create' >> beam.Create(serialized_examples)
                | 'BatchExamples' >> tfx_io.BeamSource()
                |
                'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
                | 'Extract' >> tfma_unit.Extract(extractors=extractors)  # pylint: disable=no-value-for-parameter
                | 'ComputeMetricsAndPlots' >>
                legacy_metrics_and_plots_evaluator._ComputeMetricsAndPlots(  # pylint: disable=protected-access
                    eval_shared_model=eval_shared_model,
                    compute_confidence_intervals=self.
                    compute_confidence_intervals,
                    random_seed_for_testing=self.deterministic_test_seed))
            if custom_metrics_check is not None:
                util.assert_that(metrics,
                                 custom_metrics_check,
                                 label='metrics')
            if custom_plots_check is not None:
                util.assert_that(plots, custom_plots_check, label='plot')

        result = pipeline.run()
        if custom_result_check is not None:
            custom_result_check(result)
Example No. 24
    def testUpdateConfigWithDefaultsEmtpyModelName(self):
        eval_config_pbtxt = """
      model_specs { name: "" }
      metrics_specs {
        metrics { class_name: "ExampleCount" }
      }
    """
        eval_config = text_format.Parse(eval_config_pbtxt,
                                        config_pb2.EvalConfig())

        expected_eval_config_pbtxt = """
      model_specs { name: "" }
      metrics_specs {
        metrics { class_name: "ExampleCount" }
        model_names: [""]
      }
    """
        expected_eval_config = text_format.Parse(expected_eval_config_pbtxt,
                                                 config_pb2.EvalConfig())

        got_eval_config = config_util.update_eval_config_with_defaults(
            eval_config)
        self.assertProtoEquals(got_eval_config, expected_eval_config)
Example No. 25
    def testUpdateConfigWithDefaultsAutomaticallyAddsBaselineModel(self):
        eval_config_pbtxt = """
      model_specs { label_key: "my_label" }
      metrics_specs {
        metrics { class_name: "ExampleCount" }
      }
    """
        eval_config = text_format.Parse(eval_config_pbtxt,
                                        config_pb2.EvalConfig())

        expected_eval_config_pbtxt = """
      model_specs { name: "candidate" label_key: "my_label" }
      model_specs { name: "baseline" label_key: "my_label" is_baseline: true }
      metrics_specs {
        metrics { class_name: "ExampleCount" }
        model_names: ["candidate", "baseline"]
      }
    """
        expected_eval_config = text_format.Parse(expected_eval_config_pbtxt,
                                                 config_pb2.EvalConfig())

        got_eval_config = config_util.update_eval_config_with_defaults(
            eval_config, has_baseline=True)
        self.assertProtoEquals(got_eval_config, expected_eval_config)
Example No. 26
    def testBatchedPredict(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = linear_classifier.simple_linear_classifier(
            None, temp_eval_export_dir)
        eval_shared_model = model_eval_lib.default_eval_shared_model(
            eval_saved_model_path=eval_export_dir)
        eval_config = config_pb2.EvalConfig(
            model_specs=[config_pb2.ModelSpec()])
        with beam.Pipeline() as pipeline:
            examples = [
                self._makeExample(age=3.0, language='english', label=1.0),
                self._makeExample(age=3.0, language='chinese', label=0.0),
                self._makeExample(age=4.0, language='english', label=1.0),
                self._makeExample(age=5.0, language='chinese', label=0.0),
            ]
            serialized_examples = [e.SerializeToString() for e in examples]

            tfx_io = raw_tf_record.RawBeamRecordTFXIO(
                physical_format='inmemory',
                raw_record_column_name=constants.ARROW_INPUT_COLUMN,
                telemetry_descriptors=['TFMATest'])
            extractor = predict_extractor.PredictExtractor(
                eval_shared_model, eval_config=eval_config)
            predict_extracts = (
                pipeline
                | 'Create' >> beam.Create(serialized_examples, reshuffle=False)
                | 'BatchExamples' >> tfx_io.BeamSource(batch_size=2)
                |
                'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
                | 'Predict' >> extractor.ptransform)

            def check_result(got):
                try:
                    self.assertLen(got, 2)
                    for item in got:
                        self.assertIn(constants.FEATURES_KEY, item)
                        for feature in ('language', 'age'):
                            for features_dict in item[constants.FEATURES_KEY]:
                                self.assertIn(feature, features_dict)
                        self.assertIn(constants.LABELS_KEY, item)
                        self.assertIn(constants.PREDICTIONS_KEY, item)

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(predict_extracts, check_result, label='result')
Example No. 27
    def testToComputationsWithMixedAggregationAndNonAggregationMetrics(self):
        computations = metric_specs.to_computations([
            config_pb2.MetricsSpec(metrics=[
                config_pb2.MetricConfig(class_name='CategoricalAccuracy')
            ]),
            config_pb2.MetricsSpec(
                metrics=[
                    config_pb2.MetricConfig(class_name='BinaryCrossentropy')
                ],
                binarize=config_pb2.BinarizationOptions(
                    class_ids={'values': [1]}),
                aggregate=config_pb2.AggregationOptions(micro_average=True))
        ], config_pb2.EvalConfig())

        # 3 separate computations should be used (one for aggregated metrics, one
        # for non-aggregated metrics, and one for metrics associated with class 1)
        self.assertLen(computations, 3)
Example No. 28
    def testMacroAverage(self):
        metric_name = 'test'
        class_ids = [0, 1, 2]
        sub_keys = [metric_types.SubKey(class_id=i) for i in class_ids]
        sub_key_values = [0.1, 0.2, 0.3]
        computations = aggregation.macro_average(
            metric_name,
            sub_keys,
            eval_config=config_pb2.EvalConfig(),
            class_weights={
                0: 1.0,
                1: 1.0,
                2: 1.0
            })
        metric = computations[0]

        sub_metrics = {}
        for sub_key, value in zip(sub_keys, sub_key_values):
            key = metric_types.MetricKey(name=metric_name, sub_key=sub_key)
            sub_metrics[key] = value

        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            result = (pipeline
                      | 'Create' >> beam.Create([((), sub_metrics)])
                      | 'ComputeMetric' >>
                      beam.Map(lambda x: (x[0], metric.result(x[1]))))

            # pylint: enable=no-value-for-parameter

            def check_result(got):
                try:
                    self.assertLen(got, 1)
                    got_slice_key, got_metrics = got[0]
                    self.assertEqual(got_slice_key, ())
                    key = metric.keys[0]
                    expected_value = (0.1 + 0.2 + 0.3) / 3.0
                    self.assertDictElementsAlmostEqual(got_metrics,
                                                       {key: expected_value},
                                                       places=5)

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(result, check_result, label='result')
Example No. 29
 def testSerializeDeserializeLegacyEvalConfig(self):
     output_path = self._getTempDir()
     old_config = LegacyConfig(
         model_location='/path/to/model',
         data_location='/path/to/data',
         slice_spec=[
             slicer.SingleSliceSpec(columns=['country'],
                                    features=[('age', 5), ('gender', 'f')]),
             slicer.SingleSliceSpec(columns=['interest'],
                                    features=[('age', 6), ('gender', 'm')])
         ],
         example_count_metric_key=None,
         example_weight_metric_key='key',
         compute_confidence_intervals=False,
         k_anonymization_count=1)
     final_dict = {}
     final_dict['tfma_version'] = tfma_version.VERSION
     final_dict['eval_config'] = old_config
     with tf.io.TFRecordWriter(os.path.join(output_path,
                                            'eval_config')) as w:
         w.write(pickle.dumps(final_dict))
     got_eval_config, got_data_location, _, got_model_locations = (
         eval_config_writer.load_eval_run(output_path))
     options = config_pb2.Options()
     options.compute_confidence_intervals.value = (
         old_config.compute_confidence_intervals)
     options.min_slice_size.value = old_config.k_anonymization_count
     eval_config = config_pb2.EvalConfig(slicing_specs=[
         config_pb2.SlicingSpec(feature_keys=['country'],
                                feature_values={
                                    'age': '5',
                                    'gender': 'f'
                                }),
         config_pb2.SlicingSpec(feature_keys=['interest'],
                                feature_values={
                                    'age': '6',
                                    'gender': 'm'
                                })
     ],
                                         options=options)
     self.assertEqual(eval_config, got_eval_config)
     self.assertEqual(old_config.data_location, got_data_location)
     self.assertLen(got_model_locations, 1)
     self.assertEqual(old_config.model_location,
                      list(got_model_locations.values())[0])
Example No. 30
    def testModelSignaturesDoFnError(self):
        export_path = self.createModelWithInvalidOutputShape()
        signature_names = {constants.PREDICTIONS_KEY: {'': [None]}}
        eval_shared_models = {
            '':
            self.createTestEvalSharedModel(eval_saved_model_path=export_path,
                                           tags=[tf.saved_model.SERVING])
        }
        model_specs = [config_pb2.ModelSpec()]
        eval_config = config_pb2.EvalConfig(model_specs=model_specs)
        schema = self.createDenseInputsSchema()
        tfx_io = tf_example_record.TFExampleBeamRecord(
            physical_format='text',
            schema=schema,
            raw_record_column_name=constants.ARROW_INPUT_COLUMN)
        tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
            arrow_schema=tfx_io.ArrowSchema(),
            tensor_representations=tfx_io.TensorRepresentations())

        examples = [
            self._makeExample(input_1=1.0, input_2=2.0),
            self._makeExample(input_1=3.0, input_2=4.0),
            self._makeExample(input_1=5.0, input_2=6.0),
        ]

        with self.assertRaisesRegex(
                ValueError,
                'First dimension does not correspond with batch size.'):
            with beam.Pipeline() as pipeline:
                # pylint: disable=no-value-for-parameter
                _ = (pipeline
                     | 'Create' >> beam.Create(
                         [e.SerializeToString() for e in examples])
                     | 'BatchExamples' >> tfx_io.BeamSource(batch_size=3)
                     | 'ToExtracts' >> beam.Map(_record_batch_to_extracts)
                     | 'ModelSignatures' >> beam.ParDo(
                         model_util.ModelSignaturesDoFn(
                             eval_config=eval_config,
                             eval_shared_models=eval_shared_models,
                             signature_names=signature_names,
                             default_signature_names=None,
                             prefer_dict_outputs=False,
                             tensor_adapter_config=tensor_adapter_config)))