def testMetricKeysToSkipForConfidenceIntervals(self):
     metrics_specs = [
         config_pb2.MetricsSpec(metrics=[
             config_pb2.MetricConfig(
                 class_name='ExampleCount',
                 config=json.dumps({'name': 'example_count'}),
                 threshold=config_pb2.MetricThreshold(
                     value_threshold=config_pb2.GenericValueThreshold())),
             config_pb2.MetricConfig(
                 class_name='MeanLabel',
                 config=json.dumps({'name': 'mean_label'}),
                 threshold=config_pb2.MetricThreshold(
                     change_threshold=config_pb2.GenericChangeThreshold())),
             config_pb2.MetricConfig(
                 class_name='MeanSquaredError',
                 config=json.dumps({'name': 'mse'}),
                 threshold=config_pb2.MetricThreshold(
                     change_threshold=config_pb2.GenericChangeThreshold()))
         ],
                                model_names=['model_name1', 'model_name2'],
                                output_names=[
                                    'output_name1', 'output_name2'
                                ]),
     ]
     metrics_specs += metric_specs.specs_from_metrics(
         [tf.keras.metrics.MeanSquaredError('mse')],
         model_names=['model_name1', 'model_name2'])
     keys = metric_specs.metric_keys_to_skip_for_confidence_intervals(
         metrics_specs, eval_config=config_pb2.EvalConfig())
     self.assertLen(keys, 8)
     self.assertIn(
         metric_types.MetricKey(name='example_count',
                                model_name='model_name1',
                                output_name='output_name1'), keys)
     self.assertIn(
         metric_types.MetricKey(name='example_count',
                                model_name='model_name1',
                                output_name='output_name2'), keys)
     self.assertIn(
         metric_types.MetricKey(name='example_count',
                                model_name='model_name2',
                                output_name='output_name1'), keys)
     self.assertIn(
         metric_types.MetricKey(name='example_count',
                                model_name='model_name2',
                                output_name='output_name2'), keys)
     self.assertIn(
         metric_types.MetricKey(name='example_count',
                                model_name='model_name1'), keys)
     self.assertIn(
         metric_types.MetricKey(name='weighted_example_count',
                                model_name='model_name1',
                                example_weighted=True), keys)
     self.assertIn(
         metric_types.MetricKey(name='example_count',
                                model_name='model_name2'), keys)
     self.assertIn(
         metric_types.MetricKey(name='weighted_example_count',
                                model_name='model_name2',
                                example_weighted=True), keys)
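# A hedged, standalone usage sketch (not part of the original tests): builds
# specs for a single hypothetical model named 'candidate' and checks that the
# count-style example_count key is among the keys skipped for confidence
# intervals, while point metrics such as AUC are not. Assumes the same TFMA
# imports used by the test above.
def _example_keys_to_skip_sketch():
  specs = metric_specs.specs_from_metrics(
      [tf.keras.metrics.AUC(name='auc')], model_names=['candidate'])
  skipped = metric_specs.metric_keys_to_skip_for_confidence_intervals(
      specs, eval_config=config_pb2.EvalConfig())
  assert metric_types.MetricKey(
      name='example_count', model_name='candidate') in skipped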
def _serialize_tf_loss(loss: tf.keras.losses.Loss) -> config_pb2.MetricConfig:
  """Serializes TF loss."""
  cfg = metric_util.serialize_loss(loss)
  return config_pb2.MetricConfig(
      class_name=cfg['class_name'],
      module=loss.__class__.__module__,
      config=json.dumps(cfg['config'], sort_keys=True))
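# Hedged usage sketch for _serialize_tf_loss (illustrative only): the exact
# JSON stored in `config` depends on the installed TF/Keras version, but the
# class name and the loss name round-trip as shown.
def _example_serialize_loss_sketch():
  loss_config = _serialize_tf_loss(
      tf.keras.losses.MeanAbsoluteError(name='mae'))
  assert loss_config.class_name == 'MeanAbsoluteError'
  assert json.loads(loss_config.config)['name'] == 'mae'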
def _serialize_tf_metric(
    metric: tf.keras.metrics.Metric) -> config_pb2.MetricConfig:
  """Serializes TF metric."""
  cfg = metric_util.serialize_metric(metric)
  return config_pb2.MetricConfig(
      class_name=cfg['class_name'],
      config=json.dumps(cfg['config'], sort_keys=True))
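# Companion sketch for _serialize_tf_metric: unlike the loss case above, no
# `module` field is set. Again illustrative; the serialized config contents
# vary with the TF/Keras version.
def _example_serialize_metric_sketch():
  metric_config = _serialize_tf_metric(
      tf.keras.metrics.MeanSquaredError('mse'))
  assert metric_config.class_name == 'MeanSquaredError'
  assert json.loads(metric_config.config)['name'] == 'mse'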
    def testToComputationsWithMixedAggregationAndNonAggregationMetrics(self):
        computations = metric_specs.to_computations([
            config_pb2.MetricsSpec(metrics=[
                config_pb2.MetricConfig(class_name='CategoricalAccuracy')
            ]),
            config_pb2.MetricsSpec(
                metrics=[
                    config_pb2.MetricConfig(class_name='BinaryCrossentropy')
                ],
                binarize=config_pb2.BinarizationOptions(
                    class_ids={'values': [1]}),
                aggregate=config_pb2.AggregationOptions(micro_average=True))
        ], config_pb2.EvalConfig())

        # Three separate computations should be used: one for the aggregated
        # metrics, one for the non-aggregated metrics, and one for the metrics
        # associated with class 1.
        self.assertLen(computations, 3)
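        # Hedged inspection sketch (assumes each item exposes a `keys` list,
        # as metric_types.MetricComputation does):
        #   for computation in computations:
        #     print(computation.keys)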
def _serialize_tfma_metric(
    metric: metric_types.Metric) -> config_pb2.MetricConfig:
  """Serializes TFMA metric."""
  # This implementation is identical to _serialize_tf_metric, but two
  # implementations are kept for symmetry with the deserialize functions, where
  # separate implementations are required (and for consistency with the keras
  # implementation).
  cfg = tf.keras.utils.serialize_keras_object(metric)
  return config_pb2.MetricConfig(
      class_name=cfg['class_name'],
      config=json.dumps(cfg['config'], sort_keys=True))
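# Hedged usage sketch for _serialize_tfma_metric, using a TFMA metric class
# referenced elsewhere in this file; exact config contents may vary by version.
def _example_serialize_tfma_metric_sketch():
  metric_config = _serialize_tfma_metric(calibration.MeanLabel('mean_label'))
  assert metric_config.class_name == 'MeanLabel'
  assert json.loads(metric_config.config)['name'] == 'mean_label'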
    def testMetricsSpecBeamCounter(self):
        with beam.Pipeline() as pipeline:
            metrics_spec = config_pb2.MetricsSpec(metrics=[
                config_pb2.MetricConfig(class_name='FairnessIndicators')
            ])
            model_types = set(['tf_js', 'tf_keras'])
            _ = pipeline | counter_util.IncrementMetricsSpecsCounters(
                [metrics_spec], model_types)

        result = pipeline.run()

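        # One counter is committed per model type, named
        # 'metric_computed_FairnessIndicators_v2_' + model_type under
        # constants.METRICS_NAMESPACE.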
        for model_type in model_types:
            metric_filter = beam.metrics.metric.MetricsFilter().with_namespace(
                constants.METRICS_NAMESPACE).with_name(
                    'metric_computed_FairnessIndicators_v2_' + model_type)
            actual_metrics_count = result.metrics().query(
                filter=metric_filter)['counters'][0].committed

            self.assertEqual(actual_metrics_count, 1)
    def _metric_config(
            self, me_metric_config: me_proto.MetricConfig
    ) -> Optional[config_pb2.MetricConfig]:
        """Converts an ME MetricConfig into a TFMA MetricConfig.

        Args:
          me_metric_config: Input ME MetricConfig.

        Returns:
          The equivalent TFMA MetricConfig, or None if no input config is
          given.
        """
        if not me_metric_config:
            return None
        tfma_metric_config = config_pb2.MetricConfig()
        if me_metric_config.tfma_metric_config.class_name:
            tfma_metric_config.class_name = (
                me_metric_config.tfma_metric_config.class_name)
        if me_metric_config.tfma_metric_config.module_name:
            tfma_metric_config.module = (
                me_metric_config.tfma_metric_config.module_name)
        if me_metric_config.tfma_metric_config.config:
            tfma_metric_config.config = me_metric_config.tfma_metric_config.config
        # TODO(b/159642889): Error out if messages cannot be converted.
        return tfma_metric_config
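    # Hedged usage sketch for _metric_config. Only the nested
    # `tfma_metric_config.{class_name, module_name, config}` fields read above
    # are assumed to exist on the ME proto; everything else is illustrative:
    #
    #   me_cfg = me_proto.MetricConfig()
    #   me_cfg.tfma_metric_config.class_name = 'BinaryAccuracy'
    #   me_cfg.tfma_metric_config.module_name = 'tf.keras.metrics'
    #   tfma_cfg = self._metric_config(me_cfg)
    #   assert tfma_cfg.class_name == 'BinaryAccuracy'
    #   assert tfma_cfg.module == 'tf.keras.metrics'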
    def testSpecsFromMetrics(self):
        metrics_specs = metric_specs.specs_from_metrics(
            {
                'output_name1': [
                    tf.keras.metrics.Precision(name='precision'),
                    tf.keras.metrics.MeanSquaredError('mse'),
                    tf.keras.losses.MeanAbsoluteError(name='mae'),
                ],
                'output_name2': [
                    confusion_matrix_metrics.Precision(name='precision'),
                    tf.keras.losses.MeanAbsolutePercentageError(name='mape'),
                    calibration.MeanPrediction('mean_prediction')
                ]
            },
            unweighted_metrics={
                'output_name1': [calibration.MeanLabel('mean_label')],
                'output_name2':
                [tf.keras.metrics.RootMeanSquaredError('rmse')]
            },
            model_names=['model_name1', 'model_name2'],
            binarize=config_pb2.BinarizationOptions(
                class_ids={'values': [0, 1]}),
            aggregate=config_pb2.AggregationOptions(macro_average=True))

        self.assertLen(metrics_specs, 7)
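        # Observed layout, summarized from the assertions below:
        #   metrics_specs[0]: ExampleCount (unweighted, all outputs)
        #   metrics_specs[1] / [4]: WeightedExampleCount per output
        #   metrics_specs[2] / [5]: the weighted metrics per output
        #   metrics_specs[3] / [6]: the unweighted_metrics per output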
        self.assertProtoEquals(
            metrics_specs[0],
            config_pb2.MetricsSpec(
                metrics=[
                    config_pb2.MetricConfig(class_name='ExampleCount',
                                            config=json.dumps(
                                                {'name': 'example_count'})),
                ],
                model_names=['model_name1', 'model_name2'],
                example_weights=config_pb2.ExampleWeightOptions(
                    unweighted=True)))
        self.assertProtoEquals(
            metrics_specs[1],
            config_pb2.MetricsSpec(
                metrics=[
                    config_pb2.MetricConfig(
                        class_name='WeightedExampleCount',
                        config=json.dumps({'name': 'weighted_example_count'})),
                ],
                model_names=['model_name1', 'model_name2'],
                output_names=['output_name1'],
                example_weights=config_pb2.ExampleWeightOptions(
                    weighted=True)))
        self.assertProtoEquals(
            metrics_specs[2],
            config_pb2.MetricsSpec(
                metrics=[
                    config_pb2.MetricConfig(class_name='Precision',
                                            config=json.dumps(
                                                {
                                                    'name': 'precision',
                                                    'class_id': None,
                                                    'thresholds': None,
                                                    'top_k': None
                                                },
                                                sort_keys=True)),
                    config_pb2.MetricConfig(class_name='MeanSquaredError',
                                            config=json.dumps(
                                                {
                                                    'name': 'mse',
                                                    'dtype': 'float32',
                                                },
                                                sort_keys=True)),
                    config_pb2.MetricConfig(
                        class_name='MeanAbsoluteError',
                        module=metric_specs._TF_LOSSES_MODULE,
                        config=json.dumps({
                            'reduction': 'auto',
                            'name': 'mae'
                        },
                                          sort_keys=True))
                ],
                model_names=['model_name1', 'model_name2'],
                output_names=['output_name1'],
                binarize=config_pb2.BinarizationOptions(
                    class_ids={'values': [0, 1]}),
                aggregate=config_pb2.AggregationOptions(macro_average=True)))
        self.assertProtoEquals(
            metrics_specs[3],
            config_pb2.MetricsSpec(
                metrics=[
                    config_pb2.MetricConfig(class_name='MeanLabel',
                                            config=json.dumps(
                                                {'name': 'mean_label'}))
                ],
                model_names=['model_name1', 'model_name2'],
                output_names=['output_name1'],
                binarize=config_pb2.BinarizationOptions(
                    class_ids={'values': [0, 1]}),
                aggregate=config_pb2.AggregationOptions(macro_average=True),
                example_weights=config_pb2.ExampleWeightOptions(
                    unweighted=True)))
        self.assertProtoEquals(
            metrics_specs[4],
            config_pb2.MetricsSpec(
                metrics=[
                    config_pb2.MetricConfig(
                        class_name='WeightedExampleCount',
                        config=json.dumps({'name': 'weighted_example_count'})),
                ],
                model_names=['model_name1', 'model_name2'],
                output_names=['output_name2'],
                example_weights=config_pb2.ExampleWeightOptions(
                    weighted=True)))
        self.assertProtoEquals(
            metrics_specs[5],
            config_pb2.MetricsSpec(
                metrics=[
                    config_pb2.MetricConfig(class_name='Precision',
                                            config=json.dumps(
                                                {
                                                    'name': 'precision',
                                                },
                                                sort_keys=True)),
                    config_pb2.MetricConfig(
                        class_name='MeanAbsolutePercentageError',
                        module=metric_specs._TF_LOSSES_MODULE,
                        config=json.dumps({
                            'reduction': 'auto',
                            'name': 'mape'
                        },
                                          sort_keys=True)),
                    config_pb2.MetricConfig(class_name='MeanPrediction',
                                            config=json.dumps(
                                                {'name': 'mean_prediction'}))
                ],
                model_names=['model_name1', 'model_name2'],
                output_names=['output_name2'],
                binarize=config_pb2.BinarizationOptions(
                    class_ids={'values': [0, 1]}),
                aggregate=config_pb2.AggregationOptions(macro_average=True)))
        self.assertProtoEquals(
            metrics_specs[6],
            config_pb2.MetricsSpec(
                metrics=[
                    config_pb2.MetricConfig(class_name='RootMeanSquaredError',
                                            config=json.dumps(
                                                {
                                                    'name': 'rmse',
                                                    'dtype': 'float32'
                                                },
                                                sort_keys=True))
                ],
                model_names=['model_name1', 'model_name2'],
                output_names=['output_name2'],
                binarize=config_pb2.BinarizationOptions(
                    class_ids={'values': [0, 1]}),
                aggregate=config_pb2.AggregationOptions(macro_average=True),
                example_weights=config_pb2.ExampleWeightOptions(
                    unweighted=True)))
    def testMetricThresholdsFromMetricsSpecs(self):
        slice_specs = [
            config_pb2.SlicingSpec(feature_keys=['feature1']),
            config_pb2.SlicingSpec(feature_values={'feature2': 'value1'})
        ]

        # For cross slice tests.
        baseline_slice_spec = config_pb2.SlicingSpec(feature_keys=['feature3'])

        metrics_specs = [
            config_pb2.MetricsSpec(
                thresholds={
                    'auc':
                    config_pb2.MetricThreshold(
                        value_threshold=config_pb2.GenericValueThreshold()),
                    'mean/label':
                    config_pb2.MetricThreshold(
                        value_threshold=config_pb2.GenericValueThreshold(),
                        change_threshold=config_pb2.GenericChangeThreshold()),
                    'mse':
                    config_pb2.MetricThreshold(
                        change_threshold=config_pb2.GenericChangeThreshold())
                },
                per_slice_thresholds={
                    'auc':
                    config_pb2.PerSliceMetricThresholds(thresholds=[
                        config_pb2.PerSliceMetricThreshold(
                            slicing_specs=slice_specs,
                            threshold=config_pb2.MetricThreshold(
                                value_threshold=config_pb2.
                                GenericValueThreshold()))
                    ]),
                    'mean/label':
                    config_pb2.PerSliceMetricThresholds(thresholds=[
                        config_pb2.PerSliceMetricThreshold(
                            slicing_specs=slice_specs,
                            threshold=config_pb2.MetricThreshold(
                                value_threshold=config_pb2.
                                GenericValueThreshold(),
                                change_threshold=config_pb2.
                                GenericChangeThreshold()))
                    ])
                },
                cross_slice_thresholds={
                    'auc':
                    config_pb2.CrossSliceMetricThresholds(thresholds=[
                        config_pb2.CrossSliceMetricThreshold(
                            cross_slicing_specs=[
                                config_pb2.CrossSlicingSpec(
                                    baseline_spec=baseline_slice_spec,
                                    slicing_specs=slice_specs)
                            ],
                            threshold=config_pb2.MetricThreshold(
                                value_threshold=config_pb2.
                                GenericValueThreshold(),
                                change_threshold=config_pb2.
                                GenericChangeThreshold()))
                    ]),
                    'mse':
                    config_pb2.CrossSliceMetricThresholds(thresholds=[
                        config_pb2.CrossSliceMetricThreshold(
                            cross_slicing_specs=[
                                config_pb2.CrossSlicingSpec(
                                    baseline_spec=baseline_slice_spec,
                                    slicing_specs=slice_specs)
                            ],
                            threshold=config_pb2.MetricThreshold(
                                change_threshold=config_pb2.
                                GenericChangeThreshold())),
                        # Test for duplicate cross_slicing_spec.
                        config_pb2.CrossSliceMetricThreshold(
                            cross_slicing_specs=[
                                config_pb2.CrossSlicingSpec(
                                    baseline_spec=baseline_slice_spec,
                                    slicing_specs=slice_specs)
                            ],
                            threshold=config_pb2.MetricThreshold(
                                value_threshold=config_pb2.
                                GenericValueThreshold()))
                    ])
                },
                model_names=['model_name'],
                output_names=['output_name']),
            config_pb2.MetricsSpec(metrics=[
                config_pb2.MetricConfig(
                    class_name='ExampleCount',
                    config=json.dumps({'name': 'example_count'}),
                    threshold=config_pb2.MetricThreshold(
                        value_threshold=config_pb2.GenericValueThreshold()))
            ],
                                   model_names=['model_name1', 'model_name2'],
                                   example_weights=config_pb2.
                                   ExampleWeightOptions(unweighted=True)),
            config_pb2.MetricsSpec(metrics=[
                config_pb2.MetricConfig(
                    class_name='WeightedExampleCount',
                    config=json.dumps({'name': 'weighted_example_count'}),
                    threshold=config_pb2.MetricThreshold(
                        value_threshold=config_pb2.GenericValueThreshold()))
            ],
                                   model_names=['model_name1', 'model_name2'],
                                   output_names=[
                                       'output_name1', 'output_name2'
                                   ],
                                   example_weights=config_pb2.
                                   ExampleWeightOptions(weighted=True)),
            config_pb2.MetricsSpec(metrics=[
                config_pb2.MetricConfig(
                    class_name='MeanSquaredError',
                    config=json.dumps({'name': 'mse'}),
                    threshold=config_pb2.MetricThreshold(
                        change_threshold=config_pb2.GenericChangeThreshold())),
                config_pb2.MetricConfig(
                    class_name='MeanLabel',
                    config=json.dumps({'name': 'mean_label'}),
                    threshold=config_pb2.MetricThreshold(
                        change_threshold=config_pb2.GenericChangeThreshold()),
                    per_slice_thresholds=[
                        config_pb2.PerSliceMetricThreshold(
                            slicing_specs=slice_specs,
                            threshold=config_pb2.MetricThreshold(
                                change_threshold=config_pb2.
                                GenericChangeThreshold())),
                    ],
                    cross_slice_thresholds=[
                        config_pb2.CrossSliceMetricThreshold(
                            cross_slicing_specs=[
                                config_pb2.CrossSlicingSpec(
                                    baseline_spec=baseline_slice_spec,
                                    slicing_specs=slice_specs)
                            ],
                            threshold=config_pb2.MetricThreshold(
                                change_threshold=config_pb2.
                                GenericChangeThreshold()))
                    ]),
            ],
                                   model_names=['model_name'],
                                   output_names=['output_name'],
                                   binarize=config_pb2.BinarizationOptions(
                                       class_ids={'values': [0, 1]}),
                                   aggregate=config_pb2.AggregationOptions(
                                       macro_average=True,
                                       class_weights={
                                           0: 1.0,
                                           1: 1.0
                                       }))
        ]

        thresholds = metric_specs.metric_thresholds_from_metrics_specs(
            metrics_specs, eval_config=config_pb2.EvalConfig())

        expected_keys_and_threshold_counts = {
            metric_types.MetricKey(name='auc',
                                   model_name='model_name',
                                   output_name='output_name',
                                   is_diff=False,
                                   example_weighted=None):
            4,
            metric_types.MetricKey(name='auc',
                                   model_name='model_name',
                                   output_name='output_name',
                                   is_diff=True,
                                   example_weighted=None):
            1,
            metric_types.MetricKey(name='mean/label',
                                   model_name='model_name',
                                   output_name='output_name',
                                   is_diff=True,
                                   example_weighted=None):
            3,
            metric_types.MetricKey(name='mean/label',
                                   model_name='model_name',
                                   output_name='output_name',
                                   is_diff=False,
                                   example_weighted=None):
            3,
            metric_types.MetricKey(name='example_count',
                                   model_name='model_name1'):
            1,
            metric_types.MetricKey(name='example_count',
                                   model_name='model_name2'):
            1,
            metric_types.MetricKey(name='weighted_example_count',
                                   model_name='model_name1',
                                   output_name='output_name1',
                                   example_weighted=True):
            1,
            metric_types.MetricKey(name='weighted_example_count',
                                   model_name='model_name1',
                                   output_name='output_name2',
                                   example_weighted=True):
            1,
            metric_types.MetricKey(name='weighted_example_count',
                                   model_name='model_name2',
                                   output_name='output_name1',
                                   example_weighted=True):
            1,
            metric_types.MetricKey(name='weighted_example_count',
                                   model_name='model_name2',
                                   output_name='output_name2',
                                   example_weighted=True):
            1,
            metric_types.MetricKey(name='mse',
                                   model_name='model_name',
                                   output_name='output_name',
                                   sub_key=metric_types.SubKey(class_id=0),
                                   is_diff=True):
            1,
            metric_types.MetricKey(name='mse',
                                   model_name='model_name',
                                   output_name='output_name',
                                   sub_key=metric_types.SubKey(class_id=1),
                                   is_diff=True):
            1,
            metric_types.MetricKey(name='mse',
                                   model_name='model_name',
                                   output_name='output_name',
                                   is_diff=False,
                                   example_weighted=None):
            1,
            metric_types.MetricKey(name='mse',
                                   model_name='model_name',
                                   output_name='output_name',
                                   is_diff=True,
                                   example_weighted=None):
            2,
            metric_types.MetricKey(name='mse',
                                   model_name='model_name',
                                   output_name='output_name',
                                   aggregation_type=metric_types.
                                   AggregationType(macro_average=True),
                                   is_diff=True):
            1,
            metric_types.MetricKey(name='mean_label',
                                   model_name='model_name',
                                   output_name='output_name',
                                   sub_key=metric_types.SubKey(class_id=0),
                                   is_diff=True):
            4,
            metric_types.MetricKey(name='mean_label',
                                   model_name='model_name',
                                   output_name='output_name',
                                   sub_key=metric_types.SubKey(class_id=1),
                                   is_diff=True):
            4,
            metric_types.MetricKey(name='mean_label',
                                   model_name='model_name',
                                   output_name='output_name',
                                   aggregation_type=metric_types.
                                   AggregationType(macro_average=True),
                                   is_diff=True):
            4
        }
        self.assertLen(thresholds, len(expected_keys_and_threshold_counts))
        for key, count in expected_keys_and_threshold_counts.items():
            self.assertIn(key, thresholds)
            self.assertLen(thresholds[key], count,
                           'failed for key {}'.format(key))