コード例 #1
0
  def testToComputations(self):
    computations = metric_specs.to_computations(
        metric_specs.specs_from_metrics(
            {
                'output_name': [
                    tf.keras.metrics.MeanSquaredError('mse'),
                    calibration.MeanLabel('mean_label')
                ]
            },
            model_names=['model_name'],
            binarize=config.BinarizationOptions(class_ids={'values': [0, 1]}),
            aggregate=config.AggregationOptions(macro_average=True)),
        config.EvalConfig())

    keys = []
    for m in computations:
      for k in m.keys:
        if not k.name.startswith('_'):
          keys.append(k)
    self.assertLen(keys, 8)
    self.assertIn(metric_types.MetricKey(name='example_count'), keys)
    self.assertIn(
        metric_types.MetricKey(
            name='weighted_example_count',
            model_name='model_name',
            output_name='output_name'), keys)
    self.assertIn(
        metric_types.MetricKey(
            name='mse',
            model_name='model_name',
            output_name='output_name',
            sub_key=metric_types.SubKey(class_id=0)), keys)
    self.assertIn(
        metric_types.MetricKey(
            name='mse',
            model_name='model_name',
            output_name='output_name',
            sub_key=metric_types.SubKey(class_id=1)), keys)
    self.assertIn(
        metric_types.MetricKey(
            name='mse', model_name='model_name', output_name='output_name'),
        keys)
    self.assertIn(
        metric_types.MetricKey(
            name='mean_label',
            model_name='model_name',
            output_name='output_name',
            sub_key=metric_types.SubKey(class_id=0)), keys)
    self.assertIn(
        metric_types.MetricKey(
            name='mean_label',
            model_name='model_name',
            output_name='output_name',
            sub_key=metric_types.SubKey(class_id=1)), keys)
    self.assertIn(
        metric_types.MetricKey(
            name='mean_label',
            model_name='model_name',
            output_name='output_name'), keys)
コード例 #2
0
def default_multi_class_classification_specs(
    model_names: Optional[List[Text]] = None,
    output_names: Optional[List[Text]] = None,
    output_weights: Optional[Dict[Text, float]] = None,
    binarize: Optional[config.BinarizationOptions] = None,
    aggregate: Optional[config.AggregationOptions] = None,
    sparse: bool = True) -> List[config.MetricsSpec]:
  """Returns default metric specs for multi-class classification problems.

  Args:
    model_names: Optional model names if multi-model evaluation.
    output_names: Optional list of output names (if multi-output model).
    output_weights: Optional output weights for creating overall metric
      aggregated across outputs (if multi-output model). If a weight is not
      provided for an output, it's weight defaults to 0.0 (i.e. output ignored).
    binarize: Optional settings for binarizing multi-class/multi-label metrics.
    aggregate: Optional settings for aggregating multi-class/multi-label
      metrics.
    sparse: True if the labels are sparse.
  """

  if sparse:
    metrics = [
        tf.keras.metrics.SparseCategoricalCrossentropy(name='loss'),
        tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy')
    ]
  else:
    metrics = [
        tf.keras.metrics.CategoricalCrossentropy(name='loss'),
        tf.keras.metrics.CategoricalAccuracy(name='accuracy')
    ]
  metrics.append(
      multi_class_confusion_matrix_plot.MultiClassConfusionMatrixPlot())
  if binarize is not None:
    for top_k in binarize.top_k_list.values:
      metrics.extend([
          tf.keras.metrics.Precision(name='precision', top_k=top_k),
          tf.keras.metrics.Recall(name='recall', top_k=top_k)
      ])
    binarize_without_top_k = config.BinarizationOptions()
    binarize_without_top_k.CopyFrom(binarize)
    binarize_without_top_k.ClearField('top_k_list')
    binarize = binarize_without_top_k
  multi_class_metrics = specs_from_metrics(
      metrics,
      model_names=model_names,
      output_names=output_names,
      output_weights=output_weights)
  if aggregate is None:
    aggregate = config.AggregationOptions(micro_average=True)
  multi_class_metrics.extend(
      default_binary_classification_specs(
          model_names=model_names,
          output_names=output_names,
          output_weights=output_weights,
          binarize=binarize,
          aggregate=aggregate))
  return multi_class_metrics
コード例 #3
0
  def testToComputationsWithMixedAggregationAndNonAggregationMetrics(self):
    computations = metric_specs.to_computations([
        config.MetricsSpec(
            metrics=[config.MetricConfig(class_name='CategoricalAccuracy')]),
        config.MetricsSpec(
            metrics=[config.MetricConfig(class_name='BinaryCrossentropy')],
            binarize=config.BinarizationOptions(class_ids={'values': [1]}),
            aggregate=config.AggregationOptions(micro_average=True))
    ], config.EvalConfig())

    # 3 separate computations should be used (one for aggregated metrics, one
    # for non-aggregated metrics, and one for metrics associated with class 1)
    self.assertLen(computations, 3)
コード例 #4
0
def default_multi_class_classification_specs(
        model_names: Optional[List[Text]] = None,
        output_names: Optional[List[Text]] = None,
        binarize: Optional[config.BinarizationOptions] = None,
        aggregate: Optional[config.AggregationOptions] = None,
        sparse: bool = True) -> config.MetricsSpec:
    """Returns default metric specs for multi-class classification problems.

  Args:
    model_names: Optional model names if multi-model evaluation.
    output_names: Optional list of output names (if multi-output model).
    binarize: Optional settings for binarizing multi-class/multi-label metrics.
    aggregate: Optional settings for aggregating multi-class/multi-label
      metrics.
    sparse: True if the labels are sparse.
  """

    if sparse:
        metrics = [
            tf.keras.metrics.SparseCategoricalCrossentropy(name='loss'),
            tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy')
        ]
    else:
        metrics = [
            tf.keras.metrics.CategoricalCrossentropy(name='loss'),
            tf.keras.metrics.CategoricalAccuracy(name='accuracy')
        ]
    metrics.append(
        multi_class_confusion_matrix_at_thresholds.
        MultiClassConfusionMatrixAtThresholds(
            name='multi_class_confusion_matrix_at_thresholds'))
    if binarize is not None:
        for top_k in binarize.top_k_list:
            metrics.extend([
                tf.keras.metrics.Precision(name='precision', top_k=top_k),
                tf.keras.metrics.Recall(name='recall', top_k=top_k)
            ])
        binarize = config.BinarizationOptions().CopyFrom(binarize)
        binarize.ClearField('top_k')
    multi_class_metrics = specs_from_metrics(metrics,
                                             model_names=model_names,
                                             output_names=output_names)
    if aggregate is None:
        aggregate = config.AggregationOptions(micro_average=True)
    multi_class_metrics.extend(
        default_binary_classification_specs(model_names=model_names,
                                            output_names=output_names,
                                            binarize=binarize,
                                            aggregate=aggregate))
    return multi_class_metrics
コード例 #5
0
    def testSpecsFromMetrics(self):
        metrics_specs = metric_specs.specs_from_metrics(
            {
                'output_name1': [
                    tf.keras.metrics.MeanSquaredError('mse'),
                    tf.keras.losses.MeanAbsoluteError(name='mae'),
                    calibration.MeanLabel('mean_label')
                ],
                'output_name2': [
                    tf.keras.metrics.RootMeanSquaredError('rmse'),
                    tf.keras.losses.MeanAbsolutePercentageError(name='mape'),
                    calibration.MeanPrediction('mean_prediction')
                ]
            },
            model_names=['model_name1', 'model_name2'],
            binarize=config.BinarizationOptions(class_ids={'values': [0, 1]}),
            aggregate=config.AggregationOptions(macro_average=True))

        self.assertLen(metrics_specs, 5)
        self.assertProtoEquals(
            metrics_specs[0],
            config.MetricsSpec(metrics=[
                config.MetricConfig(class_name='ExampleCount',
                                    config=json.dumps(
                                        {'name': 'example_count'})),
            ]))
        self.assertProtoEquals(
            metrics_specs[1],
            config.MetricsSpec(metrics=[
                config.MetricConfig(class_name='WeightedExampleCount',
                                    config=json.dumps(
                                        {'name': 'weighted_example_count'})),
            ],
                               model_names=['model_name1', 'model_name2'],
                               output_names=['output_name1']))
        self.assertProtoEquals(
            metrics_specs[2],
            config.MetricsSpec(metrics=[
                config.MetricConfig(class_name='MeanSquaredError',
                                    config=json.dumps(
                                        {
                                            'name': 'mse',
                                            'dtype': 'float32'
                                        },
                                        sort_keys=True)),
                config.MetricConfig(class_name='MeanAbsoluteError',
                                    module=metric_specs._TF_LOSSES_MODULE,
                                    config=json.dumps(
                                        {
                                            'reduction': 'auto',
                                            'name': 'mae'
                                        },
                                        sort_keys=True)),
                config.MetricConfig(class_name='MeanLabel',
                                    config=json.dumps({'name': 'mean_label'}))
            ],
                               model_names=['model_name1', 'model_name2'],
                               output_names=['output_name1'],
                               binarize=config.BinarizationOptions(
                                   class_ids={'values': [0, 1]}),
                               aggregate=config.AggregationOptions(
                                   macro_average=True)))
        self.assertProtoEquals(
            metrics_specs[3],
            config.MetricsSpec(metrics=[
                config.MetricConfig(class_name='WeightedExampleCount',
                                    config=json.dumps(
                                        {'name': 'weighted_example_count'})),
            ],
                               model_names=['model_name1', 'model_name2'],
                               output_names=['output_name2']))
        self.assertProtoEquals(
            metrics_specs[4],
            config.MetricsSpec(
                metrics=[
                    config.MetricConfig(class_name='RootMeanSquaredError',
                                        config=json.dumps(
                                            {
                                                'name': 'rmse',
                                                'dtype': 'float32'
                                            },
                                            sort_keys=True)),
                    config.MetricConfig(
                        class_name='MeanAbsolutePercentageError',
                        module=metric_specs._TF_LOSSES_MODULE,
                        config=json.dumps({
                            'reduction': 'auto',
                            'name': 'mape'
                        },
                                          sort_keys=True)),
                    config.MetricConfig(class_name='MeanPrediction',
                                        config=json.dumps(
                                            {'name': 'mean_prediction'}))
                ],
                model_names=['model_name1', 'model_name2'],
                output_names=['output_name2'],
                binarize=config.BinarizationOptions(
                    class_ids={'values': [0, 1]}),
                aggregate=config.AggregationOptions(macro_average=True)))
コード例 #6
0
 def testMetricThresholdsFromMetricsSpecs(self):
     metrics_specs = [
         config.MetricsSpec(
             thresholds={
                 'auc':
                 config.MetricThreshold(
                     value_threshold=config.GenericValueThreshold()),
                 'mean/label':
                 config.MetricThreshold(
                     value_threshold=config.GenericValueThreshold(),
                     change_threshold=config.GenericChangeThreshold()),
                 # The mse metric will be overridden by MetricConfig below.
                 'mse':
                 config.MetricThreshold(
                     change_threshold=config.GenericChangeThreshold())
             },
             # Model names and output_names should be ignored because
             # ExampleCount is model independent.
             model_names=['model_name'],
             output_names=['output_name']),
         config.MetricsSpec(
             metrics=[
                 config.MetricConfig(
                     class_name='ExampleCount',
                     config=json.dumps({'name': 'example_count'}),
                     threshold=config.MetricThreshold(
                         value_threshold=config.GenericValueThreshold()))
             ],
             # Model names and output_names should be ignored because
             # ExampleCount is model independent.
             model_names=['model_name1', 'model_name2'],
             output_names=['output_name1', 'output_name2']),
         config.MetricsSpec(metrics=[
             config.MetricConfig(
                 class_name='WeightedExampleCount',
                 config=json.dumps({'name': 'weighted_example_count'}),
                 threshold=config.MetricThreshold(
                     value_threshold=config.GenericValueThreshold()))
         ],
                            model_names=['model_name1', 'model_name2'],
                            output_names=['output_name1', 'output_name2']),
         config.MetricsSpec(
             metrics=[
                 config.MetricConfig(
                     class_name='MeanSquaredError',
                     config=json.dumps({'name': 'mse'}),
                     threshold=config.MetricThreshold(
                         change_threshold=config.GenericChangeThreshold())),
                 config.MetricConfig(
                     class_name='MeanLabel',
                     config=json.dumps({'name': 'mean_label'}),
                     threshold=config.MetricThreshold(
                         change_threshold=config.GenericChangeThreshold()))
             ],
             model_names=['model_name'],
             output_names=['output_name'],
             binarize=config.BinarizationOptions(
                 class_ids={'values': [0, 1]}),
             aggregate=config.AggregationOptions(macro_average=True))
     ]
     thresholds = metric_specs.metric_thresholds_from_metrics_specs(
         metrics_specs)
     self.assertLen(thresholds, 14)
     self.assertIn(
         metric_types.MetricKey(name='auc',
                                model_name='model_name',
                                output_name='output_name'), thresholds)
     self.assertIn(
         metric_types.MetricKey(name='mean/label',
                                model_name='model_name',
                                output_name='output_name',
                                is_diff=True), thresholds)
     self.assertIn(
         metric_types.MetricKey(name='mean/label',
                                model_name='model_name',
                                output_name='output_name',
                                is_diff=False), thresholds)
     self.assertIn(metric_types.MetricKey(name='example_count'), thresholds)
     self.assertIn(
         metric_types.MetricKey(name='weighted_example_count',
                                model_name='model_name1',
                                output_name='output_name1'), thresholds)
     self.assertIn(
         metric_types.MetricKey(name='weighted_example_count',
                                model_name='model_name1',
                                output_name='output_name2'), thresholds)
     self.assertIn(
         metric_types.MetricKey(name='weighted_example_count',
                                model_name='model_name2',
                                output_name='output_name1'), thresholds)
     self.assertIn(
         metric_types.MetricKey(name='weighted_example_count',
                                model_name='model_name2',
                                output_name='output_name2'), thresholds)
     self.assertIn(
         metric_types.MetricKey(name='mse',
                                model_name='model_name',
                                output_name='output_name',
                                sub_key=metric_types.SubKey(class_id=0),
                                is_diff=True), thresholds)
     self.assertIn(
         metric_types.MetricKey(name='mse',
                                model_name='model_name',
                                output_name='output_name',
                                sub_key=metric_types.SubKey(class_id=1),
                                is_diff=True), thresholds)
     self.assertIn(
         metric_types.MetricKey(name='mse',
                                model_name='model_name',
                                output_name='output_name',
                                is_diff=True), thresholds)
     self.assertIn(
         metric_types.MetricKey(name='mean_label',
                                model_name='model_name',
                                output_name='output_name',
                                sub_key=metric_types.SubKey(class_id=0),
                                is_diff=True), thresholds)
     self.assertIn(
         metric_types.MetricKey(name='mean_label',
                                model_name='model_name',
                                output_name='output_name',
                                sub_key=metric_types.SubKey(class_id=1),
                                is_diff=True), thresholds)
     self.assertIn(
         metric_types.MetricKey(name='mean_label',
                                model_name='model_name',
                                output_name='output_name',
                                is_diff=True), thresholds)
コード例 #7
0
    def testSpecsFromMetrics(self):
        metrics_specs = metric_specs.specs_from_metrics(
            {
                'output_name1': [
                    tf.keras.metrics.MeanSquaredError('mse'),
                    calibration.MeanLabel('mean_label')
                ],
                'output_name2': [
                    tf.keras.metrics.RootMeanSquaredError('rmse'),
                    calibration.MeanPrediction('mean_prediction')
                ]
            },
            model_names=['model_name1', 'model_name2'],
            binarize=config.BinarizationOptions(class_ids=[0, 1]),
            aggregate=config.AggregationOptions(macro_average=True))

        self.assertLen(metrics_specs, 5)
        self.assertProtoEquals(
            metrics_specs[0],
            config.MetricsSpec(metrics=[
                config.MetricConfig(class_name='ExampleCount',
                                    config=json.dumps(
                                        {'name': 'example_count'})),
            ]))
        self.assertProtoEquals(
            metrics_specs[1],
            config.MetricsSpec(metrics=[
                config.MetricConfig(class_name='WeightedExampleCount',
                                    config=json.dumps(
                                        {'name': 'weighted_example_count'})),
            ],
                               model_names=['model_name1', 'model_name2'],
                               output_names=['output_name1']))
        self.assertProtoEquals(
            metrics_specs[2],
            config.MetricsSpec(
                metrics=[
                    config.MetricConfig(class_name='MeanSquaredError',
                                        config=json.dumps({
                                            'name': 'mse',
                                            'dtype': 'float32'
                                        })),
                    config.MetricConfig(class_name='MeanLabel',
                                        config=json.dumps(
                                            {'name': 'mean_label'}))
                ],
                model_names=['model_name1', 'model_name2'],
                output_names=['output_name1'],
                binarize=config.BinarizationOptions(class_ids=[0, 1]),
                aggregate=config.AggregationOptions(macro_average=True)))
        self.assertProtoEquals(
            metrics_specs[3],
            config.MetricsSpec(metrics=[
                config.MetricConfig(class_name='WeightedExampleCount',
                                    config=json.dumps(
                                        {'name': 'weighted_example_count'})),
            ],
                               model_names=['model_name1', 'model_name2'],
                               output_names=['output_name2']))
        self.assertProtoEquals(
            metrics_specs[4],
            config.MetricsSpec(
                metrics=[
                    config.MetricConfig(class_name='RootMeanSquaredError',
                                        config=json.dumps({
                                            'name': 'rmse',
                                            'dtype': 'float32'
                                        })),
                    config.MetricConfig(class_name='MeanPrediction',
                                        config=json.dumps(
                                            {'name': 'mean_prediction'}))
                ],
                model_names=['model_name1', 'model_name2'],
                output_names=['output_name2'],
                binarize=config.BinarizationOptions(class_ids=[0, 1]),
                aggregate=config.AggregationOptions(macro_average=True)))
コード例 #8
0
    def testToComputations(self):
        computations = metric_specs.to_computations(
            metric_specs.specs_from_metrics(
                {
                    'output_name': [
                        tf.keras.metrics.MeanSquaredError('mse'),
                        # Add a loss exactly same as metric
                        # (https://github.com/tensorflow/tfx/issues/1550)
                        tf.keras.losses.MeanSquaredError(name='loss'),
                        calibration.MeanLabel('mean_label')
                    ]
                },
                model_names=['model_name'],
                binarize=config.BinarizationOptions(
                    class_ids={'values': [0, 1]}),
                aggregate=config.AggregationOptions(macro_average=True,
                                                    class_weights={
                                                        0: 1.0,
                                                        1: 1.0
                                                    })),
            config.EvalConfig())

        keys = []
        for m in computations:
            for k in m.keys:
                if not k.name.startswith('_'):
                    keys.append(k)
        self.assertLen(keys, 11)
        self.assertIn(
            metric_types.MetricKey(name='example_count',
                                   model_name='model_name'), keys)
        self.assertIn(
            metric_types.MetricKey(name='weighted_example_count',
                                   model_name='model_name',
                                   output_name='output_name'), keys)
        self.assertIn(
            metric_types.MetricKey(name='mse',
                                   model_name='model_name',
                                   output_name='output_name',
                                   sub_key=metric_types.SubKey(class_id=0)),
            keys)
        self.assertIn(
            metric_types.MetricKey(name='mse',
                                   model_name='model_name',
                                   output_name='output_name',
                                   sub_key=metric_types.SubKey(class_id=1)),
            keys)
        self.assertIn(
            metric_types.MetricKey(name='mse',
                                   model_name='model_name',
                                   output_name='output_name'), keys)
        self.assertIn(
            metric_types.MetricKey(name='loss',
                                   model_name='model_name',
                                   output_name='output_name',
                                   sub_key=metric_types.SubKey(class_id=0)),
            keys)
        self.assertIn(
            metric_types.MetricKey(name='loss',
                                   model_name='model_name',
                                   output_name='output_name',
                                   sub_key=metric_types.SubKey(class_id=1)),
            keys)
        self.assertIn(
            metric_types.MetricKey(name='loss',
                                   model_name='model_name',
                                   output_name='output_name'), keys)
        self.assertIn(
            metric_types.MetricKey(name='mean_label',
                                   model_name='model_name',
                                   output_name='output_name',
                                   sub_key=metric_types.SubKey(class_id=0)),
            keys)
        self.assertIn(
            metric_types.MetricKey(name='mean_label',
                                   model_name='model_name',
                                   output_name='output_name',
                                   sub_key=metric_types.SubKey(class_id=1)),
            keys)
        self.assertIn(
            metric_types.MetricKey(name='mean_label',
                                   model_name='model_name',
                                   output_name='output_name'), keys)
コード例 #9
0
    def testMetricThresholdsFromMetricsSpecs(self):
        slice_specs = [
            config.SlicingSpec(feature_keys=['feature1']),
            config.SlicingSpec(feature_values={'feature2': 'value1'})
        ]

        # For cross slice tests.
        baseline_slice_spec = config.SlicingSpec(feature_keys=['feature3'])

        metrics_specs = [
            config.MetricsSpec(
                thresholds={
                    'auc':
                    config.MetricThreshold(
                        value_threshold=config.GenericValueThreshold()),
                    'mean/label':
                    config.MetricThreshold(
                        value_threshold=config.GenericValueThreshold(),
                        change_threshold=config.GenericChangeThreshold()),
                    'mse':
                    config.MetricThreshold(
                        change_threshold=config.GenericChangeThreshold())
                },
                per_slice_thresholds={
                    'auc':
                    config.PerSliceMetricThresholds(thresholds=[
                        config.PerSliceMetricThreshold(
                            slicing_specs=slice_specs,
                            threshold=config.MetricThreshold(
                                value_threshold=config.GenericValueThreshold(
                                )))
                    ]),
                    'mean/label':
                    config.PerSliceMetricThresholds(thresholds=[
                        config.PerSliceMetricThreshold(
                            slicing_specs=slice_specs,
                            threshold=config.MetricThreshold(
                                value_threshold=config.GenericValueThreshold(),
                                change_threshold=config.GenericChangeThreshold(
                                )))
                    ])
                },
                cross_slice_thresholds={
                    'auc':
                    config.CrossSliceMetricThresholds(thresholds=[
                        config.CrossSliceMetricThreshold(
                            cross_slicing_specs=[
                                config.CrossSlicingSpec(
                                    baseline_spec=baseline_slice_spec,
                                    slicing_specs=slice_specs)
                            ],
                            threshold=config.MetricThreshold(
                                value_threshold=config.GenericValueThreshold(),
                                change_threshold=config.GenericChangeThreshold(
                                )))
                    ]),
                    'mse':
                    config.CrossSliceMetricThresholds(thresholds=[
                        config.CrossSliceMetricThreshold(
                            cross_slicing_specs=[
                                config.CrossSlicingSpec(
                                    baseline_spec=baseline_slice_spec,
                                    slicing_specs=slice_specs)
                            ],
                            threshold=config.MetricThreshold(
                                change_threshold=config.GenericChangeThreshold(
                                ))),
                        # Test for duplicate cross_slicing_spec.
                        config.CrossSliceMetricThreshold(
                            cross_slicing_specs=[
                                config.CrossSlicingSpec(
                                    baseline_spec=baseline_slice_spec,
                                    slicing_specs=slice_specs)
                            ],
                            threshold=config.MetricThreshold(
                                value_threshold=config.GenericValueThreshold())
                        )
                    ])
                },
                model_names=['model_name'],
                output_names=['output_name']),
            config.MetricsSpec(metrics=[
                config.MetricConfig(
                    class_name='ExampleCount',
                    config=json.dumps({'name': 'example_count'}),
                    threshold=config.MetricThreshold(
                        value_threshold=config.GenericValueThreshold()))
            ],
                               model_names=['model_name1', 'model_name2'],
                               output_names=['output_name1', 'output_name2']),
            config.MetricsSpec(metrics=[
                config.MetricConfig(
                    class_name='WeightedExampleCount',
                    config=json.dumps({'name': 'weighted_example_count'}),
                    threshold=config.MetricThreshold(
                        value_threshold=config.GenericValueThreshold()))
            ],
                               model_names=['model_name1', 'model_name2'],
                               output_names=['output_name1', 'output_name2']),
            config.MetricsSpec(metrics=[
                config.MetricConfig(
                    class_name='MeanSquaredError',
                    config=json.dumps({'name': 'mse'}),
                    threshold=config.MetricThreshold(
                        change_threshold=config.GenericChangeThreshold())),
                config.MetricConfig(
                    class_name='MeanLabel',
                    config=json.dumps({'name': 'mean_label'}),
                    threshold=config.MetricThreshold(
                        change_threshold=config.GenericChangeThreshold()),
                    per_slice_thresholds=[
                        config.PerSliceMetricThreshold(
                            slicing_specs=slice_specs,
                            threshold=config.MetricThreshold(
                                change_threshold=config.GenericChangeThreshold(
                                ))),
                    ],
                    cross_slice_thresholds=[
                        config.CrossSliceMetricThreshold(
                            cross_slicing_specs=[
                                config.CrossSlicingSpec(
                                    baseline_spec=baseline_slice_spec,
                                    slicing_specs=slice_specs)
                            ],
                            threshold=config.MetricThreshold(
                                change_threshold=config.GenericChangeThreshold(
                                )))
                    ]),
            ],
                               model_names=['model_name'],
                               output_names=['output_name'],
                               binarize=config.BinarizationOptions(
                                   class_ids={'values': [0, 1]}),
                               aggregate=config.AggregationOptions(
                                   macro_average=True,
                                   class_weights={
                                       0: 1.0,
                                       1: 1.0
                                   }))
        ]

        thresholds = metric_specs.metric_thresholds_from_metrics_specs(
            metrics_specs)

        expected_keys_and_threshold_counts = {
            metric_types.MetricKey(name='auc',
                                   model_name='model_name',
                                   output_name='output_name',
                                   is_diff=False):
            4,
            metric_types.MetricKey(name='auc',
                                   model_name='model_name',
                                   output_name='output_name',
                                   is_diff=True):
            1,
            metric_types.MetricKey(name='mean/label',
                                   model_name='model_name',
                                   output_name='output_name',
                                   is_diff=True):
            3,
            metric_types.MetricKey(name='mean/label',
                                   model_name='model_name',
                                   output_name='output_name',
                                   is_diff=False):
            3,
            metric_types.MetricKey(name='example_count',
                                   model_name='model_name1',
                                   output_name='output_name1'):
            1,
            metric_types.MetricKey(name='example_count',
                                   model_name='model_name1',
                                   output_name='output_name2'):
            1,
            metric_types.MetricKey(name='example_count',
                                   model_name='model_name2',
                                   output_name='output_name1'):
            1,
            metric_types.MetricKey(name='example_count',
                                   model_name='model_name2',
                                   output_name='output_name2'):
            1,
            metric_types.MetricKey(name='weighted_example_count',
                                   model_name='model_name1',
                                   output_name='output_name1'):
            1,
            metric_types.MetricKey(name='weighted_example_count',
                                   model_name='model_name1',
                                   output_name='output_name2'):
            1,
            metric_types.MetricKey(name='weighted_example_count',
                                   model_name='model_name2',
                                   output_name='output_name1'):
            1,
            metric_types.MetricKey(name='weighted_example_count',
                                   model_name='model_name2',
                                   output_name='output_name2'):
            1,
            metric_types.MetricKey(name='mse',
                                   model_name='model_name',
                                   output_name='output_name',
                                   sub_key=metric_types.SubKey(class_id=0),
                                   is_diff=True):
            1,
            metric_types.MetricKey(name='mse',
                                   model_name='model_name',
                                   output_name='output_name',
                                   sub_key=metric_types.SubKey(class_id=1),
                                   is_diff=True):
            1,
            metric_types.MetricKey(name='mse',
                                   model_name='model_name',
                                   output_name='output_name',
                                   is_diff=True):
            2,
            metric_types.MetricKey(name='mse',
                                   model_name='model_name',
                                   output_name='output_name',
                                   is_diff=False):
            1,
            metric_types.MetricKey(name='mse',
                                   model_name='model_name',
                                   output_name='output_name',
                                   aggregation_type=metric_types.AggregationType(macro_average=True),
                                   is_diff=True):
            1,
            metric_types.MetricKey(name='mean_label',
                                   model_name='model_name',
                                   output_name='output_name',
                                   sub_key=metric_types.SubKey(class_id=0),
                                   is_diff=True):
            4,
            metric_types.MetricKey(name='mean_label',
                                   model_name='model_name',
                                   output_name='output_name',
                                   sub_key=metric_types.SubKey(class_id=1),
                                   is_diff=True):
            4,
            metric_types.MetricKey(name='mean_label',
                                   model_name='model_name',
                                   output_name='output_name',
                                   aggregation_type=metric_types.AggregationType(macro_average=True),
                                   is_diff=True):
            4
        }
        self.assertLen(thresholds, len(expected_keys_and_threshold_counts))
        for key, count in expected_keys_and_threshold_counts.items():
            self.assertIn(key, thresholds)
            self.assertLen(thresholds[key], count,
                           'failed for key {}'.format(key))