def testAucUnweighted(self):
    temp_eval_export_dir = self._getEvalExportDir()
    _, eval_export_dir = (
        fixed_prediction_estimator.simple_fixed_prediction_estimator(
            None, temp_eval_export_dir))
    examples = [
        self._makeExample(prediction=0.0000, label=0.0000),
        self._makeExample(prediction=0.0000, label=1.0000),
        self._makeExample(prediction=0.7000, label=1.0000),
        self._makeExample(prediction=0.8000, label=0.0000),
        self._makeExample(prediction=1.0000, label=1.0000),
    ]

    expected_values_dict = {
        metric_keys.AUC: 0.58333,
        metric_keys.lower_bound(metric_keys.AUC): 0.5,
        metric_keys.upper_bound(metric_keys.AUC): 0.66667,
        metric_keys.AUPRC: 0.74075,
        metric_keys.lower_bound(metric_keys.AUPRC): 0.70000,
        metric_keys.upper_bound(metric_keys.AUPRC): 0.77778,
    }

    self._runTest(
        examples, eval_export_dir,
        [post_export_metrics.auc(),
         post_export_metrics.auc(curve='PR')], expected_values_dict)
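
# Sanity check for the expected values above (helper added for reference, not
# part of the original test): with three positives and two negatives, ROC AUC
# equals the fraction of positive/negative score pairs ranked correctly, with
# ties counted as 0.5: (1 + 1 + 1 + 0.5) / 6 = 0.58333. A minimal sketch,
# assuming scikit-learn is available (it is not a TFMA dependency):
def _sanity_check_expected_auc():
    from sklearn.metrics import roc_auc_score

    y_true = [0.0, 1.0, 1.0, 0.0, 1.0]
    y_pred = [0.0, 0.0, 0.7, 0.8, 1.0]
    # TFMA's AUPRC (0.74075 above) comes from a Riemann-sum approximation and
    # will not exactly match sklearn's average_precision_score, so only the
    # ROC AUC is cross-checked here.
    assert abs(roc_auc_score(y_true, y_pred) - 0.58333) < 1e-4
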
  def testConvertSliceMetricsToProtoEmptyMetrics(self):
    slice_key = _make_slice_key('age', 5, 'language', 'english', 'price', 0.3)
    slice_metrics = {metric_keys.ERROR_METRIC: 'error_message'}

    actual_metrics = (
        metrics_plots_and_validations_writer.convert_slice_metrics_to_proto(
            (slice_key, slice_metrics),
            [post_export_metrics.auc(),
             post_export_metrics.auc(curve='PR')]))

    expected_metrics = metrics_for_slice_pb2.MetricsForSlice()
    expected_metrics.slice_key.CopyFrom(slicer.serialize_slice_key(slice_key))
    expected_metrics.metrics[
        metric_keys.ERROR_METRIC].debug_message = 'error_message'
    self.assertProtoEquals(expected_metrics, actual_metrics)
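
  # For reference in these empty-metrics tests: a string stored under
  # metric_keys.ERROR_METRIC is carried through as the debug_message of the
  # resulting MetricValue rather than as a numeric metric value.
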
  def testSerializeMetrics_emptyMetrics(self):
    slice_key = _make_slice_key('age', 5, 'language', 'english', 'price', 0.3)
    slice_metrics = {metric_keys.ERROR_METRIC: 'error_message'}

    actual_metrics = metrics_and_plots_serialization._serialize_metrics(
        (slice_key, slice_metrics),
        [post_export_metrics.auc(),
         post_export_metrics.auc(curve='PR')])

    expected_metrics = metrics_for_slice_pb2.MetricsForSlice()
    expected_metrics.slice_key.CopyFrom(slicer.serialize_slice_key(slice_key))
    expected_metrics.metrics[
        metric_keys.ERROR_METRIC].debug_message = 'error_message'
    self.assertProtoEquals(
        expected_metrics,
        metrics_for_slice_pb2.MetricsForSlice.FromString(actual_metrics))

  def testAucUnweightedSerialization(self):
    temp_eval_export_dir = self._getEvalExportDir()
    _, eval_export_dir = (
        fixed_prediction_estimator.simple_fixed_prediction_estimator(
            None, temp_eval_export_dir))
    examples = [
        self._makeExample(prediction=0.0000, label=0.0000),
        self._makeExample(prediction=0.0000, label=1.0000),
        self._makeExample(prediction=0.7000, label=1.0000),
        self._makeExample(prediction=0.8000, label=0.0000),
        self._makeExample(prediction=1.0000, label=1.0000),
    ]

    expected_values_dict = {
        metric_keys.AUPRC: 0.74075,
        metric_keys.lower_bound(metric_keys.AUPRC): 0.70000,
        metric_keys.upper_bound(metric_keys.AUPRC): 0.77778,
    }

    auc_metric = post_export_metrics.auc(curve='PR')

    def check_result(got):  # pylint: disable=invalid-name
      try:
        self.assertEqual(1, len(got), 'got: %s' % got)
        (slice_key, value) = got[0]
        self.assertEqual((), slice_key)
        self.assertDictElementsAlmostEqual(value, expected_values_dict)

        # Check serialization too.
        # Note that we can't just make this a dict, since proto maps
        # allow uninitialized key access, i.e. they act like defaultdicts.
        output_metrics = metrics_for_slice_pb2.MetricsForSlice().metrics
        auc_metric.populate_stats_and_pop(value, output_metrics)
        self.assertProtoEquals(
            """
            bounded_value {
              lower_bound {
                value: 0.6999999
              }
              upper_bound {
                value: 0.7777776
              }
              value {
                value: 0.7407472
              }
            }
            """, output_metrics[metric_keys.AUPRC])
      except AssertionError as err:
        raise util.BeamAssertException(err)

    self._runTestWithCustomCheck(
        examples,
        eval_export_dir, [auc_metric],
        custom_metrics_check=check_result)
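
  # For reference on the "act like defaultdicts" note above: reading a missing
  # key from a message-typed proto map silently creates an empty entry, so the
  # map has to be populated and compared carefully. A minimal illustration
  # (the helper name is ours, not part of the original tests):
  def _demo_proto_map_auto_insert(self):
    metrics = metrics_for_slice_pb2.MetricsForSlice().metrics
    _ = metrics['never_written']  # Reading an unwritten key...
    self.assertIn('never_written', metrics)  # ...creates an empty entry.
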
  def testMetricComputedBeamCounter(self):
    pipeline = beam.Pipeline()
    auc = post_export_metrics.auc()
    _ = pipeline | counter_util.IncrementMetricsComputationCounters([auc])
    # Run the pipeline once and wait for completion before querying counters.
    result = pipeline.run()
    result.wait_until_finish()
    metric_filter = beam.metrics.metric.MetricsFilter().with_namespace(
        constants.METRICS_NAMESPACE).with_name('metric_computed_auc')
    actual_metrics_count = result.metrics().query(
        filter=metric_filter)['counters'][0].committed

    self.assertEqual(actual_metrics_count, 1)
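
  # For reference: .committed reads the counter value from successfully
  # completed bundles, while .attempted would also include retried work; on
  # the DirectRunner used in tests the two typically agree.
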
    def testSerializeMetrics(self):
        slice_key = _make_slice_key('age', 5, 'language', 'english', 'price',
                                    0.3)
        slice_metrics = {
            'accuracy': 0.8,
            _full_key(metric_keys.AUPRC): 0.1,
            _full_key(metric_keys.lower_bound(metric_keys.AUPRC)): 0.05,
            _full_key(metric_keys.upper_bound(metric_keys.AUPRC)): 0.17,
            _full_key(metric_keys.AUC): 0.2,
            _full_key(metric_keys.lower_bound(metric_keys.AUC)): 0.1,
            _full_key(metric_keys.upper_bound(metric_keys.AUC)): 0.3
        }
        expected_metrics_for_slice = text_format.Parse(
            string.Template("""
        slice_key {
          single_slice_keys {
            column: 'age'
            int64_value: 5
          }
          single_slice_keys {
            column: 'language'
            bytes_value: 'english'
          }
          single_slice_keys {
            column: 'price'
            float_value: 0.3
          }
        }
        metrics {
          key: "accuracy"
          value {
            double_value {
              value: 0.8
            }
          }
        }
        metrics {
          key: "$auc"
          value {
            bounded_value {
              lower_bound {
                value: 0.1
              }
              upper_bound {
                value: 0.3
              }
              value {
                value: 0.2
              }
              methodology: RIEMANN_SUM
            }
          }
        }
        metrics {
          key: "$auprc"
          value {
            bounded_value {
              lower_bound {
                value: 0.05
              }
              upper_bound {
                value: 0.17
              }
              value {
                value: 0.1
              }
              methodology: RIEMANN_SUM
            }
          }
        }""").substitute(auc=_full_key(metric_keys.AUC),
                         auprc=_full_key(metric_keys.AUPRC)),
            metrics_for_slice_pb2.MetricsForSlice())

        got = metrics_and_plots_evaluator._serialize_metrics(
            (slice_key, slice_metrics),
            [post_export_metrics.auc(),
             post_export_metrics.auc(curve='PR')])
        self.assertProtoEquals(
            expected_metrics_for_slice,
            metrics_for_slice_pb2.MetricsForSlice.FromString(got))
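
    # For reference: the $auc and $auprc placeholders are needed because the
    # real metric keys are computed at runtime (_full_key may add a prefix to
    # 'auc' / 'auprc'); string.Template.substitute splices the exact keys into
    # the expected proto text before parsing.
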
    def testAssertGeneralMetricsComputedWithBeamAre(self):
        temp_eval_export_dir = self._getEvalExportDir()
        _, eval_export_dir = (fixed_prediction_estimator_extra_fields.
                              simple_fixed_prediction_estimator_extra_fields(
                                  None, temp_eval_export_dir))
        examples = [
            self.makeExample(prediction=0.0,
                             label=0.0,
                             fixed_string='negative_slice',
                             fixed_float=0.0,
                             fixed_int=0),
            self.makeExample(prediction=0.2,
                             label=0.0,
                             fixed_string='negative_slice',
                             fixed_float=0.0,
                             fixed_int=0),
            self.makeExample(prediction=0.4,
                             label=0.0,
                             fixed_string='negative_slice',
                             fixed_float=0.0,
                             fixed_int=0),
            self.makeExample(prediction=0.8,
                             label=1.0,
                             fixed_string='positive_slice',
                             fixed_float=0.0,
                             fixed_int=0),
            self.makeExample(prediction=0.9,
                             label=1.0,
                             fixed_string='positive_slice',
                             fixed_float=0.0,
                             fixed_int=0),
            self.makeExample(prediction=1.0,
                             label=1.0,
                             fixed_string='positive_slice',
                             fixed_float=0.0,
                             fixed_int=0),
        ]
        expected_slice_metrics = {}
        expected_slice_metrics[()] = {
            'average_loss': (0.00 + 0.04 + 0.16 + 0.04 + 0.01 + 0.00) / 6.0,
            'mae':
            0.15,
            # Note that we don't check the exact value because of numerical errors.
            metric_keys.AUC:
            tfma_unit.BoundedValue(0.98, 1.00),
        }
        # We don't check AUC for the positive / negative only slices because
        # it's not clear what the value should be.
        expected_slice_metrics[(('fixed_string', b'negative_slice'), )] = {
            'average_loss': (0.00 + 0.04 + 0.16) / 3.0,
            'mae': 0.2,
        }
        expected_slice_metrics[(('fixed_string', b'positive_slice'), )] = {
            'average_loss': (0.04 + 0.01 + 0.00) / 3.0,
            'mae': 0.1,
        }
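
        # For reference: the estimator reports squared-error loss, so the
        # average_loss literals above are (prediction - label)**2 terms,
        # e.g. (0.2 - 0.0)**2 = 0.04; mae for the overall slice is
        # (0.0 + 0.2 + 0.4 + 0.2 + 0.1 + 0.0) / 6.0 = 0.15.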

        def add_metrics(features, predictions, labels):
            del features
            metric_ops = {
                'mae':
                tf.metrics.mean_absolute_error(labels,
                                               predictions['predictions']),
            }
            return metric_ops

        with beam.Pipeline() as pipeline:
            examples_pcollection = pipeline | 'Create' >> beam.Create(examples)
            self.assertGeneralMetricsComputedWithBeamAre(
                eval_saved_model_path=eval_export_dir,
                examples_pcollection=examples_pcollection,
                slice_spec=[
                    slicer.SingleSliceSpec(),
                    slicer.SingleSliceSpec(columns=['fixed_string'])
                ],
                add_metrics_callbacks=[add_metrics,
                                       post_export_metrics.auc()],
                expected_slice_metrics=expected_slice_metrics)

    def testSerializeMetricsRanges(self):
        slice_key = _make_slice_key('age', 5, 'language', 'english', 'price',
                                    0.3)
        slice_metrics = {
            'accuracy': types.ValueWithTDistribution(0.8, 0.1, 9, 0.8),
            metric_keys.AUPRC: 0.1,
            metric_keys.lower_bound_key(metric_keys.AUPRC): 0.05,
            metric_keys.upper_bound_key(metric_keys.AUPRC): 0.17,
            metric_keys.AUC: 0.2,
            metric_keys.lower_bound_key(metric_keys.AUC): 0.1,
            metric_keys.upper_bound_key(metric_keys.AUC): 0.3
        }
        expected_metrics_for_slice = text_format.Parse(
            string.Template("""
        slice_key {
          single_slice_keys {
            column: 'age'
            int64_value: 5
          }
          single_slice_keys {
            column: 'language'
            bytes_value: 'english'
          }
          single_slice_keys {
            column: 'price'
            float_value: 0.3
          }
        }
        metrics {
          key: "accuracy"
          value {
            bounded_value {
              value {
                value: 0.8
              }
              lower_bound {
                value: 0.5737843
              }
              upper_bound {
                value: 1.0262157
              }
              methodology: POISSON_BOOTSTRAP
            }
          }
        }
        metrics {
          key: "$auc"
          value {
            bounded_value {
              lower_bound {
                value: 0.1
              }
              upper_bound {
                value: 0.3
              }
              value {
                value: 0.2
              }
              methodology: RIEMANN_SUM
            }
          }
        }
        metrics {
          key: "$auprc"
          value {
            bounded_value {
              lower_bound {
                value: 0.05
              }
              upper_bound {
                value: 0.17
              }
              value {
                value: 0.1
              }
              methodology: RIEMANN_SUM
            }
          }
        }""").substitute(auc=metric_keys.AUC, auprc=metric_keys.AUPRC),
            metrics_for_slice_pb2.MetricsForSlice())

        got = metrics_and_plots_serialization._serialize_metrics(
            (slice_key, slice_metrics),
            [post_export_metrics.auc(),
             post_export_metrics.auc(curve='PR')])
        self.assertProtoEquals(
            expected_metrics_for_slice,
            metrics_for_slice_pb2.MetricsForSlice.FromString(got))
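        # For reference: the accuracy bounds above are consistent with a
        # two-sided 95% t-interval around the sample mean,
        # 0.8 +/- t(0.975, df=9) * 0.1 = 0.8 +/- 2.262157 * 0.1
        # = [0.5737843, 1.0262157].
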
  def _counter_inc(self, data):
    del data  # Unused.
    auc = post_export_metrics.auc()
    counter_util.update_beam_counters([auc])

  def testConvertSliceMetricsToProtoFromLegacyStrings(self):
    slice_key = _make_slice_key('age', 5, 'language', 'english', 'price', 0.3)
    slice_metrics = {
        'accuracy': 0.8,
        metric_keys.AUPRC: 0.1,
        metric_keys.lower_bound_key(metric_keys.AUPRC): 0.05,
        metric_keys.upper_bound_key(metric_keys.AUPRC): 0.17,
        metric_keys.AUC: 0.2,
        metric_keys.lower_bound_key(metric_keys.AUC): 0.1,
        metric_keys.upper_bound_key(metric_keys.AUC): 0.3
    }
    expected_metrics_for_slice = text_format.Parse(
        string.Template("""
        slice_key {
          single_slice_keys {
            column: 'age'
            int64_value: 5
          }
          single_slice_keys {
            column: 'language'
            bytes_value: 'english'
          }
          single_slice_keys {
            column: 'price'
            float_value: 0.3
          }
        }
        metrics {
          key: "accuracy"
          value {
            double_value {
              value: 0.8
            }
          }
        }
        metrics {
          key: "$auc"
          value {
            bounded_value {
              lower_bound {
                value: 0.1
              }
              upper_bound {
                value: 0.3
              }
              value {
                value: 0.2
              }
              methodology: RIEMANN_SUM
            }
          }
        }
        metrics {
          key: "$auprc"
          value {
            bounded_value {
              lower_bound {
                value: 0.05
              }
              upper_bound {
                value: 0.17
              }
              value {
                value: 0.1
              }
              methodology: RIEMANN_SUM
            }
          }
        }""").substitute(auc=metric_keys.AUC, auprc=metric_keys.AUPRC),
        metrics_for_slice_pb2.MetricsForSlice())

    got = metrics_plots_and_validations_writer.convert_slice_metrics_to_proto(
        (slice_key, slice_metrics),
        [post_export_metrics.auc(),
         post_export_metrics.auc(curve='PR')])
    self.assertProtoEquals(expected_metrics_for_slice, got)

  def testConvertSliceMetricsToProtoMetricsRanges(self):
    slice_key = _make_slice_key('age', 5, 'language', 'english', 'price', 0.3)
    slice_metrics = {
        'accuracy': types.ValueWithTDistribution(0.8, 0.1, 9, 0.8),
        metric_keys.AUPRC: 0.1,
        metric_keys.lower_bound_key(metric_keys.AUPRC): 0.05,
        metric_keys.upper_bound_key(metric_keys.AUPRC): 0.17,
        metric_keys.AUC: 0.2,
        metric_keys.lower_bound_key(metric_keys.AUC): 0.1,
        metric_keys.upper_bound_key(metric_keys.AUC): 0.3
    }
    expected_metrics_for_slice = text_format.Parse(
        string.Template("""
        slice_key {
          single_slice_keys {
            column: 'age'
            int64_value: 5
          }
          single_slice_keys {
            column: 'language'
            bytes_value: 'english'
          }
          single_slice_keys {
            column: 'price'
            float_value: 0.3
          }
        }
        metrics {
          key: "accuracy"
          value {
            bounded_value {
              value {
                value: 0.8
              }
              lower_bound {
                value: 0.5737843
              }
              upper_bound {
                value: 1.0262157
              }
              methodology: POISSON_BOOTSTRAP
            }
            confidence_interval {
              lower_bound {
                value: 0.5737843
              }
              upper_bound {
                value: 1.0262157
              }
              t_distribution_value {
                sample_mean {
                  value: 0.8
                }
                sample_standard_deviation {
                  value: 0.1
                }
                sample_degrees_of_freedom {
                  value: 9
                }
                unsampled_value {
                  value: 0.8
                }
              }
            }
          }
        }
        metrics {
          key: "$auc"
          value {
            bounded_value {
              lower_bound {
                value: 0.1
              }
              upper_bound {
                value: 0.3
              }
              value {
                value: 0.2
              }
              methodology: RIEMANN_SUM
            }
          }
        }
        metrics {
          key: "$auprc"
          value {
            bounded_value {
              lower_bound {
                value: 0.05
              }
              upper_bound {
                value: 0.17
              }
              value {
                value: 0.1
              }
              methodology: RIEMANN_SUM
            }
          }
        }""").substitute(auc=metric_keys.AUC, auprc=metric_keys.AUPRC),
        metrics_for_slice_pb2.MetricsForSlice())

    got = metrics_plots_and_validations_writer.convert_slice_metrics_to_proto(
        (slice_key, slice_metrics),
        [post_export_metrics.auc(),
         post_export_metrics.auc(curve='PR')])
    self.assertProtoEquals(expected_metrics_for_slice, got)
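
    # For reference: compared with the _serialize_metrics output earlier in
    # this file, convert_slice_metrics_to_proto also emits a
    # confidence_interval carrying the raw t-distribution statistics (sample
    # mean, standard deviation, degrees of freedom, unsampled value) alongside
    # the bounded_value, as the expected proto above shows.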