def slice_spec_from_stats(  # pylint: disable=invalid-name
    statistics: statistics_pb2.DatasetFeatureStatisticsList,
    categorical_uniques_threshold: int = 100,
    max_cross_size: int = 2
) -> List[slicer.SingleSliceSpec]:
  """Builds slice specs for low-cardinality string features and their crosses.

  Only the first dataset of `statistics` is inspected. A feature is used for
  slicing when its path has exactly one step, its populated stats field is
  `string_stats`, and its unique count is in the range
  (0, categorical_uniques_threshold].

  Args:
    statistics: Data statistics; `statistics.datasets[0]` is read.
    categorical_uniques_threshold: Largest number of unique values for which a
      categorical feature is still sliced on.
    max_cross_size: Largest number of features combined into one cross.

  Returns:
    One spec per combination of 1..max_cross_size qualifying feature names,
    followed by a `slicer.SingleSliceSpec()` built without arguments.
  """

  def _is_sliceable(feature) -> bool:
    # Features with multi-step paths are skipped.
    if len(feature.path.step) != 1:
      return False
    if feature.WhichOneof('stats') != 'string_stats':
      return False
    # TODO(pachristopher): Consider slicing on top-K values for features
    # with high cardinality.
    return 0 < feature.string_stats.unique <= categorical_uniques_threshold

  column_names = [
      feature.path.step[0]
      for feature in statistics.datasets[0].features
      if _is_sliceable(feature)
  ]

  specs = [
      slicer.SingleSliceSpec(columns=list(cross))
      for cross_size in range(1, max_cross_size + 1)
      for cross in itertools.combinations(column_names, cross_size)
  ]
  specs.append(slicer.SingleSliceSpec())
  return specs
예제 #2
0
def render_plot(
    result: view_types.EvalResult,
    slicing_spec: Optional[Union[slicer.SingleSliceSpec,
                                 config_pb2.SlicingSpec]] = None,
    output_name: Optional[Text] = None,
    class_id: Optional[int] = None,
    top_k: Optional[int] = None,
    k: Optional[int] = None,
    label: Optional[Text] = None,
) -> Optional[visualization.PlotViewer]:  # pytype: disable=invalid-annotation
    """Renders the plot view as a widget.

    Args:
      result: A tfma.EvalResult.
      slicing_spec: The tfma.SlicingSpec identifying the slice. The overall
        slice is shown if unset.
      output_name: A string representing the output name.
      class_id: A number representing the class id if multi class.
      top_k: The k used to compute prediction in the top k position.
      k: The k used to compute prediction at the kth position.
      label: A partial label used to match a set of plots in the results.

    Returns:
      A PlotViewer object if in Jupyter notebook; None if in Colab.
    """
    # A config proto is first converted to the SingleSliceSpec wrapper.
    if slicing_spec and isinstance(slicing_spec, config_pb2.SlicingSpec):
        slicing_spec = slicer.SingleSliceSpec(spec=slicing_spec)
    # Fall back to a spec built without arguments when nothing was given.
    effective_spec = slicing_spec or slicer.SingleSliceSpec()
    plot_data, plot_config = util.get_plot_data_and_config(
        result.plots, effective_spec, output_name, class_id, top_k, k, label)
    return visualization.render_plot(plot_data, plot_config)
def slice_spec_from_stats(  # pylint: disable=invalid-name
        statistics: statistics_pb2.DatasetFeatureStatisticsList,
        categorical_uniques_threshold: int = 100,
        max_cross_size: int = 2) -> List[slicer.SingleSliceSpec]:
    """Generates slice specs from categorical and numeric feature statistics.

    Categorical features are sliced on by name. Numeric features are sliced on
    under their name prefixed with TRANSFORMED_FEATURE_PREFIX.

    Args:
      statistics: Data statistics.
      categorical_uniques_threshold: Maximum number of unique values beyond
        which we don't slice on that categorical feature.
      max_cross_size: Maximum size of feature crosses to consider.

    Returns:
      One spec per combination of 1..max_cross_size column names, followed by
      a `slicer.SingleSliceSpec()` built without arguments.
    """
    column_names = [
        feature.path.step[0]
        for feature in _get_slicable_categorical_features(
            statistics, categorical_uniques_threshold)
    ]
    # We would bucketize numeric features based on the quantiles boundaries,
    # so they are referenced through their transformed name.
    column_names.extend(
        TRANSFORMED_FEATURE_PREFIX + feature.path.step[0]
        for feature in _get_slicable_numeric_features(statistics))

    specs = [
        slicer.SingleSliceSpec(columns=list(cross))
        for cross_size in range(1, max_cross_size + 1)
        for cross in itertools.combinations(column_names, cross_size)
    ]
    specs.append(slicer.SingleSliceSpec())
    return specs
    def testSliceOnMetaFeature(self):
        """Checks that a feature added by ExtractMetaFeature can be sliced on."""
        # We want to make sure that slicing on the newly added feature works,
        # so the slice extraction stages are pulled into this pipeline.
        wanted_slice_keys = [
            (),
            (),
            (('num_interests', 1), ),
            (('num_interests', 2), ),
        ]

        def check_result(got):
            try:
                self.assertEqual(4, len(got), 'got: %s' % got)
                got_slice_keys = sorted(slice_key for slice_key, _ in got)
                self.assertEqual(got_slice_keys, sorted(wanted_slice_keys))
            except AssertionError as err:
                raise util.BeamAssertException(err)

        with beam.Pipeline() as pipeline:
            fpls = create_fpls()
            with_meta_feature = (
                pipeline
                | 'CreateTestInput' >> beam.Create(fpls)
                | 'WrapFpls' >> beam.Map(wrap_fpl)
                | 'ExtractInterestsNum' >>
                meta_feature_extractor.ExtractMetaFeature(get_num_interests))
            slice_specs = [
                slicer.SingleSliceSpec(),
                slicer.SingleSliceSpec(columns=['num_interests']),
            ]
            metrics = (
                with_meta_feature
                | 'ExtractSlices' >>
                slice_key_extractor._ExtractSliceKeys(slice_specs)
                | 'FanoutSlices' >> slicer.FanoutSlices())

            util.assert_that(metrics, check_result)
예제 #5
0
def SliceKeyExtractor(
    slice_spec: Optional[List[slicer.SingleSliceSpec]] = None,
    eval_config: Optional[config.EvalConfig] = None,
    materialize: Optional[bool] = True) -> extractor.Extractor:
  """Creates an extractor for extracting slice keys.

  The incoming Extracts must contain features stored under tfma.FEATURES_KEY
  and optionally under tfma.TRANSFORMED_FEATURES.

  The extractor's PTransform yields a copy of the Extracts input with an
  additional extract pointing at the list of SliceKeyType values keyed by
  tfma.SLICE_KEY_TYPES_KEY. If materialize is True then a materialized version
  of the slice keys will be added under the key tfma.MATERIALZED_SLICE_KEYS_KEY.

  Args:
    slice_spec: Deprecated (use EvalConfig).
    eval_config: Optional EvalConfig whose slicing_specs specify the slices to
      slice the data into. If slicing_specs are empty, defaults to the overall
      slice.
    materialize: True to add MaterializedColumn entries for the slice keys.

  Returns:
    Extractor for slice keys.

  Raises:
    ValueError: If both slice_spec and eval_config are provided.
  """
  if eval_config:
    if slice_spec:
      raise ValueError('slice_spec is deprecated, only use eval_config')
    slice_spec = [
        slicer.SingleSliceSpec(spec=slicing_spec)
        for slicing_spec in eval_config.slicing_specs
    ]
  # With nothing to slice on, use a single spec built without arguments.
  slice_spec = slice_spec or [slicer.SingleSliceSpec()]
  slice_keys_ptransform = ExtractSliceKeys(slice_spec, eval_config, materialize)
  return extractor.Extractor(
      stage_name=SLICE_KEY_EXTRACTOR_STAGE_NAME,
      ptransform=slice_keys_ptransform)
    def testMaterializedSliceKeys(self):
        """Checks the MaterializedColumn slice keys emitted with materialize=True."""

        def materialized_keys(values):
            # Builds the column expected under constants.SLICE_KEYS_KEY.
            return types.MaterializedColumn(name=constants.SLICE_KEYS_KEY,
                                            value=values)

        def check_result(got):
            try:
                self.assertEqual(2, len(got), 'got: %s' % got)
                expected_results = sorted([
                    materialized_keys([b'Overall', b'gender:f']),
                    materialized_keys([b'Overall', b'gender:m']),
                ])
                got_results = []
                for item in got:
                    self.assertIn(constants.SLICE_KEYS_KEY, item)
                    got_results.append(item[constants.SLICE_KEYS_KEY])
                self.assertEqual(sorted(got_results),
                                 sorted(expected_results))
            except AssertionError as err:
                raise util.BeamAssertException(err)

        with beam.Pipeline() as pipeline:
            fpls = create_fpls()
            slice_specs = [
                slicer.SingleSliceSpec(),
                slicer.SingleSliceSpec(columns=['gender']),
            ]
            slice_keys_extracts = (
                pipeline
                | 'CreateTestInput' >> beam.Create(fpls)
                | 'WrapFpls' >> beam.Map(wrap_fpl)
                | 'ExtractSlices' >> slice_key_extractor.ExtractSliceKeys(
                    slice_specs, materialize=True))

            util.assert_that(slice_keys_extracts, check_result)
  def testLegacySliceKeys(self):
    """Checks the slice key types emitted for an empty and a 'gender' spec."""

    def check_result(got):
      try:
        self.assertLen(got, 2)
        expected_results = sorted([
            [(), (('gender', 'f'),)],
            [(), (('gender', 'm'),)],
        ])
        got_results = []
        for item in got:
          self.assertIn(constants.SLICE_KEY_TYPES_KEY, item)
          slice_keys = item[constants.SLICE_KEY_TYPES_KEY]
          got_results.append(sorted(slice_keys))
        self.assertCountEqual(got_results, expected_results)
      except AssertionError as err:
        raise util.BeamAssertException(err)

    with beam.Pipeline() as pipeline:
      fpls = create_fpls()
      slice_specs = [
          slicer.SingleSliceSpec(),
          slicer.SingleSliceSpec(columns=['gender']),
      ]
      slice_keys_extracts = (
          pipeline
          | 'CreateTestInput' >> beam.Create(fpls)
          | 'WrapFpls' >> beam.Map(wrap_fpl)
          | 'ExtractSlices' >> slice_key_extractor.ExtractSliceKeys(slice_specs))

      util.assert_that(slice_keys_extracts, check_result)
예제 #8
0
 def test_slice_spec_from_stats_and_schema_int_categorical(self):
   """Checks specs for an INT feature with string_stats and one with num_stats.

   'feature1' (INT with string_stats) is expected under its own name, while
   'feature2' (INT with num_stats) is expected under its name prefixed with
   TRANSFORMED_FEATURE_PREFIX.
   """
   feature_stats = text_format.Parse(
       """
       datasets {
         features: {
           path { step: 'feature1' }
           type: INT
           string_stats: {
             unique: 10
           }
         }
         features: {
           path { step: 'feature2' }
           type: INT
           num_stats: {
             min: 1
             max: 10
           }
         }
       }
       """, statistics_pb2.DatasetFeatureStatisticsList())
   prefix = auto_slice_key_extractor.TRANSFORMED_FEATURE_PREFIX
   numeric_column = prefix + 'feature2'
   want_specs = [
       slicer.SingleSliceSpec(columns=['feature1']),
       slicer.SingleSliceSpec(columns=[numeric_column]),
       slicer.SingleSliceSpec(columns=['feature1', numeric_column]),
       slicer.SingleSliceSpec(),
   ]
   got_specs = auto_slice_key_extractor.slice_spec_from_stats(feature_stats)
   self.assertEqual(got_specs, want_specs)
예제 #9
0
    def testSliceOneSlice(self):
        """Checks fan-out over an empty spec plus a 'gender' column spec."""
        with beam.Pipeline() as pipeline:
            fpls = create_fpls()
            slice_specs = [
                slicer.SingleSliceSpec(),
                slicer.SingleSliceSpec(columns=['gender']),
            ]
            metrics = (
                pipeline
                | 'CreateTestInput' >> beam.Create(fpls, reshuffle=False)
                | 'WrapFpls' >> beam.Map(wrap_fpl)
                | 'ExtractSlices' >>
                slice_key_extractor.ExtractSliceKeys(slice_specs)
                | 'FanoutSlices' >> slicer.FanoutSlices())

            def check_result(got):
                try:
                    self.assertLen(got, 4)
                    # Each input appears once under the empty slice key and
                    # once under its own gender slice key.
                    expected_result = [
                        ((), wrap_fpl(fpls[0])),
                        ((), wrap_fpl(fpls[1])),
                        ((('gender', 'f'), ), wrap_fpl(fpls[0])),
                        ((('gender', 'm'), ), wrap_fpl(fpls[1])),
                    ]
                    self.assertCountEqual(got, expected_result)
                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(metrics, check_result)
예제 #10
0
def render_time_series(
    results: view_types.EvalResults,
    slicing_spec: Optional[Union[slicer.SingleSliceSpec,
                                 config_pb2.SlicingSpec]] = None,
    display_full_path: bool = False
) -> Optional[visualization.TimeSeriesViewer]:  # pytype: disable=invalid-annotation
    """Renders the time series view as a widget.

    Args:
      results: A tfma.EvalResults.
      slicing_spec: A tfma.SlicingSpec determining the slice to show the time
        series on. The overall slice is shown if not set.
      display_full_path: Whether to display the full path to model / data in
        the visualization or just show the file name.

    Returns:
      A TimeSeriesViewer object if in Jupyter notebook; None if in Colab.
    """
    # A config proto is first converted to the SingleSliceSpec wrapper.
    if slicing_spec and isinstance(slicing_spec, config_pb2.SlicingSpec):
        slicing_spec = slicer.SingleSliceSpec(spec=slicing_spec)
    effective_spec = slicing_spec or slicer.SingleSliceSpec()

    series = util.get_time_series(results, effective_spec, display_full_path)
    is_model_centric = results.get_mode() == constants.MODEL_CENTRIC_MODE
    view_config = {'isModelCentric': is_model_centric}
    return visualization.render_time_series(series, view_config)
예제 #11
0
 def test_slice_spec_from_stats_and_schema(self):
     """Checks which of six features end up in the generated slice specs.

     Only 'feature1' and 'feature3' (string_stats with 10 unique values) are
     expected. The features with 200 unique values and the two features with
     empty num_stats are expected to be left out.
     """
     feature_stats = text_format.Parse(
         """
     datasets {
       features: {
         path { step: 'feature1' }
         type: STRING
         string_stats: {
           unique: 10
         }
       }
       features: {
         path { step: 'feature2' }
         type: STRING
         string_stats: {
           unique: 200
         }
       }
       features: {
         path { step: 'feature3' }
         type: INT
         string_stats: {
           unique: 10
         }
       }
       features: {
         path { step: 'feature4' }
         type: INT
         string_stats: {
           unique: 200
         }
       }
       features: {
         path { step: 'feature5' }
         type: INT
         num_stats: {
         }
       }
       features: {
         path { step: 'feature6' }
         type: FLOAT
         num_stats: {
         }
       }
     }
     """, statistics_pb2.DatasetFeatureStatisticsList())
     want_specs = [
         slicer.SingleSliceSpec(columns=['feature1']),
         slicer.SingleSliceSpec(columns=['feature3']),
         slicer.SingleSliceSpec(columns=['feature1', 'feature3']),
         slicer.SingleSliceSpec(),
     ]
     got_specs = auto_slice_key_extractor.slice_spec_from_stats(
         feature_stats)
     self.assertEqual(got_specs, want_specs)
예제 #12
0
    def testBuildAnalysisTableWithSlices(self):
        """Checks materialized slice keys and prediction columns in the table.

        A single serialized example is run through BuildAnalysisTable with
        three slice specs; the one resulting extract must carry the expected
        slice keys column and the listed prediction columns.
        """
        model_location = self._exportEvalSavedModel(
            linear_classifier.simple_linear_classifier)
        eval_shared_model = model_eval_lib.default_eval_shared_model(
            eval_saved_model_path=model_location)

        example1 = self._makeExample(
            age=3.0, language='english', label=1.0, slice_key='first_slice')
        slice_spec = [
            slicer.SingleSliceSpec(columns=['age']),
            slicer.SingleSliceSpec(features=[('age', 3)]),
            slicer.SingleSliceSpec(
                columns=['age'], features=[('language', 'english')]),
        ]

        def check_result(got):
            self.assertEqual(1, len(got), 'got: %s' % got)
            extracts = got[0]

            # Values of type MaterializedColumn are emitted to signal to
            # downstream sink components to output the data to file.
            materialized_dict = {
                key: value
                for key, value in extracts.items()
                if isinstance(value, types.MaterializedColumn)
            }
            expected_slice_keys = types.MaterializedColumn(
                name=constants.SLICE_KEYS_KEY,
                value=[
                    b'age:3.0', b'age:3', b'age_X_language:3.0_X_english'
                ])
            self._assertMaterializedColumns(
                materialized_dict,
                {constants.SLICE_KEYS_KEY: expected_slice_keys})
            self._assertMaterializedColumnsExist(materialized_dict, [
                'predictions__logits', 'predictions__probabilities',
                'predictions__classes', 'predictions__logistic',
                'predictions__class_ids'
            ])

        with beam.Pipeline() as pipeline:
            serialized_examples = [example1.SerializeToString()]
            result = (
                pipeline
                | 'CreateInput' >> beam.Create(serialized_examples)
                | 'BuildTable' >> contrib.BuildAnalysisTable(
                    eval_shared_model, slice_spec))

            util.assert_that(result[constants.ANALYSIS_KEY], check_result)
예제 #13
0
 def testSerializeDeserializeLegacyEvalConfig(self):
     """Checks that a pickled legacy config loads as the equivalent EvalConfig.

     A LegacyConfig is pickled into a TFRecord file named 'eval_config' in a
     temp directory, read back with model_eval_lib.load_eval_config, and
     compared against a hand-built config.EvalConfig.
     """
     output_path = self._getTempDir()
     old_config = LegacyConfig(
         model_location='/path/to/model',
         data_location='/path/to/data',
         slice_spec=[
             slicer.SingleSliceSpec(columns=['country'],
                                    features=[('age', 5), ('gender', 'f')]),
             slicer.SingleSliceSpec(columns=['interest'],
                                    features=[('age', 6), ('gender', 'm')])
         ],
         example_count_metric_key=None,
         example_weight_metric_key='key',
         compute_confidence_intervals=False,
         k_anonymization_count=1)
     # The record written to disk is a pickled dict holding the TFMA version
     # string and the legacy config object.
     final_dict = {}
     final_dict['tfma_version'] = tfma_version.VERSION_STRING
     final_dict['eval_config'] = old_config
     with tf.io.TFRecordWriter(os.path.join(output_path,
                                            'eval_config')) as w:
         w.write(pickle.dumps(final_dict))
     got_eval_config = model_eval_lib.load_eval_config(output_path)
     # Build the expected config: options are copied from the legacy fields.
     options = config.Options()
     options.compute_confidence_intervals.value = (
         old_config.compute_confidence_intervals)
     options.k_anonymization_count.value = old_config.k_anonymization_count
     # The legacy integer feature values (5, 6) are expected as the strings
     # '5' and '6' in the SlicingSpec feature_values.
     eval_config = config.EvalConfig(
         input_data_specs=[
             config.InputDataSpec(location=old_config.data_location)
         ],
         model_specs=[config.ModelSpec(location=old_config.model_location)],
         output_data_specs=[
             config.OutputDataSpec(default_location=output_path)
         ],
         slicing_specs=[
             config.SlicingSpec(feature_keys=['country'],
                                feature_values={
                                    'age': '5',
                                    'gender': 'f'
                                }),
             config.SlicingSpec(feature_keys=['interest'],
                                feature_values={
                                    'age': '6',
                                    'gender': 'm'
                                })
         ],
         options=options)
     self.assertEqual(eval_config, got_eval_config)
def slice_spec_from_stats(  # pylint: disable=invalid-name
    statistics: statistics_pb2.DatasetFeatureStatisticsList,
    categorical_uniques_threshold: int = 100,
    max_cross_size: int = 2,
    allowlist_features: Optional[Set[Text]] = None,
    denylist_features: Optional[Set[Text]] = None) -> List[
        slicer.SingleSliceSpec]:
    """Generates slice specs from statistics, honoring allow and deny lists.

    Only the first dataset of `statistics` is inspected. Categorical features
    are sliced on by name; numeric features are sliced on under their name
    prefixed with TRANSFORMED_FEATURE_PREFIX.

    Args:
      statistics: Data statistics; `statistics.datasets[0]` is read.
      categorical_uniques_threshold: Maximum number of unique values beyond
        which we don't slice on that categorical feature.
      max_cross_size: Maximum size of feature crosses to consider.
      allowlist_features: Set of features to be used for slicing. An empty or
        unset value places no restriction.
      denylist_features: Set of features to ignore for slicing.

    Returns:
      One spec per combination of 1..max_cross_size column names, followed by
      a `slicer.SingleSliceSpec()` built without arguments.
    """
    features_to_consider = []
    for feature in statistics.datasets[0].features:
        # TODO(pachristopher): Consider structured features once TFMA supports
        # slicing on structured features.
        if len(feature.path.step) != 1:
            continue
        feature_name = feature.path.step[0]
        if allowlist_features and feature_name not in allowlist_features:
            continue
        if denylist_features and feature_name in denylist_features:
            continue
        features_to_consider.append(feature)

    column_names = [
        feature.path.step[0]
        for feature in _get_slicable_categorical_features(
            features_to_consider, categorical_uniques_threshold)
    ]
    # We would bucketize numeric features based on the quantiles boundaries,
    # so they are referenced through their transformed name.
    column_names.extend(
        TRANSFORMED_FEATURE_PREFIX + feature.path.step[0]
        for feature in _get_slicable_numeric_features(features_to_consider))

    specs = [
        slicer.SingleSliceSpec(columns=list(cross))
        for cross_size in range(1, max_cross_size + 1)
        for cross in itertools.combinations(column_names, cross_size)
    ]
    specs.append(slicer.SingleSliceSpec())
    return specs
예제 #15
0
def render_slicing_metrics(
    result: view_types.EvalResult,
    slicing_column: Optional[Text] = None,
    slicing_spec: Optional[Union[slicer.SingleSliceSpec,
                                 config_pb2.SlicingSpec]] = None,
    weighted_example_column: Optional[Text] = None,
    event_handlers: Optional[Callable[[Dict[Text, Union[Text, float]]],
                                      None]] = None,
) -> Optional[visualization.SlicingMetricsViewer]:  # pytype: disable=invalid-annotation
    """Renders the slicing metrics view as a widget.

    Args:
      result: A tfma.EvalResult.
      slicing_column: The column to slice on.
      slicing_spec: The tfma.SlicingSpec to filter results. If neither column
        nor spec is set, show overall.
      weighted_example_column: Override for the weighted example column. This
        can be used when different weights are applied in different parts of
        the model (eg: multi-head).
      event_handlers: The event handlers.

    Returns:
      A SlicingMetricsViewer object if in Jupyter notebook; None if in Colab.
    """
    # A config proto is first converted to the SingleSliceSpec wrapper.
    if slicing_spec and isinstance(slicing_spec, config_pb2.SlicingSpec):
        slicing_spec = slicer.SingleSliceSpec(spec=slicing_spec)

    metrics_data = util.get_slicing_metrics(
        result.slicing_metrics, slicing_column, slicing_spec)
    view_config = util.get_slicing_config(
        result.config, weighted_example_column)
    return visualization.render_slicing_metrics(
        metrics_data, view_config, event_handlers=event_handlers)
예제 #16
0
def render_slicing_attributions(
    result: view_types.EvalResult,
    slicing_column: Optional[Text] = None,
    slicing_spec: Optional[Union[slicer.SingleSliceSpec,
                                 config_pb2.SlicingSpec]] = None,
    metric_name: Optional[Text] = None,
    weighted_example_column: Optional[Text] = None,
    event_handlers: Optional[Callable[[Dict[Text, Union[Text, float]]],
                                      None]] = None,
) -> Optional[visualization.SlicingMetricsViewer]:  # pytype: disable=invalid-annotation
    """Renders the attributions of one metric in the slicing metrics view.

    Args:
      result: A tfma.EvalResult.
      slicing_column: The column to slice on.
      slicing_spec: The tfma.SlicingSpec to filter results. If neither column
        nor spec is set, show overall.
      metric_name: Name of the attributions metric to show attributions for.
        Optional if only one metric is used.
      weighted_example_column: Override for the weighted example column. This
        can be used when different weights are applied in different parts of
        the model (eg: multi-head).
      event_handlers: The event handlers.

    Returns:
      A SlicingMetricsViewer object if in Jupyter notebook; None if in Colab.

    Raises:
      ValueError: If metric_name is given but missing for some sub key, or if
        it is not given and a sub key does not hold exactly one metric.
    """
    if slicing_spec and isinstance(slicing_spec, config_pb2.SlicingSpec):
        slicing_spec = slicer.SingleSliceSpec(spec=slicing_spec)
    data = util.get_slicing_metrics(result.attributions, slicing_column,
                                    slicing_spec)
    # Attributions have one additional level of indirection for the
    # metric_name. Collapse it by picking the requested (or only) metric.
    for slice_entry in data:
        collapsed = {}
        for output_name, by_sub_key in slice_entry['metrics'].items():  # pytype: disable=attribute-error
            per_output = {}
            collapsed[output_name] = per_output
            for sub_key, by_metric in by_sub_key.items():
                if metric_name:
                    if metric_name not in by_metric:
                        raise ValueError(
                            'metric_name={} not found in {}'.format(
                                metric_name, by_metric.keys()))
                    selected = by_metric[metric_name]
                elif len(by_metric) == 1:
                    # Only one metric is present, so no name is needed.
                    selected = next(iter(by_metric.values()))
                else:
                    raise ValueError(
                        'metric_name must be one of the following: {}'.format(
                            by_metric.keys()))
                per_output[sub_key] = selected
        slice_entry['metrics'] = collapsed

    view_config = util.get_slicing_config(
        result.config, weighted_example_column)
    return visualization.render_slicing_metrics(
        data, view_config, event_handlers=event_handlers)
예제 #17
0
 def assertSliceResult(self, name, features_dict, columns, features, expected):
   """Asserts the slices produced for `features_dict` by a single spec.

   Args:
     name: Test case name, included in the failure message.
     features_dict: Features passed to get_slices_for_features_dict.
     columns: Columns for the SingleSliceSpec.
     features: Features for the SingleSliceSpec.
     expected: Expected slices, compared ignoring order.
   """
   failure_msg = 'Test case %s: slice on columns %s, features %s' % (
       name, columns, features)
   single_spec = slicer.SingleSliceSpec(columns=columns, features=features)
   actual = slicer.get_slices_for_features_dict(features_dict, [single_spec])
   six.assertCountEqual(self, expected, actual, failure_msg)
예제 #18
0
def SliceKeyExtractor(slice_spec: Optional[List[slicer.SingleSliceSpec]] = None,
                      materialize: Optional[bool] = True
                     ) -> extractor.Extractor:
  """Creates an extractor for extracting slice keys.

  The incoming Extracts must contain a FeaturesPredictionsLabels extract keyed
  by tfma.FEATURES_PREDICTIONS_LABELS_KEY. Typically this will be obtained by
  calling the PredictExtractor.

  The extractor's PTransform yields a copy of the Extracts input with an
  additional extract pointing at the list of SliceKeyType values keyed by
  tfma.SLICE_KEY_TYPES_KEY. If materialize is True then a materialized version
  of the slice keys will be added under the key tfma.MATERIALZED_SLICE_KEYS_KEY.

  Args:
    slice_spec: Optional list of SingleSliceSpec specifying the slices to slice
      the data into. If None or empty, defaults to the overall slice.
    materialize: True to add MaterializedColumn entries for the slice keys.

  Returns:
    Extractor for slice keys.
  """
  # With nothing to slice on, use a single spec built without arguments.
  specs_to_use = slice_spec or [slicer.SingleSliceSpec()]
  slice_keys_ptransform = ExtractSliceKeys(specs_to_use, materialize)
  return extractor.Extractor(
      stage_name=SLICE_KEY_EXTRACTOR_STAGE_NAME,
      ptransform=slice_keys_ptransform)
예제 #19
0
 def testConvertEvalResultToUIInputWithNoDataFound(self):
     """Checks that slicing on a column absent from the results raises."""
     result_with_one_slice = self._makeEvalResult(
         slices=((('slice', '1'), ), ))
     # The only slice is on 'slice', so a spec on 'unknown' must raise.
     with self.assertRaises(ValueError):
         widget_view.convert_slicing_metrics_to_ui_input(
             result_with_one_slice.slicing_metrics,
             slicing_spec=slicer.SingleSliceSpec(columns=['unknown']))
    def _check_threshold(key: metric_types.MetricKey,
                         slicing_spec: Optional[config.SlicingSpec],
                         threshold: _ThresholdType, metric: Any) -> bool:
        """Verifies a metric value against a value or change threshold.

        Reads `sliced_key`, `metrics` and `baseline_model_name` from the
        enclosing scope.

        Args:
          key: Metric key, used to look up the baseline value in `metrics`.
          slicing_spec: Optional spec restricting where the threshold applies.
          threshold: A GenericValueThreshold or GenericChangeThreshold.
          metric: The metric value (the diff for a change threshold).

        Returns:
          True if the threshold does not apply to `sliced_key` or the metric
          satisfies it; the failed comparison result otherwise. Nothing is
          returned for any other threshold type.

        Raises:
          ValueError: If a change threshold's direction is neither
            LOWER_IS_BETTER nor HIGHER_IS_BETTER.
        """
        # A threshold scoped to other slices passes trivially for this one.
        if slicing_spec is not None:
            scoped_spec = slicer.SingleSliceSpec(spec=slicing_spec)
            if not scoped_spec.is_slice_applicable(sliced_key):
                return True

        if isinstance(threshold, config.GenericValueThreshold):
            # Unset bounds are treated as unbounded.
            lower_bound = (threshold.lower_bound.value
                           if threshold.HasField('lower_bound') else -np.inf)
            upper_bound = (threshold.upper_bound.value
                           if threshold.HasField('upper_bound') else np.inf)
            return metric > lower_bound and metric < upper_bound
        elif isinstance(threshold, config.GenericChangeThreshold):
            diff = metric
            ratio = diff / metrics[key.make_baseline_key(baseline_model_name)]
            lower_is_better = (
                threshold.direction == config.MetricDirection.LOWER_IS_BETTER)
            # An unset limit defaults to the infinity that always passes for
            # the given direction.
            if lower_is_better:
                unset_limit = np.inf
            elif threshold.direction == config.MetricDirection.HIGHER_IS_BETTER:
                unset_limit = -np.inf
            else:
                raise ValueError('"UNKNOWN" direction for change threshold.')
            absolute = (threshold.absolute.value
                        if threshold.HasField('absolute') else unset_limit)
            relative = (threshold.relative.value
                        if threshold.HasField('relative') else unset_limit)
            if lower_is_better:
                return diff < absolute and ratio < relative
            return diff > absolute and ratio > relative
    def testSliceKeys(self, model_names, extracts, slice_column,
                      expected_slices):
        """Checks slice key types extracted for one slice column.

        Args:
          model_names: Names used to build one ModelSpec each.
          extracts: Input extracts fed into the pipeline.
          slice_column: Column to slice on.
          expected_slices: Expected sorted slice keys per extract, compared
            ignoring the order of extracts.
        """
        model_specs = [config.ModelSpec(name=name) for name in model_names]
        eval_config = config.EvalConfig(model_specs=model_specs)

        def check_result(got):
            try:
                self.assertLen(got, 2)
                got_results = []
                for item in got:
                    self.assertIn(constants.SLICE_KEY_TYPES_KEY, item)
                    slice_keys = item[constants.SLICE_KEY_TYPES_KEY]
                    got_results.append(sorted(slice_keys))
                self.assertCountEqual(got_results, expected_slices)
            except AssertionError as err:
                raise util.BeamAssertException(err)

        with beam.Pipeline() as pipeline:
            column_specs = [slicer.SingleSliceSpec(columns=[slice_column])]
            slice_keys_extracts = (
                pipeline
                | 'CreateTestInput' >> beam.Create(extracts)
                | 'ExtractSlices' >> slice_key_extractor.ExtractSliceKeys(
                    column_specs, eval_config=eval_config))

            util.assert_that(slice_keys_extracts, check_result)
예제 #22
0
 def testValidateMetricsMetricTDistributionValueAndThreshold(
         self, slicing_specs, slice_key):
     """Checks validation of a ValueWithTDistribution metric against a bound.

     The AUC metric has sample_mean 0.91 and unsampled_value 0.8 with a lower
     bound of 0.9; validation is expected to fail and to report 0.8 as the
     metric value.

     Args:
       slicing_specs: Slicing specs for the eval config and the per-slice
         threshold, or None (presumably supplied by a parameterized
         decorator not visible here).
       slice_key: Slice key of the metrics being validated.
     """
     threshold = config.MetricThreshold(
         value_threshold=config.GenericValueThreshold(
             lower_bound={'value': 0.9}))
     # The threshold is attached at the metric level only when no slicing
     # specs are given; it is always attached as a per-slice threshold.
     eval_config = config.EvalConfig(
         model_specs=[
             config.ModelSpec(),
         ],
         slicing_specs=slicing_specs,
         metrics_specs=[
             config.MetricsSpec(metrics=[
                 config.MetricConfig(
                     class_name='AUC',
                     threshold=threshold if slicing_specs is None else None,
                     per_slice_thresholds=[
                         config.PerSliceMetricThreshold(
                             slicing_specs=slicing_specs,
                             threshold=threshold)
                     ]),
             ],
                                model_names=['']),
         ],
     )
     sliced_metrics = (slice_key, {
         metric_types.MetricKey(name='auc'):
         types.ValueWithTDistribution(sample_mean=0.91, unsampled_value=0.8)
     })
     result = metrics_validator.validate_metrics(sliced_metrics,
                                                 eval_config)
     self.assertFalse(result.validation_ok)
     # Expected result: one failure for 'auc' carrying the value 0.8.
     expected = text_format.Parse(
         """
     metric_validations_per_slice {
       failures {
         metric_key {
           name: "auc"
         }
         metric_value {
           double_value {
             value: 0.8
           }
         }
       }
     }""", validation_result_pb2.ValidationResult())
     expected.metric_validations_per_slice[0].failures[
         0].metric_threshold.CopyFrom(threshold)
     expected.metric_validations_per_slice[0].slice_key.CopyFrom(
         slicer.serialize_slice_key(slice_key))
     # One slicing_details entry is expected per spec applicable to
     # slice_key; with no specs at all, a single entry with an empty
     # SlicingSpec is expected.
     for spec in slicing_specs or [None]:
         if (spec is None or slicer.SingleSliceSpec(
                 spec=spec).is_slice_applicable(slice_key)):
             slicing_details = expected.validation_details.slicing_details.add(
             )
             if spec is not None:
                 slicing_details.slicing_spec.CopyFrom(spec)
             else:
                 slicing_details.slicing_spec.CopyFrom(config.SlicingSpec())
             slicing_details.num_matching_slices = 1
     self.assertEqual(result, expected)
예제 #23
0
    def testSliceDefaultSlice(self):
        """Checks that a single argument-less spec fans each input out once.

        Both inputs are expected under the empty slice key `()`, in any order.
        """
        with beam.Pipeline() as pipeline:
            fpls = create_fpls()

            metrics = (
                pipeline
                | 'CreateTestInput' >> beam.Create(fpls)
                | 'WrapFpls' >> beam.Map(wrap_fpl)
                | 'ExtractSlices' >> slice_key_extractor.ExtractSliceKeys(
                    [slicer.SingleSliceSpec()])
                | 'FanoutSlices' >> slicer.FanoutSlices())

            def check_result(got):
                try:
                    self.assertLen(got, 2)
                    expected_result = [
                        ((), wrap_fpl(fpls[0])),
                        ((), wrap_fpl(fpls[1])),
                    ]
                    # Order-insensitive comparison that reports the differing
                    # elements on failure, instead of a bare assertTrue over a
                    # hand-written either-order expression.
                    self.assertCountEqual(got, expected_result)
                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(metrics, check_result)
예제 #24
0
    def testIsSliceApplicable(self):
        """Checks is_slice_applicable over a table of spec / slice key cases.

        Each case is (name, columns, features, slice_key, expected result).
        """
        # Slice key shared by the first four cases.
        three_column_key = (('column1', 'value1'), ('column3', 'value3'),
                            ('column4', 'value4'))
        test_cases = [
            ('applicable',
             ['column1'],
             [('column3', 'value3'), ('column4', 'value4')],
             three_column_key,
             True),
            ('wrongcolumns',
             ['column1', 'column2'],
             [('column3', 'value3'), ('column4', 'value4')],
             three_column_key,
             False),
            ('wrongfeatures',
             ['column1'],
             [('column3', 'value3')],
             three_column_key,
             False),
            ('nocolumns',
             [],
             [('column3', 'value3')],
             three_column_key,
             False),
            ('nofeatures', ['column1'], [], (('column1', 'value1'), ), True),
            ('empty slice key', ['column1'], [('column2', 'value1')], (),
             False),
            ('overall', [], [], (), True),
        ]  # pyformat: disable

        for case_name, columns, features, slice_key, expected in test_cases:
            spec = slicer.SingleSliceSpec(columns=columns, features=features)
            applicable = spec.is_slice_applicable(slice_key)
            self.assertEqual(applicable, expected, msg=case_name)
예제 #25
0
def get_slicing_metrics(
    results: List[Tuple[slicer.SliceKeyType, Dict[Text, Any]]],
    slicing_column: Optional[Text] = None,
    slicing_spec: Optional[slicer.SingleSliceSpec] = None,
) -> List[Dict[Text, Union[Dict[Text, Any], Text]]]:
    """Util function that extracts slicing metrics from the results.

    If neither slicing_column nor slicing_spec is provided, get Overall. If
    slicing_column is set, use it to filter metrics from results (it takes
    precedence over slicing_spec when both are given). Otherwise, use
    slicing_spec for filtering.

    Args:
      results: A list of records. Each record is a tuple of (slice_name,
        {metric_name, metric_value}).
      slicing_column: The column to filter the results with.
      slicing_spec: The slicer.SingleSliceSpec to filter the results with.

    Returns:
      A list of {slice, metrics}

    Raises:
      ValueError: No slice in results matches the requested filter, or more
        than one set of overall result is found.
    """
    # Pick the filter and, alongside it, the label used in error messages so
    # that the message always names the filter that was actually applied.
    overall_requested = False
    if slicing_column:
        effective_spec = slicer.SingleSliceSpec(columns=[slicing_column])
        filter_label = slicing_column
    elif slicing_spec:
        effective_spec = slicing_spec
        filter_label = slicing_spec
    else:
        effective_spec = slicer.SingleSliceSpec()
        filter_label = slicer.OVERALL_SLICE_NAME
        overall_requested = True

    data = find_all_slices(results, effective_spec)

    if not data:
        raise ValueError('No slices found for %s' % (filter_label, ))
    # Exactly one overall entry is expected when no filter was supplied.
    if overall_requested and len(data) > 1:
        raise ValueError('More than one slice found for %s' %
                         slicer.OVERALL_SLICE_NAME)
    return data
예제 #26
0
def convert_slicing_metrics_to_ui_input(
        slicing_metrics: List[Tuple[slicer.SliceKeyOrCrossSliceKeyType,
                                    view_types.MetricsByOutputName]],
        slicing_column: Optional[str] = None,
        slicing_spec: Optional[slicer.SingleSliceSpec] = None,
        output_name: str = '',
        multi_class_key: str = '') -> Optional[List[Dict[str, Any]]]:
    """Converts slicing metrics into a list of per-slice dicts.

    Args:
      slicing_metrics: tfma.EvalResult.slicing_metrics.
      slicing_column: The slicing column to filter results. If both
        slicing_column and slicing_spec are None, show all eval results.
      slicing_spec: The slicing spec to filter results. If both
        slicing_column and slicing_spec are None, show all eval results.
      output_name: The output name associated with metric (for multi-output
        models).
      multi_class_key: The multi-class key associated with metric (for
        multi-class models).

    Returns:
      A list of dicts for each slice, where each dict contains keys
      'sliceValue', 'slice', and 'metrics'.

    Raises:
      ValueError if no related eval result found or both slicing_column and
      slicing_spec are not None.
    """
    if slicing_column and slicing_spec:
        raise ValueError(
            'Only one of the "slicing_column" and "slicing_spec" parameters '
            'can be set.')
    if slicing_column:
        slicing_spec = slicer.SingleSliceSpec(columns=[slicing_column])

    data = []
    for slice_key, metric_value in slicing_metrics:
        # Entries without metrics for the requested output / class are
        # ignored entirely.
        if metric_value is None:
            continue
        if output_name not in metric_value:
            continue
        per_output = metric_value[output_name]
        if multi_class_key not in per_output:
            continue
        metrics = per_output[multi_class_key]

        # Cross slice keys are delegated to the helper and are not subject
        # to the slicing_spec filter below.
        if slicer.is_cross_slice_key(slice_key):
            _add_cross_slice_key_data(slice_key, metrics, data)
            continue

        # Regular slices: an empty slice key is always kept; any other key
        # is dropped only when a spec is set and does not apply to it.
        if (slicing_spec is not None and slice_key
                and not slicing_spec.is_slice_applicable(slice_key)):
            continue
        data.append({
            'sliceValue': stringify_slice_key_value(slice_key),
            'slice': slicer.stringify_slice_key(slice_key),
            'metrics': metrics
        })

    if data:
        return data
    raise ValueError(
        'No eval result found for output_name:"%s" and '
        'multi_class_key:"%s" and slicing_column:"%s" and slicing_spec:"%s".'
        % (output_name, multi_class_key, slicing_column, slicing_spec))
예제 #27
0
 def testNonUTF8ValueRaisesValueError(self):
   # A lone 0x8a byte is not valid UTF-8; slicing on a column holding it
   # should raise a ValueError whose message mentions the column name.
   bad_column = 'column_name'
   specs = [slicer.SingleSliceSpec(columns=[bad_column])]
   features = self._makeFeaturesDict({bad_column: [b'\x8a']})
   with self.assertRaisesRegex(ValueError, bad_column):
     # Iterate the result fully so that any lazily produced error surfaces
     # inside the assertion context.
     for _ in slicer.get_slices_for_features_dicts([features], None, specs):
       pass
예제 #28
0
    def testGetSlicesForFeaturesDictMultipleSingleSliceSpecs(self):
        example = self._makeFeaturesDict({
            'gender': ['f'],
            'age': [5],
            'interest': ['cars'],
        })

        # Four specs: overall, by age column, age fixed to 4 (the example has
        # age 5, and no age-4 key appears in the expectation), and gender
        # column with age fixed to 5.
        specs = [
            slicer.SingleSliceSpec(),
            slicer.SingleSliceSpec(columns=['age']),
            slicer.SingleSliceSpec(features=[('age', 4)]),
            slicer.SingleSliceSpec(columns=['gender'],
                                   features=[('age', 5)]),
        ]

        expected_keys = [
            (),
            (('age', 5), ),
            (('age', 5), ('gender', 'f')),
        ]
        actual_keys = slicer.get_slices_for_features_dict(example, specs)
        self.assertItemsEqual(expected_keys, actual_keys)
예제 #29
0
def convert_eval_result_to_ui_input(
    eval_result: model_eval_lib.EvalResult,
    slicing_column: Optional[Text] = None,
    slicing_spec: Optional[slicer.SingleSliceSpec] = None,
    output_name: Text = '',
    multi_class_key: Text = '') -> Optional[List[Dict[Text, Any]]]:
  """Converts an eval result into a list of per-slice dicts.

  Args:
    eval_result: An tfma.EvalResult.
    slicing_column: The slicing column to filter results. If both
      slicing_column and slicing_spec are None, show all eval results.
    slicing_spec: The slicing spec to filter results. If both slicing_column and
      slicing_spec are None, show all eval results.
    output_name: The output name associated with metric (for multi-output
      models).
    multi_class_key: The multi-class key associated with metric (for multi-class
      models).

  Returns:
    A list of dicts, one per matching slice, each with the keys 'sliceValue',
    'slice', and 'metrics'.

  Raises:
    ValueError if no related eval result found or both slicing_column and
    slicing_spec are not None.
  """
  if slicing_column and slicing_spec:
    raise ValueError(
        'Only one of the "slicing_column" and "slicing_spec" parameters '
        'can be set.')
  if slicing_column:
    slicing_spec = slicer.SingleSliceSpec(columns=[slicing_column])

  data = []
  for (slice_key, metric_value) in eval_result.slicing_metrics:
    # A slice with no metrics at all (None) is skipped rather than allowed to
    # raise TypeError on the membership test below.
    if metric_value is None or output_name not in metric_value:
      continue
    metrics_by_class = metric_value[output_name]
    if multi_class_key not in metrics_by_class:
      continue

    # An empty slice key is always kept; other keys must satisfy the spec
    # when one is set.
    slice_key_ok = (
        slicing_spec is None or not slice_key or
        slicing_spec.is_slice_applicable(slice_key))
    if not slice_key_ok:
      continue

    data.append({
        'sliceValue': stringify_slice_key_value(slice_key),
        'slice': slicer.stringify_slice_key(slice_key),
        'metrics': metrics_by_class[multi_class_key]
    })
  if not data:
    raise ValueError(
        'No eval result found for output_name:"%s" and '
        'multi_class_key:"%s" and slicing_column:"%s" and slicing_spec:"%s".' %
        (output_name, multi_class_key, slicing_column, slicing_spec))
  return data
 def is_slice_applicable(
     sliced_combiner_output: Tuple[slicer.SliceKeyType,
                                   metric_types.MetricsDict],
     slicing_specs: Union[config.SlicingSpec, Iterable[config.SlicingSpec]]
 ) -> bool:
     """Returns whether any of the given slicing specs applies to the slice.

     Args:
       sliced_combiner_output: A (slice key, metrics) pair; only the slice key
         is inspected.
       slicing_specs: Either a single config.SlicingSpec or an iterable of
         them.

     Returns:
       True if at least one spec is applicable to the slice key, False
       otherwise (including when no specs are given).
     """
     slice_key, _ = sliced_combiner_output
     # The signature permits a lone SlicingSpec; wrap it so that it is treated
     # as a one-element collection instead of being iterated directly.
     if isinstance(slicing_specs, config.SlicingSpec):
         slicing_specs = [slicing_specs]
     return any(
         slicer.SingleSliceSpec(spec=slicing_spec).is_slice_applicable(
             slice_key) for slicing_spec in slicing_specs)