Example 1
def convert_slicing_metrics_to_ui_input(
        slicing_metrics: List[Tuple[slicer.SliceKeyOrCrossSliceKeyType,
                                    view_types.MetricsByOutputName]],
        slicing_column: Optional[str] = None,
        slicing_spec: Optional[slicer.SingleSliceSpec] = None,
        output_name: str = '',
        multi_class_key: str = '') -> Optional[List[Dict[str, Any]]]:
    """Renders the Fairness Indicator view.

  Args:
    slicing_metrics: tfma.EvalResult.slicing_metrics.
    slicing_column: The slicing column to to filter results. If both
      slicing_column and slicing_spec are None, show all eval results.
    slicing_spec: The slicing spec to filter results. If both slicing_column and
      slicing_spec are None, show all eval results.
    output_name: The output name associated with metric (for multi-output
      models).
    multi_class_key: The multi-class key associated with metric (for multi-class
      models).

  Returns:
    A list of dicts for each slice, where each dict contains keys 'sliceValue',
    'slice', and 'metrics'.

  Raises:
    ValueError if no related eval result found or both slicing_column and
    slicing_spec are not None.
  """
    if slicing_column and slicing_spec:
        raise ValueError(
            'Only one of the "slicing_column" and "slicing_spec" parameters '
            'can be set.')
    if slicing_column:
        slicing_spec = slicer.SingleSliceSpec(columns=[slicing_column])

    data = []
    for (slice_key, metric_value) in slicing_metrics:
        if (metric_value is not None and output_name in metric_value
                and multi_class_key in metric_value[output_name]):
            metrics = metric_value[output_name][multi_class_key]
            # Add evaluation data for cross-slice comparison.
            if slicer.is_cross_slice_key(slice_key):
                _add_cross_slice_key_data(slice_key, metrics, data)
            # Add evaluation data for regular slices.
            elif (slicing_spec is None or not slice_key
                  or slicing_spec.is_slice_applicable(slice_key)):
                data.append({
                    'sliceValue': stringify_slice_key_value(slice_key),
                    'slice': slicer.stringify_slice_key(slice_key),
                    'metrics': metrics
                })
    if not data:
        raise ValueError(
            'No eval result found for output_name:"%s" and '
            'multi_class_key:"%s" and slicing_column:"%s" and slicing_spec:"%s".'
            % (output_name, multi_class_key, slicing_column, slicing_spec))
    return data
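
A minimal usage sketch, not part of the example above: it assumes the helper is importable from tensorflow_model_analysis.addons.fairness.view.util and that slicing_metrics follows the tfma.EvalResult.slicing_metrics layout; the slice keys and metric payloads are illustrative.

from tensorflow_model_analysis.addons.fairness.view import util

# Hypothetical metrics for the overall slice and one 'gender' slice; the
# empty-string keys match the default output_name and multi_class_key.
slicing_metrics = [
    ((), {'': {'': {'accuracy': {'doubleValue': 0.9}}}}),
    ((('gender', 'female'),), {'': {'': {'accuracy': {'doubleValue': 0.8}}}}),
]
ui_input = util.convert_slicing_metrics_to_ui_input(
    slicing_metrics, slicing_column='gender')
# Each entry carries 'sliceValue', 'slice' and 'metrics', e.g.
# {'sliceValue': 'female', 'slice': 'gender:female', 'metrics': {...}}.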
Example 2
def convert_slice_attributions_to_proto(
    attributions: Tuple[slicer.SliceKeyOrCrossSliceKeyType,
                        Dict[Any, Dict[Text, Any]]]
) -> metrics_for_slice_pb2.AttributionsForSlice:
    """Converts the given slice attributions into serialized AtributionsForSlice.

  Args:
    attributions: The slice attributions.

  Returns:
    The AttributionsForSlice proto.

  Raises:
    TypeError: If the type of the feature value in slice key cannot be
      recognized.
  """
    result = metrics_for_slice_pb2.AttributionsForSlice()
    slice_key, slice_attributions = attributions

    if slicer.is_cross_slice_key(slice_key):
        result.cross_slice_key.CopyFrom(
            slicer.serialize_cross_slice_key(slice_key))
    else:
        result.slice_key.CopyFrom(slicer.serialize_slice_key(slice_key))

    slice_attributions = slice_attributions.copy()
    for key in sorted(slice_attributions.keys()):
        key_and_value = result.attributions_keys_and_values.add()
        key_and_value.key.CopyFrom(key.to_proto())
        for feature, value in slice_attributions[key].items():
            attribution_value = metrics_for_slice_pb2.MetricValue()
            if isinstance(value, six.binary_type):
                # Convert textual types to string metrics.
                attribution_value.bytes_value = value
            elif isinstance(value, six.text_type):
                # Convert textual types to string metrics.
                attribution_value.bytes_value = value.encode('utf8')
            elif isinstance(value, np.ndarray) and value.size != 1:
                # Convert NumPy arrays to ArrayValue.
                attribution_value.array_value.CopyFrom(
                    _convert_to_array_value(value))
            else:
                # We try to convert to float values.
                try:
                    attribution_value.double_value.value = float(value)
                except (TypeError, ValueError) as e:
                    attribution_value.unknown_type.value = str(value)
                    attribution_value.unknown_type.error = str(e)
            key_and_value.values[feature].CopyFrom(attribution_value)

    return result
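
A minimal usage sketch, assuming the converter and AttributionsKey live at their usual TFMA paths; the key name, slice key, and values are illustrative.

import numpy as np
from tensorflow_model_analysis.metrics import metric_types
from tensorflow_model_analysis.writers import (
    metrics_plots_and_validations_writer as writer)

# AttributionsKey provides the .to_proto() the converter calls on each key.
key = metric_types.AttributionsKey(name='total_attributions')
attributions = ((('age', 35),),  # hypothetical single-column slice key
                {key: {'age': 1.25, 'income': np.array([0.1, 0.9])}})
proto = writer.convert_slice_attributions_to_proto(attributions)
# The float lands in double_value, the two-element array in array_value.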
Example 3
def convert_slice_plots_to_proto(
    plots: Tuple[slicer.SliceKeyOrCrossSliceKeyType, Dict[Any, Any]],
    add_metrics_callbacks: List[types.AddMetricsCallbackType]
) -> metrics_for_slice_pb2.PlotsForSlice:
    """Converts the given slice plots into PlotsForSlice proto.

  Args:
    plots: The slice plots.
    add_metrics_callbacks: A list of metric callbacks. This should be the same
      list as the one passed to tfma.Evaluate().

  Returns:
    The PlotsForSlice proto.
  """
    result = metrics_for_slice_pb2.PlotsForSlice()
    slice_key, slice_plots = plots

    if slicer.is_cross_slice_key(slice_key):
        result.cross_slice_key.CopyFrom(
            slicer.serialize_cross_slice_key(slice_key))
    else:
        result.slice_key.CopyFrom(slicer.serialize_slice_key(slice_key))

    slice_plots = slice_plots.copy()

    if metric_keys.ERROR_METRIC in slice_plots:
        logging.warning('Error for slice: %s with error message: %s ',
                        slice_key, slice_plots[metric_keys.ERROR_METRIC])
        error_metric = slice_plots.pop(metric_keys.ERROR_METRIC)
        result.plots[metric_keys.ERROR_METRIC].debug_message = error_metric
        return result

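    # Convert the plots from add_metrics_callbacks to the structured output if
    # defined (this only runs when no keys are MetricKeys, i.e. legacy output).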
    if add_metrics_callbacks and (not any(
            isinstance(k, metric_types.MetricKey)
            for k in slice_plots.keys())):
        for add_metrics_callback in add_metrics_callbacks:
            if hasattr(add_metrics_callback, 'populate_plots_and_pop'):
                add_metrics_callback.populate_plots_and_pop(
                    slice_plots, result.plots)
    plots_by_key = {}
    for key in sorted(slice_plots.keys()):
        value = slice_plots[key]
        # Remove plot name from key (multiple plots are combined into a single
        # proto).
        if isinstance(key, metric_types.MetricKey):
            parent_key = key._replace(name=None)
        else:
            continue
        if parent_key not in plots_by_key:
            key_and_value = result.plot_keys_and_values.add()
            key_and_value.key.CopyFrom(parent_key.to_proto())
            plots_by_key[parent_key] = key_and_value.value

        if isinstance(value,
                      metrics_for_slice_pb2.CalibrationHistogramBuckets):
            plots_by_key[parent_key].calibration_histogram_buckets.CopyFrom(
                value)
            slice_plots.pop(key)
        elif isinstance(value,
                        metrics_for_slice_pb2.ConfusionMatrixAtThresholds):
            plots_by_key[parent_key].confusion_matrix_at_thresholds.CopyFrom(
                value)
            slice_plots.pop(key)
        elif isinstance(
                value,
                metrics_for_slice_pb2.MultiClassConfusionMatrixAtThresholds):
            plots_by_key[
                parent_key].multi_class_confusion_matrix_at_thresholds.CopyFrom(
                    value)
            slice_plots.pop(key)
        elif isinstance(
                value,
                metrics_for_slice_pb2.MultiLabelConfusionMatrixAtThresholds):
            plots_by_key[
                parent_key].multi_label_confusion_matrix_at_thresholds.CopyFrom(
                    value)
            slice_plots.pop(key)

    if slice_plots:
        if add_metrics_callbacks is None:
            add_metrics_callbacks = []
        raise NotImplementedError(
            'Some plots were not converted or popped. Keys: %s. '
            'add_metrics_callbacks were: %s' % (
                slice_plots.keys(),
                [
                    x.name for x in add_metrics_callbacks  # pytype: disable=attribute-error
                ]))

    return result
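
A minimal usage sketch, assuming the usual TFMA module layout; the plot key and the empty CalibrationHistogramBuckets payload are illustrative.

from tensorflow_model_analysis.metrics import metric_types
from tensorflow_model_analysis.proto import metrics_for_slice_pb2
from tensorflow_model_analysis.writers import (
    metrics_plots_and_validations_writer as writer)

plot_key = metric_types.MetricKey(name='calibration_histogram')
slice_plots = {plot_key: metrics_for_slice_pb2.CalibrationHistogramBuckets()}
proto = writer.convert_slice_plots_to_proto(
    ((('day', 'monday'),), slice_plots), add_metrics_callbacks=[])
# The plot name is folded into the parent key (name=None), so the histogram
# is stored under that key in plot_keys_and_values.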
Example 4
def convert_slice_metrics_to_proto(
    metrics: Tuple[slicer.SliceKeyOrCrossSliceKeyType, Dict[Any, Any]],
    add_metrics_callbacks: List[types.AddMetricsCallbackType]
) -> metrics_for_slice_pb2.MetricsForSlice:
    """Converts the given slice metrics into serialized proto MetricsForSlice.

  Args:
    metrics: The slice metrics.
    add_metrics_callbacks: A list of metric callbacks. This should be the same
      list as the one passed to tfma.Evaluate().

  Returns:
    The MetricsForSlice proto.

  Raises:
    TypeError: If the type of the feature value in slice key cannot be
      recognized.
  """
    result = metrics_for_slice_pb2.MetricsForSlice()
    slice_key, slice_metrics = metrics

    if slicer.is_cross_slice_key(slice_key):
        result.cross_slice_key.CopyFrom(
            slicer.serialize_cross_slice_key(slice_key))
    else:
        result.slice_key.CopyFrom(slicer.serialize_slice_key(slice_key))

    slice_metrics = slice_metrics.copy()

    if metric_keys.ERROR_METRIC in slice_metrics:
        logging.warning('Error for slice: %s with error message: %s ',
                        slice_key, slice_metrics[metric_keys.ERROR_METRIC])
        result.metrics[metric_keys.ERROR_METRIC].debug_message = slice_metrics[
            metric_keys.ERROR_METRIC]
        return result

    # Convert the metrics from add_metrics_callbacks to the structured output if
    # defined.
    if add_metrics_callbacks and (not any(
            isinstance(k, metric_types.MetricKey)
            for k in slice_metrics.keys())):
        for add_metrics_callback in add_metrics_callbacks:
            if hasattr(add_metrics_callback, 'populate_stats_and_pop'):
                add_metrics_callback.populate_stats_and_pop(
                    slice_key, slice_metrics, result.metrics)
    for key in sorted(slice_metrics.keys()):
        value = slice_metrics[key]
        metric_value = metrics_for_slice_pb2.MetricValue()
        if isinstance(value,
                      metrics_for_slice_pb2.ConfusionMatrixAtThresholds):
            metric_value.confusion_matrix_at_thresholds.CopyFrom(value)
        elif isinstance(
                value,
                metrics_for_slice_pb2.MultiClassConfusionMatrixAtThresholds):
            metric_value.multi_class_confusion_matrix_at_thresholds.CopyFrom(
                value)
        elif isinstance(value, types.ValueWithTDistribution):
            # We currently populate both bounded_value and confidence_interval;
            # bounded_value can be dropped once the UI handles
            # confidence_interval. A 95% confidence interval is computed here.
            _, lower_bound, upper_bound = (
                math_util.calculate_confidence_interval(value))
            metric_value.bounded_value.value.value = value.unsampled_value
            metric_value.bounded_value.lower_bound.value = lower_bound
            metric_value.bounded_value.upper_bound.value = upper_bound
            metric_value.bounded_value.methodology = (
                metrics_for_slice_pb2.BoundedValue.POISSON_BOOTSTRAP)
            # Populate confidence_interval
            metric_value.confidence_interval.lower_bound.value = lower_bound
            metric_value.confidence_interval.upper_bound.value = upper_bound
            t_dist_value = metrics_for_slice_pb2.TDistributionValue()
            t_dist_value.sample_mean.value = value.sample_mean
            t_dist_value.sample_standard_deviation.value = (
                value.sample_standard_deviation)
            t_dist_value.sample_degrees_of_freedom.value = (
                value.sample_degrees_of_freedom)
            # Once the UI handles confidence interval, we will avoid setting this and
            # instead use the double_value.
            t_dist_value.unsampled_value.value = value.unsampled_value
            metric_value.confidence_interval.t_distribution_value.CopyFrom(
                t_dist_value)
        elif isinstance(value, six.binary_type):
            # Convert textual types to string metrics.
            metric_value.bytes_value = value
        elif isinstance(value, six.text_type):
            # Convert textual types to string metrics.
            metric_value.bytes_value = value.encode('utf8')
        elif isinstance(value, np.ndarray):
            # Convert NumPy arrays to ArrayValue.
            metric_value.array_value.CopyFrom(_convert_to_array_value(value))
        else:
            # We try to convert to float values.
            try:
                metric_value.double_value.value = float(value)
            except (TypeError, ValueError) as e:
                metric_value.unknown_type.value = str(value)
                metric_value.unknown_type.error = str(e)

        if isinstance(key, metric_types.MetricKey):
            key_and_value = result.metric_keys_and_values.add()
            key_and_value.key.CopyFrom(key.to_proto())
            key_and_value.value.CopyFrom(metric_value)
        else:
            result.metrics[key].CopyFrom(metric_value)

    return result
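
A minimal usage sketch, assuming the converter is importable from the TFMA writers module; the metric key and value are illustrative.

from tensorflow_model_analysis.metrics import metric_types
from tensorflow_model_analysis.writers import (
    metrics_plots_and_validations_writer as writer)

# A scalar under a MetricKey: it is converted via float() into double_value
# and stored in metric_keys_and_values (string keys would go into `metrics`).
slice_metrics = {metric_types.MetricKey(name='accuracy'): 0.83}
proto = writer.convert_slice_metrics_to_proto(
    ((('country', 'US'),), slice_metrics), add_metrics_callbacks=[])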
Example 5
def validate_metrics(
    sliced_metrics: Tuple[Union[slicer.SliceKeyType, slicer.CrossSliceKeyType],
                          Dict['metric_types.MetricKey',
                               Any]], eval_config: config_pb2.EvalConfig
) -> validation_result_pb2.ValidationResult:
    """Check the metrics and check whether they should be validated."""
    # Find out which model is baseline.
    baseline_spec = model_util.get_baseline_model_spec(eval_config)
    baseline_model_name = baseline_spec.name if baseline_spec else None

    sliced_key, metrics = sliced_metrics
    thresholds = metric_specs.metric_thresholds_from_metrics_specs(
        eval_config.metrics_specs)
    is_cross_slice = slicer.is_cross_slice_key(sliced_key)

    def _check_threshold(key: metric_types.MetricKey,
                         threshold: _ThresholdType, metric: Any) -> bool:
        """Verify a metric given its metric key and metric value."""
        metric = float(metric)
        if isinstance(threshold, config_pb2.GenericValueThreshold):
            lower_bound, upper_bound = -np.inf, np.inf
            if threshold.HasField('lower_bound'):
                lower_bound = threshold.lower_bound.value
            if threshold.HasField('upper_bound'):
                upper_bound = threshold.upper_bound.value
            return lower_bound <= metric <= upper_bound
        elif isinstance(threshold, config_pb2.GenericChangeThreshold):
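            # For change thresholds the metric value is already the diff
            # relative to the baseline; only the ratio is derived here.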
            diff = metric
            metric_baseline = float(
                metrics[key.make_baseline_key(baseline_model_name)])
            if math.isclose(metric_baseline, 0.0):
                ratio = float('nan')
            else:
                ratio = diff / metric_baseline
            if threshold.direction == config_pb2.MetricDirection.LOWER_IS_BETTER:
                absolute, relative = np.inf, np.inf
            elif threshold.direction == config_pb2.MetricDirection.HIGHER_IS_BETTER:
                absolute, relative = -np.inf, -np.inf
            else:
                raise ValueError(
                    '"UNKNOWN" direction for change threshold: {}.'.format(
                        threshold))
            if threshold.HasField('absolute'):
                absolute = threshold.absolute.value
            if threshold.HasField('relative'):
                relative = threshold.relative.value
            if threshold.direction == config_pb2.MetricDirection.LOWER_IS_BETTER:
                return diff <= absolute and ratio <= relative
            elif threshold.direction == config_pb2.MetricDirection.HIGHER_IS_BETTER:
                return diff >= absolute and ratio >= relative
        else:
            raise ValueError('Unknown threshold: {}'.format(threshold))

    def _copy_metric(metric, to):
        # Will add more types when more MetricValue are supported.
        to.double_value.value = float(metric)

    def _copy_threshold(threshold, to):
        if isinstance(threshold, config_pb2.GenericValueThreshold):
            to.value_threshold.CopyFrom(threshold)
        if isinstance(threshold, config_pb2.GenericChangeThreshold):
            to.change_threshold.CopyFrom(threshold)

    def _add_to_set(s, v):
        """Adds value to set. Returns true if didn't exist."""
        if v in s:
            return False
        else:
            s.add(v)
            return True

    # A slice with no metrics to check is considered validated.
    result = validation_result_pb2.ValidationResult(validation_ok=True)
    validation_for_slice = validation_result_pb2.MetricsValidationForSlice()
    unchecked_thresholds = dict(thresholds)
    for metric_key, metric in metrics.items():
        if metric_key not in thresholds:
            continue
        del unchecked_thresholds[metric_key]
        # Thresholds are not meaningful for the baseline model, so skip any
        # configured for it. Message-type metrics are likewise not compared.
        if metric_key.model_name == baseline_model_name:
            continue
        msg = ''
        existing_failures = set()
        for slice_spec, threshold in thresholds[metric_key]:
            if slice_spec is not None:
                if (isinstance(slice_spec, config_pb2.SlicingSpec)
                        and (is_cross_slice or not slicer.SingleSliceSpec(
                            spec=slice_spec).is_slice_applicable(sliced_key))):
                    continue
                if (isinstance(slice_spec, config_pb2.CrossSlicingSpec)
                        and (not is_cross_slice
                             or not slicer.is_cross_slice_applicable(
                                 cross_slice_key=sliced_key,
                                 cross_slicing_spec=slice_spec))):
                    continue
            elif is_cross_slice:
                continue
            try:
                check_result = _check_threshold(metric_key, threshold, metric)
            except ValueError:
                msg = """
          Invalid metrics or threshold for comparison: The type of the metric
          is: {}, the metric value is: {}, and the threshold is: {}.
          """.format(type(metric), metric, threshold)
                check_result = False
            else:
                msg = ''
            if not check_result:
                # The same threshold values could be set for multiple matching slice
                # specs. Only store the first match.
                #
                # Note that hashing by SerializeToString() is only safe if used within
                # the same process.
                if not _add_to_set(existing_failures,
                                   threshold.SerializeToString()):
                    continue
                failure = validation_for_slice.failures.add()
                failure.metric_key.CopyFrom(metric_key.to_proto())
                _copy_metric(metric, failure.metric_value)
                _copy_threshold(threshold, failure.metric_threshold)
                failure.message = msg
            # Record that a validation check was completed for this slice spec
            # and metric.
            slicing_details = result.validation_details.slicing_details.add()
            if slice_spec is not None:
                if isinstance(slice_spec, config_pb2.SlicingSpec):
                    slicing_details.slicing_spec.CopyFrom(slice_spec)
                else:
                    slicing_details.cross_slicing_spec.CopyFrom(slice_spec)
            else:
                slicing_details.slicing_spec.CopyFrom(config_pb2.SlicingSpec())
            slicing_details.num_matching_slices = 1
    # All unchecked thresholds are considered failures.
    for metric_key, thresholds in unchecked_thresholds.items():
        if metric_key.model_name == baseline_model_name:
            continue
        existing_failures = set()
        for slice_spec, threshold in thresholds:
            if slice_spec is not None:
                if is_cross_slice != isinstance(slice_spec,
                                                config_pb2.CrossSlicingSpec):
                    continue
                if (is_cross_slice
                        and not slicer.is_cross_slice_applicable(
                            cross_slice_key=sliced_key,
                            cross_slicing_spec=slice_spec)):
                    continue
            elif is_cross_slice:
                continue
            # The same threshold values could be set for multiple matching slice
            # specs. Only store the first match.
            #
            # Note that hashing by SerializeToString() is only safe if used within
            # the same process.
            if not _add_to_set(existing_failures,
                               threshold.SerializeToString()):
                continue
            failure = validation_for_slice.failures.add()
            failure.metric_key.CopyFrom(metric_key.to_proto())
            _copy_threshold(threshold, failure.metric_threshold)
            failure.message = 'Metric not found.'
    # Any failure leads to overall failure.
    if validation_for_slice.failures:
        if not is_cross_slice:
            validation_for_slice.slice_key.CopyFrom(
                slicer.serialize_slice_key(sliced_key))
        else:
            validation_for_slice.cross_slice_key.CopyFrom(
                slicer.serialize_cross_slice_key(sliced_key))
        result.validation_ok = False
        result.metric_validations_per_slice.append(validation_for_slice)
    return result
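
A minimal usage sketch, assuming the usual TFMA module layout. It is an assumption that a plain entry in MetricsSpec.thresholds keyed by 'accuracy' matches MetricKey(name='accuracy') here; the exact key fields that must line up depend on the TFMA version.

from tensorflow_model_analysis.metrics import metric_types
from tensorflow_model_analysis.proto import config_pb2
from tensorflow_model_analysis.validators import metrics_validator

eval_config = config_pb2.EvalConfig(
    model_specs=[config_pb2.ModelSpec()],
    metrics_specs=[
        config_pb2.MetricsSpec(thresholds={
            'accuracy':
                config_pb2.MetricThreshold(
                    value_threshold=config_pb2.GenericValueThreshold(
                        lower_bound={'value': 0.9}))
        })
    ])
metrics = {metric_types.MetricKey(name='accuracy'): 0.8}
result = metrics_validator.validate_metrics(
    ((('country', 'US'),), metrics), eval_config)
# 0.8 violates the 0.9 lower bound, so result.validation_ok is False.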
Example 6
def convert_slice_metrics_to_proto(
    metrics: Tuple[slicer.SliceKeyOrCrossSliceKeyType, Dict[Any, Any]],
    add_metrics_callbacks: List[types.AddMetricsCallbackType]
) -> metrics_for_slice_pb2.MetricsForSlice:
    """Converts the given slice metrics into serialized proto MetricsForSlice.

  Args:
    metrics: The slice metrics.
    add_metrics_callbacks: A list of metric callbacks. This should be the same
      list as the one passed to tfma.Evaluate().

  Returns:
    The MetricsForSlice proto.

  Raises:
    TypeError: If the type of the feature value in slice key cannot be
      recognized.
  """
    result = metrics_for_slice_pb2.MetricsForSlice()
    slice_key, slice_metrics = metrics

    if slicer.is_cross_slice_key(slice_key):
        result.cross_slice_key.CopyFrom(
            slicer.serialize_cross_slice_key(slice_key))
    else:
        result.slice_key.CopyFrom(slicer.serialize_slice_key(slice_key))

    slice_metrics = slice_metrics.copy()

    if metric_keys.ERROR_METRIC in slice_metrics:
        logging.warning('Error for slice: %s with error message: %s ',
                        slice_key, slice_metrics[metric_keys.ERROR_METRIC])
        result.metrics[metric_keys.ERROR_METRIC].debug_message = slice_metrics[
            metric_keys.ERROR_METRIC]
        return result

    # Convert the metrics from add_metrics_callbacks to the structured output if
    # defined.
    if add_metrics_callbacks and (not any(
            isinstance(k, metric_types.MetricKey)
            for k in slice_metrics.keys())):
        for add_metrics_callback in add_metrics_callbacks:
            if hasattr(add_metrics_callback, 'populate_stats_and_pop'):
                add_metrics_callback.populate_stats_and_pop(
                    slice_key, slice_metrics, result.metrics)
    for key in sorted(slice_metrics.keys()):
        value = slice_metrics[key]
        if isinstance(value, types.ValueWithTDistribution):
            unsampled_value = value.unsampled_value
            _, lower_bound, upper_bound = (
                math_util.calculate_confidence_interval(value))
            confidence_interval = metrics_for_slice_pb2.ConfidenceInterval(
                lower_bound=convert_metric_value_to_proto(lower_bound),
                upper_bound=convert_metric_value_to_proto(upper_bound),
                standard_error=convert_metric_value_to_proto(
                    value.sample_standard_deviation),
                degrees_of_freedom={'value': value.sample_degrees_of_freedom})
            metric_value = convert_metric_value_to_proto(unsampled_value)

            # If the metric fits in a double_value, replace it with a
            # bounded_value for backwards compatibility.
            # TODO(b/188575688): remove this logic to stop populating bounded_value
            if metric_value.WhichOneof('type') == 'double_value':
                # Setting bounded_value clears double_value in the same oneof
                # scope.
                metric_value.bounded_value.value.value = unsampled_value
                metric_value.bounded_value.lower_bound.value = lower_bound
                metric_value.bounded_value.upper_bound.value = upper_bound
                metric_value.bounded_value.methodology = (
                    metrics_for_slice_pb2.BoundedValue.POISSON_BOOTSTRAP)
        else:
            metric_value = convert_metric_value_to_proto(value)
            confidence_interval = None

        if isinstance(key, metric_types.MetricKey):
            result.metric_keys_and_values.add(
                key=key.to_proto(),
                value=metric_value,
                confidence_interval=confidence_interval)
        else:
            result.metrics[key].CopyFrom(metric_value)

    return result
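
A minimal usage sketch, assuming types.ValueWithTDistribution keeps its usual TFMA fields; the numbers are illustrative. A t-distribution value yields a confidence_interval and, because the point estimate converts to a double_value, the legacy bounded_value is backfilled too.

from tensorflow_model_analysis import types
from tensorflow_model_analysis.metrics import metric_types
from tensorflow_model_analysis.writers import (
    metrics_plots_and_validations_writer as writer)

value = types.ValueWithTDistribution(
    sample_mean=0.79,
    sample_standard_deviation=0.02,
    sample_degrees_of_freedom=9,
    unsampled_value=0.8)
slice_metrics = {metric_types.MetricKey(name='auc'): value}
proto = writer.convert_slice_metrics_to_proto(
    ((('country', 'US'),), slice_metrics), add_metrics_callbacks=[])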