Code example #1
    def test_example_model(self):
        train_tf_file = self._write_tf_records(self._create_data())
        classifier = example_model.train_model(self._model_dir, train_tf_file,
                                               LABEL, TEXT_FEATURE,
                                               FEATURE_MAP)

        validate_tf_file = self._write_tf_records(self._create_data())
        tfma_eval_result_path = os.path.join(self._model_dir,
                                             'tfma_eval_result')
        example_model.evaluate_model(classifier, validate_tf_file,
                                     tfma_eval_result_path, SLICE, LABEL,
                                     FEATURE_MAP)

        expected_slice_keys = [
            'Overall', 'slice:slice3', 'slice:slice1', 'slice:slice2'
        ]
        evaluation_results = tfma.load_eval_result(tfma_eval_result_path)

        self.assertLen(evaluation_results.slicing_metrics, 4)

        # Verify that false_positive_rate metrics are computed for all slice
        # values.
        for (slice_key, metric_value) in evaluation_results.slicing_metrics:
            slice_key = slicer.stringify_slice_key(slice_key)
            self.assertIn(slice_key, expected_slice_keys)
            self.assertGreaterEqual(
                1.0, metric_value['']['']
                ['post_export_metrics/false_positive_rate@0.50']
                ['doubleValue'])
            self.assertLessEqual(
                0.0, metric_value['']['']
                ['post_export_metrics/false_positive_rate@0.50']
                ['doubleValue'])
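
Note: `slicing_metrics` values are nested dicts keyed first by output name and then by multi-class sub-key; both keys are the empty string for a single-output binary model, which is why the assertions above index with `['']['']`. A minimal sketch of walking that structure (the result path is hypothetical, and the `slicer_lib` import path may vary across TFMA versions):

import tensorflow_model_analysis as tfma
from tensorflow_model_analysis.slicer import slicer_lib as slicer

# Hypothetical path: wherever evaluate_model wrote its TFMA output.
eval_result = tfma.load_eval_result('/tmp/model_dir/tfma_eval_result')
for slice_key, metric_value in eval_result.slicing_metrics:
    # '' selects the default output and the default (non-multi-class) view.
    metrics = metric_value['']['']
    print(slicer.stringify_slice_key(slice_key), sorted(metrics.keys()))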
Code example #2
    def process(self, element: types.Extracts) -> List[types.Extracts]:
        # Slice on transformed features if available.
        features_dicts = []
        if (constants.TRANSFORMED_FEATURES_KEY in element
                and element[constants.TRANSFORMED_FEATURES_KEY] is not None):
            transformed_features = element[constants.TRANSFORMED_FEATURES_KEY]
            # If only one model, the output is stored without keying on model name.
            if not self._eval_config or len(
                    self._eval_config.model_specs) == 1:
                features_dicts.append(transformed_features)
            else:
                # Search for slices in each model's transformed features output.
                for spec in self._eval_config.model_specs:
                    if spec.name in transformed_features:
                        features_dicts.append(transformed_features[spec.name])
        # Search for slices first in transformed features (if any). If a match
        # is not found there, search in raw features.
        slices = list(
            slicer.get_slices_for_features_dicts(
                features_dicts, util.get_features_from_extracts(element),
                self._slice_spec))

        # Make a shallow copy, so we don't mutate the original.
        element_copy = copy.copy(element)

        element_copy[constants.SLICE_KEY_TYPES_KEY] = slices
        # Add a list of stringified slice keys to be materialized to output table.
        if self._materialize:
            element_copy[constants.SLICE_KEYS_KEY] = types.MaterializedColumn(
                name=constants.SLICE_KEYS_KEY,
                value=(list(
                    slicer.stringify_slice_key(x).encode('utf-8')
                    for x in slices)))
        return [element_copy]
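
For context, each element of `slices` is a tuple of (column, value) pairs, with the empty tuple denoting the overall slice. A small sketch of the stringified forms, matching the conventions exercised by the test in code example #5 below (the `slicer_lib` import path may vary across TFMA versions):

from tensorflow_model_analysis.slicer import slicer_lib as slicer

slices = [(), (('gender', 'f'),), (('age', 5), ('gender', 'f'))]
print([slicer.stringify_slice_key(s) for s in slices])
# Prints: ['Overall', 'gender:f', 'age_X_gender:5_X_f']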
Code example #3
def convert_slicing_metrics_to_ui_input(
        slicing_metrics: List[Tuple[slicer.SliceKeyOrCrossSliceKeyType,
                                    view_types.MetricsByOutputName]],
        slicing_column: Optional[str] = None,
        slicing_spec: Optional[slicer.SingleSliceSpec] = None,
        output_name: str = '',
        multi_class_key: str = '') -> Optional[List[Dict[str, Any]]]:
    """Renders the Fairness Indicator view.

  Args:
    slicing_metrics: tfma.EvalResult.slicing_metrics.
    slicing_column: The slicing column used to filter results. If both
      slicing_column and slicing_spec are None, show all eval results.
    slicing_spec: The slicing spec to filter results. If both slicing_column and
      slicing_spec are None, show all eval results.
    output_name: The output name associated with metric (for multi-output
      models).
    multi_class_key: The multi-class key associated with metric (for multi-class
      models).

  Returns:
    A list of dicts for each slice, where each dict contains keys 'sliceValue',
    'slice', and 'metrics'.

  Raises:
    ValueError: If no matching eval result is found, or if both slicing_column
      and slicing_spec are set.
  """
    if slicing_column and slicing_spec:
        raise ValueError(
            'Only one of the "slicing_column" and "slicing_spec" parameters '
            'can be set.')
    if slicing_column:
        slicing_spec = slicer.SingleSliceSpec(columns=[slicing_column])

    data = []
    for (slice_key, metric_value) in slicing_metrics:
        if (metric_value is not None and output_name in metric_value
                and multi_class_key in metric_value[output_name]):
            metrics = metric_value[output_name][multi_class_key]
            # To add evaluation data for cross slice comparison.
            if slicer.is_cross_slice_key(slice_key):
                _add_cross_slice_key_data(slice_key, metrics, data)
            # To add evaluation data for regular slices.
            elif (slicing_spec is None or not slice_key
                  or slicing_spec.is_slice_applicable(slice_key)):
                data.append({
                    'sliceValue': stringify_slice_key_value(slice_key),
                    'slice': slicer.stringify_slice_key(slice_key),
                    'metrics': metrics
                })
    if not data:
        raise ValueError(
            'No eval result found for output_name:"%s" and '
            'multi_class_key:"%s" and slicing_column:"%s" and slicing_spec:"%s".'
            % (output_name, multi_class_key, slicing_column, slicing_spec))
    return data
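
A possible call site, assuming an EvalResult has already been loaded (the path below is hypothetical):

import tensorflow_model_analysis as tfma

eval_result = tfma.load_eval_result('/tmp/tfma_eval_result')  # hypothetical path
ui_input = convert_slicing_metrics_to_ui_input(
    eval_result.slicing_metrics, slicing_column='gender')
# Each entry has the shape:
# {'sliceValue': 'f', 'slice': 'gender:f', 'metrics': {...}}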
Code example #4
def convert_eval_result_to_ui_input(
    eval_result: model_eval_lib.EvalResult,
    slicing_column: Optional[Text] = None,
    slicing_spec: Optional[slicer.SingleSliceSpec] = None,
    output_name: Text = '',
    multi_class_key: Text = '') -> Optional[List[Dict[Text, Any]]]:
  """Renders the Fairness Indicator view.

  Args:
    eval_result: A tfma.EvalResult.
    slicing_column: The slicing column used to filter results. If both
      slicing_column and slicing_spec are None, show all eval results.
    slicing_spec: The slicing spec to filter results. If both slicing_column and
      slicing_spec are None, show all eval results.
    output_name: The output name associated with metric (for multi-output
      models).
    multi_class_key: The multi-class key associated with metric (for multi-class
      models).

  Returns:
    A list of dicts for each slice, where each dict contains keys 'sliceValue',
    'slice', and 'metrics'.

  Raises:
    ValueError: If no matching eval result is found, or if both slicing_column
      and slicing_spec are set.
  """
  if slicing_column and slicing_spec:
    raise ValueError(
        'Only one of the "slicing_column" and "slicing_spec" parameters '
        'can be set.')
  if slicing_column:
    slicing_spec = slicer.SingleSliceSpec(columns=[slicing_column])

  data = []
  for (slice_key, metric_value) in eval_result.slicing_metrics:
    slice_key_ok = (
        slicing_spec is None or not slice_key or
        slicing_spec.is_slice_applicable(slice_key))
    metric_ok = (
        output_name in metric_value and
        multi_class_key in metric_value[output_name])

    if slice_key_ok and metric_ok:
      data.append({
          'sliceValue': stringify_slice_key_value(slice_key),
          'slice': slicer.stringify_slice_key(slice_key),
          'metrics': metric_value[output_name][multi_class_key]
      })
  if not data:
    raise ValueError(
        'No eval result found for output_name:"%s" and '
        'multi_class_key:"%s" and slicing_column:"%s" and slicing_spec:"%s".' %
        (output_name, multi_class_key, slicing_column, slicing_spec))
  return data
Code example #5
  def testStringifySliceKey(self):
    test_cases = [
        ('overall', (), 'Overall'),
        ('one bytes feature', (('age_str', '5'),), 'age_str:5'),
        ('one int64 feature', (('age', 1),), 'age:1'),
        ('mixed', (('age', 1), ('gender', 'f')), 'age_X_gender:1_X_f'),
        ('more', (('age', 1), ('gender', 'f'), ('interest', 'cars')),
         'age_X_gender_X_interest:1_X_f_X_cars'),
        ('unicode', (('text', b'\xe4\xb8\xad\xe6\x96\x87'),),
         u'text:\u4e2d\u6587'),
    ]  # pyformat: disable
    for (name, slice_key, stringified_key) in test_cases:
      self.assertEqual(
          stringified_key, slicer.stringify_slice_key(slice_key), msg=name)
Code example #6
def _add_cross_slice_key_data(slice_key: slicer.CrossSliceKeyType,
                              metrics: view_types.MetricsByTextKey,
                              data: List[Any]):
    """Adds data for cross slice key.

  Baseline and comparison slice keys are joined by '__XX__'.
  Args:
    slice_key: Cross slice key.
    metrics: Metrics data for the cross slice key.
    data: List where UI data is to be appended.
  """
    baseline_key = slice_key[0]
    comparison_key = slice_key[1]
    stringify_slice_value = stringify_slice_key_value(
        baseline_key) + '__XX__' + stringify_slice_key_value(comparison_key)
    stringify_slice = slicer.stringify_slice_key(
        baseline_key) + '__XX__' + slicer.stringify_slice_key(comparison_key)
    data.append({
        'sliceValue': stringify_slice_value,
        'slice': stringify_slice,
        'metrics': metrics
    })
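
Illustrative only: applying the '__XX__' joining rule to a baseline slice ('age', 1) and a comparison slice ('age', 2) produces the following (the metrics payload is a stand-in):

data = []
_add_cross_slice_key_data(
    slice_key=((('age', 1),), (('age', 2),)),
    metrics={'accuracy': {'doubleValue': 0.9}},  # stand-in payload
    data=data)
assert data[0]['slice'] == 'age:1__XX__age:2'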
Code example #7
    def process(
            self, element: types.Extracts,
            slice_spec: List[slicer.SingleSliceSpec]) -> List[types.Extracts]:
        # Slice on transformed features if available.
        features_dicts = []
        if (constants.TRANSFORMED_FEATURES_KEY in element
                and element[constants.TRANSFORMED_FEATURES_KEY] is not None):
            transformed_features = element[constants.TRANSFORMED_FEATURES_KEY]
            # If only one model, the output is stored without keying on model name.
            if not self._eval_config or len(
                    self._eval_config.model_specs) == 1:
                features_dicts.append(transformed_features)
            else:
                # Search for slices in each model's transformed features output.
                for spec in self._eval_config.model_specs:
                    if spec.name in transformed_features:
                        features_dicts.append(transformed_features[spec.name])
        # Search for slices first in transformed features (if any). If a match
        # is not found there, search in raw features.
        slice_keys = list(
            slicer.get_slices_for_features_dicts(
                features_dicts, util.get_features_from_extracts(element),
                slice_spec))

        # If SLICE_KEY_TYPES_KEY already exists, the SqlSliceKeyExtractor has
        # already generated some slice keys; add them to the current
        # slice_keys list.
        if (constants.SLICE_KEY_TYPES_KEY in element
                and element[constants.SLICE_KEY_TYPES_KEY]):
            slice_keys.extend(element[constants.SLICE_KEY_TYPES_KEY])

        unique_slice_keys = list(set(slice_keys))
        if len(slice_keys) != len(unique_slice_keys):
            self._duplicate_slice_keys_counter.inc()

        # Make a shallow copy, so we don't mutate the original.
        element_copy = copy.copy(element)

        element_copy[constants.SLICE_KEY_TYPES_KEY] = (
            slicer.slice_keys_to_numpy_array(unique_slice_keys))
        # Add a list of stringified slice keys to be materialized to output table.
        if self._materialize:
            element_copy[constants.SLICE_KEYS_KEY] = types.MaterializedColumn(
                name=constants.SLICE_KEYS_KEY,
                value=(list(
                    slicer.stringify_slice_key(x).encode('utf-8')
                    for x in unique_slice_keys)))
        return [element_copy]
Code example #8
  def process(self, element: types.Extracts) -> List[types.Extracts]:
    features = util.get_features_from_extracts(element)
    slices = list(
        slicer.get_slices_for_features_dict(features, self._slice_spec))

    # Make a shallow copy, so we don't mutate the original.
    element_copy = copy.copy(element)

    element_copy[constants.SLICE_KEY_TYPES_KEY] = slices
    # Add a list of stringified slice keys to be materialized to output table.
    if self._materialize:
      element_copy[constants.SLICE_KEYS_KEY] = types.MaterializedColumn(
          name=constants.SLICE_KEYS_KEY,
          value=(list(
              slicer.stringify_slice_key(x).encode('utf-8') for x in slices)))
    return [element_copy]
Code example #9
def get_slices_as_dataframe(
    slices: List[SliceComparisonResult],
    additional_metric_keys: Optional[List[metric_types.MetricKey]] = None
) -> pd.DataFrame:
    """Returns top slices as a dataframe.

  Args:
    slices: List of ordered slices.
    additional_metric_keys: An optional list of additional metric keys to
      display.

  Returns:
    Dataframe containing information about the slices.
  """
    dataframe_data = []
    for slice_info in slices:
        slice_metrics = _get_metrics_as_dict(slice_info.raw_slice_metrics)
        row_data = {
            'Slice': slicer_lib.stringify_slice_key(slice_info.slice_key),
            'Size': slice_info.num_examples,
            'Slice metric': slice_info.slice_metric,
            'Base metric': slice_info.base_metric,
            'P-Value': slice_info.p_value,
            'Effect size': slice_info.effect_size
        }
        if additional_metric_keys:
            for metric_key in additional_metric_keys:
                # The MetricKeys are converted to strings for the column names since
                # all of the other column names in the dataframe are strings.
                row_data[str(
                    metric_key)] = slice_metrics[metric_key].unsampled_value
        dataframe_data.append(row_data)

    # The column labels are used to ensure that the order of the columns is always
    # the same.
    ordered_column_labels = [
        'Slice', 'Size', 'Slice metric', 'Base metric', 'P-Value',
        'Effect size'
    ]
    if additional_metric_keys:
        ordered_column_labels.extend(
            [str(metric_key) for metric_key in additional_metric_keys])
    dataframe = pd.DataFrame(dataframe_data, columns=ordered_column_labels)
    dataframe.set_index('Slice', inplace=True)
    return dataframe
Code example #10
def get_slices_as_dataframe(
        slices: List[SliceComparisonResult],
        additional_metric_names: Optional[List[Text]] = None) -> pd.DataFrame:
    """Returns top slices as a dataframe.

  Args:
    slices: List of ordered slices.
    additional_metric_names: An optional list of additional metric names to
      display.

  Returns:
    Dataframe containing information about the slices.
  """
    rows = []
    for slice_info in slices:
        slice_metrics = _get_metrics_as_dict(slice_info.raw_slice_metrics)
        row = {
            'Slice': slicer_lib.stringify_slice_key(slice_info.slice_key),
            'Size': slice_info.num_examples,
            'Slice metric': slice_info.slice_metric,
            'Base metric': slice_info.base_metric,
            'P-Value': slice_info.p_value,
            'Effect size': slice_info.effect_size
        }
        if additional_metric_names:
            for metric_name in additional_metric_names:
                row[metric_name] = slice_metrics[metric_name].unsampled_value
        rows.append(row)

    ordered_columns = [
        'Slice', 'Size', 'Slice metric', 'Base metric', 'P-Value',
        'Effect size'
    ]
    if additional_metric_names:
        ordered_columns.extend(additional_metric_names)
    dataframe = pd.DataFrame(rows, columns=ordered_columns)
    dataframe.set_index('Slice', inplace=True)
    return dataframe
Code example #11
def find_all_slices(
    results: List[Tuple[slicer.SliceKeyType, Dict[Text, Any]]],
    slicing_spec: slicer.SingleSliceSpec
) -> List[Dict[Text, Union[Dict[Text, Any], Text]]]:
    """Util function that extracts slicing metrics for the named column.

  Args:
    results: A list of records. Each record is a tuple of (slice_name,
      {metric_name, metric_value}).
    slicing_spec: The spec to slice on.

  Returns:
    A list of {slice, metrics} dicts.
  """
    data = []
    for (slice_key, metric_value) in results:
        if slicing_spec.is_slice_applicable(slice_key):
            data.append({
                'slice': slicer.stringify_slice_key(slice_key),
                'metrics': metric_value
            })

    return data  # pytype: disable=bad-return-type
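
A hedged usage sketch: given (slice key, metrics) tuples and a spec over a 'gender' column, only matching slices survive (the metric values are made up, and the `slicer_lib` import path may vary across TFMA versions):

from tensorflow_model_analysis.slicer import slicer_lib as slicer

results = [
    ((), {'accuracy': 0.81}),  # the overall slice does not match the spec
    ((('gender', 'f'),), {'accuracy': 0.78}),
    ((('gender', 'm'),), {'accuracy': 0.83}),
]
spec = slicer.SingleSliceSpec(columns=['gender'])
print(find_all_slices(results, spec))
# [{'slice': 'gender:f', 'metrics': {'accuracy': 0.78}},
#  {'slice': 'gender:m', 'metrics': {'accuracy': 0.83}}]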
Code example #12
def find_top_slices(metrics: List[metrics_for_slice_pb2.MetricsForSlice],
                    metric_key: Text,
                    statistics: statistics_pb2.DatasetFeatureStatisticsList,
                    comparison_type: Text = 'HIGHER',
                    min_num_examples: int = 10,
                    num_top_slices: int = 10,
                    rank_by: Text = 'EFFECT_SIZE'):
    """Finds top-k slices.

  Args:
    metrics: List of slice metrics protos. We assume that the metrics have the
      MetricValue.confidence_interval field populated; this happens when the
      metrics are computed with confidence intervals enabled.
    metric_key: Name of the metric based on which significance testing is done.
    statistics: Data statistics used to configure AutoSliceKeyExtractor.
    comparison_type: Type of comparison indicating if we are looking for slices
      whose metric is higher (`HIGHER`) or lower (`LOWER`) than the metric
      of the base slice (overall dataset).
    min_num_examples: Minimum number of examples that a slice should have.
    num_top_slices: Number of top slices to return.
    rank_by: Indicates how the slices should be ordered in the result.

  Returns:
    List of ordered slices.
  """
    assert comparison_type in ['HIGHER', 'LOWER']
    assert min_num_examples > 0
    assert num_top_slices > 0
    assert rank_by in ['EFFECT_SIZE', 'PVALUE']

    metrics_dict = {
        slicer_lib.deserialize_slice_key(slice_metrics.slice_key):
        slice_metrics
        for slice_metrics in metrics
    }
    overall_slice_metrics = metrics_dict[()]
    del metrics_dict[()]

    boundaries = auto_slice_key_extractor._get_bucket_boundaries(statistics)  # pylint: disable=protected-access
    overall_metrics_dict = _get_metrics_as_dict(overall_slice_metrics)
    to_be_sorted_slices = []
    for slice_key, slice_metrics in metrics_dict.items():
        slice_metrics_dict = _get_metrics_as_dict(slice_metrics)
        num_examples = slice_metrics_dict['example_count'].unsampled_value
        if num_examples < min_num_examples:
            continue
        # Prune non-interesting slices.
        if np.isnan(slice_metrics_dict[metric_key].unsampled_value):
            continue
        if comparison_type == 'HIGHER':
            comparison_fn = operator.le
        else:
            comparison_fn = operator.ge
        if comparison_fn(slice_metrics_dict[metric_key].unsampled_value,
                         overall_metrics_dict[metric_key].unsampled_value):
            continue

        # Only consider statistically significant slices.
        is_significant, pvalue = _is_significant_slice(
            slice_metrics_dict[metric_key].unsampled_value,
            slice_metrics_dict[metric_key].sample_standard_deviation,
            slice_metrics_dict['example_count'].unsampled_value,
            overall_metrics_dict[metric_key].unsampled_value,
            overall_metrics_dict[metric_key].sample_standard_deviation,
            overall_metrics_dict['example_count'].unsampled_value,
            comparison_type)
        if not is_significant:
            continue
        # Format the slice info (feature names, values) in the proto into a
        # slice key.
        transformed_slice_key = []
        for (feature, value) in slice_key:
            if feature.startswith(
                    auto_slice_key_extractor.TRANSFORMED_FEATURE_PREFIX):
                feature = feature[len(auto_slice_key_extractor.
                                      TRANSFORMED_FEATURE_PREFIX):]
                value = _bucket_to_range(value, boundaries[feature])
            transformed_slice_key.append((feature, value))
        slice_key = slicer_lib.stringify_slice_key(
            tuple(transformed_slice_key))
        # Compute effect size for the slice.
        effect_size = _compute_effect_size(
            slice_metrics_dict[metric_key].unsampled_value,
            slice_metrics_dict[metric_key].sample_standard_deviation,
            overall_metrics_dict[metric_key].unsampled_value,
            overall_metrics_dict[metric_key].sample_standard_deviation)
        to_be_sorted_slices.append(
            SliceComparisonResult(
                slice_key, num_examples,
                slice_metrics_dict[metric_key].unsampled_value,
                overall_metrics_dict[metric_key].unsampled_value, pvalue,
                effect_size))
    # Rank the slices.
    ranking_fn, reverse = operator.attrgetter('effect_size'), True
    if rank_by == 'PVALUE':
        ranking_fn, reverse = operator.attrgetter('pvalue'), False
    result = sorted(to_be_sorted_slices, key=ranking_fn,
                    reverse=reverse)[:num_top_slices]
    return result
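
For the final ranking step only, a self-contained sketch of how `rank_by` flips the sort key and direction, mirroring the logic above (the candidate data is made up):

import operator
from collections import namedtuple

SliceResult = namedtuple('SliceResult', ['slice_key', 'pvalue', 'effect_size'])
candidates = [
    SliceResult('age:1', pvalue=0.04, effect_size=0.2),
    SliceResult('age:2', pvalue=0.01, effect_size=0.9),
]
# EFFECT_SIZE ranks by descending effect size; PVALUE by ascending p-value.
by_effect = sorted(candidates, key=operator.attrgetter('effect_size'), reverse=True)
by_pvalue = sorted(candidates, key=operator.attrgetter('pvalue'))
assert by_effect[0].slice_key == by_pvalue[0].slice_key == 'age:2'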
Code example #13
    def test_get_slices_as_dataframe(self):
        input_slices = [
            auto_slicing_util.SliceComparisonResult(
                slice_key=(('native-country', 'United-States'), ),
                num_examples=29170,
                slice_metric=0.09,
                base_metric=0.087,
                p_value=0,
                effect_size=0.46,
                raw_slice_metrics=text_format.Parse(
                    """
                slice_key {
                  single_slice_keys {
                    column: "native-country"
                    bytes_value: "United-States"
                    }
                }
                metric_keys_and_values {
                  key { name: "false_positives" }
                  value {
                    bounded_value {
                      lower_bound { value: 1754.6514199722158 }
                      upper_bound { value: 2092.488580027784 }
                      value { value: 1923.57 }
                      methodology: POISSON_BOOTSTRAP
                    }
                    confidence_interval {
                      lower_bound { value: 1754.6514199722158 }
                      upper_bound { value: 2092.488580027784 }
                      t_distribution_value {
                        sample_mean { value: 1923.57 }
                        sample_standard_deviation { value: 85.13110418664061 }
                        sample_degrees_of_freedom { value: 99 }
                        unsampled_value { value: 1943.0 }
                      }
                    }
                  }
                }
                metric_keys_and_values {
                  key { name: "false_negatives" }
                  value {
                    bounded_value {
                      lower_bound { value: 3595.413107983637 }
                      upper_bound { value: 4195.886892016363 }
                      value { value: 3895.65 }
                      methodology: POISSON_BOOTSTRAP
                    }
                    confidence_interval {
                      lower_bound { value: 3595.413107983637 }
                      upper_bound { value: 4195.886892016363 }
                      t_distribution_value {
                        sample_mean { value: 3895.65 }
                        sample_standard_deviation { value: 151.31253252729257 }
                        sample_degrees_of_freedom { value: 99 }
                        unsampled_value { value: 3935.0 }
                      }
                    }
                  }
                }""", metrics_for_slice_pb2.MetricsForSlice())),
            auto_slicing_util.SliceComparisonResult(
                slice_key=(('age', '[58.0, 90.0)'), ),
                num_examples=2999,
                slice_metric=0.09,
                base_metric=0.0875,
                p_value=7.8,
                effect_size=0.98,
                raw_slice_metrics=text_format.Parse(
                    """
                slice_key {
                  single_slice_keys {
                    column: "age"
                    bytes_value: "[58.0, 90.0)"
                  }
                }
                metric_keys_and_values {
                  key { name: "false_positives" }
                  value {
                    bounded_value {
                      lower_bound { value: 167.54646972321814 }
                      upper_bound { value: 236.37353027678188 }
                      value { value: 201.96 }
                      methodology: POISSON_BOOTSTRAP
                    }
                    confidence_interval {
                      lower_bound { value: 167.54646972321814 }
                      upper_bound { value: 236.37353027678188 }
                      t_distribution_value {
                        sample_mean { value: 201.96 }
                        sample_standard_deviation { value: 17.343632837435358 }
                        sample_degrees_of_freedom { value: 99 }
                        unsampled_value { value: 204.0 }
                      }
                    }
                  }
                }
                metric_keys_and_values {
                  key { name: "false_negatives" }
                  value {
                    bounded_value {
                      lower_bound { value: 486.4402337348782 }
                      upper_bound { value: 610.479766265122 }
                      value { value: 548.46 }
                      methodology: POISSON_BOOTSTRAP
                    }
                    confidence_interval {
                      lower_bound { value: 486.4402337348782 }
                      upper_bound { value: 610.479766265122 }
                      t_distribution_value {
                        sample_mean { value: 548.46 }
                        sample_standard_deviation { value: 31.256544914589938 }
                        sample_degrees_of_freedom { value: 99 }
                        unsampled_value { value: 554.0 }
                      }
                    }
                  }
                }""", metrics_for_slice_pb2.MetricsForSlice()))
        ]
        additional_metric_keys = [
            metric_types.MetricKey('false_positives'),
            metric_types.MetricKey('false_negatives')
        ]
        expected_dataframe_data = [{
            'Slice':
            slicer_lib.stringify_slice_key(input_slices[0].slice_key),
            'Size':
            input_slices[0].num_examples,
            'Slice metric':
            input_slices[0].slice_metric,
            'Base metric':
            input_slices[0].base_metric,
            'P-Value':
            input_slices[0].p_value,
            'Effect size':
            input_slices[0].effect_size,
            str(additional_metric_keys[0]):
            1923.57,
            str(additional_metric_keys[1]):
            3895.65
        }, {
            'Slice':
            slicer_lib.stringify_slice_key(input_slices[1].slice_key),
            'Size':
            input_slices[1].num_examples,
            'Slice metric':
            input_slices[1].slice_metric,
            'Base metric':
            input_slices[1].base_metric,
            'P-Value':
            input_slices[1].p_value,
            'Effect size':
            input_slices[1].effect_size,
            str(additional_metric_keys[0]):
            201.96,
            str(additional_metric_keys[1]):
            548.46
        }]
        expected_dataframe_column_labels = [
            'Slice', 'Size', 'Slice metric', 'Base metric', 'P-Value',
            'Effect size',
            str(additional_metric_keys[0]),
            str(additional_metric_keys[1])
        ]
        expected_dataframe = pd.DataFrame(
            expected_dataframe_data, columns=expected_dataframe_column_labels)
        expected_dataframe.set_index('Slice', inplace=True)

        actual_dataframe = auto_slicing_util.get_slices_as_dataframe(
            input_slices, additional_metric_keys)

        assert_frame_equal(actual_dataframe, expected_dataframe)