Ejemplo n.º 1
0
    def process(self, element: types.Extracts) -> List[types.Extracts]:
        """Computes slice keys for `element` and attaches them to a copy.

        Transformed features (when present and not None) are offered to the
        slicer first, followed by the features obtained from
        `util.get_features_from_extracts`.

        Args:
          element: The extracts to compute slice keys for. It is not mutated.

        Returns:
          A single-item list holding a shallow copy of `element` with
          `constants.SLICE_KEY_TYPES_KEY` set to the computed slices and, when
          `self._materialize` is truthy, `constants.SLICE_KEYS_KEY` set to a
          `types.MaterializedColumn` of UTF-8 encoded stringified slice keys.
        """
        transformed = None
        if constants.TRANSFORMED_FEATURES_KEY in element:
            transformed = element[constants.TRANSFORMED_FEATURES_KEY]

        candidate_dicts = []
        if transformed is not None:
            single_model = (not self._eval_config
                            or len(self._eval_config.model_specs) == 1)
            if single_model:
                # With a single model the output is not keyed by model name.
                candidate_dicts.append(transformed)
            else:
                # Collect the transformed features of every model that
                # produced some output.
                candidate_dicts.extend(
                    transformed[model_spec.name]
                    for model_spec in self._eval_config.model_specs
                    if model_spec.name in transformed)

        # Transformed features (if any) are searched first; the features
        # taken from the extracts are passed as the second argument.
        extract_features = util.get_features_from_extracts(element)
        slices = list(
            slicer.get_slices_for_features_dicts(
                candidate_dicts, extract_features, self._slice_spec))

        # Work on a shallow copy so the incoming element is left untouched.
        result = copy.copy(element)
        result[constants.SLICE_KEY_TYPES_KEY] = slices

        if self._materialize:
            # Stringified slice keys to be materialized to the output table.
            encoded_keys = [
                slicer.stringify_slice_key(key).encode('utf-8')
                for key in slices
            ]
            result[constants.SLICE_KEYS_KEY] = types.MaterializedColumn(
                name=constants.SLICE_KEYS_KEY, value=encoded_keys)
        return [result]
Ejemplo n.º 2
0
 def assertSliceResult(self, name, features_dict, columns, features, expected):
   """Asserts the slices computed for `features_dict` match `expected`.

   A single `slicer.SingleSliceSpec` is built from `columns` and `features`,
   slices are computed for `features_dict` only (None is passed as the second
   argument), and the result is compared with `expected` ignoring order.
   `name` is only used to label the failure message.
   """
   single_spec = slicer.SingleSliceSpec(columns=columns, features=features)
   failure_msg = 'Test case %s: slice on columns %s, features %s' % (
       name, columns, features)
   actual = slicer.get_slices_for_features_dicts(
       [features_dict], None, [single_spec])
   six.assertCountEqual(self, expected, actual, failure_msg)
Ejemplo n.º 3
0
 def testNonUTF8ValueRaisesValueError(self):
   """A non-UTF-8 bytes value in a sliced column raises ValueError.

   The error message is expected to match the column name.
   """
   sliced_column = 'column_name'
   bad_bytes = b'\x8a'
   features = self._makeFeaturesDict({sliced_column: [bad_bytes]})
   column_spec = slicer.SingleSliceSpec(columns=[sliced_column])
   with self.assertRaisesRegex(ValueError, sliced_column):
     # Wrap in list() so the result is fully consumed inside the context.
     list(
         slicer.get_slices_for_features_dicts(
             [features], None, [column_spec]))
Ejemplo n.º 4
0
    def process(
            self, element: types.Extracts,
            slice_spec: List[slicer.SingleSliceSpec]) -> List[types.Extracts]:
        """Computes de-duplicated slice keys for `element`.

        Transformed features (when present and not None) are offered to the
        slicer first, followed by the features obtained from
        `util.get_features_from_extracts`. Any slice keys already stored under
        `constants.SLICE_KEY_TYPES_KEY` are merged in before de-duplication.

        Args:
          element: The extracts to compute slice keys for. It is not mutated.
          slice_spec: The slice specs handed to the slicer.

        Returns:
          A single-item list holding a shallow copy of `element` with
          `constants.SLICE_KEY_TYPES_KEY` set to the unique slice keys
          converted by `slicer.slice_keys_to_numpy_array` and, when
          `self._materialize` is truthy, `constants.SLICE_KEYS_KEY` set to a
          `types.MaterializedColumn` of UTF-8 encoded stringified slice keys.
        """
        transformed = None
        if constants.TRANSFORMED_FEATURES_KEY in element:
            transformed = element[constants.TRANSFORMED_FEATURES_KEY]

        candidate_dicts = []
        if transformed is not None:
            single_model = (not self._eval_config
                            or len(self._eval_config.model_specs) == 1)
            if single_model:
                # With a single model the output is not keyed by model name.
                candidate_dicts.append(transformed)
            else:
                # Collect the transformed features of every model that
                # produced some output.
                candidate_dicts.extend(
                    transformed[model_spec.name]
                    for model_spec in self._eval_config.model_specs
                    if model_spec.name in transformed)

        # Transformed features (if any) are searched first; the features
        # taken from the extracts are passed as the second argument.
        extract_features = util.get_features_from_extracts(element)
        slice_keys = list(
            slicer.get_slices_for_features_dicts(
                candidate_dicts, extract_features, slice_spec))

        # A pre-existing SLICE_KEY_TYPES_KEY entry means the
        # SqlSliceKeyExtractor already produced slice keys; fold them in.
        if constants.SLICE_KEY_TYPES_KEY in element:
            existing_keys = element[constants.SLICE_KEY_TYPES_KEY]
            if existing_keys:
                slice_keys.extend(existing_keys)

        unique_slice_keys = list(set(slice_keys))
        # Record that de-duplication dropped at least one key.
        if len(unique_slice_keys) != len(slice_keys):
            self._duplicate_slice_keys_counter.inc()

        # Work on a shallow copy so the incoming element is left untouched.
        result = copy.copy(element)
        result[constants.SLICE_KEY_TYPES_KEY] = (
            slicer.slice_keys_to_numpy_array(unique_slice_keys))

        if self._materialize:
            # Stringified slice keys to be materialized to the output table.
            encoded_keys = [
                slicer.stringify_slice_key(key).encode('utf-8')
                for key in unique_slice_keys
            ]
            result[constants.SLICE_KEYS_KEY] = types.MaterializedColumn(
                name=constants.SLICE_KEYS_KEY, value=encoded_keys)
        return [result]
  def process(self, element: types.Extracts,
              slice_spec: List[slicer.SingleSliceSpec]) -> List[types.Extracts]:
    """Computes slice keys for `element` from its extracted features only.

    Args:
      element: The extracts to compute slice keys for. It is not mutated.
      slice_spec: The slice specs handed to the slicer.

    Returns:
      A single-item list holding a shallow copy of `element` with
      `constants.SLICE_KEY_TYPES_KEY` set to the computed slices and, when
      `self._materialize` is truthy, `constants.SLICE_KEYS_KEY` set to a
      `types.MaterializedColumn` of UTF-8 encoded stringified slice keys.
    """
    extract_features = util.get_features_from_extracts(element)
    # No transformed features here, so the list of features dicts is empty
    # and only the extracted features are searched.
    slice_iter = slicer.get_slices_for_features_dicts(
        [], extract_features, slice_spec)
    slices = list(slice_iter)

    # Work on a shallow copy so the incoming element is left untouched.
    result = copy.copy(element)
    result[constants.SLICE_KEY_TYPES_KEY] = slices

    if self._materialize:
      # Stringified slice keys to be materialized to the output table.
      encoded_keys = [
          slicer.stringify_slice_key(key).encode('utf-8') for key in slices
      ]
      result[constants.SLICE_KEYS_KEY] = types.MaterializedColumn(
          name=constants.SLICE_KEYS_KEY, value=encoded_keys)
    return [result]
Ejemplo n.º 6
0
  def testGetSlicesForFeaturesDictMultipleSingleSliceSpecs(self):
    """Several slice specs applied to one features dict yield the union.

    The specs cover the overall slice, a column slice on 'age', a feature
    slice on age == 4, and a 'gender' column slice restricted to age == 5.
    The expected result has no entry for the age == 4 spec.
    """
    features = self._makeFeaturesDict({
        'gender': ['f'],
        'age': [5],
        'interest': ['cars']
    })

    specs = [
        slicer.SingleSliceSpec(),
        slicer.SingleSliceSpec(columns=['age']),
        slicer.SingleSliceSpec(features=[('age', 4)]),
        slicer.SingleSliceSpec(columns=['gender'], features=[('age', 5)]),
    ]

    expected_slices = [
        (),
        (('age', 5),),
        (('age', 5), ('gender', 'f')),
    ]
    actual_slices = slicer.get_slices_for_features_dicts(
        [features], None, specs)
    self.assertCountEqual(expected_slices, actual_slices)