예제 #1
0
    def process(self, element: types.Extracts) -> List[types.Extracts]:
        """Attaches slice keys to a shallow copy of `element`.

        Transformed features (when present and not None) are handed to the
        slicer ahead of the raw features, so matches are looked up there first.

        Args:
            element: Extracts to compute slice keys for. The input itself is
                not modified; a shallow copy is annotated instead.

        Returns:
            A one-item list containing the annotated copy.
        """
        transformed_key = constants.TRANSFORMED_FEATURES_KEY
        transformed = (
            element[transformed_key] if transformed_key in element else None)

        candidate_dicts = []
        if transformed is not None:
            eval_config = self._eval_config
            if not eval_config or len(eval_config.model_specs) == 1:
                # With a single model the output is stored without keying on
                # the model name, so the dict can be used as is.
                candidate_dicts.append(transformed)
            else:
                # Multiple models: pick up each model's transformed features
                # output that is actually present.
                candidate_dicts.extend(
                    transformed[spec.name]
                    for spec in eval_config.model_specs
                    if spec.name in transformed)

        # Transformed features (if any) are searched first; raw features are
        # the fallback when no match is found there.
        raw_features = util.get_features_from_extracts(element)
        slice_keys = list(
            slicer.get_slices_for_features_dicts(
                candidate_dicts, raw_features, self._slice_spec))

        # Work on a shallow copy so the caller's element is left untouched.
        annotated = copy.copy(element)
        annotated[constants.SLICE_KEY_TYPES_KEY] = slice_keys

        if self._materialize:
            # Stringified, UTF-8 encoded slice keys to be materialized to the
            # output table.
            encoded_keys = [
                slicer.stringify_slice_key(slice_key).encode('utf-8')
                for slice_key in slice_keys
            ]
            annotated[constants.SLICE_KEYS_KEY] = types.MaterializedColumn(
                name=constants.SLICE_KEYS_KEY, value=encoded_keys)
        return [annotated]
예제 #2
0
 def testGetFeaturesFromExtracts(self):
     """Checks util.get_features_from_extracts for three extract layouts."""
     # NOTE: assertEqual on dicts holding numpy arrays only works here because
     # every array has a single element (its `==` result has a defined truth
     # value).

     # Case 1: features wrapped in a FeaturesPredictionsLabels tuple.
     fpl = types.FeaturesPredictionsLabels(
         input_ref=0,
         features={'a': np.array([1])},
         predictions={},
         labels={})
     fpl_extracts = {constants.FEATURES_PREDICTIONS_LABELS_KEY: fpl}
     self.assertEqual({'a': np.array([1])},
                      util.get_features_from_extracts(fpl_extracts))

     # Case 2: features stored directly under FEATURES_KEY.
     plain_extracts = {constants.FEATURES_KEY: {'a': np.array([1])}}
     self.assertEqual({'a': np.array([1])},
                      util.get_features_from_extracts(plain_extracts))

     # Case 3: empty extracts yield an empty features dict.
     self.assertEqual({}, util.get_features_from_extracts({}))
 def process(self, element: types.Extracts) -> List[types.Extracts]:
     """Adds bucketized versions of configured features to a copy of `element`.

     For every feature named in `self._bucket_boundaries` that is present in
     the element's features, each value is passed through `_bin_value` with
     that feature's boundaries, and the results are stored as a numpy array
     under `TRANSFORMED_FEATURE_PREFIX + feature_name`.

     Args:
         element: Extracts to transform. It is deep-copied, so the input is
             never mutated.

     Returns:
         A one-item list containing the transformed deep copy.
     """
     # Deep copy: the features container is modified in place below, so a
     # shallow copy would leak the new keys into the caller's element.
     element_copy = copy.deepcopy(element)
     features = util.get_features_from_extracts(element_copy)
     for feature_name, boundaries in self._bucket_boundaries.items():
         if feature_name not in features:
             continue
         values = features[feature_name]
         if values is None:
             # A feature that is present but has no value cannot be iterated;
             # skip it instead of failing with a TypeError.
             continue
         features[TRANSFORMED_FEATURE_PREFIX + feature_name] = np.array(
             [_bin_value(value, boundaries) for value in values])
     return [element_copy]
예제 #4
0
  def process(self, element: types.Extracts) -> List[types.Extracts]:
    """Attaches slice keys computed from the element's features.

    Args:
      element: Extracts to compute slice keys for. The input itself is not
        modified; a shallow copy is annotated instead.

    Returns:
      A one-item list containing the annotated copy.
    """
    slice_keys = list(
        slicer.get_slices_for_features_dict(
            util.get_features_from_extracts(element), self._slice_spec))

    # Work on a shallow copy so the caller's element is left untouched.
    annotated = copy.copy(element)
    annotated[constants.SLICE_KEY_TYPES_KEY] = slice_keys

    if self._materialize:
      # Stringified, UTF-8 encoded slice keys to be materialized to the
      # output table.
      encoded_keys = [
          slicer.stringify_slice_key(slice_key).encode('utf-8')
          for slice_key in slice_keys
      ]
      annotated[constants.SLICE_KEYS_KEY] = types.MaterializedColumn(
          name=constants.SLICE_KEYS_KEY, value=encoded_keys)
    return [annotated]
예제 #5
0
    def process(
            self, element: types.Extracts,
            slice_spec: List[slicer.SingleSliceSpec]) -> List[types.Extracts]:
        """Attaches de-duplicated slice keys to a shallow copy of `element`.

        Slice keys are computed from the transformed features (when present
        and not None) and the raw features, then merged with any slice keys
        already stored under SLICE_KEY_TYPES_KEY. Duplicates are removed while
        keeping first-seen order, and the duplicate counter is incremented
        when any were found.

        Args:
            element: Extracts to compute slice keys for. The input itself is
                not modified; a shallow copy is annotated instead.
            slice_spec: Slice specs to evaluate against the features.

        Returns:
            A one-item list containing the annotated copy.
        """
        # Slice on transformed features if available.
        features_dicts = []
        if (constants.TRANSFORMED_FEATURES_KEY in element
                and element[constants.TRANSFORMED_FEATURES_KEY] is not None):
            transformed_features = element[constants.TRANSFORMED_FEATURES_KEY]
            # If only one model, the output is stored without keying on model
            # name.
            if not self._eval_config or len(
                    self._eval_config.model_specs) == 1:
                features_dicts.append(transformed_features)
            else:
                # Search for slices in each model's transformed features
                # output.
                features_dicts.extend(
                    transformed_features[spec.name]
                    for spec in self._eval_config.model_specs
                    if spec.name in transformed_features)

        # Search for slices first in transformed features (if any). If a match
        # is not found there then search in raw features.
        slice_keys = list(
            slicer.get_slices_for_features_dicts(
                features_dicts, util.get_features_from_extracts(element),
                slice_spec))

        # If SLICE_KEY_TYPES_KEY already exists, that means the
        # SqlSliceKeyExtractor has generated some slice keys. We need to add
        # them to current slice_keys list.
        if (constants.SLICE_KEY_TYPES_KEY in element
                and element[constants.SLICE_KEY_TYPES_KEY]):
            slice_keys.extend(element[constants.SLICE_KEY_TYPES_KEY])

        # De-duplicate while keeping first-seen order. A set would also drop
        # duplicates, but its iteration order is arbitrary and may differ
        # between runs, which makes the emitted key order unstable.
        unique_slice_keys = list(dict.fromkeys(slice_keys))
        if len(slice_keys) != len(unique_slice_keys):
            self._duplicate_slice_keys_counter.inc()

        # Make a shallow copy, so we don't mutate the original.
        element_copy = copy.copy(element)

        element_copy[constants.SLICE_KEY_TYPES_KEY] = unique_slice_keys
        # Add a list of stringified slice keys to be materialized to output
        # table.
        if self._materialize:
            element_copy[constants.SLICE_KEYS_KEY] = types.MaterializedColumn(
                name=constants.SLICE_KEYS_KEY,
                value=[
                    slicer.stringify_slice_key(slice_key).encode('utf-8')
                    for slice_key in unique_slice_keys
                ])
        return [element_copy]
 def process(
     self, element: types.Extracts,
     statistics: statistics_pb2.DatasetFeatureStatisticsList
 ) -> List[types.Extracts]:
   """Adds bucketized versions of configured features to a copy of `element`.

   On the first call where `self._bucket_boundaries` is None, the boundaries
   are obtained from `get_quantile_boundaries(statistics)` and kept on the
   instance for later calls.

   Args:
     element: Extracts to transform. It is deep-copied, so the input is never
       mutated.
     statistics: Dataset statistics used to derive the bucket boundaries when
       none are cached yet.

   Returns:
     A one-item list containing the transformed deep copy.
   """
   if self._bucket_boundaries is None:
     self._bucket_boundaries = get_quantile_boundaries(statistics)

   # Deep copy: the features container is modified in place below.
   transformed_element = copy.deepcopy(element)
   features = util.get_features_from_extracts(transformed_element)
   for name, edges in self._bucket_boundaries.items():
     if name not in features:
       continue
     values = features[name]
     # Skip features that have no value or hold no entries.
     if values is None or not (values.size > 0):
       continue
     features[TRANSFORMED_FEATURE_PREFIX + name] = np.array(
         [_bin_value(value, edges) for value in values])
   return [transformed_element]