def process(self, element: types.Extracts) -> List[types.Extracts]:
    """Compute slice keys for `element` and attach them to a copy of it.

    Slices are searched first in the transformed features (when the
    element carries any) and then in the raw features returned by
    `util.get_features_from_extracts`.

    Args:
        element: The extracts to slice. The input object is not mutated.

    Returns:
        A one-item list holding a shallow copy of `element` with
        `constants.SLICE_KEY_TYPES_KEY` set to the list of slice keys and,
        when `self._materialize` is truthy, `constants.SLICE_KEYS_KEY` set
        to a materialized column of UTF-8 encoded stringified slice keys.
    """
    transformed_key = constants.TRANSFORMED_FEATURES_KEY
    transformed = element[transformed_key] if transformed_key in element else None

    # Collect the transformed-feature dicts that should be searched.
    candidate_dicts = []
    if transformed is not None:
        if not self._eval_config or len(self._eval_config.model_specs) == 1:
            # With a single model the output is not keyed by model name.
            candidate_dicts.append(transformed)
        else:
            # With several models, pick up each model's own output.
            candidate_dicts.extend(
                transformed[model_spec.name]
                for model_spec in self._eval_config.model_specs
                if model_spec.name in transformed)

    # Transformed features take precedence; raw features are the fallback.
    slice_keys = list(
        slicer.get_slices_for_features_dicts(
            candidate_dicts,
            util.get_features_from_extracts(element),
            self._slice_spec))

    # Work on a shallow copy so the caller's element stays untouched.
    result = copy.copy(element)
    result[constants.SLICE_KEY_TYPES_KEY] = slice_keys

    if self._materialize:
        # Stringified slice keys destined for the output table.
        encoded_keys = [
            slicer.stringify_slice_key(slice_key).encode('utf-8')
            for slice_key in slice_keys
        ]
        result[constants.SLICE_KEYS_KEY] = types.MaterializedColumn(
            name=constants.SLICE_KEYS_KEY, value=encoded_keys)

    return [result]
def assertSliceResult(self, name, features_dict, columns, features, expected):
    """Assert that slicing `features_dict` yields exactly `expected`.

    Builds a single `slicer.SingleSliceSpec` from `columns` and `features`,
    runs `slicer.get_slices_for_features_dicts` on `features_dict` alone
    (no raw-features fallback is supplied) and compares the result with
    `expected`, ignoring order.

    Args:
        name: Label of the test case, used only in the failure message.
        features_dict: Features dict to slice.
        columns: `columns` argument for the slice spec.
        features: `features` argument for the slice spec.
        expected: The slice keys that should be produced.
    """
    single_spec = slicer.SingleSliceSpec(columns=columns, features=features)
    failure_msg = 'Test case %s: slice on columns %s, features %s' % (
        name, columns, features)
    actual = slicer.get_slices_for_features_dicts(
        [features_dict], None, [single_spec])
    six.assertCountEqual(self, expected, actual, failure_msg)
def testNonUTF8ValueRaisesValueError(self):
    # Slicing on a column whose value is not valid UTF-8 must raise a
    # ValueError whose message mentions the offending column.
    column_name = 'column_name'
    specs = [slicer.SingleSliceSpec(columns=[column_name])]
    # b'\x8a' is a lone continuation byte, so it cannot be decoded as UTF-8.
    features_dict = self._makeFeaturesDict({column_name: [b'\x8a']})
    with self.assertRaisesRegex(ValueError, column_name):
        # Wrap in list() so the result is fully consumed inside the
        # assertion context.
        list(
            slicer.get_slices_for_features_dicts([features_dict], None, specs))
def process(
        self, element: types.Extracts,
        slice_spec: List[slicer.SingleSliceSpec]) -> List[types.Extracts]:
    """Compute de-duplicated slice keys for `element` and attach them.

    Slices are searched first in the transformed features (when the
    element carries any) and then in the raw features returned by
    `util.get_features_from_extracts`. Slice keys already present on the
    element under `constants.SLICE_KEY_TYPES_KEY` (produced by the
    SqlSliceKeyExtractor) are merged in, and duplicates are removed.

    Args:
        element: The extracts to slice. The input object is not mutated.
        slice_spec: The slice specs to match against the features.

    Returns:
        A one-item list holding a shallow copy of `element` with
        `constants.SLICE_KEY_TYPES_KEY` set to the unique slice keys
        (converted via `slicer.slice_keys_to_numpy_array`) and, when
        `self._materialize` is truthy, `constants.SLICE_KEYS_KEY` set to a
        materialized column of UTF-8 encoded stringified slice keys. The
        unique keys keep the order in which they were first seen.
    """
    # Slice on transformed features if available.
    features_dicts = []
    if (constants.TRANSFORMED_FEATURES_KEY in element and
            element[constants.TRANSFORMED_FEATURES_KEY] is not None):
        transformed_features = element[constants.TRANSFORMED_FEATURES_KEY]
        # If only one model, the output is stored without keying on model
        # name.
        if (not self._eval_config or
                len(self._eval_config.model_specs) == 1):
            features_dicts.append(transformed_features)
        else:
            # Search for slices in each model's transformed features output.
            for spec in self._eval_config.model_specs:
                if spec.name in transformed_features:
                    features_dicts.append(transformed_features[spec.name])

    # Search for slices first in transformed features (if any). If a match
    # is not found there then search in raw features.
    slice_keys = list(
        slicer.get_slices_for_features_dicts(
            features_dicts, util.get_features_from_extracts(element),
            slice_spec))

    # If SLICE_KEY_TYPES_KEY already exists, that means the
    # SqlSliceKeyExtractor has generated some slice keys. We need to add
    # them to the current slice_keys list.
    if (constants.SLICE_KEY_TYPES_KEY in element and
            element[constants.SLICE_KEY_TYPES_KEY]):
        slice_keys.extend(element[constants.SLICE_KEY_TYPES_KEY])

    # De-duplicate while keeping first-seen order. A plain set() would drop
    # the same duplicates but yield the keys in an arbitrary order that can
    # differ between interpreter runs, making the output non-reproducible.
    unique_slice_keys = list(dict.fromkeys(slice_keys))
    if len(slice_keys) != len(unique_slice_keys):
        # Counted once per element, regardless of how many duplicates were
        # dropped.
        self._duplicate_slice_keys_counter.inc()

    # Make a shallow copy, so we don't mutate the original.
    element_copy = copy.copy(element)
    element_copy[constants.SLICE_KEY_TYPES_KEY] = (
        slicer.slice_keys_to_numpy_array(unique_slice_keys))

    # Add a list of stringified slice keys to be materialized to output
    # table.
    if self._materialize:
        element_copy[constants.SLICE_KEYS_KEY] = types.MaterializedColumn(
            name=constants.SLICE_KEYS_KEY,
            value=[
                slicer.stringify_slice_key(x).encode('utf-8')
                for x in unique_slice_keys
            ])
    return [element_copy]
def process(self, element: types.Extracts,
            slice_spec: List[slicer.SingleSliceSpec]) -> List[types.Extracts]:
    """Compute slice keys from the raw features of `element`.

    No transformed features are considered here: an empty list is passed
    as the transformed-features argument, so only the features returned
    by `util.get_features_from_extracts` are searched.

    Args:
        element: The extracts to slice. The input object is not mutated.
        slice_spec: The slice specs to match against the features.

    Returns:
        A one-item list holding a shallow copy of `element` with
        `constants.SLICE_KEY_TYPES_KEY` set to the list of slice keys and,
        when `self._materialize` is truthy, `constants.SLICE_KEYS_KEY` set
        to a materialized column of UTF-8 encoded stringified slice keys.
    """
    raw_features = util.get_features_from_extracts(element)
    slice_keys = list(
        slicer.get_slices_for_features_dicts([], raw_features, slice_spec))

    # Work on a shallow copy so the caller's element stays untouched.
    result = copy.copy(element)
    result[constants.SLICE_KEY_TYPES_KEY] = slice_keys

    if self._materialize:
        # Stringified slice keys destined for the output table.
        encoded_keys = [
            slicer.stringify_slice_key(slice_key).encode('utf-8')
            for slice_key in slice_keys
        ]
        result[constants.SLICE_KEYS_KEY] = types.MaterializedColumn(
            name=constants.SLICE_KEYS_KEY, value=encoded_keys)

    return [result]
def testGetSlicesForFeaturesDictMultipleSingleSliceSpecs(self):
    # Several specs applied to one features dict: the expected result has
    # the overall slice, the age column slice and the combined
    # age=5 / gender slice, and no entry for the age=4 spec.
    features_dict = self._makeFeaturesDict(
        {'gender': ['f'], 'age': [5], 'interest': ['cars']})
    slice_spec = [
        slicer.SingleSliceSpec(),
        slicer.SingleSliceSpec(columns=['age']),
        slicer.SingleSliceSpec(features=[('age', 4)]),
        slicer.SingleSliceSpec(columns=['gender'], features=[('age', 5)]),
    ]
    expected = [
        (),
        (('age', 5),),
        (('age', 5), ('gender', 'f')),
    ]
    actual = slicer.get_slices_for_features_dicts(
        [features_dict], None, slice_spec)
    self.assertCountEqual(expected, actual)