def create_fpls():
  """Create test FPL dicts that can be used for verification."""
  fpl1 = types.FeaturesPredictionsLabels(
      input_ref=0,
      features=make_features_dict({
          'gender': ['f'],
          'age': [13],
          'interest': ['cars']
      }),
      predictions=make_features_dict({
          'kb': [1],
      }),
      labels=make_features_dict({'ad_risk_score': [0]}))
  fpl2 = types.FeaturesPredictionsLabels(
      input_ref=1,
      features=make_features_dict({
          'gender': ['m'],
          'age': [10],
          'interest': ['cars', 'movies']
      }),
      predictions=make_features_dict({
          'kb': [1],
      }),
      labels=make_features_dict({'ad_risk_score': [0]}))
  return [fpl1, fpl2]
def testMaterializeFeaturesWithExcludes(self):
  example1 = self._makeExample(
      age=3.0, language='english', label=1.0, slice_key='first_slice')
  features = {
      'f': {
          encoding.NODE_SUFFIX: np.array([1])
      },
      's': {
          encoding.NODE_SUFFIX:
              tf.compat.v1.SparseTensorValue(
                  indices=[[0, 5], [1, 2], [3, 6]],
                  values=[100., 200., 300.],
                  dense_shape=[4, 10])
      }
  }
  predictions = {'p': {encoding.NODE_SUFFIX: np.array([2])}}
  labels = {'l': {encoding.NODE_SUFFIX: np.array([3])}}
  extracts = {
      constants.INPUT_KEY:
          example1.SerializeToString(),
      constants.FEATURES_PREDICTIONS_LABELS_KEY:
          types.FeaturesPredictionsLabels(
              input_ref=0,
              features=features,
              predictions=predictions,
              labels=labels)
  }
  result = feature_extractor._MaterializeFeatures(extracts, excludes=['s'])
  self.assertNotIn('features__s', result)
def get_fpls_from_examples(
    self, input_example_bytes_list: List[bytes]) -> List[Any]:
  """Generates FPLs from serialized examples using a ModelAgnostic graph.

  Args:
    input_example_bytes_list: A list of serialized tf.Example protos to be
      parsed by the graph.

  Returns:
    A list of FeaturesPredictionsLabels generated from the input examples.
  """
  # Call the graph via the created session callable _get_features_fn and
  # get the tensor representation of the features.
  features = self._get_features_fn(input_example_bytes_list)
  split_features = {}
  num_examples = 0

  # Split the features by the example keys. Also verify that each example
  # key has the same number of total examples.
  for key in features.keys():
    split_features[key] = util.split_tensor_value(features[key])
    if num_examples == 0:
      num_examples = len(split_features[key])
    elif num_examples != len(split_features[key]):
      raise ValueError(
          'Different keys unexpectedly had different numbers of '
          'examples. Key %s unexpectedly had %s elements.' %
          (key, len(split_features[key])))

  # Sort out the examples into individual FPLs: one example -> one FPL.
  # Sort them into Features, Predictions, or Labels according to the input
  # config.
  result = []
  for i in range(num_examples):
    labels = {}
    predictions = {}
    features = {}
    for key in split_features:
      if key in self._config.label_keys:
        labels[key] = {encoding.NODE_SUFFIX: split_features[key][i]}
      if key in self._config.prediction_keys:
        predictions[key] = {encoding.NODE_SUFFIX: split_features[key][i]}
      features[key] = {encoding.NODE_SUFFIX: split_features[key][i]}

    result.append(
        types.FeaturesPredictionsLabels(
            input_ref=i,
            features=features,
            predictions=predictions,
            labels=labels))

  return result
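# Illustrative usage sketch for get_fpls_from_examples (not part of the
# library code above). It assumes a predictor object built from a
# ModelAgnosticConfig that exposes get_fpls_from_examples; the class name
# `agnostic_predict.ModelAgnosticPredict` is an assumption used only for
# illustration, and the feature keys ('age', 'prediction', 'label') are
# made up for this sketch.
def _example_get_fpls_usage():
  def _serialize(age, prediction, label):
    # Build a serialized tf.Example with three float features.
    return tf.train.Example(
        features=tf.train.Features(
            feature={
                'age': tf.train.Feature(
                    float_list=tf.train.FloatList(value=[age])),
                'prediction': tf.train.Feature(
                    float_list=tf.train.FloatList(value=[prediction])),
                'label': tf.train.Feature(
                    float_list=tf.train.FloatList(value=[label])),
            })).SerializeToString()

  config = agnostic_predict.ModelAgnosticConfig(
      label_keys=['label'],
      prediction_keys=['prediction'],
      feature_spec={
          'age': tf.FixedLenFeature([], tf.float32),
          'prediction': tf.FixedLenFeature([], tf.float32),
          'label': tf.FixedLenFeature([], tf.float32),
      })
  predictor = agnostic_predict.ModelAgnosticPredict(config)  # assumed name
  fpls = predictor.get_fpls_from_examples(
      [_serialize(3.0, 2.0, 1.0), _serialize(5.0, 4.0, 0.0)])
  assert len(fpls) == 2  # one FPL per input example
  return fpls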
def testAugmentFPLFromTfExample(self):
  example1 = self._makeExample(
      age=3.0, language='english', label=1.0, slice_key='first_slice', f=0.0)
  features = {
      'f': {
          encoding.NODE_SUFFIX: np.array([1])
      },
      's': {
          encoding.NODE_SUFFIX:
              tf.compat.v1.SparseTensorValue(
                  indices=[[0, 5], [1, 2], [3, 6]],
                  values=[100., 200., 300.],
                  dense_shape=[4, 10])
      }
  }
  predictions = {'p': {encoding.NODE_SUFFIX: np.array([2])}}
  labels = {'l': {encoding.NODE_SUFFIX: np.array([3])}}
  extracts = {
      constants.INPUT_KEY:
          example1.SerializeToString(),
      constants.FEATURES_PREDICTIONS_LABELS_KEY:
          types.FeaturesPredictionsLabels(
              input_ref=0,
              features=features,
              predictions=predictions,
              labels=labels)
  }
  fpl = extracts[constants.FEATURES_PREDICTIONS_LABELS_KEY]
  result = feature_extractor._MaterializeFeatures(
      extracts,
      source=constants.INPUT_KEY,
      dest=constants.FEATURES_PREDICTIONS_LABELS_KEY)
  self.assertIsInstance(result, dict)
  self.assertEqual(result[constants.FEATURES_PREDICTIONS_LABELS_KEY],
                   fpl)  # should still be there.
  # Assert that materialized columns are not added.
  self.assertNotIn('features__f', result)
  self.assertNotIn('features__age', result)
  # But tf.Example features not already present in the FPL are added.
  self.assertEqual(fpl.features['age'],
                   {encoding.NODE_SUFFIX: np.array([3.0])})
  self.assertEqual(fpl.features['language'],
                   {'node': np.array([['english']], dtype='|S7')})
  self.assertEqual(fpl.features['slice_key'],
                   {'node': np.array([['first_slice']], dtype='|S11')})
  # And features present in both are not overwritten by the tf.Example value.
  self.assertEqual(fpl.features['f'], {encoding.NODE_SUFFIX: np.array([1])})
def testGetSparseTensorValue(self):
  sparse_tensor_value = tf.compat.v1.SparseTensorValue(
      indices=[[0, 0, 0], [0, 1, 0], [0, 1, 1]],
      values=['', 'one', 'two'],
      dense_shape=[1, 2, 2])
  fpl_with_sparse_tensor = types.FeaturesPredictionsLabels(
      input_ref=0, features={}, predictions={}, labels={})
  meta_feature_extractor._set_feature_value(
      fpl_with_sparse_tensor.features, 'sparse', sparse_tensor_value)
  self.assertEqual(['', 'one', 'two'],
                   meta_feature_extractor.get_feature_value(
                       fpl_with_sparse_tensor, 'sparse'))
def get_fpl_copy(extracts: types.Extracts) -> types.FeaturesPredictionsLabels:
  """Gets a copy of the FPL stored in the given extracts."""
  fpl_orig = extracts.get(constants.FEATURES_PREDICTIONS_LABELS_KEY)
  if not fpl_orig:
    raise RuntimeError('FPL missing. Please ensure _Predict() was called.')

  # We must make a copy of the FPL tuple as well, so that we don't mutate the
  # original, which is disallowed by Beam.
  fpl_copy = types.FeaturesPredictionsLabels(
      features=copy.copy(fpl_orig.features),
      labels=fpl_orig.labels,
      predictions=fpl_orig.predictions,
      input_ref=fpl_orig.input_ref)
  return fpl_copy
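# Minimal sketch of the intended use of get_fpl_copy (an illustration, not
# library code): a downstream extractor adds derived features to the copied
# FPL so the original extracts are never mutated, which Beam disallows. The
# '_derived_feature' key and the way the new extracts dict is assembled are
# assumptions for this example.
def _example_augment_fpl_copy(extracts):
  fpl_copy = get_fpl_copy(extracts)
  # Safe: only the shallow-copied features dict of the copy is modified.
  fpl_copy.features['_derived_feature'] = {encoding.NODE_SUFFIX: np.array([1])}
  new_extracts = dict(extracts)
  new_extracts[constants.FEATURES_PREDICTIONS_LABELS_KEY] = fpl_copy
  return new_extracts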
def create_fpls():
  fpl1 = types.FeaturesPredictionsLabels(
      input_ref=0,
      features=make_features_dict({
          'gender': ['f'],
          'age': [13],
          'interest': ['cars']
      }),
      predictions=make_features_dict({
          'kb': [1],
      }),
      labels=make_features_dict({'ad_risk_score': [0]}))
  fpl2 = types.FeaturesPredictionsLabels(
      input_ref=0,
      features=make_features_dict({
          'gender': ['m'],
          'age': [10],
          'interest': ['cars']
      }),
      predictions=make_features_dict({
          'kb': [1],
      }),
      labels=make_features_dict({'ad_risk_score': [0]}))
  return [fpl1, fpl2]
def testMaterializeFeaturesNoMaterializedColumns(self):
  example1 = self._makeExample(
      age=3.0, language='english', label=1.0, slice_key='first_slice')
  features = {
      'f': {
          encoding.NODE_SUFFIX: np.array([1])
      },
      's': {
          encoding.NODE_SUFFIX:
              tf.compat.v1.SparseTensorValue(
                  indices=[[0, 5], [1, 2], [3, 6]],
                  values=[100., 200., 300.],
                  dense_shape=[4, 10])
      }
  }
  predictions = {'p': {encoding.NODE_SUFFIX: np.array([2])}}
  labels = {'l': {encoding.NODE_SUFFIX: np.array([3])}}
  extracts = {
      constants.INPUT_KEY:
          example1.SerializeToString(),
      constants.FEATURES_PREDICTIONS_LABELS_KEY:
          types.FeaturesPredictionsLabels(
              input_ref=0,
              features=features,
              predictions=predictions,
              labels=labels)
  }
  fpl = extracts[constants.FEATURES_PREDICTIONS_LABELS_KEY]
  result = feature_extractor._MaterializeFeatures(extracts)
  self.assertIsInstance(result, dict)
  self.assertEqual(result[constants.FEATURES_PREDICTIONS_LABELS_KEY],
                   fpl)  # should still be there.
  self.assertEqual(result['features__f'],
                   types.MaterializedColumn(name='features__f', value=[1]))
  self.assertEqual(result['predictions__p'],
                   types.MaterializedColumn(name='predictions__p', value=[2]))
  self.assertEqual(result['labels__l'],
                   types.MaterializedColumn(name='labels__l', value=[3]))
  self.assertEqual(
      result['features__s'],
      types.MaterializedColumn(name='features__s', value=[100., 200., 300.]))
def testGetFeaturesFromExtracts(self):
  self.assertEqual(
      {'a': np.array([1])},
      util.get_features_from_extracts({
          constants.FEATURES_PREDICTIONS_LABELS_KEY:
              types.FeaturesPredictionsLabels(
                  input_ref=0,
                  features={'a': np.array([1])},
                  predictions={},
                  labels={})
      }))
  self.assertEqual(
      {'a': np.array([1])},
      util.get_features_from_extracts({constants.FEATURES_KEY: {
          'a': np.array([1])
      }}))
  self.assertEqual({}, util.get_features_from_extracts({}))
def as_features_predictions_labels(self, fetched_values):
  """Gets features, predictions, labels as FeaturesPredictionsLabelsType."""

  def fpl_dict(fetched, group):
    native = fetched.values[group]
    wrapped = {}
    if not isinstance(native, dict):
      native = {util.default_dict_key(group): native}
    for key in native:
      wrapped[key] = {encoding.NODE_SUFFIX: native[key]}
    return wrapped

  fpls = []
  for fetched in fetched_values:
    fpls.append(
        types.FeaturesPredictionsLabels(
            input_ref=fetched.input_ref,
            features=fpl_dict(fetched, constants.FEATURES_NAME),
            predictions=fpl_dict(fetched, constants.PREDICTIONS_NAME),
            labels=fpl_dict(fetched, constants.LABELS_NAME)))
  return fpls
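# Illustrative sketch of the input shape expected by
# as_features_predictions_labels, inferred only from the attribute accesses
# above: each fetched value exposes `input_ref` and a `values` dict keyed by
# group name. The namedtuple below is a stand-in for whatever fetched-values
# type the surrounding code actually uses; `handler` is any object providing
# the method above. All names and values here are assumptions.
import collections

_FetchedStandIn = collections.namedtuple('_FetchedStandIn',
                                         ['input_ref', 'values'])


def _example_wrap_fetched_values(handler):
  fetched = _FetchedStandIn(
      input_ref=0,
      values={
          constants.FEATURES_NAME: {'age': np.array([3.0])},
          # A non-dict group gets wrapped under util.default_dict_key(group).
          constants.PREDICTIONS_NAME: np.array([0.8]),
          constants.LABELS_NAME: {'label': np.array([1.0])},
      })
  return handler.as_features_predictions_labels([fetched])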
def testEvaluateGraph(self):
  # Create some FPLs. The features aren't terribly useful for these metrics.
  # Just make sure they can be processed correctly by the feed/feedlist
  # generation logic by having one dense tensor and one sparse tensor.
  features = {
      'age': {
          encoding.NODE_SUFFIX: np.array([1])
      },
      'language': {
          encoding.NODE_SUFFIX:
              tf.compat.v1.SparseTensorValue(
                  indices=np.array([[0, 0]]),
                  values=np.array(['english']),
                  dense_shape=np.array([1, 1]))
      }
  }
  predictions = {'predictions': {encoding.NODE_SUFFIX: np.array([2])}}
  # Have 3 labels of values 3, 23, 16 and predictions of values 2, 2, 2.
  # This should give sum = 48 and mean = 8.
  labels = {'labels': {encoding.NODE_SUFFIX: np.array([3])}}
  labels_2 = {'labels': {encoding.NODE_SUFFIX: np.array([23])}}
  labels_3 = {'labels': {encoding.NODE_SUFFIX: np.array([16])}}

  # Compile the actual FPLs.
  fpl = types.FeaturesPredictionsLabels(
      input_ref=0,
      features=features,
      predictions=predictions,
      labels=labels)
  fpl_2 = types.FeaturesPredictionsLabels(
      input_ref=0,
      features=features,
      predictions=predictions,
      labels=labels_2)
  fpl_3 = types.FeaturesPredictionsLabels(
      input_ref=0,
      features=features,
      predictions=predictions,
      labels=labels_3)

  # Set up a model agnostic config so we can get the FPLConfig.
  feature_map = {
      'age': tf.FixedLenFeature([], tf.float32),
      'language': tf.VarLenFeature(tf.string),
      'predictions': tf.FixedLenFeature([], tf.float32),
      'labels': tf.FixedLenFeature([], tf.float32)
  }
  model_agnostic_config = agnostic_predict.ModelAgnosticConfig(
      label_keys=['labels'],
      prediction_keys=['predictions'],
      feature_spec=feature_map)

  # Create a Model Agnostic Evaluate graph handler and feed in the FPL list.
  evaluate_graph = model_agnostic_evaluate_graph.ModelAgnosticEvaluateGraph(
      [add_mean_callback],
      model_agnostic_extractor.ModelAgnosticGetFPLFeedConfig(
          model_agnostic_config))
  evaluate_graph.metrics_reset_update_get_list([fpl, fpl_2, fpl_3])
  outputs = evaluate_graph.get_metric_values()

  # Verify that we got the right metrics out.
  self.assertEqual(2, len(outputs))
  self.assertEqual(outputs['tf_metric_mean'], 8.0)
  self.assertEqual(outputs['py_func_total_label'], 48.0)
def testEvaluateMultiLabelsPredictions(self):
  # Test case where we have multiple labels/predictions.
  features = {'age': {encoding.NODE_SUFFIX: np.array([1])}}
  predictions = {
      'prediction': {
          encoding.NODE_SUFFIX: np.array([2])
      },
      'prediction_2': {
          encoding.NODE_SUFFIX: np.array([4])
      }
  }
  # Have 6 labels of values 3, 5, 23, 12, 16, 31 and
  # 6 predictions of values 2, 2, 2, 4, 4, 4.
  # This should give sum = 108 and mean = 9.
  labels = {
      'label': {
          encoding.NODE_SUFFIX: np.array([3])
      },
      'label_2': {
          encoding.NODE_SUFFIX: np.array([5])
      }
  }
  labels_2 = {
      'label': {
          encoding.NODE_SUFFIX: np.array([23])
      },
      'label_2': {
          encoding.NODE_SUFFIX: np.array([12])
      }
  }
  labels_3 = {
      'label': {
          encoding.NODE_SUFFIX: np.array([16])
      },
      'label_2': {
          encoding.NODE_SUFFIX: np.array([31])
      }
  }

  # Compile the actual FPLs.
  fpl = types.FeaturesPredictionsLabels(
      input_ref=0,
      features=features,
      predictions=predictions,
      labels=labels)
  fpl_2 = types.FeaturesPredictionsLabels(
      input_ref=0,
      features=features,
      predictions=predictions,
      labels=labels_2)
  fpl_3 = types.FeaturesPredictionsLabels(
      input_ref=0,
      features=features,
      predictions=predictions,
      labels=labels_3)

  # Set up a model agnostic config so we can get the FPLConfig.
  feature_map = {
      'age': tf.FixedLenFeature([], tf.float32),
      'prediction': tf.FixedLenFeature([], tf.int64),
      'prediction_2': tf.FixedLenFeature([], tf.int64),
      'label': tf.FixedLenFeature([], tf.int64),
      'label_2': tf.FixedLenFeature([], tf.int64)
  }
  model_agnostic_config = agnostic_predict.ModelAgnosticConfig(
      label_keys=['label', 'label_2'],
      prediction_keys=['prediction', 'prediction_2'],
      feature_spec=feature_map)

  # Create a Model Agnostic Evaluate graph handler and feed in the FPL list.
  evaluate_graph = model_agnostic_evaluate_graph.ModelAgnosticEvaluateGraph(
      [add_mean_callback],
      model_agnostic_extractor.ModelAgnosticGetFPLFeedConfig(
          model_agnostic_config))
  evaluate_graph.metrics_reset_update_get_list([fpl, fpl_2, fpl_3])
  outputs = evaluate_graph.get_metric_values()

  # Verify that we got the right metrics out.
  self.assertEqual(2, len(outputs))
  self.assertEqual(outputs['tf_metric_mean'], 9.0)
  self.assertEqual(outputs['py_func_total_label'], 108.0)
def predict_list(self, inputs):
  """Like predict, but takes a list of inputs.

  Args:
    inputs: A list of input data (or a dict of keys to lists of input data).
      See predict for more details.

  Returns:
    A list of FeaturesPredictionsLabels. See predict for more details.

  Raises:
    ValueError: If the original input_refs tensor passed to the
      EvalInputReceiver does not align with the features, predictions and
      labels returned after feeding the inputs.
  """
  if isinstance(inputs, dict):
    input_args = []
    # Only add values for keys that are in the input map (in order).
    for key in self._input_map:
      if key in inputs:
        input_args.append(inputs[key])
  else:
    input_args = [inputs]

  (features, predictions, labels,
   input_refs) = self._predict_list_fn(*input_args)

  split_labels = {}
  for label_key in self._labels_map:
    split_labels[label_key] = util.split_tensor_value(
        labels[label_key][encoding.NODE_SUFFIX])
  split_features = {}
  for feature_key in self._features_map:
    split_features[feature_key] = util.split_tensor_value(
        features[feature_key][encoding.NODE_SUFFIX])
  split_predictions = {}
  for prediction_key in self._predictions_map:
    split_predictions[prediction_key] = util.split_tensor_value(
        predictions[prediction_key][encoding.NODE_SUFFIX])

  result = []

  if (not isinstance(input_refs, np.ndarray) or input_refs.ndim != 1 or
      not np.issubdtype(input_refs.dtype, np.integer)):
    raise ValueError(
        'input_refs should be a 1-D array of integers. input_refs was {}.'
        .format(input_refs))

  for result_key, split_values in itertools.chain(split_labels.items(),
                                                  split_features.items(),
                                                  split_predictions.items()):
    if len(split_values) != input_refs.shape[0]:
      raise ValueError(
          'input_refs should be batch-aligned with features, predictions'
          ' and labels; key {} had {} slices but input_refs had batch size'
          ' of {}'.format(result_key, len(split_values),
                          input_refs.shape[0]))

  for i, input_ref in enumerate(input_refs):
    if input_ref < 0 or input_ref >= len(inputs):
      raise ValueError(
          'An index in input_refs is out of range: {} vs {}; '
          'inputs: {}'.format(input_ref, len(inputs), inputs))
    labels = {}
    for label_key in self._labels_map:
      labels[label_key] = {
          encoding.NODE_SUFFIX: split_labels[label_key][i]
      }
    features = {}
    for feature_key in self._features_map:
      features[feature_key] = {
          encoding.NODE_SUFFIX: split_features[feature_key][i]
      }
    predictions = {}
    for prediction_key in self._predictions_map:
      predictions[prediction_key] = {
          encoding.NODE_SUFFIX: split_predictions[prediction_key][i]
      }
    result.append(
        types.FeaturesPredictionsLabels(
            input_ref=input_ref,
            features=features,
            predictions=predictions,
            labels=labels))

  return result
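# Illustrative usage sketch for predict_list (an assumption, not part of the
# class above): `eval_model` stands for an already-loaded object exposing
# predict_list (e.g. a loaded EvalSavedModel), and `serialized_examples` is a
# list of serialized tf.Example protos. It only demonstrates the documented
# contract that each returned FPL's input_ref indexes back into the inputs
# that produced it.
def _example_predict_list_usage(eval_model, serialized_examples):
  fpls = eval_model.predict_list(serialized_examples)
  for fpl in fpls:
    # input_ref points at the original input that produced this FPL.
    assert 0 <= fpl.input_ref < len(serialized_examples)
  return fpls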