def create_fpls():
  """Build two FeaturesPredictionsLabels fixtures for tests."""

  def _make_fpl(ref, gender, age, interest):
    # Helper so both fixtures share one construction path.
    return api_types.FeaturesPredictionsLabels(
        example_ref=ref,
        features=make_features_dict({
            'gender': gender,
            'age': age,
            'interest': interest
        }),
        predictions=make_features_dict({
            'kb': [1],
        }),
        labels=make_features_dict({'ad_risk_score': [0]}))

  # NOTE(review): both fixtures carry example_ref=0 — presumably intentional
  # for this test file; confirm against its assertions.
  return [
      _make_fpl(0, ['f'], [13], ['cars']),
      _make_fpl(0, ['m'], [10], ['cars']),
  ]
def create_fpls():
  """Create test FPL dicts that can be used for verification."""

  def _make_fpl(ref, gender, age, interest):
    # Shared construction path for the two fixtures; only the per-example
    # feature values and the example_ref differ.
    return api_types.FeaturesPredictionsLabels(
        example_ref=ref,
        features=make_features_dict({
            'gender': gender,
            'age': age,
            'interest': interest
        }),
        predictions=make_features_dict({
            'kb': [1],
        }),
        labels=make_features_dict({'ad_risk_score': [0]}))

  return [
      _make_fpl(0, ['f'], [13], ['cars']),
      _make_fpl(1, ['m'], [10], ['cars', 'movies']),
  ]
def testGetSparseTensorValue(self):
  """A SparseTensorValue set via _set_feature_value round-trips its values."""
  sparse_value = tf.SparseTensorValue(
      indices=[[0, 0, 0], [0, 1, 0], [0, 1, 1]],
      values=['', 'one', 'two'],
      dense_shape=[1, 2, 2])
  # Start from an empty FPL and inject the sparse feature directly.
  fpl = api_types.FeaturesPredictionsLabels(
      example_ref=0, features={}, predictions={}, labels={})
  meta_feature_extractor._set_feature_value(fpl.features, 'sparse',
                                            sparse_value)
  # get_feature_value should surface just the values array of the sparse
  # tensor, in insertion order.
  self.assertEqual(['', 'one', 'two'],
                   meta_feature_extractor.get_feature_value(fpl, 'sparse'))
def get_fpl_copy(example_and_extracts):
  """Get a copy of the FPL in the extracts of example_and_extracts."""
  original = example_and_extracts.extracts.get(
      constants.FEATURES_PREDICTIONS_LABELS_KEY)
  if not original:
    raise RuntimeError('FPL missing, Please ensure _Predict() was called.')
  # Beam disallows mutating inputs, so return a fresh tuple whose features
  # dict is a shallow copy; labels and predictions are shared as-is since
  # the caller only rewrites the features mapping.
  return api_types.FeaturesPredictionsLabels(
      features=copy.copy(original.features),
      labels=original.labels,
      predictions=original.predictions,
      example_ref=original.example_ref)
def testMaterializeFeaturesNoMaterializedColumns(self):
  """_MaterializeFeatures derives columns from the FPL and keeps the FPL."""
  example1 = self._makeExample(
      age=3.0, language='english', label=1.0, slice_key='first_slice')
  sparse_feature = tf.SparseTensorValue(
      indices=[[0, 5], [1, 2], [3, 6]],
      values=[100., 200., 300.],
      dense_shape=[4, 10])
  fpl_in = api_types.FeaturesPredictionsLabels(
      example_ref=0,
      features={
          'f': {encoding.NODE_SUFFIX: np.array([1])},
          's': {encoding.NODE_SUFFIX: sparse_feature},
      },
      predictions={'p': {encoding.NODE_SUFFIX: np.array([2])}},
      labels={'l': {encoding.NODE_SUFFIX: np.array([3])}})
  example_and_extracts = types.ExampleAndExtracts(
      example=example1.SerializeToString(), extracts={'fpl': fpl_in})
  fpl = example_and_extracts.extracts[
      constants.FEATURES_PREDICTIONS_LABELS_KEY]
  result = feature_extractor._MaterializeFeatures(example_and_extracts)
  self.assertTrue(isinstance(result, types.ExampleAndExtracts))
  # The FPL entry must survive materialization untouched.
  self.assertEqual(result.extracts['fpl'], fpl)
  self.assertEqual(result.extracts['f'],
                   types.MaterializedColumn(name='f', value=[1]))
  self.assertEqual(result.extracts['p'],
                   types.MaterializedColumn(name='p', value=[2]))
  self.assertEqual(result.extracts['l'],
                   types.MaterializedColumn(name='l', value=[3]))
  # Sparse features materialize as just their values list.
  self.assertEqual(
      result.extracts['s'],
      types.MaterializedColumn(name='s', value=[100., 200., 300.]))
def predict_list(self, input_example_bytes_list):
  """Like predict, but takes a list of examples.

  Args:
    input_example_bytes_list: a list of input example bytes.

  Returns:
     A list of FeaturesPredictionsLabels (while in most cases one
     input_example_bytes will result in one FPL, in some cases, e.g.
     where examples are dynamically decoded and generated within the graph,
     one input_example_bytes might result in multiple examples).

  Raises:
    ValueError: if the example_ref is not a 1-D tensor integer tensor or
      it is not batch aligned with features, predictions and labels or
      it is out of range (< 0 or >= len(input_example_bytes_list)).
  """
  # Run the prediction graph once over the whole batch.
  (features, predictions, labels,
   example_refs) = self._predict_list_fn(input_example_bytes_list)
  # Split each batched tensor value into per-example slices so that the
  # batch can be regrouped by example_ref below.
  split_labels = {}
  for label_key in self._labels_map:
    split_labels[label_key] = util.split_tensor_value(
        labels[label_key][encoding.NODE_SUFFIX])
  split_features = {}
  for feature_key in self._features_map:
    split_features[feature_key] = util.split_tensor_value(
        features[feature_key][encoding.NODE_SUFFIX])
  split_predictions = {}
  for prediction_key in self._predictions_map:
    split_predictions[prediction_key] = util.split_tensor_value(
        predictions[prediction_key][encoding.NODE_SUFFIX])
  result = []
  # example_refs must be a 1-D integer ndarray: each element maps an output
  # slice back to the index of the input example that produced it.
  if (not isinstance(example_refs, np.ndarray) or example_refs.ndim != 1 or
      not np.issubdtype(example_refs.dtype, np.integer)):
    raise ValueError(
        'example_ref should be an 1-D array of integers. example_ref was {}.'
        .format(example_refs))
  # Every split tensor must have exactly one slice per example_ref entry.
  for result_key, split_values in itertools.chain(split_labels.items(),
                                                  split_features.items(),
                                                  split_predictions.items()):
    if len(split_values) != example_refs.shape[0]:
      raise ValueError(
          'example_ref should be batch-aligned with features, predictions'
          ' and labels; key {} had {} slices but ExampleRef had batch size'
          ' of {}'.format(result_key, len(split_values),
                          example_refs.shape[0]))
  for i, example_ref in enumerate(example_refs):
    if example_ref < 0 or example_ref >= len(input_example_bytes_list):
      raise ValueError('An index in example_ref is out of range: {} vs {}; '
                       'input_example_bytes: {}'.format(
                           example_ref, len(input_example_bytes_list),
                           input_example_bytes_list))
    # NOTE: these rebind the batched `labels`/`features`/`predictions` dicts
    # from above; the batched values are no longer needed at this point.
    labels = {}
    for label_key in self._labels_map:
      labels[label_key] = {encoding.NODE_SUFFIX: split_labels[label_key][i]}
    features = {}
    for feature_key in self._features_map:
      features[feature_key] = {
          encoding.NODE_SUFFIX: split_features[feature_key][i]
      }
    predictions = {}
    for prediction_key in self._predictions_map:
      predictions[prediction_key] = {
          encoding.NODE_SUFFIX: split_predictions[prediction_key][i]
      }
    result.append(
        api_types.FeaturesPredictionsLabels(
            example_ref=example_ref,
            features=features,
            predictions=predictions,
            labels=labels))
  return result