コード例 #1
0
def create_fpls():
    """Build two sample FeaturesPredictionsLabels records for verification.

    Returns:
      A two-element list of FPLs (input_ref 0 and 1) whose features,
      predictions and labels are each produced by make_features_dict.
    """
    # First record: a single interest value.
    first_features = make_features_dict({
        'gender': ['f'],
        'age': [13],
        'interest': ['cars'],
    })
    first_predictions = make_features_dict({'kb': [1]})
    first_labels = make_features_dict({'ad_risk_score': [0]})
    first = types.FeaturesPredictionsLabels(input_ref=0,
                                            features=first_features,
                                            predictions=first_predictions,
                                            labels=first_labels)

    # Second record: two interest values.
    second_features = make_features_dict({
        'gender': ['m'],
        'age': [10],
        'interest': ['cars', 'movies'],
    })
    second_predictions = make_features_dict({'kb': [1]})
    second_labels = make_features_dict({'ad_risk_score': [0]})
    second = types.FeaturesPredictionsLabels(input_ref=1,
                                             features=second_features,
                                             predictions=second_predictions,
                                             labels=second_labels)

    return [first, second]
コード例 #2
0
    def testMaterializeFeaturesWithExcludes(self):
        """Checks that an excluded feature is not materialized.

        Builds extracts holding a serialized example and an FPL with a dense
        feature 'f' and a sparse feature 's', calls _MaterializeFeatures with
        excludes=['s'], and asserts 'features__s' is absent from the result.
        """
        example1 = self._makeExample(age=3.0,
                                     language='english',
                                     label=1.0,
                                     slice_key='first_slice')

        features = {
            'f': {
                encoding.NODE_SUFFIX: np.array([1])
            },
            's': {
                encoding.NODE_SUFFIX:
                tf.SparseTensorValue(indices=[[0, 5], [1, 2], [3, 6]],
                                     values=[100., 200., 300.],
                                     dense_shape=[4, 10])
            }
        }
        predictions = {'p': {encoding.NODE_SUFFIX: np.array([2])}}
        labels = {'l': {encoding.NODE_SUFFIX: np.array([3])}}

        extracts = {
            constants.INPUT_KEY:
            example1.SerializeToString(),
            constants.FEATURES_PREDICTIONS_LABELS_KEY:
            types.FeaturesPredictionsLabels(input_ref=0,
                                            features=features,
                                            predictions=predictions,
                                            labels=labels)
        }
        result = feature_extractor._MaterializeFeatures(extracts,
                                                        excludes=['s'])
        # assertNotIn reports the offending key and container on failure,
        # whereas assertFalse(... in ...) only reports "True is not false".
        self.assertNotIn('features__s', result)
コード例 #3
0
    def get_fpls_from_examples(
            self, input_example_bytes_list: List[bytes]) -> List[Any]:
        """Generates FPLs from serialized examples using a ModelAgnostic graph.

        Args:
          input_example_bytes_list: A list of serialized tf.Example protos to
            be parsed by the graph.

        Returns:
          A list of FeaturesPredictionsLabels, one per example. Every parsed
          key appears in `features`; keys listed in the config's label_keys /
          prediction_keys additionally appear in `labels` / `predictions`.

        Raises:
          ValueError: If the parsed keys do not all split into the same number
            of examples.
        """
        # Call the graph via the created session callable _get_features_fn and
        # get the tensor representation of the features.
        parsed = self._get_features_fn(input_example_bytes_list)

        # Split the features by the example keys. Also verify that every key
        # has the same number of total examples. None (rather than 0) marks
        # "not yet known" so that a first key with zero examples is still
        # compared against the keys that follow.
        split_features = {}
        num_examples = None
        for key, tensor_value in parsed.items():
            per_example = util.split_tensor_value(tensor_value)
            split_features[key] = per_example
            if num_examples is None:
                num_examples = len(per_example)
            elif num_examples != len(per_example):
                # The arguments must be a tuple: `fmt % key, n` would apply
                # `%` to `key` alone and fail with TypeError.
                raise ValueError(
                    'Different keys unexpectedly had different number of '
                    'examples. Key %s unexpectedly had %s elements.' %
                    (key, len(per_example)))
        if num_examples is None:
            # No keys were returned at all, so there is nothing to emit.
            num_examples = 0

        label_keys = self._config.label_keys
        prediction_keys = self._config.prediction_keys

        # Sort out the examples into individual FPLs: one example -> one FPL.
        # Sort them into Features, Predictions, or Labels according to the
        # input config. Each group gets its own wrapper dict per key.
        result = []
        for i in range(num_examples):
            example_labels = {}
            example_predictions = {}
            example_features = {}
            for key, per_example in split_features.items():
                if key in label_keys:
                    example_labels[key] = {
                        encoding.NODE_SUFFIX: per_example[i]
                    }
                if key in prediction_keys:
                    example_predictions[key] = {
                        encoding.NODE_SUFFIX: per_example[i]
                    }
                example_features[key] = {encoding.NODE_SUFFIX: per_example[i]}

            result.append(
                types.FeaturesPredictionsLabels(
                    input_ref=i,
                    features=example_features,
                    predictions=example_predictions,
                    labels=example_labels))

        return result
コード例 #4
0
    def testAugmentFPLFromTfExample(self):
        """Checks _MaterializeFeatures with the FPL as destination.

        Calls _MaterializeFeatures with source=INPUT_KEY and
        dest=FEATURES_PREDICTIONS_LABELS_KEY, then asserts that no materialized
        columns were added to the result, that tf.Example features missing
        from the FPL show up in fpl.features, and that a feature present in
        both keeps its FPL value.
        """
        example1 = self._makeExample(age=3.0,
                                     language='english',
                                     label=1.0,
                                     slice_key='first_slice',
                                     f=0.0)

        sparse_value = tf.compat.v1.SparseTensorValue(
            indices=[[0, 5], [1, 2], [3, 6]],
            values=[100., 200., 300.],
            dense_shape=[4, 10])
        original_fpl = types.FeaturesPredictionsLabels(
            input_ref=0,
            features={
                'f': {encoding.NODE_SUFFIX: np.array([1])},
                's': {encoding.NODE_SUFFIX: sparse_value},
            },
            predictions={'p': {encoding.NODE_SUFFIX: np.array([2])}},
            labels={'l': {encoding.NODE_SUFFIX: np.array([3])}})

        extracts = {
            constants.INPUT_KEY: example1.SerializeToString(),
            constants.FEATURES_PREDICTIONS_LABELS_KEY: original_fpl,
        }
        fpl = extracts[constants.FEATURES_PREDICTIONS_LABELS_KEY]
        result = feature_extractor._MaterializeFeatures(
            extracts,
            source=constants.INPUT_KEY,
            dest=constants.FEATURES_PREDICTIONS_LABELS_KEY)

        self.assertIsInstance(result, dict)
        # The FPL itself should still be there.
        self.assertEqual(result[constants.FEATURES_PREDICTIONS_LABELS_KEY],
                         fpl)

        # Materialized columns must not be added.
        self.assertNotIn('features__f', result)
        self.assertNotIn('features__age', result)

        # tf.Example features that were not present in the FPL are added.
        expected_age = {encoding.NODE_SUFFIX: np.array([3.0])}
        self.assertEqual(fpl.features['age'], expected_age)
        expected_language = {'node': np.array([['english']], dtype='|S7')}
        self.assertEqual(fpl.features['language'], expected_language)
        expected_slice_key = {
            'node': np.array([['first_slice']], dtype='|S11')
        }
        self.assertEqual(fpl.features['slice_key'], expected_slice_key)

        # Features present in both are not overwritten by the tf.Example value.
        expected_f = {encoding.NODE_SUFFIX: np.array([1])}
        self.assertEqual(fpl.features['f'], expected_f)
コード例 #5
0
    def testGetSparseTensorValue(self):
        """Checks get_feature_value on a feature set from a sparse tensor.

        Stores a SparseTensorValue under the key 'sparse' on an otherwise
        empty FPL via _set_feature_value and asserts that get_feature_value
        returns the list ['', 'one', 'two'].
        """
        empty_fpl = types.FeaturesPredictionsLabels(input_ref=0,
                                                    features={},
                                                    predictions={},
                                                    labels={})
        sparse_value = tf.compat.v1.SparseTensorValue(
            indices=[[0, 0, 0], [0, 1, 0], [0, 1, 1]],
            values=['', 'one', 'two'],
            dense_shape=[1, 2, 2])

        meta_feature_extractor._set_feature_value(empty_fpl.features,
                                                  'sparse', sparse_value)

        actual = meta_feature_extractor.get_feature_value(empty_fpl, 'sparse')
        self.assertEqual(['', 'one', 'two'], actual)
コード例 #6
0
def get_fpl_copy(extracts: types.Extracts) -> types.FeaturesPredictionsLabels:
    """Return a new FPL built from the FPL stored in the extracts.

    Args:
      extracts: Extracts expected to hold an FPL under
        constants.FEATURES_PREDICTIONS_LABELS_KEY.

    Returns:
      A new FeaturesPredictionsLabels whose features dict is a shallow copy of
      the original's; labels, predictions and input_ref are passed through
      unchanged.

    Raises:
      RuntimeError: If the extracts hold no (or a falsy) FPL.
    """
    original = extracts.get(constants.FEATURES_PREDICTIONS_LABELS_KEY)
    if original:
        # Build a fresh FPL tuple instead of handing back the original, so the
        # original is not mutated, which Beam disallows.
        return types.FeaturesPredictionsLabels(
            input_ref=original.input_ref,
            features=copy.copy(original.features),
            predictions=original.predictions,
            labels=original.labels)
    raise RuntimeError('FPL missing, Please ensure _Predict() was called.')
コード例 #7
0
def create_fpls():
  """Build two sample FeaturesPredictionsLabels records.

  Returns:
    A two-element list of FPLs, both with input_ref 0, that differ only in
    their 'gender' and 'age' feature values.
  """

  def _make_fpl(gender, age):
    # Everything except gender and age is shared between the two records.
    return types.FeaturesPredictionsLabels(
        input_ref=0,
        features=make_features_dict({
            'gender': [gender],
            'age': [age],
            'interest': ['cars'],
        }),
        predictions=make_features_dict({'kb': [1]}),
        labels=make_features_dict({'ad_risk_score': [0]}))

  return [_make_fpl('f', 13), _make_fpl('m', 10)]
コード例 #8
0
    def testMaterializeFeaturesNoMaterializedColumns(self):
        """Checks the columns produced by a default _MaterializeFeatures call.

        Asserts that the result is a dict that still contains the FPL and that
        it holds a MaterializedColumn for each of 'features__f',
        'predictions__p', 'labels__l' and 'features__s'.
        """
        example1 = self._makeExample(age=3.0,
                                     language='english',
                                     label=1.0,
                                     slice_key='first_slice')

        sparse_value = tf.compat.v1.SparseTensorValue(
            indices=[[0, 5], [1, 2], [3, 6]],
            values=[100., 200., 300.],
            dense_shape=[4, 10])
        original_fpl = types.FeaturesPredictionsLabels(
            input_ref=0,
            features={
                'f': {encoding.NODE_SUFFIX: np.array([1])},
                's': {encoding.NODE_SUFFIX: sparse_value},
            },
            predictions={'p': {encoding.NODE_SUFFIX: np.array([2])}},
            labels={'l': {encoding.NODE_SUFFIX: np.array([3])}})

        extracts = {
            constants.INPUT_KEY: example1.SerializeToString(),
            constants.FEATURES_PREDICTIONS_LABELS_KEY: original_fpl,
        }
        fpl = extracts[constants.FEATURES_PREDICTIONS_LABELS_KEY]
        result = feature_extractor._MaterializeFeatures(extracts)

        self.assertIsInstance(result, dict)
        # The FPL itself should still be there.
        self.assertEqual(result[constants.FEATURES_PREDICTIONS_LABELS_KEY],
                         fpl)

        # Each expected column is keyed by its own name.
        expected_columns = [
            ('features__f', [1]),
            ('predictions__p', [2]),
            ('labels__l', [3]),
            ('features__s', [100., 200., 300.]),
        ]
        for column_name, column_value in expected_columns:
            self.assertEqual(
                result[column_name],
                types.MaterializedColumn(name=column_name,
                                         value=column_value))
コード例 #9
0
 def testGetFeaturesFromExtracts(self):
     """Checks get_features_from_extracts for three kinds of extracts.

     Covers extracts holding an FPL, extracts holding a plain features dict
     under FEATURES_KEY, and empty extracts (expected to yield {}).
     """
     # Features stored inside an FPL.
     fpl_extracts = {
         constants.FEATURES_PREDICTIONS_LABELS_KEY:
         types.FeaturesPredictionsLabels(input_ref=0,
                                         features={'a': np.array([1])},
                                         predictions={},
                                         labels={})
     }
     self.assertEqual({'a': np.array([1])},
                      util.get_features_from_extracts(fpl_extracts))

     # Features stored directly under FEATURES_KEY.
     features_extracts = {constants.FEATURES_KEY: {'a': np.array([1])}}
     self.assertEqual({'a': np.array([1])},
                      util.get_features_from_extracts(features_extracts))

     # Nothing stored at all.
     self.assertEqual({}, util.get_features_from_extracts({}))
コード例 #10
0
    def as_features_predictions_labels(self, fetched_values):
        """Converts fetched values into FeaturesPredictionsLabels records.

        Args:
          fetched_values: Iterable of objects exposing `input_ref` and a
            `values` mapping indexed by the features / predictions / labels
            group names.

        Returns:
          A list with one FeaturesPredictionsLabels per fetched value, where
          every entry of each group is wrapped as {encoding.NODE_SUFFIX: value}.
        """

        def wrap_group(fetched, group):
            group_values = fetched.values[group]
            if not isinstance(group_values, dict):
                # A bare (non-dict) value is filed under the group's default
                # key so that every group ends up being a dict.
                group_values = {util.default_dict_key(group): group_values}
            return {
                key: {encoding.NODE_SUFFIX: value}
                for key, value in group_values.items()
            }

        return [
            types.FeaturesPredictionsLabels(
                input_ref=fetched.input_ref,
                features=wrap_group(fetched, constants.FEATURES_NAME),
                predictions=wrap_group(fetched, constants.PREDICTIONS_NAME),
                labels=wrap_group(fetched, constants.LABELS_NAME))
            for fetched in fetched_values
        ]
コード例 #11
0
    def testEvaluateGraph(self):
        """Feeds three FPLs through a ModelAgnosticEvaluateGraph.

        Asserts that exactly two metric values come back, with
        'tf_metric_mean' equal to 8.0 and 'py_func_total_label' equal to 48.0.
        """
        # The features aren't terribly useful for these metrics. Just make
        # sure they can be processed correctly by the feed/feedlist generation
        # logic by having one dense tensor and one sparse tensor.
        language_value = tf.SparseTensorValue(indices=np.array([[0, 0]]),
                                              values=np.array(['english']),
                                              dense_shape=np.array([1, 1]))
        features = {
            'age': {encoding.NODE_SUFFIX: np.array([1])},
            'language': {encoding.NODE_SUFFIX: language_value},
        }
        predictions = {'predictions': {encoding.NODE_SUFFIX: np.array([2])}}

        # Have 3 labels of values 3, 23, 16 and predictions of values 2, 2, 2.
        # This should give sum = 48 and mean = 8.
        fpls = [
            types.FeaturesPredictionsLabels(
                input_ref=0,
                features=features,
                predictions=predictions,
                labels={
                    'labels': {encoding.NODE_SUFFIX: np.array([label_value])}
                })
            for label_value in (3, 23, 16)
        ]

        # Set up a model agnostic config so we can get the FPLConfig.
        model_agnostic_config = agnostic_predict.ModelAgnosticConfig(
            label_keys=['labels'],
            prediction_keys=['predictions'],
            feature_spec={
                'age': tf.FixedLenFeature([], tf.float32),
                'language': tf.VarLenFeature(tf.string),
                'predictions': tf.FixedLenFeature([], tf.float32),
                'labels': tf.FixedLenFeature([], tf.float32)
            })

        # Create a Model Agnostic Evaluate graph handler and feed in the FPL
        # list.
        feed_config = model_agnostic_extractor.ModelAgnosticGetFPLFeedConfig(
            model_agnostic_config)
        evaluate_graph = (
            model_agnostic_evaluate_graph.ModelAgnosticEvaluateGraph(
                [add_mean_callback], feed_config))
        evaluate_graph.metrics_reset_update_get_list(fpls)
        outputs = evaluate_graph.get_metric_values()

        # Verify that we got the right metrics out.
        self.assertEqual(2, len(outputs))
        self.assertEqual(outputs['tf_metric_mean'], 8.0)
        self.assertEqual(outputs['py_func_total_label'], 48.0)
コード例 #12
0
    def testEvaluateMultiLabelsPredictions(self):
        """Feeds FPLs with two labels and two predictions through the graph.

        Asserts that exactly two metric values come back, with
        'tf_metric_mean' equal to 9.0 and 'py_func_total_label' equal to 108.0.
        """
        features = {'age': {encoding.NODE_SUFFIX: np.array([1])}}
        predictions = {
            'prediction': {encoding.NODE_SUFFIX: np.array([2])},
            'prediction_2': {encoding.NODE_SUFFIX: np.array([4])},
        }

        # Have 6 labels of values 3, 5, 23, 12, 16, 31 and
        # 6 predictions of values 2, 2, 2, 4, 4, 4
        # This should give sum = 108 and mean = 9.
        label_pairs = [(3, 5), (23, 12), (16, 31)]
        fpls = [
            types.FeaturesPredictionsLabels(
                input_ref=0,
                features=features,
                predictions=predictions,
                labels={
                    'label': {
                        encoding.NODE_SUFFIX: np.array([label_value])
                    },
                    'label_2': {
                        encoding.NODE_SUFFIX: np.array([label_2_value])
                    },
                })
            for label_value, label_2_value in label_pairs
        ]

        # Set up a model agnostic config so we can get the FPLConfig.
        model_agnostic_config = agnostic_predict.ModelAgnosticConfig(
            label_keys=['label', 'label_2'],
            prediction_keys=['prediction', 'prediction_2'],
            feature_spec={
                'age': tf.FixedLenFeature([], tf.float32),
                'prediction': tf.FixedLenFeature([], tf.int64),
                'prediction_2': tf.FixedLenFeature([], tf.int64),
                'label': tf.FixedLenFeature([], tf.int64),
                'label_2': tf.FixedLenFeature([], tf.int64)
            })

        # Create a Model Agnostic Evaluate graph handler and feed in the FPL
        # list.
        feed_config = model_agnostic_extractor.ModelAgnosticGetFPLFeedConfig(
            model_agnostic_config)
        evaluate_graph = (
            model_agnostic_evaluate_graph.ModelAgnosticEvaluateGraph(
                [add_mean_callback], feed_config))
        evaluate_graph.metrics_reset_update_get_list(fpls)
        outputs = evaluate_graph.get_metric_values()

        # Verify that we got the right metrics out.
        self.assertEqual(2, len(outputs))
        self.assertEqual(outputs['tf_metric_mean'], 9.0)
        self.assertEqual(outputs['py_func_total_label'], 108.0)
コード例 #13
0
    def predict_list(self, inputs):
        """Like predict, but takes a list of inputs.

        Args:
          inputs: A list of input data (or a dict of keys to lists of input
            data). See predict for more details.

        Returns:
          A list of FeaturesPredictionsLabels. See predict for more details.

        Raises:
          ValueError: If the original input_refs tensor passed to the
            EvalInputReceiver does not align with the features, predictions
            and labels returned after feeding the inputs.
        """
        if isinstance(inputs, dict):
            # Only pass values for keys that are in the input map (in order).
            input_args = [
                inputs[key] for key in self._input_map if key in inputs
            ]
        else:
            input_args = [inputs]

        (batch_features, batch_predictions, batch_labels,
         input_refs) = self._predict_list_fn(*input_args)

        # Split every batched tensor value into per-example slices.
        split_labels = {
            key: util.split_tensor_value(
                batch_labels[key][encoding.NODE_SUFFIX])
            for key in self._labels_map
        }
        split_features = {
            key: util.split_tensor_value(
                batch_features[key][encoding.NODE_SUFFIX])
            for key in self._features_map
        }
        split_predictions = {
            key: util.split_tensor_value(
                batch_predictions[key][encoding.NODE_SUFFIX])
            for key in self._predictions_map
        }

        refs_are_valid = (isinstance(input_refs, np.ndarray)
                          and input_refs.ndim == 1
                          and np.issubdtype(input_refs.dtype, np.integer))
        if not refs_are_valid:
            raise ValueError(
                'input_refs should be an 1-D array of integers. input_refs was {}.'
                .format(input_refs))

        # Every key must have produced exactly one slice per input_ref.
        batch_size = input_refs.shape[0]
        all_splits = itertools.chain(split_labels.items(),
                                     split_features.items(),
                                     split_predictions.items())
        for result_key, split_values in all_splits:
            if len(split_values) != batch_size:
                raise ValueError(
                    'input_refs should be batch-aligned with features, predictions'
                    ' and labels; key {} had {} slices but input_refs had batch size'
                    ' of {}'.format(result_key, len(split_values),
                                    input_refs.shape[0]))

        result = []
        for position, input_ref in enumerate(input_refs):
            if not 0 <= input_ref < len(inputs):
                raise ValueError(
                    'An index in input_refs is out of range: {} vs {}; '
                    'inputs: {}'.format(input_ref, len(inputs), inputs))
            example_labels = {
                key: {encoding.NODE_SUFFIX: split_labels[key][position]}
                for key in self._labels_map
            }
            example_features = {
                key: {encoding.NODE_SUFFIX: split_features[key][position]}
                for key in self._features_map
            }
            example_predictions = {
                key: {encoding.NODE_SUFFIX: split_predictions[key][position]}
                for key in self._predictions_map
            }
            result.append(
                types.FeaturesPredictionsLabels(
                    input_ref=input_ref,
                    features=example_features,
                    predictions=example_predictions,
                    labels=example_labels))

        return result