Esempio n. 1
0
    def testInvalidNeighborWeightRank(self):
        """Input containing a rank 3 neighbor weight tensor raises ValueError."""
        features = {
            'F0': tf.constant([1.0, 2.0]),
            'NL_nbr_0_F0': tf.constant([1.1, 2.1]),
            'NL_nbr_0_weight': tf.constant([[[0.25]]]),
        }
        # Build the config outside the assertRaises block so that only the
        # call under test can satisfy the expected ValueError.
        neighbor_config = configs.GraphNeighborConfig(max_neighbors=1)

        with self.assertRaises(ValueError):
            utils.unpack_neighbor_features(features, neighbor_config)
Esempio n. 2
0
    def testInvalidRank(self):
        """Input containing rank 1 tensors raises ValueError."""
        # Every tensor below is built from a flat list, i.e. it is rank 1.
        features = {
            'F0': tf.constant([1.0, 2.0]),
            'NL_nbr_0_F0': tf.constant([1.1, 2.1]),
            'NL_nbr_0_weight': tf.constant([0.25]),
        }
        # Build the config outside the assertRaises block so that only the
        # call under test can satisfy the expected ValueError.
        neighbor_config = configs.GraphNeighborConfig(max_neighbors=1)

        with self.assertRaises(ValueError):
            utils.unpack_neighbor_features(features, neighbor_config)
Esempio n. 3
0
    def testMissingNeighborWeight(self):
        """Missing neighbor weight raises KeyError."""
        # Simulate a batch size of 1 for simplicity.
        # Two neighbors are requested below, but 'NL_nbr_1_weight' is absent.
        features = {
            'F0': tf.constant([[1.0, 2.0]]),
            'NL_nbr_0_F0': tf.constant([[1.1, 2.1]]),
            'NL_nbr_0_weight': tf.constant([[0.25]]),
            'NL_nbr_1_F0': tf.constant([[1.2, 2.2]]),
        }
        # Build the config outside the assertRaises block so that only the
        # call under test can satisfy the expected KeyError.
        neighbor_config = configs.GraphNeighborConfig(max_neighbors=2)

        with self.assertRaises(KeyError):
            utils.unpack_neighbor_features(features, neighbor_config)
Esempio n. 4
0
    def testSampleAndNeighborFeatureShapeIncompatibility(self):
        """Sample feature and neighbor feature have incompatible shapes."""
        # Simulate a batch size of 1 for simplicity.
        # The shape of the sample feature is 1x2 while the shape of the
        # corresponding neighbor feature is 1x3.
        features = {
            'F0': tf.constant([[1.0, 2.0]]),
            'NL_nbr_0_F0': tf.constant([[1.1, 2.1, 3.1]]),
            'NL_nbr_0_weight': tf.constant([[0.25]]),
        }
        # Build the config outside the assertRaises block so that only the
        # call under test can satisfy the expected ValueError.
        neighbor_config = configs.GraphNeighborConfig(max_neighbors=1)

        with self.assertRaises(ValueError):
            utils.unpack_neighbor_features(features, neighbor_config)
Esempio n. 5
0
    def testNeighborWeightShapeIncompatibility(self):
        """One neighbor weight has an incompatible shape."""
        # Simulate a batch size of 1 for simplicity.
        # The shape of one neighbor weight is 1x2 instead of 1x1.
        features = {
            'F0': tf.constant([[1.0, 2.0]]),
            'NL_nbr_0_F0': tf.constant([[1.1, 2.1]]),
            'NL_nbr_0_weight': tf.constant([[0.25]]),
            'NL_nbr_1_F0': tf.constant([[1.2, 2.2]]),
            'NL_nbr_1_weight': tf.constant([[0.5, 0.75]]),
        }
        # Build the config outside the assertRaises block so that only the
        # call under test can satisfy the expected ValueError.
        neighbor_config = configs.GraphNeighborConfig(max_neighbors=2)

        with self.assertRaises(ValueError):
            utils.unpack_neighbor_features(features, neighbor_config)
Esempio n. 6
0
    def testNeighborFeatureShapeIncompatibility(self):
        """One neighbor feature has an incompatible shape."""
        # Simulate a batch size of 1 for simplicity.
        # The shape of the sample feature and one neighbor feature is 1x2, while
        # the shape of another neighbor feature is 1x3.
        features = {
            'F0': tf.constant([[1.0, 2.0]]),
            'NL_nbr_0_F0': tf.constant([[1.1, 2.1]]),
            'NL_nbr_0_weight': tf.constant([[0.25]]),
            'NL_nbr_1_F0': tf.constant([[1.2, 2.2, 3.2]]),
            'NL_nbr_1_weight': tf.constant([[0.5]]),
        }
        # Build the config outside the assertRaises block so that only the
        # call under test can satisfy the expected ValueError.
        neighbor_config = configs.GraphNeighborConfig(max_neighbors=2)

        with self.assertRaises(ValueError):
            utils.unpack_neighbor_features(features, neighbor_config)
Esempio n. 7
0
    def testExtraNeighborFeaturesIgnored(self):
        """Checks that neighbor inputs beyond `max_neighbors` are ignored."""
        # Simulate a batch size of 1. Two neighbors are supplied, but the
        # config below only asks for one.
        inputs = {
            'F0': tf.constant([[1.0, 2.0]]),
            'NL_nbr_0_F0': tf.constant([[1.1, 2.1]]),
            'NL_nbr_0_weight': tf.constant([[0.25]]),
            'NL_nbr_1_F0': tf.constant([[1.2, 2.2]]),
            'NL_nbr_1_weight': tf.constant([[0.75]]),
        }

        # Only the values belonging to neighbor 0 are expected in the output.
        want_sample_f0 = tf.constant([[1.0, 2.0]])
        want_nbr_f0 = tf.constant([[1.1, 2.1]])
        want_nbr_weights = tf.constant([[0.25]])

        config = configs.GraphNeighborConfig(max_neighbors=1)
        unpacked = utils.unpack_neighbor_features(inputs, config)
        got_sample, got_nbr, got_weights = self.evaluate(unpacked)

        self.assertAllEqual(got_sample['F0'], want_sample_f0)
        self.assertAllEqual(got_nbr['F0'], want_nbr_f0)
        self.assertAllEqual(got_weights, want_nbr_weights)
Esempio n. 8
0
    def testSampleFeatureOnlyExtractionWithNeighbors(self):
        """With `max_neighbors=0`, only sample features are extracted.

        The input carries features and weights for two neighbors; the test
        expects no neighbor weights and an empty neighbor feature dictionary.
        """
        # Simulate batch size of 1.
        inputs = {
            'F0': tf.constant([[1.0, 2.0]]),
            'F1': tf.constant([[3.0, 4.0, 5.0]]),
            'NL_nbr_0_F0': tf.constant([[1.1, 2.1]]),
            'NL_nbr_0_F1': tf.constant([[3.1, 4.1, 5.1]]),
            'NL_nbr_0_weight': tf.constant([[0.25]]),
            'NL_nbr_1_F0': tf.constant([[1.2, 2.2]]),
            'NL_nbr_1_F1': tf.constant([[3.2, 4.2, 5.2]]),
            'NL_nbr_1_weight': tf.constant([[0.75]]),
        }

        want_sample = {
            'F0': tf.constant([[1.0, 2.0]]),
            'F1': tf.constant([[3.0, 4.0, 5.0]]),
        }

        config = configs.GraphNeighborConfig(max_neighbors=0)
        unpacked = utils.unpack_neighbor_features(inputs, config)
        sample_tensors, nbr_tensors, weights = unpacked
        self.assertIsNone(weights)

        got_sample, got_nbr = self.evaluate([sample_tensors, nbr_tensors])
        for name in ('F0', 'F1'):
            self.assertAllEqual(got_sample[name], want_sample[name])
        self.assertEmpty(got_nbr)
    def testSampleFeatureOnlyExtractionWithNoNeighbors(self):
        """Test sample feature extraction without neighbor features."""
        # Simulate batch size of 1.
        features = {
            'F0': tf.constant([[1.0, 2.0]]),
            'F1': tf.constant([[3.0, 4.0, 5.0]]),
        }

        expected_sample_features = {
            'F0': tf.constant([[1.0, 2.0]]),
            'F1': tf.constant([[3.0, 4.0, 5.0]]),
        }

        neighbor_config = configs.GraphNeighborConfig(max_neighbors=0)
        sample_features, nbr_features, nbr_weights = utils.unpack_neighbor_features(
            features, neighbor_config)
        self.assertIsNone(nbr_weights)

        # Evaluate the outputs and assert on the evaluated values, instead of
        # running a session and discarding its result. This matches the
        # `self.evaluate` pattern used by the sibling tests.
        sample_features, nbr_features = self.evaluate(
            [sample_features, nbr_features])
        self.assertAllEqual(sample_features['F0'],
                            expected_sample_features['F0'])
        self.assertAllEqual(sample_features['F1'],
                            expected_sample_features['F1'])
        self.assertEmpty(nbr_features)
Esempio n. 10
0
    def testEmptyFeatures(self):
        """An empty input dictionary unpacks to empty outputs and no weights."""
        config = configs.GraphNeighborConfig(max_neighbors=0)
        unpacked_sample, unpacked_nbr, unpacked_weights = (
            utils.unpack_neighbor_features({}, config))
        self.assertIsNone(unpacked_weights)

        # A filler constant is evaluated alongside the (empty) outputs so that
        # the computation graph is not empty.
        filler = tf.constant(1.0)
        evaluated = self.evaluate([unpacked_sample, unpacked_nbr, filler])
        self.assertEmpty(evaluated[0])
        self.assertEmpty(evaluated[1])
  def call(self, inputs, keep_rank=False):
    """Splits `inputs` into sample features, neighbor features and weights.

    Thin wrapper that forwards to `utils.unpack_neighbor_features` with this
    object's `_neighbor_config`; refer to that function for the expected input
    format and the exact structure of the return values.

    Args:
      inputs: Dictionary of `tf.Tensor` features with keys for neighbors and
        weights described by `neighbor_config`.
      keep_rank: Defaults to `False`. If `True`, each value of
        `neighbor_features` will have an extra neighborhood size dimension at
        axis 1.

    Returns:
      A tuple (sample_features, neighbor_features, neighbor_weights) of tensors,
      exactly as returned by `utils.unpack_neighbor_features`.
    """
    unpacked = utils.unpack_neighbor_features(
        inputs, self._neighbor_config, keep_rank=keep_rank)
    return unpacked
    def testBatchedSampleAndNeighborFeatureExtraction(self):
        """Test input contains two samples with one feature and three neighbors."""
        # Simulate a batch size of 2.
        features = {
            'F0': tf.constant(11.0, shape=[2, 2]),
            'NL_nbr_0_F0': tf.constant(22.0, shape=[2, 2]),
            'NL_nbr_0_weight': tf.constant(0.25, shape=[2, 1]),
            'NL_nbr_1_F0': tf.constant(33.0, shape=[2, 2]),
            'NL_nbr_1_weight': tf.constant(0.75, shape=[2, 1]),
            'NL_nbr_2_F0': tf.constant(44.0, shape=[2, 2]),
            'NL_nbr_2_weight': tf.constant(1.0, shape=[2, 1]),
        }

        expected_sample_features = {
            'F0': tf.constant(11.0, shape=[2, 2]),
        }

        # The key in this dictionary will contain the original sample's feature
        # name. The shape of the corresponding tensor will be 6x2, which is the
        # result of doing an interleaved merge of three 2x2 tensors along axis 0.
        expected_neighbor_features = {
            'F0':
            tf.constant([[22.0, 22.0], [33.0, 33.0], [44.0, 44.0],
                         [22.0, 22.0], [33.0, 33.0], [44.0, 44.0]]),
        }
        # The shape of this tensor is 6x1, which is the result of doing an
        # interleaved merge of three 2x1 tensors along axis 0.
        expected_neighbor_weights = tf.constant([[0.25], [0.75], [1.0], [0.25],
                                                 [0.75], [1.0]])

        neighbor_config = configs.GraphNeighborConfig(max_neighbors=3)
        # Evaluate the outputs and assert on the evaluated values, instead of
        # running a session and discarding its result. This matches the
        # `self.evaluate` pattern used by the sibling tests.
        sample_features, nbr_features, nbr_weights = self.evaluate(
            utils.unpack_neighbor_features(features, neighbor_config))

        self.assertAllEqual(sample_features['F0'],
                            expected_sample_features['F0'])
        self.assertAllEqual(nbr_features['F0'],
                            expected_neighbor_features['F0'])
        self.assertAllEqual(nbr_weights, expected_neighbor_weights)
Esempio n. 13
0
 def _unpack_neighbor_features(features):
     """Unpacks `features` using `neighbor_config` from the surrounding scope."""
     unpacked = utils.unpack_neighbor_features(features, neighbor_config)
     return unpacked
Esempio n. 14
0
    def testSparseFeature(self):
        """Unpacks a batch whose samples and neighbors have a sparse feature."""
        # Simulate batch size of 2. 'F0' is dense while 'F1' is a SparseTensor,
        # both for the sample and for each of its two neighbors.
        inputs = {
            'F0':
            tf.constant(11.0, shape=[2, 2]),
            'F1':
            tf.SparseTensor(indices=[[0, 0], [0, 1]],
                            values=[1.0, 2.0],
                            dense_shape=[2, 4]),
            'NL_nbr_0_F0':
            tf.constant(22.0, shape=[2, 2]),
            'NL_nbr_0_F1':
            tf.SparseTensor(indices=[[1, 0], [1, 1]],
                            values=[3.0, 4.0],
                            dense_shape=[2, 4]),
            'NL_nbr_0_weight':
            tf.constant(0.25, shape=[2, 1]),
            'NL_nbr_1_F0':
            tf.constant(33.0, shape=[2, 2]),
            'NL_nbr_1_F1':
            tf.SparseTensor(indices=[[0, 2], [1, 3]],
                            values=[5.0, 6.0],
                            dense_shape=[2, 4]),
            'NL_nbr_1_weight':
            tf.constant(0.75, shape=[2, 1]),
        }

        want_sample = {
            'F0':
            tf.constant(11.0, shape=[2, 2]),
            'F1':
            tf.SparseTensor(indices=[[0, 0], [0, 1]],
                            values=[1.0, 2.0],
                            dense_shape=[2, 4]),
        }

        # Neighbor outputs are keyed by the original sample feature names.
        want_nbr = {
            # 'F0' is expected to be 4x2: an interleaved merge of two 2x2
            # tensors along axis 0.
            'F0':
            tf.constant([[22, 22], [33, 33], [22, 22], [33, 33]]),
            # 'F1' is expected to be 4x4: an interleaved merge of two 2x4
            # tensors along axis 0.
            'F1':
            tf.SparseTensor(indices=[[1, 2], [2, 0], [2, 1], [3, 3]],
                            values=[5.0, 3.0, 4.0, 6.0],
                            dense_shape=[4, 4]),
        }
        # Expected to be 4x1: an interleaved merge of two 2x1 tensors along
        # axis 0.
        want_weights = tf.constant([[0.25], [0.75], [0.25],
                                    [0.75]])

        config = configs.GraphNeighborConfig(max_neighbors=2)
        unpacked = utils.unpack_neighbor_features(inputs, config)
        got_sample, got_nbr, got_weights = self.evaluate(unpacked)

        # Sparse results are compared component by component.
        sparse_parts = ('values', 'indices', 'dense_shape')

        self.assertAllEqual(got_sample['F0'], want_sample['F0'])
        for part in sparse_parts:
            self.assertAllEqual(getattr(got_sample['F1'], part),
                                getattr(want_sample['F1'], part))

        self.assertAllEqual(got_nbr['F0'], want_nbr['F0'])
        for part in sparse_parts:
            self.assertAllEqual(getattr(got_nbr['F1'], part),
                                getattr(want_nbr['F1'], part))

        self.assertAllEqual(got_weights, want_weights)
    def graph_reg_model_fn(features, labels, mode, params=None, config=None):
        """Model function that adds a graph regularization term to the loss.

        Wraps `base_model_fn` from the enclosing scope. In training mode, when
        neighbor inputs exist and the regularization multiplier is positive,
        the returned spec's loss is the base loss plus the scaled graph loss,
        and its train op minimizes that total. Otherwise the base spec is
        returned unchanged.

        Args:
          features: This is the first item returned from the `input_fn` passed
            to `train`, `evaluate`, and `predict`. This should be a dictionary
            containing sample features as well as corresponding neighbor
            features and neighbor weights.
          labels: This is the second item returned from the `input_fn` passed
            to `train`, `evaluate`, and `predict`. This should be a single
            `Tensor` or `dict` of same (for multi-head models). If mode is
            `tf.estimator.ModeKeys.PREDICT`, `labels=None` will be passed. If
            the `model_fn`'s signature does not accept `mode`, the `model_fn`
            must still be able to handle `labels=None`.
          mode: Optional. Specifies if this is training, evaluation, or
            prediction. See `tf.estimator.ModeKeys`.
          params: Optional `dict` of hyperparameters. Will receive what is
            passed to Estimator in the `params` parameter. Forwarded to the
            base model function and to the embedding function only when the
            base model function accepts a `params` argument.
          config: Optional `tf.estimator.RunConfig` object. Will receive what
            is passed to Estimator as its `config` parameter, or a default
            value. Forwarded to the base model function only when it accepts a
            `config` argument; not used otherwise.

        Returns:
          A `tf.estimator.EstimatorSpec` with graph regularization.
        """
        # 'params' and 'config' are optional, so base_model_fn may not declare
        # them; forward each one only when its signature lists it. See the
        # documentation for tf.estimator.Estimator for additional context.
        accepts_params = 'params' in base_model_fn_args
        accepts_config = 'config' in base_model_fn_args
        base_fn_kwargs = {'mode': mode}
        embed_kwargs = {}
        if accepts_params:
            base_fn_kwargs['params'] = params
            embed_kwargs['params'] = params
        if accepts_config:
            base_fn_kwargs['config'] = config

        training = mode == tf.estimator.ModeKeys.TRAIN
        neighbor_config = graph_reg_config.neighbor_config

        # The original objective and the graph regularization term are built
        # under the same (reused) variable scope.
        with tf.compat.v1.variable_scope(tf.compat.v1.get_variable_scope(),
                                         reuse=tf.compat.v1.AUTO_REUSE,
                                         auxiliary_name_scope=False):
            if training:
                # Training needs the sample features as well as the neighbor
                # features and weights.
                sample_features, nbr_features, nbr_weights = (
                    utils.unpack_neighbor_features(features, neighbor_config))
            else:
                # Outside of training only the sample's own features are used.
                sample_features = utils.strip_neighbor_features(
                    features, neighbor_config)
                nbr_features, nbr_weights = {}, None

            base_spec = base_model_fn(sample_features, labels,
                                      **base_fn_kwargs)

            # Graph regularization applies only when neighbor inputs exist,
            # the mode is training, and the multiplier is greater than zero;
            # in every other case the base spec is returned as is.
            if nbr_weights is None or not nbr_features:
                return base_spec
            if not training or graph_reg_config.multiplier <= 0:
                return base_spec

            # Embeddings of the samples first, then of their neighbors.
            sample_embeddings = embedding_fn(sample_features, mode,
                                             **embed_kwargs)
            nbr_embeddings = embedding_fn(nbr_features, mode, **embed_kwargs)

            replicated_sample_embeddings = utils.replicate_embeddings(
                sample_embeddings, neighbor_config.max_neighbors)

            # Distance between each sample embedding and the embeddings of its
            # corresponding neighbors.
            graph_loss = distances.pairwise_distance_wrapper(
                replicated_sample_embeddings,
                nbr_embeddings,
                weights=nbr_weights,
                distance_config=graph_reg_config.distance_config)
            scaled_graph_loss = graph_reg_config.multiplier * graph_loss
            tf.compat.v1.summary.scalar('loss/scaled_graph_loss',
                                        scaled_graph_loss)

            supervised_loss = base_spec.loss
            tf.compat.v1.summary.scalar('loss/supervised_loss',
                                        supervised_loss)

            total_loss = supervised_loss + scaled_graph_loss

            if optimizer_fn:
                optimizer = optimizer_fn()
            else:
                # Default to Adagrad optimizer, the same as the canned
                # DNNEstimator.
                optimizer = tf.compat.v1.train.AdagradOptimizer(
                    learning_rate=0.05)
            train_op = optimizer.minimize(
                loss=total_loss,
                global_step=tf.compat.v1.train.get_global_step())

            # Group any ops registered in the UPDATE_OPS collection with the
            # training op so they run together.
            pending_updates = tf.compat.v1.get_collection(
                tf.compat.v1.GraphKeys.UPDATE_OPS)
            if pending_updates:
                train_op = tf.group(train_op, *pending_updates)

        return base_spec._replace(loss=total_loss, train_op=train_op)
  def graph_reg_model_fn(features, labels, mode, params=None, config=None):
    """The graph-regularized model function.

    Wraps `base_model_fn` from the enclosing scope. When the mode is training,
    neighbor inputs exist and the regularization multiplier is positive, the
    returned spec's loss is the base loss plus the scaled graph loss and its
    train op minimizes that total. Otherwise the base spec is returned as is.

    Args:
      features: This is the first item returned from the `input_fn` passed to
        `train`, `evaluate`, and `predict`. This should be a dictionary
        containing sample features as well as corresponding neighbor features
        and neighbor weights.
      labels: This is the second item returned from the `input_fn` passed to
        `train`, `evaluate`, and `predict`. This should be a single `Tensor` or
        `dict` of same (for multi-head models). If mode is
        `tf.estimator.ModeKeys.PREDICT`, `labels=None` will be passed. If the
        `model_fn`'s signature does not accept `mode`, the `model_fn` must still
        be able to handle `labels=None`.
      mode: Optional. Specifies if this is training, evaluation, or prediction.
        See `tf.estimator.ModeKeys`.
      params: Optional `dict` of hyperparameters. Will receive what is passed to
        Estimator in the `params` parameter. This allows users to configure
        Estimators from hyper parameter tuning.
      config: Optional `tf.estimator.RunConfig` object. Will receive what is
        passed to Estimator as its `config` parameter, or a default value.
        Only forwarded to `base_model_fn`; not used otherwise.

    Returns:
      A `tf.estimator.EstimatorSpec` whose loss incorporates graph-based
      regularization.
    """

    # Uses the same variable scope for calculating the original objective and
    # the graph regularization loss term.
    with tf.compat.v1.variable_scope(
        tf.compat.v1.get_variable_scope(),
        reuse=tf.compat.v1.AUTO_REUSE,
        auxiliary_name_scope=False):
      # Extract sample features, neighbor features, and neighbor weights.
      sample_features, nbr_features, nbr_weights = (
          utils.unpack_neighbor_features(features,
                                         graph_reg_config.neighbor_config))

      # If no 'params' is passed, then it is possible for base_model_fn not to
      # accept a 'params' argument. See documentation for tf.estimator.Estimator
      # for additional context.
      # NOTE(review): in the else branch `config` is passed positionally as the
      # fourth argument; if base_model_fn declares `params` in that position it
      # would receive the config object instead. Confirm against the supported
      # base_model_fn signatures.
      if params:
        base_spec = base_model_fn(sample_features, labels, mode, params, config)
      else:
        base_spec = base_model_fn(sample_features, labels, mode, config)

      has_nbr_inputs = nbr_weights is not None and nbr_features

      # Graph regularization happens only if all the following conditions are
      # satisfied:
      # - the mode is training
      # - neighbor inputs exist
      # - the graph regularization multiplier is greater than zero.
      # So, return early if any of these conditions is false.
      if (not has_nbr_inputs or mode != tf.estimator.ModeKeys.TRAIN or
          graph_reg_config.multiplier <= 0):
        return base_spec

      # Compute sample embeddings.
      sample_embeddings = embedding_fn(sample_features, mode)

      # Compute the embeddings of the neighbors.
      nbr_embeddings = embedding_fn(nbr_features, mode)

      replicated_sample_embeddings = utils.replicate_embeddings(
          sample_embeddings, graph_reg_config.neighbor_config.max_neighbors)

      # Compute the distance between the sample embeddings and each of their
      # corresponding neighbor embeddings.
      graph_loss = distances.pairwise_distance_wrapper(
          replicated_sample_embeddings,
          nbr_embeddings,
          weights=nbr_weights,
          distance_config=graph_reg_config.distance_config)
      total_loss = base_spec.loss + graph_reg_config.multiplier * graph_loss

      if not optimizer_fn:
        # Default to Adagrad optimizer, the same as the canned DNNEstimator.
        # Referenced through `tf.compat.v1`, like every other TF1-style symbol
        # in this function, so the lookup does not depend on `tf.train`
        # exposing the V1 optimizers.
        optimizer = tf.compat.v1.train.AdagradOptimizer(learning_rate=0.05)
      else:
        optimizer = optimizer_fn()
      final_train_op = optimizer.minimize(
          loss=total_loss, global_step=tf.compat.v1.train.get_global_step())

      # Ops registered in the UPDATE_OPS collection (e.g. by batch
      # normalization layers) are not run by `minimize`; group them with the
      # train op so they execute on every training step.
      update_ops = tf.compat.v1.get_collection(
          tf.compat.v1.GraphKeys.UPDATE_OPS)
      if update_ops:
        final_train_op = tf.group(final_train_op, *update_ops)

    return base_spec._replace(loss=total_loss, train_op=final_train_op)