Esempio n. 1
0
 def test_denylisted_column(self):
   """Expects embedding_column to raise TypeError for a hash-bucket column."""
   # The hashed column is built outside the assertRaises scope so that only
   # the embedding_column call itself is expected to raise.
   hashed_column = fc_lib.categorical_column_with_hash_bucket(
       key='aaa', hash_bucket_size=3)
   with self.assertRaises(TypeError):
     tpu_fc.embedding_column(hashed_column, dimension=2)
Esempio n. 2
0
  def test_get_dense_tensor(self):
    """Checks the dense lookup produced by a TPU embedding column.

    Feeds a four-example sparse id batch through an embedding column backed by
    a fixed 3x2 table, then asserts on the created variable and the lookups.
    """
    num_ids = 3
    embedding_dim = 2

    # Sparse ids per example: 0 -> [2], 1 -> [0, 1], 2 -> [], 3 -> [1].
    ids = sparse_tensor.SparseTensorValue(
        indices=((0, 0), (1, 0), (1, 4), (3, 0)),
        values=(2, 0, 1, 1),
        dense_shape=(4, 5))

    # Embedding table, one row per id.
    table = (
        (1., 2.),  # id 0
        (3., 5.),  # id 1
        (7., 11.)  # id 2
    )

    def _initializer(shape, dtype, partition_info):
      # Validate what the column requests before handing back the fixed table.
      self.assertAllEqual((num_ids, embedding_dim), shape)
      self.assertEqual(dtypes.float32, dtype)
      self.assertIsNone(partition_info)
      return table

    # Per-example results expected under the 'mean' combiner.
    expected = (
        (7., 11.),  # example 0: ids [2]
        (2., 3.5),  # example 1: ids [0, 1] -> mean of [1, 2] and [3, 5]
        (0., 0.),  # example 2: no ids
        (3., 5.),  # example 3: ids [1]
    )

    id_column = fc_lib.categorical_column_with_identity(
        key='aaa', num_buckets=num_ids)
    column = tpu_fc.embedding_column(
        id_column, dimension=embedding_dim, initializer=_initializer)

    # Feed the sparse batch through the column to obtain the dense lookup.
    features = {'aaa': ids}
    lookup = column._get_dense_tensor(fc._LazyBuilder(features))

    # Exactly one global variable, named 'embedding_weights:0', should exist.
    variables = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
    self.assertItemsEqual(('embedding_weights:0',),
                          tuple(v.name for v in variables))
    with _initialized_session():
      self.assertAllEqual(table, variables[0])
      self.assertAllEqual(expected, lookup)
  def test_get_dense_tensor(self):
    """Evaluates an embedding lookup in a session and compares the values.

    A sparse batch of ids is passed through a TPU embedding column whose table
    is supplied by a checking initializer; the variable and the lookup tensor
    are then evaluated and compared against hand-computed values.
    """
    vocab = 3
    dim = 2

    # Four examples holding ids [2], [0, 1], [] and [1] respectively.
    sparse_ids = sparse_tensor.SparseTensorValue(
        indices=((0, 0), (1, 0), (1, 4), (3, 0)),
        values=(2, 0, 1, 1),
        dense_shape=(4, 5))

    # Row i is the embedding of id i.
    weights = (
        (1., 2.),
        (3., 5.),
        (7., 11.),
    )

    def _initializer(shape, dtype, partition_info):
      # The column is expected to ask for a float32 (vocab, dim) variable
      # without partitioning.
      self.assertAllEqual((vocab, dim), shape)
      self.assertEqual(dtypes.float32, dtype)
      self.assertIsNone(partition_info)
      return weights

    # Hand-computed lookups for combiner='mean'.
    want = (
        (7., 11.),  # ids [2]
        (2., 3.5),  # ids [0, 1]: mean([1, 2] + [3, 5])
        (0., 0.),  # ids []
        (3., 5.),  # ids [1]
    )

    identity_column = fc_lib.categorical_column_with_identity(
        key='aaa', num_buckets=vocab)
    column = tpu_fc.embedding_column(
        identity_column, dimension=dim, initializer=_initializer)

    builder = fc._LazyBuilder({'aaa': sparse_ids})
    got = column._get_dense_tensor(builder)

    # Only the embedding weights variable should have been created.
    created = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
    created_names = tuple(v.name for v in created)
    self.assertItemsEqual(('embedding_weights:0',), created_names)
    with _initialized_session():
      self.assertAllEqual(weights, created[0].eval())
      self.assertAllEqual(want, got.eval())
Esempio n. 4
0
def create_feature_columns(params):
  """Builds the user and item embedding feature columns.

  Args:
    params: Mapping read for the keys "num_users", "num_items", "mf_dim" and
      "mlp_dim".

  Returns:
    A two-element list holding the user embedding column followed by the item
    embedding column.
  """
  initializer = tf.random_normal_initializer(0., 0.01)

  # Identity categorical columns for the user ids, then the item ids.
  id_columns = [
      tf.feature_column.categorical_column_with_identity(
          key=key, num_buckets=params[size_key])
      for key, size_key in (("user_id", "num_users"), ("item_id", "num_items"))
  ]

  # Both embeddings share one width: mf_dim + mlp_dim.
  embedding_dim = params["mf_dim"] + params["mlp_dim"]
  return [
      feature_column.embedding_column(
          categorical_column=id_column,
          dimension=embedding_dim,
          combiner=None,
          initializer=initializer)
      for id_column in id_columns
  ]
Esempio n. 5
0
 def test_defaults(self):
      """Checks the attributes of an embedding column built with default args."""
      dim = 2
      id_column = fc_lib.categorical_column_with_identity(
          key='aaa', num_buckets=3)
      column = tpu_fc.embedding_column(id_column, dimension=dim)

      # The wrapped column and dimension are exposed as given; the combiner
      # is expected to default to 'mean'.
      self.assertIs(id_column, column.categorical_column)
      self.assertEqual(dim, column.dimension)
      self.assertEqual('mean', column.combiner)

      # The name and the variable scope name are expected to be identical.
      self.assertEqual('aaa_embedding', column.name)
      self.assertEqual('aaa_embedding', column._var_scope_name)

      self.assertEqual((dim,), column._variable_shape)
      expected_spec = {'aaa': parsing_ops.VarLenFeature(dtypes.int64)}
      self.assertEqual(expected_spec, column._parse_example_spec)
 def test_defaults(self):
   """Verifies default-constructed embedding column properties."""
   source_column = fc_lib.categorical_column_with_identity(
       key='aaa', num_buckets=3)
   width = 2
   embedding = tpu_fc.embedding_column(source_column, dimension=width)

   # Values handed to the constructor, plus the default combiner.
   self.assertIs(source_column, embedding.categorical_column)
   self.assertEqual(width, embedding.dimension)
   self.assertEqual('mean', embedding.combiner)

   # Naming, variable shape and parsing spec.
   self.assertEqual('aaa_embedding', embedding.name)
   self.assertEqual('aaa_embedding', embedding._var_scope_name)
   self.assertEqual((width,), embedding._variable_shape)
   spec = {'aaa': parsing_ops.VarLenFeature(dtypes.int64)}
   self.assertEqual(spec, embedding._parse_example_spec)
Esempio n. 7
0
 def test_custom_column(self):
   """Checks that an identity categorical column is accepted by embedding_column.

   Per the original author's note, this column is not in any allowlist but
   should succeed because it inherits from V2 CategoricalColumn.
   """
   dim = 2
   id_column = fc_lib.categorical_column_with_identity(
       key='aaa', num_buckets=10)
   column = tpu_fc.embedding_column(id_column, dimension=dim)

   # Construction succeeded; the remaining checks cover the column's
   # attributes.
   self.assertIs(id_column, column.categorical_column)
   self.assertEqual(dim, column.dimension)
   self.assertEqual('mean', column.combiner)
   self.assertEqual('aaa_embedding', column.name)
   self.assertEqual('aaa_embedding', column._var_scope_name)
   self.assertEqual((dim,), column._variable_shape)
   expected_spec = {'aaa': parsing_ops.VarLenFeature(dtypes.int64)}
   self.assertEqual(expected_spec, column._parse_example_spec)
Esempio n. 8
0
 def test_all_constructor_args(self):
      """Checks that explicit combiner/initializer arguments are accepted.

      The combiner string passed in is expected to be exposed unchanged on the
      resulting column.
      """
      dim = 2
      id_column = fc_lib.categorical_column_with_identity(
          key='aaa', num_buckets=3)
      column = tpu_fc.embedding_column(
          id_column,
          dimension=dim,
          combiner='my_combiner',
          initializer=lambda: 'my_initializer')

      # Constructor arguments round-trip through the column's attributes.
      self.assertIs(id_column, column.categorical_column)
      self.assertEqual(dim, column.dimension)
      self.assertEqual('my_combiner', column.combiner)

      # Naming, variable shape and parsing spec.
      self.assertEqual('aaa_embedding', column.name)
      self.assertEqual('aaa_embedding', column._var_scope_name)
      self.assertEqual((dim,), column._variable_shape)
      expected_spec = {'aaa': parsing_ops.VarLenFeature(dtypes.int64)}
      self.assertEqual(expected_spec, column._parse_example_spec)
 def test_all_constructor_args(self):
   """Builds an embedding column with every argument set and inspects it."""
   source_column = fc_lib.categorical_column_with_identity(
       key='aaa', num_buckets=3)
   width = 2
   embedding = tpu_fc.embedding_column(
       source_column,
       dimension=width,
       combiner='my_combiner',
       initializer=lambda: 'my_initializer')

   # The wrapped column, dimension and custom combiner are exposed as given.
   self.assertIs(source_column, embedding.categorical_column)
   self.assertEqual(width, embedding.dimension)
   self.assertEqual('my_combiner', embedding.combiner)

   # The name and the variable scope name are expected to be identical.
   self.assertEqual('aaa_embedding', embedding.name)
   self.assertEqual('aaa_embedding', embedding._var_scope_name)

   self.assertEqual((width,), embedding._variable_shape)
   spec = {'aaa': parsing_ops.VarLenFeature(dtypes.int64)}
   self.assertEqual(spec, embedding._parse_example_spec)