def test_error_dense_shape_invalid(self):
    """Reject a `tensor_core_shape` that does not have exactly two entries.

    Passes a three-element `tensor_core_shape` to
    `tpu_fc.shared_embedding_columns_v2` and expects a `ValueError` whose
    message matches 'tensor_core_shape must be size 2'.
    """
    categorical_column_input = fc_lib.categorical_column_with_identity(
        key='inp', num_buckets=5)
    # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
    # which unittest removed in Python 3.12.
    with self.assertRaisesRegex(ValueError,
                                'tensor_core_shape must be size 2'):
        tpu_fc.shared_embedding_columns_v2(
            [categorical_column_input],
            dimension=20,
            tensor_core_shape=[None, 20, 15])
def test_invalid_cases(self, shared):
    """Check that unsupported lookup-device configurations raise.

    Two configurations are exercised; each must raise `ValueError` when the
    `DenseFeatures` layer is applied to the inputs:
      * `embedding_lookup_device='cpu'` outside of an inference context.
      * `embedding_lookup_device='tpu_embedding_core'` inside a
        `tpu._TPUInferenceContext`.

    Args:
      shared: If true, the column is built with
        `tpu_fc.shared_embedding_columns_v2`; otherwise with
        `tpu_fc.embedding_column_v2`.
    """
    # Inputs.
    input_sparse_tensor = sparse_tensor.SparseTensorValue(
        indices=((0, 0), (1, 0), (1, 1), (1, 4)),
        values=(2, 0, 1, 3),
        dense_shape=(2, 5))
    input_features = {'inp': input_sparse_tensor}

    # Build columns.
    categorical_column_input = fc_lib.categorical_column_with_identity(
        key='inp', num_buckets=3)

    def _make_embedding_column(lookup_device):
        # Build the column under test for the requested lookup device. The
        # shared variant takes (and returns) a list of columns.
        if shared:
            return tpu_fc.shared_embedding_columns_v2(
                [categorical_column_input],
                dimension=2,
                embedding_lookup_device=lookup_device,
                tensor_core_shape=[None, 3])
        return tpu_fc.embedding_column_v2(
            categorical_column_input,
            dimension=2,
            embedding_lookup_device=lookup_device,
            tensor_core_shape=[None, 3])

    # Training on TPU with cpu embedding lookups is not supported.
    dense_features = fc_lib.DenseFeatures(_make_embedding_column('cpu'))
    with self.assertRaisesRegex(
            ValueError,
            r'.*embedding_lookup_device=\"cpu\" during training is not'):
        dense_features(input_features)

    # Inference with TPU Embedding Hardware is not supported.
    embedding_column = _make_embedding_column('tpu_embedding_core')
    context = tpu._TPUInferenceContext('tpu_inference')
    context.Enter()
    try:
        dense_features = fc_lib.DenseFeatures(embedding_column)
        with self.assertRaisesRegex(
                ValueError,
                r'Using embedding_lookup_device=tpu_embedding_core during '
                r'inference is '):
            dense_features(input_features)
    finally:
        # Always leave the inference context, even when the assertion above
        # fails, so the entered context does not outlive this test.
        context.Exit()
def test_defaults(self):
    """Verify shared embedding columns built with default arguments.

    Two identity categorical columns ('aaa' and 'bbb') are passed to
    `tpu_fc.shared_embedding_columns_v2` with only `dimension` set. The
    test checks, for both resulting columns, the wrapped categorical
    column, the embedding table size, the 'mean' combiner, a non-None
    initializer, the shared variable name, the column name and the
    variable shape.
    """
    vocabulary_size = 3
    categorical_column_a = fc_lib.categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    categorical_column_b = fc_lib.categorical_column_with_identity(
        key='bbb', num_buckets=vocabulary_size)
    embedding_dimension = 2
    # Columns are deliberately passed as [b, a]; the assertions on
    # get_embedding_var_name() below expect 'aaa_bbb_shared_embedding'
    # for this input order.
    embedding_column_b, embedding_column_a = (
        tpu_fc.shared_embedding_columns_v2(
            [categorical_column_b, categorical_column_a],
            dimension=embedding_dimension))

    self.assertIs(categorical_column_a, embedding_column_a.categorical_column)
    self.assertIs(categorical_column_b, embedding_column_b.categorical_column)

    expected_table_size = (vocabulary_size, embedding_dimension)
    self.assertEqual(expected_table_size,
                     embedding_column_a.get_embedding_table_size())
    # Previously this assertion re-checked column a, leaving column b's
    # table size untested.
    self.assertEqual(expected_table_size,
                     embedding_column_b.get_embedding_table_size())

    self.assertEqual('mean', embedding_column_a.combiner)
    self.assertEqual('mean', embedding_column_b.combiner)
    self.assertIsNotNone(embedding_column_a.get_initializer())
    self.assertIsNotNone(embedding_column_b.get_initializer())
    self.assertEqual('aaa_bbb_shared_embedding',
                     embedding_column_a.get_embedding_var_name())
    self.assertEqual('aaa_bbb_shared_embedding',
                     embedding_column_b.get_embedding_var_name())
    self.assertEqual('aaa_shared_embedding', embedding_column_a.name)
    self.assertEqual('bbb_shared_embedding', embedding_column_b.name)
    self.assertEqual((embedding_dimension,),
                     embedding_column_a.variable_shape)
    self.assertEqual((embedding_dimension,),
                     embedding_column_b.variable_shape)
def test_all_constructor_args(self):
    """Verify shared embedding columns honor explicit constructor arguments.

    Builds two shared embedding columns with a custom `combiner`,
    `initializer` and `shared_embedding_collection_name`, then checks that
    both columns report those values along with the expected categorical
    column, embedding table size, column name and variable shape.
    """
    vocabulary_size = 3
    categorical_column_a = fc_lib.categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    categorical_column_b = fc_lib.categorical_column_with_identity(
        key='bbb', num_buckets=vocabulary_size)
    embedding_dimension = 2
    embedding_column_a, embedding_column_b = (
        tpu_fc.shared_embedding_columns_v2(
            [categorical_column_a, categorical_column_b],
            dimension=embedding_dimension,
            combiner='my_combiner',
            initializer=lambda: 'my_initializer',
            shared_embedding_collection_name='var_scope_name'))

    self.assertIs(categorical_column_a, embedding_column_a.categorical_column)
    self.assertIs(categorical_column_b, embedding_column_b.categorical_column)

    expected_table_size = (vocabulary_size, embedding_dimension)
    self.assertEqual(expected_table_size,
                     embedding_column_a.get_embedding_table_size())
    # Previously this assertion re-checked column a, leaving column b's
    # table size untested.
    self.assertEqual(expected_table_size,
                     embedding_column_b.get_embedding_table_size())

    self.assertEqual('my_combiner', embedding_column_a.combiner)
    self.assertEqual('my_combiner', embedding_column_b.combiner)
    # get_initializer() returns the callable passed in; calling it yields
    # the sentinel string supplied by the lambda above.
    self.assertEqual('my_initializer', embedding_column_a.get_initializer()())
    self.assertEqual('my_initializer', embedding_column_b.get_initializer()())
    self.assertEqual('var_scope_name',
                     embedding_column_a.get_embedding_var_name())
    self.assertEqual('var_scope_name',
                     embedding_column_b.get_embedding_var_name())
    self.assertEqual('aaa_shared_embedding', embedding_column_a.name)
    self.assertEqual('bbb_shared_embedding', embedding_column_b.name)
    self.assertEqual((embedding_dimension,),
                     embedding_column_a.variable_shape)
    self.assertEqual((embedding_dimension,),
                     embedding_column_b.variable_shape)
def test_deepcopy(self):
    """Deep-copied shared embedding columns keep their collection names.

    Builds two shared embedding columns, deep-copies the returned
    collection, and compares the `_shared_embedding_collection_name` of
    every original column with that of its copy, in order.
    """
    num_buckets = 3
    embedding_dim = 2

    def _collection_names(cols):
        # Collect the private shared-collection name of each column.
        names = []
        for col in cols:
            names.append(col._shared_embedding_collection_name)
        return names

    column_a = fc_lib.categorical_column_with_identity(
        key='aaa', num_buckets=num_buckets)
    column_b = fc_lib.categorical_column_with_identity(
        key='bbb', num_buckets=num_buckets)
    originals = tpu_fc.shared_embedding_columns_v2(
        [column_b, column_a], dimension=embedding_dim)
    duplicates = copy.deepcopy(originals)

    self.assertEqual(_collection_names(originals),
                     _collection_names(duplicates))
def test_dense_embedding_lookup(self, shared, combiner):
    """Check dense (tensor-core) embedding lookups in an inference context.

    Builds an embedding column with
    `embedding_lookup_device='tpu_tensor_core'`, applies it through a
    `DenseFeatures` layer inside a `tpu._TPUInferenceContext`, and compares
    the created variable and the lookup result with hand-computed values.

    Args:
      shared: If true, the column is built with
        `tpu_fc.shared_embedding_columns_v2`; otherwise with
        `tpu_fc.embedding_column_v2`.
      combiner: One of 'mean', 'sum' or 'sqrtn'. For 'sqrtn' the test only
        expects a `ValueError` when the layer is applied.
    """
    # Inputs.
    vocabulary_size = 3
    input_sparse_tensor = sparse_tensor.SparseTensorValue(
        # example 0, ids [2]
        # example 1, ids [0, 1, 3]
        indices=((0, 0), (1, 0), (1, 1), (1, 4)),
        values=(2, 0, 1, 3),
        dense_shape=(2, 5))
    input_features = {'inp': input_sparse_tensor}

    # Embedding variable.
    # NOTE(review): four rows are listed although vocabulary_size is 3 and
    # the initializer asserts a (3, 2) shape - confirm this is intended.
    embedding_dimension = 2
    embedding_values = (
        (1., 2.),  # id 0
        (3., 5.),  # id 1
        (7., 11.),  # id 2
        (13., 17.)  # id 3
    )

    def _initializer(shape, dtype, partition_info=None):
        self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
        self.assertEqual(dtypes.float32, dtype)
        self.assertIsNone(partition_info)
        return embedding_values

    # Expected lookup results per supported combiner.
    expected_by_combiner = {
        'mean': (
            # example 0:
            (7., 11.),  # ids [2], embedding = [7, 11]
            # example 1:
            (2., 3.5),  # ids [0, 1], embedding = mean([1, 2] + [3, 5])
        ),
        'sum': (
            # example 0:
            (7., 11.),  # ids [2], embedding = [7, 11]
            # example 1:
            (4., 7.),  # ids [0, 1], embedding = sum([1, 2] + [3, 5])
        ),
    }

    # Build columns.
    categorical_column_input = fc_lib.categorical_column_with_identity(
        key='inp', num_buckets=vocabulary_size)

    # tensor_core_shape is [None, 3]; the None batch dimension is meant to
    # exercise a dynamic batch size.
    # NOTE(review): the input has 5 columns and the expected lookups for
    # example 1 only cover ids [0, 1], so the width of 3 presumably drops
    # the id stored at column 4 - confirm.
    if shared:
        embedding_column = tpu_fc.shared_embedding_columns_v2(
            [categorical_column_input],
            dimension=embedding_dimension,
            initializer=_initializer,
            combiner=combiner,
            embedding_lookup_device='tpu_tensor_core',
            tensor_core_shape=[None, 3])
    else:
        embedding_column = tpu_fc.embedding_column_v2(
            categorical_column_input,
            dimension=embedding_dimension,
            initializer=_initializer,
            combiner=combiner,
            embedding_lookup_device='tpu_tensor_core',
            tensor_core_shape=[None, 3])

    # Run in TPUInferenceContext so that we hit the intended densification
    # case.
    context = tpu._TPUInferenceContext('tpu_inference')
    context.Enter()
    try:
        dense_features = fc_lib.DenseFeatures(embedding_column)

        # Sqrtn combiner not supported for now.
        if combiner == 'sqrtn':
            with self.assertRaisesRegex(
                    ValueError,
                    'Dense TPU Embedding does not support combiner'):
                dense_features(input_features)
            return

        # Fail with a clear message instead of a NameError if the test is
        # ever parameterized with a combiner that has no expectation.
        self.assertIn(combiner, expected_by_combiner)
        expected_lookups = expected_by_combiner[combiner]

        embedding_lookup = dense_features(input_features)

        # Assert expected embedding variable and lookups.
        global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
        if shared:
            expected_var_names = ('inp_shared_embedding:0',)
        else:
            expected_var_names = (
                'dense_features/inp_embedding/embedding_weights:0',)
        self.assertCountEqual(expected_var_names,
                              tuple(v.name for v in global_vars))

        embedding_var = global_vars[0]
        with _initialized_session():
            self.assertAllEqual(embedding_values, embedding_var.eval())
            eval_res = embedding_lookup.eval()
            self.assertAllEqual(expected_lookups, eval_res)
    finally:
        # Leave the inference context on every path, including the early
        # return for 'sqrtn' and any assertion failure above.
        context.Exit()
def test_feature_layer_cpu(self):
    """Check shared embedding lookups via dense and sequence feature layers.

    Column 'aaa' is a plain identity categorical column consumed through
    `DenseFeatures`; column 'bbb' is a sequence identity categorical column
    consumed through `SequenceFeatures`. Both share one embedding variable,
    whose name, values and lookup results are compared with hand-computed
    expectations.
    """
    # Inputs.
    vocabulary_size = 3
    input_a = sparse_tensor.SparseTensorValue(
        # example 0, ids [2]
        # example 1, ids [0, 1]
        indices=((0, 0), (1, 0), (1, 1)),
        values=(2, 0, 1),
        dense_shape=(2, 2))
    input_b = sparse_tensor.SparseTensorValue(
        # example 0, ids [2]
        # example 1, ids [0, 1]
        # example 2, ids []
        indices=((0, 0), (1, 0), (1, 1)),
        values=(2, 0, 1),
        dense_shape=(3, 2))
    input_features = {'aaa': input_a, 'bbb': input_b}

    # Embedding variable.
    embedding_dimension = 2
    embedding_values = (
        (1., 2.),  # id 0
        (3., 5.),  # id 1
        (7., 11.)  # id 2
    )

    def _initializer(shape, dtype, partition_info=None):
        self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
        self.assertEqual(dtypes.float32, dtype)
        self.assertIsNone(partition_info)
        return embedding_values

    # Expected lookup result, using combiner='mean'.
    expected_lookups_a = (
        # example 0:
        (7., 11.),  # ids [2], embedding = [7, 11]
        # example 1:
        (2., 3.5),  # ids [0, 1], embedding = mean([1, 2] + [3, 5])
    )
    expected_lookups_b = (
        # example 0:
        ((7., 11.), (0., 0.),),  # ids [2], embedding = [[7, 11], [0, 0]]
        # example 1:
        ((1., 2.), (3., 5.),),  # ids [0, 1], embedding = [[1, 2], [3, 5]]
        # example 2:
        ((0., 0.), (0., 0.),),  # ids [], embedding = [[0, 0], [0, 0]]
    )

    # Build columns.
    categorical_column_a = fc_lib.categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    categorical_column_b = fc_lib.sequence_categorical_column_with_identity(
        key='bbb', num_buckets=vocabulary_size)
    embedding_column_a, embedding_column_b = (
        tpu_fc.shared_embedding_columns_v2(
            [categorical_column_a, categorical_column_b],
            dimension=embedding_dimension,
            initializer=_initializer,
            max_sequence_lengths=[0, 2]))

    # Provide sparse input and get dense result.
    dense_features = fc_lib.DenseFeatures([embedding_column_a])
    sequence_features = fc_lib.SequenceFeatures([embedding_column_b])
    embedding_lookup_a = dense_features(input_features)
    embedding_lookup_b = sequence_features(input_features)

    # Assert expected embedding variable and lookups.
    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
    # assertCountEqual is the Python 3 name of assertItemsEqual and matches
    # the assertion used by the other lookup test in this file.
    self.assertCountEqual(('aaa_bbb_shared_embedding:0',),
                          tuple(v.name for v in global_vars))
    embedding_var = global_vars[0]
    with _initialized_session():
        self.assertAllEqual(embedding_values, embedding_var.eval())
        self.assertAllEqual(expected_lookups_a, embedding_lookup_a.eval())
        # embedding_lookup_b[0] is the first element of the
        # SequenceFeatures result, compared here with the per-step
        # embeddings.
        self.assertAllEqual(expected_lookups_b, embedding_lookup_b[0].eval())