def test_compute_output_shape(self):
    """SequenceFeatures reports (batch, time, total_width) as output shape."""
    col_price1 = tf.feature_column.sequence_numeric_column('price1', shape=2)
    col_price2 = tf.feature_column.sequence_numeric_column('price2')
    feature_dict = {
        'price1': tf.SparseTensor(
            indices=[[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 1], [1, 0, 0],
                     [1, 0, 1], [2, 0, 0], [2, 0, 1], [3, 0, 0], [3, 0, 1]],
            values=[0., 1., 10., 11., 100., 101., 200., 201., 300., 301.],
            dense_shape=(4, 3, 2)),
        'price2': tf.SparseTensor(
            indices=[[0, 0], [0, 1], [1, 0], [2, 0], [3, 0]],
            values=[10., 11., 20., 30., 40.],
            dense_shape=(4, 3)),
    }
    layer = ksfc.SequenceFeatures([col_price1, col_price2])
    seq_input, seq_len = layer(feature_dict)
    # Output width is 2 (price1's shape) + 1 (price2 scalar) = 3.
    self.assertEqual(layer.compute_output_shape((None, None)),
                     (None, None, 3))
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.evaluate(tf.compat.v1.tables_initializer())
    self.assertAllClose(
        [[[0., 1., 10.], [10., 11., 11.], [0., 0., 0.]],
         [[100., 101., 20.], [0., 0., 0.], [0., 0., 0.]],
         [[200., 201., 30.], [0., 0., 0.], [0., 0., 0.]],
         [[300., 301., 40.], [0., 0., 0.], [0., 0., 0.]]],
        self.evaluate(seq_input))
    self.assertAllClose([2, 1, 1, 1], self.evaluate(seq_len))
def test_shared_embedding_column_with_non_sequence_categorical(self):
    """Tests that error is raised for non-sequence shared embedding column."""
    with tf.Graph().as_default():
        n_buckets = 3
        # Both inputs: example 0 has ids [2], example 1 has ids [0, 1].
        ids_a = tf.compat.v1.SparseTensorValue(
            indices=((0, 0), (1, 0), (1, 1)),
            values=(2, 0, 1),
            dense_shape=(2, 2))
        ids_b = tf.compat.v1.SparseTensorValue(
            indices=((0, 0), (1, 0), (1, 1)),
            values=(2, 0, 1),
            dense_shape=(2, 2))
        # Plain (non-sequence) categorical columns feeding shared embeddings.
        column_a = tf.feature_column.categorical_column_with_identity(
            key='aaa', num_buckets=n_buckets)
        column_b = tf.feature_column.categorical_column_with_identity(
            key='bbb', num_buckets=n_buckets)
        shared_columns = tf.feature_column.shared_embeddings(
            [column_a, column_b], dimension=2)
        layer = ksfc.SequenceFeatures(shared_columns)
        with self.assertRaisesRegex(
                ValueError,
                r'In embedding_column: aaa_shared_embedding\. '
                r'categorical_column must '
                r'be of type SequenceCategoricalColumn to use SequenceFeatures\.'):
            _, _ = layer({'aaa': ids_a, 'bbb': ids_b})
def test_indicator_column(self, sparse_input_args_a, sparse_input_args_b,
                          expected_input_layer, expected_sequence_length):
    """Parameterized check of indicator columns through SequenceFeatures."""
    input_a = tf.compat.v1.SparseTensorValue(**sparse_input_args_a)
    input_b = tf.compat.v1.SparseTensorValue(**sparse_input_args_b)
    # 'aaa' has a 3-bucket vocabulary, 'bbb' a 2-bucket one.
    indicator_a = tf.feature_column.indicator_column(
        tf.feature_column.sequence_categorical_column_with_identity(
            key='aaa', num_buckets=3))
    indicator_b = tf.feature_column.indicator_column(
        tf.feature_column.sequence_categorical_column_with_identity(
            key='bbb', num_buckets=2))
    # Columns are passed out of order on purpose: SequenceFeatures is
    # expected to reorder them alphabetically by name.
    layer = ksfc.SequenceFeatures([indicator_b, indicator_a])
    output, seq_length = layer({'aaa': input_a, 'bbb': input_b})
    self.assertAllEqual(expected_input_layer, self.evaluate(output))
    self.assertAllEqual(expected_sequence_length, self.evaluate(seq_length))
def test_shared_sequence_non_sequence_into_input_layer(self):
    """A sequence and a context column sharing one embedding table.

    Verifies that the sequence column (via SequenceFeatures) and the
    non-sequence column (via DenseFeatures) both look up the same
    all-ones table and produce consistent results.
    """
    non_seq_column = tf.feature_column.categorical_column_with_identity(
        'non_seq', num_buckets=10)
    seq_column = tf.feature_column.sequence_categorical_column_with_identity(
        'seq', num_buckets=10)
    shared_non_seq, shared_seq = tf.feature_column.shared_embeddings(
        [non_seq_column, seq_column],
        dimension=4,
        combiner='sum',
        initializer=tf.ones_initializer(),
        shared_embedding_collection_name='shared')

    # Fix: the original reused the names `seq`/`non_seq` for both the
    # columns and the input tensors, shadowing the columns; use distinct
    # names for the SparseTensor inputs.
    seq_tensor = tf.SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0]],
        values=[0, 1, 2],
        dense_shape=[2, 2])
    non_seq_tensor = tf.SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0]],
        values=[0, 1, 2],
        dense_shape=[2, 2])
    features = {'seq': seq_tensor, 'non_seq': non_seq_tensor}

    seq_input, seq_length = ksfc.SequenceFeatures([shared_seq])(features)
    non_seq_input = dense_features.DenseFeatures([shared_non_seq])(features)

    with self.cached_session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        output_seq, output_seq_length, output_non_seq = sess.run(
            [seq_input, seq_length, non_seq_input])
        # All-ones embeddings: every looked-up id yields [1, 1, 1, 1];
        # missing timesteps are zero-padded.
        self.assertAllEqual(
            output_seq,
            [[[1, 1, 1, 1], [1, 1, 1, 1]], [[1, 1, 1, 1], [0, 0, 0, 0]]])
        self.assertAllEqual(output_seq_length, [2, 1])
        # combiner='sum' adds the per-row embeddings for the context column:
        # row 0 has two ids -> [2, 2, 2, 2]; row 1 has one -> [1, 1, 1, 1].
        self.assertAllEqual(output_non_seq, [[2, 2, 2, 2], [1, 1, 1, 1]])
def test_sequence_length_not_equal(self):
    """Tests that an error is raised when sequence lengths are not equal."""
    # "aaa" has per-example sequence lengths [2, 1]; "bbb" has [1, 1].
    input_a = tf.compat.v1.SparseTensorValue(
        indices=((0, 0), (0, 1), (1, 0)),
        values=(0.0, 1.0, 10.0),
        dense_shape=(2, 2))
    input_b = tf.compat.v1.SparseTensorValue(
        indices=((0, 0), (1, 0)),
        values=(1.0, 10.0),
        dense_shape=(2, 2))
    col_a = tf.feature_column.sequence_numeric_column("aaa")
    col_b = tf.feature_column.sequence_numeric_column("bbb")
    layer = ksfc.SequenceFeatures([col_a, col_b])
    with self.assertRaisesRegex(tf.errors.InvalidArgumentError,
                                r"Condition x == y did not hold.*"):
        _, sequence_length = layer({"aaa": input_a, "bbb": input_b})
        self.evaluate(sequence_length)
def test_serialization_sequence_features(self):
    """SequenceFeatures survives a serialize/deserialize round trip."""
    column = tf.feature_column.sequence_numeric_column("rating")
    layer = ksfc.SequenceFeatures([column])
    restored = keras.layers.deserialize(keras.layers.serialize(layer))
    self.assertIsInstance(restored, ksfc.SequenceFeatures)
def test_saving_with_sequence_features(self):
    """A model built on SequenceFeatures survives a to_json round trip."""
    cols = [
        tf.feature_column.sequence_numeric_column('a'),
        tf.feature_column.indicator_column(
            tf.feature_column.sequence_categorical_column_with_vocabulary_list(
                'b', ['one', 'two']))
    ]
    input_layers = {
        'a': keras.layers.Input(shape=(None, 1), sparse=True, name='a'),
        'b': keras.layers.Input(
            shape=(None, 1), sparse=True, name='b', dtype='string'),
    }

    fc_layer, _ = ksfc.SequenceFeatures(cols)(input_layers)
    # TODO(tibell): Figure out the right dtype and apply masking.
    # sequence_length_mask = array_ops.sequence_mask(sequence_length)
    # x = keras.layers.GRU(32)(fc_layer, mask=sequence_length_mask)
    x = keras.layers.GRU(32)(fc_layer)
    output = keras.layers.Dense(10)(x)

    model = keras.models.Model(input_layers, output)

    model.compile(
        loss=keras.losses.MSE,
        optimizer='rmsprop',
        metrics=[keras.metrics.categorical_accuracy])

    config = model.to_json()
    loaded_model = model_config.model_from_json(config)

    batch_size = 10
    timesteps = 1

    values_a = np.arange(10, dtype=np.float32)
    indices_a = np.zeros((10, 3), dtype=np.int64)
    indices_a[:, 0] = np.arange(10)
    inputs_a = tf.SparseTensor(indices_a, values_a,
                               (batch_size, timesteps, 1))

    # Fix: `np.str` was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin `str` is the correct dtype argument here.
    values_b = np.zeros(10, dtype=str)
    indices_b = np.zeros((10, 3), dtype=np.int64)
    indices_b[:, 0] = np.arange(10)
    inputs_b = tf.SparseTensor(indices_b, values_b,
                               (batch_size, timesteps, 1))

    with self.cached_session():
        # Initialize tables for V1 lookup.
        if not tf.executing_eagerly():
            self.evaluate(tf.compat.v1.tables_initializer())

        self.assertLen(
            loaded_model.predict({
                'a': inputs_a,
                'b': inputs_b
            }, steps=1), batch_size)
def test_embedding_column(
        self, sparse_input_args_a, sparse_input_args_b, expected_input_layer,
        expected_sequence_length):
    # Parameterized test: two sequence embedding columns with deterministic
    # initializers; checks the concatenated output, the sequence lengths,
    # the created variable names, and the initialized variable contents.
    sparse_input_a = tf.compat.v1.SparseTensorValue(**sparse_input_args_a)
    sparse_input_b = tf.compat.v1.SparseTensorValue(**sparse_input_args_b)

    vocabulary_size = 3
    embedding_dimension_a = 2
    embedding_values_a = (
        (1., 2.),  # id 0
        (3., 4.),  # id 1
        (5., 6.)  # id 2
    )
    embedding_dimension_b = 3
    embedding_values_b = (
        (11., 12., 13.),  # id 0
        (14., 15., 16.),  # id 1
        (17., 18., 19.)  # id 2
    )

    def _get_initializer(embedding_dimension, embedding_values):

        def _initializer(shape, dtype, partition_info=None):
            # Verify the framework requests the expected variable shape/dtype
            # before handing back the fixed table.
            self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
            self.assertEqual(tf.float32, dtype)
            self.assertIsNone(partition_info)
            return embedding_values

        return _initializer

    categorical_column_a = tf.feature_column.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    embedding_column_a = tf.feature_column.embedding_column(
        categorical_column_a,
        dimension=embedding_dimension_a,
        initializer=_get_initializer(embedding_dimension_a,
                                     embedding_values_a))
    categorical_column_b = tf.feature_column.sequence_categorical_column_with_identity(
        key='bbb', num_buckets=vocabulary_size)
    embedding_column_b = tf.feature_column.embedding_column(
        categorical_column_b,
        dimension=embedding_dimension_b,
        initializer=_get_initializer(embedding_dimension_b,
                                     embedding_values_b))

    # Test that columns are reordered alphabetically.
    sequence_input_layer = ksfc.SequenceFeatures(
        [embedding_column_b, embedding_column_a])
    input_layer, sequence_length = sequence_input_layer({
        'aaa': sparse_input_a,
        'bbb': sparse_input_b,
    })

    self.evaluate(tf.compat.v1.global_variables_initializer())
    weights = sequence_input_layer.weights
    self.assertCountEqual(
        ('sequence_features/aaa_embedding/embedding_weights:0',
         'sequence_features/bbb_embedding/embedding_weights:0'),
        tuple([v.name for v in weights]))
    # weights[0]/weights[1] indexing assumes alphabetical creation order
    # ('aaa' before 'bbb') — TODO confirm this ordering guarantee.
    self.assertAllEqual(embedding_values_a, self.evaluate(weights[0]))
    self.assertAllEqual(embedding_values_b, self.evaluate(weights[1]))
    self.assertAllEqual(expected_input_layer, self.evaluate(input_layer))
    self.assertAllEqual(
        expected_sequence_length, self.evaluate(sequence_length))
def test_static_shape_from_tensors_numeric(
        self, sparse_input_args, expected_shape):
    """Tests that we return a known static shape when we have one."""
    sparse_input = tf.compat.v1.SparseTensorValue(**sparse_input_args)
    column = tf.feature_column.sequence_numeric_column('aaa', shape=(2, 2))
    layer = ksfc.SequenceFeatures([column])
    output, _ = layer({'aaa': sparse_input})
    self.assertEqual(output.get_shape(), expected_shape)
def test_from_config(self, trainable, name):
    """from_config(get_config()) rebuilds an equivalent layer."""
    original = ksfc.SequenceFeatures(
        [tf.feature_column.sequence_numeric_column('a')],
        trainable=trainable,
        name=name)
    rebuilt = ksfc.SequenceFeatures.from_config(original.get_config())
    self.assertEqual(rebuilt.name, original.name)
    self.assertEqual(rebuilt.trainable, trainable)
    self.assertLen(rebuilt._feature_columns, 1)
    self.assertEqual(rebuilt._feature_columns[0].name, 'a')
def test_get_config(self, trainable, name):
    """get_config serializes name, trainable flag, and the columns."""
    layer = ksfc.SequenceFeatures(
        [tf.feature_column.sequence_numeric_column('a')],
        trainable=trainable,
        name=name)
    config = layer.get_config()
    self.assertEqual(config['name'], layer.name)
    self.assertEqual(config['trainable'], trainable)
    self.assertLen(config['feature_columns'], 1)
    column_config = config['feature_columns'][0]
    self.assertEqual(column_config['class_name'], 'SequenceNumericColumn')
    self.assertEqual(column_config['config']['shape'], (1,))
def test_static_shape_from_tensors_indicator(
        self, sparse_input_args, expected_shape):
    """Tests that we return a known static shape when we have one."""
    sparse_input = tf.compat.v1.SparseTensorValue(**sparse_input_args)
    indicator = tf.feature_column.indicator_column(
        tf.feature_column.sequence_categorical_column_with_identity(
            key='aaa', num_buckets=3))
    layer = ksfc.SequenceFeatures([indicator])
    output, _ = layer({'aaa': sparse_input})
    self.assertEqual(output.get_shape(), expected_shape)
def test_numeric_column(
        self, sparse_input_args, expected_input_layer,
        expected_sequence_length):
    """Parameterized check of a scalar sequence numeric column."""
    sparse_input = tf.compat.v1.SparseTensorValue(**sparse_input_args)
    column = tf.feature_column.sequence_numeric_column('aaa')
    layer = ksfc.SequenceFeatures([column])
    output, seq_length = layer({'aaa': sparse_input})
    self.assertAllEqual(expected_input_layer, self.evaluate(output))
    self.assertAllEqual(expected_sequence_length, self.evaluate(seq_length))
def test_numeric_column_multi_dim(
        self, sparse_input_args, expected_input_layer,
        expected_sequence_length):
    """Tests SequenceFeatures for multi-dimensional numeric_column."""
    sparse_input = tf.compat.v1.SparseTensorValue(**sparse_input_args)
    column = tf.feature_column.sequence_numeric_column('aaa', shape=(2, 2))
    layer = ksfc.SequenceFeatures([column])
    output, seq_length = layer({'aaa': sparse_input})
    self.assertAllEqual(expected_input_layer, self.evaluate(output))
    self.assertAllEqual(expected_sequence_length, self.evaluate(seq_length))
def test_get_config(self, trainable, name):
    """get_config serializes name, trainable flag, and the columns."""
    layer = ksfc.SequenceFeatures(
        [tf.feature_column.sequence_numeric_column("a")],
        trainable=trainable,
        name=name)
    config = layer.get_config()
    self.assertEqual(config["name"], layer.name)
    self.assertEqual(config["trainable"], trainable)
    self.assertLen(config["feature_columns"], 1)
    column_config = config["feature_columns"][0]
    self.assertEqual(column_config["class_name"], "SequenceNumericColumn")
    self.assertEqual(column_config["config"]["shape"], (1, ))
def test_embedding_column_with_non_sequence_categorical(self):
    """Tests that error is raised for non-sequence embedding column."""
    # example 0 has ids [2]; example 1 has ids [0, 1].
    sparse_input = tf.compat.v1.SparseTensorValue(
        indices=((0, 0), (1, 0), (1, 1)),
        values=(2, 0, 1),
        dense_shape=(2, 2))
    # An embedding over a plain (non-sequence) categorical column.
    embedding = tf.feature_column.embedding_column(
        tf.feature_column.categorical_column_with_identity(
            key='aaa', num_buckets=3),
        dimension=2)
    layer = ksfc.SequenceFeatures([embedding])
    with self.assertRaisesRegex(
            ValueError,
            r'In embedding_column: aaa_embedding\. categorical_column must be of '
            r'type SequenceCategoricalColumn to use SequenceFeatures\.'):
        _, _ = layer({'aaa': sparse_input})
def test_sequence_example_into_input_layer(self):
    # End-to-end: parse serialized SequenceExamples with tf.data, feed the
    # sequence columns through SequenceFeatures and the context columns
    # through DenseFeatures, tile the context across time, and run an RNN
    # over the concatenation.
    examples = [_make_sequence_example().SerializeToString()] * 100
    ctx_cols, seq_cols = self._build_feature_columns()

    def _parse_example(example):
        # Split one serialized SequenceExample into context + sequence
        # feature dicts, then merge them into a single features dict.
        ctx, seq = tf.io.parse_single_sequence_example(
            example,
            context_features=tf.feature_column.make_parse_example_spec(
                ctx_cols),
            sequence_features=tf.feature_column.make_parse_example_spec(
                seq_cols),
        )
        ctx.update(seq)
        return ctx

    ds = tf.data.Dataset.from_tensor_slices(examples)
    ds = ds.map(_parse_example)
    ds = ds.batch(20)

    # Test on a single batch
    features = tf.compat.v1.data.make_one_shot_iterator(ds).get_next()

    # Tile the context features across the sequence features
    sequence_input_layer = ksfc.SequenceFeatures(seq_cols)
    seq_input, _ = sequence_input_layer(features)
    dense_input_layer = dense_features.DenseFeatures(ctx_cols)
    ctx_input = dense_input_layer(features)
    # Repeat the context along the time axis so it can be concatenated
    # with the per-timestep sequence input.
    ctx_input = backend.repeat(ctx_input, tf.shape(seq_input)[1])
    concatenated_input = merging.concatenate([seq_input, ctx_input])

    rnn_layer = base_rnn.RNN(simple_rnn.SimpleRNNCell(10))
    output = rnn_layer(concatenated_input)

    with self.cached_session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        features_r = sess.run(features)
        # Shape assumes _make_sequence_example emits a 3x6 "int_list"
        # sequence feature — TODO confirm against the helper.
        self.assertAllEqual(features_r["int_list"].dense_shape, [20, 3, 6])
        output_r = sess.run(output)
        self.assertAllEqual(output_r.shape, [20, 10])
def test_indicator_column_with_non_sequence_categorical(self):
    """Tests that error is raised for non-sequence categorical column."""
    # example 0 has ids [2]; example 1 has ids [0, 1].
    sparse_input = tf.compat.v1.SparseTensorValue(
        indices=((0, 0), (1, 0), (1, 1)),
        values=(2, 0, 1),
        dense_shape=(2, 2))
    # An indicator over a plain (non-sequence) categorical column.
    indicator = tf.feature_column.indicator_column(
        tf.feature_column.categorical_column_with_identity(
            key="aaa", num_buckets=3))
    layer = ksfc.SequenceFeatures([indicator])
    with self.assertRaisesRegex(
            ValueError,
            r"In indicator_column: aaa_indicator\. categorical_column must be of "
            r"type SequenceCategoricalColumn to use SequenceFeatures\.",
    ):
        _, _ = layer({"aaa": sparse_input})
def test_feature_layer_cpu(self, use_safe_embedding_lookup):
    # TPU embedding columns evaluated on CPU: checks DenseFeatures and
    # SequenceFeatures lookups against hand-computed expectations, then
    # counts SparseFillEmptyRows ops to verify the safe-lookup setting.
    # Inputs.
    vocabulary_size = 3
    sparse_input = sparse_tensor.SparseTensorValue(
        # example 0, ids [2]
        # example 1, ids [0, 1]
        # example 2, ids []
        # example 3, ids [1]
        indices=((0, 0), (1, 0), (1, 1), (3, 0)),
        values=(2, 0, 1, 1),
        dense_shape=(4, 2))

    # Embedding variable.
    embedding_dimension = 2
    embedding_values = (
        (1., 2.),  # id 0
        (3., 5.),  # id 1
        (7., 11.)  # id 2
    )

    def _initializer(shape, dtype, partition_info=None):
        # Deterministic initializer; also asserts the requested shape/dtype.
        self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
        self.assertEqual(dtypes.float32, dtype)
        self.assertIsNone(partition_info)
        return embedding_values

    # Expected lookup result, using combiner='mean'.
    expected_lookups = (
        # example 0, ids [2], embedding = [7, 11]
        (7., 11.),
        # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
        (2., 3.5),
        # example 2, ids [], embedding = [0, 0]
        (0., 0.),
        # example 3, ids [1], embedding = [3, 5]
        (3., 5.),
    )
    expected_lookups_sequence = (
        # example 0, ids [2], embedding = [[7, 11], [0, 0]]
        (
            (7., 11.),
            (0., 0.),
        ),
        # example 1, ids [0, 1], embedding = [[1, 2], [3. 5]]
        (
            (1., 2.),
            (3., 5.),
        ),
        # example 2, ids [], embedding = [0, 0]
        (
            (0., 0.),
            (0., 0.),
        ),
        # example 3, ids [1], embedding = [3, 5]
        (
            (3., 5.),
            (0., 0.),
        ),
    )

    # Build columns.
    categorical_column = fc_lib.categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    sequence_categorical_column = (
        fc_lib.sequence_categorical_column_with_identity(
            key='bbb', num_buckets=vocabulary_size))
    embedding_column = tpu_fc.embedding_column_v2(
        categorical_column,
        dimension=embedding_dimension,
        initializer=_initializer,
        use_safe_embedding_lookup=use_safe_embedding_lookup)
    sequence_embedding_column = tpu_fc.embedding_column_v2(
        sequence_categorical_column,
        dimension=embedding_dimension,
        initializer=_initializer,
        max_sequence_length=2,
        use_safe_embedding_lookup=use_safe_embedding_lookup)

    # Provide sparse input and get dense result.
    features = {'aaa': sparse_input, 'bbb': sparse_input}
    dense_features = df_lib.DenseFeatures([embedding_column])
    sequence_features = sfc_lib.SequenceFeatures(
        [sequence_embedding_column])
    embedding_lookup = dense_features(features)
    sequence_embedding_lookup = sequence_features(features)

    # Assert expected embedding variable and lookups.
    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
    self.assertItemsEqual((
        'dense_features/aaa_embedding/embedding_weights:0',
        'sequence_features/bbb_embedding/embedding_weights:0',
    ), tuple([v.name for v in global_vars]))
    with _initialized_session():
        self.assertAllEqual(embedding_values, global_vars[0])
        self.assertAllEqual(expected_lookups, embedding_lookup)
        self.assertAllEqual(expected_lookups_sequence,
                            sequence_embedding_lookup[0].eval())
    # The graph will still have SparseFillEmptyRows due to sequence being
    # a Rank3 embedding lookup.
    if use_safe_embedding_lookup:
        self.assertEqual(2, [
            x.type for x in ops.get_default_graph().get_operations()
        ].count('SparseFillEmptyRows'))
    else:
        self.assertEqual(1, [
            x.type for x in ops.get_default_graph().get_operations()
        ].count('SparseFillEmptyRows'))
def test_feature_layer_cpu(self, use_safe_embedding_lookup):
    # Shared TPU embedding columns (one context, one sequence) evaluated on
    # CPU: verifies both lookups, the single shared variable, and the
    # SparseFillEmptyRows op count for the safe-lookup setting.
    # Inputs.
    vocabulary_size = 3
    input_a = sparse_tensor.SparseTensorValue(
        # example 0, ids [2]
        # example 1, ids [0, 1]
        indices=((0, 0), (1, 0), (1, 1)),
        values=(2, 0, 1),
        dense_shape=(2, 2))
    input_b = sparse_tensor.SparseTensorValue(
        # example 0, ids [2]
        # example 1, ids [0, 1]
        # example 2, ids []
        indices=((0, 0), (1, 0), (1, 1)),
        values=(2, 0, 1),
        dense_shape=(3, 2))
    input_features = {'aaa': input_a, 'bbb': input_b}

    # Embedding variable.
    embedding_dimension = 2
    embedding_values = (
        (1., 2.),  # id 0
        (3., 5.),  # id 1
        (7., 11.)  # id 2
    )

    def _initializer(shape, dtype, partition_info=None):
        # Deterministic initializer; also asserts the requested shape/dtype.
        self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
        self.assertEqual(dtypes.float32, dtype)
        self.assertIsNone(partition_info)
        return embedding_values

    # Expected lookup result, using combiner='mean'.
    expected_lookups_a = (
        # example 0:
        (7., 11.),  # ids [2], embedding = [7, 11]
        # example 1:
        (2., 3.5),  # ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
    )
    expected_lookups_b = (
        # example 0:
        (
            (7., 11.),
            (0., 0.),
        ),  # ids [2], embedding = [[7, 11], [0, 0]]
        # example 1:
        (
            (1., 2.),
            (3., 5.),
        ),  # ids [0, 1], embedding = [[1, 2], [3, 5]]
        # example 2:
        (
            (0., 0.),
            (0., 0.),
        ),  # ids [], embedding = [[0, 0], [0, 0]]
    )

    # Build columns.
    categorical_column_a = fc_lib.categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    categorical_column_b = fc_lib.sequence_categorical_column_with_identity(
        key='bbb', num_buckets=vocabulary_size)
    # max_sequence_lengths=[0, 2]: 'aaa' is a context column, 'bbb' a
    # sequence column capped at 2 timesteps.
    embedding_column_a, embedding_column_b = tpu_fc.shared_embedding_columns_v2(
        [categorical_column_a, categorical_column_b],
        dimension=embedding_dimension,
        initializer=_initializer,
        max_sequence_lengths=[0, 2],
        use_safe_embedding_lookup=use_safe_embedding_lookup)

    # Provide sparse input and get dense result.
    dense_features = df_lib.DenseFeatures([embedding_column_a])
    sequence_features = sfc_lib.SequenceFeatures([embedding_column_b])
    embedding_lookup_a = dense_features(input_features)
    embedding_lookup_b = sequence_features(input_features)

    # Assert expected embedding variable and lookups.
    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
    self.assertItemsEqual(('aaa_bbb_shared_embedding:0', ),
                          tuple([v.name for v in global_vars]))
    embedding_var = global_vars[0]
    with _initialized_session():
        self.assertAllEqual(embedding_values, embedding_var)
        self.assertAllEqual(expected_lookups_a, embedding_lookup_a)
        self.assertAllEqual(expected_lookups_b,
                            embedding_lookup_b[0].eval())
    # The graph will still have SparseFillEmptyRows due to sequence being
    # a Rank3 embedding lookup.
    if use_safe_embedding_lookup:
        self.assertEqual(2, [
            x.type for x in ops.get_default_graph().get_operations()
        ].count('SparseFillEmptyRows'))
    else:
        self.assertEqual(1, [
            x.type for x in ops.get_default_graph().get_operations()
        ].count('SparseFillEmptyRows'))
def test_shared_embedding_column(self):
    # Two sequence categorical columns sharing one embedding table:
    # verifies the concatenated lookup, sequence lengths, alphabetical
    # column ordering, and the shared variable's name.
    with tf.Graph().as_default():
        vocabulary_size = 3
        sparse_input_a = tf.compat.v1.SparseTensorValue(
            # example 0, ids [2]
            # example 1, ids [0, 1]
            indices=((0, 0), (1, 0), (1, 1)),
            values=(2, 0, 1),
            dense_shape=(2, 2),
        )
        sparse_input_b = tf.compat.v1.SparseTensorValue(
            # example 0, ids [1]
            # example 1, ids [2, 0]
            indices=((0, 0), (1, 0), (1, 1)),
            values=(1, 2, 0),
            dense_shape=(2, 2),
        )
        embedding_dimension = 2
        embedding_values = (
            (1.0, 2.0),  # id 0
            (3.0, 4.0),  # id 1
            (5.0, 6.0),  # id 2
        )

        def _get_initializer(embedding_dimension, embedding_values):

            def _initializer(shape, dtype, partition_info=None):
                # Deterministic initializer; asserts the requested
                # shape/dtype for the shared variable.
                self.assertAllEqual((vocabulary_size, embedding_dimension),
                                    shape)
                self.assertEqual(tf.float32, dtype)
                self.assertIsNone(partition_info)
                return embedding_values

            return _initializer

        expected_input_layer = [
            # example 0, ids_a [2], ids_b [1]
            [[5.0, 6.0, 3.0, 4.0], [0.0, 0.0, 0.0, 0.0]],
            # example 1, ids_a [0, 1], ids_b [2, 0]
            [[1.0, 2.0, 5.0, 6.0], [3.0, 4.0, 1.0, 2.0]],
        ]
        expected_sequence_length = [1, 2]

        categorical_column_a = (
            tf.feature_column.sequence_categorical_column_with_identity(
                key="aaa", num_buckets=vocabulary_size))
        categorical_column_b = (
            tf.feature_column.sequence_categorical_column_with_identity(
                key="bbb", num_buckets=vocabulary_size))
        # Test that columns are reordered alphabetically.
        shared_embedding_columns = tf.feature_column.shared_embeddings(
            [categorical_column_b, categorical_column_a],
            dimension=embedding_dimension,
            initializer=_get_initializer(embedding_dimension,
                                         embedding_values),
        )

        sequence_input_layer = ksfc.SequenceFeatures(
            shared_embedding_columns)
        input_layer, sequence_length = sequence_input_layer({
            "aaa": sparse_input_a,
            "bbb": sparse_input_b
        })

        global_vars = tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)
        # Exactly one variable is created for the shared embedding.
        self.assertCountEqual(
            ("aaa_bbb_shared_embedding:0", ),
            tuple([v.name for v in global_vars]),
        )
        with _initialized_session() as sess:
            self.assertAllEqual(embedding_values,
                                global_vars[0].eval(session=sess))
            self.assertAllEqual(expected_input_layer,
                                input_layer.eval(session=sess))
            self.assertAllEqual(expected_sequence_length,
                                sequence_length.eval(session=sess))