def shared_embedding_columns(categorical_columns,
                             dimension,
                             combiner='mean',
                             initializer=None,
                             shared_embedding_collection_name=None):
  """List of dense columns that convert from sparse, categorical input."""
  for categorical_column in categorical_columns:
    if not isinstance(categorical_column, _SUPPORTED_CATEGORICAL_COLUMNS):
      raise TypeError(
          'categorical_column for tpu '
          ' shared_embedding_columns must be type %s, got %s.' %
          (' or '.join(
              [cc.__name__ for cc in _SUPPORTED_CATEGORICAL_COLUMNS]),
           type(categorical_column)))

  columns = fc_lib.shared_embedding_columns(
      categorical_columns,
      dimension,
      combiner=combiner,
      initializer=initializer,
      shared_embedding_collection_name=shared_embedding_collection_name,
      ckpt_to_load_from=None,
      tensor_name_in_ckpt=None,
      max_norm=None,
      trainable=True)

  # Use the initializer and shared_embedding_collection_name to create TPU
  # version.
  initializer = columns[0].initializer
  shared_embedding_collection_name = columns[0].shared_embedding_collection_name
  tpu_columns = []

  # Create the state (_SharedEmbeddingColumnLayer) here.
  for categorical_column in categorical_columns:
    column = _TPUSharedEmbeddingColumn(
        categorical_column=categorical_column,
        dimension=dimension,
        combiner=combiner,
        initializer=initializer,
        shared_embedding_collection_name=shared_embedding_collection_name,
        ckpt_to_load_from=None,
        tensor_name_in_ckpt=None,
        max_norm=None,
        trainable=True)
    tpu_columns.append(column)

  return tpu_columns
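
# Illustrative usage sketch (a hedged example, not part of this module). It
# assumes identity categorical columns are among _SUPPORTED_CATEGORICAL_COLUMNS
# and that tf.feature_column is available; the feature keys, bucket counts,
# and embedding dimension below are hypothetical.
#
#   query_ids = tf.feature_column.categorical_column_with_identity(
#       key='query_ids', num_buckets=1000)
#   doc_ids = tf.feature_column.categorical_column_with_identity(
#       key='doc_ids', num_buckets=1000)
#   tpu_shared_columns = shared_embedding_columns(
#       [query_ids, doc_ids], dimension=8, combiner='mean')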
def test_encode_features(self):
  with tf.Graph().as_default():
    # Inputs.
    vocabulary_size = 4
    # -1 values are ignored.
    input_a = np.array([
        [3, -1, -1],  # example 0, ids [3]
        [0, 1, -1],  # example 1, ids [0, 1]
    ])
    input_b = np.array([
        [0, -1, -1],  # example 0, ids [0]
        [-1, -1, -1],  # example 1, ids []
    ])
    input_features = {"aaa": input_a, "bbb": input_b}

    # Embedding variable.
    embedding_dimension = 2
    embedding_values = (
        (1., 2.),  # id 0
        (3., 5.),  # id 1
        (7., 11.),  # id 2
        (9., 13.)  # id 3
    )

    # Expected lookup result, using combiner='mean'.
    expected_lookups_a = (
        # example 0:
        (9., 13.),  # ids [3], embedding = [9, 13]
        # example 1:
        (2., 3.5),  # ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
    )
    expected_lookups_b = (
        # example 0:
        (1., 2.),  # ids [0], embedding = [1, 2]
        # example 1:
        (0., 0.),  # ids [], embedding = [0, 0]
    )

    # Build columns.
    categorical_column_a = feature_column.categorical_column_with_identity(
        key="aaa", num_buckets=vocabulary_size)
    categorical_column_b = feature_column.categorical_column_with_identity(
        key="bbb", num_buckets=vocabulary_size)
    embed_column_a, embed_column_b = feature_column.shared_embedding_columns(
        [categorical_column_a, categorical_column_b],
        dimension=embedding_dimension,
        initializer=lambda shape, dtype, partition_info: embedding_values,
        shared_embedding_collection_name="custom_collection_name")
    feature_columns = {"aaa": embed_column_a, "bbb": embed_column_b}

    cols_to_tensors = feature_lib.encode_features(
        input_features,
        feature_columns.values(),
        mode=tf.estimator.ModeKeys.EVAL)

    embedding_lookup_a = cols_to_tensors[feature_columns["aaa"]]
    embedding_lookup_b = cols_to_tensors[feature_columns["bbb"]]

    # Assert expected embedding variable and lookups.
    global_vars = tf.compat.v1.get_collection(
        tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)
    embedding_var = global_vars[0]
    with tf.compat.v1.Session() as sess:
      sess.run(tf.compat.v1.global_variables_initializer())
      sess.run(tf.compat.v1.tables_initializer())
      self.assertAllEqual(embedding_values, embedding_var.eval())
      self.assertAllEqual(expected_lookups_a, embedding_lookup_a.eval())
      self.assertAllEqual(expected_lookups_b, embedding_lookup_b.eval())