def _create_global_visual_feature_embeddings(model_config, features,
                                             flags) -> tf.Tensor:
  """Creates global embeddings based on visual features."""
  initializer_range = 0.02

  indicator_cross_emb_lookup = etc_layers.EmbeddingLookup(
      vocab_size=2**len(flags.indicators_to_cross),
      embedding_size=model_config.hidden_size,
      initializer_range=initializer_range,
      use_one_hot_lookup=flags.use_tpu,
      name='indicator_cross_emb_lookup')
  global_embedding_adder = indicator_cross_emb_lookup(
      features['global_indicator_cross'])

  font_id_emb_lookup = etc_layers.EmbeddingLookup(
      vocab_size=generate_examples_lib.FONT_ID_VOCAB_SIZE,
      embedding_size=model_config.hidden_size,
      initializer_range=initializer_range,
      use_one_hot_lookup=flags.use_tpu,
      name='font_id_emb_lookup')
  global_embedding_adder += font_id_emb_lookup(features['global_font_ids'])

  parent_font_id_emb_lookup = etc_layers.EmbeddingLookup(
      vocab_size=generate_examples_lib.FONT_ID_VOCAB_SIZE,
      embedding_size=model_config.hidden_size,
      initializer_range=initializer_range,
      use_one_hot_lookup=flags.use_tpu,
      name='parent_font_id_emb_lookup')
  global_embedding_adder += parent_font_id_emb_lookup(
      features['global_parent_font_ids'])

  # Add transformation of dense features.
  dense_feature_projection = tf.keras.layers.Dense(
      units=model_config.hidden_size,
      activation=tensor_utils.get_activation('gelu'),
      kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
      name='dense_feature_projection')
  dense_feature_embeddings = dense_feature_projection(
      features['global_dense_features'])

  if flags.extra_dense_feature_layers > 1:
    raise NotImplementedError(
        '`extra_dense_feature_layers` must be at most 1.')
  elif flags.extra_dense_feature_layers == 1:
    dense_feature_layer2 = tf.keras.layers.Dense(
        units=model_config.hidden_size,
        activation=tensor_utils.get_activation('gelu'),
        kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
        name='dense_feature_layer2')
    dense_feature_embeddings = dense_feature_layer2(dense_feature_embeddings)
  global_embedding_adder += dense_feature_embeddings

  return global_embedding_adder
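# A minimal sketch (an illustration, not code from this repo) of why the
# indicator-cross vocab size above is `2**len(flags.indicators_to_cross)`:
# each binary visual indicator contributes one bit, so packing the bits into
# a single integer id gives every combination of indicators its own
# embedding row. The helper name `_cross_indicators` is hypothetical.
def _cross_indicators(indicator_values):
  """Packs a list of 0/1 indicator values into a single cross id."""
  cross_id = 0
  for bit, value in enumerate(indicator_values):
    cross_id |= (value & 1) << bit
  return cross_id

# Example: three indicators -> vocab_size 2**3 == 8; [1, 0, 1] -> id 5.
assert _cross_indicators([1, 0, 1]) == 5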
def test_embedding_lookup_1d_ids(self, use_one_hot_lookup):
  embedding_table = tf.constant([
      [1.0, -1.0],  #
      [1.1, -1.1],  #
      [1.2, -1.2],  #
      [1.3, -1.3],  #
      [1.4, -1.4],  #
  ])
  vocab_size, embedding_size = embedding_table.shape.as_list()

  input_ids = tf.constant([1, 0, 0, 3])
  input_mask = tf.constant([1, 1, 0, 1])

  layer = etc_layers.EmbeddingLookup(
      vocab_size=vocab_size,
      embedding_size=embedding_size,
      use_one_hot_lookup=use_one_hot_lookup)
  layer.build(None)  # Shapes are unused so we pass None.
  layer.embedding_table = embedding_table

  expected = [
      [1.1, -1.1],  #
      [1.0, -1.0],  #
      [0.0, 0.0],  #
      [1.3, -1.3],  #
  ]

  result = layer(input_ids, input_mask=input_mask)

  self.evaluate(tf.compat.v1.global_variables_initializer())
  self.assertAllClose(expected, result)
def test_embedding_lookup_with_projection(self):
  # Create an embedding table with width != projection_size.
  embedding_table = tf.constant([
      [1.0, -1.0, 0.5],  #
      [1.1, -1.1, -0.4],  #
      [1.2, -1.2, -0.5],  #
      [1.3, -1.3, 0.8],  #
      [1.4, -1.4, 0.9],  #
  ])
  projection_size = 2  # Different from `embedding_size`.
  vocab_size, embedding_size = embedding_table.shape.as_list()

  input_ids = tf.constant([
      [3, 2, 1],  #
      [4, 0, 4],  #
  ])
  input_mask = tf.constant([
      [1, 0, 0],  #
      [0, 0, 1],  #
  ])

  layer = etc_layers.EmbeddingLookup(
      vocab_size=vocab_size,
      embedding_size=embedding_size,
      projection_size=projection_size,
      use_one_hot_lookup=True)
  layer.build(None)  # Shapes are unused so we pass None.
  layer.embedding_table = embedding_table

  # Dense layer to use for projection. Note that we use a non-zero bias
  # initializer here to ensure that the bias term doesn't get through to the
  # masked ids after projection.
  layer.embedding_projection = tf.keras.layers.Dense(
      units=projection_size,
      activation=None,
      use_bias=True,
      kernel_initializer='ones',
      bias_initializer='ones')

  expected = [
      [
          [1.8, 1.8],  # [1.3, -1.3, 0.8] * kernel_initializer + 1 (bias).
          [0., 0.],  #
          [0., 0.],  #
      ],  #
      [
          [0., 0.],  #
          [0., 0.],  #
          [1.9, 1.9],  # [1.4, -1.4, 0.9] * kernel_initializer + 1 (bias).
      ],  #
  ]

  result = layer(input_ids, input_mask)

  self.evaluate(tf.compat.v1.global_variables_initializer())
  self.assertAllClose(expected, result)
def test_embedding_lookup_random_init_no_mask(self, use_one_hot_lookup):
  vocab_size = 5
  embedding_size = 2
  input_ids = tf.constant([1, 0, 0, 3])
  input_size = input_ids.shape.as_list()[0]

  layer = etc_layers.EmbeddingLookup(
      vocab_size=vocab_size,
      embedding_size=embedding_size,
      use_one_hot_lookup=use_one_hot_lookup)

  result = layer(input_ids)

  self.evaluate(tf.compat.v1.global_variables_initializer())
  self.evaluate(result)
  self.assertAllEqual([input_size, embedding_size], result.shape)
def test_embedding_lookup_no_projection(self, projection_size):
  # Create an embedding table with width = projection_size.
  embedding_table = tf.constant([
      [1.0, -1.0, 0.5],  #
      [1.1, -1.1, -0.5],  #
      [1.2, -1.2, -0.2],  #
      [1.3, -1.3, 0.3],  #
      [1.4, -1.4, 0.4],  #
  ])
  vocab_size, embedding_size = embedding_table.shape.as_list()

  input_ids = tf.constant([
      [3, 2, 1],  #
      [4, 0, 4],  #
  ])

  layer = etc_layers.EmbeddingLookup(
      vocab_size=vocab_size,
      embedding_size=embedding_size,
      projection_size=projection_size,
      use_one_hot_lookup=True)
  layer.build(None)  # Shapes are unused so we pass None.
  layer.embedding_table = embedding_table

  expected = [
      [
          [1.3, -1.3, 0.3],  #
          [1.2, -1.2, -0.2],  #
          [1.1, -1.1, -0.5],  #
      ],  #
      [
          [1.4, -1.4, 0.4],  #
          [1.0, -1.0, 0.5],  #
          [1.4, -1.4, 0.4],  #
      ],  #
  ]

  result = layer(input_ids)

  self.evaluate(tf.compat.v1.global_variables_initializer())
  self.assertAllClose(expected, result)
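# A minimal standalone sketch of the `EmbeddingLookup` behavior the tests
# above exercise, assuming the same `tf` and `etc_layers` imports and TF2
# eager execution; the specific sizes below are illustrative. Positions
# zeroed in `input_mask` come back as all-zero rows, and when
# `projection_size` differs from `embedding_size`, a dense projection is
# applied after the lookup.
layer = etc_layers.EmbeddingLookup(
    vocab_size=100,
    embedding_size=16,
    projection_size=8,  # Projects 16-dim embeddings down to 8 dims.
    use_one_hot_lookup=False)
ids = tf.constant([[5, 7, 0]])
mask = tf.constant([[1, 1, 0]])  # Zeros out the last position's embedding.
embeddings = layer(ids, input_mask=mask)  # Shape: [1, 3, 8].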
def __init__(self,
             config: EtcConfig,
             is_training: Optional[bool] = None,
             use_one_hot_embeddings=False,
             use_one_hot_relative_embeddings=False,
             name: Text = "etc_document_bert",
             **kwargs):
  """Constructor for `EtcModel`.

  Args:
    config: `EtcConfig` instance.
    is_training: Optional bool. True for training model, False for eval
      model. The None default will defer to the typical Keras `training`
      argument in `call` instead. When `is_training` is specified here, the
      `training` argument from `call` must not be used.
    use_one_hot_embeddings: (optional) bool. Whether to use one-hot word
      embeddings or tf.nn.embedding_lookup() for the word embeddings.
    use_one_hot_relative_embeddings: (optional) bool. Whether to use one-hot
      word embeddings or tf.nn.embedding_lookup() for the relative position
      embeddings.
    name: (Optional) name of the layer.
    **kwargs: Forwarded to super.

  Raises:
    ValueError: The config is invalid.
  """
  super(EtcModel, self).__init__(name=name, **kwargs)

  config = copy.deepcopy(config)
  if is_training is not None and not is_training:
    config.hidden_dropout_prob = 0.0
    config.attention_probs_dropout_prob = 0.0

  self.config = config
  self.is_training = is_training
  self.use_one_hot_embeddings = use_one_hot_embeddings
  self.use_one_hot_relative_embeddings = use_one_hot_relative_embeddings

  if config.relative_vocab_size is None:
    if config.relative_pos_max_distance != 0:
      raise ValueError(
          "`relative_pos_max_distance` must be 0 when `relative_vocab_size` "
          "is None.")
  elif config.relative_vocab_size < (
      feature_utils.RelativePositionGenerator(
          config.relative_pos_max_distance).relative_vocab_size +
      _NUM_OTHER_RELATIVE_IDS):
    raise ValueError("`relative_vocab_size` ({}) too small for "
                     "`relative_pos_max_distance` ({})".format(
                         config.relative_vocab_size,
                         config.relative_pos_max_distance))

  if config.embedding_size is None:
    config.embedding_size = config.hidden_size

  self.token_embedding = etc_layers.EmbeddingLookup(
      vocab_size=config.vocab_size,
      embedding_size=config.embedding_size,
      projection_size=config.hidden_size,
      initializer_range=config.initializer_range,
      use_one_hot_lookup=use_one_hot_embeddings,
      name="token_emb_lookup")

  self.token_embedding_norm = tf.keras.layers.LayerNormalization(
      axis=-1, epsilon=1e-12, name="long_emb_layer_norm")
  self.token_embedding_dropout = tf.keras.layers.Dropout(
      rate=config.hidden_dropout_prob)

  self.segment_embedding = etc_layers.EmbeddingLookup(
      vocab_size=config.segment_vocab_size,
      embedding_size=config.hidden_size,
      initializer_range=config.initializer_range,
      use_one_hot_lookup=True,
      name="segment_emb_lookup")

  if config.max_absolute_position_embeddings != 0:
    self.position_embedding = etc_layers.EmbeddingLookup(
        vocab_size=config.max_absolute_position_embeddings,
        embedding_size=config.hidden_size,
        initializer_range=config.initializer_range,
        use_one_hot_lookup=use_one_hot_embeddings,
        name="position_emb_lookup_long")
    # We use `max_absolute_position_embeddings` for the maximum global input
    # length even though it's larger than we need. This makes it easier to
    # initialize both long and global position embedding tables with the
    # same values if desired.
    self.global_position_embedding = etc_layers.EmbeddingLookup(
        vocab_size=config.max_absolute_position_embeddings,
        embedding_size=config.hidden_size,
        initializer_range=config.initializer_range,
        use_one_hot_lookup=use_one_hot_embeddings,
        name="position_emb_lookup_global")
    # Call layers to force variable initialization.
    self.position_embedding(tf.ones([1, 1], tf.int32))
    self.global_position_embedding(tf.ones([1, 1], tf.int32))
  else:
    self.position_embedding = None
    self.global_position_embedding = None

  # We use the same embedding table for global tokens to make it easy to
  # place WordPieces in the global memory for finetuning tasks downstream.
  self.global_token_embedding = self.token_embedding
  self.global_token_embedding_norm = tf.keras.layers.LayerNormalization(
      axis=-1, epsilon=1e-12, name="global_emb_layer_norm")
  self.global_token_embedding_dropout = tf.keras.layers.Dropout(
      rate=config.hidden_dropout_prob)

  self.global_local_transformer = etc_layers.GlobalLocalTransformerLayers(
      long_hidden_size=config.hidden_size,
      global_hidden_size=config.hidden_size,
      num_hidden_layers=config.num_hidden_layers,
      num_attention_heads=config.num_attention_heads,
      local_radius=config.local_radius,
      att_size_per_head=config.att_size_per_head,
      long_intermediate_size=config.intermediate_size,
      global_intermediate_size=config.intermediate_size,
      hidden_act=tensor_utils.get_activation(config.hidden_act),
      hidden_dropout_prob=config.hidden_dropout_prob,
      attention_probs_dropout_prob=config.attention_probs_dropout_prob,
      initializer_range=config.initializer_range,
      relative_vocab_size=config.relative_vocab_size,
      share_feed_forward_params=config.share_feed_forward_params,
      share_kv_projections=config.share_kv_projections,
      share_qkv_projections=config.share_qkv_projections,
      share_att_output_projection=config.share_att_output_projection,
      use_pre_activation_order=config.use_pre_activation_order,
      use_one_hot_lookup=use_one_hot_relative_embeddings,
      grad_checkpointing_period=config.grad_checkpointing_period)
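# A hedged construction sketch: assuming `EtcConfig` accepts its fields as
# keyword arguments (its exact constructor signature is not shown here), an
# eval-mode model might be built roughly like this. Per the validation in
# `__init__` above, `relative_vocab_size` must cover the ids produced by
# `feature_utils.RelativePositionGenerator(relative_pos_max_distance)` plus
# `_NUM_OTHER_RELATIVE_IDS`; the numbers below are illustrative only.
config = EtcConfig(
    vocab_size=30522,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    local_radius=84,
    relative_pos_max_distance=12,
    relative_vocab_size=32)
# `is_training=False` zeroes both dropout probabilities in the copied config.
model = EtcModel(config, is_training=False)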