def _create_global_visual_feature_embeddings(model_config, features,
                                             flags) -> tf.Tensor:
    """Creates global embeddings based on visual features."""
    initializer_range = 0.02

    indicator_cross_emb_lookup = etc_layers.EmbeddingLookup(
        vocab_size=2**len(flags.indicators_to_cross),
        embedding_size=model_config.hidden_size,
        initializer_range=initializer_range,
        use_one_hot_lookup=flags.use_tpu,
        name='indicator_cross_emb_lookup')
    global_embedding_adder = indicator_cross_emb_lookup(
        features['global_indicator_cross'])

    font_id_emb_lookup = etc_layers.EmbeddingLookup(
        vocab_size=generate_examples_lib.FONT_ID_VOCAB_SIZE,
        embedding_size=model_config.hidden_size,
        initializer_range=initializer_range,
        use_one_hot_lookup=flags.use_tpu,
        name='font_id_emb_lookup')
    global_embedding_adder += font_id_emb_lookup(features['global_font_ids'])

    parent_font_id_emb_lookup = etc_layers.EmbeddingLookup(
        vocab_size=generate_examples_lib.FONT_ID_VOCAB_SIZE,
        embedding_size=model_config.hidden_size,
        initializer_range=initializer_range,
        use_one_hot_lookup=flags.use_tpu,
        name='parent_font_id_emb_lookup')
    global_embedding_adder += parent_font_id_emb_lookup(
        features['global_parent_font_ids'])

    # Add a transformation of the dense features.
    dense_feature_projection = tf.keras.layers.Dense(
        units=model_config.hidden_size,
        activation=tensor_utils.get_activation('gelu'),
        kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
        name='dense_feature_projection')
    dense_feature_embeddings = dense_feature_projection(
        features['global_dense_features'])
    if flags.extra_dense_feature_layers > 1:
        raise NotImplementedError(
            '`extra_dense_feature_layers` must be at most 1.')
    elif flags.extra_dense_feature_layers == 1:
        dense_feature_layer2 = tf.keras.layers.Dense(
            units=model_config.hidden_size,
            activation=tensor_utils.get_activation('gelu'),
            kernel_initializer=tf.keras.initializers.TruncatedNormal(
                stddev=0.02),
            name='dense_feature_layer2')
        dense_feature_embeddings = dense_feature_layer2(
            dense_feature_embeddings)
    global_embedding_adder += dense_feature_embeddings

    return global_embedding_adder
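

# Hedged usage sketch (an addition, not from the original module): it shows the
# feature keys and tensor shapes `_create_global_visual_feature_embeddings`
# expects. The placeholder `model_config` and `flags` below carry only the
# attributes read above; all sizes are illustrative.
def _example_global_visual_feature_embeddings() -> tf.Tensor:
    import types  # Used only to build lightweight stand-in config objects.

    model_config = types.SimpleNamespace(hidden_size=16)
    flags = types.SimpleNamespace(
        indicators_to_cross=['is_bold', 'is_heading'],  # 2 flags -> 4 ids.
        use_tpu=False,
        extra_dense_feature_layers=0)

    batch_size, global_len, num_dense = 2, 8, 5
    features = {
        'global_indicator_cross': tf.zeros([batch_size, global_len], tf.int32),
        'global_font_ids': tf.zeros([batch_size, global_len], tf.int32),
        'global_parent_font_ids': tf.zeros([batch_size, global_len], tf.int32),
        'global_dense_features': tf.zeros([batch_size, global_len, num_dense],
                                          tf.float32),
    }
    # Result shape: [batch_size, global_len, model_config.hidden_size].
    return _create_global_visual_feature_embeddings(model_config, features,
                                                    flags)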
    def test_embedding_lookup_1d_ids(self, use_one_hot_lookup):
        embedding_table = tf.constant([
            [1.0, -1.0],  #
            [1.1, -1.1],  #
            [1.2, -1.2],  #
            [1.3, -1.3],  #
            [1.4, -1.4],  #
        ])
        vocab_size, embedding_size = embedding_table.shape.as_list()

        input_ids = tf.constant([1, 0, 0, 3])
        input_mask = tf.constant([1, 1, 0, 1])

        layer = etc_layers.EmbeddingLookup(
            vocab_size=vocab_size,
            embedding_size=embedding_size,
            use_one_hot_lookup=use_one_hot_lookup)
        layer.build(None)  # Shapes are unused so we pass None.
        layer.embedding_table = embedding_table

        expected = [
            [1.1, -1.1],  #
            [1.0, -1.0],  #
            [0.0, 0.0],  #
            [1.3, -1.3],  #
        ]
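        # The third row is zeroed by `input_mask`, even though its id (0) maps
        # to [1.0, -1.0] in the table.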
        result = layer(input_ids, input_mask=input_mask)

        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertAllClose(expected, result)
    def test_embedding_lookup_with_projection(self):
        # Create an embedding table with width != projection_size
        embedding_table = tf.constant([
            [1.0, -1.0, 0.5],  #
            [1.1, -1.1, -0.4],  #
            [1.2, -1.2, -0.5],  #
            [1.3, -1.3, 0.8],  #
            [1.4, -1.4, 0.9],  #
        ])

        projection_size = 2  # Different from `embedding_size`.
        vocab_size, embedding_size = embedding_table.shape.as_list()
        input_ids = tf.constant([
            [3, 2, 1],  #
            [4, 0, 4],  #
        ])

        input_mask = tf.constant([
            [1, 0, 0],  #
            [0, 0, 1],  #
        ])

        layer = etc_layers.EmbeddingLookup(vocab_size=vocab_size,
                                           embedding_size=embedding_size,
                                           projection_size=projection_size,
                                           use_one_hot_lookup=True)

        layer.build(None)  # Shapes are unused so we pass None.
        layer.embedding_table = embedding_table

        # Dense layer to use for the projection. Note that we use a non-zero
        # bias initializer here to verify that the bias term doesn't leak into
        # the masked positions after projection.
        layer.embedding_projection = tf.keras.layers.Dense(
            units=projection_size,
            activation=None,
            use_bias=True,
            kernel_initializer='ones',
            bias_initializer='ones')

        expected = [
            [
                [1.8, 1.8],  # [1.3, -1.3, 0.8] * kernel_initializer + 1 (bias).
                [0., 0.],  #
                [0., 0.],  #
            ],  #
            [
                [0., 0.],  #
                [0., 0.],  #
                [1.9, 1.9],  # [1.4, -1.4, 0.9] * kernel_initializer + 1 (bias).
            ],  #
        ]
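        # Worked check for the non-zero rows above: with a 'ones' kernel the
        # projection of a row is just the sum of its components, plus the 1.0
        # bias per output unit:
        #   1.3 - 1.3 + 0.8 = 0.8; 0.8 + 1.0 = 1.8
        #   1.4 - 1.4 + 0.9 = 0.9; 0.9 + 1.0 = 1.9
        # The masked positions stay exactly 0.0, confirming the bias does not
        # leak through the mask.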
        result = layer(input_ids, input_mask)

        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertAllClose(expected, result)
    def test_embedding_lookup_random_init_no_mask(self, use_one_hot_lookup):
        vocab_size = 5
        embedding_size = 2

        input_ids = tf.constant([1, 0, 0, 3])
        input_size = input_ids.shape.as_list()[0]

        layer = etc_layers.EmbeddingLookup(
            vocab_size=vocab_size,
            embedding_size=embedding_size,
            use_one_hot_lookup=use_one_hot_lookup)

        result = layer(input_ids)

        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.evaluate(result)
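        # Only the output shape is asserted: the embedding table is randomly
        # initialized here, so there are no fixed values to compare against.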
        self.assertAllEqual([input_size, embedding_size], result.shape)
  def test_embedding_lookup_no_projection(self, projection_size):
    # Create an embedding table with width = projection_size
    embedding_table = tf.constant([
        [1.0, -1.0, 0.5],  #
        [1.1, -1.1, -0.5],  #
        [1.2, -1.2, -0.2],  #
        [1.3, -1.3, 0.3],  #
        [1.4, -1.4, 0.4],  #
    ])
    vocab_size, embedding_size = embedding_table.shape.as_list()

    input_ids = tf.constant([
        [3, 2, 1],  #
        [4, 0, 4],  #
    ])

    layer = etc_layers.EmbeddingLookup(
        vocab_size=vocab_size,
        embedding_size=embedding_size,
        projection_size=projection_size,
        use_one_hot_lookup=True)

    layer.build(None)  # Shapes are unused so we pass None.
    layer.embedding_table = embedding_table

    expected = [
        [
            [1.3, -1.3, 0.3],  #
            [1.2, -1.2, -0.2],  #
            [1.1, -1.1, -0.5],  #
        ],  #
        [
            [1.4, -1.4, 0.4],  #
            [1.0, -1.0, 0.5],  #
            [1.4, -1.4, 0.4],  #
        ],  #
    ]
    result = layer(input_ids)

    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertAllClose(expected, result)
  def __init__(self,
               config: EtcConfig,
               is_training: Optional[bool] = None,
               use_one_hot_embeddings=False,
               use_one_hot_relative_embeddings=False,
               name: Text = "etc_document_bert",
               **kwargs):
    """Constructor for `EtcModel`.

    Args:
      config: `EtcConfig` instance.
      is_training: Optional bool. True for training model, False for eval model.
        The None default will defer to the typical Keras `training` argument in
        `call` instead. When `is_training` is specified here, the `training`
        argument from `call` must not be used.
      use_one_hot_embeddings: (optional) bool. Whether to use one-hot word
        embeddings or tf.nn.embedding_lookup() for the word embeddings.
      use_one_hot_relative_embeddings: (optional) bool. Whether to use one-hot
        embeddings or tf.nn.embedding_lookup() for the relative position
        embeddings.
      name: (Optional) name of the layer.
      **kwargs: Forwarded to super.

    Raises:
      ValueError: The config is invalid.
    """
    super(EtcModel, self).__init__(name=name, **kwargs)

    config = copy.deepcopy(config)
    if is_training is not None and not is_training:
      config.hidden_dropout_prob = 0.0
      config.attention_probs_dropout_prob = 0.0

    self.config = config
    self.is_training = is_training
    self.use_one_hot_embeddings = use_one_hot_embeddings
    self.use_one_hot_relative_embeddings = use_one_hot_relative_embeddings

    if config.relative_vocab_size is None:
      if config.relative_pos_max_distance != 0:
        raise ValueError(
            "`relative_pos_max_distance` must be 0 when `relative_vocab_size` "
            "is None.")
    elif config.relative_vocab_size < (feature_utils.RelativePositionGenerator(
        config.relative_pos_max_distance).relative_vocab_size +
                                       _NUM_OTHER_RELATIVE_IDS):
      raise ValueError("`relative_vocab_size` ({}) too small for "
                       "`relative_pos_max_distance` ({})".format(
                           config.relative_vocab_size,
                           config.relative_pos_max_distance))
    if config.embedding_size is None:
      config.embedding_size = config.hidden_size

    self.token_embedding = etc_layers.EmbeddingLookup(
        vocab_size=config.vocab_size,
        embedding_size=config.embedding_size,
        projection_size=config.hidden_size,
        initializer_range=config.initializer_range,
        use_one_hot_lookup=use_one_hot_embeddings,
        name="token_emb_lookup")

    self.token_embedding_norm = tf.keras.layers.LayerNormalization(
        axis=-1, epsilon=1e-12, name="long_emb_layer_norm")
    self.token_embedding_dropout = tf.keras.layers.Dropout(
        rate=config.hidden_dropout_prob)

    self.segment_embedding = etc_layers.EmbeddingLookup(
        vocab_size=config.segment_vocab_size,
        embedding_size=config.hidden_size,
        initializer_range=config.initializer_range,
        use_one_hot_lookup=True,
        name="segment_emb_lookup")

    if config.max_absolute_position_embeddings != 0:
      self.position_embedding = etc_layers.EmbeddingLookup(
          vocab_size=config.max_absolute_position_embeddings,
          embedding_size=config.hidden_size,
          initializer_range=config.initializer_range,
          use_one_hot_lookup=use_one_hot_embeddings,
          name="position_emb_lookup_long")
      # We use `max_absolute_position_embeddings` for the maximum global input
      # length even though it's larger than we need. This makes it easier to
      # initialize both long and global position embedding tables with the same
      # values if desired.
      self.global_position_embedding = etc_layers.EmbeddingLookup(
          vocab_size=config.max_absolute_position_embeddings,
          embedding_size=config.hidden_size,
          initializer_range=config.initializer_range,
          use_one_hot_lookup=use_one_hot_embeddings,
          name="position_emb_lookup_global")
      # Call layers to force variable initialization.
      self.position_embedding(tf.ones([1, 1], tf.int32))
      self.global_position_embedding(tf.ones([1, 1], tf.int32))
    else:
      self.position_embedding = None
      self.global_position_embedding = None

    # We use the same embedding table for global tokens to make it easy to place
    # WordPieces in the global memory for finetuning tasks downstream.
    self.global_token_embedding = self.token_embedding
    self.global_token_embedding_norm = tf.keras.layers.LayerNormalization(
        axis=-1, epsilon=1e-12, name="global_emb_layer_norm")
    self.global_token_embedding_dropout = tf.keras.layers.Dropout(
        rate=config.hidden_dropout_prob)

    self.global_local_transformer = etc_layers.GlobalLocalTransformerLayers(
        long_hidden_size=config.hidden_size,
        global_hidden_size=config.hidden_size,
        num_hidden_layers=config.num_hidden_layers,
        num_attention_heads=config.num_attention_heads,
        local_radius=config.local_radius,
        att_size_per_head=config.att_size_per_head,
        long_intermediate_size=config.intermediate_size,
        global_intermediate_size=config.intermediate_size,
        hidden_act=tensor_utils.get_activation(config.hidden_act),
        hidden_dropout_prob=config.hidden_dropout_prob,
        attention_probs_dropout_prob=config.attention_probs_dropout_prob,
        initializer_range=config.initializer_range,
        relative_vocab_size=config.relative_vocab_size,
        share_feed_forward_params=config.share_feed_forward_params,
        share_kv_projections=config.share_kv_projections,
        share_qkv_projections=config.share_qkv_projections,
        share_att_output_projection=config.share_att_output_projection,
        use_pre_activation_order=config.use_pre_activation_order,
        use_one_hot_lookup=use_one_hot_relative_embeddings,
        grad_checkpointing_period=config.grad_checkpointing_period)
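

# Hedged construction sketch (an addition, not from the original file): the
# `EtcModel(...)` call mirrors the constructor above, while the `EtcConfig`
# keyword arguments are assumed to match the config attributes read in
# `__init__`; the numeric values are placeholders only.
def _example_build_etc_model():
  config = EtcConfig(
      vocab_size=30522,
      hidden_size=768,
      num_hidden_layers=12,
      num_attention_heads=12,
      local_radius=84,
      relative_vocab_size=64,
      relative_pos_max_distance=12,
      max_absolute_position_embeddings=512)
  return EtcModel(config=config, is_training=False,
                  use_one_hot_embeddings=False)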