Exemple #1
0
    def build(self, input_shape):
        """Instantiate the sub-layers of this transformer block.

        Sets up, in order: the ``MultiAttentionLayer`` (named ``"self"``),
        the attention output projection, the intermediate projection, the
        final output projection, one dropout layer and two layer
        normalisation layers.

        Args:
            input_shape: Shape of the layer input; stored in
                ``self.input_spec``.
        """
        self.input_spec = tf.keras.layers.InputSpec(shape=input_shape)

        def _dense(units, activation=None):
            # Every projection in this block shares the name "dense" and gets
            # its own freshly created initializer.
            return tf.keras.layers.Dense(
                units,
                activation=activation,
                kernel_initializer=create_initializer(self.initializer_range),
                name="dense")

        def _layer_norm():
            return tf.keras.layers.LayerNormalization(axis=-1,
                                                      name="LayerNorm")

        # Width of a single attention head.
        per_head_width = int(self.hidden_size / self.num_attention_heads)

        self.attention_layer = MultiAttentionLayer(
            name="self",
            num_attention_heads=self.num_attention_heads,
            size_per_head=per_head_width,
            attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            initializer_range=self.initializer_range,
            do_return_2d_tensor=True,
            batch_size=self.batch_size,
            # Source and target sequence lengths are both self.seq_length.
            from_seq_length=self.seq_length,
            to_seq_length=self.seq_length)

        self.attention_output_layer = _dense(self.hidden_size)
        self.inter_output = _dense(self.intermediate_size,
                                   activation=self.intermediate_act_fn)
        self.layer_out = _dense(self.hidden_size)

        self.dropout = tf.keras.layers.Dropout(self.hidden_dropout_prob)
        self.layer_norm = _layer_norm()
        self.out_layer_norm = _layer_norm()

        self.built = True
Exemple #2
0
    def build(self, input_shape):
        """Create the feed-forward sub-layers.

        The expansion layer ``c_fc`` has four times as many units as the
        last dimension of ``input_shape``; ``c_proj`` projects back to that
        last dimension.

        Args:
            input_shape: Shape of the layer input. Only its last entry is
                read, and it is used as the hidden width.
        """
        hidden_size = input_shape[-1]
        self.c_fc = tf.keras.layers.Dense(
            hidden_size * 4,
            name="c_fc",
            kernel_initializer=create_initializer(self.initializer_range))

        self.c_proj = tf.keras.layers.Dense(
            hidden_size,
            name="c_proj",
            kernel_initializer=create_initializer(self.initializer_range))
        self.act = get_activation('gelu')

        self.dropout = tf.keras.layers.Dropout(self.resid_pdrop_rate)

        # Flag the layer as built so that a direct call to build() is not
        # followed by a second build (which would replace the sub-layers
        # created above) the first time the layer is called.
        self.built = True
Exemple #3
0
    def build(self, input_shape):
        """Instantiate embeddings, the transformer stack and the final norm.

        Args:
            input_shape: Shape of the layer input. It is not read here.
        """
        # Token embedding.
        self.token_embedding = WTEmbedding(
            name="wte",
            vocab_size=self.vocab_size,
            embedding_size=self.hidden_size,
            initializer_range=self.initializer_range,
            word_embedding_name="embedding")

        # Position embedding.
        self.posembedding = tf.keras.layers.Embedding(
            self.max_position_length,
            self.hidden_size,
            name="wpe",
            embeddings_initializer=create_initializer(self.initializer_range))

        self.embedding_drop = tf.keras.layers.Dropout(self.embedding_drop_rate)

        # Settings shared by every transformer block; only the name differs.
        block_settings = dict(
            num_attention_heads=self.num_attention_heads,
            attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            initializer_range=self.initializer_range,
            epsilon=self.layer_norm_epsilon,
            resid_out_rate=self.resid_out_rate)

        self.encoder_layers = []
        for layer_idx in range(self.num_hidden_layers):
            # self.encoder_layer ends up referring to the last block created.
            self.encoder_layer = GPT2Transformer(
                name="h{}".format(layer_idx), **block_settings)
            self.encoder_layers.append(self.encoder_layer)

        # Final layer normalisation (a custom normalization.GPTNorm was an
        # earlier alternative here).
        self.ln_f = tf.keras.layers.LayerNormalization(
            name='ln_f', epsilon=self.layer_norm_epsilon)

        self.built = True
Exemple #4
0
    def build(self, input_shape):
        """Create the fused attention projection, output projection and dropouts.

        Args:
            input_shape: Shape of the layer input. Its last entry divided by
                ``self.num_attention_heads`` gives ``self.size_per_head``.
        """
        per_head = int(input_shape[-1] / self.num_attention_heads)
        self.size_per_head = per_head

        # Combined width of all heads (the original note gives 12*64 as an
        # example).
        all_heads_width = self.num_attention_heads * per_head

        # One Dense layer three times the all-heads width, named "c_attn".
        self.c_att = tf.keras.layers.Dense(
            all_heads_width * 3,
            kernel_initializer=create_initializer(self.initializer_range),
            name="c_attn")
        self.c_proj = tf.keras.layers.Dense(
            all_heads_width,
            kernel_initializer=create_initializer(self.initializer_range),
            name="c_proj")

        self.resid_out = tf.keras.layers.Dropout(self.resid_out_rate)
        self.drop_out = tf.keras.layers.Dropout(
            self.attention_probs_dropout_prob)

        self.built = True
Exemple #5
0
    def build(self, input_shape):
        """Create the query/key/value projections and the attention dropout.

        Args:
            input_shape: Shape of the layer input. ``input_shape[2]`` divided
                by ``self.num_attention_heads`` gives the per-head size.
        """
        self.input_spec = tf.keras.layers.InputSpec(shape=input_shape)
        size_per_head = int(input_shape[2] / self.num_attention_heads)

        def _projection(activation, name):
            # The three projections differ only in activation and name; each
            # gets its own initializer instance.
            return dense.DenseLayer3d(
                self.num_attention_heads,
                size_per_head,
                create_initializer(self.initializer_range),
                activation,
                self.use_einsum,
                name)

        self.q = _projection(self.query_act, "query")
        self.k = _projection(self.key_act, "key")
        self.v = _projection(self.value_act, "value")

        self.drop_out = tf.keras.layers.Dropout(
            self.attention_probs_dropout_prob)
        self.built = True
Exemple #6
0
 def build(self, input_shape):
     """Create the weight ``w`` and bias ``b`` variables.

     Args:
         input_shape: Shape of the layer input. Its last entry is stored
             as ``self.nx`` and used as the weight's middle dimension.
     """
     self.nx = input_shape[-1]
     # The variable name is passed by keyword: the position of ``name`` in
     # ``add_weight`` differs between Keras versions (``shape`` comes first
     # in Keras 3), so a positional name is not portable.
     self.weight = self.add_weight(
         name="w",
         shape=[1, self.nx, self.nf],
         initializer=create_initializer(self.initializer_range))
     self.bias = self.add_weight(
         name="b",
         shape=[self.nf],
         initializer=tf.zeros_initializer())
     self.built = True
Exemple #7
0
 def build(self, input_shape):
     """Create the trainable embedding table.

     Args:
         input_shape: Shape of the layer input; stored in
             ``self.input_spec``.
     """
     self.input_spec = tf.keras.layers.InputSpec(shape=input_shape)

     # One row per vocabulary entry, one column per embedding dimension.
     table_shape = [self.vocab_size, self.embedding_size]
     self.embedding_table = self.add_weight(
         name=self.word_embedding_name,
         shape=table_shape,
         dtype=tf.keras.backend.floatx(),
         trainable=True,
         initializer=create_initializer(self.initializer_range))

     self.built = True
    def build(self, input_shape):
        """Instantiate the sub-layers of this attention + feed-forward block.

        Sets up, in order: the ``ALBERTAttention`` layer, the 3-D projection
        applied after attention, the intermediate and output 2-D dense
        layers, two dropout layers and two layer normalisation layers.

        Args:
            input_shape: Shape of the layer input; stored in
                ``self.input_spec``.
        """
        self.input_spec = tf.keras.layers.InputSpec(shape=input_shape)

        def _dense_2d(width, activation):
            # Both 2-D dense layers share everything except width and
            # activation.
            return dense.DenseLayer2d(
                width,
                create_initializer(self.initializer_range),
                activation,
                use_einsum=self.use_einsum,
                num_attention_heads=self.num_attention_heads,
                name="dense")

        def _layer_norm():
            return tf.keras.layers.LayerNormalization(
                axis=-1, name="LayerNorm")

        # The attention layer is always built with use_einsum=True,
        # regardless of self.use_einsum.
        self.attention = ALBERTAttention(
            name='self',
            num_attention_heads=self.num_attention_heads,
            attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            initializer_range=self.initializer_range,
            use_einsum=True,
        )

        self.dense_layer_3d_proj = dense.DenseLayer3dProj(
            self.hidden_size,
            self.attention_head_size,
            create_initializer(self.initializer_range),
            None,
            name="dense",
            use_einsum=self.use_einsum)

        self.dense_layer_2d = _dense_2d(self.intermediate_size,
                                        self.intermediate_act_fn)
        self.out_dense_layer_2d = _dense_2d(self.hidden_size, None)

        # Separate dropout and normalisation layers for the attention and
        # feed-forward parts (per the attribute names).
        self.attdropout = tf.keras.layers.Dropout(self.hidden_dropout_prob)
        self.ffdropout = tf.keras.layers.Dropout(self.hidden_dropout_prob)
        self.attlayer_norm = _layer_norm()
        self.ffnlayer_norm = _layer_norm()

        self.built = True
Exemple #9
0
 def build(self, input_shape):
     """Create the query, key and value Dense layers and the dropout.

     Each projection has ``num_attention_heads * size_per_head`` output
     units.

     Args:
         input_shape: Shape of the layer input; stored in
             ``self.input_spec``.
     """
     self.input_spec = tf.keras.layers.InputSpec(shape=input_shape)

     def _projection(activation, name):
         # All three projections share width and initializer settings; each
         # receives its own initializer instance.
         return tf.keras.layers.Dense(
             self.num_attention_heads * self.size_per_head,
             activation=activation,
             kernel_initializer=create_initializer(self.initializer_range),
             name=name)

     self._query_layer = _projection(self.query_act, "query")
     self._key_layer = _projection(self.key_act, "key")
     self._value_layer = _projection(self.value_act, "value")

     self.drop_out = tf.keras.layers.Dropout(
         self.attention_probs_dropout_prob)
     self.built = True
Exemple #10
0
    def build(self, input_shape):
        """Create the token-type and position embedding tables.

        Also instantiates the dropout and layer normalisation layers.

        Args:
            input_shape: Shape of the layer input. It is stored in
                ``self.input_spec`` and ``input_shape[2]`` is used as the
                width of both embedding tables.
        """
        self.input_spec = tf.keras.layers.InputSpec(shape=input_shape)

        table_width = input_shape[2]

        def _table(name, num_rows):
            # Trainable [num_rows, table_width] variable with a fresh
            # initializer.
            return self.add_weight(
                name=name,
                shape=[num_rows, table_width],
                dtype=tf.keras.backend.floatx(),
                initializer=create_initializer(self.initializer_range),
                trainable=True)

        self.token_type_table = _table(self.token_type_embedding_name,
                                       self.token_type_vocab_size)
        self.full_position_embeddings = _table(self.position_embedding_name,
                                               self.max_position_embeddings)

        self.drop_out = tf.keras.layers.Dropout(self.hidden_dropout_prob)
        self.layer_norm = tf.keras.layers.LayerNormalization(
            axis=-1, name="LayerNorm")

        self.built = True
Exemple #11
0
    def build(self, input_shape):
        """Instantiate the embeddings, the encoder block and the pooler.

        Sets up, in order: the word embedding layer, the segment/position
        embedding layer, the ``embedding_hidden_mapping_in`` dense layer, a
        single ``AlbertTransformer`` block (named ``inner_group_0``) and the
        tanh pooler dense layer.

        Args:
            input_shape: Shape of the layer input. It is not read here.
        """
        self.token_embedding = WDEmbedding(vocab_size=self.vocab_size,
                                           embedding_size=self.embedding_size,
                                           initializer_range=self.initializer_range,
                                           word_embedding_name="word_embeddings",
                                           use_one_hot_embedding=self.use_one_hot_embedding,
                                           name="embeddings")
        # Segment and position embedding.
        self.segposembedding = SegPosEmbedding(use_token_type=True,
                                               hidden_dropout_prob=self.hidden_dropout_prob,
                                               token_type_vocab_size=self.type_vocab_size,
                                               token_type_embedding_name="token_type_embeddings",
                                               use_position_embeddings=True,
                                               position_embedding_name="position_embeddings",
                                               initializer_range=self.initializer_range,
                                               max_position_embeddings=self.max_position_embeddings,
                                               use_one_hot_embedding=self.use_one_hot_embedding,
                                               name="embeddings"
                                               )
        # Dense layer producing hidden_size outputs, named
        # "embedding_hidden_mapping_in".
        self.shape_change = dense.DenseLayer2d(
            self.hidden_size,
            create_initializer(self.initializer_range),
            None,
            use_einsum=self.use_einsum,
            name="embedding_hidden_mapping_in",
        )

        # Only one transformer block is created here; it is always built
        # with use_einsum=True, regardless of self.use_einsum.
        self.encoder_layer = AlbertTransformer(
            hidden_size=self.hidden_size,
            num_attention_heads=self.num_attention_heads,
            attention_head_size=self.attention_head_size,
            attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            intermediate_size=self.intermediate_size,
            intermediate_act_fn=get_activation(self.hidden_act),
            initializer_range=self.initializer_range,
            hidden_dropout_prob=self.hidden_dropout_prob,
            use_einsum=True,
            name="inner_group_{}".format(0)
        )

        # Pooler: initialised like every other dense layer in this model
        # instead of falling back to the Keras default initializer.
        self.pool_out = tf.keras.layers.Dense(
            self.hidden_size,
            activation=tf.tanh,
            kernel_initializer=create_initializer(self.initializer_range),
            name="dense")
        self.built = True