Code example #1
    def __init__(self, embed_dim, ff_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.ff_dim = ff_dim
        self.num_heads = num_heads
        self.attention_1 = layers.MultiHeadAttention(num_heads=num_heads,
                                                     key_dim=embed_dim,
                                                     dropout=0.1)
        self.attention_2 = layers.MultiHeadAttention(num_heads=num_heads,
                                                     key_dim=embed_dim,
                                                     dropout=0.1)
        self.ffn_layer_1 = layers.Dense(ff_dim, activation="relu")
        self.ffn_layer_2 = layers.Dense(embed_dim)

        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()
        self.layernorm_3 = layers.LayerNormalization()

        self.embedding = PositionalEmbedding(embed_dim=EMBED_DIM,
                                             sequence_length=SEQ_LENGTH,
                                             vocab_size=VOCAB_SIZE)
        self.out = layers.Dense(VOCAB_SIZE, activation="softmax")

        self.dropout_1 = layers.Dropout(0.3)
        self.dropout_2 = layers.Dropout(0.5)
        self.supports_masking = True
Code example #2
File: transformer.py (project: 1170300521/MCN)
 def __init__(self, embed_dim, ff_dim=2048, num_heads=4, rate=0.1):
     super(TransformerDecoderLayer, self).__init__()
     self.relation_att = layers.MultiHeadAttention(num_heads, embed_dim)
     self.align_att = layers.MultiHeadAttention(num_heads, embed_dim)
     self.ffn = tf.keras.Sequential(
         [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim)]
     )
     self.dropout1 = layers.Dropout(rate)
     self.dropout2 = layers.Dropout(rate)
     self.dropout3 = layers.Dropout(rate)
     self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
     self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
     self.layernorm3 = layers.LayerNormalization(epsilon=1e-6)
Code example #3
 def _attention_builder(query, value):
     # head_num, key_dim, trainable and name are presumably supplied by the
     # enclosing builder's scope; this helper just applies a fresh
     # MultiHeadAttention layer to the given query/value tensors.
     return layers.MultiHeadAttention(
         num_heads=head_num,
         key_dim=key_dim,
         trainable=trainable,
         name=name,
     )(query, value)
Code example #4
def create_transformer_module(
    latent_dim,
    projection_dim,
    num_heads,
    num_transformer_blocks,
    ffn_units,
    dropout_rate,
):

    # input_shape: [1, latent_dim, projection_dim]
    inputs = layers.Input(shape=(latent_dim, projection_dim))

    x0 = inputs
    # Create multiple layers of the Transformer block.
    for _ in range(num_transformer_blocks):
        # Apply layer normalization 1.
        x1 = layers.LayerNormalization(epsilon=1e-6)(x0)
        # Create a multi-head self-attention layer.
        attention_output = layers.MultiHeadAttention(num_heads=num_heads,
                                                     key_dim=projection_dim,
                                                     dropout=0.1)(x1, x1)
        # Skip connection 1.
        x2 = layers.Add()([attention_output, x0])
        # Apply layer normalization 2.
        x3 = layers.LayerNormalization(epsilon=1e-6)(x2)
        # Apply Feedforward network.
        ffn = create_ffn(hidden_units=ffn_units, dropout_rate=dropout_rate)
        x3 = ffn(x3)
        # Skip connection 2.
        x0 = layers.Add()([x3, x2])

    # Create the Keras model.
    model = keras.Model(inputs=inputs, outputs=x0)
    return model
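The loop above relies on a create_ffn() helper that is not shown. A plausible, hypothetical stand-in, assuming a small Dense/GELU stack with dropout packaged as a keras.Sequential (the original project's sizes and activation may differ):

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

def create_ffn(hidden_units, dropout_rate):
    # Hypothetical sketch: one Dense (GELU) + Dropout pair per entry in hidden_units.
    ffn_layers = []
    for units in hidden_units:
        ffn_layers.append(layers.Dense(units, activation=tf.nn.gelu))
        ffn_layers.append(layers.Dropout(dropout_rate))
    return keras.Sequential(ffn_layers)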
Code example #5
    def create_model(self):
        inputs = layers.Input(shape=(self.num_of_frames, self.frame_size))

        # Attention and Normalization
        res = inputs
        x = layers.MultiHeadAttention(key_dim=256, num_heads=32,
                                      dropout=.5)(inputs, inputs)
        x = layers.Dropout(.5)(x)
        x = layers.LayerNormalization()(x)
        res += x

        # Feed Forward Part
        x = layers.Conv1D(filters=512, kernel_size=1)(res)
        x = layers.PReLU()(x)
        x = layers.Dropout(.5)(x)

        x = layers.Conv1D(filters=res.shape[-1], kernel_size=1)(x)
        x = layers.Dropout(.5)(x)
        x = layers.LayerNormalization()(x)
        x += res

        x = layers.GlobalAveragePooling1D()(x)

        outputs = layers.Dense(self.num_of_classes, activation='softmax')(x)
        self.model = tf.keras.Model(inputs, outputs)
Code example #6
def bert_module(query, key, value, i):
    # Multi headed self-attention
    attention_output = layers.MultiHeadAttention(
        num_heads=config.NUM_HEAD,
        key_dim=config.EMBED_DIM // config.NUM_HEAD,
        name="encoder_{}/multiheadattention".format(i),
    )(query, key, value)
    attention_output = layers.Dropout(
        0.1, name="encoder_{}/att_dropout".format(i))(attention_output)
    attention_output = layers.LayerNormalization(
        epsilon=1e-6,
        name="encoder_{}/att_layernormalization".format(i))(query +
                                                            attention_output)

    # Feed-forward layer
    ffn = keras.Sequential(
        [
            layers.Dense(config.FF_DIM, activation="relu"),
            layers.Dense(config.EMBED_DIM),
        ],
        name="encoder_{}/ffn".format(i),
    )
    ffn_output = ffn(attention_output)
    ffn_output = layers.Dropout(
        0.1, name="encoder_{}/ffn_dropout".format(i))(ffn_output)
    sequence_output = layers.LayerNormalization(
        epsilon=1e-6,
        name="encoder_{}/ffn_layernormalization".format(i))(attention_output +
                                                            ffn_output)
    return sequence_output
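bert_module() expects a module-level config object plus the usual keras/layers imports, none of which appear in the snippet. A minimal usage sketch with a hypothetical stand-in config (the field values are placeholders):

from types import SimpleNamespace
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Hypothetical stand-in for the config object the function expects.
config = SimpleNamespace(EMBED_DIM=128, NUM_HEAD=4, FF_DIM=256, NUM_LAYERS=2)

inputs = layers.Input(shape=(64, config.EMBED_DIM))
x = inputs
for i in range(config.NUM_LAYERS):
    # Self-attention: query, key and value are all the running sequence.
    x = bert_module(x, x, x, i)
encoder = keras.Model(inputs, x)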
Code example #7
def create_decoder(num_layers=DEC_LAYERS,
                   num_heads=DEC_NUM_HEADS,
                   image_size=IMAGE_SIZE):
    inputs = layers.Input((NUM_PATCHES, ENC_PROJECTION_DIM))
    x = layers.Dense(DEC_PROJECTION_DIM)(inputs)

    for _ in range(num_layers):
        # Layer normalization 1.
        x1 = layers.LayerNormalization(epsilon=LAYER_NORM_EPS)(x)

        # Create a multi-head attention layer.
        attention_output = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=DEC_PROJECTION_DIM, dropout=0.1)(x1,
                                                                          x1)

        # Skip connection 1.
        x2 = layers.Add()([attention_output, x])

        # Layer normalization 2.
        x3 = layers.LayerNormalization(epsilon=LAYER_NORM_EPS)(x2)

        # MLP.
        x3 = mlp(x3, hidden_units=DEC_TRANSFORMER_UNITS, dropout_rate=0.1)

        # Skip connection 2.
        x = layers.Add()([x3, x2])

    x = layers.LayerNormalization(epsilon=LAYER_NORM_EPS)(x)
    x = layers.Flatten()(x)
    pre_final = layers.Dense(units=image_size * image_size * 3,
                             activation="sigmoid")(x)
    outputs = layers.Reshape((image_size, image_size, 3))(pre_final)

    return keras.Model(inputs, outputs, name="mae_decoder")
Code example #8
def create_encoder(num_heads=ENC_NUM_HEADS, num_layers=ENC_LAYERS):
    inputs = layers.Input((None, ENC_PROJECTION_DIM))
    x = inputs

    for _ in range(num_layers):
        # Layer normalization 1.
        x1 = layers.LayerNormalization(epsilon=LAYER_NORM_EPS)(x)

        # Create a multi-head attention layer.
        attention_output = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=ENC_PROJECTION_DIM, dropout=0.1)(x1,
                                                                          x1)

        # Skip connection 1.
        x2 = layers.Add()([attention_output, x])

        # Layer normalization 2.
        x3 = layers.LayerNormalization(epsilon=LAYER_NORM_EPS)(x2)

        # MLP.
        x3 = mlp(x3, hidden_units=ENC_TRANSFORMER_UNITS, dropout_rate=0.1)

        # Skip connection 2.
        x = layers.Add()([x3, x2])

    outputs = layers.LayerNormalization(epsilon=LAYER_NORM_EPS)(x)
    return keras.Model(inputs, outputs, name="mae_encoder")
Code example #9
 def __init__(self, embed_dim, num_heads, feed_forward_dim, rate=0.1):
     super().__init__()
     self.self_att = layers.MultiHeadAttention(num_heads=num_heads,
                                               key_dim=embed_dim)
     self.enc_att = layers.MultiHeadAttention(num_heads=num_heads,
                                              key_dim=embed_dim)
     self.ffn = keras.Sequential([
         layers.Dense(feed_forward_dim, activation="relu"),
         layers.Dense(embed_dim),
     ])
     self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
     self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
     self.layernorm3 = layers.LayerNormalization(epsilon=1e-6)
     self.self_dropout = layers.Dropout(rate)
     self.enc_dropout = layers.Dropout(rate)
     self.ffn_dropout = layers.Dropout(rate)
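Only the constructor is shown. A self-contained sketch of how such a decoder layer is usually completed: causal self-attention over the target, cross-attention into the encoder output, then the feed-forward block. The wiring and the use_causal_mask flag (available in Keras 2.10+) are assumptions, not code from the original source:

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

class TransformerDecoderSketch(layers.Layer):
    def __init__(self, embed_dim, num_heads, feed_forward_dim, rate=0.1):
        super().__init__()
        self.self_att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.enc_att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = keras.Sequential([
            layers.Dense(feed_forward_dim, activation="relu"),
            layers.Dense(embed_dim),
        ])
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm3 = layers.LayerNormalization(epsilon=1e-6)
        self.self_dropout = layers.Dropout(rate)
        self.enc_dropout = layers.Dropout(rate)
        self.ffn_dropout = layers.Dropout(rate)

    def call(self, target, enc_out, training=False):
        # Masked self-attention over the target sequence.
        attn = self.self_att(target, target, use_causal_mask=True)
        target = self.layernorm1(target + self.self_dropout(attn, training=training))
        # Cross-attention: queries from the target, keys/values from the encoder.
        attn = self.enc_att(target, enc_out)
        target = self.layernorm2(target + self.enc_dropout(attn, training=training))
        # Position-wise feed-forward block.
        out = self.ffn(target)
        return self.layernorm3(target + self.ffn_dropout(out, training=training))

# Quick shape check with random tensors.
dec = TransformerDecoderSketch(embed_dim=64, num_heads=4, feed_forward_dim=128)
target = tf.random.normal((2, 10, 64))
enc_out = tf.random.normal((2, 20, 64))
print(dec(target, enc_out).shape)  # (2, 10, 64)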
Code example #10
def transformer_encoder(
    x,
    embedding_dim,
    mlp_dim,
    num_heads,
    dim_coefficient,
    attention_dropout,
    projection_dropout,
    attention_type="external_attention",
):
    residual_1 = x
    x = layers.LayerNormalization(epsilon=1e-5)(x)
    if attention_type == "external_attention":
        x = external_attention(
            x,
            embedding_dim,
            num_heads,
            dim_coefficient,
            attention_dropout,
            projection_dropout,
        )
    elif attention_type == "self_attention":
        x = layers.MultiHeadAttention(num_heads=num_heads,
                                      key_dim=embedding_dim,
                                      dropout=attention_dropout)(x, x)
    x = layers.add([x, residual_1])
    residual_2 = x
    x = layers.LayerNormalization(epsilon=1e-5)(x)
    x = mlp(x, embedding_dim, mlp_dim)
    x = layers.add([x, residual_2])
    return x
Code example #11
def classify_branch(input_shape=(256, 256, 32),
                    roi_pool_size=[10, 10],
                    num_bbox=400,
                    chan_num=3,
                    projection_dim=100,
                    transformer_layers=4,
                    num_heads=4,
                    crypt_class=False):
    Input_bbox = Input(shape=(num_bbox, 4))
    fmap = Input(shape=input_shape)

    # Transformer part =========
    pooled_features = ROIPoolingLayer(roi_pool_size[0],
                                      roi_pool_size[1])([fmap, Input_bbox])
    c_p_f = PatchEncoder_w_position(num_bbox, projection_dim,
                                    128)([pooled_features, Input_bbox])

    # Create multiple layers of the Transformer block.
    for _ in range(transformer_layers):
        # Layer normalization 1.
        x1 = layers.LayerNormalization(epsilon=1e-6)(c_p_f)
        # Create a multi-head attention layer.
        attention_output = layers.MultiHeadAttention(num_heads=num_heads,
                                                     key_dim=projection_dim,
                                                     dropout=0.15)(x1, x1)
        # Skip connection 1.
        x2 = layers.Add()([attention_output, c_p_f])
        # Layer normalization 2.
        x3 = layers.LayerNormalization(epsilon=1e-6)(x2)
        # MLP.
        x3 = mlp(x3,
                 hidden_units=[projection_dim * 2, projection_dim],
                 dropout_rate=0.15)
        # Skip connection 2.
        c_p_f = layers.Add()([x3, x2])

    # Create a [batch_size, projection_dim] tensor.
    c_p_f = layers.LayerNormalization(epsilon=1e-6)(c_p_f)
    c_p_f = layers.Dropout(0.3)(c_p_f)  # increased from 0.2

    clone = layers.Dense(1)(c_p_f)
    partial = layers.Dense(1)(c_p_f)
    fufi = layers.Dense(1)(c_p_f)
    clone = layers.Activation('sigmoid', dtype='float32', name='clone')(clone)
    partial = layers.Activation('sigmoid', dtype='float32',
                                name='partial')(partial)
    fufi = layers.Activation('sigmoid', dtype='float32', name='fufi')(fufi)
    if crypt_class:
        crypt = layers.Dense(1)(c_p_f)
        crypt = layers.Activation('sigmoid', dtype='float32',
                                  name='crclass')(crypt)
        just_trnsf = Model(inputs=[fmap, Input_bbox],
                           outputs=[clone, partial, fufi, crypt],
                           name="cpf")
    else:
        just_trnsf = Model(inputs=[fmap, Input_bbox],
                           outputs=[clone, partial, fufi],
                           name="cpf")
    return just_trnsf
Code example #12
 def __init__(self, embed_dim, latent_dim, num_heads, **kwargs):
     super(FNetDecoder, self).__init__(**kwargs)
     self.embed_dim = embed_dim
     self.latent_dim = latent_dim
     self.num_heads = num_heads
     self.attention_1 = layers.MultiHeadAttention(num_heads=num_heads,
                                                  key_dim=embed_dim)
     self.attention_2 = layers.MultiHeadAttention(num_heads=num_heads,
                                                  key_dim=embed_dim)
     self.dense_proj = keras.Sequential([
         layers.Dense(latent_dim, activation="relu"),
         layers.Dense(embed_dim),
     ])
     self.layernorm_1 = layers.LayerNormalization()
     self.layernorm_2 = layers.LayerNormalization()
     self.layernorm_3 = layers.LayerNormalization()
     self.supports_masking = True
Code example #13
 def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
     super().__init__(**kwargs)
     self.embed_dim = embed_dim
     self.dense_dim = dense_dim
     self.num_heads = num_heads
     self.attention = layers.MultiHeadAttention(num_heads=num_heads,
                                                key_dim=embed_dim)
     self.dense_proj = layers.Dense(embed_dim, activation="relu")
     self.layernorm_1 = layers.LayerNormalization()
Code example #14
File: model.py (project: lexafaxine/bertsum-keras)
    def __init__(self, d_model, heads, d_ff, dropout):
        super(TransformerEncoderLayer, self).__init__()

        self.self_attn = layers.MultiHeadAttention(num_heads=heads,
                                                   key_dim=d_model,
                                                   dropout=dropout)
        self.feed_forward = PositionwiseFeedForward(d_model, d_ff, dropout)
        self.layer_norm = layers.LayerNormalization(epsilon=1e-6)
        self.dropout = layers.Dropout(dropout)
Code example #15
 def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
     super(TransformerBlock, self).__init__()
     self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
     self.ffn = keras.Sequential(
         [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim),]
     )
     self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
     self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
     self.dropout1 = layers.Dropout(rate)
     self.dropout2 = layers.Dropout(rate)
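This constructor matches the widely copied Keras TransformerBlock, but the snippet omits the forward pass. A minimal, self-contained sketch of the usual wiring (post-norm residual connections, dropout applied only during training); the call() body is an assumption, not this project's code:

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

class TransformerBlockSketch(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super().__init__()
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim)]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=False):
        # Self-attention, residual connection, layer norm.
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        # Feed-forward, residual connection, layer norm.
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

# Apply the block to a batch of token embeddings.
x = tf.random.normal((2, 16, 32))  # (batch, seq_len, embed_dim)
block = TransformerBlockSketch(embed_dim=32, num_heads=2, ff_dim=64)
print(block(x).shape)  # (2, 16, 32)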
Code example #16
File: networks.py (project: ADGEfficiency/energy-py)
def attention(
    input_shape,
    outputs,
    size_scale=1,
):
    if isinstance(input_shape, tuple):
        mask = keras.Input(shape=(input_shape[0], input_shape[0]))
        inputs = keras.Input(shape=input_shape)
    else:
        #  input_shape is already a Keras tensor: reuse it directly as the
        #  model input (otherwise `inputs` would be undefined below).
        inputs = input_shape
        mask = keras.Input(shape=(input_shape.shape[1], input_shape.shape[1]))

    net = layers.MultiHeadAttention(num_heads=4, key_dim=32 * size_scale)(
        inputs, inputs, attention_mask=mask)
    net = layers.MultiHeadAttention(num_heads=4, key_dim=32 * size_scale)(net,
                                                                          net)
    net = layers.Flatten()(net)
    outputs = layers.Dense(outputs, activation="linear")(net)
    return [inputs, mask], outputs
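A minimal usage sketch for the helper above, with assumed shapes: build a Model from the returned inputs/outputs and run it on random data with an all-ones attention mask.

import tensorflow as tf
from tensorflow import keras

ins, outs = attention(input_shape=(8, 16), outputs=2)
model = keras.Model(inputs=ins, outputs=outs)

seq = tf.random.normal((4, 8, 16))   # (batch, timesteps, features)
mask = tf.ones((4, 8, 8))            # (batch, query_len, key_len)
print(model([seq, mask]).shape)      # (4, 2)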
Code example #17
 def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
     super().__init__()
     self.att = L.MultiHeadAttention(num_heads, key_dim=embed_dim)
     self.ffn = M.Sequential([
         L.Dense(ff_dim, activation='relu'),
         L.Dense(embed_dim),
     ])
     self.layernorm1 = L.LayerNormalization(epsilon=1e-6)
     self.layernorm2 = L.LayerNormalization(epsilon=1e-6)
     self.dropout1 = L.Dropout(rate)
     self.dropout2 = L.Dropout(rate)
Code example #18
 def __init__(self, embed_dim, num_heads, ffn, dropout_rate=0.1):
     super(TransformerBlock, self).__init__()
     self.att = layers.MultiHeadAttention(num_heads=num_heads,
                                          key_dim=embed_dim)
     # The ffn can be either a standard feedforward network or a switch
     # layer with a Mixture of Experts.
     self.ffn = ffn
     self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
     self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
     self.dropout1 = layers.Dropout(dropout_rate)
     self.dropout2 = layers.Dropout(dropout_rate)
Code example #19
    def __init__(self, num_heads=8, embed_dim=64, dense_dim=512, **kwargs):
        super().__init__(**kwargs)

        self.attention = layers.MultiHeadAttention(num_heads, embed_dim)
        self.dense_proj = keras.Sequential([
            layers.Dense(dense_dim, activation="relu"),
            layers.Dense(embed_dim),
        ])
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()
        self.supports_masking = True
Code example #20
def create_vit_classifier(input_shape, patch_size, num_patches,
                          transformer_layers, num_heads, transformer_units,
                          num_classes, mlp_head_units, data_augmentation,
                          projection_dim):
    inputs = layers.Input(shape=input_shape)

    # Augment data.
    augmented = data_augmentation(inputs)

    # Create patches.
    patches = Patches(patch_size)(augmented)

    # Encode patches.
    encoded_patches = PatchEncoder(num_patches, projection_dim)(patches)

    # Create multiple layers of the Transformer block.
    for _ in range(transformer_layers):
        # Layer normalization 1.
        x1 = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)

        # Create a multi-head attention layer.
        attention_output = layers.MultiHeadAttention(num_heads=num_heads,
                                                     key_dim=projection_dim,
                                                     dropout=0.1)(x1, x1)

        # Skip connection 1.
        x2 = layers.Add()([attention_output, encoded_patches])

        # Layer Normalization 2.
        x3 = layers.LayerNormalization(epsilon=1e-6)(x2)

        # MLP.
        x3 = mlp(x3, hidden_units=transformer_units, dropout_rate=0.1)

        # Skip connection 2.
        encoded_patches = layers.Add()([x3, x2])

    # Create a [batch_size, projection_dim] tensor.
    representation = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
    representation = layers.Flatten()(representation)
    representation = layers.Dropout(0.5)(representation)

    # Add MLP.
    features = mlp(representation,
                   hidden_units=mlp_head_units,
                   dropout_rate=0.5)

    # Classify outputs.
    logits = layers.Dense(num_classes)(features)

    # Create the keras model.
    model = keras.Model(inputs=inputs, outputs=logits)
    return model
Code example #21
 def __init__(self, embed_dim, num_heads, ffn_dim, dropout_rate=0.1):
     super(TransformerBlock, self).__init__()
     self.att = layers.MultiHeadAttention(num_heads=num_heads,
                                          key_dim=embed_dim,
                                          dropout=dropout_rate)
     self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
     self.leakyrelu = layers.LeakyReLU()
     self.ffn = layers.Dense(units=ffn_dim)
     self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
     self.dropout1 = layers.Dropout(dropout_rate)
     self.dropout2 = layers.Dropout(dropout_rate)
     self.out = layers.Flatten()
Code example #22
 def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
     super().__init__()  # initialize the base Layer class
     self.att = layers.MultiHeadAttention(num_heads=num_heads,
                                          key_dim=embed_dim)
     self.ffn = keras.Sequential([
         layers.Dense(ff_dim, activation='relu'),
         layers.Dense(embed_dim),
     ])
     self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
     self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
     self.dropout1 = layers.Dropout(rate)
     self.dropout2 = layers.Dropout(rate)
Code example #23
    def __init__(
        self, num_heads=8, embed_dim=64, dense_dim=512, batch_size=32, **kwargs
    ):
        super().__init__(**kwargs)

        self.partition_padding = PartitionPadding(batch_size)
        self.attention = layers.MultiHeadAttention(num_heads, embed_dim)
        self.dense_proj = keras.Sequential(
            [layers.Dense(dense_dim, activation="relu"), layers.Dense(embed_dim),]
        )
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()
        self.average_pooling = layers.GlobalAveragePooling1D()
Code example #24
 def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
     super().__init__(**kwargs)
     self.embed_dim = embed_dim
     self.dense_dim = dense_dim
     self.num_heads = num_heads
     self.attention = layers.MultiHeadAttention(
         num_heads=num_heads, key_dim=embed_dim, dropout=0.3
     )
     self.dense_proj = keras.Sequential(
         [layers.Dense(dense_dim, activation=tf.nn.gelu), layers.Dense(embed_dim),]
     )
     self.layernorm_1 = layers.LayerNormalization()
     self.layernorm_2 = layers.LayerNormalization()
Code example #25
File: vivit.py (project: ksalama/keras-io)
def create_vivit_classifier(
    tubelet_embedder,
    positional_encoder,
    input_shape=INPUT_SHAPE,
    transformer_layers=NUM_LAYERS,
    num_heads=NUM_HEADS,
    embed_dim=PROJECTION_DIM,
    layer_norm_eps=LAYER_NORM_EPS,
    num_classes=NUM_CLASSES,
):
    # Get the input layer
    inputs = layers.Input(shape=input_shape)
    # Create patches.
    patches = tubelet_embedder(inputs)
    # Encode patches.
    encoded_patches = positional_encoder(patches)

    # Create multiple layers of the Transformer block.
    for _ in range(transformer_layers):
        # Layer normalization and MHSA
        x1 = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
        attention_output = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim // num_heads, dropout=0.1
        )(x1, x1)

        # Skip connection
        x2 = layers.Add()([attention_output, encoded_patches])

        # Layer Normalization and MLP
        x3 = layers.LayerNormalization(epsilon=1e-6)(x2)
        x3 = keras.Sequential(
            [
                layers.Dense(units=embed_dim * 4, activation=tf.nn.gelu),
                layers.Dense(units=embed_dim, activation=tf.nn.gelu),
            ]
        )(x3)

        # Skip connection
        encoded_patches = layers.Add()([x3, x2])

    # Layer normalization and Global average pooling.
    representation = layers.LayerNormalization(epsilon=layer_norm_eps)(encoded_patches)
    representation = layers.GlobalAvgPool1D()(representation)

    # Classify outputs.
    outputs = layers.Dense(units=num_classes, activation="softmax")(representation)

    # Create the Keras model.
    model = keras.Model(inputs=inputs, outputs=outputs)
    return model
Code example #26
    def __init__(self, units=512, num_heads=8, key_dim=64, drop_prob=0):
        super(TransformerBlock, self).__init__()

        self.drop_prob = drop_prob

        self.att = layers.MultiHeadAttention(num_heads=num_heads,
                                             key_dim=key_dim,
                                             dropout=self.drop_prob)
        self.add0 = layers.Add()
        self.layer_norm0 = layers.LayerNormalization()
        self.feed_fwd = FeedForward(units_w1=2048, units_w2=units)

        self.add1 = layers.Add()
        self.layer_norm1 = layers.LayerNormalization()
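FeedForward(units_w1=2048, units_w2=units) is a project-specific helper that is not shown. A hypothetical sketch of the two-layer position-wise feed-forward block it presumably implements:

from tensorflow.keras import layers

class FeedForward(layers.Layer):
    # Hypothetical stand-in: Dense expansion followed by a projection back down.
    def __init__(self, units_w1=2048, units_w2=512):
        super().__init__()
        self.w1 = layers.Dense(units_w1, activation="relu")
        self.w2 = layers.Dense(units_w2)

    def call(self, x):
        return self.w2(self.w1(x))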
Code example #27
File: layers.py (project: bitextor/bicleaner-ai)
 def __init__(self, embed_dim, num_heads, ff_dim, dropout=0.1):
     super(TransformerBlock, self).__init__()
     # Divide key dimension by the number of heads
     # so that each head is projected to a lower dimension
     # then gets concatenated
     self.att = layers.MultiHeadAttention(num_heads=num_heads,
                                          key_dim=embed_dim // num_heads)
     self.ffn = keras.Sequential([
         layers.Dense(ff_dim, activation="relu"),
         layers.Dense(embed_dim),
     ])
     self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
     self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
     self.dropout1 = layers.Dropout(dropout)
     self.dropout2 = layers.Dropout(dropout)
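The comment above points at an easy-to-miss detail: key_dim is the per-head projection size, so passing embed_dim // num_heads keeps the attention parameter count close to a classic Transformer, whereas key_dim=embed_dim (as in several other snippets here) projects every head to the full embedding width. A quick check of the difference, with assumed sizes:

import tensorflow as tf
from tensorflow.keras import layers

embed_dim, num_heads = 64, 4
x = tf.random.normal((1, 10, embed_dim))

mha_split = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim // num_heads)
mha_full = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
_ = mha_split(x, x)  # call once so the layers build their weights
_ = mha_full(x, x)
print(mha_split.count_params())  # about 4 * embed_dim**2 plus biases
print(mha_full.count_params())   # roughly num_heads times as many projection weights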
Code example #28
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    # Attention and Normalization
    x = layers.MultiHeadAttention(key_dim=head_size,
                                  num_heads=num_heads,
                                  dropout=dropout)(inputs, inputs)
    x = layers.Dropout(dropout)(x)
    x = layers.LayerNormalization(epsilon=1e-6)(x)
    res = x + inputs

    # Feed Forward Part
    x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(res)
    x = layers.Dropout(dropout)(x)
    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    x = layers.LayerNormalization(epsilon=1e-6)(x)
    return x + res
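A minimal usage sketch (shapes and hyperparameters assumed, not from the original source): stack a few of these encoder blocks, pool over time, and attach a softmax head.

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

inputs = keras.Input(shape=(128, 9))  # (timesteps, channels)
x = inputs
for _ in range(4):
    x = transformer_encoder(x, head_size=64, num_heads=4, ff_dim=128, dropout=0.1)
x = layers.GlobalAveragePooling1D()(x)
outputs = layers.Dense(2, activation="softmax")(x)
model = keras.Model(inputs, outputs)
model.summary()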
Code example #29
 def build(self, input_shape):
     self.attention = layers.MultiHeadAttention(
         num_heads=1,
         key_dim=self.dimensions,
         dropout=0.2,
     )
     self.layer_norm1 = layers.LayerNormalization(epsilon=1e-6)
     self.layer_norm2 = layers.LayerNormalization(epsilon=1e-6)
     self.layer_norm3 = layers.LayerNormalization(epsilon=1e-6)
     self.mlp = keras.Sequential([
         layers.Dense(units=self.dimensions, activation=tf.nn.gelu),
         layers.Dropout(0.2),
         layers.Dense(units=self.dimensions, activation=tf.nn.gelu),
     ])
     self.dense = layers.Dense(units=self.num_classes)
     self.flatten = layers.Flatten()
Code example #30
def transformer_block(x, transformer_layers, projection_dim, num_heads=2):
    for _ in range(transformer_layers):
        # Layer normalization 1.
        x1 = layers.LayerNormalization(epsilon=1e-6)(x)
        # Create a multi-head attention layer.
        attention_output = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=projection_dim, dropout=0.1
        )(x1, x1)
        # Skip connection 1.
        x2 = layers.Add()([attention_output, x])
        # Layer normalization 2.
        x3 = layers.LayerNormalization(epsilon=1e-6)(x2)
        # MLP.
        x3 = mlp(x3, hidden_units=[x.shape[-1] * 2, x.shape[-1]], dropout_rate=0.1,)
        # Skip connection 2.
        x = layers.Add()([x3, x2])

    return x
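Several of the functional snippets above (examples #7, #8, #11, #20 and #30) call an mlp() helper that is never shown. In the Keras ViT-style examples they resemble, it is a small Dense/Dropout stack; a sketch along those lines, not the projects' exact code:

import tensorflow as tf
from tensorflow.keras import layers

def mlp(x, hidden_units, dropout_rate):
    # One Dense (GELU) + Dropout pair per entry in hidden_units.
    for units in hidden_units:
        x = layers.Dense(units, activation=tf.nn.gelu)(x)
        x = layers.Dropout(dropout_rate)(x)
    return x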