Example #1
class ConditionalDecoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, dff, rate=0.1):
        super(ConditionalDecoderLayer, self).__init__()

        def point_wise_feed_forward_network(d_model, dff):
            return tf.keras.Sequential([
                tf.keras.layers.Dense(
                    dff, activation='relu'),  # (batch_size, seq_len, dff)
                tf.keras.layers.Dense(
                    d_model)  # (batch_size, seq_len, d_model)
            ])

        self.mha1 = MultiHeadAttention(d_model,
                                       num_heads,
                                       return_attn_coef=True)
        self.mha2 = MultiHeadAttention(d_model,
                                       num_heads,
                                       return_attn_coef=True)

        self.ffn = point_wise_feed_forward_network(d_model, dff)

        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm3 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)
        self.dropout3 = tf.keras.layers.Dropout(rate)
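
A minimal usage sketch (not from the source repository): it assumes that ConditionalDecoderLayer above is in scope and that the MultiHeadAttention it builds is tfa.layers.MultiHeadAttention from tensorflow_addons, whose positional head_size/num_heads arguments and return_attn_coef flag match the calls above. The sub-layers are driven in the usual Transformer-decoder order: self-attention, cross-attention over the encoder output, then the feed-forward network, each followed by dropout, a residual connection and layer normalization.

import tensorflow as tf
import tensorflow_addons as tfa

MultiHeadAttention = tfa.layers.MultiHeadAttention  # assumed resolution of the name used above

layer = ConditionalDecoderLayer(d_model=128, num_heads=8, dff=512)
x = tf.random.normal((2, 10, 128))        # (batch_size, target_seq_len, d_model)
enc_out = tf.random.normal((2, 16, 128))  # (batch_size, input_seq_len, d_model)

# return_attn_coef=True makes each attention layer return (output, attention_weights).
attn1, w1 = layer.mha1([x, x])                                   # self-attention
out1 = layer.layernorm1(x + layer.dropout1(attn1))
attn2, w2 = layer.mha2([out1, enc_out])                          # cross-attention
out2 = layer.layernorm2(out1 + layer.dropout2(attn2))
out3 = layer.layernorm3(out2 + layer.dropout3(layer.ffn(out2)))  # (2, 10, 128)
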
Example #3
class TransformerBlock(Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.8):
        super(TransformerBlock, self).__init__()
        self.att = MultiHeadAttention(head_size=embed_dim, num_heads=num_heads)
        self.ffn = Sequential(
            [Dense(ff_dim, activation="relu"), Dense(embed_dim)]
        )
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)
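
A hypothetical wiring sketch (an assumption, not the original repository's call method): it follows the standard Keras Transformer-block pattern of self-attention, dropout, residual connection and layer normalization, and assumes that TransformerBlock above is in scope with MultiHeadAttention resolving to tfa.layers.MultiHeadAttention, which is called with a [query, key] list.

import tensorflow as tf

block = TransformerBlock(embed_dim=64, num_heads=4, ff_dim=128)
x = tf.random.normal((2, 20, 64))                                # (batch, seq_len, embed_dim)

attn_out = block.att([x, x])                                     # self-attention over the sequence
out1 = block.layernorm1(x + block.dropout1(attn_out))            # residual + norm
out2 = block.layernorm2(out1 + block.dropout2(block.ffn(out1)))  # feed-forward sub-block, (2, 20, 64)
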
Example #4
class MultiHeadSelfAttention(tf.keras.layers.Layer):
    def __init__(self,
                 head_size,
                 num_heads,
                 output_size=None,
                 dropout=0.1,
                 name="rel_pos_multihead_self_attention",
                 **kwargs):
        super(MultiHeadSelfAttention, self).__init__(name=name, **kwargs)
        self.multihead_attention = MultiHeadAttention(head_size=head_size,
                                                      num_heads=num_heads,
                                                      output_size=output_size,
                                                      dropout=dropout)

    def call(self, inputs, training=False, **kwargs):
        # Query and key are both `inputs`, so this is plain self-attention over the sequence.
        output = self.multihead_attention([inputs, inputs], training=training)
        return output

    def get_config(self):
        # Expose the inner attention layer's hyper-parameters in the wrapper's config.
        conf = super(MultiHeadSelfAttention, self).get_config()
        conf.update(self.multihead_attention.get_config())
        return conf
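
A short usage sketch for the wrapper above, again assuming the tensorflow_addons MultiHeadAttention and that MultiHeadSelfAttention is defined as shown. Feeding the same tensor as query and key makes it plain self-attention, and get_config exposes the inner attention layer's settings alongside the wrapper's own.

import tensorflow as tf

layer = MultiHeadSelfAttention(head_size=64, num_heads=4)
x = tf.random.normal((2, 50, 64))  # (batch, seq_len, features)
y = layer(x)                       # query and key are both `x`
print(y.shape)                     # (2, 50, 64) here, since output_size was left at None
print(sorted(layer.get_config()))  # includes the inner layer's head_size, num_heads, dropout, ...
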
Example #5
class AttentionBlock(keras.layers.Layer):
    def __init__(self,
                 name="AttentionBlock",
                 num_heads=2,
                 head_size=128,
                 ff_dim=None,
                 dropout=0,
                 **kwargs):
        super().__init__(name=name, **kwargs)

        if ff_dim is None:
            ff_dim = head_size

        self.attention = MultiHeadAttention(num_heads=num_heads,
                                            head_size=head_size,
                                            dropout=dropout)
        self.attention_dropout = keras.layers.Dropout(dropout)
        self.attention_norm = keras.layers.LayerNormalization(epsilon=1e-6)

        self.ff_conv1 = keras.layers.Conv1D(filters=ff_dim,
                                            kernel_size=1,
                                            activation="relu")
        # self.ff_conv2 at build()
        self.ff_dropout = keras.layers.Dropout(dropout)
        self.ff_norm = keras.layers.LayerNormalization(epsilon=1e-6)
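
The "# self.ff_conv2 at build()" comment defers creating the second point-wise convolution until the input shape is known, typically because its filter count has to match the incoming feature dimension so the feed-forward output can be added back to its input. A hypothetical sketch of that deferred creation as it would sit inside AttentionBlock (an assumption, not the original repository's code), with keras referring to the same namespace used above:

    def build(self, input_shape):
        # filters tracks the last input dimension so the feed-forward output keeps the input's shape
        self.ff_conv2 = keras.layers.Conv1D(filters=input_shape[-1], kernel_size=1)
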