Example #1
import tensorflow as tf

# Project-local modules providing the attention and feed-forward building blocks.
import attention
import convention

class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, dff, rate=0.1):
        super(EncoderLayer, self).__init__()

        # Multi-head self-attention and position-wise feed-forward sub-layers.
        self.mha = attention.MultiHeadAttention(d_model, num_heads)
        self.ffn = convention.point_wise_feed_forward_network(d_model, dff)

        # Layer normalization for each residual sub-layer connection.
        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

        # Dropout applied to each sub-layer's output during training.
        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)
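A minimal usage sketch (not part of the original example); the hyperparameter values below are illustrative assumptions, not taken from the source:

    # Instantiate one encoder layer with commonly used Transformer sizes.
    enc_layer = EncoderLayer(d_model=512, num_heads=8, dff=2048, rate=0.1)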
Example #2
# Assumes the same imports as Example #1 (tensorflow as tf, attention, convention).
class DecoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, dff, rate=0.1, max_seq=2048):
        super(DecoderLayer, self).__init__()

        # Masked self-attention and encoder-decoder attention, both using
        # relative position embeddings covering up to max_seq positions.
        self.mha1 = attention.RelativeAttention(d_model,
                                                num_heads,
                                                max_seq=max_seq)
        self.mha2 = attention.RelativeAttention(d_model,
                                                num_heads,
                                                max_seq=max_seq)

        self.ffn = convention.point_wise_feed_forward_network(d_model, dff)

        # One layer normalization and one dropout per sub-layer: two attention
        # blocks plus the feed-forward network.
        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm3 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)
        self.dropout3 = tf.keras.layers.Dropout(rate)
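A similar hedged sketch for the decoder layer; max_seq matches the constructor's default, and the remaining values are assumptions:

    # Instantiate one decoder layer; max_seq bounds the relative-position range.
    dec_layer = DecoderLayer(d_model=512, num_heads=8, dff=2048, rate=0.1, max_seq=2048)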