def __init__(self,
                 d_model,
                 heads,
                 d_ff=1024,
                 dropout=0.1,
                 attention_type="Baseline",
                 relative_time_pitch=False,
                 max_relative_position=512):
        super().__init__()
        # one layer norm per sub-block: self-attention, encoder attention, feed-forward
        self.norm_1 = Norm(d_model)
        self.norm_2 = Norm(d_model)
        self.norm_3 = Norm(d_model)

        self.attention_type = attention_type
        self.relative_time_pitch = relative_time_pitch
        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)
        self.dropout_3 = nn.Dropout(dropout)

        # two attention blocks with identical configuration: self-attention over
        # the target sequence, then attention over the encoder outputs
        self.attn_1 = MultiHeadAttention(heads, d_model, dropout=dropout,
                                         attention_type=self.attention_type,
                                         relative_time_pitch=self.relative_time_pitch,
                                         max_relative_position=max_relative_position)
        self.attn_2 = MultiHeadAttention(heads, d_model, dropout=dropout,
                                         attention_type=self.attention_type,
                                         relative_time_pitch=self.relative_time_pitch,
                                         max_relative_position=max_relative_position)
        self.ff = FeedForward(d_model, d_ff, dropout)
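
This constructor follows the standard Transformer decoder-layer layout: masked self-attention, encoder attention, and a feed-forward block, each wrapped in a norm and dropout. A minimal sketch of the matching forward pass, assuming the pre-norm residual pattern this structure implies (the argument names x, e_outputs, src_mask, and trg_mask are assumptions, not taken from the source):

    def forward(self, x, e_outputs, src_mask, trg_mask):
        # masked self-attention over the decoder input
        x2 = self.norm_1(x)
        x = x + self.dropout_1(self.attn_1(x2, x2, x2, trg_mask))
        # attention over the encoder outputs
        x2 = self.norm_2(x)
        x = x + self.dropout_2(self.attn_2(x2, e_outputs, e_outputs, src_mask))
        # position-wise feed-forward block
        x2 = self.norm_3(x)
        x = x + self.dropout_3(self.ff(x2))
        return x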
Example 2
    def __init__(self, d_model, heads, dropout=0.1):
        super().__init__()
        self.norm_1 = Norm(d_model)
        self.norm_2 = Norm(d_model)
        self.norm_3 = Norm(d_model)

        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)
        self.dropout_3 = nn.Dropout(dropout)

        self.attn_1 = MultiHeadAttention(heads, d_model, dropout=dropout)
        self.attn_2 = MultiHeadAttention(heads, d_model, dropout=dropout)
        self.ff = FeedForward(d_model, dropout=dropout)
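
Layers like this are usually cloned to build the full decoder stack. A minimal sketch of that usage, assuming the class above is named DecoderLayer (the name is an assumption):

import copy
import torch.nn as nn

def get_clones(module, N):
    # N independent copies of the layer, each with its own parameters
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])

layers = get_clones(DecoderLayer(d_model=512, heads=8), 6)  # e.g. a 6-layer stack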
Example 3
    def __init__(self, d_model, num_heads, dff, rate=0.1):
        super(DecoderLayer, self).__init__()

        self.mha1 = MultiHeadAttention(d_model, num_heads)
        self.mha2 = MultiHeadAttention(d_model, num_heads)

        self.ffn = point_wise_feed_forward_network(d_model, dff)

        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm3 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)
        self.dropout3 = tf.keras.layers.Dropout(rate)
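
This TensorFlow variant applies layer normalization after each residual connection (post-norm). A minimal sketch of the matching call method, assuming the tutorial-style MultiHeadAttention that takes (v, k, q, mask) and returns (output, attention_weights); the argument names are assumptions:

    def call(self, x, enc_output, training, look_ahead_mask, padding_mask):
        # masked self-attention, residual connection, post-norm
        attn1, attn_w1 = self.mha1(x, x, x, look_ahead_mask)
        attn1 = self.dropout1(attn1, training=training)
        out1 = self.layernorm1(attn1 + x)

        # cross-attention over the encoder output
        attn2, attn_w2 = self.mha2(enc_output, enc_output, out1, padding_mask)
        attn2 = self.dropout2(attn2, training=training)
        out2 = self.layernorm2(attn2 + out1)

        # point-wise feed-forward network
        ffn_out = self.ffn(out2)
        ffn_out = self.dropout3(ffn_out, training=training)
        return self.layernorm3(ffn_out + out2), attn_w1, attn_w2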
Example 4
    def __init__(self, d_input, d_model, heads, dropout=0.1):
        super().__init__()
        self.input_linear = nn.Linear(d_input, d_model)
        self.norm_1 = nn.LayerNorm(d_model)
        self.norm_2 = nn.LayerNorm(d_model)
        self.attn = MultiHeadAttention(heads, d_model, dropout=dropout)
        self.ff = FeedForward(d_model, dropout=dropout)
        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)
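
Unlike the decoder layers above, this block projects inputs of width d_input up to d_model and uses a single attention module, which suggests an encoder-style layer. A minimal sketch of a matching forward pass under that assumption (the mask argument is also an assumption):

    def forward(self, x, mask=None):
        # project raw inputs to the model width
        x = self.input_linear(x)
        # self-attention sub-block with residual connection
        x2 = self.norm_1(x)
        x = x + self.dropout_1(self.attn(x2, x2, x2, mask))
        # feed-forward sub-block with residual connection
        x2 = self.norm_2(x)
        x = x + self.dropout_2(self.ff(x2))
        return x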