def __init__(self,
             d_model,
             heads,
             d_ff=1024,
             dropout=0.1,
             attention_type="Baseline",
             relative_time_pitch=False,
             max_relative_position=512):
    super().__init__()
    # One norm and one dropout per sub-layer: two attention blocks plus a feed-forward block.
    self.norm_1 = Norm(d_model)
    self.norm_2 = Norm(d_model)
    self.norm_3 = Norm(d_model)

    self.attention_type = attention_type
    self.relative_time_pitch = relative_time_pitch
    self.dropout_1 = nn.Dropout(dropout)
    self.dropout_2 = nn.Dropout(dropout)
    self.dropout_3 = nn.Dropout(dropout)

    # Two multi-head attention sub-layers, both configured with the same
    # attention type and optional relative time/pitch encoding.
    self.attn_1 = MultiHeadAttention(heads, d_model, dropout=dropout,
                                     attention_type=self.attention_type,
                                     relative_time_pitch=self.relative_time_pitch,
                                     max_relative_position=max_relative_position)
    self.attn_2 = MultiHeadAttention(heads, d_model, dropout=dropout,
                                     attention_type=self.attention_type,
                                     relative_time_pitch=self.relative_time_pitch,
                                     max_relative_position=max_relative_position)
    self.ff = FeedForward(d_model, d_ff, dropout)
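
This constructor only declares the sub-modules; how they are composed is not shown. A minimal sketch of the corresponding forward pass, assuming the usual pre-norm residual wiring; the argument names e_outputs, src_mask and trg_mask are hypothetical, not taken from the snippet:

def forward(self, x, e_outputs, src_mask, trg_mask):
    # Masked self-attention over the decoder input.
    x2 = self.norm_1(x)
    x = x + self.dropout_1(self.attn_1(x2, x2, x2, trg_mask))
    # Attention over the encoder outputs.
    x2 = self.norm_2(x)
    x = x + self.dropout_2(self.attn_2(x2, e_outputs, e_outputs, src_mask))
    # Position-wise feed-forward network.
    x2 = self.norm_3(x)
    x = x + self.dropout_3(self.ff(x2))
    return x
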
Example 2
def __init__(self, d_model, heads, dropout=0.1):
    super().__init__()
    self.norm_1 = Norm(d_model)
    self.norm_2 = Norm(d_model)
    self.attn = MultiHeadAttention(heads, d_model, dropout=dropout)
    self.ff = FeedForward(d_model, dropout=dropout)
    self.dropout_1 = nn.Dropout(dropout)
    self.dropout_2 = nn.Dropout(dropout)
Example 3
def __init__(self, d_model, heads, decoder_extra_layers, dropout=0.1):
    super().__init__()
    self.decoder_extra_layers = decoder_extra_layers

    self.norm_1 = Norm(d_model)
    self.norm_2 = Norm(d_model)
    self.norm_3 = Norm(d_model)

    self.dropout_1 = nn.Dropout(dropout)
    self.dropout_2 = nn.Dropout(dropout)
    self.dropout_3 = nn.Dropout(dropout)

    self.attn_1 = MultiHeadAttention(heads, d_model, dropout=dropout)
    self.attn_2 = MultiHeadAttention(heads, d_model, dropout=dropout)
    self.ff = FeedForward(d_model, dropout=dropout)
Example 4
def __init__(self, vocab_size, d_model, N, heads, dropout):
    super().__init__()
    self.N = N
    self.embed = Embedder(vocab_size, d_model)
    self.pe = PositionalEncoder(d_model, dropout=dropout)
    self.layers = get_clones(DecoderLayer(d_model, heads, dropout), N)
    self.norm = Norm(d_model)
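
get_clones is used here and in several later examples to stack N identical layers. Its definition is not shown in these snippets; a common implementation (an assumption, not necessarily this repository's exact code) is:

import copy
import torch.nn as nn

def get_clones(module, N):
    # N independent deep copies, registered as sub-modules via ModuleList.
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])
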
Example 5
def __init__(self, bert, hidden_size, num_hidden_layers, num_attention_heads, dropout):
    super().__init__()
    self.N = num_hidden_layers
    self.bert = bert
    self.pe = PositionalEncoder(hidden_size, dropout=dropout)
    self.layers = get_clones(DecoderLayer(hidden_size, num_attention_heads, dropout),
                             num_hidden_layers)
    self.norm = Norm(hidden_size)
Example 6
def __init__(self, d_model, heads, dropout=0.1):
    super(EncoderLayer, self).__init__()

    self.norm = Norm(d_model)
    self.dropout = nn.Dropout(dropout)
    self.attention_layer = MultiHeadedSelfAttention(heads, d_model, dropout=dropout)
    self.ffnn_layer = FeedForward(d_model, dropout=dropout)
Example 7

def __init__(self, vocab_size, opt):
    super().__init__()
    self.N = opt.n_layers
    self.embed = Embedder(vocab_size, opt.d_model)
    if opt.concat_pos_sinusoid is True:
        # Concatenating (rather than adding) the sinusoidal positional
        # encoding doubles the model dimension seen by the decoder layers.
        self.pe = PositionalEncoderConcat(opt.d_model, opt.dropout,
                                          opt.max_seq_len)
        self.d_model = 2 * opt.d_model
    else:
        self.pe = PositionalEncoder(opt.d_model, opt.dropout,
                                    opt.max_seq_len)
        self.d_model = opt.d_model

    if opt.relative_time_pitch is True:
        # All layers except the first use relative time/pitch attention;
        # the first layer is inserted separately with it disabled.
        self.layers = get_clones(DecoderLayer(self.d_model, opt.heads, opt.d_ff,
                                              opt.dropout, opt.attention_type,
                                              opt.relative_time_pitch,
                                              max_relative_position=opt.max_relative_position),
                                 opt.n_layers - 1)
        self.layers.insert(0, copy.deepcopy(DecoderLayer(self.d_model, opt.heads, opt.d_ff,
                                                         opt.dropout, opt.attention_type,
                                                         relative_time_pitch=False,
                                                         max_relative_position=opt.max_relative_position)))
    else:
        self.layers = get_clones(DecoderLayer(self.d_model, opt.heads, opt.d_ff,
                                              opt.dropout, opt.attention_type,
                                              opt.relative_time_pitch,
                                              max_relative_position=opt.max_relative_position),
                                 opt.n_layers)
    self.norm = Norm(self.d_model)
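
In the concat_pos_sinusoid branch above, the positional encoding is concatenated to the embedding rather than added, which is why self.d_model is doubled. A minimal, runnable illustration of that shape change, with plain tensors standing in for the actual embedder and positional table:

import torch

seq_len, d_model = 16, 64
x = torch.randn(1, seq_len, d_model)   # token embeddings
pe = torch.randn(1, seq_len, d_model)  # stand-in for the sinusoidal table
x_cat = torch.cat([x, pe], dim=-1)     # features are concatenated, not summed
assert x_cat.shape == (1, seq_len, 2 * d_model)
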
Example 8
def __init__(self, d_model, N_layers, heads, dropout):
    super().__init__()
    self.N_layers = N_layers
    # self.embed = Embedder(vocab_size, d_model)
    # self.pe = PositionalEncoder(d_model, dropout=dropout)
    # self.attn = MultiHeadAttention(heads, d_model, dropout=dropout)
    self.layers = get_clones(EncoderLayer(d_model, heads, dropout),
                             N_layers)
    self.norm = Norm(d_model)
Example 9
def __init__(self, vocab_size, d_model, N, heads, dropout, field, word_emb,
             opt):
    super().__init__()
    self.N = N
    self.word_emb = word_emb
    self.opt = opt  # unused, just for querying
    self.embed = Embedder(vocab_size, d_model, word_emb, field)
    self.pe = PositionalEncoder(d_model, dropout=dropout)
    self.layers = get_clones(EncoderLayer(d_model, heads, dropout),
                             N)  # attention
    self.norm = Norm(d_model)
Example 10
def __init__(self, vocab_size, d_model, N, heads, dropout, device):
    super().__init__()
    self.N = N

    # We need to use the embedder
    # self.embed = Embedder(vocab_size, d_model)
    # self.embed = nn.Linear(vocab_size, d_model)

    self.pe = PositionalEncoder(d_model, dropout=dropout, device=device)
    self.layers = get_clones(EncoderLayer(d_model, heads, dropout), N)
    self.norm = Norm(d_model)
Example 11
def __init__(self, d_model, heads, dropout=0.1):
    super(DecoderLayer, self).__init__()

    self.norm = Norm(d_model)
    self.dropout = nn.Dropout(dropout)

    # In the decoder, the self-attention layer is only allowed to attend to
    # earlier positions in the output sequence; this is different from the
    # encoder counterpart (a mask sketch follows this example).
    self.attention_layer = MultiHeadedSelfAttention(heads, d_model, dropout=dropout)
    self.encoder_decoder_attention_layer = MultiHeadedSelfAttention(heads, d_model, dropout=dropout)
    self.ffnn_layer = FeedForward(d_model, dropout=dropout)
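
The comment above describes causal (masked) self-attention, but neither the mask construction nor its application appears in these constructors. A small self-contained sketch of the usual lower-triangular mask (not code from this repository):

import torch

def subsequent_mask(size):
    # Position i may attend to positions 0..i only.
    return torch.tril(torch.ones(size, size, dtype=torch.bool))

print(subsequent_mask(4))
# tensor([[ True, False, False, False],
#         [ True,  True, False, False],
#         [ True,  True,  True, False],
#         [ True,  True,  True,  True]])
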
Example 12
def __init__(self, src_vocab, n_classes, d_model, N, heads, dropout):
    super().__init__()
    self.encoder = Encoder(src_vocab, d_model, N, heads, dropout)
    self.classifier = Classifier(d_model)
    self.out = nn.Linear(d_model, n_classes)
    self.norm = Norm(n_classes)
Example 13
def __init__(self, d_model):
    super().__init__()
    self.layer = ClassifierLayer(d_model)
    self.norm = Norm(d_model)
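
Every example above wraps its sub-layers with Norm(d_model), whose definition is not included in the snippets. A typical hand-rolled layer normalisation used in this style of Transformer code would look roughly like this (an assumption, not the snippets' own class):

import torch
import torch.nn as nn

class Norm(nn.Module):
    def __init__(self, d_model, eps=1e-6):
        super().__init__()
        # Learnable gain and bias applied after normalising the last dimension.
        self.alpha = nn.Parameter(torch.ones(d_model))
        self.bias = nn.Parameter(torch.zeros(d_model))
        self.eps = eps

    def forward(self, x):
        mean = x.mean(dim=-1, keepdim=True)
        std = x.std(dim=-1, keepdim=True)
        return self.alpha * (x - mean) / (std + self.eps) + self.bias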