Example No. 1
    def __init__(self, d_k, d_v, d_model, d_ff, n_heads, dropout=0.1):
        super(DecoderLayer, self).__init__()
        self.dec_self_attn = MultiHeadAttention(d_k, d_v, d_model, n_heads, dropout)
        self.dec_enc_attn = MultiHeadAttention(d_k, d_v, d_model, n_heads, dropout)
        self.pos_ffn = PoswiseFeedForwardNet(d_model, d_ff, dropout)
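The snippets in these examples show only the constructors. As a rough sketch of how a decoder layer like Example No. 1 is typically wired in its forward pass (masked self-attention, then encoder-decoder attention, then the position-wise feed-forward network, each followed by dropout, a residual connection, and layer normalization), the following minimal example uses PyTorch's built-in nn.MultiheadAttention and a plain two-layer FFN as stand-ins for the custom MultiHeadAttention / PoswiseFeedForwardNet classes, which are not reproduced here; the class name and wiring are illustrative, not taken from the example's actual code.

import torch
import torch.nn as nn

class DecoderLayerSketch(nn.Module):
    """Illustrative post-norm decoder layer; not the exact classes used in the examples."""

    def __init__(self, d_model, n_heads, d_ff, dropout=0.1):
        super().__init__()
        self.self_attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout, batch_first=True)
        self.enc_attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout, batch_first=True)
        self.pos_ffn = nn.Sequential(
            nn.Linear(d_model, d_ff), nn.ReLU(), nn.Dropout(dropout), nn.Linear(d_ff, d_model))
        self.norm1, self.norm2, self.norm3 = (nn.LayerNorm(d_model) for _ in range(3))
        self.dropout = nn.Dropout(dropout)

    def forward(self, dec_inputs, enc_outputs, self_attn_mask=None):
        # sublayer 1: masked self-attention over the decoder inputs
        out, _ = self.self_attn(dec_inputs, dec_inputs, dec_inputs, attn_mask=self_attn_mask)
        x = self.norm1(dec_inputs + self.dropout(out))
        # sublayer 2: attention over the encoder outputs
        out, _ = self.enc_attn(x, enc_outputs, enc_outputs)
        x = self.norm2(x + self.dropout(out))
        # sublayer 3: position-wise feed-forward network
        return self.norm3(x + self.dropout(self.pos_ffn(x)))

# usage: layer = DecoderLayerSketch(d_model=512, n_heads=8, d_ff=2048)
#        out = layer(torch.randn(2, 10, 512), torch.randn(2, 20, 512))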
Example No. 2
    def __init__(self, d_model, n_heads, d_ffn, dropout):
        super().__init__()
        self.d_model = d_model
        self.self_attn = MultiHeadAttention(d_model, n_heads, dropout)
        self.context_attn = MultiHeadAttention(d_model, n_heads, dropout)
        self.feed_forward = PositionwiseFeedForward(d_model, d_ffn, dropout)
        self.sublayer = nn.ModuleList(
            [SublayerConnection(d_model, dropout) for _ in range(3)])
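Example No. 2 wraps each of its three sublayers in a SublayerConnection, whose definition is not shown. A minimal sketch of the common pre-norm "residual plus dropout around a normalized sublayer" idiom that such a wrapper usually implements (an assumption about the pattern, not the example's actual code):

import torch.nn as nn

class SublayerConnection(nn.Module):
    """Residual connection around a sublayer, applied to a layer-normalized input."""

    def __init__(self, d_model, dropout):
        super().__init__()
        self.norm = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, sublayer):
        # `sublayer` is a callable, e.g. lambda y: self_attn(y, y, y, mask)
        return x + self.dropout(sublayer(self.norm(x)))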
Example No. 3
    def __init__(self, d_model, d_hidden, n_heads, d_k, d_v, dropout=0.1):
        super(DecoderLayer, self).__init__()

        self.slf_attn = MultiHeadAttention(
            n_heads=n_heads, d_k=d_k, d_v=d_v, d_model=d_model, dropout=dropout)
        self.inter_attn = MultiHeadAttention(
            n_heads=n_heads, d_k=d_k, d_v=d_v, d_model=d_model, dropout=dropout)
        self.fc = PositionwiseFeedForward(
            d_hidden=d_hidden, d_model=d_model, dropout=dropout)
Example No. 4
    def __init__(self, embed_dim: int, n_head: int = 8, d_ff: int = 2048, dropout: float = 0.1):
        super(DecoderLayer, self).__init__()
        self.norm1 = NormLayer(embed_dim)
        self.norm2 = NormLayer(embed_dim)

        self.self_attn1 = MultiHeadAttention(embed_dim, n_head, dropout=dropout)
        self.self_attn2 = MultiHeadAttention(embed_dim, n_head, dropout=dropout)
        self.pos_ffn = PositionWiseFeedForward(embed_dim, d_ff=d_ff, dropout=dropout)

        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)
        self.dropout3 = nn.Dropout(dropout)
Example No. 5
    def __init__(self,
                 n_layers,
                 d_k,
                 d_v,
                 d_model,
                 d_ff,
                 n_heads,
                 max_seq_len,
                 tgt_vocab_size,
                 dropout=0.1,
                 weighted=False):
        super(tree_encoder, self).__init__()
        self.d_model = d_model
        self.n_layers = 1  # note: hard-coded to a single layer; the n_layers argument is ignored
        self.layers = nn.ModuleList([
            MultiHeadAttention(d_k, d_v, d_model, n_heads, dropout)
            for _ in range(self.n_layers)
        ])
        # self.layers = nn.ModuleList([ScaledDotProductAttention(300, dropout) for _ in range(self.n_layers)])
        # self.layers = nn.ModuleList([MultiBranchAttention(d_k, d_v, d_model, d_ff, n_heads, dropout) for _ in range(self.n_layers)])
        self.dropout = nn.Dropout(dropout)
        self.new_objective = False
        self.proj = nn.Linear(900, 300)
        self.proj1 = nn.Linear(900, 300)
Example No. 6
    def __init__(self, d_k, d_v, d_model, d_ff, n_branches, dropout=0.1):
        super(WeightedDecoderLayer, self).__init__()
        self.dec_self_attn = MultiHeadAttention(d_k, d_v, d_model, n_branches, dropout)
        self.dec_enc_attn = MultiBranchAttention(d_k, d_v, d_model, d_ff, n_branches, dropout)
Example No. 7
    def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1):
        super(DecoderLayer, self).__init__()
        self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
        self.enc_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
        self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout)
Example No. 8
    def __init__(self):
        super(DecoderLayer, self).__init__()
        self.mask_multi_head_attn = MultiHeadAttention()  # decoder sublayer 1: masked multi-head attention layer
        self.multi_head_attn = MultiHeadAttention()  # decoder sublayer 2: multi-head attention layer
        self.feed_forward = PositionWiseFeedForward()  # decoder sublayer 3: position-wise feed-forward layer
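The first sublayer in Example No. 8 is masked multi-head attention: a causal (subsequent-position) mask keeps each target position from attending to later positions. A small, self-contained sketch of how such a mask is commonly constructed (the helper name is illustrative; how the MultiHeadAttention() modules above consume it is not shown in the example):

import torch

def subsequent_mask(seq_len):
    # True where attention must be blocked, i.e. strictly above the diagonal (future positions)
    return torch.triu(torch.ones(seq_len, seq_len, dtype=torch.bool), diagonal=1)

# subsequent_mask(4):
# tensor([[False,  True,  True,  True],
#         [False, False,  True,  True],
#         [False, False, False,  True],
#         [False, False, False, False]])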
Example No. 9
    def __init__(self):
        super(EncoderLayer, self).__init__()
        self.multi_heads_attn = MultiHeadAttention()  # encoder sublayer 1: multi-head attention layer
        self.feed_forward = PositionWiseFeedForward()  # encoder sublayer 2: position-wise feed-forward layer
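For comparison with the decoder layers above, an encoder layer like Example No. 9 has only two sublayers. A minimal runnable sketch of the corresponding forward pass, again substituting PyTorch's nn.MultiheadAttention and a plain FFN for the unshown MultiHeadAttention() / PositionWiseFeedForward() classes (names and defaults are illustrative, not the example's actual code):

import torch.nn as nn

class EncoderLayerSketch(nn.Module):
    """Illustrative post-norm encoder layer: self-attention followed by a feed-forward network."""

    def __init__(self, d_model=512, n_heads=8, d_ff=2048, dropout=0.1):
        super().__init__()
        self.self_attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout, batch_first=True)
        self.feed_forward = nn.Sequential(
            nn.Linear(d_model, d_ff), nn.ReLU(), nn.Linear(d_ff, d_model))
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, key_padding_mask=None):
        # sublayer 1: multi-head self-attention
        attn_out, _ = self.self_attn(x, x, x, key_padding_mask=key_padding_mask)
        x = self.norm1(x + self.dropout(attn_out))
        # sublayer 2: position-wise feed-forward network
        return self.norm2(x + self.dropout(self.feed_forward(x)))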