def __init__(self, d_k, d_v, d_model, d_ff, n_heads, dropout=0.1):
    super(DecoderLayer, self).__init__()
    # masked self-attention over the decoder inputs
    self.dec_self_attn = MultiHeadAttention(d_k, d_v, d_model, n_heads, dropout)
    # encoder-decoder attention over the encoder outputs
    self.dec_enc_attn = MultiHeadAttention(d_k, d_v, d_model, n_heads, dropout)
    # position-wise feed-forward network
    self.pos_ffn = PoswiseFeedForwardNet(d_model, d_ff, dropout)

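# For context, a minimal sketch of how a decoder layer with this constructor is
# typically wired in its forward pass. The method below is an illustrative
# assumption: the mask argument names and the (output, attention) return
# signature of MultiHeadAttention are not taken from this snippet.
def forward(self, dec_inputs, enc_outputs, self_attn_mask=None, dec_enc_attn_mask=None):
    # masked self-attention over the decoder inputs
    dec_outputs, self_attn = self.dec_self_attn(dec_inputs, dec_inputs, dec_inputs, self_attn_mask)
    # encoder-decoder attention: queries from the decoder, keys/values from the encoder
    dec_outputs, dec_enc_attn = self.dec_enc_attn(dec_outputs, enc_outputs, enc_outputs, dec_enc_attn_mask)
    # position-wise feed-forward network
    dec_outputs = self.pos_ffn(dec_outputs)
    return dec_outputs, self_attn, dec_enc_attn
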
def __init__(self, d_model, n_heads, d_ffn, dropout):
    super().__init__()
    self.d_model = d_model
    self.self_attn = MultiHeadAttention(d_model, n_heads, dropout)
    self.context_attn = MultiHeadAttention(d_model, n_heads, dropout)
    self.feed_forward = PositionwiseFeedForward(d_model, d_ffn, dropout)
    self.sublayer = nn.ModuleList(
        [SublayerConnection(d_model, dropout) for _ in range(3)])

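# Hedged sketch of the matching forward pass, assuming SublayerConnection applies a
# residual around a (pre-)normalized sublayer in the annotated-Transformer style;
# the argument order and mask names are assumptions, not taken from this snippet.
def forward(self, x, memory, src_mask=None, tgt_mask=None):
    # sublayer 0: masked self-attention over the target sequence
    x = self.sublayer[0](x, lambda x: self.self_attn(x, x, x, tgt_mask))
    # sublayer 1: context attention over the encoder memory
    x = self.sublayer[1](x, lambda x: self.context_attn(x, memory, memory, src_mask))
    # sublayer 2: position-wise feed-forward network
    return self.sublayer[2](x, self.feed_forward)
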
def __init__(self, d_model, d_hidden, n_heads, d_k, d_v, dropout=0.1):
    super(DecoderLayer, self).__init__()
    self.slf_attn = MultiHeadAttention(
        n_heads=n_heads, d_k=d_k, d_v=d_v, d_model=d_model, dropout=dropout)
    self.inter_attn = MultiHeadAttention(
        n_heads=n_heads, d_k=d_k, d_v=d_v, d_model=d_model, dropout=dropout)
    self.fc = PositionwiseFeedForward(
        d_hidden=d_hidden, d_model=d_model, dropout=dropout)

def __init__(self, embed_dim: int, n_head: int = 8, d_ff: int = 2048, dropout: float = 0.1):
    super(DecoderLayer, self).__init__()
    self.norm1 = NormLayer(embed_dim)
    self.norm2 = NormLayer(embed_dim)
    self.self_attn1 = MultiHeadAttention(embed_dim, n_head, dropout=dropout)
    self.self_attn2 = MultiHeadAttention(embed_dim, n_head, dropout=dropout)
    self.pos_ffn = PositionWiseFeedForward(embed_dim, d_ff=d_ff, dropout=dropout)
    self.dropout1 = nn.Dropout(dropout)
    self.dropout2 = nn.Dropout(dropout)
    self.dropout3 = nn.Dropout(dropout)

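# Hedged sketch of a forward pass that uses the explicit norm and dropout modules
# above in a post-norm arrangement. Whether the original is pre- or post-norm, the
# mask names, and the lack of a norm after the feed-forward sublayer (only norm1
# and norm2 are defined) are assumptions made for illustration.
def forward(self, x, enc_out, tgt_mask=None, src_mask=None):
    # residual + dropout around masked self-attention, then normalize
    x = self.norm1(x + self.dropout1(self.self_attn1(x, x, x, mask=tgt_mask)))
    # residual + dropout around encoder-decoder attention, then normalize
    x = self.norm2(x + self.dropout2(self.self_attn2(x, enc_out, enc_out, mask=src_mask)))
    # residual + dropout around the position-wise feed-forward network
    return x + self.dropout3(self.pos_ffn(x))
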
def __init__(self, n_layers, d_k, d_v, d_model, d_ff, n_heads, max_seq_len,
             tgt_vocab_size, dropout=0.1, weighted=False):
    super(tree_encoder, self).__init__()
    self.d_model = d_model
    # note: the layer count is hardcoded to 1, so the n_layers argument is unused
    self.n_layers = 1
    self.layers = nn.ModuleList([
        MultiHeadAttention(d_k, d_v, d_model, n_heads, dropout)
        for _ in range(self.n_layers)
    ])
    # alternative attention stacks kept for reference:
    # self.layers = nn.ModuleList([ScaledDotProductAttention(300, dropout) for _ in range(self.n_layers)])
    # self.layers = nn.ModuleList([MultiBranchAttention(d_k, d_v, d_model, d_ff, n_heads, dropout) for _ in range(self.n_layers)])
    self.dropout = nn.Dropout(dropout)
    self.new_objective = False
    self.proj = nn.Linear(900, 300)
    self.proj1 = nn.Linear(900, 300)

def __init__(self, d_k, d_v, d_model, d_ff, n_branches, dropout=0.1):
    super(WeightedDecoderLayer, self).__init__()
    self.dec_self_attn = MultiHeadAttention(d_k, d_v, d_model, n_branches, dropout)
    self.dec_enc_attn = MultiBranchAttention(d_k, d_v, d_model, d_ff, n_branches, dropout)

def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1):
    super(DecoderLayer, self).__init__()
    self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
    self.enc_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
    self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout)

def __init__(self):
    super(DecoderLayer, self).__init__()
    self.mask_multi_head_attn = MultiHeadAttention()  # decoder sublayer 1: masked multi-head attention
    self.multi_head_attn = MultiHeadAttention()       # decoder sublayer 2: multi-head attention
    self.feed_forward = PositionWiseFeedForward()     # decoder sublayer 3: position-wise feed-forward

def __init__(self):
    super(EncoderLayer, self).__init__()
    self.multi_heads_attn = MultiHeadAttention()   # encoder sublayer 1: multi-head attention
    self.feed_forward = PositionWiseFeedForward()  # encoder sublayer 2: position-wise feed-forward

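# Hedged usage sketch: how layers like the two parameter-free constructors above
# are typically stacked into a full encoder. The n_layers default, the EncoderLayer
# forward signature, and the mask argument are illustrative assumptions, not taken
# from these snippets.
import torch.nn as nn

class Encoder(nn.Module):
    def __init__(self, n_layers=6):
        super(Encoder, self).__init__()
        self.layers = nn.ModuleList([EncoderLayer() for _ in range(n_layers)])

    def forward(self, x, mask=None):
        # pass the embedded source sequence through each encoder layer in turn
        for layer in self.layers:
            x = layer(x, mask)
        return x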