def __init__(self, size, self_attn, src_attn, feed_forward, dropout):
    super(DecoderLayer, self).__init__()
    self.size = size
    self.self_attn = self_attn      # masked self-attention over the target sequence
    self.src_attn = src_attn        # encoder-decoder (source) attention
    self.feed_forward = feed_forward
    # three residual sub-layers: self-attention, source attention, feed-forward
    self.sublayer = clones(SublayerConnection(size, dropout), 3)
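# Every constructor in this section relies on helpers defined elsewhere in the
# codebase. Below is a minimal sketch of `clones` and `SublayerConnection`,
# following the usual Annotated Transformer conventions; the actual definitions
# here may differ in detail.
import copy

import torch.nn as nn

def clones(module, N):
    "Produce a ModuleList of N independent deep copies of a module."
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])

class SublayerConnection(nn.Module):
    "A residual connection around a pre-normalized sublayer, with dropout."
    def __init__(self, size, dropout):
        super(SublayerConnection, self).__init__()
        self.norm = nn.LayerNorm(size)  # the custom LayerNorm sketched below would also work
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, sublayer):
        "Apply the residual connection to any sublayer of the same size."
        return x + self.dropout(sublayer(self.norm(x)))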
def __init__(self, h, d_model, dropout=0.1):
    "Take in the model size and the number of attention heads."
    super(MultiHeadedAttention, self).__init__()
    assert d_model % h == 0
    # we assume d_v always equals d_k
    self.d_k = d_model // h
    self.h = h
    # four linear layers: three projections for Q, K, V plus the final output projection
    self.linears = clones(nn.Linear(d_model, d_model), 4)
    self.attn = None
    self.dropout = nn.Dropout(p=dropout)
def __init__(self, h=8, d_model=512, dropout=0.1):
    """
    Args:
        h: number of self-attention heads
        d_model: dimensionality of the model's input (and output)
    """
    super(MultiHeadedAttn, self).__init__()
    assert d_model % h == 0, "d_model must be divisible by h"
    # assume d_v == d_k
    self.d_k = d_model // h
    self.h = h
    self.dropout = dropout  # stored as a raw rate here, not wrapped in nn.Dropout
    self.linears = clones(nn.Linear(d_model, d_model), 4)
    self.attn = None
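# Neither attention class's forward pass appears in this section. The sketch
# below shows the standard computation the four linear layers support: project
# Q, K, V, split into h heads, run scaled dot-product attention, then
# concatenate the heads and apply the final projection. It is an assumed
# reconstruction (written against MultiHeadedAttention above, whose
# self.dropout is an nn.Dropout module), not code taken from this codebase.
import math

import torch

def attention(query, key, value, mask=None, dropout=None):
    "Scaled dot-product attention: softmax(QK^T / sqrt(d_k)) V."
    d_k = query.size(-1)
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k)
    if mask is not None:
        scores = scores.masked_fill(mask == 0, -1e9)
    p_attn = scores.softmax(dim=-1)
    if dropout is not None:
        p_attn = dropout(p_attn)
    return torch.matmul(p_attn, value), p_attn

def forward(self, query, key, value, mask=None):
    "Multi-head attention forward pass (method of MultiHeadedAttention)."
    if mask is not None:
        mask = mask.unsqueeze(1)  # the same mask is applied to every head
    nbatches = query.size(0)
    # 1) apply the first three linears and reshape: d_model => h x d_k
    query, key, value = [
        lin(x).view(nbatches, -1, self.h, self.d_k).transpose(1, 2)
        for lin, x in zip(self.linears, (query, key, value))
    ]
    # 2) attend over all heads in a single batched call
    x, self.attn = attention(query, key, value, mask=mask, dropout=self.dropout)
    # 3) concatenate the heads and apply the final linear
    x = x.transpose(1, 2).contiguous().view(nbatches, -1, self.h * self.d_k)
    return self.linears[-1](x)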
def __init__(self, size, self_attn, feed_forward, dropout):
    super(EncoderLayer4newAST, self).__init__()
    self.self_attn = self_attn
    self.feed_forward = feed_forward
    # two residual sub-layers: self-attention, then the position-wise feed-forward
    self.sublayer = clones(SublayerConnection(size, dropout), 2)
    self.size = size
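# A sketch of the forward wiring these two sublayers typically implement; the
# actual EncoderLayer4newAST.forward is not shown in this section, so treat
# this as an assumption rather than the class's real method.
def forward(self, x, mask):
    "Self-attention sublayer, then the feed-forward sublayer."
    x = self.sublayer[0](x, lambda x: self.self_attn(x, x, x, mask))
    return self.sublayer[1](x, self.feed_forward)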
def __init__(self, layer, N):
    super(Decoder, self).__init__()
    # a stack of N identical decoder layers, followed by a final layer norm
    self.layers = clones(layer, N)
    self.norm = LayerNorm(layer.size)
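# `LayerNorm` here refers to a custom module rather than torch.nn.LayerNorm.
# Below is a minimal sketch under that assumption, plus the forward pass the
# Decoder presumably pairs with this constructor; both are reconstructions,
# not code confirmed by this section.
import torch
import torch.nn as nn

class LayerNorm(nn.Module):
    "Layer normalization with learnable gain and bias (Ba et al., 2016)."
    def __init__(self, features, eps=1e-6):
        super(LayerNorm, self).__init__()
        self.a_2 = nn.Parameter(torch.ones(features))
        self.b_2 = nn.Parameter(torch.zeros(features))
        self.eps = eps

    def forward(self, x):
        mean = x.mean(-1, keepdim=True)
        std = x.std(-1, keepdim=True)
        return self.a_2 * (x - mean) / (std + self.eps) + self.b_2

def forward(self, x, memory, src_mask, tgt_mask):
    "Decoder forward: pass the target through each layer, then normalize."
    for layer in self.layers:
        x = layer(x, memory, src_mask, tgt_mask)
    return self.norm(x)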