class DecoderLayer(nn.Module):
    "Decoder layer: self-attention, source attention, and feed forward, each wrapped in a residual sublayer."

    def __init__(self, size, self_attn, x_attn, feed_forward, dropout):
        super(DecoderLayer, self).__init__()
        self.size = size
        self.self_attn = self_attn
        self.src_attn = x_attn
        self.feed_forward = feed_forward
        # Three sublayer connections: self-attention, encoder-decoder attention, feed forward.
        self.sublayer = cloneModule(SublayerConnection(size, dropout), 3)
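# The constructors in this section call a cloneModule helper that is not shown
# here. A minimal sketch of what it presumably does, assuming it follows the
# usual "deep-copy into an nn.ModuleList" pattern (the body below is inferred,
# not taken from this file):
import copy
import torch.nn as nn

def cloneModule(module, N):
    # Return N independent deep copies of `module`, wrapped in an nn.ModuleList
    # so their parameters are registered with the parent module.
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])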
class MultiHeadedAttention(nn.Module):
    "Multi-head attention with h heads over a d_model-dimensional model."

    def __init__(self, h, d_model, dropout=0.1):
        super(MultiHeadedAttention, self).__init__()
        assert d_model % h == 0
        # Assume d_v always equals d_k.
        self.d_k = d_model // h
        self.h = h
        # Four projections: query, key, value, and the final output linear.
        self.linears = cloneModule(nn.Linear(d_model, d_model), 4)
        self.attn = None
        self.dropout = nn.Dropout(p=dropout)
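# A minimal sketch of the scaled dot-product attention that d_k, the four
# linears, and the dropout above are set up to support, assuming the standard
# formulation; the `attention` helper name is illustrative and not defined in
# this section.
import math
import torch

def attention(query, key, value, mask=None, dropout=None):
    # Scaled dot-product attention: softmax(Q K^T / sqrt(d_k)) V.
    d_k = query.size(-1)
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k)
    if mask is not None:
        scores = scores.masked_fill(mask == 0, -1e9)
    p_attn = scores.softmax(dim=-1)
    if dropout is not None:
        p_attn = dropout(p_attn)
    return torch.matmul(p_attn, value), p_attn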
class Encoder(nn.Module):
    "Core encoder: a stack of N identical layers followed by a final LayerNorm."

    def __init__(self, layer, N):
        super(Encoder, self).__init__()
        self.layers = cloneModule(layer, N)
        self.norm = LayerNorm(layer.size)
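# The LayerNorm referenced above is defined elsewhere in the file. A minimal
# sketch of a feature-wise layer normalization with learnable scale and shift,
# assuming only the interface implied by the call LayerNorm(layer.size):
import torch
import torch.nn as nn

class LayerNorm(nn.Module):
    def __init__(self, features, eps=1e-6):
        super(LayerNorm, self).__init__()
        self.a_2 = nn.Parameter(torch.ones(features))   # learnable scale
        self.b_2 = nn.Parameter(torch.zeros(features))  # learnable shift
        self.eps = eps

    def forward(self, x):
        # Normalize over the last (feature) dimension.
        mean = x.mean(-1, keepdim=True)
        std = x.std(-1, keepdim=True)
        return self.a_2 * (x - mean) / (std + self.eps) + self.b_2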