def __init__(self, size, self_attn, src_attn, feed_forward, dropout): super(DecoderLayer, self).__init__() self.size = size self.self_attn = self_attn self.src_attn = src_attn self.feed_forward = feed_forward self.sublayer = multi_layer(SublayerConnection(size, dropout), 3)
def __init__(self, h, d_model, dropout=0.1): super(MultiHeadAttention, self).__init__() assert d_model % h == 0 # We assume d_v always equals d_k self.d_k = d_model // h self.h = h self.linears = multi_layer(nn.Linear(d_model, d_model), 4) self.attn = None self.dropout = nn.Dropout(p=dropout)
def __init__(self, layer, N): super(Decoder, self).__init__() self.layers = multi_layer(layer, N) self.norm = LayerNorm(layer.size)