def __init__(self, size, self_attn, src_attn, feed_forward, dropout):
    super(DecoderLayer, self).__init__()
    self.size = size
    self.self_attn = self_attn
    self.src_attn = src_attn
    self.feed_forward = feed_forward
    # One residual SublayerConnection each for self-attn, src-attn, and feed-forward.
    self.sublayer = clones(SublayerConnection(size, dropout), 3)
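# A minimal sketch of the matching forward pass (not shown in this section),
# assuming the three SublayerConnection instances above wrap each step in a
# residual-plus-norm as elsewhere in this codebase: masked self-attention over
# the target, attention over the encoder memory, then the feed-forward block.
def forward(self, x, memory, src_mask, tgt_mask):
    m = memory
    x = self.sublayer[0](x, lambda x: self.self_attn(x, x, x, tgt_mask))
    x = self.sublayer[1](x, lambda x: self.src_attn(x, m, m, src_mask))
    return self.sublayer[2](x, self.feed_forward)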
def __init__(self, h, d_model, dropout=0.1):
    """Take in model size and number of heads."""
    super(MultiHeadedAttention, self).__init__()
    assert d_model % h == 0
    # We assume d_v always equals d_k. Derive it from d_model and h rather
    # than hard-coding 64, which only holds when d_model / h == 64.
    self.d_k = d_model // h
    self.h = h
    # Four projections: query, key, value, and the final output linear.
    # Since h * d_k == d_model, each maps d_model => d_model.
    self.linears = clones(nn.Linear(d_model, d_model), 4)
    self.attn = None
    self.dropout = nn.Dropout(p=dropout)
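# A sketch of the forward pass this __init__ sets up, together with the
# scaled dot-product helper it relies on. The `attention` function is assumed
# here (it is not shown in this section); it computes softmax(QK^T / sqrt(d_k)) V.
# Requires `import math` and `import torch` at module level.
def attention(query, key, value, mask=None, dropout=None):
    "Compute scaled dot-product attention."
    d_k = query.size(-1)
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k)
    if mask is not None:
        scores = scores.masked_fill(mask == 0, -1e9)
    p_attn = scores.softmax(dim=-1)
    if dropout is not None:
        p_attn = dropout(p_attn)
    return torch.matmul(p_attn, value), p_attn

def forward(self, query, key, value, mask=None):
    "Project q/k/v, attend over all heads in parallel, then recombine."
    if mask is not None:
        mask = mask.unsqueeze(1)  # same mask applied to all h heads
    nbatches = query.size(0)
    # 1) Linear projections in batch from d_model => h x d_k.
    query, key, value = [
        lin(x).view(nbatches, -1, self.h, self.d_k).transpose(1, 2)
        for lin, x in zip(self.linears, (query, key, value))
    ]
    # 2) Apply attention to all the projected vectors in batch.
    x, self.attn = attention(query, key, value, mask=mask, dropout=self.dropout)
    # 3) "Concat" the heads via a view and apply the final output linear.
    x = x.transpose(1, 2).contiguous().view(nbatches, -1, self.h * self.d_k)
    return self.linears[-1](x)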
def __init__(self, layer, N):
    super(Decoder, self).__init__()
    self.layers = clones(layer, N)
    self.norm = LayerNorm(layer.size)
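# A sketch of the corresponding forward pass: thread the target embedding x
# (plus the encoder memory and both masks) through each cloned layer in turn,
# then apply the final LayerNorm. Assumes each layer has the DecoderLayer
# forward signature shown above.
def forward(self, x, memory, src_mask, tgt_mask):
    for layer in self.layers:
        x = layer(x, memory, src_mask, tgt_mask)
    return self.norm(x)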