def __init__(self, d_model, self_attention_layer, feed_forward_layer, dropout):
    """Build one encoder layer: a self-attention sublayer and a
    feed-forward sublayer, each wrapped in a residual SublayerConnection.

    Args:
        d_model: model (embedding) dimension of the layer.
        self_attention_layer: the self-attention module for this layer.
        feed_forward_layer: the position-wise feed-forward module.
        dropout: dropout probability passed to each SublayerConnection.
    """
    super(EncoderLayer, self).__init__()
    self.d_model = d_model
    self.self_attention_layer = self_attention_layer
    self.feed_forward_layer = feed_forward_layer
    # Two residual wrappers: one around attention, one around feed-forward.
    self.sublayer = clones(SublayerConnection(d_model, dropout), 2)
def __init__(self, header_num, d_model, dropout=0.1):
    """Multi-headed attention module.

    Args:
        header_num: number of attention heads.
        d_model: model (embedding) dimension; must be divisible by header_num.
        dropout: dropout probability for the internal dropout layer.

    Raises:
        ValueError: if d_model is not evenly divisible by header_num.
    """
    super(MultiHeadedAttention, self).__init__()
    # An `assert` is stripped under `python -O`, so validate explicitly —
    # the check must survive optimized runs and give a useful message.
    if d_model % header_num != 0:
        raise ValueError(
            f"d_model ({d_model}) must be divisible by header_num ({header_num})"
        )
    self.dk = d_model // header_num  # per-head dimension
    self.header_num = header_num
    # Four projections: query, key, value, and the final output projection.
    self.linear_layers = clones(nn.Linear(d_model, d_model), 4)
    # Presumably caches the last attention weights; set in forward — confirm.
    self.atten = None
    self.dropout_layer = nn.Dropout(p=dropout)
def __init__(self, layer, N):
    """Stack N identical copies of `layer`, followed by a final LayerNorm.

    Args:
        layer: a single encoder layer to be cloned; must expose `d_model`.
        N: number of stacked copies.
    """
    super(Encoder, self).__init__()
    # Deep-copied stack of identical layers.
    self.layers = clones(layer, N)
    # Final normalization over the layer's model dimension.
    self.norm = LayerNorm(layer.d_model)