def __init__(self, d_model=512, d_k=64, d_v=64, h=8, d_ff=2048, dropout=.1,
             identity_map_reordering=False, attention_module=None,
             attention_module_kwargs=None):
    """Create the attention and feed-forward sub-layers of this encoder layer.

    Args:
        d_model: First positional argument of both ``MultiHeadAttention``
            and ``PositionWiseFeedForward``.
        d_k: Second positional argument of ``MultiHeadAttention``.
        d_v: Third positional argument of ``MultiHeadAttention``.
        h: Fourth positional argument of ``MultiHeadAttention``.
        d_ff: Second positional argument of ``PositionWiseFeedForward``.
        dropout: Passed positionally to both sub-layers.
        identity_map_reordering: Stored on the instance and forwarded by
            keyword to both sub-layers.
        attention_module: Forwarded by keyword to ``MultiHeadAttention``.
        attention_module_kwargs: Forwarded by keyword to
            ``MultiHeadAttention``.
    """
    super(EncoderLayer, self).__init__()
    self.identity_map_reordering = identity_map_reordering

    # Keyword-only configuration handed to the attention sub-layer.
    attention_options = dict(
        identity_map_reordering=identity_map_reordering,
        attention_module=attention_module,
        attention_module_kwargs=attention_module_kwargs,
    )

    # Attention is built before the feed-forward block; keep this order so
    # attribute assignment order on the instance is unchanged.
    self.mhatt = MultiHeadAttention(d_model, d_k, d_v, h, dropout, **attention_options)
    self.pwff = PositionWiseFeedForward(
        d_model,
        d_ff,
        dropout,
        identity_map_reordering=identity_map_reordering,
    )
def __init__(self, d_model=512, d_k=64, d_v=64, h=8, d_ff=2048, dropout=.1,
             self_att_module=None, enc_att_module=None,
             self_att_module_kwargs=None, enc_att_module_kwargs=None):
    """Create the sub-layers of this meshed decoder layer.

    Builds two ``MultiHeadAttention`` instances (``self_att`` and
    ``enc_att``), one ``PositionWiseFeedForward`` and three linear layers
    (``fc_alpha1`` .. ``fc_alpha3``), then calls ``self.init_weights()``.

    Args:
        d_model: First positional argument of every sub-layer; also sets
            the size of the ``fc_alpha*`` linear layers.
        d_k: Second positional argument of both attention sub-layers.
        d_v: Third positional argument of both attention sub-layers.
        h: Fourth positional argument of both attention sub-layers.
        d_ff: Second positional argument of ``PositionWiseFeedForward``.
        dropout: Passed positionally to the attention and feed-forward
            sub-layers.
        self_att_module: Forwarded as ``attention_module`` to ``self_att``.
        enc_att_module: Forwarded as ``attention_module`` to ``enc_att``.
        self_att_module_kwargs: Forwarded as ``attention_module_kwargs``
            to ``self_att``.
        enc_att_module_kwargs: Forwarded as ``attention_module_kwargs``
            to ``enc_att``.
    """
    super(MeshedDecoderLayer, self).__init__()

    # ``self_att`` is created with can_be_stateful=True, ``enc_att`` with
    # can_be_stateful=False.
    self.self_att = MultiHeadAttention(
        d_model, d_k, d_v, h, dropout,
        can_be_stateful=True,
        attention_module=self_att_module,
        attention_module_kwargs=self_att_module_kwargs,
    )
    self.enc_att = MultiHeadAttention(
        d_model, d_k, d_v, h, dropout,
        can_be_stateful=False,
        attention_module=enc_att_module,
        attention_module_kwargs=enc_att_module_kwargs,
    )
    self.pwff = PositionWiseFeedForward(d_model, d_ff, dropout)

    # Each alpha layer maps an input of width 2 * d_model down to d_model.
    # NOTE(review): presumably the input is a concatenation of two
    # d_model-wide tensors -- confirm against forward().
    alpha_in_features = d_model + d_model
    self.fc_alpha1 = nn.Linear(alpha_in_features, d_model)
    self.fc_alpha2 = nn.Linear(alpha_in_features, d_model)
    self.fc_alpha3 = nn.Linear(alpha_in_features, d_model)

    # Must run last: every layer above has to exist before it is called.
    self.init_weights()
def __init__(self, d_model=512, d_k=64, d_v=64, h=8, d_ff=2048, dropout=.1,
             identity_map_reordering=False, self_att_module=None,
             enc_att_module=None, self_att_module_kwargs=None,
             enc_att_module_kwargs=None):
    """Create the two attention sub-layers and the feed-forward sub-layer.

    Args:
        d_model: First positional argument of every sub-layer.
        d_k: Second positional argument of both attention sub-layers.
        d_v: Third positional argument of both attention sub-layers.
        h: Fourth positional argument of both attention sub-layers.
        d_ff: Second positional argument of ``PositionWiseFeedForward``.
        dropout: Passed positionally to every sub-layer.
        identity_map_reordering: Forwarded by keyword to every sub-layer.
        self_att_module: Forwarded as ``attention_module`` to ``self_att``.
        enc_att_module: Forwarded as ``attention_module`` to ``enc_att``.
        self_att_module_kwargs: Forwarded as ``attention_module_kwargs``
            to ``self_att``.
        enc_att_module_kwargs: Forwarded as ``attention_module_kwargs``
            to ``enc_att``.
    """
    super(DecoderLayer, self).__init__()

    # Positional arguments common to both attention sub-layers.
    attention_args = (d_model, d_k, d_v, h, dropout)

    # The two attention sub-layers differ in the can_be_stateful flag and
    # in which pluggable attention module / kwargs they receive.
    self_att_options = dict(
        can_be_stateful=True,
        identity_map_reordering=identity_map_reordering,
        attention_module=self_att_module,
        attention_module_kwargs=self_att_module_kwargs,
    )
    enc_att_options = dict(
        can_be_stateful=False,
        identity_map_reordering=identity_map_reordering,
        attention_module=enc_att_module,
        attention_module_kwargs=enc_att_module_kwargs,
    )

    # Creation order (self_att, enc_att, pwff) is kept as in the original.
    self.self_att = MultiHeadAttention(*attention_args, **self_att_options)
    self.enc_att = MultiHeadAttention(*attention_args, **enc_att_options)
    self.pwff = PositionWiseFeedForward(
        d_model,
        d_ff,
        dropout,
        identity_map_reordering=identity_map_reordering,
    )