Example #1
    def __init__(self,
                 d_model=512,
                 d_k=64,
                 d_v=64,
                 h=8,
                 d_ff=2048,
                 dropout=.1,
                 identity_map_reordering=False,
                 attention_module=None,
                 attention_module_kwargs=None):
        super(EncoderLayer, self).__init__()
        self.identity_map_reordering = identity_map_reordering
        # Multi-head self-attention sub-layer: h=8 heads with d_k=d_v=64 each,
        # so h * d_v matches d_model=512; attention_module / attention_module_kwargs
        # allow a custom attention implementation to be plugged in.
        self.mhatt = MultiHeadAttention(
            d_model,
            d_k,
            d_v,
            h,
            dropout,
            identity_map_reordering=identity_map_reordering,
            attention_module=attention_module,
            attention_module_kwargs=attention_module_kwargs)
        # Position-wise feed-forward sub-layer with inner size d_ff=2048.
        self.pwff = PositionWiseFeedForward(
            d_model,
            d_ff,
            dropout,
            identity_map_reordering=identity_map_reordering)
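
The constructor only wires two sub-layers together: multi-head self-attention (mhatt) followed by a position-wise feed-forward block (pwff), with the usual Transformer defaults (d_model = 512 = h * d_v = 8 * 64, d_ff = 2048). PositionWiseFeedForward itself is defined elsewhere in the project, so the snippet below is only a minimal sketch of the standard two-linear-layer block that the d_ff argument refers to, not the project's actual implementation.

import torch
import torch.nn as nn

d_model, d_ff, dropout = 512, 2048, 0.1

# Illustrative stand-in for a position-wise feed-forward block:
# expand to d_ff, apply a non-linearity, project back to d_model.
ffn = nn.Sequential(
    nn.Linear(d_model, d_ff),
    nn.ReLU(),
    nn.Dropout(dropout),
    nn.Linear(d_ff, d_model),
)

x = torch.randn(2, 10, d_model)   # (batch, sequence_length, d_model)
out = ffn(x)                      # same shape: (2, 10, 512)
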
Example #2
    def __init__(self, d_model=512, d_k=64, d_v=64, h=8, d_ff=2048, dropout=.1, self_att_module=None,
                 enc_att_module=None, self_att_module_kwargs=None, enc_att_module_kwargs=None):
        super(MeshedDecoderLayer, self).__init__()
        # Masked self-attention over the generated sequence; stateful so that keys and
        # values can be cached during step-by-step (autoregressive) decoding.
        self.self_att = MultiHeadAttention(d_model, d_k, d_v, h, dropout, can_be_stateful=True,
                                           attention_module=self_att_module,
                                           attention_module_kwargs=self_att_module_kwargs)
        # Cross-attention over the encoder output; stateless because the encoder
        # features stay fixed throughout decoding.
        self.enc_att = MultiHeadAttention(d_model, d_k, d_v, h, dropout, can_be_stateful=False,
                                          attention_module=enc_att_module,
                                          attention_module_kwargs=enc_att_module_kwargs)
        self.pwff = PositionWiseFeedForward(d_model, d_ff, dropout)

        # Three gating projections, each mapping a concatenation of two d_model-sized
        # tensors (hence the d_model + d_model input width) back to d_model; in a meshed
        # decoder these typically weight the cross-attention of one encoder level each.
        self.fc_alpha1 = nn.Linear(d_model + d_model, d_model)
        self.fc_alpha2 = nn.Linear(d_model + d_model, d_model)
        self.fc_alpha3 = nn.Linear(d_model + d_model, d_model)

        # Weight initialization helper defined elsewhere in the class.
        self.init_weights()
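
The three fc_alpha linear layers are what make this decoder layer "meshed": each one maps a concatenation of two d_model-sized tensors (hence the d_model + d_model input width) to a d_model-sized gate. The forward pass is not part of the example, so the snippet below is only a hedged sketch of how such a concatenation gate is typically applied; the tensor names and the sigmoid gating are illustrative assumptions, not code from the example.

import torch
import torch.nn as nn

d_model = 512
fc_alpha1 = nn.Linear(d_model + d_model, d_model)

self_att_out = torch.randn(2, 10, d_model)   # decoder self-attention output (batch, seq, d_model)
enc_att_out1 = torch.randn(2, 10, d_model)   # cross-attention output for one encoder level

# Gate in (0, 1) controlling how much of this encoder level contributes.
alpha1 = torch.sigmoid(fc_alpha1(torch.cat([self_att_out, enc_att_out1], dim=-1)))
gated = alpha1 * enc_att_out1                # (2, 10, 512)
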
Example #3
    def __init__(self,
                 d_model=512,
                 d_k=64,
                 d_v=64,
                 h=8,
                 d_ff=2048,
                 dropout=.1,
                 identity_map_reordering=False,
                 self_att_module=None,
                 enc_att_module=None,
                 self_att_module_kwargs=None,
                 enc_att_module_kwargs=None):
        super(DecoderLayer, self).__init__()
        # Masked self-attention over the decoded sequence; stateful so that keys and
        # values can be cached between steps of autoregressive decoding.
        self.self_att = MultiHeadAttention(
            d_model,
            d_k,
            d_v,
            h,
            dropout,
            can_be_stateful=True,
            identity_map_reordering=identity_map_reordering,
            attention_module=self_att_module,
            attention_module_kwargs=self_att_module_kwargs)
        # Cross-attention over the encoder output; stateless because the encoder
        # features stay fixed throughout decoding.
        self.enc_att = MultiHeadAttention(
            d_model,
            d_k,
            d_v,
            h,
            dropout,
            can_be_stateful=False,
            identity_map_reordering=identity_map_reordering,
            attention_module=enc_att_module,
            attention_module_kwargs=enc_att_module_kwargs)
        # Position-wise feed-forward sub-layer with inner size d_ff=2048.
        self.pwff = PositionWiseFeedForward(
            d_model,
            d_ff,
            dropout,
            identity_map_reordering=identity_map_reordering)
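
Compared to the encoder layer in Example #1, this layer adds a second attention block: self_att is stateful (can_be_stateful=True), which typically means its keys and values can be cached while decoding token by token, while enc_att attends to the fixed encoder output and needs no state. Decoder self-attention of this kind is normally combined with a causal (look-ahead) mask during training; the snippet below is a hedged sketch of that standard mask, not code taken from the example.

import torch

seq_len = 5
# True marks the positions a query must NOT attend to (future tokens).
causal_mask = torch.triu(torch.ones(seq_len, seq_len), diagonal=1).bool()

scores = torch.randn(seq_len, seq_len)                    # raw attention scores for one head
scores = scores.masked_fill(causal_mask, float('-inf'))   # block attention to the future
attn = torch.softmax(scores, dim=-1)                      # each row attends only to itself and earlier positions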