def __init__(self,
              d_model,
              d_inner_hid,
              n_head,
              d_k,
              d_v,
              dropout=0.1,
              n_max_seq=None,
              use_attentions=None):
     super(EncoderLayer, self).__init__()
     self.d_model = d_model
     self.n_head = n_head
     self.use_attentions = use_attentions
     self.slf_attn = MultiHeadAttention(n_head,
                                        d_model,
                                        d_k,
                                        d_v,
                                        dropout=dropout)
     self.pos_ffn = PositionwiseFeedForward(d_model,
                                            d_inner_hid,
                                            dropout=dropout)
     if self.use_attentions is not None:
         self.n_max_seq = n_max_seq
         both_mult = 2 if self.use_attentions == 'both' else 1
         self.attn_fc = nn.Linear(d_model + both_mult * n_head * n_max_seq,
                                  d_model)
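
A hypothetical forward pass consistent with this constructor (not taken from the original source): it assumes torch is imported, that slf_attn takes (q, k, v, mask=...) and returns (output, attn) with attn shaped (batch, n_head, seq_len, n_max_seq), and it covers only the single-attention case, not the 'both' variant that doubles the feature width.

    def forward(self, enc_input, slf_attn_mask=None):
        enc_output, enc_slf_attn = self.slf_attn(
            enc_input, enc_input, enc_input, mask=slf_attn_mask)
        if self.use_attentions is not None:
            # flatten each position's per-head attention rows and append them
            # to the hidden state before projecting back to d_model
            bsz, seq_len, _ = enc_output.size()
            attn_feat = enc_slf_attn.permute(0, 2, 1, 3).reshape(bsz, seq_len, -1)
            enc_output = self.attn_fc(torch.cat([enc_output, attn_feat], dim=-1))
        enc_output = self.pos_ffn(enc_output)
        return enc_output, enc_slf_attn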
Example #2
    def __init__(self, hps, embed):
        """
        
        :param hps: hyperparameters for the model
        :param embed: word embedding
        """
        super(SummarizationModel, self).__init__()

        self._hps = hps
        self.Train = (hps.mode == 'train')

        # sentence encoder
        self.encoder = Encoder(hps, embed)

        # Multi-layer highway lstm
        self.num_layers = hps.n_layers
        self.sent_embedding_size = (hps.max_kernel_size - hps.min_kernel_size + 1) * hps.output_channel
        self.lstm_hidden_size = hps.lstm_hidden_size
        self.recurrent_dropout = hps.recurrent_dropout_prob

        self.deep_lstm = DeepLSTM(self.sent_embedding_size, self.lstm_hidden_size,
                                  self.num_layers, self.recurrent_dropout,
                                  hps.use_orthnormal_init, hps.fix_mask, hps.cuda)

        # Multi-head attention
        self.n_head = hps.n_head
        self.d_v = self.d_k = int(self.lstm_hidden_size / hps.n_head)
        self.d_inner = hps.ffn_inner_hidden_size
        self.slf_attn = MultiHeadAttention(hps.n_head, self.lstm_hidden_size, self.d_k, self.d_v,
                                           dropout=hps.atten_dropout_prob)
        self.pos_ffn = PositionwiseFeedForward(self.d_v, self.d_inner, dropout=hps.ffn_dropout_prob)

        self.wh = nn.Linear(self.d_v, 2)
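
The constructor above reads a specific set of fields from hps; a hypothetical namespace covering exactly those fields (placeholder values, not the authors' settings; the nested Encoder may require more) makes the dependencies explicit:

    from argparse import Namespace

    hps = Namespace(
        mode='train', n_layers=2,
        min_kernel_size=1, max_kernel_size=7, output_channel=50,
        lstm_hidden_size=512, recurrent_dropout_prob=0.1,
        use_orthnormal_init=True, fix_mask=False, cuda=False,
        n_head=8, ffn_inner_hidden_size=2048,
        atten_dropout_prob=0.1, ffn_dropout_prob=0.1)
    # model = SummarizationModel(hps, embed)  # embed: e.g. a pretrained nn.Embedding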
Example #3
 def __init__(self,
              d_model,
              d_inner,
              n_head,
              d_k,
              d_v,
              dropout=0.1,
              keyword_module=None):
     super(DecoderLayer, self).__init__()
     self.key_module = keyword_module
     self.slf_attn = MultiHeadAttention(n_head,
                                        d_model,
                                        d_k,
                                        d_v,
                                        dropout=dropout)
     self.enc_attn = MultiHeadAttention(n_head,
                                        d_model,
                                        d_k,
                                        d_v,
                                        dropout=dropout)
     if self.key_module is not None:
         self.key_attn = MultiHeadAttention(n_head,
                                            d_model,
                                            d_k,
                                            d_v,
                                            dropout=dropout)
     self.pos_ffn = PositionwiseFeedForward(d_model,
                                            d_inner,
                                            dropout=dropout)
Example #4
 def __init__(self, d_model, d_inner, n_head, dropout=0.1):
     super(DecoderLayer, self).__init__()
     self.slf_attn = MultiHeadAttention(n_head, d_model, dropout=dropout)
     self.enc_attn = MultiHeadAttention(n_head, d_model, dropout=dropout)
     self.pos_ffn = PositionwiseFeedForward(d_model,
                                            d_inner,
                                            dropout=dropout)
Example #5
    def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1):
        super(DecoderLayer, self).__init__()
        self.slf_attn = MultiHeadAttention(n_head,
                                           d_model,
                                           d_k,
                                           d_v,
                                           dropout=dropout)  # the decoder adds one extra sub-layer: enc_attn
        self.enc_attn = MultiHeadAttention(n_head,
                                           d_model,
                                           d_k,
                                           d_v,
                                           dropout=dropout)
        self.pos_ffn = PositionwiseFeedForward(d_model,
                                               d_inner,
                                               dropout=dropout)

        from pathlib import Path
        # build an absolute path to the 'vocab_pair' file two directories above this module
        output_filedir = Path(__file__).resolve().parent.parent / 'vocab_pair'

        # read the colon-separated pairs into a lookup dictionary
        dic2 = {}
        with open(output_filedir, encoding='utf-8') as f:
            for line in f.readlines():
                parts = line.strip('\n').split(':')
                dic2[parts[0]] = parts[1]
        self.check_dic = dic2
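
The parsing loop expects vocab_pair to be a plain-text file with one colon-separated pair per line; a hypothetical excerpt (the tokens are placeholders, not from the original repository):

    source_token:target_token
    another_source:another_target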
Example #6
    def __init__(self, d_out, d_inner, n_head, d_k, d_v, dropout=0.1, type_='same', skip_connect=False, d_in=None):
        super(UNetEncoderLayer, self).__init__()

        d_in = d_in if d_in is not None else d_out  # size of input to unet layer

        self.slf_attn = MultiHeadAttention(
            n_head, d_out, d_k, d_v, dropout=dropout, d_in=d_in)
        self.pos_ffn = PositionwiseFeedForward(d_out, d_inner, dropout=dropout)

        self.norm = nn.LayerNorm(d_out)

        self.skip_connect = skip_connect

        # TODO add depthwise-separable convolutions

        self.maxpool = None
        self.type = type_
        if type_ == 'down':
            # half size of output
            self.conv = nn.Conv1d(d_in, d_in, kernel_size=3, padding=1, groups=d_in)
            self.maxpool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)
        elif type_ == 'same':
            # keep size of output the same
            self.conv = nn.Conv1d(d_in, d_in, kernel_size=3, padding=1, groups=d_in)
        elif type_ == 'up':
            # double size of output
            self.conv = nn.ConvTranspose1d(d_in, d_in, kernel_size=3, stride=2, padding=1, groups=d_in)
        elif type_== 'none':
            self.conv = None
        else:
            raise RuntimeError('Did not specify appropriate convolution type')

        self.conv_out = nn.Linear(d_in, d_out)
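
A quick shape check for the three convolution modes above, using the kernel, stride and padding values from the constructor (batch size, d_in and sequence length are arbitrary):

    import torch
    import torch.nn as nn

    d_in, length = 64, 16
    x = torch.randn(2, d_in, length)  # (batch, channels, seq_len)

    same = nn.Conv1d(d_in, d_in, kernel_size=3, padding=1, groups=d_in)
    pool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)
    up = nn.ConvTranspose1d(d_in, d_in, kernel_size=3, stride=2, padding=1, groups=d_in)

    print(same(x).shape)        # torch.Size([2, 64, 16]) -> 'same' keeps the length
    print(pool(same(x)).shape)  # torch.Size([2, 64, 8])  -> 'down' halves it
    print(up(x).shape)          # torch.Size([2, 64, 31]) -> 'up' roughly doubles it (2L - 1)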
Example #7
    def __init__(self,
                 attention_type,
                 d_model,
                 d_inner,
                 n_head,
                 d_k,
                 d_v,
                 dropout=0.1,
                 normalize_before=True):
        super(EncoderLayer, self).__init__()
        if attention_type == 'softmax':
            self.slf_attn = MultiHeadAttention(
                n_head,
                d_model,
                d_k,
                d_v,
                dropout=dropout,
                normalize_before=normalize_before)
        elif attention_type == 'performer':
            self.slf_attn = PerformerAttention(
                n_head,
                d_model,
                d_k,
                d_v,
                dropout=dropout,
                normalize_before=normalize_before)
        else:
            raise ValueError('Unknown attention_type: {}'.format(attention_type))

        self.pos_ffn = PositionwiseFeedForward(
            d_model,
            d_inner,
            dropout=dropout,
            normalize_before=normalize_before)
Example #8
 def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1):
     super(EncoderLayer, self).__init__()
      self.slf_attn = MultiHeadAttention(  # instantiate the multi-head attention module
          n_head, d_model, d_k, d_v, dropout=dropout)
      self.pos_ffn = PositionwiseFeedForward(d_model,
                                             d_inner,
                                             dropout=dropout)  # define the position-wise feed-forward layer
Example #9
 def __init__(self,
              d_model,
              d_inner,
              n_head,
              d_k,
              d_v,
              dropout=0.1,
              share_qk=False,
              swish_activation=False):
     super(DecoderLayer, self).__init__()
     self.slf_attn = MultiHeadAttention(n_head,
                                        d_model,
                                        d_k,
                                        d_v,
                                        dropout=dropout)
     self.enc_attn = MultiHeadAttention(n_head,
                                        d_model,
                                        d_k,
                                        d_v,
                                        dropout=dropout)
     self.pos_ffn = PositionwiseFeedForward(
         d_model,
         d_inner,
         dropout=dropout,
         swish_activation=swish_activation)
Example #10
    def __init__(self, hid_dim, n_heads, pf_dim, dropout, device):
        super(DecoderLayer, self).__init__()

        self.layer_norm = nn.LayerNorm(hid_dim)
        self.slf_attn = SelfAttention(hid_dim, n_heads, dropout, device)
        self.enc_attn = SelfAttention(hid_dim, n_heads, dropout, device)
        self.pos_forward = PositionwiseFeedForward(hid_dim, pf_dim, dropout)
        self.dropout = nn.Dropout(dropout)
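
This variant shares a single LayerNorm and Dropout across the sub-layers; a hypothetical forward wiring consistent with that design (the SelfAttention call signature and the post-norm residual placement are assumptions, not part of the snippet):

    def forward(self, trg, src, trg_mask=None, src_mask=None):
        # masked self-attention over the decoder inputs, then residual + norm
        trg = self.layer_norm(trg + self.dropout(self.slf_attn(trg, trg, trg, trg_mask)))
        # attention from decoder positions to the encoder outputs
        trg = self.layer_norm(trg + self.dropout(self.enc_attn(trg, src, src, src_mask)))
        # position-wise feed-forward
        trg = self.layer_norm(trg + self.dropout(self.pos_forward(trg)))
        return trg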
Example #11
 def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1):
     super(FFTBlock, self).__init__()
     self.slf_attn = MultiHeadAttention(n_head,
                                        d_model,
                                        d_k,
                                        d_v,
                                        dropout=dropout)
     self.pos_ffn = PositionwiseFeedForward(d_model,
                                            d_inner,
                                            dropout=dropout)
Example #12
 def __init__(self, d_model, d_inner, n_head, d_k, d_v, rel_pos_op=None, dropout=0.1):
     super(EncoderLayer, self).__init__()
     self.rel_pos_op = rel_pos_op
     if rel_pos_op is None:
         self.slf_attn = MultiHeadAttention(
             n_head, d_model, d_k, d_v, dropout=dropout)
     else:
         self.slf_attn = RelMultiHeadAttention(
             n_head, d_model, d_k, d_v, rel_pos_op=rel_pos_op, dropout=dropout)
     self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout)
Example #13
 def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1):
     super(EncoderLayer, self).__init__()
      # multi-head attention with n_head = 8 heads; includes the Add & Norm step
      # MultiHeadAttention.forward takes Q, K, V and a mask
      # the MultiHeadAttention module is built from n_head, d_model, d_k and d_v
     self.slf_attn = MultiHeadAttention(n_head,
                                        d_model,
                                        d_k,
                                        d_v,
                                        dropout=dropout)
     self.pos_ffn = PositionwiseFeedForward(d_model,
                                            d_inner,
                                            dropout=dropout)
Example #14
    def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1):
        super(EncoderLayer, self).__init__()

        # the encoder layer starts with a multi-head attention sub-layer
        self.slf_attn = MultiHeadAttention(n_head,
                                           d_model,
                                           d_k,
                                           d_v,
                                           dropout=dropout)

        # the second part is a position-wise feed-forward layer
        self.pos_ffn = PositionwiseFeedForward(d_model,
                                               d_inner,
                                               dropout=dropout)
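
Such layers are normally stacked into a full encoder; a minimal sketch using the base-Transformer sizes from the original paper (the sizes are illustrative, not taken from this snippet):

    import torch.nn as nn

    n_layers = 6
    layer_stack = nn.ModuleList([
        EncoderLayer(d_model=512, d_inner=2048, n_head=8, d_k=64, d_v=64, dropout=0.1)
        for _ in range(n_layers)])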
Example #15
 def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1):
     super(DecoderLayer, self).__init__()
      # the first decoder sub-layer is masked multi-head self-attention
     self.slf_attn = MultiHeadAttention(n_head,
                                        d_model,
                                        d_k,
                                        d_v,
                                        dropout=dropout)
      # the second sub-layer is multi-head attention over the encoder outputs
     self.enc_attn = MultiHeadAttention(n_head,
                                        d_model,
                                        d_k,
                                        d_v,
                                        dropout=dropout)
      # the third sub-layer is a position-wise feed-forward network
     self.pos_ffn = PositionwiseFeedForward(d_model,
                                            d_inner,
                                            dropout=dropout)
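
A hypothetical forward pass matching the three sub-layers described in the comments; it assumes each attention module takes (q, k, v, mask=...) and returns (output, attention weights), which this snippet does not show:

    def forward(self, dec_input, enc_output, slf_attn_mask=None, dec_enc_attn_mask=None):
        # 1) masked self-attention over the decoder inputs
        dec_output, dec_slf_attn = self.slf_attn(
            dec_input, dec_input, dec_input, mask=slf_attn_mask)
        # 2) attention from decoder positions to the encoder outputs
        dec_output, dec_enc_attn = self.enc_attn(
            dec_output, enc_output, enc_output, mask=dec_enc_attn_mask)
        # 3) position-wise feed-forward
        dec_output = self.pos_ffn(dec_output)
        return dec_output, dec_slf_attn, dec_enc_attn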
Example #16
 def __init__(self,
              d_model,
              d_inner,
              n_head,
              d_k,
              d_v,
              dropout=0.1,
              tt_params={}):
     super(EncoderLayer, self).__init__()
     self.slf_attn = MultiHeadAttention(n_head,
                                        d_model,
                                        d_k,
                                        d_v,
                                        dropout=dropout,
                                        tt_params=tt_params)
     self.pos_ffn = PositionwiseFeedForward(d_model,
                                            d_inner,
                                            dropout=dropout,
                                            tt_params=tt_params)
Example #17
 def __init__(self,
              d_model,
              d_inner,
              n_head,
              d_k,
              d_v,
              dropout=0.1,
              normalize_before=True):
     super(EncoderLayer, self).__init__()
     self.slf_attn = MultiHeadAttention(n_head,
                                        d_model,
                                        d_k,
                                        d_v,
                                        dropout=dropout,
                                        normalize_before=normalize_before)
     self.pos_ffn = PositionwiseFeedForward(
         d_model,
         d_inner,
         dropout=dropout,
         normalize_before=normalize_before)
Example #18
    def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1):
        '''
        :param d_model: model input dimension
        :param d_inner: hidden size of the feed-forward layer
        :param n_head:  number of attention heads
        :param d_k:     key vector dimension
        :param d_v:     value vector dimension
        :param dropout: dropout probability
        '''
        super(EncoderLayer, self).__init__()

        self.slf_attn = MultiHeadAttention(n_head,
                                           d_model,
                                           d_k,
                                           d_v,
                                           dropout=dropout)

        self.pos_ffn = PositionwiseFeedForward(d_model,
                                               d_inner,
                                               dropout=dropout)
Example #19
 def __init__(self,
              d_model,
              d_inner,
              n_head,
              d_k,
              d_v,
              dropout=0.1,
              kernel='linear',
              kernel_size_tcn=3,
              kernel_size_scn=2):
     super(EncoderLayer, self).__init__()
     self.slf_attn = MultiHeadAttention(n_head,
                                        d_model,
                                        d_k,
                                        d_v,
                                        dropout=dropout,
                                        kernel=kernel,
                                        kernel_size_tcn=kernel_size_tcn,
                                        kernel_size_scn=kernel_size_scn)
     self.pos_ffn = PositionwiseFeedForward(d_model,
                                            d_inner,
                                            dropout=dropout)
Example #20
 def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1):  # (512, 2048, 8, 64, 64, 0.1)
     super(EncoderLayer, self).__init__()
     self.slf_attn = MultiHeadAttention(
         n_head, d_model, d_k, d_v, dropout=dropout)  # (8, 512, 64, 64)
     self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout)  # (512, 2048, 0.1)
Example #21
 def __init__(self, d_model, d_inner_hid, n_head):
     super(EncoderLayer, self).__init__()
     self.slf_attn = MultiHeadAttention(d_model, n_head)
     self.pos_ffn = PositionwiseFeedForward(d_model, d_inner_hid)
Example #22
 def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1, d_enc=None):
     super(DecoderLayer, self).__init__()
     d_enc = d_model if d_enc is None else d_enc
     self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
     self.enc_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout, d_in=d_enc)
     self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout)
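
The optional d_enc argument lets the decoder attend over encoder states whose width differs from d_model; a hypothetical instantiation (all sizes are placeholders):

    # decoder hidden size 256, encoder hidden size 512
    layer = DecoderLayer(d_model=256, d_inner=1024, n_head=4, d_k=64, d_v=64,
                         dropout=0.1, d_enc=512)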
Example #23
 def __init__(self, d_model, d_inner, n_head, dropout=0.1):
     super(EncoderNewLayer, self).__init__()
     self.slf_attn = MlpAttention(n_head, d_model, dropout=dropout)
     self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout)