def __init__(self, d_model, d_inner_hid, n_head, d_k, d_v, dropout=0.1, n_max_seq=None, use_attentions=None):
    """Encoder layer: multi-head self-attention followed by a position-wise FFN,
    optionally fusing previous attention maps back into the hidden state.

    Args:
        d_model: model (hidden) dimension.
        d_inner_hid: inner dimension of the feed-forward sub-layer.
        n_head: number of attention heads.
        d_k: per-head key dimension.
        d_v: per-head value dimension.
        dropout: dropout probability.
        n_max_seq: maximum sequence length; required when use_attentions is set.
        use_attentions: None disables the fusion layer; the value 'both'
            doubles the width of the concatenated attention maps.
    """
    super(EncoderLayer, self).__init__()
    self.d_model = d_model
    self.n_head = n_head
    self.use_attentions = use_attentions
    self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
    self.pos_ffn = PositionwiseFeedForward(d_model, d_inner_hid, dropout=dropout)
    if self.use_attentions is not None:
        self.n_max_seq = n_max_seq
        # 'both' concatenates two attention maps, so the fused width doubles.
        width_factor = 2 if self.use_attentions == 'both' else 1
        self.attn_fc = nn.Linear(d_model + width_factor * n_head * n_max_seq, d_model)
def __init__(self, hps, embed):
    """
    Summarization model: CNN sentence encoder -> multi-layer highway LSTM
    -> multi-head self-attention -> position-wise FFN -> 2-way classifier.

    :param hps: hyperparameters for the model
    :param embed: word embedding
    """
    super(SummarizationModel, self).__init__()
    self._hps = hps
    self.Train = (hps.mode == 'train')  # True when the model runs in training mode

    # sentence encoder
    self.encoder = Encoder(hps, embed)

    # Multi-layer highway lstm
    self.num_layers = hps.n_layers
    # One output channel per CNN kernel size in [min_kernel_size, max_kernel_size].
    self.sent_embedding_size = (hps.max_kernel_size - hps.min_kernel_size + 1) * hps.output_channel
    self.lstm_hidden_size = hps.lstm_hidden_size
    self.recurrent_dropout = hps.recurrent_dropout_prob
    self.deep_lstm = DeepLSTM(self.sent_embedding_size, self.lstm_hidden_size, self.num_layers, self.recurrent_dropout, hps.use_orthnormal_init, hps.fix_mask, hps.cuda)

    # Multi-head attention
    self.n_head = hps.n_head
    # Per-head key/value width; assumes lstm_hidden_size is divisible by n_head
    # — TODO confirm, a remainder is silently truncated by int().
    self.d_v = self.d_k = int(self.lstm_hidden_size / hps.n_head)
    self.d_inner = hps.ffn_inner_hidden_size
    self.slf_attn = MultiHeadAttention(hps.n_head, self.lstm_hidden_size , self.d_k, self.d_v, dropout=hps.atten_dropout_prob)
    # NOTE(review): pos_ffn and wh are sized with d_v (one head's width), not
    # lstm_hidden_size as in the usual Transformer layout — verify against the
    # forward pass that the attention output really is per-head here.
    self.pos_ffn = PositionwiseFeedForward(self.d_v, self.d_inner, dropout = hps.ffn_dropout_prob)
    self.wh = nn.Linear(self.d_v, 2)
def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1, keyword_module=None):
    """Decoder layer with an optional keyword-attention sub-layer.

    Args:
        d_model: model dimension.
        d_inner: inner dimension of the feed-forward sub-layer.
        n_head: number of attention heads.
        d_k: per-head key dimension.
        d_v: per-head value dimension.
        dropout: dropout probability.
        keyword_module: when not None, an additional attention sub-layer
            (key_attn) is created.
    """
    super(DecoderLayer, self).__init__()
    self.key_module = keyword_module
    # Self-attention over decoder inputs.
    self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
    # Attention over encoder outputs.
    self.enc_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
    if self.key_module is not None:
        # Extra attention sub-layer used by the keyword module.
        self.key_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
    self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout)
def __init__(self, d_model, d_inner, n_head, dropout=0.1):
    """Decoder layer: masked self-attention, encoder-decoder attention,
    then a position-wise feed-forward sub-layer.

    Args:
        d_model: model dimension.
        d_inner: inner dimension of the feed-forward sub-layer.
        n_head: number of attention heads.
        dropout: dropout probability.
    """
    super(DecoderLayer, self).__init__()
    self.slf_attn = MultiHeadAttention(n_head, d_model, dropout=dropout)
    self.enc_attn = MultiHeadAttention(n_head, d_model, dropout=dropout)
    self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout)
def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1):
    """Decoder layer that also loads a vocab-pair lookup table from disk.

    Args:
        d_model: model dimension.
        d_inner: inner dimension of the feed-forward sub-layer.
        n_head: number of attention heads.
        d_k: per-head key dimension.
        d_v: per-head value dimension.
        dropout: dropout probability.

    Raises:
        OSError: if the 'vocab_pair' file next to the package root is missing.
    """
    super(DecoderLayer, self).__init__()
    self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
    # The decoder has one extra sub-layer attending over encoder outputs.
    self.enc_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
    self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout)

    from pathlib import Path
    # Resolve the mapping file relative to this source file (two levels up).
    vocab_path = Path(__file__).resolve().parent.parent / 'vocab_pair'
    check_dic = {}
    with open(vocab_path, encoding='utf-8') as f:
        # Stream the file instead of materializing it with readlines().
        for line in f:
            parts = line.strip('\n').split(':')
            # Each line is "key:value"; any extra ':' segments are ignored,
            # matching the original behavior.
            check_dic[parts[0]] = parts[1]
    self.check_dic = check_dic
    # (removed dead throwaway local `tmmm = 1`)
def __init__(self, d_out, d_inner, n_head, d_k, d_v, dropout=0.1, type_='same', skip_connect=False, d_in=None):
    """U-Net style encoder layer: self-attention + FFN plus a grouped 1-D
    convolution that keeps, halves, or doubles the temporal size.

    Args:
        d_out: output feature dimension.
        d_inner: inner dimension of the feed-forward sub-layer.
        n_head: number of attention heads.
        d_k: per-head key dimension.
        d_v: per-head value dimension.
        dropout: dropout probability.
        type_: one of 'down', 'same', 'up', 'none' — temporal resizing mode.
        skip_connect: whether a skip connection is enabled (consumed elsewhere).
        d_in: input feature dimension; defaults to d_out when None.

    Raises:
        RuntimeError: if type_ is not a recognized mode.
    """
    super(UNetEncoderLayer, self).__init__()
    if d_in is None:
        d_in = d_out  # input width defaults to the output width
    self.slf_attn = MultiHeadAttention(
        n_head, d_out, d_k, d_v, dropout=dropout, d_in=d_in)
    self.pos_ffn = PositionwiseFeedForward(d_out, d_inner, dropout=dropout)
    self.norm = nn.LayerNorm(d_out)
    self.skip_connect = skip_connect
    # TODO add depthwise-separable convolutions
    self.maxpool = None
    self.type = type_
    if type_ == 'down':
        # Halve the temporal size: grouped conv followed by stride-2 pooling.
        self.conv = nn.Conv1d(d_in, d_in, kernel_size=3, padding=1, groups=d_in)
        self.maxpool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)
    elif type_ == 'same':
        # Keep the temporal size unchanged.
        self.conv = nn.Conv1d(d_in, d_in, kernel_size=3, padding=1, groups=d_in)
    elif type_ == 'up':
        # Double the temporal size with a transposed convolution.
        self.conv = nn.ConvTranspose1d(d_in, d_in, kernel_size=3, stride=2, padding=1, groups=d_in)
    elif type_ == 'none':
        self.conv = None
    else:
        raise RuntimeError('Did not specify appropriate convolution type')
    self.conv_out = nn.Linear(d_in, d_out)
def __init__(self, attention_type, d_model, d_inner, n_head, d_k, d_v, dropout=0.1, normalize_before=True):
    """Encoder layer whose self-attention implementation is selected by name.

    Args:
        attention_type: 'softmax' for standard multi-head attention or
            'performer' for Performer (FAVOR+) attention.
        d_model: model dimension.
        d_inner: inner dimension of the feed-forward sub-layer.
        n_head: number of attention heads.
        d_k: per-head key dimension.
        d_v: per-head value dimension.
        dropout: dropout probability.
        normalize_before: pre-norm (True) vs post-norm layer ordering.

    Raises:
        ValueError: if attention_type is not a recognized value.
    """
    super(EncoderLayer, self).__init__()
    if attention_type == 'softmax':
        self.slf_attn = MultiHeadAttention(
            n_head, d_model, d_k, d_v, dropout=dropout, normalize_before=normalize_before)
    elif attention_type == 'performer':
        self.slf_attn = PerformerAttention(
            n_head, d_model, d_k, d_v, dropout=dropout, normalize_before=normalize_before)
    else:
        # Previously an unknown type silently left self.slf_attn unset and
        # surfaced later as an AttributeError in forward; fail fast instead.
        raise ValueError('unknown attention_type: {!r}'.format(attention_type))
    self.pos_ffn = PositionwiseFeedForward(
        d_model, d_inner, dropout=dropout, normalize_before=normalize_before)
def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1):
    """Encoder layer: multi-head self-attention followed by a
    position-wise feed-forward sub-layer.

    Args:
        d_model: model dimension.
        d_inner: inner dimension of the feed-forward sub-layer.
        n_head: number of attention heads.
        d_k: per-head key dimension.
        d_v: per-head value dimension.
        dropout: dropout probability.
    """
    super(EncoderLayer, self).__init__()
    # Multi-head attention sub-layer.
    self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
    # Position-wise feed-forward sub-layer.
    self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout)
def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1, share_qk=False, swish_activation=False):
    """Decoder layer with a configurable FFN activation.

    Args:
        d_model: model dimension.
        d_inner: inner dimension of the feed-forward sub-layer.
        n_head: number of attention heads.
        d_k: per-head key dimension.
        d_v: per-head value dimension.
        dropout: dropout probability.
        share_qk: accepted for interface compatibility; not used here.
        swish_activation: use Swish instead of the default FFN activation.
    """
    super(DecoderLayer, self).__init__()
    self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
    self.enc_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
    self.pos_ffn = PositionwiseFeedForward(
        d_model, d_inner, dropout=dropout, swish_activation=swish_activation)
def __init__(self, hid_dim, n_heads, pf_dim, dropout, device):
    """Decoder layer: layer norm, self-attention, encoder attention,
    position-wise feed-forward, and a dropout module.

    Args:
        hid_dim: hidden dimension.
        n_heads: number of attention heads.
        pf_dim: inner dimension of the feed-forward sub-layer.
        dropout: dropout probability.
        device: torch device forwarded to the attention modules.
    """
    super(DecoderLayer, self).__init__()
    self.layer_norm = nn.LayerNorm(hid_dim)
    self.slf_attn = SelfAttention(hid_dim, n_heads, dropout, device)
    self.enc_attn = SelfAttention(hid_dim, n_heads, dropout, device)
    self.pos_forward = PositionwiseFeedForward(hid_dim, pf_dim, dropout)
    self.dropout = nn.Dropout(dropout)
def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1):
    """Feed-forward Transformer (FFT) block: self-attention + position-wise FFN.

    Args:
        d_model: model dimension.
        d_inner: inner dimension of the feed-forward sub-layer.
        n_head: number of attention heads.
        d_k: per-head key dimension.
        d_v: per-head value dimension.
        dropout: dropout probability.
    """
    super(FFTBlock, self).__init__()
    self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
    self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout)
def __init__(self, d_model, d_inner, n_head, d_k, d_v, rel_pos_op=None, dropout=0.1):
    """Encoder layer; switches to relative-position attention when
    rel_pos_op is provided.

    Args:
        d_model: model dimension.
        d_inner: inner dimension of the feed-forward sub-layer.
        n_head: number of attention heads.
        d_k: per-head key dimension.
        d_v: per-head value dimension.
        rel_pos_op: relative-position operator; None selects plain attention.
        dropout: dropout probability.
    """
    super(EncoderLayer, self).__init__()
    self.rel_pos_op = rel_pos_op
    if rel_pos_op is not None:
        # Relative-position-aware attention variant.
        self.slf_attn = RelMultiHeadAttention(
            n_head, d_model, d_k, d_v, rel_pos_op=rel_pos_op, dropout=dropout)
    else:
        # Plain multi-head attention.
        self.slf_attn = MultiHeadAttention(
            n_head, d_model, d_k, d_v, dropout=dropout)
    self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout)
def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1):
    """Encoder layer combining multi-head self-attention (with its own
    Add&Norm, per the attention module) and a position-wise FFN.

    Args:
        d_model: model dimension.
        d_inner: inner dimension of the feed-forward sub-layer.
        n_head: number of attention heads.
        d_k: per-head key dimension.
        d_v: per-head value dimension.
        dropout: dropout probability.
    """
    super(EncoderLayer, self).__init__()
    # MultiHeadAttention's forward consumes Q, K, V and a mask; its
    # constructor takes n_head, d_model, d_k, d_v.
    self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
    self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout)
def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1):
    """Encoder layer.

    Args:
        d_model: model dimension.
        d_inner: inner dimension of the feed-forward sub-layer.
        n_head: number of attention heads.
        d_k: per-head key dimension.
        d_v: per-head value dimension.
        dropout: dropout probability.
    """
    super(EncoderLayer, self).__init__()
    # First sub-layer: multi-head self-attention.
    self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
    # Second sub-layer: position-wise feed-forward network.
    self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout)
def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1):
    """Decoder layer.

    Args:
        d_model: model dimension.
        d_inner: inner dimension of the feed-forward sub-layer.
        n_head: number of attention heads.
        d_k: per-head key dimension.
        d_v: per-head value dimension.
        dropout: dropout probability.
    """
    super(DecoderLayer, self).__init__()
    # First sub-layer: masked multi-head self-attention.
    self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
    # Second sub-layer: multi-head attention over encoder outputs.
    self.enc_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
    # Third sub-layer: position-wise feed-forward network.
    self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout)
def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1, tt_params=None):
    """Encoder layer with tensor-train parameterized sub-layers.

    Args:
        d_model: model dimension.
        d_inner: inner dimension of the feed-forward sub-layer.
        n_head: number of attention heads.
        d_k: per-head key dimension.
        d_v: per-head value dimension.
        dropout: dropout probability.
        tt_params: optional dict of tensor-train settings forwarded to both
            sub-layers; defaults to an empty dict. (The original default was
            a mutable `{}`, shared across all calls that omit the argument —
            replaced with a None sentinel; behavior is unchanged.)
    """
    super(EncoderLayer, self).__init__()
    if tt_params is None:
        tt_params = {}
    self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout, tt_params=tt_params)
    self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout, tt_params=tt_params)
def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1, normalize_before=True):
    """Encoder layer supporting pre-norm or post-norm ordering.

    Args:
        d_model: model dimension.
        d_inner: inner dimension of the feed-forward sub-layer.
        n_head: number of attention heads.
        d_k: per-head key dimension.
        d_v: per-head value dimension.
        dropout: dropout probability.
        normalize_before: pre-norm (True) vs post-norm layer ordering.
    """
    super(EncoderLayer, self).__init__()
    self.slf_attn = MultiHeadAttention(
        n_head, d_model, d_k, d_v,
        dropout=dropout, normalize_before=normalize_before)
    self.pos_ffn = PositionwiseFeedForward(
        d_model, d_inner,
        dropout=dropout, normalize_before=normalize_before)
def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1):
    '''
    Encoder layer: multi-head self-attention + position-wise feed-forward.

    :param d_model: model input dimension
    :param d_inner: feed-forward hidden-layer dimension
    :param n_head: number of attention heads
    :param d_k: key-vector dimension
    :param d_v: value-vector dimension
    :param dropout: dropout probability
    '''
    super(EncoderLayer, self).__init__()
    # NOTE(review): attribute name `sef_attn` looks like a typo for the
    # conventional `slf_attn`; left unchanged because the forward pass
    # (not visible here) presumably references this exact name — confirm
    # before renaming.
    self.sef_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
    self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout)
def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1, kernel='linear', kernel_size_tcn=3, kernel_size_scn=2):
    """Encoder layer whose attention supports configurable kernels.

    Args:
        d_model: model dimension.
        d_inner: inner dimension of the feed-forward sub-layer.
        n_head: number of attention heads.
        d_k: per-head key dimension.
        d_v: per-head value dimension.
        dropout: dropout probability.
        kernel: attention kernel type (default 'linear').
        kernel_size_tcn: kernel size for the TCN component.
        kernel_size_scn: kernel size for the SCN component.
    """
    super(EncoderLayer, self).__init__()
    self.slf_attn = MultiHeadAttention(
        n_head, d_model, d_k, d_v,
        dropout=dropout, kernel=kernel,
        kernel_size_tcn=kernel_size_tcn,
        kernel_size_scn=kernel_size_scn)
    self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout)
def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1):
    """Encoder layer: self-attention + position-wise FFN.

    Typical base-model sizes: d_model=512, d_inner=2048, n_head=8,
    d_k=d_v=64, dropout=0.1.

    Args:
        d_model: model dimension.
        d_inner: inner dimension of the feed-forward sub-layer.
        n_head: number of attention heads.
        d_k: per-head key dimension.
        d_v: per-head value dimension.
        dropout: dropout probability.
    """
    super(EncoderLayer, self).__init__()
    # e.g. (8, 512, 64, 64)
    self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
    # e.g. (512, 2048, 0.1)
    self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout)
def __init__(self, d_model, d_inner_hid, n_head):
    """Minimal encoder layer: self-attention + position-wise FFN.

    Args:
        d_model: model dimension.
        d_inner_hid: inner dimension of the feed-forward sub-layer.
        n_head: number of attention heads.
    """
    super(EncoderLayer, self).__init__()
    self.slf_attn = MultiHeadAttention(d_model, n_head)
    self.pos_ffn = PositionwiseFeedForward(d_model, d_inner_hid)
def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1, d_enc=None):
    """Decoder layer whose encoder-attention may read a different width.

    Args:
        d_model: decoder model dimension.
        d_inner: inner dimension of the feed-forward sub-layer.
        n_head: number of attention heads.
        d_k: per-head key dimension.
        d_v: per-head value dimension.
        dropout: dropout probability.
        d_enc: encoder output dimension; defaults to d_model when None.
    """
    super(DecoderLayer, self).__init__()
    if d_enc is None:
        d_enc = d_model
    self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
    # d_in lets the encoder-decoder attention consume encoder states of width d_enc.
    self.enc_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout, d_in=d_enc)
    self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout)
def __init__(self, d_model, d_inner, n_head, dropout=0.1):
    """Encoder layer variant using MLP-based attention.

    Args:
        d_model: model dimension.
        d_inner: inner dimension of the feed-forward sub-layer.
        n_head: number of attention heads.
        dropout: dropout probability.
    """
    super(EncoderNewLayer, self).__init__()
    self.slf_attn = MlpAttention(n_head, d_model, dropout=dropout)
    self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout)