def __init__(self, config, embedding=None):
    super(rnn_encoder, self).__init__()
    self.embedding = embedding if embedding is not None else nn.Embedding(
        config.src_vocab_size, config.emb_size)
    self.hidden_size = config.hidden_size
    self.config = config
    self.dropout = nn.Dropout(p=0.02)
    # Multi-scale convolutional branches over the time axis (receptive
    # fields 1, 3, and 5 via two stacked 3s), all keeping hidden_size channels.
    self.sw1 = nn.Sequential(
        nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=1, padding=0),
        nn.BatchNorm1d(config.hidden_size),
        nn.ReLU())
    self.sw3 = nn.Sequential(
        nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=1, padding=0),
        nn.ReLU(),
        nn.BatchNorm1d(config.hidden_size),
        nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.BatchNorm1d(config.hidden_size))
    self.sw33 = nn.Sequential(
        nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=1, padding=0),
        nn.ReLU(),
        nn.BatchNorm1d(config.hidden_size),
        nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.BatchNorm1d(config.hidden_size),
        nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.BatchNorm1d(config.hidden_size))
    self.swish = nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=5, padding=2)
    # Gated linear unit over the concatenated bidirectional RNN outputs.
    self.linear = nn.Sequential(
        nn.Linear(2 * config.hidden_size, 2 * config.hidden_size),
        nn.GLU(),
        nn.Dropout(config.dropout))
    # Fuses the three conv branches (3 * hidden_size) back to hidden_size.
    self.filter_linear = nn.Linear(3 * config.hidden_size, config.hidden_size)
    self.tanh = nn.Tanh()
    self.sigmoid = nn.Sigmoid()
    self.PosEnc = PositionalEncoding(config)
    if config.attention == 'None':
        self.attention = None
    elif config.attention == 'bahdanau':
        self.attention = models.bahdanau_attention(
            config.hidden_size, config.emb_size, config.pool_size)
    elif config.attention == 'luong':
        self.attention = models.luong_attention(
            config.hidden_size, config.emb_size, config.pool_size)
    elif config.attention == 'luong_gate':
        self.attention = models.luong_gate_attention(
            config.hidden_size, config.emb_size)
    if config.cell == 'gru':
        self.rnn = nn.GRU(input_size=config.emb_size, hidden_size=config.hidden_size,
                          num_layers=config.enc_num_layers, dropout=config.dropout,
                          bidirectional=config.bidirectional)
    else:
        self.rnn = nn.LSTM(input_size=config.emb_size, hidden_size=config.hidden_size,
                           num_layers=config.enc_num_layers, dropout=config.dropout,
                           bidirectional=config.bidirectional)
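# For reference, a minimal sketch of wiring up this encoder. The config
# fields are inferred from the constructor above; SimpleNamespace and the
# concrete values are illustrative assumptions, and `models` /
# `PositionalEncoding` are expected to be importable from this repo.
from types import SimpleNamespace
import torch

config = SimpleNamespace(
    src_vocab_size=10000, emb_size=256, hidden_size=512, dropout=0.1,
    attention='luong_gate', cell='lstm', enc_num_layers=3, bidirectional=True)
encoder = rnn_encoder(config)
src = torch.randint(0, config.src_vocab_size, (35, 8))  # (seq_len, batch)
emb = encoder.embedding(src)                             # (35, 8, 256)
outputs, state = encoder.rnn(emb)                        # (35, 8, 2 * 512)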
def __init__(self, config, embedding=None):
    super(rnn_encoder, self).__init__()
    self.embedding = embedding if embedding is not None else nn.Embedding(
        config.src_vocab_size, config.emb_size)
    self.hidden_size = config.hidden_size
    self.config = config
    if config.attention == 'None':
        self.attention = None
    elif config.attention == 'bahdanau':
        self.attention = models.bahdanau_attention(
            config.hidden_size, config.emb_size, config.pool_size)
    elif config.attention == 'luong':
        self.attention = models.luong_attention(
            config.hidden_size, config.emb_size, config.pool_size)
    elif config.attention == 'luong_gate':
        self.attention = models.luong_gate_attention(
            config.hidden_size, config.emb_size)
    if config.cell == 'gru':
        self.rnn = nn.GRU(input_size=config.emb_size, hidden_size=config.hidden_size,
                          num_layers=config.enc_num_layers, dropout=config.dropout,
                          bidirectional=config.bidirectional)
    else:
        # self.rnn_enc = nn.ModuleList([rnn_encoder_layer(config)
        #                               for _ in range(config.enc_num_layers)])
        self.rnn = nn.LSTM(input_size=config.emb_size, hidden_size=config.hidden_size,
                           num_layers=config.enc_num_layers, dropout=config.dropout,
                           bidirectional=config.bidirectional)
    self.ln = LayerNorm(config.hidden_size)
def __init__(self, config, embedding=None, use_attention=True):
    super(rnn_decoder, self).__init__()
    self.embedding = embedding if embedding is not None else nn.Embedding(
        config.tgt_vocab_size, config.emb_size)
    input_size = config.emb_size
    if config.cell == 'gru':
        self.rnn = StackedGRU(input_size=input_size, hidden_size=config.hidden_size,
                              num_layers=config.dec_num_layers, dropout=config.dropout)
    else:
        self.rnn = StackedLSTM(input_size=input_size, hidden_size=config.hidden_size,
                               num_layers=config.dec_num_layers, dropout=config.dropout)
    self.linear = nn.Linear(config.hidden_size, config.tgt_vocab_size)
    self.linear_ = nn.Linear(config.hidden_size, config.hidden_size)
    self.sigmoid = nn.Sigmoid()
    if not use_attention or config.attention == 'None':
        self.attention = None
    elif config.attention == 'bahdanau':
        self.attention = models.bahdanau_attention(
            config.hidden_size, config.emb_size, config.pool_size)
    elif config.attention == 'luong':
        self.attention = models.luong_attention(
            config.hidden_size, config.emb_size, config.pool_size)
    elif config.attention == 'luong_gate':
        self.attention = models.luong_gate_attention(
            config.hidden_size, config.emb_size, prob=config.dropout)
    self.hidden_size = config.hidden_size
    self.dropout = nn.Dropout(config.dropout)
    self.config = config
def __init__(self, config, embedding=None, use_attention=True):
    super(conv_decoder, self).__init__()
    self.embedding = embedding if embedding is not None else nn.Embedding(
        config.tgt_vocab_size, config.emb_size)
    if not use_attention or config.attention == 'None':
        self.attention = None
    elif config.attention == 'bahdanau':
        self.attention = models.bahdanau_attention(
            config.hidden_size, config.emb_size, config.pool_size)
    elif config.attention == 'luong':
        self.attention = models.luong_attention(
            config.hidden_size, config.emb_size, config.pool_size)
    elif config.attention == 'luong_gate':
        self.attention_1 = models.luong_gate_attention(
            config.hidden_size, config.emb_size, selfatt=True)
        self.attention_2 = models.luong_gate_attention(
            config.hidden_size, config.emb_size, selfatt=True)
        self.attention_3 = models.luong_gate_attention(
            config.hidden_size, config.emb_size, selfatt=True)
    self.dropout = nn.Dropout(config.dropout)
    self.hidden_size = config.hidden_size
    self.config = config
    # Transposed convolutions that progressively upsample the sequence length.
    self.deconv_1 = nn.Sequential(
        nn.ConvTranspose1d(config.hidden_size, config.hidden_size,
                           kernel_size=2, stride=1, padding=0),
        nn.ReLU(), nn.Dropout(config.dropout))
    self.deconv_2 = nn.Sequential(
        nn.ConvTranspose1d(config.hidden_size, config.hidden_size,
                           kernel_size=3, stride=2, padding=0),
        nn.ReLU(), nn.Dropout(config.dropout))
    self.deconv_3 = nn.Sequential(
        nn.ConvTranspose1d(config.hidden_size, config.hidden_size,
                           kernel_size=4, stride=3, padding=0),
        nn.ReLU(), nn.Dropout(config.dropout))
    # self.ln_1, self.ln_2, self.ln_3, self.ln_4 = LayerNorm(config.hidden_size), LayerNorm(config.hidden_size), LayerNorm(config.hidden_size), LayerNorm(config.hidden_size)
    # self.linear = nn.Linear(config.hidden_size, config.tgt_vocab_size)
    # self.linear = lambda x: torch.matmul(x, Variable(self.embedding.weight.t().data))
    # Output projection tied to the embedding weights; detach() keeps this
    # path from backpropagating into the embedding. (The previous
    # torch.tensor(self.embedding.weight.t(), requires_grad=False) both
    # warns and copies the weight matrix on every call.)
    self.linear = lambda x: torch.matmul(x, self.embedding.weight.t().detach())
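# The lambda above ties the decoder's output projection to the transposed
# embedding matrix instead of learning a separate hidden_size x vocab_size
# layer; detach() keeps gradients from flowing into the embedding through
# this path. A standalone sketch of the same trick (names and sizes here
# are illustrative, and the tying requires hidden_size == emb_size):
import torch
import torch.nn as nn

emb = nn.Embedding(5000, 512)                           # vocab x d
hidden = torch.randn(8, 512)                            # batch x d
logits = torch.matmul(hidden, emb.weight.t().detach())  # (8, 5000)
assert logits.shape == (8, 5000)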
def __init__(self, config, embedding=None):
    super(rnn_encoder, self).__init__()
    self.embedding = embedding if embedding is not None else nn.Embedding(
        config.src_vocab_size, config.emb_size)
    self.hidden_size = config.hidden_size
    self.config = config
    if config.selfatt:
        if config.attention == 'None':
            self.attention = None
        elif config.attention == 'bahdanau':
            self.attention = models.bahdanau_attention(
                config.hidden_size, config.emb_size, config.pool_size)
        elif config.attention == 'luong':
            self.attention = models.luong_attention(
                config.hidden_size, config.emb_size, config.pool_size)
        elif config.attention == 'luong_gate':
            self.attention = models.luong_gate_attention(
                config.hidden_size, config.emb_size)
        elif config.attention == 'self_att':
            self.attention = models.MultiHeadAttention(
                n_head=8, d_model=config.hidden_size, d_k=64, d_v=64)
    if config.cell == 'gru':
        self.rnn1 = nn.GRU(input_size=config.emb_size, hidden_size=config.hidden_size,
                           dropout=config.dropout, bidirectional=config.bidirectional,
                           num_layers=1)
        self.rnn2 = nn.GRU(input_size=config.hidden_size, hidden_size=config.hidden_size,
                           dropout=config.dropout, bidirectional=config.bidirectional)
        self.rnn3 = nn.GRU(input_size=config.hidden_size, hidden_size=config.hidden_size,
                           dropout=config.dropout, bidirectional=config.bidirectional)
    else:
        self.rnn1 = nn.LSTM(input_size=config.emb_size, hidden_size=config.hidden_size,
                            dropout=config.dropout, bidirectional=config.bidirectional,
                            num_layers=1)
        self.rnn2 = nn.LSTM(input_size=config.hidden_size, hidden_size=config.hidden_size,
                            dropout=config.dropout, bidirectional=config.bidirectional)
        self.rnn3 = nn.LSTM(input_size=config.hidden_size, hidden_size=config.hidden_size,
                            dropout=config.dropout, bidirectional=config.bidirectional)
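# One shape detail worth flagging in the three-layer stack above: with
# bidirectional=True, rnn1 emits 2 * hidden_size features per step, while
# rnn2 declares input_size=hidden_size, so the forward pass presumably
# merges the two directions between layers. A sketch of one way to do that
# (summing the directions is an assumption; the actual forward may
# concatenate and project instead):
import torch
import torch.nn as nn

hidden_size = 512
rnn1 = nn.LSTM(input_size=256, hidden_size=hidden_size, bidirectional=True)
rnn2 = nn.LSTM(input_size=hidden_size, hidden_size=hidden_size, bidirectional=True)
x = torch.randn(35, 8, 256)                       # (seq_len, batch, emb)
out, _ = rnn1(x)                                  # (35, 8, 2 * hidden_size)
out = out.view(35, 8, 2, hidden_size).sum(dim=2)  # merge fwd/bwd directions
out, _ = rnn2(out)                                # input_size now matches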
def __init__(self, config, vocab_size, embedding=None, score_fn=None, extend_vocab_size=0):
    super(rnn_decoder, self).__init__()
    if embedding is not None:
        self.embedding = embedding
    else:
        self.embedding = nn.Embedding(vocab_size, config.emb_size)
    if hasattr(config, 'gru'):
        self.rnn = StackedGRU(input_size=config.emb_size, hidden_size=config.hidden_size,
                              num_layers=config.num_layers, dropout=config.dropout)
    else:
        self.rnn = StackedLSTM(input_size=config.emb_size, hidden_size=config.hidden_size,
                               num_layers=config.num_layers, dropout=config.dropout)
    # Default to '' so the startswith checks below don't crash when no
    # score_fn is passed; the fallback is then a plain vocabulary projection.
    self.score_fn = score_fn if score_fn is not None else ''
    if self.score_fn.startswith('general'):
        self.linear = nn.Linear(config.hidden_size, config.emb_size)
    elif self.score_fn.startswith('concat'):
        self.linear_query = nn.Linear(config.hidden_size, config.hidden_size)
        self.linear_weight = nn.Linear(config.emb_size, config.hidden_size)
        self.linear_v = nn.Linear(config.hidden_size, 1)
    elif not self.score_fn.startswith('dot'):
        self.linear = nn.Linear(config.hidden_size, vocab_size)
    if self.score_fn.startswith('copy'):
        self.gen_linear = nn.Sequential(nn.Linear(config.hidden_size, 1), nn.Sigmoid())
    if hasattr(config, 'att_act'):
        activation = config.att_act
        print('use attention activation %s' % activation)
    else:
        activation = None
    if activation == 'bahd':
        self.attention = models.bahdanau_attention(
            config.hidden_size, config.emb_size, activation)
    else:
        self.attention = models.luong_attention(config.hidden_size, activation)
    self.hidden_size = config.hidden_size
    self.dropout = nn.Dropout(config.dropout)
    self.config = config
    self.extend_vocab_size = extend_vocab_size
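# What the score_fn prefixes above select, in isolation (hypothetical
# sizes; the layers mirror self.linear and self.gen_linear from the
# constructor):
import torch
import torch.nn as nn

hidden_size, emb_size, vocab_size = 512, 256, 5000
embedding = nn.Embedding(vocab_size, emb_size)
linear = nn.Linear(hidden_size, emb_size)      # the 'general' projection
h = torch.randn(8, hidden_size)

# 'general': project the decoder state into embedding space, then score
# every vocabulary item by dot product against its embedding.
scores = torch.matmul(linear(h), embedding.weight.t())  # (8, 5000)

# 'copy': a sigmoid gate on the decoder state gives the probability of
# generating a word (vs. copying one from the source).
gen_linear = nn.Sequential(nn.Linear(hidden_size, 1), nn.Sigmoid())
p_gen = gen_linear(h)                                   # (8, 1), in (0, 1)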
def __init__(self, config, embedding=None):
    super(rnn_encoder, self).__init__()
    self.embedding = embedding if embedding is not None else nn.Embedding(
        config.src_vocab_size, config.emb_size)
    self.hidden_size = config.hidden_size
    self.config = config
    if config.attention == 'None':
        self.attention = None
    elif config.attention == 'bahdanau':
        self.attention = models.bahdanau_attention(
            config.hidden_size, config.emb_size, config.pool_size)
    elif config.attention == 'luong':
        self.attention = models.luong_attention(
            config.hidden_size, config.emb_size, config.pool_size)
    elif config.attention == 'luong_gate':
        self.attention = models.luong_gate_attention(
            config.hidden_size, config.emb_size)
    if config.cell == 'gru':
        self.rnn = nn.GRU(input_size=config.emb_size, hidden_size=config.hidden_size,
                          num_layers=config.enc_num_layers, dropout=config.dropout,
                          bidirectional=config.bidirectional)
    else:
        self.rnn = nn.LSTM(input_size=config.emb_size, hidden_size=config.hidden_size,
                           num_layers=config.enc_num_layers, dropout=config.dropout,
                           bidirectional=config.bidirectional)
    if config.attemb:
        self.rnnpos = nn.GRU(input_size=self.config.emb_size,
                             hidden_size=self.config.hidden_size,
                             num_layers=1, dropout=self.config.dropout,
                             bidirectional=False)
def __init__(self, config, embedding=None):  # config is the model configuration
    super(rnn_encoder, self).__init__()
    # If no embedding is passed in, create a new one.
    self.embedding = embedding if embedding is not None else nn.Embedding(
        config.src_vocab_size, config.emb_size)
    self.hidden_size = config.hidden_size
    self.config = config
    # When True, add the convolutional layers.
    if config.swish:
        # Conv1d arguments: input channels (the word-vector dimension),
        # output channels, kernel size.
        self.sw1 = nn.Sequential(
            nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=1, padding=0),
            nn.BatchNorm1d(config.hidden_size),
            nn.ReLU())
        self.sw3 = nn.Sequential(
            nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=1, padding=0),
            nn.ReLU(),
            nn.BatchNorm1d(config.hidden_size),
            nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm1d(config.hidden_size))
        self.sw33 = nn.Sequential(
            nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=1, padding=0),
            nn.ReLU(),
            nn.BatchNorm1d(config.hidden_size),
            nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm1d(config.hidden_size),
            nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm1d(config.hidden_size))
        # Gated linear unit applied after the RNN layers, over the
        # concatenated bidirectional outputs.
        self.linear = nn.Sequential(
            nn.Linear(2 * config.hidden_size, 2 * config.hidden_size),
            nn.GLU(),
            nn.Dropout(config.dropout))
        # Maps 3d back down to d.
        self.filter_linear = nn.Linear(3 * config.hidden_size, config.hidden_size)
        self.tanh = nn.Tanh()
        self.sigmoid = nn.Sigmoid()
    # Self-attention mechanism.
    if config.selfatt:
        if config.attention == 'None':
            self.attention = None
        elif config.attention == 'bahdanau':
            self.attention = models.bahdanau_attention(
                config.hidden_size, config.emb_size, config.pool_size)
        elif config.attention == 'luong':
            self.attention = models.luong_attention(
                config.hidden_size, config.emb_size, config.pool_size)
        # The variant used by this configuration.
        elif config.attention == 'luong_gate':
            self.attention = models.luong_gate_attention(
                config.hidden_size, config.emb_size)
    # Encoder GRU/LSTM layers; the default is a 3-layer bidirectional LSTM.
    if config.cell == 'gru':
        self.rnn = nn.GRU(input_size=config.emb_size, hidden_size=config.hidden_size,
                          num_layers=config.enc_num_layers, dropout=config.dropout,
                          bidirectional=config.bidirectional)
    else:
        self.rnn = nn.LSTM(input_size=config.emb_size, hidden_size=config.hidden_size,
                           num_layers=config.enc_num_layers, dropout=config.dropout,
                           bidirectional=config.bidirectional)
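# The sw1/sw3/sw33 branches form an inception-style block over the time
# axis, and filter_linear fuses their concatenation from 3 * hidden_size
# back to hidden_size. The forward pass is not shown in this section, so
# the combination below, including the transposes and the concatenation
# order, is a hedged sketch rather than the repo's actual code:
import torch
import torch.nn as nn

hidden_size = 512
sw1 = nn.Sequential(nn.Conv1d(hidden_size, hidden_size, kernel_size=1),
                    nn.BatchNorm1d(hidden_size), nn.ReLU())
sw3 = nn.Sequential(nn.Conv1d(hidden_size, hidden_size, kernel_size=1),
                    nn.ReLU(), nn.BatchNorm1d(hidden_size),
                    nn.Conv1d(hidden_size, hidden_size, kernel_size=3, padding=1),
                    nn.ReLU(), nn.BatchNorm1d(hidden_size))
filter_linear = nn.Linear(3 * hidden_size, hidden_size)

x = torch.randn(8, 35, hidden_size)                 # (batch, seq_len, hidden)
c = x.transpose(1, 2)                               # Conv1d expects (batch, C, L)
branches = [sw1(c), sw3(c), sw3(c)]                 # sw33 stood in by sw3 here
fused = torch.cat(branches, dim=1).transpose(1, 2)  # (8, 35, 3 * hidden)
out = filter_linear(fused)                          # (8, 35, hidden)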
def __init__(self, config, embedding=None):
    super(rnn_encoder, self).__init__()
    self.embedding = embedding if embedding is not None else nn.Embedding(
        config.src_vocab_size, config.emb_size)
    self.hidden_size = config.hidden_size
    self.config = config
    if config.swish:  # swish convolutional gates
        self.sw1 = nn.Sequential(
            nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=1, padding=0),
            nn.BatchNorm1d(config.hidden_size),
            nn.ReLU())
        self.sw3 = nn.Sequential(
            nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=1, padding=0),
            nn.ReLU(),
            nn.BatchNorm1d(config.hidden_size),
            nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm1d(config.hidden_size))
        self.sw33 = nn.Sequential(
            nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=1, padding=0),
            nn.ReLU(),
            nn.BatchNorm1d(config.hidden_size),
            nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm1d(config.hidden_size),
            nn.Conv1d(config.hidden_size, config.hidden_size, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm1d(config.hidden_size))
        self.linear = nn.Sequential(
            nn.Linear(2 * config.hidden_size, 2 * config.hidden_size),
            nn.GLU(),
            nn.Dropout(config.dropout))
        self.filter_linear = nn.Linear(3 * config.hidden_size, config.hidden_size)
        self.tanh = nn.Tanh()
        self.sigmoid = nn.Sigmoid()
    if config.rwnn:
        self.rwnn = CNN(config, num_classes=config.emb_size, nxpath=config.nxpath)
    if config.selfatt:  # self-attention
        if config.attention == 'None':
            self.attention = None
        elif config.attention == 'bahdanau':
            self.attention = models.bahdanau_attention(
                config.hidden_size, config.emb_size, config.pool_size)
        elif config.attention == 'luong':
            self.attention = models.luong_attention(
                config.hidden_size, config.emb_size, config.pool_size)
        elif config.attention == 'luong_gate':
            self.attention = models.luong_gate_attention(
                config.hidden_size, config.emb_size)
    if config.cell == 'gru':
        self.rnn = nn.GRU(input_size=config.emb_size, hidden_size=config.hidden_size,
                          num_layers=config.enc_num_layers, dropout=config.dropout,
                          bidirectional=config.bidirectional)
    elif config.cell == 'moglstm':
        self.rnn = MogLSTM(config.emb_size, config.hidden_size, mog_iterations=2)
    else:
        self.rnn = nn.LSTM(input_size=config.emb_size, hidden_size=config.hidden_size,
                           num_layers=config.enc_num_layers, dropout=config.dropout,
                           bidirectional=config.bidirectional)
def __init__(self, config, embedding=None, use_attention=True, score_fn=None):
    super(rnn_decoder, self).__init__()
    self.embedding = embedding if embedding is not None else nn.Embedding(
        config.tgt_vocab_size, config.emb_size)
    input_size = config.emb_size
    if config.cell == 'gru':
        self.rnn = StackedGRU(input_size=input_size, hidden_size=config.hidden_size,
                              num_layers=config.dec_num_layers, dropout=config.dropout)
    else:
        self.rnn = StackedLSTM(input_size=input_size, hidden_size=config.hidden_size,
                               num_layers=config.dec_num_layers, dropout=config.dropout)
    # Guard against the default score_fn=None, which would otherwise crash
    # the startswith checks below.
    score_fn = score_fn if score_fn is not None else ''
    # Output scoring: 'general' projects to embedding space before the tied
    # dot product, 'dot' scores against the embedding directly, and a 'not'
    # suffix detaches the embedding weights so no gradient flows through
    # this path (detach() replaces the deprecated Variable(...data) idiom).
    if score_fn.startswith('general'):
        self.linear = nn.Linear(config.hidden_size, config.emb_size)
        if score_fn.endswith('not'):
            self.score_fn = lambda x: torch.matmul(
                self.linear(x), self.embedding.weight.t().detach())
        else:
            self.score_fn = lambda x: torch.matmul(
                self.linear(x), self.embedding.weight.t())
    elif score_fn.startswith('dot'):
        if score_fn.endswith('not'):
            self.score_fn = lambda x: torch.matmul(
                x, self.embedding.weight.t().detach())
        else:
            self.score_fn = lambda x: torch.matmul(x, self.embedding.weight.t())
    else:
        self.score_fn = nn.Linear(config.hidden_size, config.tgt_vocab_size)
    self.sigmoid = nn.Sigmoid()
    if not use_attention or config.attention == 'None':
        self.attention = None
    elif config.attention == 'bahdanau':
        self.attention = models.bahdanau_attention(
            config.hidden_size, config.emb_size, config.pool_size)
    elif config.attention == 'luong':
        self.attention = models.luong_attention(
            config.hidden_size, config.emb_size, config.pool_size)
    elif config.attention == 'luong_gate':
        self.attention = models.luong_gate_attention(
            config.hidden_size, config.emb_size, prob=config.dropout)
    self.hidden_size = config.hidden_size
    self.dropout = nn.Dropout(config.dropout)
    self.config = config
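# The 'not' suffix above controls whether gradients reach the tied
# embedding through the output layer; detach() cuts the autograd graph at
# the weights. A minimal demonstration (sizes illustrative):
import torch
import torch.nn as nn

emb = nn.Embedding(5000, 512)
h = torch.randn(8, 512, requires_grad=True)
frozen = torch.matmul(h, emb.weight.t().detach())  # the 'dot_not' path
frozen.sum().backward()
assert emb.weight.grad is None                     # embedding untouched
assert h.grad is not None                          # decoder state still trains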