def __init__(self, config, vocab_size, embedding=None):
    super(pointer_decoder, self).__init__()
    if embedding is not None:
        self.embedding = embedding
    else:
        self.embedding = nn.Embedding(vocab_size, config.emb_size)
    self.rnn = StackedLSTM(input_size=config.emb_size,
                           hidden_size=config.decoder_hidden_size,
                           num_layers=config.num_layers,
                           dropout=config.dropout)
    # Pointer-generator gate: maps [input embedding; decoder state; two attention
    # contexts] to a single scalar per step.
    self.p_gen_weight = nn.Linear(config.emb_size + config.decoder_hidden_size * 3, 1)
    self.linear = nn.Linear(config.decoder_hidden_size, vocab_size)
    self.output_merge = nn.Linear(2 * config.decoder_hidden_size,
                                  config.decoder_hidden_size)
    if hasattr(config, 'att_act'):
        activation = config.att_act
        print('use attention activation %s' % activation)
    else:
        activation = None
    self.attention = models.global_attention(config.decoder_hidden_size, activation)
    self.memory_attention = models.global_attention(
        config.decoder_hidden_size, activation)
    self.softmax = nn.Softmax(-1)
    self.hidden_size = config.decoder_hidden_size
    self.vocab_size = vocab_size
    self.dropout = nn.Dropout(config.dropout)
    self.config = config
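# Hedged sketch (not part of the original module): one plausible way the
# pointer-generator gate above is used in the forward pass.  The input layout of
# p_gen_weight (emb_size + 3 * hidden_size -> 1) suggests it sees the current input
# embedding, the decoder state, and the two attention contexts.  The tensor names
# below (emb, state, context, memory_context, attn_weights, src_map) are assumptions
# for illustration only.
import torch

def mix_copy_and_generate(decoder, emb, state, context, memory_context,
                          attn_weights, src_map):
    # p_gen in (0, 1): probability of generating from the vocabulary rather than
    # copying from the source.
    p_gen = torch.sigmoid(decoder.p_gen_weight(
        torch.cat([emb, state, context, memory_context], dim=-1)))
    # Generation distribution over the target vocabulary.
    vocab_dist = decoder.softmax(decoder.linear(state))
    # Copy distribution: scatter the source attention weights onto vocabulary ids
    # via a (batch, src_len, vocab_size) alignment map.
    copy_dist = torch.bmm(attn_weights.unsqueeze(1), src_map).squeeze(1)
    return p_gen * vocab_dist + (1.0 - p_gen) * copy_dist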
def __init__(self, config, vocab_size, embedding=None, score_fn=None):
    super(rnn_decoder, self).__init__()
    if embedding is not None:
        self.embedding = embedding
    else:
        self.embedding = nn.Embedding(vocab_size, config.emb_size)
    self.rnn = StackedLSTM(input_size=config.emb_size,
                           hidden_size=config.hidden_size,
                           num_layers=config.num_layers,
                           dropout=config.dropout)
    self.score_fn = score_fn
    self.linear = nn.Linear(config.hidden_size, vocab_size)
    if hasattr(config, 'att_act'):
        activation = config.att_act
        print('use attention activation %s' % activation)
    else:
        activation = None
    self.attention = models.global_attention(config.hidden_size, activation)
    self.hidden_size = config.hidden_size
    self.dropout = nn.Dropout(config.dropout)
    self.config = config
def __init__(self, config, vocab_size, embedding=None, score_fn=None):
    super(rnn_decoder, self).__init__()
    if embedding is not None:
        self.embedding = embedding
    else:
        self.embedding = nn.Embedding(vocab_size, config.emb_size)
    self.rnn = StackedLSTM(input_size=config.emb_size,
                           hidden_size=config.decoder_hidden_size,
                           num_layers=config.num_layers,
                           dropout=config.dropout)
    self.score_fn = score_fn
    if self.score_fn.startswith('general'):
        self.linear = nn.Linear(config.decoder_hidden_size, config.emb_size)
    elif score_fn.startswith('concat'):
        self.linear_query = nn.Linear(config.decoder_hidden_size, config.decoder_hidden_size)
        self.linear_weight = nn.Linear(config.emb_size, config.decoder_hidden_size)
        self.linear_v = nn.Linear(config.decoder_hidden_size, 1)
    elif not self.score_fn.startswith('dot'):
        self.linear = nn.Linear(config.decoder_hidden_size, vocab_size)
    if hasattr(config, 'att_act'):
        activation = config.att_act
        print('use attention activation %s' % activation)
    else:
        activation = None
    self.attention = models.global_attention(config.decoder_hidden_size, activation)
    self.hidden_size = config.decoder_hidden_size
    self.dropout = nn.Dropout(config.dropout)
    self.config = config
    if self.config.global_emb:
        self.gated1 = nn.Linear(config.emb_size, config.emb_size)
        self.gated2 = nn.Linear(config.emb_size, config.emb_size)
def __init__(self, config, src_vocab_size, tgt_vocab_size,
             slot_embedding=None, value_embedding=None, score_fn=None):
    super(rnn_decoder, self).__init__()
    self.slot_embedding = slot_embedding
    self.vocab_size = tgt_vocab_size
    self.rnn = StackedLSTM(input_size=config.emb_size,
                           hidden_size=config.decoder_hidden_size,
                           num_layers=config.num_layers,
                           dropout=config.dropout)
    # Orthogonal initialization for recurrent weight matrices, zeros for biases.
    for param in self.rnn.parameters():
        if len(param.shape) >= 2:
            init.orthogonal_(param.data)
        else:
            init.constant_(param.data, 0)
    self.score_fn = score_fn
    self.slot_linear = nn.Linear(config.decoder_hidden_size, config.emb_size)
    init.kaiming_normal_(self.slot_linear.weight.data)
    init.constant_(self.slot_linear.bias.data, 0)
    activation = None
    self.attention = models.global_attention(config.decoder_hidden_size, activation)
    # self.user_attention = models.global_attention(config.decoder_hidden_size, activation)
    # self.sys_attention = models.global_attention(config.decoder_hidden_size, activation)
    self.hidden_size = config.decoder_hidden_size
    self.config = config
    self.linear_out = nn.Linear(4 * self.hidden_size, self.hidden_size)
    init.kaiming_normal_(self.linear_out.weight.data)
    init.constant_(self.linear_out.bias.data, 0)
    self.re1 = nn.ReLU()
    # self.dp1 = nn.Dropout(0.1)
    self.linear_slot = nn.Linear(self.hidden_size, self.hidden_size)
    init.kaiming_normal_(self.linear_slot.weight.data)
    init.constant_(self.linear_slot.bias.data, 0)
    self.re2 = nn.ReLU()
    # self.dp2 = nn.Dropout(0.1)
    self.linear3 = nn.Linear(self.hidden_size, self.hidden_size)
    init.kaiming_normal_(self.linear3.weight.data)
    init.constant_(self.linear3.bias.data, 0)
    self.re3 = nn.ReLU()
    self.sigmoid = nn.Sigmoid()
    self.log_softmax = nn.LogSoftmax(dim=1)
    self.linear4 = nn.Linear(self.hidden_size, self.hidden_size)
    init.kaiming_normal_(self.linear4.weight.data)
    init.constant_(self.linear4.bias.data, 0)
    self.re4 = nn.ReLU()
    self.dropout = nn.Dropout(0.5)  # was config.dropout
def __init__(self, config, vocab_size, embedding=None, score_fn=None):
    super(rnn_decoder, self).__init__()
    if embedding is not None:
        self.embedding = embedding
    else:
        self.embedding = nn.Embedding(vocab_size, config.emb_size)
    self.rnn = StackedLSTM(input_size=config.emb_size,
                           hidden_size=config.decoder_hidden_size,
                           num_layers=config.num_layers,
                           dropout=config.dropout)
    self.score_fn = score_fn
    if self.score_fn.startswith('general'):
        self.linear = nn.Linear(config.decoder_hidden_size, config.emb_size)
    elif score_fn.startswith('concat'):
        self.linear_query = nn.Linear(config.decoder_hidden_size, config.decoder_hidden_size)
        self.linear_weight = nn.Linear(config.emb_size, config.decoder_hidden_size)
        self.linear_v = nn.Linear(config.decoder_hidden_size, 1)
    elif score_fn == 'hinge_margin_loss':
        self.toEmbLinear = nn.Linear(config.decoder_hidden_size, config.emb_size)
        if config.global_emb:
            self.linear = nn.Linear(config.decoder_hidden_size, vocab_size)  # for global_embedding
    elif score_fn == 'hybrid':
        # TODO: work out how config is structured here; attribute access and
        # dict-style access are mixed below.
        # Project the decoder state into the embedding space.
        self.toEmbLinear = nn.Linear(config.decoder_hidden_size, config.emb_size)
        self.toCatLinear = nn.Linear(config['emb_size'], vocab_size)
        self.grl = models.GradReverse(config['GRL_fraction'])
        self.activation = nn.Tanh()
        if config.global_emb:
            self.linear = nn.Linear(config.decoder_hidden_size, vocab_size)
    elif not self.score_fn.startswith('dot'):
        self.linear = nn.Linear(config.decoder_hidden_size, vocab_size)
    if hasattr(config, 'att_act'):
        activation = config.att_act
        print('use attention activation %s' % activation)
    else:
        activation = None
    self.attention = models.global_attention(config.decoder_hidden_size, activation)
    self.hidden_size = config.decoder_hidden_size
    self.dropout = nn.Dropout(config.dropout)
    self.config = config
    if self.config.global_emb:
        # The two W matrices of Eq. (13) in the paper (global-embedding gate).
        self.gated1 = nn.Linear(config.emb_size, config.emb_size)
        self.gated2 = nn.Linear(config.emb_size, config.emb_size)
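# Hedged sketch (assumption, not the original forward code): one plausible use of
# gated1/gated2 above for the global-embedding combination referenced as Eq. (13),
# where the hard embedding of the previous prediction is mixed with a
# probability-weighted average embedding.  The names prev_token and prev_probs are
# assumed for illustration, as is the sigmoid on the gate.
import torch

def global_embedding(decoder, prev_token, prev_probs):
    emb = decoder.embedding(prev_token)                          # (batch, emb_size) hard embedding
    avg_emb = torch.matmul(prev_probs, decoder.embedding.weight)  # (batch, emb_size) expected embedding
    gate = torch.sigmoid(decoder.gated1(emb) + decoder.gated2(avg_emb))
    return (1.0 - gate) * emb + gate * avg_emb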
def __init__(self, config, vocab_size, embedding=None, score_fn=None):
    super(rnn_decoder, self).__init__()
    if embedding is not None:
        self.embedding = embedding
    else:
        self.embedding = nn.Embedding(vocab_size, config.emb_size)
    self.rnn = StackedLSTM(input_size=config.emb_size,
                           hidden_size=config.hidden_size,
                           num_layers=config.num_layers,
                           dropout=config.dropout)
    if score_fn.startswith('general'):
        self.linear = nn.Linear(config.hidden_size, config.emb_size)
        if score_fn.endswith('not'):
            # Score against a detached copy of the embedding matrix, so no gradient
            # flows into the embeddings through the output layer.
            self.score_fn = lambda x: torch.matmul(
                self.linear(x), Variable(self.embedding.weight.t().data))
        else:
            self.score_fn = lambda x: torch.matmul(
                self.linear(x), self.embedding.weight.t())
    elif score_fn.startswith('dot'):
        if score_fn.endswith('not'):
            self.score_fn = lambda x: torch.matmul(
                x, Variable(self.embedding.weight.t().data))
        else:
            self.score_fn = lambda x: torch.matmul(x, self.embedding.weight.t())
    else:
        self.score_fn = nn.Linear(config.hidden_size, vocab_size)
    if hasattr(config, 'att_act'):
        activation = config.att_act
        print('use attention activation %s' % activation)
    else:
        activation = None
    self.attention = models.global_attention(config.hidden_size, activation)
    self.hidden_size = config.hidden_size
    self.dropout = nn.Dropout(config.dropout)
    self.log_softmax = nn.LogSoftmax()
    self.config = config
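# Hedged sketch (not from the original file): what the 'general' score_fn above
# computes, written out with explicit shapes.  The names hidden, proj, and
# emb_weight are assumptions for illustration; proj plays the role of self.linear
# and emb_weight the role of self.embedding.weight.
import torch

def general_score(hidden, proj, emb_weight, detach_embedding=False):
    # hidden:     (batch, hidden_size)  decoder output at one step
    # proj:       nn.Linear(hidden_size, emb_size)
    # emb_weight: (vocab_size, emb_size) tied embedding matrix
    weight = emb_weight.detach() if detach_embedding else emb_weight  # '*not' variant detaches
    return torch.matmul(proj(hidden), weight.t())  # (batch, vocab_size) logits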