def __init__(self, corpus, config):
    super(Seq2Seq, self).__init__()
    self.src_vocab_size = corpus.SRC.vocab_size
    self.tgt_vocab_size = corpus.TGT.vocab_size
    self.embed_size = config.embed_size
    self.hidden_size = config.hidden_size
    self.padding_idx = corpus.padding_idx
    self.num_layers = config.num_layers
    self.bidirectional = config.bidirectional
    self.attn_mode = config.attn_mode
    self.attn_hidden_size = config.attn_hidden_size
    self.with_bridge = config.with_bridge
    self.tie_embedding = config.tie_embedding
    self.dropout = config.dropout
    self.use_gpu = config.use_gpu

    enc_embedder = Embedder(num_embeddings=self.src_vocab_size,
                            embedding_dim=self.embed_size,
                            padding_idx=self.padding_idx)

    self.encoder = RNNEncoder(input_size=self.embed_size,
                              hidden_size=self.hidden_size,
                              embedder=enc_embedder,
                              num_layers=self.num_layers,
                              bidirectional=self.bidirectional,
                              dropout=self.dropout)

    if self.with_bridge:
        self.bridge = nn.Sequential(
            nn.Linear(self.hidden_size, self.hidden_size),
            nn.Tanh(),
        )

    if self.tie_embedding:
        assert self.src_vocab_size == self.tgt_vocab_size
        dec_embedder = enc_embedder
    else:
        dec_embedder = Embedder(num_embeddings=self.tgt_vocab_size,
                                embedding_dim=self.embed_size,
                                padding_idx=self.padding_idx)

    self.decoder = RNNDecoder(input_size=self.embed_size,
                              hidden_size=self.hidden_size,
                              output_size=self.tgt_vocab_size,
                              embedder=dec_embedder,
                              num_layers=self.num_layers,
                              attn_mode=self.attn_mode,
                              attn_hidden_size=self.attn_hidden_size,
                              memory_size=self.hidden_size,
                              feature_size=None,
                              dropout=self.dropout)

    # Loss Definition
    if self.padding_idx is not None:
        weight = torch.ones(self.tgt_vocab_size)
        weight[self.padding_idx] = 0
    else:
        weight = None
    self.nll_loss = NLLLoss(weight=weight,
                            ignore_index=self.padding_idx,
                            reduction='mean')

    if self.use_gpu:
        self.cuda()
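# --- Hedged usage sketch (assumption, not from the original repo) ---
# The __init__ above is a variant that reads hyperparameters from a
# (corpus, config) pair via attribute access. A minimal stand-in for
# `config`, with exactly the fields that __init__ touches, could look
# like this; `corpus` is assumed to expose SRC/TGT objects carrying a
# `vocab_size` plus a shared `padding_idx`.
from types import SimpleNamespace

config = SimpleNamespace(
    embed_size=300, hidden_size=512, num_layers=1, bidirectional=True,
    attn_mode="mlp", attn_hidden_size=None, with_bridge=False,
    tie_embedding=False, dropout=0.3, use_gpu=False)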
class Seq2Seq(BaseModel):
    """
    Seq2Seq
    """
    def __init__(self, src_vocab_size, tgt_vocab_size, embed_size,
                 hidden_size, padding_idx=None, num_layers=1,
                 bidirectional=True, attn_mode="mlp", attn_hidden_size=None,
                 with_bridge=False, tie_embedding=False, dropout=0.0,
                 use_gpu=False):
        super(Seq2Seq, self).__init__()
        self.src_vocab_size = src_vocab_size
        self.tgt_vocab_size = tgt_vocab_size
        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.padding_idx = padding_idx
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.attn_mode = attn_mode
        self.attn_hidden_size = attn_hidden_size
        self.with_bridge = with_bridge
        self.tie_embedding = tie_embedding
        self.dropout = dropout
        self.use_gpu = use_gpu

        enc_embedder = Embedder(num_embeddings=self.src_vocab_size,
                                embedding_dim=self.embed_size,
                                padding_idx=self.padding_idx)

        self.encoder = RNNEncoder(input_size=self.embed_size,
                                  hidden_size=self.hidden_size,
                                  embedder=enc_embedder,
                                  num_layers=self.num_layers,
                                  bidirectional=self.bidirectional,
                                  dropout=self.dropout)

        if self.with_bridge:
            self.bridge = nn.Sequential(
                nn.Linear(self.hidden_size, self.hidden_size),
                nn.Tanh(),
            )

        if self.tie_embedding:
            # Sharing embeddings requires identical source/target vocabularies.
            assert self.src_vocab_size == self.tgt_vocab_size
            dec_embedder = enc_embedder
        else:
            dec_embedder = Embedder(num_embeddings=self.tgt_vocab_size,
                                    embedding_dim=self.embed_size,
                                    padding_idx=self.padding_idx)

        self.decoder = RNNDecoder(input_size=self.embed_size,
                                  hidden_size=self.hidden_size,
                                  output_size=self.tgt_vocab_size,
                                  embedder=dec_embedder,
                                  num_layers=self.num_layers,
                                  attn_mode=self.attn_mode,
                                  attn_hidden_size=self.attn_hidden_size,
                                  memory_size=self.hidden_size,
                                  feature_size=None,
                                  dropout=self.dropout)

        # Loss Definition: zero the weight of the padding index so PAD
        # tokens contribute nothing.
        if self.padding_idx is not None:
            weight = torch.ones(self.tgt_vocab_size)
            weight[self.padding_idx] = 0
        else:
            weight = None
        self.nll_loss = NLLLoss(weight=weight,
                                ignore_index=self.padding_idx,
                                reduction='mean')

        if self.use_gpu:
            self.cuda()

    def encode(self, inputs, hidden=None):
        """ encode """
        outputs = Pack()
        # Strip BOS/EOS from the source and shorten the lengths accordingly.
        enc_inputs = inputs.src[0][:, 1:-1], inputs.src[1] - 2
        _, lengths = enc_inputs
        enc_outputs, enc_hidden = self.encoder(enc_inputs, hidden)

        if self.with_bridge:
            enc_hidden = self.bridge(enc_hidden)

        dec_init_state = self.decoder.initialize_state(
            hidden=enc_hidden,
            attn_memory=enc_outputs if self.attn_mode else None,
            memory_lengths=lengths if self.attn_mode else None)
        return outputs, dec_init_state

    def decode(self, input, state):
        """ decode step by step """
        log_prob, state, output = self.decoder.decode(input, state)
        return log_prob, state, output

    def forward(self, enc_inputs, dec_inputs, hidden=None):
        """ forward """
        outputs, dec_init_state = self.encode(enc_inputs, hidden)
        log_probs, _ = self.decoder(dec_inputs, dec_init_state)
        outputs.add(logits=log_probs)  # log-probabilities, stored as `logits`
        return outputs

    def collect_metrics(self, outputs, target):
        """ collect_metrics """
        num_samples = target.size(0)
        metrics = Pack(num_samples=num_samples)
        loss = 0

        logits = outputs.logits
        nll = self.nll_loss(logits, target)
        num_words = target.ne(self.padding_idx).sum().item()
        acc = accuracy(logits, target, padding_idx=self.padding_idx)
        metrics.add(nll=(nll, num_words), acc=acc)
        loss += nll

        metrics.add(loss=loss)
        return metrics

    def iterate(self, inputs, optimizer=None, grad_clip=None,
                is_training=True, epoch=-1):
        """ iterate """
        enc_inputs = inputs
        # Teacher forcing: decoder inputs drop the final token, targets drop BOS.
        dec_inputs = inputs.tgt[0][:, :-1], inputs.tgt[1] - 1
        target = inputs.tgt[0][:, 1:]

        outputs = self.forward(enc_inputs, dec_inputs)
        metrics = self.collect_metrics(outputs, target)

        loss = metrics.loss
        if torch.isnan(loss):
            raise ValueError("nan loss encountered")

        if is_training:
            assert optimizer is not None
            optimizer.zero_grad()
            loss.backward()
            if grad_clip is not None and grad_clip > 0:
                clip_grad_norm_(parameters=self.parameters(),
                                max_norm=grad_clip)
            optimizer.step()
        return metrics
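# --- Hedged sketch of the teacher-forcing shift used in iterate() above ---
# Decoder inputs drop the final token and targets drop BOS, so at step t the
# decoder consumes tgt[t] and is scored against tgt[t + 1]. Toy ids only.
import torch

tgt = torch.tensor([[1, 5, 6, 7, 2]])  # [BOS, w1, w2, w3, EOS]
dec_inputs = tgt[:, :-1]               # [BOS, w1, w2, w3]
target = tgt[:, 1:]                    # [w1, w2, w3, EOS]
assert dec_inputs.shape == target.shape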
def __init__(self, src_vocab_size, tgt_vocab_size, embed_size,
             hidden_size, padding_idx=None, num_layers=1,
             bidirectional=True, attn_mode="mlp", attn_hidden_size=None,
             with_bridge=False, tie_embedding=False, dropout=0.0,
             use_gpu=False, copy=False):
    super(Seq2Seq, self).__init__()
    self.src_vocab_size = src_vocab_size
    self.tgt_vocab_size = tgt_vocab_size
    self.embed_size = embed_size
    self.hidden_size = hidden_size
    self.padding_idx = padding_idx
    self.num_layers = num_layers
    self.bidirectional = bidirectional
    self.attn_mode = attn_mode
    self.attn_hidden_size = attn_hidden_size
    self.with_bridge = with_bridge
    self.tie_embedding = tie_embedding
    self.dropout = dropout
    self.use_gpu = use_gpu
    self.copy = copy  # copy-mechanism flag; stored but not consumed here

    enc_embedder = Embedder(num_embeddings=self.src_vocab_size,
                            embedding_dim=self.embed_size,
                            padding_idx=self.padding_idx)

    self.encoder = RNNEncoder(input_size=self.embed_size,
                              hidden_size=self.hidden_size,
                              embedder=enc_embedder,
                              num_layers=self.num_layers,
                              bidirectional=self.bidirectional,
                              dropout=self.dropout)

    if self.with_bridge:
        self.bridge = nn.Sequential(
            nn.Linear(self.hidden_size, self.hidden_size),
            nn.Tanh(),
        )

    if self.tie_embedding:
        assert self.src_vocab_size == self.tgt_vocab_size
        dec_embedder = enc_embedder
    else:
        dec_embedder = Embedder(num_embeddings=self.tgt_vocab_size,
                                embedding_dim=self.embed_size,
                                padding_idx=self.padding_idx)

    self.decoder = RNNDecoder(input_size=self.embed_size,
                              hidden_size=self.hidden_size,
                              output_size=self.tgt_vocab_size,
                              embedder=dec_embedder,
                              num_layers=self.num_layers,
                              attn_mode=self.attn_mode,
                              attn_hidden_size=self.attn_hidden_size,
                              memory_size=self.hidden_size,
                              feature_size=None,
                              dropout=self.dropout)

    # Loss Definition
    if self.padding_idx is not None:
        weight = torch.ones(self.tgt_vocab_size)
        weight[self.padding_idx] = 0
    else:
        weight = None
    self.nll_loss = NLLLoss(weight=weight,
                            ignore_index=self.padding_idx,
                            reduction='mean')

    if self.use_gpu:
        self.cuda()
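# --- Hedged sketch of the padding weight built for NLLLoss above ---
# Zeroing the padding index in the class-weight vector means PAD targets
# contribute no loss. Toy sizes only.
import torch

tgt_vocab_size, padding_idx = 5, 0
weight = torch.ones(tgt_vocab_size)
weight[padding_idx] = 0  # -> tensor([0., 1., 1., 1., 1.])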
class Entity_Seq2Seq(BaseModel):
    """
    Seq2Seq
    """
    def __init__(self, src_vocab_size, embed_size, hidden_size,
                 padding_idx=None, num_layers=1, bidirectional=True,
                 attn_mode="mlp", with_bridge=False, dropout=0.0,
                 use_gpu=False, pretrain_epoch=5):
        super(Entity_Seq2Seq, self).__init__()
        self.src_vocab_size = src_vocab_size
        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.padding_idx = padding_idx
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.attn_mode = attn_mode
        self.with_bridge = with_bridge
        self.dropout = dropout
        self.use_gpu = use_gpu
        self.pretrain_epoch = pretrain_epoch

        enc_embedder = Embedder(num_embeddings=self.src_vocab_size,
                                embedding_dim=self.embed_size,
                                padding_idx=self.padding_idx)

        self.encoder = RNNEncoder(input_size=self.embed_size,
                                  hidden_size=self.hidden_size,
                                  embedder=enc_embedder,
                                  num_layers=self.num_layers,
                                  bidirectional=self.bidirectional,
                                  dropout=self.dropout)

        if self.with_bridge:
            # Separate bridges for the LSTM hidden and cell states.
            self.bridge1 = nn.Sequential(
                nn.Linear(self.hidden_size, self.hidden_size),
                nn.Tanh(),
            )
            self.bridge2 = nn.Sequential(
                nn.Linear(self.hidden_size, self.hidden_size),
                nn.Tanh(),
            )

        self.decoder = RNNDecoder(input_size=self.embed_size,
                                  hidden_size=self.hidden_size,
                                  embedder=enc_embedder,
                                  num_layers=self.num_layers,
                                  attn_mode=self.attn_mode,
                                  memory_size=self.hidden_size,
                                  dropout=self.dropout)

        if self.use_gpu:
            self.cuda()

    def encode(self, inputs, hidden=None):
        """ encode """
        outputs = Pack()
        enc_inputs, lengths = inputs.num_src
        enc_outputs, enc_hidden = self.encoder(enc_inputs, hidden)

        if self.with_bridge:
            enc_hidden[0] = self.bridge1(enc_hidden[0])
            enc_hidden[1] = self.bridge2(enc_hidden[1])

        layer, batch_size, dim = enc_hidden[0].size()
        dec_init_state = self.decoder.initialize_state(
            hidden=enc_hidden,
            # Zero-initialized input feed of shape batch x 1 x dim.
            input_feed=enc_hidden[0].data.new(batch_size, dim).zero_()
                                         .unsqueeze(1),
            attn_memory=enc_outputs if self.attn_mode else None,
            mask=inputs.mask[0])
        return outputs, dec_init_state

    def decode(self, input, state):
        """ decode step by step """
        log_prob, state, output = self.decoder.decode(input, state)
        return log_prob, state, output

    def forward(self, enc_inputs, dec_inputs, hidden=None):
        """ forward """
        outputs, dec_init_state = self.encode(enc_inputs, hidden)
        log_probs, state, out_copy = self.decoder(dec_inputs, dec_init_state)
        outputs.add(logits=log_probs)
        outputs.add(out_copy=out_copy)
        return outputs

    def collect_metrics(self, outputs, target, emo_target):
        """ collect_metrics """
        num_samples = target[0].size(0)
        num_words = target[1].sum().item()
        metrics = Pack(num_samples=num_samples)

        target_len = target[1]
        mask = sequence_mask(target_len)
        mask = mask.float()

        # Copy loss: probability of the gold source position at each step.
        out_copy = outputs.out_copy  # batch x max_len x src
        target_loss = out_copy.gather(2, target[0].unsqueeze(-1)).squeeze(-1)
        target_loss = target_loss * mask
        target_loss += 1e-15
        # Mask again after the log so padded steps contribute zero.
        target_loss = target_loss.log() * mask
        loss = -(target_loss.sum() / num_words)

        # Emotion loss over the per-step emotion distribution.
        out_emo = outputs.logits  # batch x max_len x class_num
        batch_size, max_len, class_num = out_emo.size()
        target_emo_loss = out_emo.gather(
            2, emo_target[0].unsqueeze(-1)).squeeze(-1)
        # Emotion targets are one step shorter; build a shifted mask.
        # Avoid in-place `-=`, which would mutate the caller's lengths.
        target_len = target_len - 1
        mask_ = sequence_mask(target_len)
        mask_ = mask_.float()
        new_mask = mask.data.new(batch_size, max_len).zero_()
        new_mask[:, :max_len - 1] = mask_
        target_emo_loss = target_emo_loss * new_mask
        target_emo_loss += 1e-15
        target_emo_loss = target_emo_loss.log() * new_mask
        emo_loss = -(target_emo_loss.sum() / num_words)

        metrics.add(loss=loss)
        metrics.add(emo_loss=emo_loss)
        # Here we only compute accuracy over the copy distribution.
        acc = accuracy(out_copy, target[0], mask=mask)
        metrics.add(acc=acc)
        return metrics

    def iterate(self, inputs, optimizer=None, grad_clip=None,
                is_training=True, epoch=-1):
        """ iterate """
        enc_inputs = inputs
        dec_inputs = inputs.num_tgt_input
        target = inputs.tgt_output
        emo_target = inputs.tgt_emo

        outputs = self.forward(enc_inputs, dec_inputs)
        metrics = self.collect_metrics(outputs, target, emo_target)

        loss = metrics.loss
        # The emotion objective is only added after the pretraining phase.
        if epoch > self.pretrain_epoch:
            loss += metrics.emo_loss
        if torch.isnan(loss):
            raise ValueError("nan loss encountered")

        if is_training:
            assert optimizer is not None
            optimizer.zero_grad()
            loss.backward()
            if grad_clip is not None and grad_clip > 0:
                clip_grad_norm_(parameters=self.parameters(),
                                max_norm=grad_clip)
            optimizer.step()
        return metrics
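# --- Hedged sketch of the masked copy loss in collect_metrics() above ---
# out_copy holds per-step probabilities over source positions; gathering the
# gold index at each step, masking padded steps, and averaging over the real
# word count reproduces the loss. Shapes and values are toy assumptions.
import torch

out_copy = torch.softmax(torch.randn(2, 4, 7), dim=-1)  # batch x len x src
gold = torch.randint(0, 7, (2, 4))                      # gold source indices
mask = torch.tensor([[1., 1., 1., 0.], [1., 1., 0., 0.]])
p = out_copy.gather(2, gold.unsqueeze(-1)).squeeze(-1)  # prob of gold index
loss = -(((p * mask + 1e-15).log() * mask).sum() / mask.sum())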
class Entity_Seq2Seq_elmo(BaseModel):
    """
    Entity_Seq2Seq with ELMo-augmented encoder embeddings
    """
    def __init__(self, src_vocab_size, embed_size, hidden_size,
                 padding_idx=None, num_layers=1, bidirectional=True,
                 attn_mode="mlp", with_bridge=False, dropout=0.0,
                 use_gpu=False, pretrain_epoch=5, batch_size=64):
        super(Entity_Seq2Seq_elmo, self).__init__()
        self.src_vocab_size = src_vocab_size
        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.padding_idx = padding_idx
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.attn_mode = attn_mode
        self.with_bridge = with_bridge
        self.dropout = dropout
        self.use_gpu = use_gpu
        self.pretrain_epoch = pretrain_epoch
        self.batch_size = batch_size

        enc_embedder = Embedder(num_embeddings=self.src_vocab_size,
                                embedding_dim=self.embed_size,
                                padding_idx=self.padding_idx)
        model_file = './extend/zhs.model'
        elmo_embedder = elmo_Embedder(model_file, batch_size=self.batch_size)

        self.encoder = RNNEncoder(input_size=self.embed_size + 1024,
                                  hidden_size=self.hidden_size,
                                  embedder=enc_embedder,
                                  elmo_embedder=elmo_embedder,
                                  num_layers=self.num_layers,
                                  bidirectional=self.bidirectional,
                                  dropout=self.dropout)

        if self.with_bridge:
            self.bridge1 = nn.Sequential(
                nn.Linear(self.hidden_size, self.hidden_size),
                nn.Tanh(),
            )
            self.bridge2 = nn.Sequential(
                nn.Linear(self.hidden_size, self.hidden_size),
                nn.Tanh(),
            )

        self.decoder = RNNDecoder(input_size=self.embed_size,
                                  hidden_size=self.hidden_size,
                                  embedder=enc_embedder,
                                  num_layers=self.num_layers,
                                  attn_mode=self.attn_mode,
                                  memory_size=self.hidden_size,
                                  dropout=self.dropout)

        if self.use_gpu:
            self.cuda()
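# --- Hedged note (assumption): input_size = embed_size + 1024 suggests the
# encoder concatenates each word embedding with a 1024-d ELMo vector before
# the RNN. A toy shape check of that concatenation:
import torch

word_emb = torch.randn(2, 7, 300)   # batch x len x embed_size (toy)
elmo_emb = torch.randn(2, 7, 1024)  # batch x len x ELMo dim
enc_in = torch.cat([word_emb, elmo_emb], dim=-1)
assert enc_in.size(-1) == 300 + 1024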
class MemNet(BaseModel):
    """
    MemNet
    """
    def __init__(self, vocab_size, embed_units, hidden_size,
                 padding_idx=None, num_layers=1, max_hop=3,
                 bidirectional=True, attn_mode='mlp', dropout=0.0,
                 use_gpu=False):
        super(MemNet, self).__init__()
        self.vocab_size = vocab_size
        self.embed_units = embed_units
        self.padding_idx = padding_idx
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.max_hop = max_hop
        self.bidirectional = bidirectional
        self.dropout = dropout
        self.attn_mode = attn_mode
        self.use_gpu = use_gpu

        enc_embedder = Embedder(num_embeddings=self.vocab_size,
                                embedding_dim=self.embed_units,
                                padding_idx=self.padding_idx)
        dec_embedder = enc_embedder

        self.rnn_encoder = RNNEncoder(input_size=self.embed_units,
                                      hidden_size=self.hidden_size,
                                      embedder=enc_embedder,
                                      num_layers=self.num_layers,
                                      bidirectional=self.bidirectional,
                                      dropout=self.dropout)

        self.mem_encoder = EncoderMemNN(vocab=self.vocab_size,
                                        hidden_size=self.hidden_size,
                                        hop=self.max_hop,
                                        attn_mode='general',
                                        padding_idx=self.padding_idx)

        self.decoder = RNNDecoder(input_size=self.embed_units,
                                  hidden_size=self.hidden_size,
                                  output_size=self.vocab_size,
                                  embedder=dec_embedder,
                                  attn_mode=self.attn_mode,
                                  attn_hidden_size=self.hidden_size,
                                  memory_size=self.hidden_size,
                                  feature_size=None,
                                  dropout=self.dropout)

        self.softmax = nn.Softmax(dim=-1)

        # Loss Definition
        if self.padding_idx is not None:
            self.weight = torch.ones(self.vocab_size)
            self.weight[self.padding_idx] = 0
        else:
            self.weight = None
        self.nll_loss = NLLLoss(weight=self.weight,
                                ignore_index=self.padding_idx,
                                reduction='mean')

        if self.use_gpu:
            self.cuda()
            if self.weight is not None:
                self.weight = self.weight.cuda()

    def encode(self, inputs, hidden=None):
        """ encode """
        outputs = Pack()
        # Strip BOS/EOS from the source and shorten the lengths accordingly.
        enc_inputs = inputs.src[0][:, 1:-1], inputs.src[1] - 2
        _, lengths = enc_inputs
        enc_outputs, enc_hidden = self.rnn_encoder(enc_inputs, hidden)

        # Knowledge: batch x sent_num x sent_len.
        batch_size, sent_num, sent = inputs.cue[0].size()
        tmp_len = inputs.cue[1]
        # Strip BOS/EOS only where a sentence is present; all-PAD knowledge
        # sentences keep length 0.
        tmp_len[tmp_len > 0] -= 2
        cue_inputs = inputs.cue[0][:, :, 1:-1], tmp_len
        # Memory hops conditioned on the top-layer encoder state.
        u = self.mem_encoder(cue_inputs, enc_hidden[-1])

        dec_init_state = self.decoder.initialize_state(
            hidden=u.unsqueeze(0),
            attn_memory=enc_outputs if self.attn_mode else None,
            memory_lengths=lengths if self.attn_mode else None)
        return outputs, dec_init_state

    def decode(self, input, state):
        """ decode """
        log_prob, state, output = self.decoder.decode(input, state)
        return log_prob, state, output

    def forward(self, enc_inputs, dec_inputs, hidden=None):
        """ forward """
        outputs, dec_init_state = self.encode(enc_inputs, hidden)
        log_probs, _ = self.decoder(dec_inputs, dec_init_state)
        outputs.add(logits=log_probs)
        return outputs

    def collect_metrics(self, outputs, target):
        """ collect_metrics """
        num_samples = target.size(0)
        metrics = Pack(num_samples=num_samples)
        loss = 0

        logits = outputs.logits
        # Per-sample scores, with the loss reduction disabled.
        scores = -self.nll_loss(logits, target, reduction=False)
        nll = self.nll_loss(logits, target)
        num_words = target.ne(self.padding_idx).sum().item()
        acc = accuracy(logits, target, padding_idx=self.padding_idx)
        metrics.add(nll=(nll, num_words), acc=acc)
        loss += nll

        metrics.add(loss=loss)
        return metrics, scores

    def iterate(self, inputs, optimizer=None, grad_clip=None,
                is_training=True, epoch=-1):
        """ iterate """
        enc_inputs = inputs
        dec_inputs = inputs.tgt[0][:, :-1], inputs.tgt[1] - 1
        target = inputs.tgt[0][:, 1:]

        outputs = self.forward(enc_inputs, dec_inputs)
        metrics, scores = self.collect_metrics(outputs, target)

        loss = metrics.loss
        if torch.isnan(loss):
            raise ValueError("nan loss encountered")

        if is_training:
            assert optimizer is not None
            optimizer.zero_grad()
            loss.backward()
            if grad_clip is not None and grad_clip > 0:
                clip_grad_norm_(parameters=self.parameters(),
                                max_norm=grad_clip)
            optimizer.step()
        return metrics, scores
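# --- Hedged sketch of the knowledge-length adjustment in encode() above ---
# Each knowledge sentence is wrapped in BOS/EOS, so lengths are reduced by 2,
# but only where positive: all-PAD sentences keep length 0 rather than going
# negative. Toy values.
import torch

cue_len = torch.tensor([[5, 4, 0]])  # third knowledge sentence is all PAD
cue_len[cue_len > 0] -= 2
assert cue_len.tolist() == [[3, 2, 0]]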