class AspectSent(nn.Module):
    def __init__(self, config):
        '''
        biLSTM encoder + latent linear-chain CRF attention for
        aspect-level sentiment classification (3 output classes).

        Args:
            config: model configuration; fields read here are
                l_hidden_size and dropout2 (plus whatever biLSTM /
                SimpleCat read internally).
        '''
        super(AspectSent, self).__init__()
        self.config = config
        input_dim = config.l_hidden_size
        kernel_num = config.l_hidden_size
        # Convolutional branch is constructed but not used by the
        # current compute_scores path (the calls below are commented out).
        self.conv = nn.Conv1d(input_dim, kernel_num, 3, padding=1)
        #self.conv = nn.Conv1d(input_dim, kernel_num, 3, dilation=2, padding=2)
        self.bilstm = biLSTM(config)
        # 2+2 = 4 CRF states; state index 1 is treated as "selected"
        # when reading marginals in compute_scores.
        self.feat2tri = nn.Linear(kernel_num, 2+2)
        self.inter_crf = LinearChainCrf(2+2)
        self.feat2label = nn.Linear(kernel_num, 3)
        # NLLLoss pairs with the explicit log_softmax applied in forward().
        self.loss = nn.NLLLoss()
        self.tanh = nn.Tanh()
        self.dropout = nn.Dropout(config.dropout2)
        #Modified by Richard Sun
        self.cat_layer = SimpleCat(config)
        self.cat_layer.load_vector()

    def get_pos_weight(self, masks, lens):
        '''
        Get positional weight: 1.0 on target tokens, linearly decaying
        with distance from the target span elsewhere, 0 on padding.

        Args:
            masks: batch_size*max_len, 1 on target-span tokens
            lens: batch_size, true sentence lengths
        Returns:
            pos_wghts: batch_size*max_len float tensor (CPU; note it is
                allocated with torch.zeros without a device argument).
        '''
        pos_wghts = torch.zeros(masks.size())
        t_num = masks.sum(1)
        for i, m in enumerate(masks):
            # argmax of a 0/1 mask gives the index of the first 1,
            # i.e. where the target span begins.
            begin = m.argmax()
            for j, b in enumerate(m):
                #padding words' weights are zero
                if j > lens[i]:
                    break
                if j < begin:
                    pos_wghts[i][j] = 1 - (begin-j).to(torch.float)/lens[i].to(torch.float)
                if b == 1:
                    pos_wghts[i][j] = 1
                if j > begin + t_num[i]:
                    pos_wghts[i][j] = 1 - (j-begin).to(torch.float)/lens[i].to(torch.float)
        return pos_wghts

    def get_target_emb(self, context, masks):
        '''
        Average the context vectors over the target span.

        Args:
            context: batch_size*max_len*hidden_dim encoder output
            masks: batch_size*max_len, 1 on target tokens
        Returns:
            target_emb_avg: batch_size*hidden_dim masked mean
        '''
        #Target embeddings
        #Find target indices, a list of indices
        batch_size, max_len, hidden_dim = context.size()
        # NOTE(review): target_indices/target_max_len are computed but not
        # used here — convert_mask_index may have side effects; confirm.
        target_indices, target_max_len = convert_mask_index(masks)
        #Find the target context embeddings, batch_size*max_len*hidden_size
        masks = masks.type_as(context)
        # Broadcast the 2-D mask to batch_size*max_len*hidden_dim.
        masks = masks.expand(hidden_dim, batch_size, max_len).transpose(0, 1).transpose(1, 2)
        target_emb = masks * context
        target_emb_avg = torch.sum(target_emb, 1)/torch.sum(masks, 1)#Batch_size*embedding
        return target_emb_avg

    def compute_scores(self, sents, masks, lens, is_training=True):
        '''
        Encode, fuse target information additively, and score labels via
        CRF-marginal attention.

        Args:
            sents: batch_size*max_len*word_dim
            masks: batch_size*max_len
            lens: batch_size
            is_training: selects the second return value
        Returns:
            (label_scores, select_polarities) when training, else
            (label_scores, best_latent_seqs) where best_latent_seqs is
            the Viterbi decode of the latent CRF.
        '''
        context = self.bilstm(sents, lens)#Batch_size*sent_len*hidden_dim
        #pos_weights = self.get_pos_weight(masks, lens)#Batch_size*sent_len
        #context = torch.cat([context, sents[:, :, :-30]], 2)#Batch_size*sent_len*(hidden_dim+word_embed)
        # context = F.relu(self.conv(context.transpose(1, 2)))
        # context = context.transpose(1, 2)
        context = torch.tanh(context)
        batch_size, max_len, hidden_dim = context.size()
        #Expand dimension for concatenation
        target_emb_avg = self.get_target_emb(context, masks)
        target_emb_avg_exp = target_emb_avg.expand(max_len, batch_size, hidden_dim)
        target_emb_avg_exp = target_emb_avg_exp.transpose(0, 1)#Batch_size*max_len*embedding
        ###Addition model: add the target summary to every position.
        u = target_emb_avg_exp
        context = context + u#Batch_size*max_len*embedding
        #concatenation model
        #context1 = torch.cat([context, target_emb_avg_exp], 2)
        # NOTE(review): torch.full with an integer fill value is deprecated
        # in newer PyTorch; word_mask is cast via type_as below, so it
        # works, but torch.zeros(batch_size, max_len) would be cleaner.
        word_mask = torch.full((batch_size, max_len), 0)
        for i in range(batch_size):
            word_mask[i, :lens[i]] = 1.0
        ###neural features
        feats = self.feat2tri(context)   #Batch_size*sent_len*2
        marginals = self.inter_crf.compute_marginal(feats, word_mask.type_as(feats))
        #print(word_mask.sum(1))
        # Column 1 of each per-sentence marginal = "selected" probability.
        select_polarities = [marginal[:, 1] for marginal in marginals]
        # Attention normalizer; the /2 halving is deliberate scaling.
        gammas = [sp.sum()/2 for sp in select_polarities]
        sent_vs = [torch.mm(sp.unsqueeze(0), context[i, :lens[i], :]) for i, sp in enumerate(select_polarities)]
        sent_vs = [sv/gamma for sv, gamma in zip(sent_vs, gammas)]#normalization
        sent_vs = torch.cat(sent_vs)#batch_size, hidden_size
        #sent_vs = [self.dropout(sent_v) for sent_v in sent_vs]
        #label_scores = [self.feat2label(sent_v).squeeze(0) for sent_v in sent_vs]
        #label_scores = torch.stack(label_scores)
        sent_vs = self.dropout(sent_vs)
        label_scores = self.feat2label(sent_vs)
        best_latent_seqs = self.inter_crf.decode(feats, word_mask.type_as(feats))
        if is_training:
            return label_scores, select_polarities
        else:
            return label_scores, best_latent_seqs

    def forward(self, sents, masks, labels, lens):
        '''
        inputs are list of list for the convenince of top CRF
        Args:
            sent: a list of sentences, batch_size*len*emb_dim
            mask: a list of mask for each sentence, batch_size*len
            label: a list labels
        Returns:
            (cls_loss, norm_pen): NLL classification loss plus a
            regularization term built from CRF transition penalties and
            the mean L1 norm of the selection marginals.
        '''
        #scores: batch_size*label_size
        #s_prob:batch_size*sent_len
        if self.config.if_reset:
            self.cat_layer.reset_binary()
        sents = self.cat_layer(sents, masks)
        scores, s_prob = self.compute_scores(sents, masks, lens)
        s_prob_norm = torch.stack([s.norm(1) for s in s_prob]).mean()
        # Penalize transitions that prefer entering/leaving the selected
        # state over staying, pushing the CRF toward contiguous selections.
        pena = F.relu(self.inter_crf.transitions[1,0] - self.inter_crf.transitions[0,0]) + \
            F.relu(self.inter_crf.transitions[0,1] - self.inter_crf.transitions[1,1])
        norm_pen = self.config.C1 * pena + self.config.C2 * s_prob_norm
        #print('Transition Penalty:', pena)
        #print('Marginal Penalty:', s_prob_norm)
        scores = F.log_softmax(scores, 1)#Batch_size*label_size
        cls_loss = self.loss(scores, labels)
        return cls_loss, norm_pen

    def predict(self, sents, masks, sent_lens):
        '''
        Predict labels for a batch.

        Returns:
            (pred_label, best_seqs): argmax labels and the CRF's best
            latent selection sequences.
        '''
        if self.config.if_reset:
            self.cat_layer.reset_binary()
        sents = self.cat_layer(sents, masks)
        scores, best_seqs = self.compute_scores(sents, masks, sent_lens, False)
        _, pred_label = scores.max(1)
        #Modified by Richard Sun
        return pred_label, best_seqs
class AspectSent(nn.Module):
    def __init__(self, config):
        '''
        MLSTM encoder + latent LinearCRF attention for single-sentence
        aspect sentiment classification (3 classes).

        Args:
            config: model configuration; reads l_hidden_size, if_reset,
                if_update_embed, C1, C2, batch_size.
        '''
        super(AspectSent, self).__init__()
        self.config = config
        self.cat_layer = SimpleCat(config)
        self.lstm = MLSTM(config)
        # Binary CRF: state 1 = "word selected for the aspect".
        self.feat2tri = nn.Linear(config.l_hidden_size, 2)
        self.inter_crf = LinearCRF(config)
        self.feat2label = nn.Linear(config.l_hidden_size, 3)
        self.cri = nn.CrossEntropyLoss()
        self.cat_layer.load_vector()
        if not config.if_update_embed:
            # Freeze pretrained word embeddings.
            self.cat_layer.word_embed.weight.requires_grad = False

    def compute_scores(self, sent, mask):
        '''
        Score one sentence (training path).

        Args:
            sent: token-id sequence (list/array), converted to LongTensor
            mask: aspect mask of the same length
        Returns:
            (label_scores, select_polarity, marginals):
            1-D label scores (3,), per-word selection probabilities, and
            the 2*sent_len transposed CRF marginals.
        '''
        if self.config.if_reset:
            self.cat_layer.reset_binary()
        # self.inter_crf.reset_transition()
        sent = torch.LongTensor(sent)
        mask = torch.LongTensor(mask)
        sent_vec = self.cat_layer(sent, mask)
        context = self.lstm(sent_vec)
        # feat_context = torch.cat([context, asp_v], 1) # sent_len * dim_sum
        feat_context = context  # sent_len * dim_sum
        tri_scores = self.feat2tri(feat_context)
        marginals = self.inter_crf(tri_scores)
        select_polarity = marginals[:, 1]
        marginals = marginals.transpose(0, 1)  # 2 * sent_len
        # Marginal-weighted sum of context vectors -> sentence vector.
        sent_v = torch.mm(select_polarity.unsqueeze(0), context)  # 1 * feat_dim
        label_scores = self.feat2label(sent_v).squeeze(0)
        return label_scores, select_polarity, marginals

    def compute_predict_scores(self, sent, mask):
        '''
        Score one sentence (inference path); same as compute_scores but
        also Viterbi-decodes the best latent selection sequence.

        Returns:
            (label_scores, select_polarity, best_seqs)
        '''
        if self.config.if_reset:
            self.cat_layer.reset_binary()
        # self.inter_crf.reset_transition()
        sent = torch.LongTensor(sent)
        mask = torch.LongTensor(mask)
        sent_vec = self.cat_layer(sent, mask)
        context = self.lstm(sent_vec)
        # feat_context = torch.cat([context, asp_v], 1) # sent_len * dim_sum
        feat_context = context  # sent_len * dim_sum
        tri_scores = self.feat2tri(feat_context)
        marginals = self.inter_crf(tri_scores)
        select_polarity = marginals[:, 1]
        best_seqs = self.inter_crf.predict(tri_scores)
        sent_v = torch.mm(select_polarity.unsqueeze(0), context)  # 1 * feat_dim
        label_scores = self.feat2label(sent_v).squeeze(0)
        return label_scores, select_polarity, best_seqs

    def forward(self, sent, mask, label):
        '''
        inputs are list of list for the convenince of top CRF

        Returns:
            scalar loss: -log p(label) plus transition/selection penalty.
        '''
        # scores = self.compute_scores(sents, ents, asps, labels)
        scores, s_prob, marginal_prob = self.compute_scores(sent, mask)
        # Penalize transitions that prefer switching state over staying,
        # encouraging contiguous selected spans.
        pena = F.relu(self.inter_crf.transitions[1, 0] - self.inter_crf.transitions[0, 0]) + \
            F.relu(self.inter_crf.transitions[0, 1] - self.inter_crf.transitions[1, 1])
        norm_pen = (self.config.C1 * pena + self.config.C2 * s_prob.norm(1)) / self.config.batch_size
        # scores is 1-D (label_scores was squeezed), so softmax over dim 0.
        # (Fix: the old call omitted dim, which is deprecated/ambiguous.)
        scores = F.softmax(scores, dim=0)
        cls_loss = -1 * torch.log(scores[label])
        # Fix: was a Python-2 print statement and `.data[0]`, which is a
        # SyntaxError / runtime error on Python 3 + modern PyTorch.
        print("cls loss {0} with penalty {1}".format(cls_loss.item(), norm_pen.item()))
        return cls_loss + norm_pen

    def predict(self, sent, mask):
        '''
        Predict the label for one sentence.

        Returns:
            (label_index, best_seqs): Python int argmax label and the
            CRF's best latent sequence.
        '''
        scores, s_probs, best_seqs = self.compute_predict_scores(sent, mask)
        _, pred_label = scores.max(0)
        # .item() replaces the deprecated .data[0] on a 0-dim tensor.
        return pred_label.item(), best_seqs
class AspectSent(nn.Module):
    def __init__(self, config):
        '''
        biLSTM encoder with four parallel latent-CRF attention heads for
        aspect-level sentiment classification (3 classes).

        Args:
            config: model configuration; reads l_hidden_size, dropout,
                dropout2, if_reset (plus whatever biLSTM/SimpleCat read).
        '''
        super(AspectSent, self).__init__()
        self.config = config
        self.input_dim = config.l_hidden_size  # + config.pos_dim
        kernel_num = config.l_hidden_size  # + config.pos_dim
        reduced_size = int(config.l_hidden_size / 4)
        self.conv = nn.Conv1d(self.input_dim, kernel_num, 3, padding=1)  # not used
        self.bilstm = biLSTM(config)
        # Four heads: each projects the context to reduced_size, scores a
        # 4-state (2+2) linear-chain CRF, and attends with its marginals.
        self.feat2tri = nn.Linear(reduced_size, 2 + 2)
        self.inter_crf = LinearChainCrf(2 + 2)
        self.h1linear = nn.Linear(self.input_dim, reduced_size)
        self.feat2tri2 = nn.Linear(reduced_size, 2 + 2)
        self.inter_crf2 = LinearChainCrf(2 + 2)
        self.h2linear = nn.Linear(self.input_dim, reduced_size)
        self.feat2tri3 = nn.Linear(reduced_size, 2 + 2)
        self.inter_crf3 = LinearChainCrf(2 + 2)
        self.h3linear = nn.Linear(self.input_dim, reduced_size)
        self.feat2tri4 = nn.Linear(reduced_size, 2 + 2)
        self.inter_crf4 = LinearChainCrf(2 + 2)
        self.h4linear = nn.Linear(self.input_dim, reduced_size)
        self.feat2label = nn.Linear(kernel_num, 3)
        # Classifier over the concatenation of the four head summaries.
        self.feat2label2 = nn.Linear(self.input_dim * 4, 3)
        # gcn - not used for current model
        self.W = nn.ModuleList()
        self.W.append(nn.Linear(config.l_hidden_size, config.l_hidden_size))
        self.W.append(nn.Linear(config.l_hidden_size, config.l_hidden_size))
        self.W.append(nn.Linear(200, 100))
        # cnn - not used for current model
        self.filters = [3]
        self.convs = nn.ModuleList([
            nn.Conv1d(in_channels=1,
                      out_channels=config.l_hidden_size,
                      kernel_size=(k, config.l_hidden_size + 2),
                      padding=1) for k in self.filters
        ])
        self.loss = nn.NLLLoss()
        self.tanh = nn.Tanh()
        self.dropout = nn.Dropout(config.dropout2)
        self.dropout2 = nn.Dropout(config.dropout)
        self.cat_layer = SimpleCat(config)
        self.cat_layer.load_vector()
        self.sigmoid = nn.Sigmoid()

    def get_pos_weight(self, masks, lens):
        '''
        Get positional weight: 1.0 on target tokens, linearly decaying
        with distance from the target span elsewhere, 0 on padding.

        Args:
            masks: batch_size*max_len, 1 on target-span tokens
            lens: batch_size true lengths
        Returns:
            pos_wghts: batch_size*max_len float tensor (CPU-allocated).
        '''
        pos_wghts = torch.zeros(masks.size())
        t_num = masks.sum(1)
        for i, m in enumerate(masks):
            # First index where the 0/1 mask is 1 = target span start.
            begin = m.argmax()
            for j, b in enumerate(m):
                # padding words' weights are zero
                if j > lens[i]:
                    break
                if j < begin:
                    pos_wghts[i][j] = 1 - (begin - j).to(torch.float) / lens[i].to(torch.float)
                if b == 1:
                    pos_wghts[i][j] = 1
                if j > begin + t_num[i]:
                    pos_wghts[i][j] = 1 - (j - begin).to(torch.float) / lens[i].to(torch.float)
        return pos_wghts

    def get_target_emb(self, context, masks):
        '''
        Average the context vectors over the target span.

        Args:
            context: batch_size*max_len*hidden_dim
            masks: batch_size*max_len, 1 on target tokens
        Returns:
            batch_size*hidden_dim masked mean of context.
        '''
        # Find target indices, a list of indices
        batch_size, max_len, hidden_dim = context.size()
        # NOTE(review): return values unused here; kept for parity with
        # the sibling implementation — confirm convert_mask_index has no
        # required side effects.
        target_indices, target_max_len = convert_mask_index(masks)
        # Broadcast the mask to batch_size*max_len*hidden_dim.
        masks = masks.type_as(context)
        masks = masks.expand(hidden_dim, batch_size, max_len).transpose(0, 1).transpose(1, 2)
        target_emb = masks * context
        target_emb_avg = torch.sum(target_emb, 1) / torch.sum(masks, 1)  # Batch_size*embedding
        return target_emb_avg

    def _head_attend(self, proj_linear, feat2tri, crf, context, word_mask, lens):
        '''
        Run one attention head: project context, compute CRF marginals,
        and build marginal-weighted sentence vectors.

        Args:
            proj_linear, feat2tri, crf: this head's modules
            context: batch_size*max_len*hidden_dim encoder output
            word_mask: batch_size*max_len 0/1 validity mask
            lens: batch_size true lengths
        Returns:
            (marginals, polarities, sent_vs): raw CRF marginals, the
            sum-normalized selection weights per sentence, and a list of
            1*hidden_dim attended vectors.
        '''
        projected = proj_linear(context)
        feats = feat2tri(projected)  # Batch_size*sent_len*4
        marginals = crf.compute_marginal(feats, word_mask.type_as(feats))
        # Column 1 = probability of the "selected" state.
        polarities = [marginal[:, 1] for marginal in marginals]
        gammas = [sp.sum() for sp in polarities]
        polarities = [sp / gamma for sp, gamma in zip(polarities, gammas)]
        sent_vs = [
            torch.mm(sp.unsqueeze(0), context[i, :lens[i], :])
            for i, sp in enumerate(polarities)
        ]
        return marginals, polarities, sent_vs

    def compute_scores(self, sents, masks, lens, is_training=True):
        '''
        Args:
            sents: batch_size*max_len*word_dim
            masks: batch_size*max_len
            lens: batch_size
            is_training: selects the auxiliary return values
        Returns:
            training: (label_scores, 0, 0, 0)
            inference: (label_scores, raw marginal polarities per head,
                        concatenated sentence vectors, normalized
                        polarities per head)
        '''
        batch_size, max_len = masks.size()
        target_indices, target_max_len = convert_mask_index(masks)
        sents, mask, pos = self.cat_layer(sents, masks)
        sents = self.dropout2(sents)
        sents = torch.cat([sents, mask], 2)
        context = self.bilstm(sents, lens)  # Batch_size*sent_len*hidden_dim
        # Position weights scale each time step of the context.
        pos = [x.unsqueeze(1).expand(max_len, self.input_dim) for x in pos]
        pos = torch.stack(pos)
        context = torch.mul(context, pos)
        batch_size, max_len, hidden_dim = context.size()
        # Fix: torch.zeros replaces the deprecated integer-fill
        # torch.full((batch_size, max_len), 0); same float zeros.
        word_mask = torch.zeros(batch_size, max_len)
        for i in range(batch_size):
            word_mask[i, :lens[i]] = 1.0
        # The four heads share the same computation; only the modules differ.
        marginals1, sp1, sent_vs1 = self._head_attend(
            self.h1linear, self.feat2tri, self.inter_crf, context, word_mask, lens)
        marginals2, sp2, sent_vs2 = self._head_attend(
            self.h2linear, self.feat2tri2, self.inter_crf2, context, word_mask, lens)
        marginals3, sp3, sent_vs3 = self._head_attend(
            self.h3linear, self.feat2tri3, self.inter_crf3, context, word_mask, lens)
        marginals4, sp4, sent_vs4 = self._head_attend(
            self.h4linear, self.feat2tri4, self.inter_crf4, context, word_mask, lens)
        # Fix: allocate on the encoder's device instead of the original
        # hard-coded .cuda(), so the model also runs on CPU.
        sent_vs = torch.zeros(batch_size, hidden_dim * 4, device=context.device)
        for i in range(batch_size):
            sent_vs[i] = torch.cat(
                (sent_vs1[i], sent_vs2[i], sent_vs3[i], sent_vs4[i]), dim=1)
        # Raw (unnormalized) marginal polarities, one list per head.
        select_polarities = [[marginal[:, 1] for marginal in marginals1],
                             [marginal[:, 1] for marginal in marginals2],
                             [marginal[:, 1] for marginal in marginals3],
                             [marginal[:, 1] for marginal in marginals4]]
        sent_vs = F.relu(self.dropout(sent_vs))
        # squeeze(0) collapses the batch axis when batch_size == 1;
        # callers restore it before softmax.
        label_scores = self.feat2label2(sent_vs).squeeze(0)
        if is_training:
            return label_scores, 0, 0, 0
        else:
            return label_scores, select_polarities, sent_vs, [
                sp1, sp2, sp3, sp4
            ]

    def forward(self, sents, masks, labels, lens):
        '''
        inputs are list of list for the convenince of top CRF
        Args:
            sent: a list of sentences, batch_size*len*emb_dim
            mask: a list of mask for each sentence, batch_size*len
            label: a list labels
        Returns:
            (cls_loss, 0): NLL classification loss and a zero
            regularization placeholder.
        '''
        if self.config.if_reset:
            self.cat_layer.reset_binary()
        scores, s_prob, sent_vs, p = self.compute_scores(sents, masks, lens)
        # Fix: compute_scores squeezes the batch axis when batch_size == 1
        # (predict already compensated but forward did not), which made
        # log_softmax(scores, 1) crash; restore the axis here.
        if scores.dim() == 1:
            scores = scores.unsqueeze(0)
        scores = F.log_softmax(scores, 1)  # Batch_size*label_size
        cls_loss = self.loss(scores, labels)
        return cls_loss, 0

    def predict(self, sents, masks, sent_lens):
        '''
        Predict labels for a batch.

        Returns:
            (pred_label, best_seqs, sent_vs, p): argmax labels, raw
            per-head polarities, concatenated sentence vectors, and
            normalized per-head polarities.
        '''
        if self.config.if_reset:
            self.cat_layer.reset_binary()
        scores, best_seqs, sent_vs, p = self.compute_scores(
            sents, masks, sent_lens, False)
        batch, length = sents.size()
        if batch == 1:
            # Restore the batch axis squeezed away in compute_scores.
            _, pred_label = scores.unsqueeze(0).max(1)
        else:
            _, pred_label = scores.max(1)
        return pred_label, best_seqs, sent_vs, p