# Imports used by the trainer snippets below (the exact module paths are assumptions
# inferred from how GCNClassifier, constant, and torch_utils are used in this file):
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from model.gcn import GCNClassifier
from utils import constant, torch_utils


def __init__(self, opt, emb_matrix=None):
    self.opt = opt
    self.emb_matrix = emb_matrix
    self.model = GCNClassifier(opt, emb_matrix=emb_matrix)
    self.model.to(self.opt["device"])
    # note: the extra .cuda() call is redundant when opt["device"] is already a CUDA device
    if opt['cuda']:
        self.model.cuda()
def __init__(self, opt, lbstokens, emb_matrix=None, cls=None):
    self.opt = opt
    self.emb_matrix = emb_matrix
    if cls is None:
        self.model = GCNClassifier(opt, lbstokens, emb_matrix=emb_matrix)
    else:
        self.model = GCNClassifier(opt, lbstokens, emb_matrix=emb_matrix, cls=cls)
    self.cls = cls
    self.rel_types = len(constant.LABEL_TO_ID)
    # confusion statistics: loss_matrix holds the current penalty table,
    # miss_matrix accumulates misclassification counts between updates
    self.loss_matrix = torch.zeros((self.rel_types, self.rel_types), requires_grad=False)
    self.miss_matrix = torch.zeros((self.rel_types, self.rel_types), requires_grad=False)
    # self.alpha = torch.full((1,), 0.1, requires_grad=True)
    # self.beta = torch.full((1,), 0.1, requires_grad=True)
    # self.model = nn.DataParallel(GCNClassifier(opt, emb_matrix=emb_matrix), device_ids=[0, 1, 2, 3])
    # self.model.half()
    print(self.get_parameter_number(self.model))
    self.soft_criterion = SoftCrossEntropyLoss()
    self.criterion = nn.CrossEntropyLoss()
    self.parameters = [p for p in self.model.parameters() if p.requires_grad]
    if opt['cuda']:
        self.model.cuda()
        self.criterion.cuda()
        self.loss_matrix = self.loss_matrix.cuda()
        self.miss_matrix = self.miss_matrix.cuda()
    self.optimizer = torch_utils.get_optimizer(opt['optim'], [{'params': self.parameters}], opt['lr'])
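# `SoftCrossEntropyLoss` is instantiated above but not defined in this section. The
# sketch below is a minimal, assumed implementation -- cross entropy against soft
# (distributional) targets -- and not necessarily the repo's actual class.
class SoftCrossEntropyLoss(nn.Module):
    """Cross entropy where the target is a probability distribution over classes."""

    def forward(self, logits, soft_targets):
        # log-probabilities of the model's predictions
        log_probs = F.log_softmax(logits, dim=-1)
        # expected negative log-likelihood under the soft targets, averaged over the batch
        return -(soft_targets * log_probs).sum(dim=-1).mean()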
def __init__(self, opt, emb_matrix=None):
    self.opt = opt
    self.emb_matrix = emb_matrix
    self.model = GCNClassifier(opt, emb_matrix=emb_matrix)
    self.criterion = nn.CrossEntropyLoss()
    self.parameters = [p for p in self.model.parameters() if p.requires_grad]
    if opt['cuda']:
        self.model.cuda()
        self.criterion.cuda()
    self.optimizer = torch_utils.get_optimizer(opt['optim'], self.parameters, opt['lr'])
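# `torch_utils.get_optimizer` is a repo helper not shown in this section. A minimal
# sketch of what it likely does, assuming it dispatches on the optimizer name (the
# set of supported names here is an assumption):
def get_optimizer(name, parameters, lr):
    if name == 'sgd':
        return torch.optim.SGD(parameters, lr=lr)
    elif name == 'adagrad':
        return torch.optim.Adagrad(parameters, lr=lr)
    elif name == 'adam':
        return torch.optim.Adam(parameters, lr=lr)
    elif name == 'adamax':
        return torch.optim.Adamax(parameters, lr=lr)
    else:
        raise ValueError('Unsupported optimizer: {}'.format(name))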
class GCNTrainer(Trainer):
    def __init__(self, opt, emb_matrix=None):
        self.opt = opt
        self.emb_matrix = emb_matrix
        self.model = GCNClassifier(opt, emb_matrix=emb_matrix)
        self.criterion = nn.CrossEntropyLoss()
        self.parameters = [p for p in self.model.parameters() if p.requires_grad]
        if opt['cuda']:
            self.model.cuda()
            self.criterion.cuda()
        self.optimizer = torch_utils.get_optimizer(opt['optim'], self.parameters, opt['lr'])

    def update(self, batch):
        inputs, labels, tokens, head, first_pos, second_pos, third_pos, cross, lens = unpack_batch(
            batch, self.opt['cuda'])
        # step forward
        self.model.train()
        self.optimizer.zero_grad()
        logits, pooling_output = self.model(inputs)
        loss = self.criterion(logits, labels)
        # l2 decay on all conv layers
        if self.opt.get('conv_l2', 0) > 0:
            loss += self.model.conv_l2() * self.opt['conv_l2']
        # l2 penalty on output representations
        if self.opt.get('pooling_l2', 0) > 0:
            loss += self.opt['pooling_l2'] * (pooling_output**2).sum(1).mean()
        loss_val = loss.item()
        # backward
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.opt['max_grad_norm'])
        self.optimizer.step()
        return loss_val

    def predict(self, batch, unsort=True):
        inputs, labels, tokens, head, first_pos, second_pos, third_pos, cross, lens = unpack_batch(
            batch, self.opt['cuda'])
        # orig_idx = batch[9]
        orig_idx = batch[10]
        # forward
        self.model.eval()
        logits, _ = self.model(inputs)
        loss = self.criterion(logits, labels)
        probs = F.softmax(logits, 1).data.cpu().numpy().tolist()
        predictions = np.argmax(logits.data.cpu().numpy(), axis=1).tolist()
        if unsort:
            _, predictions, probs = [list(t) for t in
                                     zip(*sorted(zip(orig_idx, predictions, probs)))]
        return predictions, probs, loss.item()
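# Hedged usage sketch for the trainer above. The `opt` keys mirror those the class
# reads; `train_batches` and `dev_batch` are illustrative placeholders, not objects
# defined in this file.
# opt = {'cuda': torch.cuda.is_available(), 'optim': 'sgd', 'lr': 1.0,
#        'conv_l2': 0.0, 'pooling_l2': 0.0, 'max_grad_norm': 5.0}
# trainer = GCNTrainer(opt)
# for batch in train_batches:
#     train_loss = trainer.update(batch)      # one optimization step per batch
# predictions, probs, dev_loss = trainer.predict(dev_batch)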
class GCNTrainer(Trainer):
    def __init__(self, opt, emb_matrix=None):
        self.opt = opt
        self.emb_matrix = emb_matrix
        self.model = GCNClassifier(opt, emb_matrix=emb_matrix)
        self.model.to(self.opt["device"])
        if opt['cuda']:
            self.model.cuda()

    def predict(self, batch, unsort=True):
        input_ids, input_mask, segment_ids, label_id = batch[12], batch[13], batch[14], batch[15]
        inputs, labels, tokens, head, subj_pos, obj_pos, lens = unpack_batch(batch, self.opt['cuda'])
        # sequence lengths recovered from the zero-padded mask in inputs[1]
        l = (inputs[1].data.cpu().numpy() == 0).astype(np.int64).sum(1)
        orig_idx = batch[11]
        device = self.opt["device"]
        input_ids = input_ids.to(device)
        input_mask = input_mask.to(device)
        segment_ids = segment_ids.to(device)
        label_id = label_id.to(device)
        self.model.eval()
        with torch.no_grad():
            logits, loss = self.model(inputs, input_ids, input_mask, segment_ids, label_id, l, b=1)
        probs = F.softmax(logits, 1).data.cpu().numpy().tolist()
        predictions = np.argmax(logits.data.cpu().numpy(), axis=1).tolist()
        if unsort:
            _, predictions, probs = [list(t) for t in
                                     zip(*sorted(zip(orig_idx, predictions, probs)))]
        return predictions, probs, loss.item()
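# Worked example of the `unsort` step used in predict above (illustrative values):
# batches are length-sorted before padding, and `orig_idx` remembers each example's
# original position, so sorting by it restores the input order.
#   orig_idx    = [2, 0, 1]
#   predictions = [p2, p0, p1]
#   sorted(zip(orig_idx, predictions)) -> [(0, p0), (1, p1), (2, p2)]
#   -> predictions == [p0, p1, p2]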
class GCNTrainer(Trainer):
    def __init__(self, opt, lbstokens, emb_matrix=None, cls=None):
        self.opt = opt
        self.emb_matrix = emb_matrix
        if cls is None:
            self.model = GCNClassifier(opt, lbstokens, emb_matrix=emb_matrix)
        else:
            self.model = GCNClassifier(opt, lbstokens, emb_matrix=emb_matrix, cls=cls)
        self.cls = cls
        self.rel_types = len(constant.LABEL_TO_ID)
        self.loss_matrix = torch.zeros((self.rel_types, self.rel_types), requires_grad=False)
        self.miss_matrix = torch.zeros((self.rel_types, self.rel_types), requires_grad=False)
        # self.alpha = torch.full((1,), 0.1, requires_grad=True)
        # self.beta = torch.full((1,), 0.1, requires_grad=True)
        # self.model = nn.DataParallel(GCNClassifier(opt, emb_matrix=emb_matrix), device_ids=[0, 1, 2, 3])
        # self.model.half()
        print(self.get_parameter_number(self.model))
        self.soft_criterion = SoftCrossEntropyLoss()
        self.criterion = nn.CrossEntropyLoss()
        self.parameters = [p for p in self.model.parameters() if p.requires_grad]
        if opt['cuda']:
            self.model.cuda()
            self.criterion.cuda()
            self.loss_matrix = self.loss_matrix.cuda()
            self.miss_matrix = self.miss_matrix.cuda()
        self.optimizer = torch_utils.get_optimizer(opt['optim'], [{'params': self.parameters}], opt['lr'])

    def get_parameter_number(self, net):
        total_num = sum(p.numel() for p in net.parameters())
        trainable_num = sum(p.numel() for p in net.parameters() if p.requires_grad)
        return {'Total': total_num, 'Trainable': trainable_num}

    def update(self, batch, is_soft=False):
        inputs, labels, tokens, distance, ids, iscross = unpack_batch(batch, self.opt['cuda'])
        # step forward
        self.model.train()
        self.optimizer.zero_grad()
        logits, pooling_output = self.model(inputs)
        # commented-out experiment: weight the loss by the history of misclassifications
        # batch_size = labels.shape[0]
        # rel_types = len(constant.LABEL_TO_ID)
        # indices = torch.cat((labels.unsqueeze(0), logits.argmax(dim=-1).unsqueeze(0)), dim=0)
        # extra_loss = torch.from_numpy(np.apply_along_axis(calchis, -1, self.loss_matrix.cpu().numpy()))
        # extra_loss = F.softmax(extra_loss.reshape(-1)).reshape(rel_types, rel_types)[tuple(indices.cpu().detach().numpy().tolist())]
        # calc & update history loss
        # margin = self.loss_matrix[labels]
        # indices = torch.cat((torch.linspace(0, batch_size - 1, batch_size).unsqueeze(0).long().cuda(), labels.unsqueeze(0)))
        # gold_mask = torch.sparse_coo_tensor(indices, torch.ones(batch_size),
        #                                     (batch_size, rel_types)).to_dense().cuda()
        # self.logits = (1 + (distance / 14.0)).unsqueeze(-1).mul(logits)
        # logits = logits + 0.15 * torch.abs(logits.mul(margin))
        # indices = torch.cat((torch.linspace(0, batch_size - 1, batch_size).unsqueeze(0).long(), labels.unsqueeze(0)))
        # gold_logits = torch.masked_select(logits, gold_mask == 1)
        # dif = (logits.max(dim=-1)[0] - gold_logits).detach()
        # extra_loss = dif.mul(extra_loss.float()).sum().squeeze()
        # loss += extra_loss
        if is_soft:
            # if self.cls is not None:
            #     labels = torch.FloatTensor([[1 - l[self.cls], l[self.cls]] for l in labels])
            #     if self.opt['cuda']:
            #         labels = labels.cuda()
            labels = labels.float()
            loss = self.soft_criterion(logits, labels)
        else:
            # if self.cls is not None:
            #     labels = torch.LongTensor([l if l in self.cls else 0 for l in labels])
            #     if self.opt['cuda']:
            #         labels = labels.cuda()
            loss = self.criterion(logits, labels)
        # l2 decay on all conv layers
        if self.opt.get('conv_l2', 0) > 0:
            loss += self.model.conv_l2() * self.opt['conv_l2']
        # l2 penalty on output representations
        if self.opt.get('pooling_l2', 0) > 0:
            loss += self.opt['pooling_l2'] * (pooling_output**2).sum(1).mean()
        loss_val = loss.item()
        # print(((predictions) * (labels != 0) * 1).sum())
        # commented-out experiment: accumulate per-pair misclassification mass
        # indices = torch.cat((labels.unsqueeze(0), logits.argmax(dim=-1).unsqueeze(0)), dim=0).detach()
        # mismat = torch.sparse_coo_tensor(indices, dif,
        #                                  (rel_types, rel_types)).to_dense()
        # mismask = torch.sparse_coo_tensor(indices, torch.ones(batch_size),
        #                                   (rel_types, rel_types)).to_dense().cuda()
        # mismask = mismask - 1
        # mismask = mismask.masked_fill(mismask >= 0, 0)
        # mismat += mismask
        # self.miss_matrix += mismask
        # backward
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.opt['max_grad_norm'])
        self.optimizer.step()
        return loss_val

    def predict(self, batch, unsort=True):
        inputs, labels, tokens, distance, ids, iscross = unpack_batch(batch, self.opt['cuda'])
        orig_idx = batch[21]
        batch_size = labels.shape[0]
        # forward
        self.model.eval()
        logits, _ = self.model(inputs)
        # labels_map = dict(zip(self.cls, list(range(1, 1 + len(self.cls)))))
        # if self.cls is not None:
        #     labels = labels.cpu().detach().numpy().tolist()
        #     labels = torch.LongTensor([labels_map[l] if l in self.cls else 0 for l in labels])
        #     if self.opt['cuda']:
        #         labels = labels.cuda()
        # raw_predictions = np.argmax(logits.data.cpu().numpy(), axis=1).tolist()
        # logits[:, 1:] = logits[:, 1:].mul((iscross == 0).unsqueeze(-1).float().cuda())
        loss = self.criterion(logits, labels)
        probs = F.softmax(logits, dim=-1).data.cpu().numpy().tolist()
        predictions = np.argmax(logits.data.cpu().numpy(), axis=1).tolist()
        # collect the ids of misclassified examples
        samples = []
        for i in range(batch_size):
            if predictions[i] != labels[i]:
                samples.append(ids[i])
        NER2ID = constant.NER_TO_ID
        ID2NER = dict([(v, k) for k, v in NER2ID.items()])
        if unsort:
            _, predictions, probs = [list(t) for t in
                                     zip(*sorted(zip(orig_idx, predictions, probs)))]
        ids = [str(i) for i in ids]
        probs = dict(zip(ids, probs))
        return predictions, probs, loss.item(), samples

    def updatemisclass(self):
        # turn accumulated confusion counts into a row-normalized penalty table,
        # zero the diagonal (correct predictions), and reset the counter
        self.loss_matrix = self.miss_matrix.clone()
        self.loss_matrix = self.loss_matrix / (self.loss_matrix.sum(dim=-1).unsqueeze(-1))
        np.save('missclass.npy', self.miss_matrix.cpu().detach().numpy())
        self.loss_matrix = self.loss_matrix.masked_fill(torch.eye(self.rel_types).cuda() == 1, 0)
        self.miss_matrix = self.miss_matrix.zero_()
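# Hedged driver sketch for the misclassification statistics above: `updatemisclass`
# converts the confusion counts in `miss_matrix` (the accumulation itself is commented
# out in update) into the row-normalized, diagonal-free `loss_matrix` and resets the
# counter. One plausible per-epoch loop (all names besides the trainer methods are
# assumptions):
# for epoch in range(opt['num_epoch']):
#     for batch in train_batches:
#         trainer.update(batch)
#     trainer.updatemisclass()   # refresh the penalty table once per epoch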