# Common imports assumed by every trainer variant in this section; the snippets
# additionally rely on repo-local modules (GCNClassifier, torch_utils,
# unpack_batch, constant, ...) that are not reproduced here.
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F


def __init__(self, opt, emb_matrix=None):
    self.opt = opt
    self.emb_matrix = emb_matrix
    self.model = GCNClassifier(opt, emb_matrix=emb_matrix)
    self.criterion = nn.CrossEntropyLoss()
    self.parameters = [p for p in self.model.parameters() if p.requires_grad]
    if opt['cuda']:
        self.model.cuda()
        self.criterion.cuda()
    self.optimizer = torch_utils.get_optimizer(opt['optim'], self.parameters, opt['lr'])
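# The trainers in this section obtain their optimizer from
# torch_utils.get_optimizer; a minimal sketch of what such a helper typically
# looks like (an assumption for illustration, not the repo's actual code):
import torch.optim as optim

def get_optimizer_sketch(name, parameters, lr):
    """Map an optimizer name from the config to a torch.optim instance."""
    if name == 'sgd':
        return optim.SGD(parameters, lr=lr)
    if name == 'adagrad':
        return optim.Adagrad(parameters, lr=lr)
    if name == 'adam':
        return optim.Adam(parameters, lr=lr)
    if name == 'adamax':
        return optim.Adamax(parameters, lr=lr)
    raise ValueError('Unsupported optimizer: ' + name)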
class GCNTrainer(Trainer):
    def __init__(self, opt, emb_matrix=None):
        self.opt = opt
        self.emb_matrix = emb_matrix
        self.model = GCNClassifier(opt, emb_matrix=emb_matrix)
        self.criterion = nn.CrossEntropyLoss()
        self.parameters = [p for p in self.model.parameters() if p.requires_grad]
        if opt['cuda']:
            self.model.cuda()
            self.criterion.cuda()
        self.optimizer = torch_utils.get_optimizer(opt['optim'], self.parameters, opt['lr'])

    def update(self, batch):
        inputs, labels, tokens, head, subj_pos, obj_pos, lens = unpack_batch(batch, self.opt['cuda'])

        # step forward
        self.model.train()
        self.optimizer.zero_grad()
        logits, pooling_output = self.model(inputs)
        loss = self.criterion(logits, labels)
        # l2 decay on all conv layers
        if self.opt.get('conv_l2', 0) > 0:
            loss += self.model.conv_l2() * self.opt['conv_l2']
        # l2 penalty on output representations
        if self.opt.get('pooling_l2', 0) > 0:
            loss += self.opt['pooling_l2'] * (pooling_output ** 2).sum(1).mean()
        loss_val = loss.item()

        # backward
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.opt['max_grad_norm'])
        self.optimizer.step()
        return loss_val

    def predict(self, batch, unsort=True):
        inputs, labels, tokens, head, subj_pos, obj_pos, lens = unpack_batch(batch, self.opt['cuda'])
        orig_idx = batch[11]

        # forward
        self.model.eval()
        logits, _ = self.model(inputs)
        loss = self.criterion(logits, labels)
        probs = F.softmax(logits, 1).data.cpu().numpy().tolist()
        predictions = np.argmax(logits.data.cpu().numpy(), axis=1).tolist()
        if unsort:
            _, predictions, probs = [list(t) for t in
                                     zip(*sorted(zip(orig_idx, predictions, probs)))]
        return predictions, probs, loss.item()
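# Quick, self-contained check of the pooling_l2 penalty used in update():
# (pooling_output ** 2).sum(1).mean() is the mean squared L2 norm of the pooled
# sentence representations. Toy values, illustration only.
import torch

pooling_output = torch.tensor([[1.0, 2.0],
                               [3.0, 4.0]])
penalty = (pooling_output ** 2).sum(1).mean()  # ((1+4) + (9+16)) / 2 = 15.0
assert penalty.item() == 15.0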
class GCNTrainer(Trainer):
    def __init__(self, opt, emb_matrix=None):
        self.opt = opt
        self.emb_matrix = emb_matrix
        self.model = GCNClassifier(opt, emb_matrix=emb_matrix)
        self.criterion = nn.CrossEntropyLoss()
        self.parameters = [p for p in self.model.parameters() if p.requires_grad]
        if opt['cuda']:
            self.model.cuda()
            self.criterion.cuda()
        self.optimizer = torch_utils.get_optimizer(opt['optim'], self.parameters, opt['lr'])

    def update(self, batch):
        inputs, labels, lens = unpack_batch(batch, self.opt['cuda'])

        # step forward
        self.model.train()
        self.optimizer.zero_grad()
        logits, pooling_output, g, sparse_graph, c1, c2 = self.model(inputs)
        loss = self.criterion(logits, labels)
        # l2 decay on all conv layers
        if self.opt.get('conv_l2', 0) > 0:
            loss += self.model.conv_l2() * self.opt['conv_l2']
        # l2 penalty on output representations (disabled)
        # if self.opt.get('pooling_l2', 0) > 0:
        #     loss += self.opt['pooling_l2'] * (pooling_output ** 2).sum(1).mean()
        # earlier regularizer experiments, kept for reference (disabled):
        # loss += 0.0000001 * self.hloss(g.view(-1, g.shape[1]))
        # loss += 0.0000000001 * torch.norm(torch.abs(g.view(-1, g.shape[-1]) - sparse_graph.view(-1, sparse_graph.shape[-1])))
        # c1l = c1.pow(2).sum(1).sqrt().unsqueeze(1)
        # c2l = c2.pow(2).sum(1).sqrt().unsqueeze(1)
        # loss_pred = -(torch.mm(c1, c2.transpose(0, 1)) / torch.mm(c1l, c2l.transpose(0, 1))).diag().abs().mean() + (c1l - c2l).abs().mean()

        # use c2's argmax as hard targets for the auxiliary prediction loss
        c2 = torch.max(c2, 1)[1]
        loss_pred = self.criterion(c1, c2)
        loss += loss_pred
        loss_val = loss.item()

        # backward
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.opt['max_grad_norm'])
        self.optimizer.step()
        return loss_val, loss_pred.item()

    def predict(self, batch, unsort=True):
        inputs, labels, lens = unpack_batch(batch, self.opt['cuda'])
        # orig_idx = batch[11]

        # forward
        self.model.eval()
        logits, _, _, _, _, _ = self.model(inputs)
        loss = self.criterion(logits, labels)
        probs = F.softmax(logits, 1).data.cpu().numpy().tolist()
        predictions = np.argmax(logits.data.cpu().numpy(), axis=1).tolist()
        # if unsort:
        #     _, predictions, probs = [list(t) for t in zip(*sorted(zip(orig_idx, predictions, probs)))]
        return predictions, probs, loss.item()
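# Self-contained illustration of the auxiliary loss above: torch.max(c2, 1)[1]
# collapses c2's per-row scores into hard class indices, which CrossEntropyLoss
# then treats as targets for c1. Toy tensors, illustration only.
import torch
import torch.nn as nn

c1 = torch.randn(4, 3)              # predicted logits: batch of 4, 3 classes
c2 = torch.randn(4, 3)              # scores from the second branch
hard_targets = torch.max(c2, 1)[1]  # shape (4,), dtype torch.long
loss_pred = nn.CrossEntropyLoss()(c1, hard_targets)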
def __init__(self, opt, emb_matrix=None):
    self.opt = opt
    self.emb_matrix = emb_matrix
    self.model = GCNClassifier(opt, emb_matrix=emb_matrix)
    self.criterion = nn.CrossEntropyLoss(reduction="none")
    self.parameters = [p for p in self.model.parameters() if p.requires_grad]
    self.crf = CRF(self.opt['num_class'], batch_first=True)
    self.bc = nn.BCELoss()
    if opt['cuda']:
        self.model.cuda()
        self.criterion.cuda()
        self.crf.cuda()
        self.bc.cuda()
    self.optimizer = torch_utils.get_optimizer(opt['optim'], self.parameters, opt['lr'])
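# The CRF here is presumably the pytorch-crf package's layer (an assumed
# dependency); a minimal sketch of the calls the CRF-based variants below make.
# forward() returns the sequence log-likelihood, hence the negation at train time.
import torch
from torchcrf import CRF

crf = CRF(num_tags=5, batch_first=True)
emissions = torch.randn(2, 7, 5)       # (batch, seq_len, num_tags)
tags = torch.randint(0, 5, (2, 7))
nll = -crf(emissions, tags)            # training loss
best_paths = crf.decode(emissions)     # list of best tag sequences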
def __init__(self, model_files):
    self.models = []
    for model_file in model_files:
        opt = torch_utils.load_config(model_file)
        model = GCNClassifier(opt)
        checkpoint = self.get_checkpoint(model_file)
        model.load_state_dict(checkpoint['model'])
        if opt['cuda']:
            model.cuda()
        self.models.append(model)
def __init__(self, model_stuff_list: List[ModelStuff], biassed_prediction=None):
    self.id2label = {v: k for k, v in constant.LABEL_TO_ID.items()}
    self.models = OrderedDict()
    self.biassed_prediction = biassed_prediction
    for model_stuff in model_stuff_list:
        self.models[model_stuff.representation] = []
        for model_file in model_stuff.files:
            opt = torch_utils.load_config(model_file)
            model = GCNClassifier(opt)
            checkpoint = self.get_checkpoint(model_file)
            model.load_state_dict(checkpoint['model'])
            if opt['cuda']:
                model.cuda()
            self.models[model_stuff.representation].append(model)
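# The ensemble classes above only load checkpoints; how they combine member
# outputs is not shown. One plausible rule (purely an assumption for
# illustration) is to average softmax probabilities across members:
import torch
import torch.nn.functional as F

def ensemble_probs_sketch(models, inputs):
    """Hypothetical helper: mean class probabilities over ensemble members."""
    with torch.no_grad():
        probs = [F.softmax(model(inputs)[0], dim=1) for model in models]
    return torch.stack(probs, dim=0).mean(dim=0)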
class GCNTrainer(Trainer):
    def __init__(self, opt, emb_matrix=None):
        self.opt = opt
        self.emb_matrix = emb_matrix
        self.model = GCNClassifier(opt, emb_matrix=emb_matrix)
        self.criterion = nn.CrossEntropyLoss()
        self.parameters = [p for p in self.model.parameters() if p.requires_grad]
        if opt['cuda']:
            self.model.cuda()
            self.criterion.cuda()
        self.optimizer = torch_utils.get_optimizer(opt['optim'], self.parameters, opt['lr'])

    def update(self, batch):
        losses = {}
        inputs, labels, orig_idx = maybe_place_batch_on_cuda(batch, self.opt['cuda'])

        # step forward
        self.model.train()
        self.optimizer.zero_grad()
        logits, pooling_output, supplemental_losses = self.model(inputs)
        main_loss = self.criterion(logits, labels)
        losses['re_loss'] = main_loss.data.item()
        # l2 decay on all conv layers
        if self.opt.get('conv_l2', 0) > 0:
            conv_l2_loss = self.model.conv_l2() * self.opt['conv_l2']
            main_loss += conv_l2_loss
            losses['conv_l2'] = conv_l2_loss.data.item()
        # l2 penalty on output representations
        if self.opt.get('pooling_l2', 0) > 0:
            pooling_l2_loss = self.opt['pooling_l2'] * (pooling_output ** 2).sum(1).mean()
            main_loss += pooling_l2_loss
            losses['pooling_l2'] = pooling_l2_loss.data.item()
        if self.opt['link_prediction'] is not None:
            # auxiliary KG losses; the without_* flags switch each term off
            observed_loss = supplemental_losses['observed'] * (1. - self.opt['link_prediction']['without_observed'])
            predicted_loss = supplemental_losses['baseline'] * (1. - self.opt['link_prediction']['without_verification'])
            main_loss += (observed_loss + predicted_loss) * self.opt['link_prediction']['lambda']
            losses.update({'kg_observed': observed_loss.data.item(),
                           'kg_predicted': predicted_loss.data.item()})
        loss_val = main_loss.item()

        # backward
        main_loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.opt['max_grad_norm'])
        self.optimizer.step()
        return loss_val

    def predict(self, batch, unsort=True):
        inputs, labels, orig_idx = maybe_place_batch_on_cuda(batch, self.opt['cuda'])

        # forward
        self.model.eval()
        logits, _, _ = self.model(inputs)
        loss = self.criterion(logits, labels)
        probs = F.softmax(logits, 1).data.cpu().numpy().tolist()
        predictions = np.argmax(logits.data.cpu().numpy(), axis=1).tolist()
        if unsort:
            _, predictions, probs = [list(t) for t in
                                     zip(*sorted(zip(orig_idx, predictions, probs)))]
        return predictions, probs, loss.item()
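# The (1. - flag) factors above act as on/off switches for the auxiliary KG
# losses: a without_* flag of 1 zeroes its term, 0 keeps it, and 'lambda'
# scales the remainder. Plain-number check with illustrative values:
observed, baseline = 0.8, 0.5
without_observed, without_verification, lam = 1, 0, 0.1
extra = (observed * (1. - without_observed)
         + baseline * (1. - without_verification)) * lam
assert abs(extra - 0.05) < 1e-12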
class GCNTrainer(Trainer):
    def __init__(self, opt, emb_matrix=None):
        self.opt = opt
        self.emb_matrix = emb_matrix
        self.model = GCNClassifier(opt, emb_matrix=emb_matrix)
        self.criterion = nn.CrossEntropyLoss(reduction="none")
        self.parameters = [p for p in self.model.parameters() if p.requires_grad]
        self.crf = CRF(self.opt['num_class'], batch_first=True)
        self.bc = nn.BCELoss()
        if opt['cuda']:
            self.model.cuda()
            self.criterion.cuda()
            self.crf.cuda()
            self.bc.cuda()
        self.optimizer = torch_utils.get_optimizer(opt['optim'], self.parameters, opt['lr'])

    def update(self, batch):
        inputs, labels, sent_labels, dep_path, tokens, head, lens = unpack_batch(batch, self.opt['cuda'])
        _, _, _, _, terms, _, _ = inputs

        # step forward
        self.model.train()
        self.optimizer.zero_grad()
        logits, class_logits, selections, term_def, not_term_def, term_selections = self.model(inputs)

        # shift labels to 0-based tag indices; clamp padding to tag 0
        labels = labels - 1
        labels[labels < 0] = 0

        # invert the padding mask (1 at PAD) into a CRF token mask (1 at real tokens)
        mask = inputs[1].float()
        mask[mask == 0.] = -1.
        mask[mask == 1.] = 0.
        mask[mask == -1.] = 1.
        mask = mask.byte()

        loss = -self.crf(logits, labels, mask=mask)
        sent_loss = self.bc(class_logits, sent_labels)
        loss += self.opt['sent_loss'] * sent_loss
        selection_loss = self.bc(selections.view(-1, 1), dep_path.view(-1, 1))
        loss += self.opt['dep_path_loss'] * selection_loss
        term_def_loss = -self.opt['consistency_loss'] * (term_def - not_term_def)
        loss += term_def_loss
        # loss += self.opt['consistency_loss'] * not_term_def
        term_loss = self.opt['sent_loss'] * self.bc(term_selections.view(-1, 1), terms.float().view(-1, 1))
        loss += term_loss
        loss_val = loss.item()

        # backward
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.opt['max_grad_norm'])
        self.optimizer.step()
        return loss_val, sent_loss.item(), term_loss.item()

    def predict(self, batch, unsort=True):
        inputs, labels, sent_labels, dep_path, tokens, head, lens = unpack_batch(batch, self.opt['cuda'])
        orig_idx = batch[-1]

        # forward
        self.model.eval()
        logits, sent_logits, _, _, _, _ = self.model(inputs)
        labels = labels - 1
        labels[labels < 0] = 0
        mask = inputs[1].float()
        mask[mask == 0.] = -1.
        mask[mask == 1.] = 0.
        mask[mask == -1.] = 1.
        mask = mask.byte()
        loss = -self.crf(logits, labels, mask=mask)
        # hand-tuned transition constraints, kept for reference (disabled):
        # self.crf.transitions[0][4] = -1
        # self.crf.transitions[0][5] = -1
        # self.crf.transitions[0][6] = -1
        # self.crf.transitions[1][5] = -1
        # self.crf.transitions[1][6] = -1
        probs = F.softmax(logits, dim=1)
        predictions = self.crf.decode(logits, mask=mask)
        sent_predictions = sent_logits.round().long().data.cpu().numpy()
        if unsort:
            _, predictions, probs, sent_predictions = [
                list(t) for t in zip(*sorted(zip(orig_idx, predictions, probs, sent_predictions)))]
        return predictions, probs, loss.item(), sent_predictions
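# The three in-place assignments above invert a 0/1 padding mask (1 at PAD
# positions) into the token mask the CRF expects (1 at real tokens). A
# self-contained check of the equivalent one-liner:
import torch

pad_mask = torch.tensor([[0, 0, 1],
                         [0, 1, 1]])   # 1 = padding
crf_mask = pad_mask.eq(0)              # 1 = real token
assert torch.equal(crf_mask.byte(), 1 - pad_mask.byte())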
class GCNTrainer(Trainer):
    def __init__(self, opt, emb_matrix=None, ucca_embedding_matrix=None):
        self.opt = opt
        self.emb_matrix = emb_matrix
        self.ucca_embedding_matrix = ucca_embedding_matrix
        self.model = GCNClassifier(opt, emb_matrix=emb_matrix, ucca_embedding_matrix=ucca_embedding_matrix)
        self.criterion = nn.CrossEntropyLoss()
        self.parameters = [p for p in self.model.parameters() if p.requires_grad]
        if opt['cuda']:
            self.model.cuda()
            self.criterion.cuda()
        self.optimizer = get_optimizer(opt['optim'], self.parameters, opt['lr'])

    def update(self, batch):
        input, labels = self.unpack_batch(batch, self.opt['cuda'])

        # step forward
        self.model.train()
        self.optimizer.zero_grad()
        logits, pooling_output = self.model(input)
        loss = self.criterion(logits, labels)
        # l2 decay on all conv layers
        if self.opt.get('conv_l2', 0) > 0:
            loss += self.model.conv_l2() * self.opt['conv_l2']
        # l2 penalty on output representations
        if self.opt.get('pooling_l2', 0) > 0:
            loss += self.opt['pooling_l2'] * (pooling_output ** 2).sum(1).mean()
        loss_val = loss.item()

        # backward
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.opt['max_grad_norm'])
        self.optimizer.step()
        return loss_val

    def predict(self, batch, unsort=True):
        input, labels = self.unpack_batch(batch, self.opt['cuda'])
        orig_idx = input.orig_idx
        ids = input.id

        # forward
        self.model.eval()
        logits, _ = self.model(input)
        loss = self.criterion(logits, labels)
        probs = F.softmax(logits, 1).data.cpu().numpy().tolist()
        predictions = np.argmax(logits.data.cpu().numpy(), axis=1).tolist()
        if unsort:
            _, predictions, probs, ids = [
                list(t) for t in zip(*sorted(zip(orig_idx, predictions, probs, ids)))]
        return predictions, probs, loss.item(), ids

    def predict_with_confidence(self, batch, unsort=True):
        input, labels = self.unpack_batch(batch, self.opt['cuda'])
        orig_idx = input.orig_idx
        ids = input.id

        # forward
        self.model.eval()
        logits, _ = self.model(input)
        loss = self.criterion(logits, labels)
        # confidence = max softmax probability per example
        probs = np.max(F.softmax(logits, 1).data.cpu().numpy(), axis=1).tolist()
        predictions = np.argmax(logits.data.cpu().numpy(), axis=1).tolist()
        if unsort:
            _, predictions, probs, ids = [
                list(t) for t in zip(*sorted(zip(orig_idx, predictions, probs, ids)))]
        return predictions, probs, loss.item(), ids

    def plural_predict(self, batch, plurality=2):
        input, labels = self.unpack_batch(batch, self.opt['cuda'])
        orig_idx = input.orig_idx
        ids = input.id

        # forward
        self.model.eval()
        logits, _ = self.model(input)
        # top-`plurality` predictions per example, each unsorted back to input order
        ordered_predictions = np.argsort(-logits.data.cpu().numpy(), axis=1)
        predictions = []
        for i in range(plurality):
            prediction = ordered_predictions[:, i].tolist()
            _, prediction = [list(t) for t in zip(*sorted(zip(orig_idx, prediction)))]
            predictions.append(prediction)
        _, ids = [list(t) for t in zip(*sorted(zip(orig_idx, ids)))]
        return predictions, ids

    def unpack_batch(self, batch, cuda):
        words = set_cuda(get_long_tensor(batch.word, batch.batch_size), cuda)
        masks = set_cuda(torch.eq(words, 0), cuda)
        pos = set_cuda(get_long_tensor(batch.pos, batch.batch_size), cuda)
        ner = set_cuda(get_long_tensor(batch.ner, batch.batch_size), cuda)
        coref = set_cuda(get_long_tensor(batch.coref, batch.batch_size), cuda)
        ucca_enc = set_cuda(get_long_tensor(batch.ucca_enc, batch.batch_size), cuda)
        rel = set_cuda(torch.LongTensor(batch.rel), cuda)
        input = Input(batch_size=batch.batch_size, word=words, mask=masks, pos=pos,
                      ner=ner, coref=coref, ucca_enc=ucca_enc, len=batch.len,
                      head=batch.head, ucca_head=batch.ucca_head,
                      ucca_multi_head=batch.ucca_multi_head,
                      ucca_dist_from_mh_path=batch.ucca_dist_from_mh_path,
                      subj_p=batch.subj_p, obj_p=batch.obj_p,
                      id=batch.id, orig_idx=batch.orig_idx)
        return input, rel
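# unpack_batch above leans on the repo helpers set_cuda and get_long_tensor;
# minimal sketches consistent with how they are called (assumptions, not the
# repo's actual implementations):
import torch

def set_cuda_sketch(tensor, cuda):
    """Move a tensor to the GPU only when the config asks for it."""
    return tensor.cuda() if cuda else tensor

def get_long_tensor_sketch(token_lists, batch_size):
    """Pad a list of token-id lists into a (batch_size, max_len) LongTensor (0 = PAD)."""
    max_len = max(len(tokens) for tokens in token_lists)
    padded = torch.zeros(batch_size, max_len, dtype=torch.long)
    for i, tokens in enumerate(token_lists):
        padded[i, :len(tokens)] = torch.as_tensor(tokens, dtype=torch.long)
    return padded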