def train(self, dataset):
    """Train the model for one epoch over `dataset`; return the mean loss.

    Samples are visited in random order. Per-sample losses are scaled by
    the batch size before backward() so the accumulated gradient equals
    the batch mean; the optimizer steps every `self.args.batchsize`
    successfully processed samples.

    FIX: gradients of the trailing partial batch are now applied;
    previously they were silently discarded by zero_grad() at the start
    of the next epoch.
    """
    self.model.train()
    self.optimizer.zero_grad()
    loss, k = 0.0, 0
    indices = torch.randperm(len(dataset))
    for idx in tqdm(range(len(dataset)), desc='Training epoch ' + str(self.epoch + 1) + ''):
        ltree, lsent, rtree, rsent, label = dataset[indices[idx]]
        # some pairs may have failed to parse; skip them
        if ltree is not None and rtree is not None:
            linput, rinput = Var(lsent), Var(rsent)
            target = Var(map_label_to_target(label, dataset.num_classes))
            if self.args.cuda:
                linput, rinput = linput.cuda(), rinput.cuda()
                target = target.cuda()
            output = self.model(ltree, linput, rtree, rinput)
            err = self.criterion(output, target)
            loss += err.data[0]
            # scale so that the summed gradient equals the batch mean
            (err / self.args.batchsize).backward()
            k += 1
            if k % self.args.batchsize == 0:
                self.optimizer.step()
                self.optimizer.zero_grad()
    # flush the trailing partial batch, if any
    if k % self.args.batchsize != 0:
        self.optimizer.step()
        self.optimizer.zero_grad()
    self.epoch += 1
    return loss / len(dataset)
def train_epoch(epoch, args, model, dataset, optimizer):
    """Train `model` for one epoch with KL-divergence loss; return mean loss.

    Gradients are accumulated and the optimizer steps every
    `args.batch_size` samples; each sample's loss is divided by the batch
    size before backward() so the applied gradient equals the batch mean.

    FIX: the trailing partial batch is now stepped instead of leaving its
    gradients to be discarded by the next epoch's zero_grad().
    """
    model.train()
    optimizer.zero_grad()
    indices = torch.randperm(len(dataset))
    batch_size = args.batch_size
    loss, k = 0.0, 0
    for idx in tqdm(range(len(dataset)), desc="Training epoch {}".format(epoch)):
        ltree, lsent, lrel, rtree, rsent, rrel, sim = dataset[indices[idx]]
        linput, rinput = Var(lsent), Var(rsent)
        lrel, rrel = Var(lrel), Var(rrel)
        target = Var(map_label_to_target(sim, args.num_classes))
        if args.cuda:
            linput, rinput = linput.cuda(), rinput.cuda()
            lrel, rrel = lrel.cuda(), rrel.cuda()
            target = target.cuda()
        output = model(ltree, linput, lrel, rtree, rinput, rrel)
        err = F.kl_div(output, target)
        loss += err.data[0]
        (err / batch_size).backward()
        k += 1
        if k % batch_size == 0:
            optimizer.step()
            optimizer.zero_grad()
    # flush the trailing partial batch, if any
    if k % batch_size != 0:
        optimizer.step()
        optimizer.zero_grad()
    avg_loss = loss / len(dataset)
    return avg_loss
def test(self, dataset): """""" self.model.eval() total_loss = 0 predictions = torch.zeros(len(dataset)) for idx in tqdm(range(len(dataset)), desc='Testing epoch ' + str(self.epoch) + ''): ltree, lsent, rtree, rsent, label, ledge, redge = dataset[idx] linput, rinput = Variable(lsent, volatile=True), Variable(rsent, volatile=True) target = Variable(map_label_to_target(label), volatile=True) ledge_input, redge_input = Variable( ledge, volatile=True), Variable(redge, volatile=True) if self.args.cuda: linput, rinput = linput.cuda(), rinput.cuda() target = target.cuda() ledge_input, redge_input = ledge_input.cuda( ), redge_input.cuda() if self.args.model != 'base': output = self.model(ltree, linput, rtree, rinput, ledge_input, redge_input) else: output = self.model(ltree, linput, rtree, rinput) loss = self.criterion(output, target) total_loss += loss.data[0] _, predict_class = torch.max(output, 1) predictions[idx] = predict_class.data.cpu()[0] return total_loss / len(dataset), predictions
def _train(model, optimizer, args, criterion, dataset, epoch):
    """Run one training epoch and return the mean loss over `dataset`.

    FIX: the caller-supplied `optimizer` is now actually used. Previously
    a fresh Adam optimizer was constructed on every call, which both
    ignored the caller's optimizer and reset Adam's moment estimates at
    each epoch. (The dead local `epoch += 1`, which mutated only the
    local int and had no effect on the caller, has also been removed.)
    """
    model.train()
    optimizer.zero_grad()
    loss = 0.0
    # NOTE(review): shuffling was apparently intended (randperm + the
    # commented-out indexed access below) but the loop iterates in order.
    indices = torch.randperm(len(dataset))
    for idx, data in enumerate(dataset):
        # ltree, lsent, rtree, rsent, label = dataset[indices[idx]]
        ltree, lsent, rtree, rsent, label = data
        linput, rinput = Var(lsent), Var(rsent)
        target = Var(map_label_to_target(label, dataset.num_classes))
        if args.cuda:
            linput, rinput = linput.cuda(), rinput.cuda()
            target = target.cuda()
        output = model(ltree, linput, rtree, rinput)
        err = criterion(output, target)
        loss += err.data[0]
        err.backward()
        # gradient accumulation: step every args.batchsize samples
        if (idx + 1) % args.batchsize == 0:
            optimizer.step()
            optimizer.zero_grad()
    return loss / len(dataset)
def test(self, dataset):
    """Evaluate the model on `dataset`.

    Returns (mean loss, per-sample predictions, tensor of processed indices).
    Each prediction is the expected class value dot(1..C, exp(log-probs)).
    """
    self.model.eval()
    loss = 0
    count = 0
    predictions = torch.zeros(len(dataset))
    idxs = []
    # class values 1..num_classes, used for the expected-rating dot product
    indices = torch.arange(1, dataset.num_classes + 1)
    for idx in tqdm(range(len(dataset)), desc='Testing epoch ' + str(self.epoch) + ''):
        ltree, lsent, rtree, rsent, label = dataset[idx]
        # unparsable pairs are skipped; their predictions stay 0
        if ltree is not None and rtree is not None:
            count = count + 1
            linput, rinput = Var(lsent, volatile=True), Var(rsent, volatile=True)
            target = Var(map_label_to_target(label, dataset.num_classes), volatile=True)
            if self.args.cuda:
                linput, rinput = linput.cuda(), rinput.cuda()
                target = target.cuda()
            output = self.model(ltree, linput, rtree, rinput)
            err = self.criterion(output, target)
            loss += err.data[0]
            output = output.data.squeeze().cpu()
            idxs.append(idx)
            # expected value under the predicted distribution (output is log-prob)
            predictions[idx] = torch.dot(indices, torch.exp(output))
    print('Sentences processed: %d' % (count))
    # NOTE(review): loss is averaged over len(dataset), not over `count`
    # actually processed — skipped samples dilute the mean; confirm intended.
    return loss / len(dataset), predictions, torch.from_numpy(
        np.asarray(idxs))
def train(self, dataset):
    """One training epoch for the separate embedding model + tree model.

    The embedding model is updated manually with plain SGD at learning
    rate `self.args.emblr`; the tree model is updated via `self.optimizer`.
    Both step together every `self.args.batchsize` samples.
    Returns the mean criterion loss over the dataset.
    """
    self.model.train()
    self.embedding_model.train()
    self.embedding_model.zero_grad()
    self.optimizer.zero_grad()
    loss, k = 0.0, 0
    indices = torch.randperm(len(dataset))
    # xrange: this function targets Python 2
    for idx in tqdm(xrange(len(dataset)),desc='Training epoch '+str(self.epoch+1)+''):
        ltree,lsent,rtree,rsent,label = dataset[indices[idx]]
        linput, rinput = Var(lsent), Var(rsent)
        target = Var(map_label_to_target(label,dataset.num_classes))
        if self.args.cuda:
            linput, rinput = linput.cuda(), rinput.cuda()
            target = target.cuda()
        # look up embeddings; unsqueeze adds a size-1 dim the model expects
        lemb = torch.unsqueeze(self.embedding_model(linput), 1)
        remb = torch.unsqueeze(self.embedding_model(rinput), 1)
        output = self.model(ltree, lemb, rtree, remb)
        err = self.criterion(output, target)
        loss += err.data[0]
        err.backward()
        k += 1
        if k==self.args.batchsize:
            # manual SGD step on the embedding parameters
            for f in self.embedding_model.parameters():
                f.data.sub_(f.grad.data * self.args.emblr)
            self.optimizer.step()
            self.embedding_model.zero_grad()
            self.optimizer.zero_grad()
            k = 0
    # NOTE(review): gradients of a trailing partial batch are discarded by
    # the next epoch's zero_grad(); confirm that is acceptable.
    self.epoch += 1
    return loss/len(dataset)
def train(self, dataset):
    """Train on a random 10%% subsample of `dataset`; return the mean loss.

    FIXES over the previous version:
    - removed an unconditional exit() left over from debugging, which made
      the return statement unreachable and killed the process after the
      first epoch;
    - the mean loss is now computed over the number of samples actually
      processed rather than the full dataset size;
    - removed the leftover debug print of the per-sample loss list.
    """
    self.model.train()
    self.optimizer.zero_grad()
    total_loss = 0.0
    losses = []
    indices = torch.randperm(len(dataset), dtype=torch.long, device='cpu')
    # deliberately visits only a 10% random subsample each epoch
    n_samples = int(len(dataset) / 10)
    for idx in tqdm(range(n_samples), desc='Training epoch ' + str(self.epoch + 1) + ''):
        ltree, linput, rtree, rinput, label = dataset[indices[idx]]
        target = utils.map_label_to_target(label, dataset.num_classes)
        linput, rinput = linput.to(self.device), rinput.to(self.device)
        target = target.to(self.device)
        output, intermediate_output = self.model(ltree, linput, rtree, rinput)
        loss = self.criterion(output, target)
        losses.append(loss.item())
        total_loss += loss.item()
        loss.backward()
        # gradient accumulation: step every batchsize samples
        if idx % self.args.batchsize == 0 and idx > 0:
            self.optimizer.step()
            self.optimizer.zero_grad()
    self.epoch += 1
    return total_loss / max(n_samples, 1)
def _forward(self, tree, sent, arb, flag):
    """Single forward/backward pass for one (tree, sentence, arb-triple) sample.

    `flag` picks the triple orientation and the binary target: truthy ->
    (a, r, b) with label 1, falsy -> reversed order with label 0.
    Returns (model output, scalar loss value); gradients are accumulated
    by the backward() call, and the tree's cached state is cleared.
    """
    n_words = sent.size()[0]
    words = Var(sent)
    if flag:
        a, r, b = arb[0], arb[1], arb[2]
        label = 1
    else:
        a, r, b = arb[2], arb[1], arb[0]
        label = 0
    gold = Var(map_label_to_target(label, 2))
    triple = self._encode_arb(a, r, b, n_words)
    if self.args.cuda:
        words = words.cuda()
        triple = triple.cuda()
        gold = gold.cuda()
    score = self.model(tree, words, triple)
    batch_loss = self.criterion(score, gold)
    loss_value = batch_loss.data[0]
    batch_loss.backward()
    # drop cached node states so the tree can be reused next pass
    tree.clear_state()
    return score, loss_value
def test(self, dataset, mode='test'):
    """Evaluate the attention model on `dataset`.

    mode='test' -> returns (mean loss, expected-value predictions).
    mode='eval' -> returns (left inputs, right inputs, similarity scores,
                   left attention maps, right attention maps); no loss is
                   computed and no target is built.
    """
    self.model.eval()
    if mode == 'eval':
        inp1 = []
        lattention = []
        rattention = []
        inp2 = []
        sims = []
    loss = 0
    predictions = torch.zeros(len(dataset))
    # class values 1..num_classes for the expected-rating dot product
    indices = torch.arange(1, dataset.num_classes + 1)
    for idx in tqdm(range(len(dataset)), desc='Testing epoch ' + str(self.epoch) + ''):
        ltree, lsent, rtree, rsent, label = dataset[idx]
        # volatile=True: legacy (pre-0.4 PyTorch) inference mode, no autograd graph
        linput, rinput = Var(lsent, volatile=True), Var(rsent, volatile=True)
        if mode == 'test':
            target = Var(map_label_to_target(label, dataset.num_classes), volatile=True)
        if self.args.cuda:
            linput, rinput = linput.cuda(), rinput.cuda()
            if mode == 'test':
                target = target.cuda()
        output, attl, attr = self.model(ltree, linput, rtree, rinput)
        if mode == 'eval':
            # collect raw inputs, similarity scores and attention maps
            inp1.append(linput)
            inp2.append(rinput)
            cpu_output = output.data.squeeze().cpu()
            sim = torch.dot(indices, torch.exp(cpu_output))
            sims.append(sim)
            lattention.append(attl)
            rattention.append(attr)
        elif mode == 'test':
            err = self.criterion(output, target)
            loss += err.data[0]
            output = output.data.squeeze().cpu()
            # expected value under the predicted distribution (log-probs)
            predictions[idx] = torch.dot(indices, torch.exp(output))
    if mode == 'test':
        return loss / len(dataset), predictions
    elif mode == 'eval':
        return inp1, inp2, sims, lattention, rattention
def test(self, dataset):
    """Evaluate the model on `dataset`; return (mean loss, expected-value predictions)."""
    self.model.eval()
    total = 0
    preds = torch.zeros(len(dataset))
    # class values 1..C for the expected-rating dot product
    class_vals = torch.arange(1, dataset.num_classes + 1)
    progress = tqdm(range(len(dataset)), desc='Testing epoch ' + str(self.epoch) + '')
    for i in progress:
        ltree, lsent, ltokens, rtree, rsent, rtokens, label = dataset[i]
        left = Var(lsent, volatile=True)
        right = Var(rsent, volatile=True)
        gold = Var(map_label_to_target(label, dataset.num_classes), volatile=True)
        if self.args.cuda:
            left, right = left.cuda(), right.cuda()
            gold = gold.cuda()
        out = self.model(ltree, left, rtree, right)
        total += self.criterion(out, gold).data[0]
        # expected class value under exp(log-probs)
        preds[i] = torch.dot(class_vals, torch.exp(out.data.cpu()))
    return total / len(dataset), preds
def test(self, dataset):
    """Evaluate on `dataset` and return (mean loss, expected-value predictions)."""
    self.model.eval()
    loss_sum = 0
    scores = torch.zeros(len(dataset))
    # class values 1..C for the expected-rating dot product
    class_values = torch.arange(1, dataset.num_classes + 1)
    for sample_id in tqdm(range(len(dataset)),
                          desc='Testing epoch ' + str(self.epoch) + ''):
        ltree, lsent, rtree, rsent, label = dataset[sample_id]
        lvar = Var(lsent, volatile=True)
        rvar = Var(rsent, volatile=True)
        gold = Var(map_label_to_target(label, dataset.num_classes), volatile=True)
        if self.args.cuda:
            lvar, rvar = lvar.cuda(), rvar.cuda()
            gold = gold.cuda()
        logits = self.model(ltree, lvar, rtree, rvar)
        loss_sum += self.criterion(logits, gold).data[0]
        log_probs = logits.data.squeeze().cpu()
        # expected class value under exp(log-probs)
        scores[sample_id] = torch.dot(class_values, torch.exp(log_probs))
    return loss_sum / len(dataset), scores
def train(self, batch_size=50):
    """One training epoch over self.train_data; prints the mean loss.

    Samples shorter than 3 tokens are skipped and excluded from the loss
    denominator. Gradients are accumulated and the optimizer steps every
    `batch_size` samples.
    """
    self.model.train()
    self.optimizer.zero_grad()
    total_loss = 0.0
    indices = torch.randperm(len(self.train_data))
    nb_samples = len(self.train_data)
    l2_reg = None
    for idx in tqdm(range(nb_samples), desc='Training epoch ' + str(self.epoch + 1) + '', ascii=True, file=sys.stdout):
        text, label = self.train_data[indices[idx]]
        tree = None
        # too-short inputs are skipped and excluded from the average
        if len(text) < 3:
            nb_samples -= 1
            continue
        target = map_label_to_target(label, self.num_classes)
        #print(target)
        output = self.model(tree, text)
        if output is None:
            continue
            # NOTE(review): unreachable — presumably intended to run before
            # `continue`, so `None` outputs currently still count in the
            # loss denominator; confirm against the original source.
            nb_samples -= 1
        loss = self.criterion(output, target)
        #params = self.model.childsumtreelstm.getParameters()
        #0.5*self.args.reg*params_norm*params_norm
        total_loss += loss.data[0]
        loss.backward()
        if idx % batch_size == 0 and idx > 0:
            self.optimizer.step()
            self.optimizer.zero_grad()
        #del tree, text, label, output
        gc.collect()
    self.epoch += 1
    train_loss = total_loss / nb_samples
    print("Train loss:{} ".format(train_loss))
def test(self, dataset, save_attention=False):
    """Evaluate on `dataset`; return (mean loss, expected-value predictions).

    When `save_attention` is true, the model's intermediate (attention)
    outputs are collected per sample into `atten_val`.

    FIX: the model forward pass had been accidentally swallowed into a
    triple-quoted debug string, so `output` / `intermediate_output` were
    undefined at their use sites (NameError). The call is restored below;
    the debug vocabulary printing has been dropped.
    """
    self.model.eval()
    atten_val = []
    with torch.no_grad():
        total_loss = 0.0
        predictions = torch.zeros(len(dataset), dtype=torch.float, device='cpu')
        # class values 1..C for the expected-rating dot product
        indices = torch.arange(1, dataset.num_classes + 1, dtype=torch.float, device='cpu')
        for idx in tqdm(range(len(dataset)), desc='Testing epoch ' + str(self.epoch) + ''):
            ltree, linput, rtree, rinput, label = dataset[idx]
            target = utils.map_label_to_target(label, dataset.num_classes)
            linput, rinput = linput.to(self.device), rinput.to(self.device)
            target = target.to(self.device)
            output, intermediate_output = self.model(ltree, linput, rtree, rinput)
            loss = self.criterion(output, target)
            total_loss += loss.item()
            output = output.squeeze().to('cpu')
            # expected class value under exp(log-probs)
            predictions[idx] = torch.dot(indices, torch.exp(output))
            if save_attention:
                atten_val.append(intermediate_output)
    # NOTE(review): atten_val is collected but never persisted or returned;
    # the pickle-dump code remains disabled as in the original.
    #if save_attention :
    #    with open("atten_val.pkl","wb") as f:
    #        pickle.dump(atten_val,f, protocol = pickle.HIGHEST_PROTOCOL)
    return total_loss / len(dataset), predictions
def train(self, dataset):
    """One training epoch over shuffled `dataset`; returns the mean loss.

    Gradients accumulate across samples and the optimizer steps roughly
    every `self.args.batchsize` samples.

    FIX: gradients accumulated after the last in-loop optimizer step (the
    trailing partial batch) are now applied instead of being discarded by
    zero_grad() at the start of the next epoch.
    """
    self.model.train()
    self.optimizer.zero_grad()
    total_loss = 0.0
    indices = torch.randperm(len(dataset))
    pending = False  # True while unapplied gradients are accumulated
    for idx in tqdm(range(len(dataset)), desc='Training epoch ' + str(self.epoch + 1) + ''):
        ltree, lsent, rtree, rsent, label = dataset[indices[idx]]
        linput, rinput = Var(lsent), Var(rsent)
        target = Var(map_label_to_target(label, dataset.num_classes))
        if self.args.cuda:
            linput, rinput = linput.cuda(), rinput.cuda()
            target = target.cuda()
        output = self.model(ltree, linput, rtree, rinput)
        loss = self.criterion(output, target)
        total_loss += loss.data[0]
        loss.backward()
        pending = True
        if idx % self.args.batchsize == 0 and idx > 0:
            self.optimizer.step()
            self.optimizer.zero_grad()
            pending = False
    # flush the trailing partial batch, if any
    if pending:
        self.optimizer.step()
        self.optimizer.zero_grad()
    self.epoch += 1
    return total_loss / len(dataset)
def train(self, dataset):
    """One training epoch over shuffled `dataset`; returns the mean loss.

    Gradients accumulate across samples and the optimizer steps roughly
    every `self.cfg.batch_size()` samples.

    FIX: gradients accumulated after the last in-loop optimizer step (the
    trailing partial batch) are now applied instead of being discarded by
    zero_grad() at the start of the next epoch.
    """
    self.model.train()
    self.optimizer.zero_grad()
    total_loss = 0.0
    indices = torch.randperm(len(dataset), dtype=torch.long, device='cpu')
    pending = False  # True while unapplied gradients are accumulated
    for idx in tqdm(range(len(dataset)), desc='Training epoch ' + str(self.epoch + 1) + ''):
        ltree, linput, rtree, rinput, label = dataset[indices[idx]]
        target = utils.map_label_to_target(label, dataset.num_classes)
        linput, rinput = linput.to(self.device), rinput.to(self.device)
        target = target.to(self.device)
        output = self.model(ltree, linput, rtree, rinput)
        loss = self.criterion(output, target)
        total_loss += loss.item()
        loss.backward()
        pending = True
        if idx % self.cfg.batch_size() == 0 and idx > 0:
            self.optimizer.step()
            self.optimizer.zero_grad()
            pending = False
    # flush the trailing partial batch, if any
    if pending:
        self.optimizer.step()
        self.optimizer.zero_grad()
    self.epoch += 1
    return total_loss / len(dataset)
def train(self, dataset):
    """One training epoch over shuffled `dataset`; returns the mean loss.

    Gradients accumulate across samples and the optimizer steps roughly
    every `self.args.batchsize` samples.

    FIX: gradients accumulated after the last in-loop optimizer step (the
    trailing partial batch) are now applied instead of being discarded by
    zero_grad() at the start of the next epoch.
    """
    self.model.train()
    self.optimizer.zero_grad()
    total_loss = 0.0
    indices = torch.randperm(len(dataset))
    pending = False  # True while unapplied gradients are accumulated
    for idx in tqdm(range(len(dataset)), desc='Training epoch ' + str(self.epoch + 1) + ''):
        ltree, lsent, rtree, rsent, label = dataset[indices[idx]]
        linput, rinput = Var(lsent), Var(rsent)
        target = Var(map_label_to_target(label, dataset.num_classes))
        if self.args.cuda:
            linput, rinput = linput.cuda(), rinput.cuda()
            target = target.cuda()
        output = self.model(ltree, linput, rtree, rinput)
        loss = self.criterion(output, target)
        total_loss += loss.data[0]
        loss.backward()
        pending = True
        if idx % self.args.batchsize == 0 and idx > 0:
            self.optimizer.step()
            self.optimizer.zero_grad()
            pending = False
    # flush the trailing partial batch, if any
    if pending:
        self.optimizer.step()
        self.optimizer.zero_grad()
    self.epoch += 1
    return total_loss / len(dataset)
def test(self):
    """Evaluate on self.val_data; prints the mean loss and accuracy.

    Samples shorter than 3 tokens are skipped and excluded from the loss
    denominator.
    """
    self.model.eval()
    loss = 0
    predictions = torch.zeros(len(self.val_data))
    predictions = predictions
    # NOTE(review): torch.range is deprecated (inclusive end point) and
    # `indices` is never used below — likely leftover from a regression
    # (expected-value) variant of this routine.
    indices = torch.range(1, self.val_data.num_classes)
    correct = 0
    total = 0
    nb_samples = len(self.val_data)
    for idx in tqdm(range(len(self.val_data)), desc='Testing epoch ' + str(self.epoch) + '', ascii=True, file=sys.stdout):
        text, label = self.val_data[idx]
        tree = None
        if len(text) < 3:
            nb_samples -= 1
            continue
        target = map_label_to_target(label, self.num_classes)
        outputs = self.model(tree, text)  # size(1,5)
        if outputs is None:
            continue
            # NOTE(review): unreachable — presumably intended to run before
            # `continue`, so `None` outputs currently still count in the
            # loss denominator; confirm against the original source.
            nb_samples -= 1
        # argmax over class scores -> predicted class
        _, predicted = torch.max(outputs.data, 1)
        total += target.size(0)
        # print(type(predicted))
        # print(type(target))
        correct += (predicted == target.data).sum()
        err = self.criterion(outputs, target)
        loss += err.data[0]
    loss = loss / nb_samples
    # NOTE(review): under Python 2 this is integer division (always 0 unless
    # correct == total) — confirm the targeted interpreter version.
    acc = correct / total
    #val_loss=loss/len(self.val_data)
    print("Val loss:{} Acc:{}".format(loss, acc))
def test_epoch(args, model, dataset):
    """Evaluate `model` on `dataset` with KL-divergence loss.

    Returns (mean KL loss, per-sample expected-value predictions).
    """
    model.eval()
    loss_total = 0
    preds = torch.zeros(len(dataset))
    # class values 1..C for the expected-rating dot product
    class_values = torch.arange(1, dataset.num_classes + 1)
    #for idx in range(len(dataset)):
    for i in tqdm(range(len(dataset)), desc="Testing "):
        ltree, lsent, lrel, rtree, rsent, rrel, sim = dataset[i]
        lwords = Var(lsent, volatile=True)
        rwords = Var(rsent, volatile=True)
        lrels = Var(lrel, volatile=True)
        rrels = Var(rrel, volatile=True)
        gold = Var(map_label_to_target(sim, args.num_classes), volatile=True)
        if args.cuda:
            lwords, rwords = lwords.cuda(), rwords.cuda()
            lrels, rrels = lrels.cuda(), rrels.cuda()
            gold = gold.cuda()
        log_probs = model(ltree, lwords, lrels, rtree, rwords, rrels)
        loss_total += F.kl_div(log_probs, gold).data[0]
        # expected class value under exp(log-probs)
        cpu_probs = torch.exp(log_probs.data.squeeze().cpu())
        preds[i] = torch.dot(class_values, cpu_probs)
    return loss_total / len(dataset), preds
def train(self, dataset):
    """One training epoch for the attention model; returns the mean loss.

    When the model returns attention maps, a Frobenius-norm penalty
    ||A A^T - I|| is added to push attention rows toward orthogonality.

    FIX: the penalty was previously added only to the *reported* running
    loss (a Python float accumulator), never to `err` before backward() —
    so it had no effect whatsoever on the gradients (and polluted the
    float total with a Variable). The penalty now contributes to the
    optimized objective, and the running total accumulates the matching
    scalar value.
    """
    self.model.train()
    self.optimizer.zero_grad()
    loss, k = 0.0, 0
    indices = torch.randperm(len(dataset))
    for idx in tqdm(range(len(dataset)), desc='Training epoch ' + str(self.epoch + 1) + ''):
        ltree, lsent, rtree, rsent, label = dataset[indices[idx]]
        linput, rinput = Var(lsent), Var(rsent)
        target = Var(map_label_to_target(label, dataset.num_classes))
        if self.args.cuda:
            linput, rinput = linput.cuda(), rinput.cuda()
            target = target.cuda()
        output, attl, attr = self.model(ltree, linput, rtree, rinput)
        err = self.criterion(output, target)
        if attl:
            # orthogonality penalty on both attention matrices
            attentionT = torch.transpose(attl, 1, 2).contiguous()
            err = err + Frobenius(torch.bmm(attl, attentionT) - self.I[0])
            attentionT = torch.transpose(attr, 1, 2).contiguous()
            err = err + Frobenius(torch.bmm(attr, attentionT) - self.I[0])
        loss += err.data[0]
        err.backward()
        k += 1
        if k % self.args.batchsize == 0:
            self.optimizer.step()
            self.optimizer.zero_grad()
    self.epoch += 1
    return loss / len(dataset)
def test(self, dataset):
    """Evaluate on `dataset`; return (mean loss, expected-value predictions)."""
    self.model.eval()
    with torch.no_grad():
        total_loss = 0.0
        predictions = torch.zeros(len(dataset), dtype=torch.float, device='cpu')
        # class values 1..C for the expected-rating dot product
        indices = torch.arange(1, dataset.num_classes + 1, dtype=torch.float, device='cpu')
        # NOTE(review): the loop stops at len(dataset) - 1, skipping the
        # last sample (its prediction stays 0) while the loss is still
        # averaged over the full length — looks like an off-by-one
        # workaround (see the disabled debug print); confirm intent.
        for idx in tqdm(range(len(dataset) - 1), desc='Testing epoch ' + str(self.epoch) + ''):
            #print("idx=",idx, len(dataset))
            ltree, linput, rtree, rinput, label = dataset[idx]
            target = utils.map_label_to_target(label, dataset.num_classes)
            linput, rinput = linput.to(self.device), rinput.to(self.device)
            target = target.to(self.device)
            output = self.model(ltree, linput, rtree, rinput)
            loss = self.criterion(output, target)
            total_loss += loss.item()
            output = output.squeeze().to('cpu')
            # expected class value under exp(log-probs)
            predictions[idx] = torch.dot(indices, torch.exp(output))
    return total_loss / len(dataset), predictions
model_ckpt = torch.load('%s.pt' % os.path.join(args.save, args.saved_model)) model.load_state_dict(model_ckpt['model']) trainer = Trainer(args, model, criterion, optimizer, device) else : trainer = Trainer(args, model, criterion, optimizer, device) # whether to continue training or only evaluate if (args.evaluate): model.eval() with torch.no_grad(): total_loss = 0.0 predictions = torch.zeros(len(test_dataset), dtype=torch.float, device='cpu') indices = torch.arange(1, test_dataset.num_classes + 1, dtype=torch.float, device='cpu') for idx in tqdm_notebook(range(len(test_dataset)), desc='Testing epoch ' + str(args.epochs) + ''): ltree, linput, rtree, rinput, label = test_dataset[idx] target = utils.map_label_to_target(label, test_dataset.num_classes) linput, rinput = linput.to(device), rinput.to(device) target = target.to(device) #output = self.model(ltree, linput, rtree, rinput) output, intermediate_output = model(ltree, linput, rtree, rinput) loss = criterion(output, target) total_loss += loss.item() output = output.squeeze().to('cpu') predictions[idx] = torch.dot(indices, torch.exp(output)) test_loss, test_pred = trainer.test(test_dataset) test_pearson = metrics.pearson(test_pred, test_dataset.labels) test_mse = metrics.mse(test_pred, test_dataset.labels) test_spear = (spearmanr(np.asarray(test_pred), np.asarray(test_dataset.labels)))[0] print (" Test \tLoss: {}\tPearson: {}\t spearman:{}\t MSE: {}".format( args.epochs, test_loss, test_pearson, test_spear, test_mse)) with open("predictions.pkl","wb") as f: