                       order_field=ORDER, graph_field=GRAPH)
dev_data = DocDataset(path=args.valid, text_field=DOC,
                      order_field=ORDER, graph_field=GRAPH)

DOC.vocab = torch.load(args.vocab)
print('vocab {} loaded'.format(args.vocab))
args.__dict__.update({'doc_vocab': len(DOC.vocab)})

train_flag = True
train_real = DocIter(train_data, args.batch_size,
                     device="cuda" if args.gpu else "cpu",
                     train=train_flag, shuffle=train_flag,
                     sort_key=lambda x: len(x.doc))

devbatch = 1
dev_real = DocIter(dev_data, devbatch,
                   device="cuda" if args.gpu else "cpu",
                   batch_size_fn=None, train=False,
                   repeat=False, shuffle=False, sort=False)

args_str = json.dumps(args.__dict__, indent=4, sort_keys=True)
print(args_str)
def train(args, train_iter, dev, fields, checkpoint):
    if is_cqa_task():
        model = CqaNet(args)
    else:
        model = PointerNet(args)
    # model.cuda()
    DOC, ORDER, GRAPH = fields
    print('1:', DOC.vocab.itos[1])
    model.load_pretrained_emb(DOC.vocab.vectors)
    print_params(model)
    print(model)

    wd = 1e-5
    opt = torch.optim.Adadelta(model.parameters(), lr=args.lr, rho=0.95,
                               weight_decay=wd)

    best_answer_type_score = -np.inf
    best_entity_score = -np.inf
    best_iter = 0
    offset = 0

    criterion = nn.NLLLoss(reduction='none')
    model.equip(criterion)

    start = time.time()
    early_stop = args.early_stop

    test_data = DocDataset(path=args.test, text_field=DOC,
                           order_field=ORDER, graph_field=GRAPH)
    test_real = DocIter(test_data, 1, device='cuda', batch_size_fn=None,
                        train=False, repeat=False, shuffle=False, sort=False)

    fake_epc = -1
    is_validate_before_train = False
    timer = Timer()
    if is_validate_before_train:
        validate(args, dev, checkpoint, model, DOC, fake_epc,
                 best_answer_type_score, best_iter, is_validate_before_train,
                 best_entity_score, timer)

    for epc in range(args.maximum_steps):
        for iters, batch in enumerate(train_iter):
            model.train()
            model.zero_grad()

            t1 = time.time()
            loss = model(batch.doc, batch.order, batch.doc_len, batch.e_words,
                         batch.elocs, batch.labels, batch.answer_types)
            loss.backward()
            opt.step()
            t2 = time.time()
            print('epc:{} iter:{} loss:{:.2f} t:{:.2f} lr:{:.1e}'.format(
                epc, iters + 1, loss, t2 - t1, opt.param_groups[0]['lr']))

        if epc < 5:
            print(f"finish epoch {epc}, latest loss {loss}")
            continue

        best_answer_type_score, best_iter, best_entity_score, is_early_stop = validate(
            args, dev, checkpoint, model, DOC, epc, best_answer_type_score,
            best_iter, False, best_entity_score, timer)
        if is_early_stop:
            break

    print('\n*******Train Done********{}'.format(
        time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
    minutes = (time.time() - start) // 60
    if minutes < 60:
        print('best:{:.2f}, iter:{}, time:{} mins, lr:{:.1e}, '.format(
            best_answer_type_score, best_iter, minutes, opt.param_groups[0]['lr']))
    else:
        hours = minutes / 60
        print('best:{:.2f}, iter:{}, time:{:.1f} hours, lr:{:.1e}, '.format(
            best_answer_type_score, best_iter, hours, opt.param_groups[0]['lr']))

    checkpoint = torch.load('{}/{}.best.pt'.format(args.model_path, args.model),
                            map_location='cpu')
    model.load_state_dict(checkpoint['model'])

    with torch.no_grad():
        if args.loss:
            entity_score = valid_model(args, model, dev, DOC, 'loss')
            print('epc:{}, loss:{:.2f} best:{:.2f}\n'.format(
                epc, entity_score, best_answer_type_score))
        else:
            entity_acc, answer_type_acc, ktau, pm = valid_model(args, model, dev, DOC)
            print('test entity_acc:{:.4%} answer_type_acc:{:.2%} ktau:{:.4f} pm:{:.2%}'.format(
                entity_acc, answer_type_acc, ktau, pm))
def train(args, train_iter, dev, fields, checkpoint):
    model = PointerNet(args)
    if args.gpu:
        model = model.cuda()
    DOC, ORDER, GRAPH = fields
    print('1:', DOC.vocab.itos[1])
    if DOC.vocab.vectors:
        model.load_pretrained_emb(DOC.vocab.vectors)
    print_params(model)
    print(model)

    opt = Optimizer.from_opt(model, args)
    # wd = 1e-5
    # opt = torch.optim.Adadelta(model.parameters(), lr=args.lr, rho=0.95, weight_decay=wd)

    best_score = -np.inf
    best_iter = 0
    offset = 0

    criterion = nn.NLLLoss(reduction='none')
    model.equip(criterion)

    start = time.time()
    early_stop = args.early_stop

    test_data = DocDataset(path=args.test, text_field=DOC,
                           order_field=ORDER, graph_field=GRAPH)
    test_real = DocIter(test_data, 1, device="cuda" if args.gpu else "cpu",
                        batch_size_fn=None, train=False, repeat=False,
                        shuffle=False, sort=False)

    steps = 0
    total_loss = 0
    epc = 0
    t1 = time.time()
    # for epc in range(args.maximum_steps):
    while True:
        for iters, batch in enumerate(train_iter):
            model.train()
            model.zero_grad()

            # print("batch.doc:", batch.doc)
            loss = model(batch.doc, batch.order, batch.doc_len, batch.e_words,
                         batch.elocs)
            loss.backward()
            opt.step()
            steps += 1
            total_loss += loss

            if steps % args.report == 0:
                t2 = time.time()
                total_loss /= args.report
                print('epc:{} steps:{}/{} loss:{:.4f} t:{:.2f} lr:{:.8f}'.format(
                    epc, steps, args.maximum_steps, total_loss, t2 - t1,
                    opt.learning_rate()))
                total_loss = 0
                t1 = t2

            if steps % args.valid_steps == 0:
                with torch.no_grad():
                    print('valid..............')
                    if args.loss:
                        score = valid_model(args, model, dev, DOC, 'loss')
                        print('epc:{}, loss:{:.2f} best:{:.2f}\n'.format(
                            epc, score, best_score))
                    else:
                        score, pmr, ktau, _ = valid_model(args, model, dev, DOC)
                        # print('epc:{}, val acc:{:.4f} best:{:.4f} pmr:{:.2f} ktau:{:.4f}'.format(epc, score, best_score, pmr, ktau))
                        print('Val acc:{:.4f} pmr:{:.2f} ktau:{:.4f}'.format(
                            score, pmr, ktau))

                    checkpoint = {
                        'model': model.state_dict(),
                        'args': args,
                        'loss': best_score
                    }
                    torch.save(checkpoint,
                               '{}/{}.pt'.format(args.model_path, steps))

        # if early_stop and (epc - best_iter) >= early_stop:
        #     print('early stop at epc {}'.format(epc))
        #     break
        epc += 1
        if steps >= args.maximum_steps:
            break

    print('\n*******Train Done********{}'.format(
        time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
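# --- Hypothetical sketch, not part of the original script. ---
# The train() variant above assumes an `Optimizer` wrapper that exposes
# Optimizer.from_opt(model, args), .step(), and .learning_rate(); the real
# class presumably lives elsewhere in the repository. The stand-in below,
# named _OptimizerSketch so it does not shadow the actual import, only
# illustrates that interface, backed by the same Adadelta settings the other
# train() variants in this file use.
class _OptimizerSketch:
    def __init__(self, optimizer):
        self._optimizer = optimizer

    @classmethod
    def from_opt(cls, model, args):
        # Mirror the Adadelta configuration used by the other train() variants.
        return cls(torch.optim.Adadelta(model.parameters(), lr=args.lr,
                                        rho=0.95, weight_decay=1e-5))

    def step(self):
        # Apply the accumulated gradients.
        self._optimizer.step()

    def learning_rate(self):
        # Report the current learning rate for logging.
        return self._optimizer.param_groups[0]['lr']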
def train(args, train_iter, dev, fields, checkpoint, permutation_file,
          permutation_length):
    if is_permutation_task():
        model = PermutationPredictor(args)
    else:
        model = PointerNet(args)
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'], strict=False)
    model.cuda()
    DOC, ORDER, GRAPH = fields
    print('1:', DOC.vocab.itos[1])
    model.load_pretrained_emb(DOC.vocab.vectors)
    print_params(model)
    print(model)

    wd = 1e-5
    opt = torch.optim.Adadelta(model.parameters(), lr=args.lr, rho=0.95,
                               weight_decay=wd)

    best_score = -np.inf
    best_iter = 0
    offset = 0

    criterion = nn.NLLLoss(reduction='none')
    model.equip(criterion)

    start = time.time()
    early_stop = args.early_stop

    test_data = DocDataset(path=args.test, text_field=DOC, order_field=ORDER,
                           graph_field=GRAPH,
                           permutation_length=permutation_length,
                           permutation_number=args.permutation_number)
    test_real = DocIter(test_data, 1, permutation_file, permutation_length,
                        device='cuda', batch_size_fn=None, train=False,
                        repeat=False, shuffle=False, sort=False)

    history = {"acc": [], "loss": [], "val_acc": [], "epoch": []}
    val_loss = 0.0
    val_steps = 0

    for epc in range(args.maximum_steps):
        for iters, batch in enumerate(train_iter):
            model.train()
            model.zero_grad()

            t1 = time.time()
            loss = model(batch.doc, batch.order, batch.doc_len, batch.e_words,
                         batch.elocs, batch.permutation_lables)
            loss.backward()
            opt.step()
            t2 = time.time()
            print('epc:{} iter:{} loss:{:.2f} t:{:.2f} lr:{:.1e}'.format(
                epc, iters + 1, loss, t2 - t1, opt.param_groups[0]['lr']))
            val_loss += loss.detach().cpu().numpy()
            val_steps += 1

        if epc < 5:
            continue

        with torch.no_grad():
            print('valid..............')
            if args.loss:
                score = valid_model(args, model, dev, DOC, 'loss')
                print('epc:{}, loss:{:.2f} best:{:.2f}\n'.format(
                    epc, score, best_score))
            else:
                score, pmr, ktau, _ = valid_model(args, model, dev, DOC)
                history["epoch"].append(epc)
                history["acc"].append(score)
                print('epc:{}, val acc:{:.4f} best:{:.4f} pmr:{:.2f} ktau:{:.4f}'.format(
                    epc, score, best_score, pmr, ktau))

        if score > best_score:
            best_score = score
            best_iter = epc
            print('save best model at epc={}'.format(epc))
            checkpoint = {
                'model': model.state_dict(),
                'args': args,
                'loss': best_score
            }
            if is_permutation_task():
                path = '{}/{}.best.pt'.format(args.model_path, args.model)
            else:
                path = '{}/{}.sentence_order_best.pt'.format(
                    args.model_path, args.model)
            torch.save(checkpoint, path)

        if early_stop and (epc - best_iter) >= early_stop:
            print('early stop at epc {}'.format(epc))
            break

    print('\n*******Train Done********{}'.format(
        time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
    minutes = (time.time() - start) // 60
    if minutes < 60:
        print('best:{:.2f}, iter:{}, time:{} mins, lr:{:.1e}, '.format(
            best_score, best_iter, minutes, opt.param_groups[0]['lr']))
    else:
        hours = minutes / 60
        print('best:{:.2f}, iter:{}, time:{:.1f} hours, lr:{:.1e}, '.format(
            best_score, best_iter, hours, opt.param_groups[0]['lr']))

    checkpoint = torch.load(path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])
    with torch.no_grad():
        acc, pmr, ktau, pm = valid_model(args, model, test_real, DOC,
                                         shuflle_times=1)
        print('test acc:{:.4%} pmr:{:.2%} ktau:{:.4f} pm:{:.2%}'.format(
            acc, pmr, ktau, pm))

    loss = val_loss / val_steps
    return best_score, loss
def run_model(args):
    if args.mode == 'train' or args.mode == 'example':
        if args.load_from is not None and len(args.load_from) == 1:
            load_from = args.load_from[0]
            print('{} load the checkpoint from {} for initialization or resume'.format(
                curtime(), load_from))
            checkpoint = torch.load(load_from, map_location='cpu')
        else:
            checkpoint = None

        # if not resuming (initialization), only the model parameters are needed
        if args.resume:
            print('update args from checkpoint')
            load_dict = checkpoint['args'].__dict__
            except_name = ['mode', 'resume', 'maximum_steps']
            override(args, load_dict, tuple(except_name))

        main_path = Path(args.main_path)
        model_path = main_path / args.model_path
        decoding_path = main_path / args.decoding_path

        for path in [model_path, decoding_path]:
            path.mkdir(parents=True, exist_ok=True)

        args.model_path = str(model_path)
        args.decoding_path = str(decoding_path)

        if args.model == '[time]':
            args.model = time.strftime("%m.%d_%H.%M.", time.gmtime())

        # setup random seeds
        set_seeds(args.seed)

        # special process, shuffle each document
        # DOC = DocField(batch_first=True, include_lengths=True, eos_token='<eos>', init_token='<bos>')
        DOC = DocField(batch_first=True, include_lengths=True)
        ORDER = data.Field(batch_first=True, include_lengths=True, pad_token=0,
                           use_vocab=False, sequential=True)
        GRAPH = GraphField(batch_first=True)
        LABEL_FIELD = data.Field(batch_first=True, include_lengths=True,
                                 use_vocab=False)

        train_data = DocDataset(path=args.corpus, text_field=DOC,
                                order_field=ORDER, graph_field=GRAPH)
        dev_data = DocDataset(path=args.valid, text_field=DOC,
                              order_field=ORDER, graph_field=GRAPH)

        DOC.vocab = torch.load(args.vocab)
        print('vocab {} loaded'.format(args.vocab))
        args.__dict__.update({'doc_vocab': len(DOC.vocab)})

        train_flag = True
        train_real = DocIter(train_data, args.batch_size, device='cuda',
                             train=train_flag, shuffle=train_flag,
                             sort_key=lambda x: len(x.doc))

        devbatch = 1
        dev_real = DocIter(dev_data, devbatch, device='cuda',
                           batch_size_fn=None, train=False, repeat=False,
                           shuffle=False, sort=False)

        args_str = json.dumps(args.__dict__, indent=4, sort_keys=True)
        print(args_str)

        print('{} Start training'.format(curtime()))
        train(args, train_real, dev_real, (DOC, ORDER, GRAPH), checkpoint)
    else:
        if len(args.load_from) == 1:
            load_from = '{}.coqa_best.pt'.format(args.load_from[0])
            print('{} load the best checkpoint from {}'.format(curtime(), load_from))
            checkpoint = torch.load(load_from, map_location='cpu')
        else:
            raise RuntimeError('must load model')

        # when translating, load_dict updates args except for a few fields
        print('update args from checkpoint')
        load_dict = checkpoint['args'].__dict__
        except_name = ['mode', 'load_from', 'test', 'writetrans', 'beam_size',
                       'batch_size']
        override(args, load_dict, tuple(except_name))

        print('{} Load test set'.format(curtime()))

        DOC = DocField(batch_first=True, include_lengths=True)
        ORDER = data.Field(batch_first=True, include_lengths=True, pad_token=0,
                           use_vocab=False, sequential=True)
        GRAPH = GraphField(batch_first=True)
        DOC.vocab = torch.load(args.vocab)
        print('vocab {} loaded'.format(args.vocab))
        args.__dict__.update({'doc_vocab': len(DOC.vocab)})

        args_str = json.dumps(args.__dict__, indent=4, sort_keys=True)
        print(args_str)

        test_data = DocDataset(path=args.test, text_field=DOC,
                               order_field=ORDER, graph_field=GRAPH)
        test_real = DocIter(test_data, 1, device='cuda', batch_size_fn=None,
                            train=False, repeat=False, shuffle=False, sort=False)
        print('{} Load data done'.format(curtime()))

        start = time.time()
        decode(args, test_real, (DOC, ORDER), checkpoint)
        print('{} Decode done, time {} mins'.format(curtime(),
                                                    (time.time() - start) / 60))
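# --- Hypothetical entry-point sketch, not part of the original script. ---
# A minimal illustration of how run_model() above might be wired to a CLI,
# assuming an argparse namespace whose attribute names mirror the ones
# referenced in this file (mode, corpus, valid, test, vocab, load_from, lr,
# batch_size, seed, ...). The repository presumably builds `args` in its own
# config module; the flag names, defaults, and the extra flags needed by the
# other train() variants (report, valid_steps, permutation_number, ...) are
# assumptions here.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', default='train')
    parser.add_argument('--corpus', help='path to the training set')
    parser.add_argument('--valid', help='path to the validation set')
    parser.add_argument('--test', help='path to the test set')
    parser.add_argument('--vocab', help='path to the saved torchtext vocab')
    parser.add_argument('--load_from', nargs='*', default=None)
    parser.add_argument('--model', default='[time]')
    parser.add_argument('--main_path', default='.')
    parser.add_argument('--model_path', default='models')
    parser.add_argument('--decoding_path', default='decoding')
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--lr', type=float, default=1.0)
    parser.add_argument('--seed', type=int, default=1234)
    parser.add_argument('--maximum_steps', type=int, default=100)
    parser.add_argument('--early_stop', type=int, default=5)
    parser.add_argument('--resume', action='store_true')
    parser.add_argument('--loss', action='store_true')
    parser.add_argument('--gpu', action='store_true')

    run_model(parser.parse_args())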