pad_token=0, use_vocab=False, sequential=True) GRAPH = GraphField(batch_first=True) DOC.vocab = torch.load(args.vocab) print('vocab {} loaded'.format(args.vocab)) args.__dict__.update({'doc_vocab': len(DOC.vocab)}) args_str = json.dumps(args.__dict__, indent=4, sort_keys=True) print(args_str) test_data = DocDataset(path=args.test, text_field=DOC, order_field=ORDER, graph_field=GRAPH) test_real = DocIter(test_data, 1, device="cuda" if args.gpu else "cpu", batch_size_fn=None, train=False, repeat=False, shuffle=False, sort=False) print('{} Load data done'.format(curtime())) start = time.time() decode(args, test_real, (DOC, ORDER), checkpoint) print('{} Decode done, time {} mins'.format( curtime(), (time.time() - start) / 60))
def run_model(args):
    """Dispatch on ``args.mode``: train (or 'example') a model, otherwise decode.

    Args:
        args: parsed command-line namespace. Mutated in place: output paths are
            resolved under ``args.main_path``, ``doc_vocab`` is added, and in
            resume/decode mode most fields are overridden from the checkpoint.
    """
    if args.mode == 'train' or args.mode == 'example':
        _run_train(args)
    else:
        _run_decode(args)


def _select_device(args):
    """Return the torch device string, honoring ``args.gpu`` when present.

    The decode helper elsewhere in this file selects
    ``"cuda" if args.gpu else "cpu"``; defaulting to True preserves the
    original hard-coded ``'cuda'`` behaviour for argument namespaces that
    lack a ``gpu`` attribute.
    """
    return 'cuda' if getattr(args, 'gpu', True) else 'cpu'


def _build_fields():
    """Create the (DOC, ORDER, GRAPH) fields shared by the train and decode paths."""
    doc_field = DocField(batch_first=True, include_lengths=True)
    order_field = data.Field(batch_first=True, include_lengths=True,
                             pad_token=0, use_vocab=False, sequential=True)
    graph_field = GraphField(batch_first=True)
    return doc_field, order_field, graph_field


def _load_vocab(args, doc_field):
    """Load the serialized vocabulary into ``doc_field`` and record its size on args."""
    doc_field.vocab = torch.load(args.vocab)
    print('vocab {} loaded'.format(args.vocab))
    args.__dict__.update({'doc_vocab': len(doc_field.vocab)})


def _run_train(args):
    """Resolve paths, build datasets/iterators, and launch training (optionally resuming)."""
    # A single --load_from entry means "initialize or resume from this checkpoint".
    if args.load_from is not None and len(args.load_from) == 1:
        load_from = args.load_from[0]
        print('{} load the checkpoint from {} for initilize or resume'.format(
            curtime(), load_from))
        checkpoint = torch.load(load_from, map_location='cpu')
    else:
        checkpoint = None

    # if not resume(initilize), only need model parameters
    if args.resume:
        if checkpoint is None:
            # Was an opaque TypeError on checkpoint['args']; fail with a clear message.
            raise RuntimeError('resume requires exactly one --load_from checkpoint')
        print('update args from checkpoint')
        load_dict = checkpoint['args'].__dict__
        except_name = ['mode', 'resume', 'maximum_steps']
        override(args, load_dict, tuple(except_name))

    main_path = Path(args.main_path)
    model_path = main_path / args.model_path
    decoding_path = main_path / args.decoding_path
    for path in (model_path, decoding_path):
        path.mkdir(parents=True, exist_ok=True)
    args.model_path = str(model_path)
    args.decoding_path = str(decoding_path)

    if args.model == '[time]':
        # Timestamped model name placeholder, e.g. "06.15_12.30."
        args.model = time.strftime("%m.%d_%H.%M.", time.gmtime())

    # setup random seeds
    set_seeds(args.seed)

    doc_field, order_field, graph_field = _build_fields()
    train_data = DocDataset(path=args.corpus, text_field=doc_field,
                            order_field=order_field, graph_field=graph_field)
    dev_data = DocDataset(path=args.valid, text_field=doc_field,
                          order_field=order_field, graph_field=graph_field)
    _load_vocab(args, doc_field)

    device = _select_device(args)
    train_real = DocIter(train_data, args.batch_size, device=device,
                         train=True, shuffle=True,
                         sort_key=lambda x: len(x.doc))
    # Dev iterator uses batch size 1, in corpus order, with no shuffling.
    dev_real = DocIter(dev_data, 1, device=device, batch_size_fn=None,
                       train=False, repeat=False, shuffle=False, sort=False)

    print(json.dumps(args.__dict__, indent=4, sort_keys=True))
    print('{} Start training'.format(curtime()))
    train(args, train_real, dev_real, (doc_field, order_field, graph_field),
          checkpoint)


def _run_decode(args):
    """Load the best checkpoint, rebuild fields, and decode the test set."""
    if len(args.load_from) == 1:
        load_from = '{}.coqa_best.pt'.format(args.load_from[0])
        print('{} load the best checkpoint from {}'.format(curtime(), load_from))
        checkpoint = torch.load(load_from, map_location='cpu')
    else:
        raise RuntimeError('must load model')

    # when translate load_dict update args except some
    print('update args from checkpoint')
    load_dict = checkpoint['args'].__dict__
    except_name = ['mode', 'load_from', 'test', 'writetrans', 'beam_size',
                   'batch_size']
    override(args, load_dict, tuple(except_name))

    print('{} Load test set'.format(curtime()))
    doc_field, order_field, graph_field = _build_fields()
    _load_vocab(args, doc_field)

    print(json.dumps(args.__dict__, indent=4, sort_keys=True))

    test_data = DocDataset(path=args.test, text_field=doc_field,
                           order_field=order_field, graph_field=graph_field)
    test_real = DocIter(test_data, 1, device=_select_device(args),
                        batch_size_fn=None, train=False, repeat=False,
                        shuffle=False, sort=False)
    print('{} Load data done'.format(curtime()))

    start = time.time()
    decode(args, test_real, (doc_field, order_field), checkpoint)
    print('{} Decode done, time {} mins'.format(
        curtime(), (time.time() - start) / 60))