def main():
    """Entry point: parse args, optionally preprocess the dataset, then train.

    Relies on module-level names defined elsewhere in this file/project:
    `make_parser`, `Dataset`, `Chunker`, and `optimizer_to`.
    """
    parser = make_parser()

    # args and init
    args = parser.parse_args()
    # allow --dout templates such as "exp/{model}" to reference other args
    args.dout = args.dout.format(**vars(args))
    torch.manual_seed(args.seed)

    # check if dataset has been preprocessed
    if not os.path.exists(os.path.join(args.data, "%s.vocab" % args.pp_folder)) and not args.preprocess:
        raise Exception("Dataset not processed; run with --preprocess")

    # make output dir
    pprint.pprint(args)
    if not os.path.isdir(args.dout):
        os.makedirs(args.dout)

    # load train/valid/tests splits
    with open(args.splits) as f:
        splits = json.load(f)
        pprint.pprint({k: len(v) for k, v in splits.items()})

    # preprocess and save
    if args.preprocess:
        print("\nPreprocessing dataset and saving to %s folders ... This will take a while. Do this once as required." % args.pp_folder)
        dataset = Dataset(args, None)
        dataset.preprocess_splits(splits)
        # preprocessing writes the vocab into the output dir, so load it from there
        vocab = torch.load(os.path.join(args.dout, "%s.vocab" % args.pp_folder))
    else:
        vocab = torch.load(os.path.join(args.data, "%s.vocab" % args.pp_folder))

    # load model (resume restores both model weights and optimizer state)
    if args.resume:
        print("Loading: " + args.resume)
        model, optimizer = Chunker.load(args.resume)
    else:
        model = Chunker(args, vocab)
        optimizer = None

    # to gpu
    if args.gpu:
        model = model.to(torch.device('cuda'))
        # idiomatic fix: was `not optimizer is None`
        if optimizer is not None:
            optimizer_to(optimizer, torch.device('cuda'))

    # start train loop
    model.run_train(splits, optimizer=optimizer)
# NOTE(review): this block appears to be a mis-pasted duplicate of the tail of
# main() above, using a dynamically imported model class (M.Module) instead of
# the hard-coded Chunker. It references `args` and other locals that are not
# defined at module level — confirm whether it should live inside a function
# or be deleted as dead code. TODO confirm with the author.

# load train/valid/tests splits
with open(args.splits) as f:
    splits = json.load(f)
    pprint.pprint({k: len(v) for k, v in splits.items()})

# preprocess and save
if args.preprocess:
    print("\nPreprocessing dataset and saving to %s folders ... This will take a while. Do this once as required." % args.pp_folder)
    dataset = Dataset(args, None)
    dataset.preprocess_splits(splits)
    # preprocessing writes the vocab into the output dir, so load it from there
    vocab = torch.load(os.path.join(args.dout, "%s.vocab" % args.pp_folder))
else:
    vocab = torch.load(os.path.join(args.data, "%s.vocab" % args.pp_folder))

# load model dynamically from the `model` package by name
M = import_module('model.{}'.format(args.model))
if args.resume:
    print("Loading: " + args.resume)
    model, optimizer = M.Module.load(args.resume)
else:
    model = M.Module(args, vocab)
    optimizer = None

# to gpu
if args.gpu:
    model = model.to(torch.device('cuda'))
    # idiomatic fix: was `not optimizer is None`
    if optimizer is not None:
        optimizer_to(optimizer, torch.device('cuda'))

# start train loop
model.run_train(splits, optimizer=optimizer)