# imports used by the excerpts below; project-level names such as Dataset,
# Resnet, Chunker, make_parser, and optimizer_to come from the surrounding repo
import json
import os
import pprint
import random
import time
from importlib import import_module

import torch


def __init__(self, args, manager):
    # args and manager
    self.args = args
    self.manager = manager

    # load splits
    with open(self.args.splits) as f:
        self.splits = json.load(f)
    pprint.pprint({k: len(v) for k, v in self.splits.items()})

    # load model
    print("Loading: ", self.args.model_path)
    M = import_module(self.args.model)
    self.model, optimizer = M.Module.load(self.args.model_path)
    self.model.share_memory()
    self.model.eval()
    self.model.test_mode = True

    # updated args
    self.model.args.dout = self.args.model_path.replace(
        self.args.model_path.split('/')[-1], '')
    self.model.args.data = self.args.data if self.args.data else self.model.args.data

    # preprocess and save
    if args.preprocess:
        print("\nPreprocessing dataset and saving to %s folders ... This will take a while. Do this once as required." % self.model.args.pp_folder)
        self.model.args.fast_epoch = self.args.fast_epoch
        dataset = Dataset(self.model.args, self.model.vocab)
        dataset.preprocess_splits(self.splits)

    # load resnet
    args.visual_model = 'resnet18'
    self.resnet = Resnet(args, eval=True, share_memory=True, use_conv_feat=True)

    # gpu
    if self.args.gpu:
        self.model = self.model.to(torch.device('cuda'))

    # success and failure lists
    self.create_stats()

    # set random seed for shuffling
    random.seed(int(time.time()))
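# A minimal sketch of the checkpoint contract assumed by M.Module.load above.
# This is an assumption for illustration: the real implementation lives in the
# dynamically imported model module, and the checkpoint keys used here
# ('model', 'optim', 'args', 'vocab') are hypothetical.
def load_checkpoint_sketch(module_cls, fsave):
    save = torch.load(fsave)                        # one file holds everything
    model = module_cls(save['args'], save['vocab'])
    model.load_state_dict(save['model'])            # restore network weights
    optimizer = torch.optim.Adam(model.parameters())
    optimizer.load_state_dict(save['optim'])        # restore optimizer state
    return model, optimizer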
def main():
    parser = make_parser()

    # args and init
    args = parser.parse_args()
    args.dout = args.dout.format(**vars(args))
    torch.manual_seed(args.seed)

    # check if dataset has been preprocessed
    if not os.path.exists(os.path.join(args.data, "%s.vocab" % args.pp_folder)) and not args.preprocess:
        raise Exception("Dataset not processed; run with --preprocess")

    # make output dir
    pprint.pprint(args)
    if not os.path.isdir(args.dout):
        os.makedirs(args.dout)

    # load train/valid/test splits
    with open(args.splits) as f:
        splits = json.load(f)
    pprint.pprint({k: len(v) for k, v in splits.items()})

    # preprocess and save
    if args.preprocess:
        print("\nPreprocessing dataset and saving to %s folders ... This will take a while. Do this once as required." % args.pp_folder)
        dataset = Dataset(args, None)
        dataset.preprocess_splits(splits)
        vocab = torch.load(os.path.join(args.dout, "%s.vocab" % args.pp_folder))
    else:
        vocab = torch.load(os.path.join(args.data, "%s.vocab" % args.pp_folder))

    # load model
    if args.resume:
        print("Loading: " + args.resume)
        model, optimizer = Chunker.load(args.resume)
    else:
        model = Chunker(args, vocab)
        optimizer = None

    # to gpu
    if args.gpu:
        model = model.to(torch.device('cuda'))
        if optimizer is not None:
            optimizer_to(optimizer, torch.device('cuda'))

    # start train loop
    model.run_train(splits, optimizer=optimizer)
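# optimizer_to is called in main() but not defined in this excerpt. A minimal
# sketch, assuming it moves all optimizer state tensors (e.g. Adam's moment
# buffers) to the target device so they match the relocated model parameters:
def optimizer_to(optim, device):
    for state in optim.state.values():    # one state dict per parameter
        for k, v in state.items():
            if torch.is_tensor(v):
                state[k] = v.to(device)   # e.g. 'exp_avg' / 'exp_avg_sq' for Adam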
raise Exception("Dataset not processed; run with --preprocess") # make output dir pprint.pprint(args) if not os.path.isdir(args.dout): os.makedirs(args.dout) # load train/valid/tests splits with open(args.splits) as f: splits = json.load(f) pprint.pprint({k: len(v) for k, v in splits.items()}) # preprocess and save if args.preprocess: print("\nPreprocessing dataset and saving to %s folders ... This will take a while. Do this once as required." % args.pp_folder) dataset = Dataset(args, None) dataset.preprocess_splits(splits) vocab = torch.load(os.path.join(args.dout, "%s.vocab" % args.pp_folder)) else: vocab = torch.load(os.path.join(args.data, "%s.vocab" % args.pp_folder)) # load model M = import_module('model.{}'.format(args.model)) if args.resume: print("Loading: " + args.resume) model, optimizer = M.Module.load(args.resume) else: model = M.Module(args, vocab) optimizer = None # to gpu