# NOTE(review): whitespace-mangled fragment. It resumes training from the most
# recently created checkpoint (epoch/iter counters, model and optimizer state
# dicts, optional best_val_accuracy), rebuilds the LR scheduler, loss and the
# train/val DataLoaders, then launches one_stage_train. The one_stage_train
# call is truncated mid-argument-list here, so the code is left byte-identical
# and only annotated.
# NOTE(review): torch.load is called without map_location — assumes the
# checkpoint's device matches the runtime device; confirm before reuse.
latest_file = max(files, key=os.path.getctime) info = torch.load(latest_file) i_epoch = info['epoch'] i_iter = info['iter'] sd = info['state_dict'] op_sd = info['optimizer'] my_model.load_state_dict(sd) my_optim.load_state_dict(op_sd) if 'best_val_accuracy' in info: best_accuracy = info['best_val_accuracy'] scheduler = get_optim_scheduler(my_optim) my_loss = get_loss_criterion(cfg.loss) data_set_val = prepare_eval_data_set(**cfg['data'], **cfg['model']) data_reader_trn = DataLoader(dataset=train_dataSet, batch_size=cfg.data.batch_size, shuffle=True, num_workers=cfg.data.num_workers) data_reader_val = DataLoader(data_set_val, shuffle=True, batch_size=cfg.data.batch_size, num_workers=cfg.data.num_workers) my_model.train() print("BEGIN TRAINING...") one_stage_train(my_model, data_reader_trn, my_optim, my_loss, data_reader_eval=data_reader_val,
# NOTE(review): whitespace-mangled fragment — a double-quoted, black-style
# formatted variant of the same checkpoint-resume logic as the fragment above:
# load newest checkpoint, restore model/optimizer state, rebuild scheduler,
# loss and DataLoaders, switch the model to train mode. Truncated after
# my_model.train(), so the code is left byte-identical and only annotated.
latest_file = max(files, key=os.path.getctime) info = torch.load(latest_file) i_epoch = info["epoch"] i_iter = info["iter"] sd = info["state_dict"] op_sd = info["optimizer"] my_model.load_state_dict(sd) my_optim.load_state_dict(op_sd) if "best_val_accuracy" in info: best_accuracy = info["best_val_accuracy"] scheduler = get_optim_scheduler(my_optim) my_loss = get_loss_criterion(cfg.loss) data_set_val = prepare_eval_data_set(**cfg["data"], **cfg["model"]) data_reader_trn = DataLoader( dataset=train_dataSet, batch_size=cfg.data.batch_size, shuffle=True, num_workers=cfg.data.num_workers, ) data_reader_val = DataLoader( data_set_val, shuffle=True, batch_size=cfg.data.batch_size, num_workers=cfg.data.num_workers, ) my_model.train()
# NOTE(review): whitespace-mangled fragment of an ensemble-evaluation script.
# It globs model checkpoints under parent_dir, derives matching config paths,
# builds the test DataLoader from the first config, then iterates over
# (config, checkpoint) pairs accumulating softmax outputs. Truncated at both
# ends (starts inside a list comprehension, ends mid-loop), so the code is
# left byte-identical and only annotated.
# NOTE(review): yaml.load(f) has no Loader= argument — unsafe on untrusted
# files and a TypeError on PyYAML >= 6; should be yaml.safe_load(f). Flagged
# only, since the fragment cannot be rewritten safely here.
# NOTE(review): `file` shadows nothing harmful here but `for file in ...`
# shadows the (Python 2) builtin name; cosmetic.
file for file in glob.glob(parent_dir + "/**/" + MODELNAME, recursive=True) ] config_files = [c.replace(MODELNAME, CONFIG) for c in model_pths] if len(esbl_sizes) == 0: esbl_sizes = range(1, len(config_files) + 1) config_file = config_files[0] with open(config_file, "r") as f: config = yaml.load(f) batch_size = config["data"]["batch_size"] data_set_test = prepare_eval_data_set(**config["data"], **config["model"], verbose=True) data_reader_test = DataLoader(data_set_test, shuffle=False, batch_size=batch_size, num_workers=5) ans_dic = data_set_test.answer_dict accumulated_softmax = None final_result = {} n_model = 0 for c_file, model_file in zip(config_files, model_pths): with open(c_file, "r") as f: config = yaml.load(f) myModel = build_model(config, data_set_test)
# NOTE(review): whitespace-mangled fragment — another checkpoint-resume
# variant that additionally constructs a BertAdam optimizer (lr=5e-5,
# warmup=0.1) over optimizer_grouped_parameters. Truncated at both ends
# (starts mid-restore, ends mid one_stage_train call), so the code is left
# byte-identical and only annotated.
# NOTE(review): bert_optimizer is created but the visible one_stage_train
# call is launched with my_optim; whether bert_optimizer is passed further
# down the (truncated) argument list cannot be confirmed from here — verify
# it is actually used.
op_sd = info['optimizer'] my_model.load_state_dict(sd) my_optim.load_state_dict(op_sd) if 'best_val_accuracy' in info: best_accuracy = info['best_val_accuracy'] bert_optimizer = BertAdam(optimizer_grouped_parameters, lr=5e-5, warmup=0.1, t_total=cfg.training_parameters.max_iter) scheduler = get_optim_scheduler(my_optim) my_loss = get_loss_criterion(cfg.loss) data_set_val = prepare_eval_data_set(**cfg['data'], **cfg['model']) data_reader_trn = DataLoader(dataset=train_dataSet, batch_size=cfg.data.batch_size, shuffle=True, num_workers=cfg.data.num_workers) data_reader_val = DataLoader(data_set_val, shuffle=True, batch_size=cfg.data.batch_size, num_workers=cfg.data.num_workers) my_model.train() print("BEGIN TRAINING...") one_stage_train(my_model, data_reader_trn, my_optim,
def main(argv):
    """Train the adversarial VQA model end-to-end, then evaluate the best
    checkpoint on the test split.

    Flow: parse args/config -> seed RNGs -> prepare output dirs -> build
    datasets, models and optimizers -> optionally resume from the newest
    snapshot -> run one_stage_train -> reload best_model.pth and report
    test accuracy/loss.

    Relies on module-level globals visible elsewhere in the file (cfg,
    use_cuda, Timer, parse_args, the prepare_*_data_set helpers, ...).

    Returns:
        (acc_test, loss_test): test metrics of the best model, or (0, 0)
        when no best_model.pth was produced.
    """
    prg_timer = Timer()
    args = parse_args()
    config_file = args.config
    # A non-positive --seed means "pick one at random": runs are reproducible
    # only when an explicit positive seed is supplied.
    seed = args.seed if args.seed > 0 else random.randint(1, 100000)
    process_config(config_file, args.config_overwrite)
    torch.manual_seed(seed)
    if use_cuda:
        torch.cuda.manual_seed(seed)
    basename = 'default' \
        if args.config is None else os.path.basename(args.config)
    # --config_overwrite carries demjson-encoded config overrides.
    cmd_cfg_obj = demjson.decode(args.config_overwrite) \
        if args.config_overwrite is not None else None
    middle_name, final_name = get_output_folder_name(basename, cmd_cfg_obj,
                                                     seed, args.suffix)
    out_dir = args.out_dir if args.out_dir is not None else os.getcwd()
    snapshot_dir = os.path.join(out_dir, "results", middle_name, final_name)
    boards_dir = os.path.join(out_dir, "boards", middle_name, final_name)
    # --force_restart wipes any previous run at the same output location.
    if args.force_restart:
        if os.path.exists(snapshot_dir):
            shutil.rmtree(snapshot_dir)
        if os.path.exists(boards_dir):
            shutil.rmtree(boards_dir)
    os.makedirs(snapshot_dir, exist_ok=True)
    os.makedirs(boards_dir, exist_ok=True)
    print("Results: {}".format(snapshot_dir))
    print("Tensorboard: {}".format(boards_dir))
    print("fast data reader = " + str(cfg['data']['image_fast_reader']))
    print("use cuda = " + str(use_cuda))
    print("Adversary nhid: {}".format(cfg.adv_model.nhid))
    print("lambda_q: {}".format(cfg.training_parameters.lambda_q))
    print("lambda_grl: {}".format(cfg.training_parameters.lambda_grl))
    print("lambda_grl_start: {}".format(
        cfg.training_parameters.lambda_grl_start))
    print("lambda_grl_steps: {}".format(
        cfg.training_parameters.lambda_grl_steps))
    if cfg.training_parameters.lambda_grl > 0:
        print("WARNING: lambda_grl {} is pos., but GRL expects neg. values".
              format(cfg.training_parameters.lambda_grl))
    print("LRs: {} {}".format(cfg.optimizer.par.lr, cfg.adv_optimizer.par.lr))
    print("Static LR: {}".format(cfg.training_parameters.static_lr))
    # dump the config file to snap_shot_dir so the run is self-describing
    config_to_write = os.path.join(snapshot_dir, "config.yaml")
    dump_config(cfg, config_to_write)
    train_dataSet = prepare_train_data_set(**cfg['data'], **cfg['model'])
    print("=> Loaded trainset: {} examples".format(len(train_dataSet)))
    main_model, adv_model = build_model(cfg, train_dataSet)
    # Unwrap a DataParallel-style wrapper (.module) so the per-group optimizer
    # settings below reach the raw submodules.
    model = main_model
    if hasattr(main_model, 'module'):
        model = main_model.module
    # Separate parameter groups; the image-feature encoders get a 10x
    # smaller learning rate than the rest of the model.
    params = [{
        'params': model.image_embedding_models_list.parameters()
    }, {
        'params': model.question_embedding_models.parameters()
    }, {
        'params': model.multi_modal_combine.parameters()
    }, {
        'params': model.classifier.parameters()
    }, {
        'params': model.image_feature_encode_list.parameters(),
        'lr': cfg.optimizer.par.lr * 0.1
    }]
    main_optim = getattr(optim, cfg.optimizer.method)(params,
                                                      **cfg.optimizer.par)
    # NOTE(review): the adversary reuses cfg.optimizer.method (not a separate
    # adv method) but takes its hyper-parameters from cfg.adv_optimizer.par —
    # confirm this asymmetry is intentional.
    adv_optim = getattr(optim, cfg.optimizer.method)(adv_model.parameters(),
                                                     **cfg.adv_optimizer.par)
    i_epoch = 0
    i_iter = 0
    best_accuracy = 0
    # Resume from the most recently created snapshot unless a fresh start
    # was requested.
    if not args.force_restart:
        md_pths = os.path.join(snapshot_dir, "model_*.pth")
        files = glob.glob(md_pths)
        if len(files) > 0:
            latest_file = max(files, key=os.path.getctime)
            print("=> Loading save from {}".format(latest_file))
            info = torch.load(latest_file)
            i_epoch = info['epoch']
            i_iter = info['iter']
            main_model.load_state_dict(info['state_dict'])
            main_optim.load_state_dict(info['optimizer'])
            adv_model.load_state_dict(info['adv_state_dict'])
            adv_optim.load_state_dict(info['adv_optimizer'])
            if 'best_val_accuracy' in info:
                best_accuracy = info['best_val_accuracy']
    scheduler = get_optim_scheduler(main_optim)
    adv_scheduler = get_optim_scheduler(adv_optim)
    my_loss = get_loss_criterion(cfg.loss)
    dataset_val = prepare_eval_data_set(**cfg['data'], **cfg['model'])
    print("=> Loaded valset: {} examples".format(len(dataset_val)))
    dataset_test = prepare_test_data_set(**cfg['data'], **cfg['model'])
    print("=> Loaded testset: {} examples".format(len(dataset_test)))
    data_reader_trn = DataLoader(dataset=train_dataSet,
                                 batch_size=cfg.data.batch_size,
                                 shuffle=True,
                                 num_workers=cfg.data.num_workers)
    data_reader_val = DataLoader(dataset_val,
                                 shuffle=True,
                                 batch_size=cfg.data.batch_size,
                                 num_workers=cfg.data.num_workers)
    data_reader_test = DataLoader(dataset_test,
                                  shuffle=True,
                                  batch_size=cfg.data.batch_size,
                                  num_workers=cfg.data.num_workers)
    main_model.train()
    adv_model.train()
    print("=> Start training...")
    one_stage_train(main_model,
                    adv_model,
                    data_reader_trn,
                    main_optim,
                    adv_optim,
                    my_loss,
                    data_reader_eval=data_reader_val,
                    data_reader_test=data_reader_test,
                    snapshot_dir=snapshot_dir,
                    log_dir=boards_dir,
                    start_epoch=i_epoch,
                    i_iter=i_iter,
                    scheduler=scheduler,
                    adv_scheduler=adv_scheduler,
                    best_val_accuracy=best_accuracy)
    print("=> Training complete.")
    # Evaluate the best snapshot (written by one_stage_train) on the
    # test split; a fresh model is built and only its weights are loaded.
    model_file = os.path.join(snapshot_dir, "best_model.pth")
    if os.path.isfile(model_file):
        print("=> Testing best model...")
        main_model, _ = build_model(cfg, dataset_test)
        main_model.load_state_dict(torch.load(model_file)['state_dict'])
        main_model.eval()
        print("=> Loaded model from file {}".format(model_file))
        print("=> Start testing...")
        acc_test, loss_test, _ = one_stage_eval_model(data_reader_test,
                                                      main_model,
                                                      one_stage_run_model,
                                                      my_loss)
        print("Final results:\nacc: {:.4f}\nloss: {:.4f}".format(
            acc_test, loss_test))
        result_file = os.path.join(snapshot_dir, 'result_on_val.txt')
        with open(result_file, 'a') as fid:
            fid.write('FINAL RESULT ON TEST: {:.6f}'.format(acc_test))
    else:
        print("File {} not found. Skipping testing.".format(model_file))
        acc_test = loss_test = 0
    # print("BEGIN PREDICTING ON TEST/VAL set...")
    # if 'predict' in cfg.run:
    #     print_eval(prepare_test_data_set, "test")
    # if cfg.run == 'train+val':
    #     print_eval(prepare_eval_data_set, "val")
    print("total runtime(h): %s" % prg_timer.end())
    return (acc_test, loss_test)
# NOTE(review): whitespace-mangled script entry point for offline prediction.
# It finalizes the config, picks the val or test split per --use_val, builds
# the DataLoader and model, then loads checkpoint weights. Truncated right
# after extracting the state dict, so the code is left byte-identical and
# only annotated.
# NOTE(review): torch.load(model_file) has no map_location — presumably the
# checkpoint device matches the runtime device; verify before reuse.
if __name__ == '__main__': args = parse_args() config_file = args.config out_file = args.out_prefix + ".json" model_file = args.model_path finalize_config(cfg, config_file, None) batch_size = cfg['data'][ 'batch_size'] if args.batch_size is None else args.batch_size if args.use_val: data_set_test = prepare_eval_data_set(**cfg['data'], **cfg['model'], verbose=True) else: data_set_test = prepare_test_data_set(**cfg['data'], **cfg['model'], verbose=True) data_reader_test = DataLoader(data_set_test, shuffle=False, batch_size=batch_size, num_workers=args.num_workers) ans_dic = data_set_test.answer_dict my_model = build_model(cfg, data_set_test) sd = torch.load(model_file)['state_dict']
# NOTE(review): whitespace-mangled fragment of an ensemble script. It starts
# inside a parser.add_argument(...) call, reads the shared config, builds the
# validation DataLoader, then collects checkpoints from --model_paths and
# recursively from --model_dirs (best_model.pth files). Truncated at both
# ends, so the code is left byte-identical and only annotated.
# NOTE(review): yaml.load(f) has no Loader= argument — unsafe on untrusted
# files and a TypeError on PyYAML >= 6; should be yaml.safe_load(f).
# NOTE(review): DataLoader uses a hard-coded batch_size=100 and ignores the
# batch_size read from the config two lines earlier — confirm intentional.
help="directories for models", default=None) args = parser.parse_args() config_file = args.config out_dir = args.out_dir model_files = args.model_paths model_dirs = args.model_dirs with open(config_file, 'r') as f: config = yaml.load(f) # get the potential shared data_config info data_root_dir = config['data']['data_root_dir'] batch_size = config['data']['batch_size'] data_set_val = prepare_eval_data_set(**config['data'], **config['model'], verbose=True) data_reader_val = DataLoader(data_set_val, shuffle=True, batch_size=100) ans_dic = data_set_val.answer_dict ans_json_out = answer_json() current_models = [] if model_files is None else [ torch.load(model_file) for model_file in model_files] if model_dirs is not None: for model_dir in model_dirs: for file in glob.glob(model_dir+"/**/best_model.pth", recursive=True): this_model = torch.load(file) current_models.append(this_model)