my_loss = get_loss_criterion(cfg.loss)

data_set_val = prepare_eval_data_set(**cfg['data'], **cfg['model'])

data_reader_trn = DataLoader(dataset=train_dataSet,
                             batch_size=cfg.data.batch_size,
                             shuffle=True,
                             num_workers=cfg.data.num_workers)
data_reader_val = DataLoader(data_set_val,
                             shuffle=True,
                             batch_size=cfg.data.batch_size,
                             num_workers=cfg.data.num_workers)

my_model.train()

print("BEGIN TRAINING...")
one_stage_train(my_model,
                data_reader_trn,
                my_optim,
                my_loss,
                data_reader_eval=data_reader_val,
                snapshot_dir=snapshot_dir,
                log_dir=boards_dir,
                start_epoch=i_epoch,
                i_iter=i_iter,
                scheduler=scheduler,
                best_val_accuracy=best_accuracy)

print("BEGIN PREDICTING ON TEST/VAL set...")
if 'predict' in cfg.run:
    print_eval(prepare_test_data_set, "test")
if cfg.run == 'train+val':
    print_eval(prepare_eval_data_set, "val")

print("total runtime(h): %s" % prg_timer.end())
def main(argv):
    prg_timer = Timer()

    args = parse_args()
    config_file = args.config
    seed = args.seed if args.seed > 0 else random.randint(1, 100000)
    process_config(config_file, args.config_overwrite)

    torch.manual_seed(seed)
    if use_cuda:
        torch.cuda.manual_seed(seed)

    basename = 'default' \
        if args.config is None else os.path.basename(args.config)
    cmd_cfg_obj = demjson.decode(args.config_overwrite) \
        if args.config_overwrite is not None else None
    middle_name, final_name = get_output_folder_name(basename,
                                                     cmd_cfg_obj,
                                                     seed,
                                                     args.suffix)

    out_dir = args.out_dir if args.out_dir is not None else os.getcwd()
    snapshot_dir = os.path.join(out_dir, "results", middle_name, final_name)
    boards_dir = os.path.join(out_dir, "boards", middle_name, final_name)

    if args.force_restart:
        if os.path.exists(snapshot_dir):
            shutil.rmtree(snapshot_dir)
        if os.path.exists(boards_dir):
            shutil.rmtree(boards_dir)

    os.makedirs(snapshot_dir, exist_ok=True)
    os.makedirs(boards_dir, exist_ok=True)

    print("Results: {}".format(snapshot_dir))
    print("Tensorboard: {}".format(boards_dir))
    print("fast data reader = " + str(cfg['data']['image_fast_reader']))
    print("use cuda = " + str(use_cuda))
    print("Adversary nhid: {}".format(cfg.adv_model.nhid))
    print("lambda_q: {}".format(cfg.training_parameters.lambda_q))
    print("lambda_grl: {}".format(cfg.training_parameters.lambda_grl))
    print("lambda_grl_start: {}".format(
        cfg.training_parameters.lambda_grl_start))
    print("lambda_grl_steps: {}".format(
        cfg.training_parameters.lambda_grl_steps))
    if cfg.training_parameters.lambda_grl > 0:
        print("WARNING: lambda_grl {} is pos., but GRL expects neg. values"
              .format(cfg.training_parameters.lambda_grl))
    print("LRs: {} {}".format(cfg.optimizer.par.lr, cfg.adv_optimizer.par.lr))
    print("Static LR: {}".format(cfg.training_parameters.static_lr))

    # dump the config file to snapshot_dir
    config_to_write = os.path.join(snapshot_dir, "config.yaml")
    dump_config(cfg, config_to_write)

    train_dataSet = prepare_train_data_set(**cfg['data'], **cfg['model'])
    print("=> Loaded trainset: {} examples".format(len(train_dataSet)))

    main_model, adv_model = build_model(cfg, train_dataSet)

    model = main_model
    if hasattr(main_model, 'module'):
        model = main_model.module

    # Parameter groups: the image feature encoders use a 10x smaller LR.
    params = [{'params': model.image_embedding_models_list.parameters()},
              {'params': model.question_embedding_models.parameters()},
              {'params': model.multi_modal_combine.parameters()},
              {'params': model.classifier.parameters()},
              {'params': model.image_feature_encode_list.parameters(),
               'lr': cfg.optimizer.par.lr * 0.1}]

    main_optim = getattr(optim, cfg.optimizer.method)(
        params, **cfg.optimizer.par)
    adv_optim = getattr(optim, cfg.optimizer.method)(
        adv_model.parameters(), **cfg.adv_optimizer.par)

    i_epoch = 0
    i_iter = 0
    best_accuracy = 0

    # Resume from the most recent checkpoint unless a fresh start was forced.
    if not args.force_restart:
        md_pths = os.path.join(snapshot_dir, "model_*.pth")
        files = glob.glob(md_pths)
        if len(files) > 0:
            latest_file = max(files, key=os.path.getctime)
            print("=> Loading save from {}".format(latest_file))
            info = torch.load(latest_file)
            i_epoch = info['epoch']
            i_iter = info['iter']
            main_model.load_state_dict(info['state_dict'])
            main_optim.load_state_dict(info['optimizer'])
            adv_model.load_state_dict(info['adv_state_dict'])
            adv_optim.load_state_dict(info['adv_optimizer'])
            if 'best_val_accuracy' in info:
                best_accuracy = info['best_val_accuracy']

    scheduler = get_optim_scheduler(main_optim)
    adv_scheduler = get_optim_scheduler(adv_optim)
    my_loss = get_loss_criterion(cfg.loss)

    dataset_val = prepare_eval_data_set(**cfg['data'], **cfg['model'])
    print("=> Loaded valset: {} examples".format(len(dataset_val)))
examples".format(len(dataset_val))) dataset_test = prepare_test_data_set(**cfg['data'], **cfg['model']) print("=> Loaded testset: {} examples".format(len(dataset_test))) data_reader_trn = DataLoader(dataset=train_dataSet, batch_size=cfg.data.batch_size, shuffle=True, num_workers=cfg.data.num_workers) data_reader_val = DataLoader(dataset_val, shuffle=True, batch_size=cfg.data.batch_size, num_workers=cfg.data.num_workers) data_reader_test = DataLoader(dataset_test, shuffle=True, batch_size=cfg.data.batch_size, num_workers=cfg.data.num_workers) main_model.train() adv_model.train() print("=> Start training...") one_stage_train(main_model, adv_model, data_reader_trn, main_optim, adv_optim, my_loss, data_reader_eval=data_reader_val, data_reader_test=data_reader_test, snapshot_dir=snapshot_dir, log_dir=boards_dir, start_epoch=i_epoch, i_iter=i_iter, scheduler=scheduler, adv_scheduler=adv_scheduler, best_val_accuracy=best_accuracy) print("=> Training complete.") model_file = os.path.join(snapshot_dir, "best_model.pth") if os.path.isfile(model_file): print("=> Testing best model...") main_model, _ = build_model(cfg, dataset_test) main_model.load_state_dict(torch.load(model_file)['state_dict']) main_model.eval() print("=> Loaded model from file {}".format(model_file)) print("=> Start testing...") acc_test, loss_test, _ = one_stage_eval_model(data_reader_test, main_model, one_stage_run_model, my_loss) print("Final results:\nacc: {:.4f}\nloss: {:.4f}".format( acc_test, loss_test)) result_file = os.path.join(snapshot_dir, 'result_on_val.txt') with open(result_file, 'a') as fid: fid.write('FINAL RESULT ON TEST: {:.6f}'.format(acc_test)) else: print("File {} not found. Skipping testing.".format(model_file)) acc_test = loss_test = 0 # print("BEGIN PREDICTING ON TEST/VAL set...") # if 'predict' in cfg.run: # print_eval(prepare_test_data_set, "test") # if cfg.run == 'train+val': # print_eval(prepare_eval_data_set, "val") print("total runtime(h): %s" % prg_timer.end()) return (acc_test, loss_test)