Example #1
    my_loss = get_loss_criterion(cfg.loss)

    data_set_val = prepare_eval_data_set(**cfg['data'], **cfg['model'])

    data_reader_trn = DataLoader(dataset=train_dataSet,
                                 batch_size=cfg.data.batch_size,
                                 shuffle=True,
                                 num_workers=cfg.data.num_workers)
    data_reader_val = DataLoader(data_set_val,
                                 shuffle=True,
                                 batch_size=cfg.data.batch_size,
                                 num_workers=cfg.data.num_workers)
    my_model.train()

    print("BEGIN TRAINING...")
    one_stage_train(my_model,
                    data_reader_trn,
                    my_optim, my_loss, data_reader_eval=data_reader_val,
                    snapshot_dir=snapshot_dir, log_dir=boards_dir,
                    start_epoch=i_epoch, i_iter=i_iter,
                    scheduler=scheduler, best_val_accuracy=best_accuracy)

    print("BEGIN PREDICTING ON TEST/VAL set...")

    if 'predict' in cfg.run:
        print_eval(prepare_test_data_set, "test")
    if cfg.run == 'train+val':
        print_eval(prepare_eval_data_set, "val")

    print("total runtime(h): %s" % prg_timer.end())
Example #2
    data_reader_val = DataLoader(
        data_set_val,
        shuffle=True,
        batch_size=cfg.data.batch_size,
        num_workers=cfg.data.num_workers,
    )
    my_model.train()

    print("BEGIN TRAINING...")
    one_stage_train(
        my_model,
        data_reader_trn,
        my_optim,
        my_loss,
        data_reader_eval=data_reader_val,
        snapshot_dir=snapshot_dir,
        log_dir=boards_dir,
        start_epoch=i_epoch,
        i_iter=i_iter,
        scheduler=scheduler,
        best_val_accuracy=best_accuracy,
    )

    print("BEGIN PREDICTING ON TEST/VAL set...")

    if "predict" in cfg.run:
        print_eval(prepare_test_data_set, "test")
    if cfg.run == "train+val":
        print_eval(prepare_eval_data_set, "val")

    print("total runtime(h): %s" % prg_timer.end())
Example #3
def main(argv):
    prg_timer = Timer()

    args = parse_args()
    config_file = args.config
    seed = args.seed if args.seed > 0 else random.randint(1, 100000)
    process_config(config_file, args.config_overwrite)

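    # Seed CPU and GPU RNGs so runs are reproducible for a given seed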
    torch.manual_seed(seed)
    if use_cuda:
        torch.cuda.manual_seed(seed)

    basename = 'default' \
        if args.config is None else os.path.basename(args.config)

    cmd_cfg_obj = demjson.decode(args.config_overwrite) \
        if args.config_overwrite is not None else None

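    # Derive output folder names from the config, overrides, seed and suffix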
    middle_name, final_name = get_output_folder_name(basename, cmd_cfg_obj,
                                                     seed, args.suffix)

    out_dir = args.out_dir if args.out_dir is not None else os.getcwd()

    snapshot_dir = os.path.join(out_dir, "results", middle_name, final_name)
    boards_dir = os.path.join(out_dir, "boards", middle_name, final_name)
    if args.force_restart:
        if os.path.exists(snapshot_dir):
            shutil.rmtree(snapshot_dir)
        if os.path.exists(boards_dir):
            shutil.rmtree(boards_dir)

    os.makedirs(snapshot_dir, exist_ok=True)
    os.makedirs(boards_dir, exist_ok=True)

    print("Results: {}".format(snapshot_dir))
    print("Tensorboard: {}".format(boards_dir))
    print("fast data reader = " + str(cfg['data']['image_fast_reader']))
    print("use cuda = " + str(use_cuda))

    print("Adversary nhid: {}".format(cfg.adv_model.nhid))

    print("lambda_q: {}".format(cfg.training_parameters.lambda_q))

    print("lambda_grl: {}".format(cfg.training_parameters.lambda_grl))
    print("lambda_grl_start: {}".format(
        cfg.training_parameters.lambda_grl_start))
    print("lambda_grl_steps: {}".format(
        cfg.training_parameters.lambda_grl_steps))

    if cfg.training_parameters.lambda_grl > 0:
        print("WARNING: lambda_grl {} is positive, but GRL expects "
              "negative values".format(cfg.training_parameters.lambda_grl))

    print("LRs: {} {}".format(cfg.optimizer.par.lr, cfg.adv_optimizer.par.lr))
    print("Static LR: {}".format(cfg.training_parameters.static_lr))

    # dump the config file to snapshot_dir
    config_to_write = os.path.join(snapshot_dir, "config.yaml")
    dump_config(cfg, config_to_write)

    train_dataSet = prepare_train_data_set(**cfg['data'], **cfg['model'])
    print("=> Loaded trainset: {} examples".format(len(train_dataSet)))

    main_model, adv_model = build_model(cfg, train_dataSet)

    model = main_model
    if hasattr(main_model, 'module'):
        model = main_model.module

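    # Parameter groups: image feature encoders get 0.1x the base learning rate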
    params = [{
        'params': model.image_embedding_models_list.parameters()
    }, {
        'params': model.question_embedding_models.parameters()
    }, {
        'params': model.multi_modal_combine.parameters()
    }, {
        'params': model.classifier.parameters()
    }, {
        'params': model.image_feature_encode_list.parameters(),
        'lr': cfg.optimizer.par.lr * 0.1
    }]

    main_optim = getattr(optim, cfg.optimizer.method)(params,
                                                      **cfg.optimizer.par)

    adv_optim = getattr(optim, cfg.optimizer.method)(adv_model.parameters(),
                                                     **cfg.adv_optimizer.par)

    i_epoch = 0
    i_iter = 0
    best_accuracy = 0
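    # Resume from the most recent snapshot unless a fresh start was requested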
    if not args.force_restart:
        md_pths = os.path.join(snapshot_dir, "model_*.pth")
        files = glob.glob(md_pths)
        if len(files) > 0:
            latest_file = max(files, key=os.path.getctime)
            print("=> Loading save from {}".format(latest_file))
            info = torch.load(latest_file)
            i_epoch = info['epoch']
            i_iter = info['iter']
            main_model.load_state_dict(info['state_dict'])
            main_optim.load_state_dict(info['optimizer'])
            adv_model.load_state_dict(info['adv_state_dict'])
            adv_optim.load_state_dict(info['adv_optimizer'])
            if 'best_val_accuracy' in info:
                best_accuracy = info['best_val_accuracy']

    scheduler = get_optim_scheduler(main_optim)
    adv_scheduler = get_optim_scheduler(adv_optim)

    my_loss = get_loss_criterion(cfg.loss)

    dataset_val = prepare_eval_data_set(**cfg['data'], **cfg['model'])
    print("=> Loaded valset: {} examples".format(len(dataset_val)))

    dataset_test = prepare_test_data_set(**cfg['data'], **cfg['model'])
    print("=> Loaded testset: {} examples".format(len(dataset_test)))

    data_reader_trn = DataLoader(dataset=train_dataSet,
                                 batch_size=cfg.data.batch_size,
                                 shuffle=True,
                                 num_workers=cfg.data.num_workers)
    data_reader_val = DataLoader(dataset_val,
                                 shuffle=True,
                                 batch_size=cfg.data.batch_size,
                                 num_workers=cfg.data.num_workers)
    data_reader_test = DataLoader(dataset_test,
                                  shuffle=True,
                                  batch_size=cfg.data.batch_size,
                                  num_workers=cfg.data.num_workers)

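    # Put both the main and adversary models into training mode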
    main_model.train()
    adv_model.train()

    print("=> Start training...")
    one_stage_train(main_model,
                    adv_model,
                    data_reader_trn,
                    main_optim,
                    adv_optim,
                    my_loss,
                    data_reader_eval=data_reader_val,
                    data_reader_test=data_reader_test,
                    snapshot_dir=snapshot_dir,
                    log_dir=boards_dir,
                    start_epoch=i_epoch,
                    i_iter=i_iter,
                    scheduler=scheduler,
                    adv_scheduler=adv_scheduler,
                    best_val_accuracy=best_accuracy)
    print("=> Training complete.")

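    # Reload the best checkpoint (if any) and evaluate it on the test set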
    model_file = os.path.join(snapshot_dir, "best_model.pth")
    if os.path.isfile(model_file):
        print("=> Testing best model...")
        main_model, _ = build_model(cfg, dataset_test)
        main_model.load_state_dict(torch.load(model_file)['state_dict'])
        main_model.eval()
        print("=> Loaded model from file {}".format(model_file))

        print("=> Start testing...")
        acc_test, loss_test, _ = one_stage_eval_model(data_reader_test,
                                                      main_model,
                                                      one_stage_run_model,
                                                      my_loss)
        print("Final results:\nacc: {:.4f}\nloss: {:.4f}".format(
            acc_test, loss_test))
        result_file = os.path.join(snapshot_dir, 'result_on_val.txt')
        with open(result_file, 'a') as fid:
            fid.write('FINAL RESULT ON TEST: {:.6f}\n'.format(acc_test))
    else:
        print("File {} not found. Skipping testing.".format(model_file))
        acc_test = loss_test = 0

    # print("BEGIN PREDICTING ON TEST/VAL set...")
    # if 'predict' in cfg.run:
    #     print_eval(prepare_test_data_set, "test")
    # if cfg.run == 'train+val':
    #     print_eval(prepare_eval_data_set, "val")

    print("total runtime(h): %s" % prg_timer.end())

    return (acc_test, loss_test)