import argparse
import copy
import os
import subprocess
from functools import partial

import torch
from loguru import logger
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import PopulationBasedTraining

# Assumption: `config` and `update_config` come from this project's global
# config module (VL-BERT style), and `_tune` / `_train` are the Ray Tune
# trial function and the training loop defined elsewhere in the codebase.

def parse_args():
    parser = argparse.ArgumentParser('Get Test Result of Hateful Memes')
    parser.add_argument('--cfg',
                        type=str,
                        help='path to answer net config yaml')
    parser.add_argument('--ckpt',
                        type=str,
                        help='path to checkpoint of answer net')
    parser.add_argument('--bs', type=int)
    parser.add_argument('--gpus', type=int, nargs='+')
    parser.add_argument('--model-dir',
                        type=str,
                        help='root path to store checkpoint')
    parser.add_argument('--result-path',
                        type=str,
                        help='path to store test result file.')
    parser.add_argument('--result-name', type=str)
    parser.add_argument('--split', default='test')

    args = parser.parse_args()

    if args.cfg is not None:
        update_config(args.cfg)
    if args.bs is not None:
        config.TEST.BATCH_IMAGES = args.bs
    if args.gpus is not None:
        config.GPUS = ','.join([str(gpu) for gpu in args.gpus])
    if args.split is not None:
        config.DATASET.TEST_IMAGE_SET = args.split
    if args.model_dir is not None:
        config.OUTPUT_PATH = os.path.join(args.model_dir, config.OUTPUT_PATH)

    return args, config
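
# Usage sketch (hypothetical file name and paths): parse_args() reads
# sys.argv, so a typical test run would be
#
#   python test.py --cfg cfgs/answer_net.yaml --ckpt ckpts/best.model \
#       --gpus 0 1 --bs 16 --split test
#
# and the returned (args, config) pair drives checkpoint loading and
# evaluation.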


def tune_vl_bert(config_path,
                 pl_ckpt_path,
                 num_samples=10,
                 num_epochs=10,
                 gpus_per_trial=2):

    # Alternative scheduler: ASHA early stopping (left commented out; the
    # PBT scheduler below is used instead).
    # scheduler = ASHAScheduler(
    #     metric="loss",
    #     mode="min",
    #     max_t=num_epochs,
    #     grace_period=1,
    #     reduction_factor=2)

    reporter = CLIReporter(
        parameter_columns=[
            "lr", "weight_decay", "warmup_factor", "max_epoch", "batch_size"
        ],
        metric_columns=["mean_accuracy", "training_iteration"])

    # Initial hyperparameter values and search distributions; "lr" and
    # "batch_size" start fixed and are perturbed by the PBT scheduler below.
    param_config = {
        "lr": 6.25e-7,
        "weight_decay": tune.loguniform(1e-5, 1e-2),
        "batch_size": 4,
        "max_epoch": tune.choice([4, 6, 8, 10]),
        "warmup_factor": tune.uniform(0, 1),
        # uniform draws a float; cast to int downstream if the trainer
        # expects an integer step count
        "warmup_steps": tune.uniform(100, 800),
    }
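
    # Sketch: Ray Tune draws one concrete value per distribution for each
    # trial, so `_tune` receives a plain dict, e.g. (illustrative values only)
    # {"lr": 6.25e-7, "weight_decay": 3.1e-4, "batch_size": 4,
    #  "max_epoch": 8, "warmup_factor": 0.42, "warmup_steps": 517.0}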

    # PBT perturbs "lr" and "batch_size" at every perturbation interval
    # (every 2 reported training iterations here).
    scheduler = PopulationBasedTraining(
        time_attr="training_iteration",
        metric="mean_accuracy",
        mode="max",
        perturbation_interval=2,
        hyperparam_mutations={
            # loguniform bounds must be ordered low -> high
            "lr": tune.loguniform(6.25e-8, 6.25e-6),
            "batch_size": [1, 2, 3, 4],
        })

    # Merge the YAML into the global config, then snapshot it so Ray Tune
    # workers do not share mutable global state.
    update_config(config_path)
    model_base_cfg = copy.deepcopy(config)

    tune.run(
        partial(
            _tune,
            vl_bert_config=model_base_cfg,
            pl_ckpt_path=pl_ckpt_path,
            num_gpus=gpus_per_trial,
        ),
        resources_per_trial={
            "cpu": 4,
            "gpu": gpus_per_trial,
        },
        config=param_config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter,
        name="tune_vl_bert")


# Training-side argument parser (this parse_args presumably comes from a
# separate training script; it would shadow the test-time parser above if
# both lived in one module).
def parse_args():
    parser = argparse.ArgumentParser('Train Cognition Network')
    parser.add_argument('--cfg', type=str, help='path to config file')
    parser.add_argument('--model-dir', type=str, help='root path to store checkpoint')
    parser.add_argument('--log-dir', type=str, help='tensorboard log dir')
    parser.add_argument('--dist', help='whether to use distributed training', default=False, action='store_true')
    parser.add_argument('--slurm', help='whether this is a slurm job', default=False, action='store_true')
    parser.add_argument('--do-test', help='whether to generate csv result on test set',
                        default=False, action='store_true')
    parser.add_argument('--cudnn-off', help='disable cudnn', default=False, action='store_true')

    # options for quickly evaluating a partially pretrained model
    # (optionally on one fold of a k-fold split)
    parser.add_argument('--partial-pretrain', type=str)
    parser.add_argument('--k-fold-i', type=int)

    args = parser.parse_args()

    if args.cfg is not None:
        update_config(args.cfg)
    if args.model_dir is not None:
        config.OUTPUT_PATH = os.path.join(args.model_dir, config.OUTPUT_PATH)

    if args.partial_pretrain is not None:
        config.NETWORK.PARTIAL_PRETRAIN = args.partial_pretrain

    if args.slurm:
        # Translate SLURM environment variables into the env:// rendezvous
        # variables expected by torch.distributed.
        proc_id = int(os.environ['SLURM_PROCID'])
        ntasks = int(os.environ['SLURM_NTASKS'])
        node_list = os.environ['SLURM_NODELIST']
        num_gpus = torch.cuda.device_count()
        addr = subprocess.getoutput(
            'scontrol show hostname {} | head -n1'.format(node_list))
        os.environ['MASTER_PORT'] = str(29500)
        os.environ['MASTER_ADDR'] = addr
        os.environ['WORLD_SIZE'] = str(ntasks)
        os.environ['RANK'] = str(proc_id)
        os.environ['LOCAL_RANK'] = str(proc_id % num_gpus)

    return args, config
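
# Sketch (standard torch.distributed usage): with MASTER_ADDR, MASTER_PORT,
# WORLD_SIZE, RANK and LOCAL_RANK exported above, each worker can join the
# job via the env:// rendezvous:
#
#   import torch.distributed as dist
#   dist.init_process_group(backend='nccl', init_method='env://')
#   torch.cuda.set_device(int(os.environ['LOCAL_RANK']))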


def train(config_path, pl_ckpt_path, **kwargs):
    with logger.catch(reraise=True):
        update_config(config_path)
        cfg = copy.deepcopy(config)
        _train(cfg, pl_ckpt_path, **kwargs)
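
# Usage sketch (hypothetical paths): train() re-reads the YAML into the
# global config and deep-copies it so the run cannot mutate shared state;
# extra keyword arguments are forwarded unchanged to _train.
#
#   train('cfgs/vl_bert_base.yaml', 'ckpts/pretrained.ckpt')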