Example #1
0
def run(config_path,
        ir_checkpoint_dir=None,
        pre_checkpoint_dir=None,
        cluster_checkpoint_dir=None):
    """Build the agent named in the config and train it.

    Args:
        config_path: path to the experiment config file.
        ir_checkpoint_dir: if given, warm-start the model weights and
            memory bank from this directory (optimizer/epoch are reset).
        pre_checkpoint_dir: if given, fully resume (weights, memory bank,
            optimizer state, and epoch) from this directory.
        cluster_checkpoint_dir: optional directory with cluster labels,
            forwarded to the checkpoint loader.
    """
    config = process_config(config_path)
    agent = globals()[config.agent](config)

    # (checkpoint_dir, resume_training_state): the IR checkpoint restores
    # only model + memory bank; the "pre" checkpoint also restores the
    # optimizer state and epoch counter.
    for ckpt_dir, resume_state in ((ir_checkpoint_dir, False),
                                   (pre_checkpoint_dir, True)):
        if ckpt_dir is not None:
            agent.load_checkpoint('checkpoint.pth.tar',
                                  ckpt_dir,
                                  load_memory_bank=True,
                                  load_model=True,
                                  load_optim=resume_state,
                                  load_epoch=resume_state,
                                  cluster_label_dir=cluster_checkpoint_dir)

    try:
        agent.run()
        agent.finalise()
    except KeyboardInterrupt:
        # Allow manual interruption without a traceback.
        pass
Example #2
0
def run(config_path):
    """Instantiate the configured agent and execute a full training run."""
    cfg = process_config(config_path)
    # Resolve the agent class by name from the module namespace.
    agent_cls = globals()[cfg.agent]
    worker = agent_cls(cfg)

    try:
        worker.run()
        worker.finalise()
    except KeyboardInterrupt:
        # Ctrl-C exits quietly.
        pass
Example #3
0
def run(args, gpu_device=None):
    '''Run the Lightning system.

    Args:
        args: parsed CLI namespace; args.config points at the config file,
            and args.quick / args.num_workers / args.ckpt / args.profiler
            optionally override config values.
        gpu_device: str or None, specifies GPU device as follows:
            None: CPU (specified as null in config)
            'cpu': CPU
            '-1': All available GPUs
            '0': GPU 0
            '4': GPU 4
            '0,3': GPUs 0 and 3
            See: https://pytorch-lightning.readthedocs.io/en/latest/multi_gpu.html
    '''
    # Normalize both CPU spellings to None.
    if gpu_device == 'cpu' or not gpu_device:
        gpu_device = None

    config = process_config(args.config)

    # Command-line flags win over the config file, but only when supplied.
    if gpu_device:
        config.gpu_device = gpu_device
    if args.quick:
        config.quick = args.quick
    if args.num_workers is not None:
        config.data_loader_workers = args.num_workers

    seed_everything(config.seed)
    system = SYSTEM[config.system](config)

    # save_top_k=-1 keeps a checkpoint for every epoch (period=1).
    checkpointer = pl.callbacks.ModelCheckpoint(
        os.path.join(config.exp_dir, 'checkpoints'),
        save_top_k=-1,
        period=1,
    )
    wandb.init(project='sensor', entity='viewmaker',
               name=config.exp_name, config=config, sync_tensorboard=True)

    trainer = pl.Trainer(
        default_root_dir=config.exp_dir,
        gpus=gpu_device,
        distributed_backend=config.distributed_backend or 'dp',
        max_epochs=config.num_epochs,
        min_epochs=config.num_epochs,
        checkpoint_callback=checkpointer,
        resume_from_checkpoint=args.ckpt or config.continue_from_checkpoint,
        profiler=args.profiler,
        precision=config.optim_params.precision or 32,
        callbacks=None,
        val_check_interval=config.val_check_interval or 1.0,
        limit_val_batches=config.limit_val_batches or 1.0,
    )
    trainer.fit(system)
Example #4
0
def main(config_path):
    """Entry point: build the configured agent and run it to completion."""
    cfg = process_config(config_path)

    # Look up the agent class by its name in the config, then run it.
    agent = globals()[cfg.agent](cfg)
    try:
        agent.run()
        agent.finalise()
    except KeyboardInterrupt:
        # Swallow Ctrl-C so interactive runs exit cleanly.
        pass
Example #5
0
def run(config_path, ir_checkpoint_dir=None):
    """Train the configured agent, optionally warm-starting from an
    IR checkpoint directory."""
    cfg = process_config(config_path)
    agent = globals()[cfg.agent](cfg)

    if ir_checkpoint_dir is not None:
        # Restore weights and memory bank only; training restarts from
        # epoch 0 with a fresh optimizer.
        agent.load_checkpoint('final.pth.tar', ir_checkpoint_dir,
                              load_memory_bank=True, load_model=True,
                              load_optim=False, load_epoch=False)

    try:
        agent.run()
        agent.finalise()
    except KeyboardInterrupt:
        pass
def random_search(config_path, num_exps):
    """Run `num_exps` sequential experiments with randomly sampled params.

    Args:
        config_path: base config file; each trial overrides it with a
            fresh random parameter draw (via DotMap override).
        num_exps: number of random-search trials to launch.
    """
    gpu = get_free_gpu()
    # Fix: stray ')' was inside the string literal, making the banner
    # lopsided ("...===============)").
    print("=============== Acquired GPU: {} ===============".format(gpu))
    for n in range(num_exps):
        params = random_search_params()
        nested_dict = flat_to_nested_dict(params)
        curr_config = process_config(config_path,
                                     override_dotmap=DotMap(nested_dict),
                                     exp_base=EXP_BASE)
        exp_dir = run_agent(globals()[curr_config.agent], curr_config, gpu)

        print('======> Finished: ', exp_dir)

    print('================================================================')
    print('*                  COMPLETED MASS EXPERIMENTS                  *')
    print('================================================================')
Example #7
0
def run(config_path):
    """Train the configured agent, resuming a prior experiment when
    `config.continue_exp_dir` is set."""
    cfg = process_config(config_path)
    agent = globals()[cfg.agent](cfg)

    if cfg.continue_exp_dir is not None:
        agent.logger.info("Found existing model... Continuing training!")
        resume_dir = os.path.join(cfg.continue_exp_dir, 'checkpoints')
        # Full resume: weights, optimizer state, and epoch counter.
        agent.load_checkpoint('checkpoint.pth.tar',
                              checkpoint_dir=resume_dir,
                              load_model=True,
                              load_optim=True,
                              load_epoch=True)

    try:
        agent.run()
        agent.finalise()
    except KeyboardInterrupt:
        pass
def run(config_path, gpu_device=-1):
    """Train a Lightning system; a non-negative `gpu_device` overrides
    the GPU index from the config."""
    cfg = process_config(config_path)
    if gpu_device >= 0:
        cfg.gpu_device = gpu_device
    seed_everything(cfg.seed)
    system = SYSTEM[cfg.system](cfg)

    # save_top_k=-1 keeps every epoch's checkpoint.
    checkpointer = pl.callbacks.ModelCheckpoint(
        os.path.join(cfg.exp_dir, 'checkpoints'),
        save_top_k=-1,
        period=1,
    )
    trainer = pl.Trainer(
        default_save_path=cfg.exp_dir,
        gpus=[cfg.gpu_device],
        max_epochs=cfg.num_epochs,
        min_epochs=cfg.num_epochs,
        checkpoint_callback=checkpointer,
        val_percent_check=0.1,
        resume_from_checkpoint=cfg.continue_from_checkpoint,
    )
    trainer.fit(system)
Example #9
0
def run(config_path, gpu_device=None):
    """Train a Lightning system on the given config.

    Args:
        config_path: path to the config file.
        gpu_device: GPU spec string ('0', '0,3', '-1' for all, ...);
            None or 'cpu' runs on CPU.
    """
    if gpu_device == 'cpu' or not gpu_device:
        gpu_device = None
    config = process_config(config_path)
    if gpu_device:
        config.gpu_device = gpu_device
    seed_everything(config.seed, use_cuda=config.cuda)
    SystemClass = SYSTEM[config.system]
    system = SystemClass(config)

    if config.optim_params.scheduler:  # moco scheduler
        # Step-decay LR schedule at 60% and 80% of training.
        lr_callback = globals()[config.optim_params.scheduler](
            initial_lr=config.optim_params.learning_rate,
            max_epochs=config.num_epochs,
            schedule=(
                int(0.6 * config.num_epochs),
                int(0.8 * config.num_epochs),
            ),
        )
        callbacks = [lr_callback]
    else:
        callbacks = None

    ckpt_callback = pl.callbacks.ModelCheckpoint(
        os.path.join(config.exp_dir, 'checkpoints'),
        save_top_k=-1,
        period=1,
    )
    trainer = pl.Trainer(
        default_root_dir=config.exp_dir,
        gpus=gpu_device,
        max_epochs=config.num_epochs,
        min_epochs=config.num_epochs,
        checkpoint_callback=ckpt_callback,
        # BUG FIX: `callbacks` was built above but never handed to the
        # Trainer, so the LR-scheduler callback silently never ran.
        callbacks=callbacks,
        val_percent_check=0.1,
        resume_from_checkpoint=config.continue_from_checkpoint,
    )
    trainer.fit(system)
Example #10
0
def run(config_path, gpu_device=-1):
    """Train the configured agent; a non-negative `gpu_device` pins a
    single GPU, overriding the config."""
    cfg = process_config(config_path)
    if gpu_device >= 0:
        cfg.gpu_device = [gpu_device]
    agent = globals()[cfg.agent](cfg)

    if cfg.continue_exp_dir is not None:
        agent.logger.info("Found existing model... Continuing training!")
        resume_dir = os.path.join(cfg.continue_exp_dir, 'checkpoints')
        # Full resume: memory bank, weights, optimizer, and epoch counter.
        agent.load_checkpoint(
            cfg.continue_exp_name,
            checkpoint_dir=resume_dir,
            load_memory_bank=True,
            load_model=True,
            load_optim=True,
            load_epoch=True,
        )

    try:
        agent.run()
        agent.finalise()
    except KeyboardInterrupt:
        pass
Example #11
0
                        choices=['easy', 'hard'])
    parser.add_argument('--batch-size', type=int, default=128)
    parser.add_argument('--gpu-device', type=int, default=0)
    parser.add_argument('--cuda', action='store_true', default=False)
    parser.add_argument('--seed', type=int, default=42)
    args = parser.parse_args()

    OUT_DIR = f"/mnt/fs5/wumike/reference/pretrain/{args.dataset}/ir_imagenet"
    if not os.path.isdir(OUT_DIR):
        os.makedirs(OUT_DIR)

    config_path = os.path.join(MODEL_DIR, 'config.json')
    checkpoint_dir = os.path.join(MODEL_DIR, 'checkpoints')
    assert os.path.isfile(os.path.join(checkpoint_dir, 'model_best.pth.tar'))

    config = process_config(config_path,
                            override_dotmap={'gpu_device': args.gpu_device})
    AgentClass = globals()[config.agent]
    localagg = AgentClass(config)
    localagg.load_checkpoint(
        'model_best.pth.tar',
        checkpoint_dir=checkpoint_dir,
        load_memory_bank=True,
        load_model=True,
    )
    localagg._set_models_to_eval()
    gpu_device = localagg.config.gpu_device[0]
    resnet = copy.deepcopy(localagg.model)
    resnet.load_state_dict(localagg.model.state_dict())
    # resnet = nn.Sequential(*list(resnet.children())[:-2])
    resnet = resnet.eval()
    for param in resnet.parameters():
Example #12
0
def run(args, gpu_device=None):
    '''Run the Lightning system.

    Args:
        args: parsed CLI namespace; args.config points at the config file,
            and args.num_workers / args.ckpt / args.profiler optionally
            override config values.
        gpu_device: str or None, specifies GPU device as follows:
            None: CPU (specified as null in config)
            'cpu': CPU
            '-1': All available GPUs
            '0': GPU 0
            '4': GPU 4
            '0,3': GPUs 0 and 3
            See the following for more options: 
            https://pytorch-lightning.readthedocs.io/en/latest/multi_gpu.html
    '''
    if gpu_device == 'cpu' or not gpu_device:
        gpu_device = None
    config = process_config(args.config)
    # Only override if specified.
    if gpu_device:
        config.gpu_device = gpu_device
    # BUG FIX: was `if args.num_workers:`, which silently ignored an
    # explicit --num-workers 0 (main-process data loading). Test against
    # None instead, matching the other Lightning runner in this file.
    if args.num_workers is not None:
        config.data_loader_workers = args.num_workers
    seed_everything(config.seed)
    SystemClass = SYSTEM[config.system]
    system = SystemClass(config)

    if config.optim_params.scheduler:
        # Step-decay LR schedule at 60% and 80% of total epochs.
        lr_callback = globals()[config.optim_params.scheduler](
            initial_lr=config.optim_params.learning_rate,
            max_epochs=config.num_epochs,
            schedule=(
                int(0.6 * config.num_epochs),
                int(0.8 * config.num_epochs),
            ),
        )
        callbacks = [lr_callback]
    else:
        callbacks = []

    # TODO: adjust period for saving checkpoints.
    ckpt_callback = pl.callbacks.ModelCheckpoint(
        os.path.join(config.exp_dir, 'checkpoints'),
        save_top_k=-1,
        period=1,
    )
    wandb.init(project='image',
               entity='viewmaker',
               name=config.exp_name,
               config=config,
               sync_tensorboard=True)
    trainer = pl.Trainer(
        default_root_dir=config.exp_dir,
        gpus=gpu_device,
        # 'ddp' is usually faster, but we use 'dp' so the negative samples
        # for the whole batch are used for the SimCLR loss
        # distributed_backend=config.distributed_backend or 'dp',
        max_epochs=config.num_epochs,
        min_epochs=config.num_epochs,
        checkpoint_callback=ckpt_callback,
        resume_from_checkpoint=args.ckpt or config.continue_from_checkpoint,
        profiler=args.profiler,
        precision=config.optim_params.precision or 32,
        callbacks=callbacks,
        val_check_interval=config.val_check_interval or 1.0,
        limit_val_batches=config.limit_val_batches or 1.0,
    )
    trainer.fit(system)