Esempio n. 1
0
def _non_dist_train(model, dataset, cfg, validate=False):
    # prepare data loaders
    data_loaders = [
        build_dataloader(dataset,
                         cfg.data.imgs_per_gpu,
                         cfg.data.workers_per_gpu,
                         cfg.gpus,
                         dist=False)
    ]
    # put model on gpus
    model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda()
    # build runner
    runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir,
                    cfg.log_level)
    runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(CfgSaverHook(cfg), priority='VERY_LOW')

    if validate:
        # TODO: should implement dist, nondist version evaluation
        if isinstance(model.module, RPN):
            runner.register_hook(CocoDistEvalRecallHook(cfg.data.val))
        else:
            if cfg.data.val.type == 'CocoDataset':
                runner.register_hook(CocoDistEvalmAPHook(cfg.data.val))
            else:
                runner.register_hook(NonDistEvalmAPHook(cfg.data.val))

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
Esempio n. 2
0
def _non_dist_train(model, dataset, cfg, validate=False):

    # prepare data loaders
    data_loaders = [
        build_dataloader(dataset,
                         cfg.data.imgs_per_gpu,
                         cfg.data.workers_per_gpu,
                         cfg.gpus,
                         dist=False)
    ]
    # put model on gpus
    model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda()
    # build runner
    runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir,
                    cfg.log_level)
    runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    if validate:
        if isinstance(model.module, RPN):
            runner.register_hook(CocoDistEvalRecallHook(cfg.data.val))
        elif cfg.data.val.type == 'CocoDataset':
            runner.register_hook(CocoDistEvalmAPHook(cfg.data.val))
        elif cfg.data.val.type in ['KittiLiDAR', 'KittiRGB']:
            runner.register_hook(KittiEvalmAPHook(cfg.data.val, \
                                                  interval=cfg.eval_interval))
        else:
            runner.register_hook(DistEvalmAPHook(cfg.data.val))
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
Esempio n. 3
0
def _dist_train(model, dataset, cfg, validate=False):
    # prepare data loaders
    data_loaders = [
        build_dataloader(dataset,
                         cfg.data.imgs_per_gpu,
                         cfg.data.workers_per_gpu,
                         dist=True)
    ]
    # put model on gpus
    model = MMDistributedDataParallel(model.cuda())
    # build runner
    runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir,
                    cfg.log_level)
    # register hooks
    optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())
    # register eval hooks
    if validate:
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            runner.register_hook(CocoDistEvalRecallHook(cfg.data.val))
        else:
            if cfg.data.val.type == 'CocoDataset':
                runner.register_hook(CocoDistEvalmAPHook(cfg.data.val))
            else:
                runner.register_hook(DistEvalmAPHook(cfg.data.val))

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
Esempio n. 4
0
def _dist_train(model, dataset, cfg, validate=False):
    # prepare data loaders
    try:
        selsa_imgs = cfg.data.selsa_imgs
    except Exception:
        selsa_imgs = cfg.data.imgs_per_gpu

    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    data_loaders = [
        build_dataloader(ds,
                         cfg.data.imgs_per_gpu,
                         cfg.data.workers_per_gpu,
                         selsa_imgs=selsa_imgs,
                         dist=True) for ds in dataset
    ]
    # put model on gpus
    model = MMDistributedDataParallel(model.cuda())

    mod = cfg.pop('mod', False)
    # build runner
    optimizer = build_optimizer(model, cfg.optimizer, mod)
    runner = Runner(model, batch_processor, optimizer, cfg.work_dir,
                    cfg.log_level)

    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config,
                                             **fp16_cfg)
    else:
        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)

    # register hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())
    # register eval hooks
    if validate:
        val_dataset_cfg = cfg.data.val
        eval_cfg = cfg.get('evaluation', {})
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            runner.register_hook(
                CocoDistEvalRecallHook(val_dataset_cfg, **eval_cfg))
        else:
            dataset_type = DATASETS.get(val_dataset_cfg.type)
            if issubclass(dataset_type, datasets.CocoDataset):
                runner.register_hook(
                    CocoDistEvalmAPHook(val_dataset_cfg, **eval_cfg))
            else:
                runner.register_hook(
                    DistEvalmAPHook(val_dataset_cfg, **eval_cfg))

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
Esempio n. 5
0
def _dist_train(model,
                dataset,
                cfg,
                validate=False,
                logger=None,
                timestamp=None):
    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    data_loaders = [
        build_dataloader(
            ds, cfg.data.imgs_per_gpu, cfg.data.workers_per_gpu, dist=True)
        for ds in dataset
    ]
    # put model on gpus
    model = MMDistributedDataParallel(model.cuda())

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(
        model, batch_processor, optimizer, cfg.work_dir, logger=logger)
    # an ugly walkaround to make the .log and .log.json filenames the same
    runner.timestamp = timestamp

    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config,
                                             **fp16_cfg)
    else:
        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)

    # register hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())
    # register eval hooks
    if validate:
        val_dataset_cfg = cfg.data.val
        eval_cfg = cfg.get('evaluation', {})
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            runner.register_hook(
                CocoDistEvalRecallHook(val_dataset_cfg, **eval_cfg))
        else:
            dataset_type = DATASETS.get(val_dataset_cfg.type)
            if issubclass(dataset_type, datasets.CocoDataset) or issubclass(dataset_type, datasets.Acoustic):
                runner.register_hook(
                    CocoDistEvalmAPHook(val_dataset_cfg, **eval_cfg))
            else:
                runner.register_hook(
                    DistEvalmAPHook(val_dataset_cfg, **eval_cfg))

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
Esempio n. 6
0
def _dist_train(model, dataset, cfg, validate=False):
    # prepare data loaders
    data_loaders = [
        build_dataloader(
            dataset,
            cfg.data.imgs_per_gpu,
            cfg.data.workers_per_gpu,
            dist=True)
    ]
    # put model on gpus
    model = MMDistributedDataParallel(model.cuda())

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(model, batch_processor, optimizer, cfg.work_dir,
                    cfg.log_level)

    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config,
                                             **fp16_cfg)
    else:
        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)

    # register hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())
    # register eval hooks
    if validate:
        val_dataset_cfg = cfg.data.val
        eval_cfg = cfg.get('evaluation', {})
        if isinstance(model.module, RPN):
            runner.register_hook(
                CocoDistEvalRecallHook(val_dataset_cfg, **eval_cfg))
        else:
            dataset_type = getattr(datasets, val_dataset_cfg.type)
            if issubclass(dataset_type, datasets.CocoDataset):
                runner.register_hook(
                    CocoDistEvalmAPHook(val_dataset_cfg, **eval_cfg))
            else:
                runner.register_hook(
                    DistEvalmAPHook(val_dataset_cfg, **eval_cfg))

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
Esempio n. 7
0
def _dist_train(model, dataset, cfg, validate=False):
    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    data_loaders = [
        build_dataloader(ds,
                         cfg.data.imgs_per_gpu,
                         cfg.data.workers_per_gpu,
                         dist=True) for ds in dataset
    ]
    # put model on gpus
    model = MMDistributedDataParallel(model.cuda())

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(model, batch_processor, optimizer, cfg.work_dir,
                    cfg.log_level)

    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config,
                                             **fp16_cfg)
    else:
        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)

    # register hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())
    # register eval hooks
    if validate:
        val_dataset_cfg = cfg.data.val
        eval_cfg = cfg.evaluation
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            runner.register_hook(
                CocoDistEvalRecallHook(val_dataset_cfg, **eval_cfg))
        else:
            if isinstance(val_dataset_cfg, dict):
                runner.register_hook(
                    KaggleEvalHook(val_dataset_cfg, **eval_cfg))
            elif isinstance(val_dataset_cfg, list):
                for vdc in val_dataset_cfg:
                    runner.register_hook(KaggleEvalHook(vdc, **eval_cfg))

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
Esempio n. 8
0
def _dist_train(model, dataset, cfg, validate=False, **kwargs):
    logger = get_root_logger(cfg.log_level)
    # prepare data loaders
    data_loaders = [
        build_dataloader(
            dataset,
            cfg.data.imgs_per_gpu,
            cfg.data.workers_per_gpu,
            dist=True,
            repeat_samples=cfg.train_cfg.repeat_samples,
            **kwargs)
    ]
    # put model on gpus
    model = MMDistributedDataParallel(model.cuda(), device_ids=[torch.cuda.current_device()])
    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)
    runner = EpochBasedRunner(
        model,
        #batch_processor=batch_processor,
        optimizer=optimizer,
        work_dir=cfg.work_dir,
        logger=logger)

    #runner = Runner(model, batch_processor, optimizer, cfg.work_dir,
    #                logger)
    # register hooks
    optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())
    # register eval hooks
    if validate:
        val_dataset_cfg = cfg.data.val
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            runner.register_hook(CocoDistEvalRecallHook(val_dataset_cfg))
        else:
            dataset_type = getattr(datasets, val_dataset_cfg.type)
            if issubclass(dataset_type, datasets.CocoDataset):
                runner.register_hook(CocoDistEvalmAPHook(val_dataset_cfg))
            else:
                runner.register_hook(DistEvalmAPHook(val_dataset_cfg))

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
Esempio n. 9
0
def _dist_train_runner(model,
                       trainDataset,
                       valDataset,
                       cfg,
                       validate=False) -> Runner:

    # put model on gpus
    model = MMDistributedDataParallel(model.cuda())

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(model, batch_processor, optimizer, cfg.work_dir,
                    cfg.log_level)

    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config,
                                             **fp16_cfg)
    else:
        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)

    # register hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())
    # register eval hooks
    if validate:
        val_dataset_cfg = valDataset
        eval_cfg = cfg.get('evaluation', {})
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            runner.register_hook(
                CocoDistEvalRecallHook(val_dataset_cfg, **eval_cfg))
        else:
            dataset_type = getattr(mmdetDatasets, val_dataset_cfg.type)
            if issubclass(dataset_type, CocoDataset):
                runner.register_hook(
                    CocoDistEvalmAPHook(val_dataset_cfg, **eval_cfg))
            else:
                runner.register_hook(
                    DistEvalmAPHook(val_dataset_cfg, **eval_cfg))

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    return runner
Esempio n. 10
0
def _dist_train(model, dataset, cfg, validate=False):
    # prepare data loaders
    data_loaders = [
        build_dataloader(dataset,
                         cfg.data.imgs_per_gpu,
                         cfg.data.workers_per_gpu,
                         dist=True)
    ]
    # put model on gpus

    find_unused_parameters = cfg.get('find_unused_parameters', False)
    # Sets the `find_unused_parameters` parameter in
    # torch.nn.parallel.DistributedDataParallel
    model = MMDistributedDataParallel(
        model.cuda(),
        device_ids=[torch.cuda.current_device()],
        broadcast_buffers=False,
        find_unused_parameters=find_unused_parameters)
    # model = MMDistributedDataParallel(model.cuda())
    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(model, batch_processor, optimizer, cfg.work_dir,
                    cfg.log_level)
    # register hooks
    optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())
    # register eval hooks
    if validate:
        val_dataset_cfg = cfg.data.val
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            runner.register_hook(CocoDistEvalRecallHook(val_dataset_cfg))
        else:
            dataset_type = getattr(datasets, val_dataset_cfg.type)
            if issubclass(dataset_type, datasets.CocoDataset):
                runner.register_hook(CocoDistEvalmAPHook(val_dataset_cfg))
            else:
                runner.register_hook(DistEvalmAPHook(val_dataset_cfg))

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
Esempio n. 11
0
def _dist_train(model, dataset, cfg, validate=False):
    # prepare data loaders
    data_loaders = [
        build_dataloader(dataset,
                         cfg.data.imgs_per_gpu,
                         cfg.data.workers_per_gpu,
                         dist=True)
    ]
    # put model on gpus
    model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
    for m in model.modules():
        if isinstance(m, torch.nn.SyncBatchNorm):
            m._specify_ddp_gpu_num(1)

    model = MMDistributedDataParallel(model.cuda())
    # model = DistributedDataParallel(model.cuda())
    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(model, batch_processor, optimizer, cfg.work_dir,
                    cfg.log_level)
    # register hooks
    optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())
    # register eval hooks
    if validate:
        val_dataset_cfg = cfg.data.val
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            runner.register_hook(CocoDistEvalRecallHook(val_dataset_cfg))
        else:
            dataset_type = getattr(datasets, val_dataset_cfg.type)
            if issubclass(dataset_type, datasets.CocoDataset):
                runner.register_hook(CocoDistEvalmAPHook(val_dataset_cfg))
            else:
                runner.register_hook(DistEvalmAPHook(val_dataset_cfg))

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
Esempio n. 12
0
def _dist_train(model, dataset, cfg, validate=False):
    # prepare data loaders
    try:
        pad_size = cfg.data.pad_size
        print("using padding size")
    except:
        pad_size = None
    data_loaders = [
        build_dataloader(dataset,
                         cfg.data.imgs_per_gpu,
                         cfg.data.workers_per_gpu,
                         pad_size=pad_size,
                         dist=True)
    ]
    rank = int(os.environ['RANK'])
    num_gpus = torch.cuda.device_count()
    # put model on gpus
    model = MMDistributedDataParallel(model.cuda(rank % num_gpus))
    torch.cuda.empty_cache()
    # build runner
    runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir,
                    cfg.log_level)
    # register hooks
    optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())
    # register eval hooks
    if validate:
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            runner.register_hook(CocoDistEvalRecallHook(cfg.data.val))
        else:
            if cfg.data.val.type == 'CocoDataset' or cfg.data.val.type == 'CocoZipDataset':
                runner.register_hook(CocoDistEvalmAPHook(cfg.data.val))
            else:
                runner.register_hook(DistEvalmAPHook(cfg.data.val))

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
Esempio n. 13
0
def _non_dist_train(model,
                    dataset,
                    cfg,
                    validate=False,
                    logger=None,
                    timestamp=None,
                    runner_attr_dict=dict()):
    if validate:
        raise NotImplementedError('Built-in validation is not implemented '
                                  'yet in not-distributed training. Use '
                                  'distributed training or test.py and '
                                  '*eval.py scripts instead.')
    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    data_loaders = [
        build_dataloader(ds,
                         cfg.data.imgs_per_gpu,
                         cfg.data.workers_per_gpu,
                         cfg.gpus,
                         dist=False) for ds in dataset
    ]
    # build runner
    runner_attr_dict.update({
        'imgs_per_gpu': cfg.data.imgs_per_gpu,
        'initial_lr': cfg.optimizer['lr']
    })
    if hasattr(dataset, 'CLASSES'):
        runner_attr_dict.update({'classes': dataset.CLASSES})
    optimizer = build_optimizer(model, cfg.optimizer)
    search_optimizer = getattr(getattr(cfg, 'search_config', {}),
                               'search_optimizer', None)
    assert search_optimizer is None, "Not support"
    runner = Runner(model,
                    batch_processor,
                    optimizer,
                    search_optimizer,
                    cfg.work_dir,
                    logger=logger,
                    runner_attr_dict=runner_attr_dict)
    # an ugly walkaround to make the .log and .log.json filenames the same
    runner.timestamp = timestamp
    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config,
                                             **fp16_cfg,
                                             distributed=False)
    else:
        optimizer_config = cfg.optimizer_config
    runner.register_training_hooks(cfg.lr_config, optimizer_config)
    # register eval hooks
    if validate:
        if isinstance(model.module, RPN):
            runner.register_hook(CocoDistEvalRecallHook(cfg.data.val))
        else:
            if cfg.data.val.type == 'CocoDataset':
                runner.register_hook(CocoDistEvalmAPHook(cfg.data.val))
            else:
                runner.register_hook(DistEvalmAPHook(cfg.data.val))

    register_hooks(runner, cfg)

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
Esempio n. 14
0
def _non_dist_train_runner(model,
                           trainDataset,
                           valDataset,
                           cfg,
                           validate=False) -> Runner:

    # put model on gpus
    model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda()

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(model, batch_processor, optimizer, cfg.work_dir,
                    cfg.log_level)
    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config,
                                             **fp16_cfg,
                                             distributed=False)
    else:
        optimizer_config = cfg.optimizer_config

    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)

    before = toSingleGPUModeBefore
    after = toSingleGPUModeAfter

    # register eval hooks
    if validate:
        val_dataset_cfg = valDataset
        eval_cfg = cfg.get('evaluation', {})
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            runner.register_hook(
                HookWrapper(
                    CocoDistEvalRecallHook(val_dataset_cfg, **eval_cfg),
                    before, after))
        else:
            dataset_type = getattr(mmdetDatasets, val_dataset_cfg.type)
            if issubclass(dataset_type, mmdetDatasets.CocoDataset):
                runner.register_hook(
                    HookWrapper(
                        CocoDistEvalmAPHook(val_dataset_cfg, **eval_cfg),
                        before, after))
            else:
                runner.register_hook(
                    HookWrapper(DistEvalmAPHook(val_dataset_cfg, **eval_cfg),
                                before, after))

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        weightsPath = cfg.load_from
        if weightsPath is not None:
            if weightsPath.startswith("open-mmlab://"):
                cfgName = weightsPath[len("open-mmlab://"):]
                weightsPath = checkpoint_registry.getPath(cfgName)
            if cfg.resetHeads:
                torchHome = torch.hub._get_torch_home()
                chpName = os.path.basename(weightsPath)[0:(-1) * len(".pth")]
                noHeadWeightsPath = os.path.join(
                    torchHome, f"checkpoints/nohead/{chpName}_nohead.pth")
                if not os.path.exists(noHeadWeightsPath):
                    if isURL(weightsPath):
                        weights = model_zoo.load_url(weightsPath)
                    else:
                        weights = torch.load(weightsPath)
                    weights['state_dict'] = {
                        k: v
                        for k, v in weights['state_dict'].items()
                        if not k.startswith('bbox_head')
                        and not k.startswith('mask_head')
                    }
                    weightsDir = os.path.dirname(noHeadWeightsPath)
                    if not os.path.exists(weightsDir):
                        os.mkdir(weightsDir)
                    torch.save(weights, noHeadWeightsPath)
                weightsPath = noHeadWeightsPath

            runner.load_checkpoint(weightsPath)
    return runner
Esempio n. 15
0
def _dist_train(model,
                dataset,
                cfg,
                validate=False,
                logger=None,
                timestamp=None,
                runner_attr_dict=dict()):
    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    data_loaders = [
        build_dataloader(ds,
                         cfg.data.imgs_per_gpu,
                         cfg.data.workers_per_gpu,
                         dist=True) for ds in dataset
    ]
    # build runner
    runner_attr_dict.update({
        'imgs_per_gpu': cfg.data.imgs_per_gpu,
        'initial_lr': cfg.optimizer['lr']
    })
    if hasattr(dataset, 'CLASSES'):
        runner_attr_dict.update({'classes': dataset.CLASSES})
    optimizer = build_optimizer(model, cfg.optimizer)
    search_optimizer = getattr(cfg, 'search_config',
                               {}).pop('search_optimizer', None)
    runner = Runner(model,
                    batch_processor,
                    optimizer,
                    search_optimizer,
                    cfg.work_dir,
                    logger=logger,
                    runner_attr_dict=runner_attr_dict)
    # an ugly walkaround to make the .log and .log.json filenames the same
    runner.timestamp = timestamp

    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config,
                                             **fp16_cfg)
    else:
        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)

    # register hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config)
    runner.register_hook(DistSamplerSeedHook())
    if search_optimizer is not None:
        runner.register_hook(DistSearchOptimizerHook())
        runner.register_hook(SearchHook(**cfg.search_config))
    # register eval hooks
    if validate:
        val_dataset_cfg = cfg.data.val
        eval_cfg = cfg.get('evaluation', {})
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            runner.register_hook(
                CocoDistEvalRecallHook(val_dataset_cfg, **eval_cfg))
        else:
            dataset_type = DATASETS.get(val_dataset_cfg.type)
            if issubclass(dataset_type, datasets.CocoDataset):
                runner.register_hook(
                    CocoDistEvalmAPHook(val_dataset_cfg, **eval_cfg))
            else:
                runner.register_hook(
                    DistEvalmAPHook(val_dataset_cfg, **eval_cfg))

    register_hooks(runner, cfg)

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
Esempio n. 16
0
def _dist_train(model, dataset, cfg, validate=False):
    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    if 'use_img_sampling' not in cfg.data:
        cfg.data.update({'use_img_sampling': False})
    if 'use_sample_out' not in cfg.data:
        cfg.data.update({'use_sample_out': False})
    print('--Dist-train--IS:{}--ISout:{}'.format(cfg.data.use_img_sampling,
                                                 cfg.data.use_sample_out))
    data_loaders = [
        build_dataloader(ds,
                         cfg.data.imgs_per_gpu,
                         cfg.data.workers_per_gpu,
                         dist=True,
                         use_img_sampling=cfg.data.use_img_sampling,
                         use_sample_out=cfg.data.use_sample_out)
        for ds in dataset
    ]
    # put model on gpus
    model = MMDistributedDataParallel(model.cuda())

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(model, batch_processor, optimizer, cfg.work_dir,
                    cfg.log_level)

    # Add for LVIS by LiYu
    import logging
    runner.logger.setLevel(logging.INFO)
    # ====================

    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config,
                                             **fp16_cfg)
    else:
        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)

    # register hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())
    # register eval hooks
    if validate:
        val_dataset_cfg = cfg.data.val
        eval_cfg = cfg.get('evaluation', {})
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            runner.register_hook(
                CocoDistEvalRecallHook(val_dataset_cfg, **eval_cfg))
        else:
            dataset_type = DATASETS.get(val_dataset_cfg.type)
            if issubclass(dataset_type, datasets.CocoDataset):
                runner.register_hook(
                    CocoDistEvalmAPHook(val_dataset_cfg, **eval_cfg))
            else:
                runner.register_hook(
                    DistEvalmAPHook(val_dataset_cfg, **eval_cfg))

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
Esempio n. 17
0
def _dist_train(model,
                dataset,
                cfg,
                validate=False,
                logger=None,
                timestamp=None):
    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    data_loaders = [
        build_dataloader(
            ds, cfg.data.imgs_per_gpu, cfg.data.workers_per_gpu, dist=True)
        for ds in dataset
    ]
    # for index, item in enumerate(data_loaders):
    #     a = iter(item)
    #     print('~~~', index, a, next(a))
    #     break

    from mmdet.apis import get_root_logger
    logger = get_root_logger()
    logger.info(model)
    model = MMDistributedDataParallel(model.cuda())

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(
        model, batch_processor, optimizer, cfg.work_dir, logger=logger)
    # an ugly walkaround to make the .log and .log.json filenames the same
    runner.timestamp = timestamp

    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config,
                                             **fp16_cfg)
    else:
        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)

    # register hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config, cfg.total_epochs, len(data_loaders[0]))
    runner.register_hook(DistSamplerSeedHook())
    # register eval hooks
    if validate:
        val_dataset_cfg = cfg.data.val
        eval_cfg = cfg.get('evaluation', {})
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            runner.register_hook(
                CocoDistEvalRecallHook(val_dataset_cfg, **eval_cfg))
        else:
            dataset_type = DATASETS.get(val_dataset_cfg.type)
            if cfg.data.val.type in ['KittiDataset', 'KittiInCocoDataset', 'Kitti3dDataset']:
                runner.register_hook(
                    KITTIDistEvalmAPHook(val_dataset_cfg, **eval_cfg))
            elif issubclass(dataset_type, datasets.CocoDataset):
                logger.info('Using CocoDistEvalmAPHook.')
                runner.register_hook(
                    CocoDistEvalmAPHook(val_dataset_cfg, **eval_cfg))
            else:
                runner.register_hook(
                    DistEvalmAPHook(val_dataset_cfg, **eval_cfg))

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
Esempio n. 18
0
def _dist_train(model,
                dataset,
                cfg,
                validate=False,
                logger=None,
                timestamp=None):
    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    data_loaders = [
        build_dataloader(
            ds, cfg.data.imgs_per_gpu, cfg.data.workers_per_gpu, dist=True)
        for ds in dataset
    ]
    # put model on gpus
    model = MMDistributedDataParallel(model.cuda())

    # # if model.module.bbox_head.freeze_solov2_and_train_combonly:
    # if model.module.bbox_head.optimize_list is not None:
    #     for (key, param) in model.named_parameters():
    #         # if 'kernel_convs_convcomb' not in key and 'context_fusion_convs' not in key and 'learned_weight' not in key:
    #         if not any(s in key for s in model.module.bbox_head.optimize_list):
    #             param.requires_grad=False
    #         else:
    #             # print('optimize {}'.format(key))
    #             logger.info('optimize {}'.format(key))

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(
        model, batch_processor, optimizer, cfg.work_dir, logger=logger)
    # an ugly walkaround to make the .log and .log.json filenames the same
    runner.timestamp = timestamp

    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config,
                                             **fp16_cfg)
    else:
        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)

    # register hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())
    # register eval hooks
    if validate:
        val_dataset_cfg = cfg.data.val
        eval_cfg = cfg.get('evaluation', {})
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            runner.register_hook(
                CocoDistEvalRecallHook(val_dataset_cfg, **eval_cfg))
        else:
            dataset_type = DATASETS.get(val_dataset_cfg.type)
            if issubclass(dataset_type, datasets.CocoDataset):
                runner.register_hook(
                    CocoDistEvalmAPHook(val_dataset_cfg, **eval_cfg))
            else:
                runner.register_hook(
                    DistEvalmAPHook(val_dataset_cfg, **eval_cfg))

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)

    ## add test after training
    if cfg.data.test.ann_file != 'data/lvis/lvis_v0.5_val_lvis_freqset.json': # if val set is lvis freq, only eval on lvis-freq val set
        cfg.data.test.test_mode = True
        dataset = build_dataset(cfg.data.test)
        data_loader = build_dataloader(
            dataset,
            imgs_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=False,
            shuffle=False)
        model_orig=model.module
        model = MMDataParallel(model, device_ids=[0]).cuda()
        # data_loader.dataset.img_infos = data_loader.dataset.img_infos[:100]
        outputs = single_gpu_test(model, data_loader)

        print('\nwriting results to {}'.format('xxx'))
        # mmcv.dump(outputs, 'xxx')
        eval_types = ['segm']
        if eval_types:
            print('Starting evaluate {}'.format(' and '.join(eval_types)))
            if eval_types == ['proposal_fast']:
                result_file = 'xxx'
                coco_eval(result_file, eval_types, dataset.coco)
            else:
                if not isinstance(outputs[0], dict):
                    result_files = results2json_segm(dataset, outputs, 'xxx', dump=False)
                    coco_eval(result_files, eval_types, dataset.coco)
                else:
                    for name in outputs[0]:
                        print('\nEvaluating {}'.format(name))
                        outputs_ = [out[name] for out in outputs]
                        result_file = 'xxx' + '.{}'.format(name)
                        result_files = results2json(dataset, outputs_,
                                                    result_file, dump=False)
                        coco_eval(result_files, eval_types, dataset.coco)

        ##eval on lvis-77######
        cfg.data.test.ann_file = 'data/lvis/lvis_v0.5_val_cocofied.json'
        cfg.data.test.img_prefix = 'data/lvis/val2017/'
        cfg.data.test.test_mode = True
        dataset = build_dataset(cfg.data.test)
        data_loader = build_dataloader(
            dataset,
            imgs_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=False,
            shuffle=False)
        # model_orig=model.module
        # model = MMDataParallel(model, device_ids=[0]).cuda()
        # data_loader.dataset.img_infos = data_loader.dataset.img_infos[:100]
        outputs = single_gpu_test(model, data_loader)

        print('\nwriting results to {}'.format('xxx'))
        # mmcv.dump(outputs, 'xxx')
        eval_types = ['segm']
        if eval_types:
            print('Starting evaluate {}'.format(' and '.join(eval_types)))
            if eval_types == ['proposal_fast']:
                result_file = 'xxx'
                coco_eval(result_file, eval_types, dataset.coco)
            else:
                if not isinstance(outputs[0], dict):
                    result_files = results2json_segm(dataset, outputs, 'xxx', dump=False)
                    from lvis import LVISEval
                    lvisEval = LVISEval('data/lvis/lvis_v0.5_val_cocofied.json', result_files, 'segm')
                    lvisEval.run()
                    lvisEval.print_results()
                    # fix lvis api eval iou_thr error, should be 0.9 but was 0.8999
                    lvisEval.params.iou_thrs[8] = 0.9
                    for iou in [0.5, 0.6, 0.7, 0.8, 0.9]:
                        print('AP at iou {}: {}'.format(iou, lvisEval._summarize('ap', iou_thr=iou)))
                else:
                    for name in outputs[0]:
                        print('\nEvaluating {}'.format(name))
                        outputs_ = [out[name] for out in outputs]
                        result_file = 'xxx' + '.{}'.format(name)
                        result_files = results2json(dataset, outputs_,
                                                    result_file, dump=False)
                        coco_eval(result_files, eval_types, dataset.coco)
    else:
        ##eval on lvis-freq######
        cfg.data.test.test_mode = True
        dataset = build_dataset(cfg.data.test)
        data_loader = build_dataloader(
            dataset,
            imgs_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=False,
            shuffle=False)
        # model_orig=model.module
        # model = MMDataParallel(model, device_ids=[0]).cuda()
        data_loader.dataset.img_infos = data_loader.dataset.img_infos[:100]
        outputs = single_gpu_test(model, data_loader)

        print('\nwriting results to {}'.format('xxx'))
        # mmcv.dump(outputs, 'xxx')
        eval_types = ['segm']
        if eval_types:
            print('Starting evaluate {}'.format(' and '.join(eval_types)))
            if eval_types == ['proposal_fast']:
                result_file = 'xxx'
                coco_eval(result_file, eval_types, dataset.coco)
            else:
                if not isinstance(outputs[0], dict):
                    result_files = results2json_segm(dataset, outputs, 'xxx', dump=False)
                    from lvis import LVISEval
                    lvisEval = LVISEval(cfg.data.test.ann_file, result_files, 'segm')
                    lvisEval.run()
                    lvisEval.print_results()
                    # fix lvis api eval iou_thr error, should be 0.9 but was 0.8999
                    lvisEval.params.iou_thrs[8] = 0.9
                    for iou in [0.5, 0.6, 0.7, 0.8, 0.9]:
                        print('AP at iou {}: {}'.format(iou, lvisEval._summarize('ap', iou_thr=iou)))
                else:
                    for name in outputs[0]:
                        print('\nEvaluating {}'.format(name))
                        outputs_ = [out[name] for out in outputs]
                        result_file = 'xxx' + '.{}'.format(name)
                        result_files = results2json(dataset, outputs_,
                                                    result_file, dump=False)
                        coco_eval(result_files, eval_types, dataset.coco)
Esempio n. 19
0
def _dist_train(model, dataset, cfg, validate=False):
    # prepare data loaders
    data_loaders = [
        build_dataloader(dataset,
                         cfg.data.imgs_per_gpu,
                         cfg.data.workers_per_gpu,
                         dist=True)
    ]
    # put model on gpus
    model = MMDistributedDataParallel(model.cuda())
    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)

    # runner = mmcvRunner(model, batch_processor, optimizer, cfg.work_dir,
    #                 cfg.log_level)
    runner = Runner(model, batch_processor, optimizer, cfg.work_dir,
                    cfg.log_level)
    # register hooks
    optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())
    # register eval hooks
    if validate:
        val_dataset_cfg = cfg.data.val
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            runner.register_hook(CocoDistEvalRecallHook(val_dataset_cfg))
        else:
            dataset_type = getattr(datasets, val_dataset_cfg.type)
            if issubclass(dataset_type, datasets.CocoDataset):
                runner.register_hook(CocoDistEvalmAPHook(val_dataset_cfg))
            else:
                runner.register_hook(DistEvalmAPHook(val_dataset_cfg))

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)

    # add prune
    m = Mask(model)
    m.init_length()
    print("-" * 10 + "one epoch begin" + "-" * 10)
    print("remaining ratio of pruning : Norm is %f" % cfg.prun.rate_norm)
    print("reducing ratio of pruning : Distance is %f" % cfg.prun.rate_dist)
    print("total remaining ratio is %f" %
          (cfg.prun.rate_norm - cfg.prun.rate_dist))

    m.model = model
    m.init_mask(cfg.prun.rate_norm, cfg.prun.rate_dist, cfg)
    # m.if_zero()
    m.do_mask()
    m.do_similar_mask()
    model = m.model
    m.if_zero()

    # normal mode
    # runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
    # prune mode
    runner.run(data_loaders, m, cfg.workflow, cfg.total_epochs, cfg)
Esempio n. 20
0
def _dist_train(model,
                dataset,
                cfg,
                validate=False,
                multitask=False,
                vis=False):
    # prepare data loaders
    data_loaders = [[
        build_dataloader(d,
                         cfg.data.imgs_per_gpu //
                         2 if issubclass(d.__class__, datasets.BDDVideo) else
                         cfg.data.imgs_per_gpu,
                         cfg.data.workers_per_gpu //
                         2 if issubclass(d.__class__, datasets.BDDVideo) else
                         cfg.data.workers_per_gpu,
                         dist=True) for d in dataset
    ]] if multitask else [
        build_dataloader(dataset,
                         cfg.data.imgs_per_gpu //
                         2 if issubclass(dataset.__class__, datasets.BDDVideo)
                         else cfg.data.imgs_per_gpu,
                         cfg.data.workers_per_gpu //
                         2 if issubclass(dataset.__class__, datasets.BDDVideo)
                         else cfg.data.workers_per_gpu,
                         dist=True)
    ]
    # put model on gpus
    model = MMDistributedDataParallel(model.cuda())

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)

    bp = batch_processor_with_vis if vis else batch_processor
    runner = MTLRunner(model, bp, optimizer, cfg.work_dir,
                       cfg.log_level) if multitask else \
             Runner(model, bp, optimizer, cfg.work_dir,
                       cfg.log_level)

    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config,
                                             **fp16_cfg)
    else:
        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)

    # register hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())
    # register eval hooks
    if validate:
        val_dataset_cfg = cfg.data.val
        eval_cfg = cfg.get('evaluation', {})
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            runner.register_hook(
                CocoDistEvalRecallHook(val_dataset_cfg, **eval_cfg))
        else:
            if not type(val_dataset_cfg) == list:
                val_dataset_cfg = [val_dataset_cfg]
            for _cfg in val_dataset_cfg:
                dataset_type = getattr(datasets, _cfg.type)
                if issubclass(dataset_type, datasets.BddStreet) or \
                   issubclass(dataset_type, datasets.BddSemanticSeg):
                    runner.register_hook(BddSegEvalHook(_cfg, **eval_cfg))
                elif issubclass(dataset_type, datasets.CocoDataset):
                    runner.register_hook(CocoDistEvalmAPHook(_cfg, **eval_cfg))
                elif issubclass(dataset_type, datasets.BDDVideo):
                    runner.register_hook(BDDEvalHook(_cfg))
                else:
                    runner.register_hook(DistEvalmAPHook(_cfg, **eval_cfg))
            runner.register_logger_hooks(cfg.log_config)
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    if cfg.get('init_asso_head', False):
        ori_key = cfg.init_asso_head[0]
        new_key = cfg.init_asso_head[1]
        for _key in model.module.state_dict().keys():
            if 'asso' in _key:

                exist_key = _key.replace(ori_key, new_key)
                if exist_key in model.module.state_dict().keys():
                    if dist.get_rank() == 0:
                        print('Init "{}" with "{}"'.format(_key, exist_key))
                    model.module.state_dict()[_key].copy_(
                        model.module.state_dict()[exist_key])
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
def _dist_train(model, dataset, cfg, validate=False, train_dataset2=None):
    # prepare data loaders
    if train_dataset2 is not None:
        data_loaders = [
            build_dataloader(train_dataset2,
                             cfg.data.imgs_per_gpu,
                             cfg.data.workers_per_gpu,
                             dist=True),
            build_dataloader(dataset,
                             cfg.data.imgs_per_gpu,
                             cfg.data.workers_per_gpu,
                             dist=True)
        ]
    else:
        data_loaders = [
            build_dataloader(dataset,
                             cfg.data.imgs_per_gpu,
                             cfg.data.workers_per_gpu,
                             dist=True)
        ]
    # put model on gpus
    model = MMDistributedDataParallel(model.cuda())
    # build runner
    runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir,
                    cfg.log_level)
    # register hooks
    optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())
    # register eval hooks
    if validate:
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            runner.register_hook(CocoDistEvalRecallHook(cfg.data.val))
        else:
            if cfg.data.val.type == 'CocoDataset':
                runner.register_hook(
                    CocoDistEvalmAPHook(cfg.data.val, cfg.interval))
            elif cfg.data.val.type == 'CocoRGBDataset':
                runner.register_hook(CocoDistEvalmAPHookRGB(cfg.data.val))
            elif cfg.data.val.type == 'CocoDatasetRGB2':
                runner.register_hook(
                    CocoDistEvalmAPHookRGB2(cfg.data.val, cfg.interval))
            elif cfg.data.val.type == 'Coco3DDataset' and hasattr(
                    cfg, 'data2') and hasattr(cfg.data2, 'val'):
                runner.register_hook(
                    CocoDistEvalmAPHook3DMult(cfg.data.val, cfg.data2.val))
            elif cfg.data.val.type == 'Coco3DDataset':
                runner.register_hook(
                    CocoDistEvalmAPHook3D(cfg.data.val, cfg.interval))
            elif cfg.data.val.type == 'Coco3DParcelDataset':
                runner.register_hook(
                    CocoDistEvalmAPHook3DParcel(cfg.data.val, cfg.interval))
            elif cfg.data.val.type == 'Coco3D2ScalesDataset':
                runner.register_hook(
                    CocoDistEvalmAPHook3D(cfg.data.val,
                                          cfg.interval,
                                          dataset2=cfg.data2_2scales.val))
            elif cfg.data.val.type == 'Coco3D3ScalesDataset':
                runner.register_hook(
                    CocoDistEvalmAPHook3D(cfg.data.val,
                                          cfg.interval,
                                          dataset3=cfg.data3_3scales.val))
            else:
                runner.register_hook(DistEvalmAPHook(cfg.data.val))

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)