def __init__(self, embed_dims, num_heads, attn_drop=0., proj_drop=0., dropout_layer=None, init_cfg=None, batch_first=True, qkv_bias=False, norm_cfg=dict(type='LN'), sr_ratio=1): super().__init__( embed_dims, num_heads, attn_drop, proj_drop, dropout_layer=dropout_layer, init_cfg=init_cfg, batch_first=batch_first, bias=qkv_bias) self.sr_ratio = sr_ratio if sr_ratio > 1: self.sr = Conv2d( in_channels=embed_dims, out_channels=embed_dims, kernel_size=sr_ratio, stride=sr_ratio) # The ret[0] of build_norm_layer is norm name. self.norm = build_norm_layer(norm_cfg, embed_dims)[1] # handle the BC-breaking from https://github.com/open-mmlab/mmcv/pull/1418 # noqa from mmseg import digit_version, mmcv_version if mmcv_version < digit_version('1.3.17'): warnings.warn('The legacy version of forward function in' 'EfficientMultiheadAttention is deprecated in' 'mmcv>=1.3.17 and will no longer support in the' 'future. Please upgrade your mmcv.') self.forward = self.legacy_forward
def test_digit_version(): assert digit_version('0.2.16') == (0, 2, 16, 0, 0, 0) assert digit_version('1.2.3') == (1, 2, 3, 0, 0, 0) assert digit_version('1.2.3rc0') == (1, 2, 3, 0, -1, 0) assert digit_version('1.2.3rc1') == (1, 2, 3, 0, -1, 1) assert digit_version('1.0rc0') == (1, 0, 0, 0, -1, 0) assert digit_version('1.0') == digit_version('1.0.0') assert digit_version('1.5.0+cuda90_cudnn7.6.3_lms') == digit_version('1.5') assert digit_version('1.0.0dev') < digit_version('1.0.0a') assert digit_version('1.0.0a') < digit_version('1.0.0a1') assert digit_version('1.0.0a') < digit_version('1.0.0b') assert digit_version('1.0.0b') < digit_version('1.0.0rc') assert digit_version('1.0.0rc1') < digit_version('1.0.0') assert digit_version('1.0.0') < digit_version('1.0.0post') assert digit_version('1.0.0post') < digit_version('1.0.0post1') assert digit_version('v1') == (1, 0, 0, 0, 0, 0) assert digit_version('v1.1.5') == (1, 1, 5, 0, 0, 0)
def main(): args = parse_args() assert args.out or args.eval or args.format_only or args.show \ or args.show_dir, \ ('Please specify at least one operation (save/eval/format/show the ' 'results / save the results) with the argument "--out", "--eval"' ', "--format-only", "--show" or "--show-dir"') if args.eval and args.format_only: raise ValueError('--eval and --format_only cannot be both specified') if args.out is not None and not args.out.endswith(('.pkl', '.pickle')): raise ValueError('The output file must be a pkl file.') cfg = mmcv.Config.fromfile(args.config) if args.cfg_options is not None: cfg.merge_from_dict(args.cfg_options) # set multi-process settings setup_multi_processes(cfg) # set cudnn_benchmark if cfg.get('cudnn_benchmark', False): torch.backends.cudnn.benchmark = True if args.aug_test: # hard code index cfg.data.test.pipeline[1].img_ratios = [ 0.5, 0.75, 1.0, 1.25, 1.5, 1.75 ] cfg.data.test.pipeline[1].flip = True cfg.model.pretrained = None cfg.data.test.test_mode = True if args.gpu_id is not None: cfg.gpu_ids = [args.gpu_id] # init distributed env first, since logger depends on the dist info. if args.launcher == 'none': cfg.gpu_ids = [args.gpu_id] distributed = False if len(cfg.gpu_ids) > 1: warnings.warn(f'The gpu-ids is reset from {cfg.gpu_ids} to ' f'{cfg.gpu_ids[0:1]} to avoid potential error in ' 'non-distribute testing time.') cfg.gpu_ids = cfg.gpu_ids[0:1] else: distributed = True init_dist(args.launcher, **cfg.dist_params) rank, _ = get_dist_info() # allows not to create if args.work_dir is not None and rank == 0: mmcv.mkdir_or_exist(osp.abspath(args.work_dir)) timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) if args.aug_test: json_file = osp.join(args.work_dir, f'eval_multi_scale_{timestamp}.json') else: json_file = osp.join(args.work_dir, f'eval_single_scale_{timestamp}.json') elif rank == 0: work_dir = osp.join('./work_dirs', osp.splitext(osp.basename(args.config))[0]) mmcv.mkdir_or_exist(osp.abspath(work_dir)) timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) if args.aug_test: json_file = osp.join(work_dir, f'eval_multi_scale_{timestamp}.json') else: json_file = osp.join(work_dir, f'eval_single_scale_{timestamp}.json') # build the dataloader # TODO: support multiple images per gpu (only minor changes are needed) dataset = build_dataset(cfg.data.test) # The default loader config loader_cfg = dict( # cfg.gpus will be ignored if distributed num_gpus=len(cfg.gpu_ids), dist=distributed, shuffle=False) # The overall dataloader settings loader_cfg.update({ k: v for k, v in cfg.data.items() if k not in [ 'train', 'val', 'test', 'train_dataloader', 'val_dataloader', 'test_dataloader' ] }) test_loader_cfg = { **loader_cfg, 'samples_per_gpu': 1, 'shuffle': False, # Not shuffle by default **cfg.data.get('test_dataloader', {}) } # build the dataloader data_loader = build_dataloader(dataset, **test_loader_cfg) # build the model and load checkpoint cfg.model.train_cfg = None model = build_segmentor(cfg.model, test_cfg=cfg.get('test_cfg')) fp16_cfg = cfg.get('fp16', None) if fp16_cfg is not None: wrap_fp16_model(model) checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu') if 'CLASSES' in checkpoint.get('meta', {}): model.CLASSES = checkpoint['meta']['CLASSES'] else: print('"CLASSES" not found in meta, use dataset.CLASSES instead') model.CLASSES = dataset.CLASSES if 'PALETTE' in checkpoint.get('meta', {}): model.PALETTE = checkpoint['meta']['PALETTE'] else: print('"PALETTE" not found in meta, use dataset.PALETTE instead') model.PALETTE = dataset.PALETTE # clean gpu memory when starting a new evaluation. torch.cuda.empty_cache() eval_kwargs = {} if args.eval_options is None else args.eval_options # Deprecated efficient_test = eval_kwargs.get('efficient_test', False) if efficient_test: warnings.warn( '``efficient_test=True`` does not have effect in tools/test.py, ' 'the evaluation and format results are CPU memory efficient by ' 'default') eval_on_format_results = (args.eval is not None and 'cityscapes' in args.eval) if eval_on_format_results: assert len(args.eval) == 1, 'eval on format results is not ' \ 'applicable for metrics other than ' \ 'cityscapes' if args.format_only or eval_on_format_results: if 'imgfile_prefix' in eval_kwargs: tmpdir = eval_kwargs['imgfile_prefix'] else: tmpdir = '.format_cityscapes' eval_kwargs.setdefault('imgfile_prefix', tmpdir) mmcv.mkdir_or_exist(tmpdir) else: tmpdir = None if not distributed: warnings.warn( 'SyncBN is only supported with DDP. To be compatible with DP, ' 'we convert SyncBN to BN. Please use dist_train.sh which can ' 'avoid this error.') if not torch.cuda.is_available(): assert digit_version(mmcv.__version__) >= digit_version('1.4.4'), \ 'Please use MMCV >= 1.4.4 for CPU training!' model = revert_sync_batchnorm(model) model = MMDataParallel(model, device_ids=cfg.gpu_ids) results = single_gpu_test(model, data_loader, args.show, args.show_dir, False, args.opacity, pre_eval=args.eval is not None and not eval_on_format_results, format_only=args.format_only or eval_on_format_results, format_args=eval_kwargs) else: model = MMDistributedDataParallel( model.cuda(), device_ids=[torch.cuda.current_device()], broadcast_buffers=False) results = multi_gpu_test(model, data_loader, args.tmpdir, args.gpu_collect, False, pre_eval=args.eval is not None and not eval_on_format_results, format_only=args.format_only or eval_on_format_results, format_args=eval_kwargs) rank, _ = get_dist_info() if rank == 0: if args.out: warnings.warn( 'The behavior of ``args.out`` has been changed since MMSeg ' 'v0.16, the pickled outputs could be seg map as type of ' 'np.array, pre-eval results or file paths for ' '``dataset.format_results()``.') print(f'\nwriting results to {args.out}') mmcv.dump(results, args.out) if args.eval: eval_kwargs.update(metric=args.eval) metric = dataset.evaluate(results, **eval_kwargs) metric_dict = dict(config=args.config, metric=metric) mmcv.dump(metric_dict, json_file, indent=4) if tmpdir is not None and eval_on_format_results: # remove tmp dir when cityscapes evaluation shutil.rmtree(tmpdir)
def train_segmentor(model, dataset, cfg, distributed=False, validate=False, timestamp=None, meta=None): """Launch segmentor training.""" logger = get_root_logger(cfg.log_level) # prepare data loaders dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset] # The default loader config loader_cfg = dict( # cfg.gpus will be ignored if distributed num_gpus=len(cfg.gpu_ids), dist=distributed, seed=cfg.seed, drop_last=True) # The overall dataloader settings loader_cfg.update({ k: v for k, v in cfg.data.items() if k not in [ 'train', 'val', 'test', 'train_dataloader', 'val_dataloader', 'test_dataloader' ] }) # The specific dataloader settings train_loader_cfg = {**loader_cfg, **cfg.data.get('train_dataloader', {})} data_loaders = [build_dataloader(ds, **train_loader_cfg) for ds in dataset] # put model on gpus if distributed: find_unused_parameters = cfg.get('find_unused_parameters', False) # Sets the `find_unused_parameters` parameter in # torch.nn.parallel.DistributedDataParallel model = MMDistributedDataParallel( model.cuda(), device_ids=[torch.cuda.current_device()], broadcast_buffers=False, find_unused_parameters=find_unused_parameters) else: if not torch.cuda.is_available(): assert digit_version(mmcv.__version__) >= digit_version('1.4.4'), \ 'Please use MMCV >= 1.4.4 for CPU training!' model = MMDataParallel(model, device_ids=cfg.gpu_ids) # build runner optimizer = build_optimizer(model, cfg.optimizer) if cfg.get('runner') is None: cfg.runner = {'type': 'IterBasedRunner', 'max_iters': cfg.total_iters} warnings.warn( 'config is now expected to have a `runner` section, ' 'please set `runner` in your config.', UserWarning) runner = build_runner(cfg.runner, default_args=dict(model=model, batch_processor=None, optimizer=optimizer, work_dir=cfg.work_dir, logger=logger, meta=meta)) # register hooks runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config, cfg.checkpoint_config, cfg.log_config, cfg.get('momentum_config', None)) if distributed: # when distributed training by epoch, using`DistSamplerSeedHook` to set # the different seed to distributed sampler for each epoch, it will # shuffle dataset at each epoch and avoid overfitting. if isinstance(runner, EpochBasedRunner): runner.register_hook(DistSamplerSeedHook()) # an ugly walkaround to make the .log and .log.json filenames the same runner.timestamp = timestamp # register eval hooks if validate: val_dataset = build_dataset(cfg.data.val, dict(test_mode=True)) # The specific dataloader settings val_loader_cfg = { **loader_cfg, 'samples_per_gpu': 1, 'shuffle': False, # Not shuffle by default **cfg.data.get('val_dataloader', {}), } val_dataloader = build_dataloader(val_dataset, **val_loader_cfg) eval_cfg = cfg.get('evaluation', {}) eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner' eval_hook = DistEvalHook if distributed else EvalHook # In this PR (https://github.com/open-mmlab/mmcv/pull/1193), the # priority of IterTimerHook has been modified from 'NORMAL' to 'LOW'. runner.register_hook(eval_hook(val_dataloader, **eval_cfg), priority='LOW') # user-defined hooks if cfg.get('custom_hooks', None): custom_hooks = cfg.custom_hooks assert isinstance(custom_hooks, list), \ f'custom_hooks expect list type, but got {type(custom_hooks)}' for hook_cfg in cfg.custom_hooks: assert isinstance(hook_cfg, dict), \ 'Each item in custom_hooks expects dict type, but got ' \ f'{type(hook_cfg)}' hook_cfg = hook_cfg.copy() priority = hook_cfg.pop('priority', 'NORMAL') hook = build_from_cfg(hook_cfg, HOOKS) runner.register_hook(hook, priority=priority) if cfg.resume_from is None and cfg.get('auto_resume'): resume_from = find_latest_checkpoint(cfg.work_dir) if resume_from is not None: cfg.resume_from = resume_from if cfg.resume_from: runner.resume(cfg.resume_from) elif cfg.load_from: runner.load_checkpoint(cfg.load_from) runner.run(data_loaders, cfg.workflow)