def main(args):
    assert args.opset in available_opsets
    assert args.opset > 9

    torch.set_default_tensor_type(torch.FloatTensor)

    model = init_detector(args.config, args.checkpoint, device='cpu')
    model.eval()
    if torch.cuda.is_available():
        model.cuda()
    device = next(model.parameters()).device
    cfg = model.cfg
    # dummy input used for tracing the model during export
    fake_data = get_fake_input(cfg, device=device)

    # BEGIN nncf part
    if cfg.get('nncf_config'):
        check_nncf_is_enabled()
        if not is_checkpoint_nncf(args.checkpoint):
            raise RuntimeError('Trying to make export with NNCF compression '
                               'of a model snapshot that was NOT trained with NNCF')
        cfg.load_from = args.checkpoint
        cfg.resume_from = None
        compression_ctrl, model = wrap_nncf_model(model, cfg, None, get_fake_input)
        # TODO: apply the following line for NNCF 1.5.*
        # compression_ctrl.prepare_for_export()
    # END nncf part

    if args.target == 'openvino' and not args.alt_ssd_export:
        if hasattr(model, 'roi_head'):
            stub_roi_feature_extractor(model.roi_head, 'bbox_roi_extractor')
            stub_roi_feature_extractor(model.roi_head, 'mask_roi_extractor')

    mmcv.mkdir_or_exist(osp.abspath(args.output_dir))
    onnx_model_path = osp.join(args.output_dir,
                               osp.splitext(osp.basename(args.config))[0] + '.onnx')

    with torch.no_grad():
        export_to_onnx(model, fake_data, export_name=onnx_model_path,
                       opset=args.opset,
                       alt_ssd_export=getattr(args, 'alt_ssd_export', False),
                       verbose=True)
        add_node_names(onnx_model_path)
        print(f'ONNX model has been saved to "{onnx_model_path}"')

    optimize_onnx_graph(onnx_model_path)

    if args.target == 'openvino':
        input_shape = list(fake_data['img'][0].shape)
        if args.input_shape:
            input_shape = [1, 3, *args.input_shape]
        export_to_openvino(cfg, onnx_model_path, args.output_dir, input_shape,
                           args.input_format)
    else:
        pass
def main():
    args = parse_args()

    assert args.out or args.eval or args.format_only or args.show \
        or args.show_dir, \
        ('Please specify at least one operation (save/eval/format/show the '
         'results / save the results) with the argument "--out", "--eval"'
         ', "--format-only", "--show" or "--show-dir"')

    if args.eval and args.format_only:
        raise ValueError('--eval and --format_only cannot be both specified')

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = Config.fromfile(args.config)
    if args.update_config:
        cfg.merge_from_dict(args.update_config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(dataset,
                                   samples_per_gpu=1,
                                   workers_per_gpu=cfg.data.workers_per_gpu,
                                   dist=distributed,
                                   shuffle=False)

    # build the model and load checkpoint
    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    # nncf model wrapper
    if cfg.get('nncf_config'):
        check_nncf_is_enabled()
        if not is_checkpoint_nncf(args.checkpoint):
            raise RuntimeError('Trying to make testing with NNCF compression '
                               'of a model snapshot that was NOT trained with NNCF')
        cfg.load_from = args.checkpoint
        cfg.resume_from = None
        if torch.cuda.is_available():
            model = model.cuda()
        _, model = wrap_nncf_model(model, cfg, None, get_fake_input)
        checkpoint = torch.load(args.checkpoint, map_location=None)
    else:
        fp16_cfg = cfg.get('fp16', None)
        if fp16_cfg is not None:
            wrap_fp16_model(model)
        checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
        if args.fuse_conv_bn:
            # TODO: FIXME: should it be inside this 'else' branch???
            from tools.fuse_conv_bn import fuse_module
            model = fuse_module(model)

    # old versions did not save class info in checkpoints, this workaround is
    # for backward compatibility
    if 'CLASSES' in checkpoint['meta']:
        model.CLASSES = checkpoint['meta']['CLASSES']
    else:
        model.CLASSES = dataset.CLASSES

    if torch.cuda.is_available():
        if not distributed:
            model = MMDataParallel(model, device_ids=[0])
            outputs = single_gpu_test(model, data_loader, args.show,
                                      args.show_dir, args.show_score_thr)
        else:
            model = MMDistributedDataParallel(
                model.cuda(),
                device_ids=[torch.cuda.current_device()],
                broadcast_buffers=False)
            outputs = multi_gpu_test(model, data_loader, args.tmpdir,
                                     args.gpu_collect)
    else:
        model = MMDataCPU(model)
        outputs = single_gpu_test(model, data_loader, args.show,
                                  args.show_dir, args.show_score_thr)

    rank, _ = get_dist_info()
    if rank == 0:
        if args.out:
            print(f'\nwriting results to {args.out}')
            mmcv.dump(outputs, args.out)
        kwargs = cfg.get('evaluation', {})
        kwargs.pop('interval', None)
        kwargs.pop('gpu_collect', None)
        kwargs.update({} if args.options is None else args.options)
        kwargs['metric'] = args.eval
        if args.format_only:
            dataset.format_results(outputs, **kwargs)
        if args.eval:
            dataset.evaluate(outputs, **kwargs)
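# A minimal sketch of the parse_args() that the testing main() above expects.
# The argument names mirror the attributes accessed in main(); defaults, help
# strings and the exact handling of --options / --update_config are assumptions,
# not the original parser of this script.
import argparse

from mmcv import DictAction


def parse_args():
    parser = argparse.ArgumentParser(description='Test (and evaluate) a detector')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('checkpoint', help='checkpoint file')
    parser.add_argument('--out', help='output result file in pickle format')
    parser.add_argument('--eval', type=str, nargs='+',
                        help='evaluation metrics, e.g. "bbox", "segm"')
    parser.add_argument('--format-only', action='store_true',
                        help='format the results without running evaluation')
    parser.add_argument('--show', action='store_true', help='show results')
    parser.add_argument('--show-dir', help='directory where painted images are saved')
    parser.add_argument('--show-score-thr', type=float, default=0.3,
                        help='score threshold for shown results')
    parser.add_argument('--fuse-conv-bn', action='store_true',
                        help='fuse conv and bn layers before testing')
    parser.add_argument('--tmpdir', help='tmp directory for multi-gpu result collection')
    parser.add_argument('--gpu-collect', action='store_true',
                        help='use gpu to collect results')
    parser.add_argument('--options', nargs='+', action=DictAction,
                        help='extra key=value arguments passed to dataset.evaluate()')
    parser.add_argument('--update_config', nargs='+', action=DictAction,
                        help='key=value overrides merged into the loaded config')
    parser.add_argument('--launcher', default='none',
                        choices=['none', 'pytorch', 'slurm', 'mpi'],
                        help='job launcher')
    return parser.parse_args()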
def main(args):
    assert args.opset in available_opsets
    assert args.opset > 9

    torch.set_default_tensor_type(torch.FloatTensor)

    config = mmcv.Config.fromfile(args.config)
    if args.update_config:
        config.merge_from_dict(args.update_config)

    model = init_detector(config, args.checkpoint, device='cpu')
    model.eval()
    if torch.cuda.is_available():
        model.cuda()
    device = next(model.parameters()).device
    cfg = model.cfg
    # dummy input used for tracing the model during export
    fake_data = get_fake_input(cfg, device=device)

    # BEGIN nncf part
    was_model_compressed = is_checkpoint_nncf(args.checkpoint)
    cfg_contains_nncf = cfg.get('nncf_config')
    if cfg_contains_nncf and not was_model_compressed:
        raise RuntimeError('Trying to make export with NNCF compression '
                           'of a model snapshot that was NOT trained with NNCF')
    if was_model_compressed and not cfg_contains_nncf:
        # reading NNCF config from checkpoint
        nncf_part = get_nncf_config_from_meta(args.checkpoint)
        for k, v in nncf_part.items():
            cfg[k] = v

    if cfg.get('nncf_config'):
        alt_ssd_export = getattr(args, 'alt_ssd_export', False)
        assert not alt_ssd_export, \
            'Export of NNCF-compressed model is incompatible with --alt_ssd_export'
        check_nncf_is_enabled()
        cfg.load_from = args.checkpoint
        cfg.resume_from = None
        compression_ctrl, model = wrap_nncf_model(model, cfg, None, get_fake_input)
        compression_ctrl.prepare_for_export()
    # END nncf part

    mmcv.mkdir_or_exist(osp.abspath(args.output_dir))
    onnx_model_path = osp.join(args.output_dir,
                               osp.splitext(osp.basename(args.config))[0] + '.onnx')

    with torch.no_grad():
        export_to_onnx(model, fake_data, export_name=onnx_model_path,
                       opset=args.opset,
                       alt_ssd_export=getattr(args, 'alt_ssd_export', False),
                       target=args.target, verbose=False)
        add_node_names(onnx_model_path)
        print(f'ONNX model has been saved to "{onnx_model_path}"')

    optimize_onnx_graph(onnx_model_path)

    with_text = False
    if args.target == 'openvino' and not args.alt_ssd_export:
        if hasattr(model, 'roi_head'):
            if getattr(model.roi_head, 'with_text', False):
                with_text = True

    if args.target == 'openvino':
        input_shape = list(fake_data['img'][0].shape)
        if args.input_shape:
            input_shape = [1, 3, *args.input_shape]
        export_to_openvino(cfg, onnx_model_path, args.output_dir, input_shape,
                           args.input_format, with_text=with_text)
    else:
        pass
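# A minimal sketch of how the export main(args) above could be wired up.
# Argument names mirror the attributes accessed in main(); the choices, defaults
# and help texts below are assumptions rather than the original CLI of the
# export script.
import argparse

from mmcv import DictAction


def parse_args():
    parser = argparse.ArgumentParser(description='Export a detector to ONNX / OpenVINO')
    parser.add_argument('config', help='path to the model config file')
    parser.add_argument('checkpoint', help='path to the model checkpoint')
    parser.add_argument('output_dir', help='directory where the exported model is saved')
    parser.add_argument('--opset', type=int, default=10,
                        help='ONNX opset version (must be greater than 9)')
    parser.add_argument('--target', choices=['onnx', 'openvino'], default='openvino',
                        help='export target')
    parser.add_argument('--alt_ssd_export', action='store_true',
                        help='use the alternative SSD export path')
    parser.add_argument('--input_shape', type=int, nargs=2, default=None,
                        help='height and width of the network input')
    parser.add_argument('--input_format', default='BGR', choices=['BGR', 'RGB'],
                        help='color format expected by the exported model')
    parser.add_argument('--update_config', nargs='+', action=DictAction, default=None,
                        help='key=value overrides merged into the loaded config')
    return parser.parse_args()


if __name__ == '__main__':
    main(parse_args())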
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)

    cfg_samples_per_gpu = cfg.data.samples_per_gpu

    if args.update_config is not None:
        cfg.merge_from_dict(args.update_config)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    else:
        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)

    if args.autoscale_lr:
        # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
        cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        if torch.cuda.is_available():
            init_dist(args.launcher, **cfg.dist_params)
        else:
            cfg.dist_params['backend'] = 'gloo'
            init_dist_cpu(args.launcher, **cfg.dist_params)

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # dump config
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    if args.tensorboard_dir is not None:
        hooks = [hook for hook in cfg.log_config.hooks
                 if hook.type == 'TensorboardLoggerHook']
        if hooks:
            hooks[0].log_dir = args.tensorboard_dir
        else:
            logger.warning('Failed to find TensorboardLoggerHook')

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line)
    meta['env_info'] = env_info

    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg.pretty_text}')

    if cfg.get('nncf_config'):
        check_nncf_is_enabled()
        logger.info('NNCF config: {}'.format(cfg.nncf_config))
        meta.update(get_nncf_metadata())

    # set random seeds
    if args.seed is not None:
        logger.info(f'Set random seed to {args.seed}, '
                    f'deterministic: {args.deterministic}')
        set_random_seed(args.seed, deterministic=args.deterministic)
        cfg.seed = args.seed
        meta['seed'] = args.seed

    model = build_detector(cfg.model,
                           train_cfg=cfg.train_cfg,
                           test_cfg=cfg.test_cfg)

    datasets = [build_dataset(cfg.data.train)]

    dataset_len_per_gpu = sum(len(dataset) for dataset in datasets)
    if distributed:
        dataset_len_per_gpu = dataset_len_per_gpu // get_dist_info()[1]
    assert dataset_len_per_gpu > 0

    if cfg.data.samples_per_gpu == 'auto':
        if torch.cuda.is_available():
            logger.info('Auto-selection of samples per gpu (batch size).')
            cfg.data.samples_per_gpu = determine_max_batch_size(
                cfg, distributed, dataset_len_per_gpu)
            logger.info(f'Auto selected batch size: {cfg.data.samples_per_gpu} '
                        f'{dataset_len_per_gpu}')
            cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
        else:
            logger.warning('Auto-selection of batch size is not implemented for CPU.')
            logger.warning('Setting batch size to value taken from configuration file.')
            cfg.data.samples_per_gpu = cfg_samples_per_gpu

    if dataset_len_per_gpu < cfg.data.samples_per_gpu:
        cfg.data.samples_per_gpu = dataset_len_per_gpu
        logger.warning(f'Decreased samples_per_gpu to: {cfg.data.samples_per_gpu} '
                       f'because of dataset length: {dataset_len_per_gpu} '
                       f'and gpus number: {get_dist_info()[1]}')

    if len(cfg.workflow) == 2:
        val_dataset = copy.deepcopy(cfg.data.val)
        val_dataset.pipeline = cfg.data.train.pipeline
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(mmdet_version=__version__,
                                          config=cfg.pretty_text,
                                          CLASSES=datasets[0].CLASSES)
        # also save nncf status in the checkpoint -- it is important,
        # since it is used in wrap_nncf_model for loading NNCF-compressed models
        if cfg.get('nncf_config'):
            nncf_metadata = get_nncf_metadata()
            cfg.checkpoint_config.meta.update(nncf_metadata)
    else:
        # cfg.checkpoint_config is None
        assert not cfg.get('nncf_config'), (
            'NNCF is enabled, but checkpoint_config is not set -- '
            'cannot store NNCF metainfo into checkpoints')

    # add an attribute for visualization convenience
    model.CLASSES = datasets[0].CLASSES
    train_detector(model,
                   datasets,
                   cfg,
                   distributed=distributed,
                   validate=(not args.no_validate),
                   timestamp=timestamp,
                   meta=meta)
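# A minimal sketch of the parse_args() that the training main() above expects,
# plus the usual entry point. Argument names mirror the attributes accessed in
# main(); defaults, help strings and the mutually exclusive gpu group are
# assumptions, not the original parser of this script.
import argparse

from mmcv import DictAction


def parse_args():
    parser = argparse.ArgumentParser(description='Train a detector')
    parser.add_argument('config', help='train config file path')
    parser.add_argument('--work-dir', help='directory to save logs and checkpoints')
    parser.add_argument('--resume-from', help='checkpoint file to resume from')
    parser.add_argument('--no-validate', action='store_true',
                        help='do not evaluate checkpoints during training')
    group_gpus = parser.add_mutually_exclusive_group()
    group_gpus.add_argument('--gpus', type=int, help='number of gpus to use')
    group_gpus.add_argument('--gpu-ids', type=int, nargs='+', help='ids of gpus to use')
    parser.add_argument('--seed', type=int, default=None, help='random seed')
    parser.add_argument('--deterministic', action='store_true',
                        help='set deterministic options for the CUDNN backend')
    parser.add_argument('--autoscale-lr', action='store_true',
                        help='scale the learning rate with the number of gpus')
    parser.add_argument('--tensorboard-dir', default=None,
                        help='log directory for the TensorboardLoggerHook, if present')
    parser.add_argument('--update_config', nargs='+', action=DictAction, default=None,
                        help='key=value overrides merged into the loaded config')
    parser.add_argument('--launcher', default='none',
                        choices=['none', 'pytorch', 'slurm', 'mpi'],
                        help='job launcher')
    return parser.parse_args()


if __name__ == '__main__':
    main()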