コード例 #1
0
ファイル: benchmark.py プロジェクト: zhouzaida/mmaction2
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.backbone.pretrained = None
    cfg.data.test.test_mode = True

    # build the dataloader
    dataset = build_dataset(cfg.data.test, dict(test_mode=True))
    data_loader = build_dataloader(
        dataset,
        videos_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)

    # build the model and load checkpoint
    model = build_model(
        cfg.model, train_cfg=None, test_cfg=cfg.get('test_cfg'))
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)

    model = MMDataParallel(model, device_ids=[0])

    model.eval()

    # the first several iterations may be very slow so skip them
    num_warmup = 5
    pure_inf_time = 0

    # benchmark with 2000 video and take the average
    for i, data in enumerate(data_loader):

        torch.cuda.synchronize()
        start_time = time.perf_counter()

        with torch.no_grad():
            model(return_loss=False, **data)

        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            pure_inf_time += elapsed
            if (i + 1) % args.log_interval == 0:
                fps = (i + 1 - num_warmup) / pure_inf_time
                print(
                    f'Done video [{i + 1:<3}/ 2000], fps: {fps:.1f} video / s')

        if (i + 1) == 200:
            pure_inf_time += elapsed
            fps = (i + 1 - num_warmup) / pure_inf_time
            print(f'Overall fps: {fps:.1f} video / s')
            break
コード例 #2
0
    def __init__(self,
                 config,
                 ckpt_path=None,
                 cudnn_benchmark=False,
                 fp16=False,
                 enable_fuse_conv_bn=False):
        self.config = config
        self.fp16 = fp16
        self.cudnn_benchmark = cudnn_benchmark
        self.enable_fuse_conv_bn = enable_fuse_conv_bn

        if isinstance(config, str):
            cfg = Config.fromfile(config)
            cfg.model.backbone.pretrained = None
            self._model_name = _mmaction2_config_to_model_name(cfg.model)
            self.model = mmaction2_build_model(cfg.model,
                                               train_cfg=None,
                                               test_cfg=cfg.get('test_cfg'))
        elif isinstance(config, dict):
            self.model = mmaction2_build_model(config, None, None)
            # get model name
            self._model_name = _mmaction2_config_to_model_name(config)
        else:
            raise ValueError({f"Unknown config {config}"})

        if ckpt_path is not None:
            load_checkpoint(self.model, ckpt_path, map_location='cpu')

        if cudnn_benchmark:
            torch.backends.cudnn.benchmark = True
        if fp16:
            wrap_fp16_model(self.model)
        if enable_fuse_conv_bn:
            self.model = fuse_conv_bn(self.model)
        self.model.cuda().eval()
コード例 #3
0
def inference_pytorch(args, cfg, distributed, data_loader):
    """Get predictions by pytorch models."""
    # remove redundant pretrain steps for testing
    turn_off_pretrained(cfg.model)

    # build the model and load checkpoint
    model = build_model(cfg.model,
                        train_cfg=None,
                        test_cfg=cfg.get('test_cfg'))

    if len(cfg.module_hooks) > 0:
        register_module_hooks(model, cfg.module_hooks)

    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    load_checkpoint(model, args.checkpoint, map_location='cpu')

    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test(model, data_loader)
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        outputs = multi_gpu_test(model, data_loader, args.tmpdir,
                                 args.gpu_collect)

    return outputs
コード例 #4
0
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # build the dataloader
    dataset = build_dataset(cfg.data.val)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)

    # build the model and load checkpoint
    model = build_posenet(cfg.model)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)
    model = MMDataParallel(model, device_ids=[0])

    # the first several iterations may be very slow so skip them
    num_warmup = 5
    pure_inf_time = 0

    # benchmark with total batch and take the average
    for i, data in enumerate(data_loader):

        torch.cuda.synchronize()
        start_time = time.perf_counter()
        with torch.no_grad():
            model(return_loss=False, **data)

        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            pure_inf_time += elapsed
            if (i + 1) % args.log_interval == 0:
                its = (i + 1 - num_warmup) / pure_inf_time
                print(f'Done item [{i + 1:<3}],  {its:.2f} items / s')
    print(f'Overall average: {its:.2f} items / s')
    print(f'Total time: {pure_inf_time:.2f} s')
コード例 #5
0
    def before_run(self, runner):
        """Preparing steps before Mixed Precision Training.

        1. Make a master copy of fp32 weights for optimization.
        2. Convert the main model from fp32 to fp16.
        """
        # keep a copy of fp32 weights
        old_groups = runner.optimizer.param_groups
        runner.optimizer.param_groups = copy.deepcopy(
            runner.optimizer.param_groups)
        state = defaultdict(dict)
        p_map = {
            old_p: p
            for old_p, p in zip(
                chain(*(g['params'] for g in old_groups)),
                chain(*(g['params'] for g in runner.optimizer.param_groups)))
        }
        for k, v in runner.optimizer.state.items():
            state[p_map[k]] = v
        runner.optimizer.state = state
        # convert model to fp16
        wrap_fp16_model(runner.model)
コード例 #6
0
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)

    cfg.merge_from_dict(args.cfg_options)

    # Load output_config from cfg
    output_config = cfg.get('output_config', {})
    if args.out:
        # Overwrite output_config from args.out
        output_config = Config._merge_a_into_b(dict(out=args.out),
                                               output_config)

    # Load eval_config from cfg
    eval_config = cfg.get('eval_config', {})
    if args.eval:
        # Overwrite eval_config from args.eval
        eval_config = Config._merge_a_into_b(dict(metrics=args.eval),
                                             eval_config)
    if args.eval_options:
        # Add options from args.eval_options
        eval_config = Config._merge_a_into_b(args.eval_options, eval_config)

    assert output_config or eval_config, \
        ('Please specify at least one operation (save or eval the '
         'results) with the argument "--out" or "--eval"')

    dataset_type = cfg.data.test.type
    if output_config.get('out', None):
        if 'output_format' in output_config:
            # ugly workround to make recognition and localization the same
            warnings.warn(
                'Skip checking `output_format` in localization task.')
        else:
            out = output_config['out']
            # make sure the dirname of the output path exists
            mmcv.mkdir_or_exist(osp.dirname(out))
            _, suffix = osp.splitext(out)
            if dataset_type == 'AVADataset':
                assert suffix[1:] == 'csv', ('For AVADataset, the format of '
                                             'the output file should be csv')
            else:
                assert suffix[1:] in file_handlers, (
                    'The format of the output '
                    'file should be json, pickle or yaml')

    # set cudnn benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.data.test.test_mode = True

    if args.average_clips is not None:
        # You can set average_clips during testing, it will override the
        # original setting
        if cfg.model.get('test_cfg') is None and cfg.get('test_cfg') is None:
            cfg.model.setdefault('test_cfg',
                                 dict(average_clips=args.average_clips))
        else:
            if cfg.model.get('test_cfg') is not None:
                cfg.model.test_cfg.average_clips = args.average_clips
            else:
                cfg.test_cfg.average_clips = args.average_clips

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # The flag is used to register module's hooks
    cfg.setdefault('module_hooks', [])

    # build the dataloader
    dataset = build_dataset(cfg.data.test, dict(test_mode=True))
    dataloader_setting = dict(videos_per_gpu=cfg.data.get('videos_per_gpu', 1),
                              workers_per_gpu=cfg.data.get(
                                  'workers_per_gpu', 1),
                              dist=distributed,
                              shuffle=False)
    dataloader_setting = dict(dataloader_setting,
                              **cfg.data.get('test_dataloader', {}))
    data_loader = build_dataloader(dataset, **dataloader_setting)

    # remove redundant pretrain steps for testing
    turn_off_pretrained(cfg.model)

    # build the model and load checkpoint
    model = build_model(cfg.model,
                        train_cfg=None,
                        test_cfg=cfg.get('test_cfg'))

    if len(cfg.module_hooks) > 0:
        register_module_hooks(model, cfg.module_hooks)

    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    load_checkpoint(model, args.checkpoint, map_location='cpu')

    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test(model, data_loader)
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        outputs = multi_gpu_test(model, data_loader, args.tmpdir,
                                 args.gpu_collect)

    rank, _ = get_dist_info()
    if rank == 0:
        if output_config.get('out', None):
            out = output_config['out']
            print(f'\nwriting results to {out}')
            #import pdb
            #pdb.set_trace()
            print(out[-4:])
            if out[-4:] == 'json':
                result_dict = {}
                for result in outputs:
                    video_name = result['video_name']
                    result_dict[video_name] = result['proposal_list']
                output_dict = {
                    'version': 'VERSION 1.3',
                    'results': result_dict,
                    'external_data': {}
                }
                mmcv.dump(output_dict, out)
            else:
                dataset.dump_results(outputs, **output_config)

        if eval_config:
            eval_res = dataset.evaluate(outputs, **eval_config)
            for name, val in eval_res.items():
                print(f'{name}: {val:.04f}')
コード例 #7
0
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)

    cfg.merge_from_dict(args.cfg_options)

    # Load output_config from cfg
    output_config = cfg.get('output_config', {})
    # Overwrite output_config from args.out
    output_config = merge_configs(output_config, dict(out=args.out))

    # Load eval_config from cfg
    eval_config = cfg.get('eval_config', {})
    # Overwrite eval_config from args.eval
    eval_config = merge_configs(eval_config, dict(metrics=args.eval))
    # Add options from args.option
    eval_config = merge_configs(eval_config, args.options)

    assert output_config or eval_config, \
        ('Please specify at least one operation (save or eval the '
         'results) with the argument "--out" or "--eval"')

    # set cudnn benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.data.test.test_mode = True

    if cfg.test_cfg is None:
        cfg.test_cfg = dict(average_clips=args.average_clips)
    else:
        # You can set average_clips during testing, it will override the
        # original settting
        if args.average_clips is not None:
            cfg.test_cfg.average_clips = args.average_clips

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # build the dataloader
    dataset = build_dataset(cfg.data.test, dict(test_mode=True))
    dataloader_setting = dict(videos_per_gpu=cfg.data.get('videos_per_gpu', 2),
                              workers_per_gpu=cfg.data.get(
                                  'workers_per_gpu', 0),
                              dist=distributed,
                              shuffle=False)
    dataloader_setting = dict(dataloader_setting,
                              **cfg.data.get('test_dataloader', {}))
    data_loader = build_dataloader(dataset, **dataloader_setting)

    # build the model and load checkpoint
    model = build_model(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    load_checkpoint(model, args.checkpoint, map_location='cpu')

    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test(model, data_loader)
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        outputs = multi_gpu_test(model, data_loader, args.tmpdir,
                                 args.gpu_collect)

    rank, _ = get_dist_info()
    if rank == 0:
        if output_config:
            out = output_config['out']
            print(f'\nwriting results to {out}')
            dataset.dump_results(outputs, **output_config)
        if eval_config:
            eval_res = dataset.evaluate(outputs, **eval_config)
            for name, val in eval_res.items():
                print(f'{name}: {val:.04f}')
コード例 #8
0
def main():
    args = parse_args()

    if 'cuda' in args.device.lower():
        if torch.cuda.is_available():
            with_cuda = True
        else:
            raise RuntimeError('No CUDA device found, please check it again.')
    else:
        with_cuda = False

    if args.root_work_dir is None:
        # get the current time stamp
        now = datetime.now()
        ts = now.strftime('%Y_%m_%d_%H_%M')
        args.root_work_dir = f'work_dirs/inference_speed_test_{ts}'
    mmcv.mkdir_or_exist(osp.abspath(args.root_work_dir))

    cfg = mmcv.load(args.config)
    dummy_datasets = mmcv.load(args.dummy_dataset_config)['dummy_datasets']

    results = []
    for i in range(args.priority + 1):
        models = cfg['model_list'][f'P{i}']
        for cur_model in models:
            cfg_file = cur_model['config']
            model_cfg = Config.fromfile(cfg_file)
            test_dataset = model_cfg['data']['test']
            dummy_dataset = dummy_datasets[test_dataset['type']]
            test_dataset.update(dummy_dataset)

            dataset = build_dataset(test_dataset)
            data_loader = build_dataloader(
                dataset,
                samples_per_gpu=args.batch_size,
                workers_per_gpu=model_cfg.data.workers_per_gpu,
                dist=False,
                shuffle=False)
            data_loader = IterLoader(data_loader)

            if 'pretrained' in model_cfg.model.keys():
                del model_cfg.model['pretrained']

            model = init_pose_model(model_cfg, device=args.device.lower())

            fp16_cfg = model_cfg.get('fp16', None)
            if fp16_cfg is not None:
                wrap_fp16_model(model)
            if args.fuse_conv_bn:
                model = fuse_conv_bn(model)

            # benchmark with several iterations and take the average
            pure_inf_time = 0
            speed = []
            for iteration in range(args.num_iters + args.num_warmup):
                data = next(data_loader)
                data['img'] = data['img'].to(args.device.lower())
                data['img_metas'] = data['img_metas'].data[0]

                if with_cuda:
                    torch.cuda.synchronize()

                start_time = time.perf_counter()
                with torch.no_grad():
                    model(return_loss=False, **data)

                if with_cuda:
                    torch.cuda.synchronize()
                elapsed = time.perf_counter() - start_time

                if iteration >= args.num_warmup:
                    pure_inf_time += elapsed
                    speed.append(1 / elapsed)

            speed_mean = np.mean(speed)
            speed_std = np.std(speed)

            split_line = '=' * 30
            result = f'{split_line}\nModel config:{cfg_file}\n' \
                     f'Device: {args.device}\n' \
                     f'Batch size: {args.batch_size}\n' \
                     f'Overall average speed: {speed_mean:.2f} \u00B1 ' \
                     f'{speed_std:.2f} items / s\n' \
                     f'Total iters: {args.num_iters}\n'\
                     f'Total time: {pure_inf_time:.2f} s \n{split_line}\n'\

            print(result)
            results.append(result)

    print('!!!Please be cautious if you use the results in papers. '
          'You may need to check if all ops are included and verify that the '
          'speed computation is correct.')
    with open(osp.join(args.root_work_dir, 'inference_speed.txt'), 'w') as f:
        for res in results:
            f.write(res)
コード例 #9
0
def main():
    args = parse_args()

    cfg = mmcv.Config.fromfile(args.config)
    if args.update_config is not None:
        cfg.merge_from_dict(args.update_config)
    cfg = update_config(cfg, args)
    cfg = propagate_root_dir(cfg, args.data_dir)

    # Load output_config from cfg
    output_config = cfg.get('output_config', {})
    # Overwrite output_config from args.out
    output_config = merge_configs(output_config, dict(out=args.out))

    # Load eval_config from cfg
    eval_config = cfg.get('eval_config', {})
    # Overwrite eval_config from args.eval
    eval_config = merge_configs(eval_config, dict(metrics=args.eval))
    # Add options from args.option
    eval_config = merge_configs(eval_config, args.options)

    assert output_config or eval_config, \
        ('Please specify at least one operation (save or eval the '
         'results) with the argument "--out" or "--eval"')

    # init distributed env first, since logger depends on the dist info.
    distributed = args.launcher != 'none'
    if distributed:
        init_dist(args.launcher, **cfg.dist_params)

    # get rank
    rank, _ = get_dist_info()

    if cfg.get('seed'):
        print(f'Set random seed to {cfg.seed}')
        set_random_seed(cfg.seed)

    # build the dataset
    dataset = build_dataset(cfg.data, 'test', dict(test_mode=True))
    if cfg.get('classes'):
        dataset = dataset.filter(cfg.classes)
    if rank == 0:
        print(f'Test datasets:\n{str(dataset)}')

    # build the dataloader
    data_loader = build_dataloader(dataset,
                                   videos_per_gpu=1,
                                   workers_per_gpu=cfg.data.workers_per_gpu,
                                   dist=distributed,
                                   shuffle=False)

    # build the model and load checkpoint
    model = build_model(cfg.model,
                        train_cfg=None,
                        test_cfg=cfg.test_cfg,
                        class_sizes=dataset.class_sizes,
                        class_maps=dataset.class_maps)

    # nncf model wrapper
    if is_checkpoint_nncf(args.checkpoint) and not cfg.get('nncf_config'):
        # reading NNCF config from checkpoint
        nncf_part = get_nncf_config_from_meta(args.checkpoint)
        for k, v in nncf_part.items():
            cfg[k] = v

    if cfg.get('nncf_config'):
        check_nncf_is_enabled()
        if not is_checkpoint_nncf(args.checkpoint):
            raise RuntimeError(
                'Trying to make testing with NNCF compression a model snapshot that was NOT trained with NNCF'
            )
        cfg.load_from = args.checkpoint
        cfg.resume_from = None
        if torch.cuda.is_available():
            model = model.cuda()
        _, model = wrap_nncf_model(model, cfg, None, get_fake_input)
    else:
        fp16_cfg = cfg.get('fp16', None)
        if fp16_cfg is not None:
            wrap_fp16_model(model)
        # load model weights
        load_checkpoint(model,
                        args.checkpoint,
                        map_location='cpu',
                        force_matching=True)
        if args.fuse_conv_bn:
            model = fuse_conv_bn(model)

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test(model, data_loader)
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        outputs = multi_gpu_test(model, data_loader, args.tmpdir,
                                 args.gpu_collect)

    if rank == 0:
        if output_config:
            out = output_config['out']
            print(f'\nwriting results to {out}')
            dataset.dump_results(outputs, **output_config)

        if eval_config:
            eval_res = dataset.evaluate(outputs, **eval_config)

            print('\nFinal metrics:')
            for name, val in eval_res.items():
                if 'invalid_info' in name:
                    continue

                if isinstance(val, float):
                    print(f'{name}: {val:.04f}')
                elif isinstance(val, str):
                    print(f'{name}:\n{val}')
                else:
                    print(f'{name}: {val}')

            invalid_info = {
                name: val
                for name, val in eval_res.items() if 'invalid_info' in name
            }
            if len(invalid_info) > 0:
                assert args.out_invalid is not None and args.out_invalid != ''
                if os.path.exists(args.out_invalid):
                    shutil.rmtree(args.out_invalid)
                if not os.path.exists(args.out_invalid):
                    os.makedirs(args.out_invalid)

                for name, invalid_record in invalid_info.items():
                    out_invalid_dir = os.path.join(args.out_invalid, name)

                    item_gen = zip(invalid_record['ids'],
                                   invalid_record['conf'],
                                   invalid_record['pred'])
                    for invalid_idx, pred_conf, pred_label in item_gen:
                        record_info = dataset.get_info(invalid_idx)
                        gt_label = record_info['label']

                        if 'filename' in record_info:
                            src_data_path = record_info['filename']

                            in_record_name, record_extension = os.path.basename(
                                src_data_path).split('.')
                            out_record_name = f'{in_record_name}_gt{gt_label}_pred{pred_label}_conf{pred_conf:.3f}'
                            trg_data_path = os.path.join(
                                out_invalid_dir,
                                f'{out_record_name}.{record_extension}')

                            shutil.copyfile(src_data_path, trg_data_path)
                        else:
                            src_data_path = record_info['frame_dir']

                            in_record_name = os.path.basename(src_data_path)
                            out_record_name = f'{in_record_name}_gt{gt_label}_pred{pred_label}_conf{pred_conf:.3f}'
                            trg_data_path = os.path.join(
                                out_invalid_dir, out_record_name)
                            os.makedirs(trg_data_path)

                            start_frame_id = record_info[
                                'clip_start'] + dataset.start_index
                            end_frame_id = record_info[
                                'clip_end'] + dataset.start_index
                            for frame_id in range(start_frame_id,
                                                  end_frame_id):
                                img_name = f'{frame_id:05}.jpg'
                                shutil.copyfile(
                                    os.path.join(src_data_path, img_name),
                                    os.path.join(trg_data_path, img_name))
コード例 #10
0
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # import modules from string list.
    if cfg.get('custom_imports', None):
        from mmcv.utils import import_modules_from_strings
        import_modules_from_strings(**cfg['custom_imports'])
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # build the dataloader
    samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1)
    if samples_per_gpu > 1:
        # Replace 'ImageToTensor' to 'DefaultFormatBundle'
        cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(dataset,
                                   samples_per_gpu=1,
                                   workers_per_gpu=cfg.data.workers_per_gpu,
                                   dist=False,
                                   shuffle=False)

    # build the model and load checkpoint
    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    load_checkpoint(model, args.checkpoint, map_location='cpu')
    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)

    model = MMDataParallel(model, device_ids=[0])

    model.eval()

    # the first several iterations may be very slow so skip them
    num_warmup = 5
    pure_inf_time = 0

    # benchmark with 2000 image and take the average
    for i, data in enumerate(data_loader):

        torch.cuda.synchronize()
        start_time = time.perf_counter()

        with torch.no_grad():
            model(return_loss=False, rescale=True, **data)

        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            pure_inf_time += elapsed
            if (i + 1) % args.log_interval == 0:
                fps = (i + 1 - num_warmup) / pure_inf_time
                print(f'Done image [{i + 1:<3}/ 2000], fps: {fps:.1f} img / s')

        if (i + 1) == 2000:
            pure_inf_time += elapsed
            fps = (i + 1 - num_warmup) / pure_inf_time
            print(f'Overall fps: {fps:.1f} img / s')
            break
コード例 #11
0
def main():
    args = parse_args()

    cfg = mmcv.Config.fromfile(args.config)
    if args.update_config is not None:
        cfg.merge_from_dict(args.update_config)
    cfg = update_config(cfg, args)
    cfg = propagate_root_dir(cfg, args.data_dir)

    # Load output_config from cfg
    output_config = cfg.get('output_config', {})
    # Overwrite output_config from args.out
    output_config = merge_configs(output_config, dict(out=args.out))

    # Load eval_config from cfg
    eval_config = cfg.get('eval_config', {})
    # Overwrite eval_config from args.eval
    eval_config = merge_configs(eval_config, dict(metrics=args.eval))
    # Add options from args.option
    eval_config = merge_configs(eval_config, args.options)

    assert output_config or eval_config, \
        ('Please specify at least one operation (save or eval the '
         'results) with the argument "--out" or "--eval"')

    # init distributed env first, since logger depends on the dist info.
    distributed = args.launcher != 'none'
    if distributed:
        init_dist(args.launcher, **cfg.dist_params)

    # get rank
    rank, _ = get_dist_info()

    if cfg.get('seed'):
        print(f'Set random seed to {cfg.seed}')
        set_random_seed(cfg.seed)

    # build the dataset
    dataset = build_dataset(cfg.data, 'test', dict(test_mode=True))
    if cfg.get('classes'):
        dataset = dataset.filter(cfg.classes)
    if rank == 0:
        print(f'Test datasets:\n{str(dataset)}')

    # build the dataloader
    data_loader = build_dataloader(
        dataset,
        videos_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False
    )

    # build the model and load checkpoint
    model = build_model(
        cfg.model,
        train_cfg=None,
        test_cfg=cfg.test_cfg,
        class_sizes=dataset.class_sizes,
        class_maps=dataset.class_maps
    )
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)

    # load model weights
    load_checkpoint(model, args.checkpoint, map_location='cpu', force_matching=True)

    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test(model, data_loader)
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        outputs = multi_gpu_test(model, data_loader, args.tmpdir, args.gpu_collect)

    if rank == 0:
        if output_config:
            out = output_config['out']
            print(f'\nwriting results to {out}')
            dataset.dump_results(outputs, **output_config)

        if eval_config:
            eval_res = dataset.evaluate(outputs, **eval_config)

            print('\nFinal metrics:')
            for name, val in eval_res.items():
                print(f'{name}: {val:.04f}')