Example 1
class DistPersonExtractor(object):
    def __init__(self, weight_path):
        # build model
        weights = torch.load(weight_path, map_location='cpu')
        self.model = resnet50_person(weights)
        self.model = MMDistributedDataParallel(
            self.model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        self.model.eval()

    def batch_extract(self,
                      imglist,
                      img_prefix,
                      imgs_per_gpu=1,
                      workers_per_gpu=4):
        # build dataset
        dataset = PersonDataset(imglist, img_prefix=img_prefix)
        # get dist info
        rank, world_size = get_dist_info()
        # build data loader
        sampler = DistributedSampler(dataset, world_size, rank, shuffle=False)
        data_loader = DataLoader(dataset,
                                 batch_size=imgs_per_gpu,
                                 sampler=sampler,
                                 num_workers=workers_per_gpu,
                                 collate_fn=partial(
                                     collate, samples_per_gpu=imgs_per_gpu),
                                 pin_memory=False)
        results = self.multi_gpu_test(data_loader)
        return results

    def multi_gpu_test(self, data_loader):
        results = []
        dataset = data_loader.dataset
        rank, world_size = get_dist_info()
        if rank == 0:
            prog_bar = mmcv.ProgressBar(len(dataset))
        for i, data in enumerate(data_loader):
            with torch.no_grad():
                result = self.model(data)
            results.append(result.detach().cpu())
            if rank == 0:
                batch_size = data.size(0)
                for _ in range(batch_size * world_size):
                    prog_bar.update()
        # collect results from all ranks
        results = self.collect_results(torch.cat(results), len(dataset))
        return results

    def collect_results(self, result_part, size, tmpdir=None):
        rank, world_size = get_dist_info()
        # create a tmp dir if it is not specified
        if tmpdir is None:
            MAX_LEN = 512
            # 32 is whitespace
            dir_tensor = torch.full((MAX_LEN, ),
                                    32,
                                    dtype=torch.uint8,
                                    device='cuda')
            if rank == 0:
                tmpdir = tempfile.mkdtemp()
                # pylint: disable=not-callable
                tmpdir = torch.tensor(bytearray(tmpdir.encode()),
                                      dtype=torch.uint8,
                                      device='cuda')
                # pylint: enable=not-callable
                dir_tensor[:len(tmpdir)] = tmpdir
            dist.broadcast(dir_tensor, 0)
            tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
        else:
            mmcv.mkdir_or_exist(tmpdir)
        # dump the part result to the dir
        # mmcv.dump(result_part, osp.join(tmpdir, 'part_{}.pkl'.format(rank)))
        torch.save(result_part, osp.join(tmpdir, 'part_{}.pkl'.format(rank)))
        dist.barrier()
        # collect all parts
        if rank != 0:
            return None
        else:
            # load results of all parts from tmp dir
            part_list = []
            for i in range(world_size):
                part_file = osp.join(tmpdir, 'part_{}.pkl'.format(i))
                # part_list.append(mmcv.load(part_file))
                part_list.append(torch.load(part_file))
            # sort the results
            ordered_results = []
            for res in zip(*part_list):
                ordered_results.append(torch.stack(res))
            # the dataloader may pad some samples
            ordered_results = torch.cat(ordered_results)
            ordered_results = ordered_results[:size]
            # remove tmp dir
            shutil.rmtree(tmpdir)
            return ordered_results
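A minimal launch sketch for the extractor above, assuming one process per GPU started with torchrun (or torch.distributed.launch --use_env) so that LOCAL_RANK is exported; the weight path, image list file, and output path are hypothetical placeholders, not from the original project.

import os
import torch
import torch.distributed as dist

def main():
    # torchrun exports LOCAL_RANK for each worker process
    local_rank = int(os.environ['LOCAL_RANK'])
    torch.cuda.set_device(local_rank)
    dist.init_process_group(backend='nccl')

    extractor = DistPersonExtractor('weights/resnet50_person.pth')  # hypothetical path
    with open('imglist.txt') as f:                                  # hypothetical image list
        imglist = [line.strip() for line in f]
    feats = extractor.batch_extract(imglist, img_prefix='data/images')
    if dist.get_rank() == 0:
        # collect_results returns the gathered features only on rank 0
        torch.save(feats, 'person_feats.pth')

if __name__ == '__main__':
    main()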
Example 2
def measure_inference_speed(cfg, checkpoint, max_iter, log_interval,
                            is_fuse_conv_bn):
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # build the dataloader
    samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1)
    if samples_per_gpu > 1:
        # Replace 'ImageToTensor' with 'DefaultFormatBundle'
        cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        # Extra worker processes occupy additional CPU resources, which makes
        # the FPS statistics less stable, so workers_per_gpu is kept at 0.
        workers_per_gpu=0,
        dist=True,
        shuffle=False)

    # build the model and load checkpoint
    cfg.model.train_cfg = None
    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    load_checkpoint(model, checkpoint, map_location='cpu')
    if is_fuse_conv_bn:
        model = fuse_conv_bn(model)

    model = MMDistributedDataParallel(model.cuda(),
                                      device_ids=[torch.cuda.current_device()],
                                      broadcast_buffers=False)
    model.eval()

    # the first several iterations may be very slow so skip them
    num_warmup = 5
    pure_inf_time = 0
    fps = 0

    # benchmark for up to max_iter images and take the average
    for i, data in enumerate(data_loader):

        torch.cuda.synchronize()
        start_time = time.perf_counter()

        with torch.no_grad():
            model(return_loss=False, rescale=True, **data)

        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            pure_inf_time += elapsed
            if (i + 1) % log_interval == 0:
                fps = (i + 1 - num_warmup) / pure_inf_time
                print(
                    f'Done image [{i + 1:<3}/{max_iter}], '
                    f'fps: {fps:.1f} img / s, '
                    f'time per image: {1000 / fps:.1f} ms / img',
                    flush=True)

        if (i + 1) == max_iter:
            fps = (i + 1 - num_warmup) / pure_inf_time
            print(
                f'Overall fps: {fps:.1f} img / s, '
                f'time per image: {1000 / fps:.1f} ms / img',
                flush=True)
            break
    return fps
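A sketch of how this helper might be wired into a small CLI, assuming mmcv's Config and init_dist plus standard argparse; the flag names and defaults are illustrative, not taken from the original script.

# Hypothetical CLI wrapper around measure_inference_speed; flag names are illustrative.
import argparse
from mmcv import Config
from mmcv.runner import init_dist

def parse_args():
    parser = argparse.ArgumentParser(description='Measure detector inference speed')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('checkpoint', help='checkpoint file')
    parser.add_argument('--max-iter', type=int, default=2000)
    parser.add_argument('--log-interval', type=int, default=50)
    parser.add_argument('--fuse-conv-bn', action='store_true')
    parser.add_argument('--launcher', default='pytorch')
    return parser.parse_args()

if __name__ == '__main__':
    args = parse_args()
    cfg = Config.fromfile(args.config)
    # the helper wraps the model in MMDistributedDataParallel, so a distributed
    # environment must be initialised even when only one GPU is used
    init_dist(args.launcher, **cfg.dist_params)
    fps = measure_inference_speed(cfg, args.checkpoint, args.max_iter,
                                  args.log_interval, args.fuse_conv_bn)
    print(f'average fps: {fps:.1f}')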
Example 3
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # update configs according to CLI args
    if args.dir is not None:
        if args.dir.startswith('//'):
            cfg.work_dir = args.dir[2:]
        else:
            localhost = get_localhost().split('.')[0]
            # results from server saved to /private
            if 'gpu' in localhost:
                output_dir = '/private/huangchenxi/mmdet/outputs'
            else:
                output_dir = 'work_dirs'

            if args.dir.endswith('-c'):
                args.dir = args.dir[:-2]
                args.resume_from = search_and_delete(os.path.join(
                    output_dir, args.dir),
                                                     prefix=cfg.work_dir,
                                                     suffix=localhost)
            cfg.work_dir += time.strftime("_%m%d_%H%M") + '_' + localhost
            cfg.work_dir = os.path.join(output_dir, args.dir, cfg.work_dir)

    if args.workers_per_gpu != -1:
        cfg.data['workers_per_gpu'] = args.workers_per_gpu

    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    cfg.gpus = args.gpus

    if args.profiler or args.speed:
        cfg.data.imgs_per_gpu = 1

    if cfg.resume_from or cfg.load_from:
        cfg.model['pretrained'] = None

    if args.test:
        cfg.data.train['ann_file'] = cfg.data.val['ann_file']
        cfg.data.train['img_prefix'] = cfg.data.val['img_prefix']

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
        num_gpus = args.gpus
        rank = 0
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)
        num_gpus = torch.cuda.device_count()
        rank, _ = get_dist_info()

    if cfg.optimizer['type'] == 'SGD':
        cfg.optimizer['lr'] *= num_gpus * cfg.data.imgs_per_gpu / 256
    else:
        cfg.optimizer['lr'] *= ((num_gpus / 8) * (cfg.data.imgs_per_gpu / 2))

    # init logger before other steps
    logger = get_root_logger(nlogger, cfg.log_level)
    if rank == 0:
        logger.set_logger_dir(cfg.work_dir, 'd')
    logger.info("Config: ------------------------------------------\n" +
                cfg.text)
    logger.info('Distributed training: {}'.format(distributed))

    # set random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}'.format(args.seed))
        set_random_seed(args.seed)

    model = build_detector(cfg.model,
                           train_cfg=cfg.train_cfg,
                           test_cfg=cfg.test_cfg)
    if rank == 0:
        # describe_vars(model)
        writer = set_writer(cfg.work_dir)
        # try:
        #     # describe_features(model.backbone)
        #     writer.add_graph(model, torch.zeros((1, 3, 800, 800)))
        # except (NotImplementedError, TypeError):
        #     logger.warn("Add graph failed.")
        # except Exception as e:
        #     logger.warn("Add graph failed:", e)

    if not args.graph and not args.profiler and not args.speed:
        if distributed:
            model = MMDistributedDataParallel(model.cuda())
        else:
            model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda()

        if isinstance(cfg.data.train, list):
            for t in cfg.data.train:
                logger.info("loading training set: " + str(t.ann_file))
            train_dataset = [build_dataset(t) for t in cfg.data.train]
            CLASSES = train_dataset[0].CLASSES
        else:
            logger.info("loading training set: " +
                        str(cfg.data.train.ann_file))
            train_dataset = build_dataset(cfg.data.train)
            logger.info("{} images loaded!".format(len(train_dataset)))
            CLASSES = train_dataset.CLASSES
        if cfg.checkpoint_config is not None:
            # save mmdet version, config file content and class names in
            # checkpoints as meta data
            cfg.checkpoint_config.meta = dict(mmdet_version=__version__,
                                              config=cfg.text,
                                              CLASSES=CLASSES)
        # add an attribute for visualization convenience
        if hasattr(model, 'module'):
            model.module.CLASSES = CLASSES
        else:
            model.CLASSES = CLASSES
        train_detector(model,
                       train_dataset,
                       cfg,
                       distributed=distributed,
                       validate=args.validate,
                       logger=logger,
                       runner_attr_dict={'task_name': args.dir})
    else:
        from mmcv.runner.checkpoint import load_checkpoint
        from mmdet.datasets import build_dataloader
        from mmdet.core.utils.model_utils import register_hooks
        from mmdet.apis.train import parse_losses

        model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda()
        if args.profiler == 'test' or args.speed == 'test':
            model.eval()
            dataset = build_dataset(cfg.data.test)
        else:
            model.train()
            dataset = build_dataset(cfg.data.train)

        if cfg.load_from and (args.profiler or args.speed):
            logger.info('load checkpoint from %s', cfg.load_from)
            load_checkpoint(model,
                            cfg.load_from,
                            map_location='cpu',
                            strict=True)

        data_loader = build_dataloader(dataset,
                                       cfg.data.imgs_per_gpu,
                                       cfg.data.workers_per_gpu,
                                       cfg.gpus,
                                       dist=False,
                                       shuffle=False)

        if args.graph:
            id_dict = {}
            for name, parameter in model.named_parameters():
                id_dict[id(parameter)] = name

        for i, data_batch in enumerate(data_loader):
            if args.graph:
                outputs = model(**data_batch)
                loss, log_vars = parse_losses(outputs)
                get_dot = register_hooks(loss, id_dict)
                loss.backward()
                dot = get_dot()
                dot.save('graph.dot')
                break
            elif args.profiler:
                with torch.autograd.profiler.profile(use_cuda=True) as prof:
                    if args.profiler == 'train':
                        outputs = model(**data_batch)
                        loss, log_vars = parse_losses(outputs)
                        loss.backward()
                    else:
                        with torch.no_grad():
                            model(**data_batch, return_loss=False)

                    if i == 20:
                        prof.export_chrome_trace('./trace.json')
                        logger.info(prof)
                        break
            elif args.speed:
                if args.speed == 'train':
                    start = time.perf_counter()
                    outputs = model(**data_batch)
                    loss, log_vars = parse_losses(outputs)
                    loss.backward()
                    torch.cuda.synchronize()
                    end = time.perf_counter()
                else:
                    start = time.perf_counter()
                    with torch.no_grad():
                        model(**data_batch, return_loss=False)
                    end = time.perf_counter()
                logger.info("{:.3f} s/iter, {:.1f} iters/s".format(
                    end - start, 1. / (end - start)))
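For reference, the optimizer adjustment in main() implements the linear scaling rule: the config's base learning rate is assumed to be tuned for a reference total batch size (256 images for SGD, 8 GPUs x 2 images per GPU otherwise) and is rescaled to the actual total batch size. A small worked example with illustrative numbers:

# Worked example of the lr scaling above (numbers are illustrative).
# SGD branch: base lr assumed tuned for a total batch of 256 images.
num_gpus, imgs_per_gpu, base_lr = 8, 4, 0.1
scaled_lr = base_lr * num_gpus * imgs_per_gpu / 256        # 0.1 * 32 / 256 = 0.0125
# Other optimizers: base lr assumed tuned for 8 GPUs x 2 images per GPU.
num_gpus, imgs_per_gpu, base_lr = 4, 2, 1e-4
scaled_lr = base_lr * (num_gpus / 8) * (imgs_per_gpu / 2)  # 1e-4 * 0.5 * 1.0 = 5e-5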
Example 4
class DistPersonDetector(object):
    def __init__(self, arch, cfg_path, weight_path, img_scale=(1333, 800)):
        # build model
        if arch == 'retina':
            self.model = self.build_retinanet(cfg_path, weight_path)
        elif arch == 'rcnn':
            self.model = self.build_cascadercnn(cfg_path, weight_path)
        else:
            raise KeyError('{} is not supported now.'.format(arch))
        self.model = MMDistributedDataParallel(
            self.model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        self.model.eval()
        self.img_scale = img_scale

    def build_retinanet(self, cfg_path, weight_path):
        with open(cfg_path) as f:
            cfg = json.load(f)
        model = RetinaNet(**cfg)
        load_checkpoint(model, weight_path, map_location='cpu')
        return model

    def build_cascadercnn(self, cfg_path, weight_path):
        with open(cfg_path) as f:
            cfg = json.load(f)
        model = CascadeRCNN(**cfg)
        load_checkpoint(model, weight_path, map_location='cpu')
        return model

    def batch_detect(self,
                     imglist,
                     img_prefix,
                     imgs_per_gpu=1,
                     workers_per_gpu=4,
                     conf_thr=0.5):
        # build dataset
        dataset = CustomDataset(imglist,
                                img_scale=self.img_scale,
                                img_prefix=img_prefix)
        # get dist info
        rank, world_size = get_dist_info()
        # build data loader
        sampler = DistributedSampler(dataset, world_size, rank, shuffle=False)
        data_loader = DataLoader(dataset,
                                 batch_size=imgs_per_gpu,
                                 sampler=sampler,
                                 num_workers=workers_per_gpu,
                                 collate_fn=partial(
                                     collate, samples_per_gpu=imgs_per_gpu),
                                 pin_memory=False)
        results = self.multi_gpu_test(data_loader, conf_thr)
        return results

    def multi_gpu_test(self, data_loader, conf_thr):
        results = []
        dataset = data_loader.dataset
        rank, world_size = get_dist_info()
        if rank == 0:
            prog_bar = mmcv.ProgressBar(len(dataset))
        for i, data in enumerate(data_loader):
            with torch.no_grad():
                result = self.model(rescale=True, **data)
            result = result[result[:, -1] > conf_thr]
            results.append(result)
            if rank == 0:
                batch_size = (len(data['img_meta']._data)
                              if 'img_meta' in data else data['img'].size(0))
                for _ in range(batch_size * world_size):
                    prog_bar.update()
        # collect results from all ranks
        results = self.collect_results(results, len(dataset))
        return results

    def collect_results(self, result_part, size, tmpdir=None):
        rank, world_size = get_dist_info()
        # create a tmp dir if it is not specified
        if tmpdir is None:
            MAX_LEN = 512
            # 32 is whitespace
            dir_tensor = torch.full((MAX_LEN, ),
                                    32,
                                    dtype=torch.uint8,
                                    device='cuda')
            if rank == 0:
                tmpdir = tempfile.mkdtemp()
                # pylint: disable=not-callable
                tmpdir = torch.tensor(bytearray(tmpdir.encode()),
                                      dtype=torch.uint8,
                                      device='cuda')
                # pylint: enable=not-callable
                dir_tensor[:len(tmpdir)] = tmpdir
            dist.broadcast(dir_tensor, 0)
            tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
        else:
            mmcv.mkdir_or_exist(tmpdir)
        # dump the part result to the dir
        mmcv.dump(result_part, osp.join(tmpdir, 'part_{}.pkl'.format(rank)))
        dist.barrier()
        # collect all parts
        if rank != 0:
            return None
        else:
            # load results of all parts from tmp dir
            part_list = []
            for i in range(world_size):
                part_file = osp.join(tmpdir, 'part_{}.pkl'.format(i))
                part_list.append(mmcv.load(part_file))
            # sort the results
            ordered_results = []
            for res in zip(*part_list):
                ordered_results.extend(list(res))
            # the dataloader may pad some samples
            ordered_results = ordered_results[:size]
            # remove tmp dir
            shutil.rmtree(tmpdir)
            return ordered_results
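The tmpdir broadcast in collect_results is one way to gather per-rank results; on recent PyTorch the same step can be done in memory with torch.distributed.all_gather_object. A sketch under that assumption, not part of the original code:

import torch.distributed as dist

def collect_results_gather(result_part, size):
    # gather the per-rank result lists without going through the filesystem
    rank, world_size = get_dist_info()
    part_list = [None] * world_size
    dist.all_gather_object(part_list, result_part)
    if rank != 0:
        return None
    # interleave the parts to restore the original dataset order
    ordered_results = []
    for res in zip(*part_list):
        ordered_results.extend(list(res))
    # the dataloader may pad some samples
    return ordered_results[:size]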
Example 5
File: test.py  Project: rhythm-A/mm
class TestMMDetModel(object):
    def __init__(self,
                 config,
                 checkpoint,
                 out_pkl,
                 show_result=False,
                 out_json=None,
                 tmpdir=None,
                 launcher='none',
                 eval_types=None):
        """

        :param config: test config file path, can use the training config file
        :param checkpoint: checkpoint file, generated by training process
        :param out_pkl: output result file
        :param show_result: show results
        :param out_json: Save predictions in the COCO json format
        :param tmpdir: tmp dir for writing some results
        :param launcher: job launcher in ['none', 'pytorch', 'slurm', 'mpi']
        :param eval_types: nargs='+', ['proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints']
        """

        # args = parse_args()

        assert out_pkl or show_result or out_json, \
            ('Please specify at least one operation (save or show the results) '
             'with the argument "out_pkl", "show_result" or "out_json"')
        self.show = show_result
        self.out_pkl = out_pkl
        self.out_json = out_json

        if out_pkl is not None and not out_pkl.endswith(('.pkl', '.pickle')):
            raise ValueError('The output file must be a pkl file.')

        # results2json appends its own suffix (e.g. '.bbox.json'), so strip a trailing '.json'
        if out_json is not None and out_json.endswith('.json'):
            self.out_json = out_json[:-5]

        cfg = mmcv.Config.fromfile(config)
        # set cudnn_benchmark
        if cfg.get('cudnn_benchmark', False):
            torch.backends.cudnn.benchmark = True
        cfg.model.pretrained = None
        cfg.data.test.test_mode = True

        # init distributed env first, since logger depends on the dist info.
        if launcher == 'none':
            self.distributed = False
        else:
            self.distributed = True
            init_dist(launcher, **cfg.dist_params)
        self.tmpdir = tmpdir
        self.eval_types = eval_types

        # build the dataloader
        # TODO: support multiple images per gpu (only minor changes are needed)
        dataset = build_dataset(cfg.data.test)
        self.dataset = dataset
        self.data_loader = build_dataloader(
            dataset,
            imgs_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=self.distributed,
            shuffle=False)

        # build the model and load checkpoint
        self.model = build_detector(cfg.model,
                                    train_cfg=None,
                                    test_cfg=cfg.test_cfg)
        fp16_cfg = cfg.get('fp16', None)
        if fp16_cfg is not None:
            wrap_fp16_model(self.model)
        self.checkpoint = load_checkpoint(self.model,
                                          checkpoint,
                                          map_location='cpu')
        # old versions did not save class info in checkpoints; this workaround is
        # for backward compatibility
        if 'CLASSES' in self.checkpoint['meta']:
            self.model.CLASSES = self.checkpoint['meta']['CLASSES']
        else:
            self.model.CLASSES = dataset.CLASSES

    def start_testing(self):

        if not self.distributed:
            self.model = MMDataParallel(self.model, device_ids=[0])
            outputs = self.__single_gpu_test()
        else:
            self.model = MMDistributedDataParallel(self.model.cuda())
            outputs = self.__multi_gpu_test()

        if self.out_pkl:
            dir_name = os.path.dirname(self.out_pkl)
            if dir_name and not os.path.exists(dir_name):
                os.makedirs(dir_name)

        rank, _ = get_dist_info()
        if self.out_pkl and rank == 0:
            print('\nwriting results to {}'.format(self.out_pkl))
            mmcv.dump(outputs, self.out_pkl)
            eval_types = self.eval_types
            if eval_types:
                print('Starting to evaluate {}'.format(' and '.join(eval_types)))
                if eval_types == ['proposal_fast']:
                    result_file = self.out_pkl
                    coco_eval(result_file, eval_types, self.dataset.coco)
                else:
                    if not isinstance(outputs[0], dict):
                        result_files = results2json(self.dataset, outputs,
                                                    self.out_pkl)
                        coco_eval(result_files, eval_types, self.dataset.coco)
                    else:
                        for name in outputs[0]:
                            print('\nEvaluating {}'.format(name))
                            outputs_ = [out[name] for out in outputs]
                            result_file = self.out_pkl + '.{}'.format(name)
                            result_files = results2json(
                                self.dataset, outputs_, result_file)
                            coco_eval(result_files, eval_types,
                                      self.dataset.coco)

        # Save predictions in the COCO json format
        if self.out_json and rank == 0:
            if not isinstance(outputs[0], dict):
                results2json(self.dataset, outputs, self.out_json)
            else:
                for name in outputs[0]:
                    outputs_ = [out[name] for out in outputs]
                    result_file = self.out_json + '.{}'.format(name)
                    results2json(self.dataset, outputs_, result_file)

    def __single_gpu_test(self):
        self.model.eval()
        results = []
        dataset = self.data_loader.dataset
        prog_bar = mmcv.ProgressBar(len(dataset))
        for i, data in enumerate(self.data_loader):
            with torch.no_grad():
                result = self.model(return_loss=False,
                                    rescale=not self.show,
                                    **data)
            results.append(result)

            if self.show:
                self.model.module.show_result(data, result,
                                              dataset.img_norm_cfg)

            batch_size = data['img'][0].size(0)
            for _ in range(batch_size):
                prog_bar.update()
        return results

    def __multi_gpu_test(self):
        self.model.eval()
        results = []
        dataset = self.data_loader.dataset
        rank, world_size = get_dist_info()
        if rank == 0:
            prog_bar = mmcv.ProgressBar(len(dataset))
        for i, data in enumerate(self.data_loader):
            with torch.no_grad():
                result = self.model(return_loss=False, rescale=True, **data)
            results.append(result)

            if rank == 0:
                batch_size = data['img'][0].size(0)
                for _ in range(batch_size * world_size):
                    prog_bar.update()

        # collect results from all ranks
        results = self.__collect_results(results, len(dataset), self.tmpdir)

        return results

    @staticmethod
    def __collect_results(result_part, size, tmpdir=None):
        rank, world_size = get_dist_info()
        # create a tmp dir if it is not specified
        if tmpdir is None:
            MAX_LEN = 512
            # 32 is whitespace
            dir_tensor = torch.full((MAX_LEN, ),
                                    32,
                                    dtype=torch.uint8,
                                    device='cuda')
            if rank == 0:
                tmpdir = tempfile.mkdtemp()
                tmpdir = torch.tensor(bytearray(tmpdir.encode()),
                                      dtype=torch.uint8,
                                      device='cuda')
                dir_tensor[:len(tmpdir)] = tmpdir
            dist.broadcast(dir_tensor, 0)
            tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
        else:
            mmcv.mkdir_or_exist(tmpdir)
        # dump the part result to the dir
        mmcv.dump(result_part, osp.join(tmpdir, 'part_{}.pkl'.format(rank)))
        dist.barrier()
        # collect all parts
        if rank != 0:
            return None
        else:
            # load results of all parts from tmp dir
            part_list = []
            for i in range(world_size):
                part_file = osp.join(tmpdir, 'part_{}.pkl'.format(i))
                part_list.append(mmcv.load(part_file))
            # sort the results
            ordered_results = []
            for res in zip(*part_list):
                ordered_results.extend(list(res))
            # the dataloader may pad some samples
            ordered_results = ordered_results[:size]
            # remove tmp dir
            shutil.rmtree(tmpdir)
            return ordered_results
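A minimal single-GPU usage sketch for the class above, assuming an mmdet-style config and checkpoint are available; the paths and eval types below are placeholders, not from the original project.

# Hypothetical usage of TestMMDetModel on a single GPU (paths are placeholders).
if __name__ == '__main__':
    tester = TestMMDetModel(
        config='configs/faster_rcnn_r50_fpn_1x.py',   # hypothetical config
        checkpoint='work_dirs/latest.pth',            # hypothetical checkpoint
        out_pkl='results/test_results.pkl',
        eval_types=['bbox'],
        launcher='none')                              # single GPU, no distributed init
    tester.start_testing()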