Python collate 예제들, mmcv.parallel.collate Python 예제들

예제 #1

0

파일 보기

파일: inference.py 프로젝트: Johnqczhang/mmtracking

def inference_mot(model, img, frame_id):
    """Run the multi-object-tracking model on one frame.

    Args:
        model (nn.Module): The loaded mot model. Its ``cfg`` attribute
            provides the test pipeline.
        img (str | ndarray): Either image name or loaded image.
        frame_id (int): frame id, forwarded to the pipeline in ``img_info``.

    Returns:
        dict[str : ndarray]: The tracking results.
    """
    config = model.cfg
    first_param = next(model.parameters())
    target_device = first_param.device

    # Assemble the raw sample handed to the pipeline.
    if not isinstance(img, np.ndarray):
        # A file name: the pipeline's own loading step reads it.
        sample = {
            'img_info': {'filename': img, 'frame_id': frame_id},
            'img_prefix': None,
        }
    else:
        # An in-memory array: switch the first pipeline step to the loader
        # that accepts arrays.
        sample = {
            'img': img,
            'img_info': {'frame_id': frame_id},
            'img_prefix': None,
        }
        config = config.copy()
        config.data.test.pipeline[0].type = 'LoadImageFromWebcam'

    # Run the pipeline and wrap the single sample as a batch of one.
    pipeline = Compose(config.data.test.pipeline)
    batch = collate([pipeline(sample)], samples_per_gpu=1)

    if first_param.is_cuda:
        # Move the batch onto the model's GPU.
        batch = scatter(batch, [target_device])[0]
    else:
        assert not any(
            isinstance(module, RoIPool) for module in model.modules()
        ), 'CPU inference with RoIPool is not supported currently.'
        # Unwrap the DataContainer by hand on the CPU path.
        batch['img_metas'] = batch['img_metas'][0].data

    with torch.no_grad():
        return model(return_loss=False, rescale=True, **batch)

예제 #2

0

파일 보기

    def after_train_epoch(self, runner):
        """Evaluate the dataset across all ranks after a training epoch.

        Each rank processes the samples ``rank, rank + world_size, ...``.
        Non-zero ranks dump their partial results to
        ``<work_dir>/temp_<rank>.pkl``; rank 0 merges those files, deletes
        them and calls ``self.evaluate``.

        Args:
            runner: Training runner providing ``model``, ``rank``,
                ``world_size``, ``epoch``, ``max_epochs`` and ``work_dir``.
        """
        if not self.every_n_epochs(runner, self.interval):
            return
        runner.model.eval()

        num_samples = len(self.dataset)
        is_master = runner.rank == 0
        results = [None] * num_samples
        if is_master:
            prog_bar = mmcv.ProgressBar(num_samples)

        for idx in range(runner.rank, num_samples, runner.world_size):
            batch = collate([self.dataset[idx]], samples_per_gpu=1)
            data_gpu = scatter(batch, [torch.cuda.current_device()])[0]

            # compute output
            with torch.no_grad():
                results[idx] = runner.model(
                    return_loss=False, rescale=True, **data_gpu)

            # Rank 0 advances the bar once per rank for every local sample.
            if is_master:
                for _ in range(runner.world_size):
                    prog_bar.update()

        if not is_master:
            # Publish this rank's partial results, then meet rank 0 at the
            # first barrier.
            tmp_file = osp.join(runner.work_dir,
                                'temp_{}.pkl'.format(runner.rank))
            mmcv.dump(results, tmp_file)
            dist.barrier()
        else:
            print('\n')
            progress = 100.0 * (runner.epoch + 1) / runner.max_epochs
            print('PROGRESS: {:.2f}%'.format(progress))
            # Wait until every other rank has written its temp file.
            dist.barrier()
            for other_rank in range(1, runner.world_size):
                tmp_file = osp.join(runner.work_dir,
                                    'temp_{}.pkl'.format(other_rank))
                partial = mmcv.load(tmp_file)
                for idx in range(other_rank, len(results),
                                 runner.world_size):
                    results[idx] = partial[idx]
                os.remove(tmp_file)
            self.evaluate(runner, results)
        # Second barrier: all ranks wait for the evaluation on rank 0.
        dist.barrier()

예제 #3

0

파일 보기

def inpainting_inference(model, masked_img, mask):
    """Inpaint a masked image with the given model.

    Args:
        model (nn.Module): The loaded model.
        masked_img (str): File path of image with mask.
        mask (str): Mask file path.

    Returns:
        Tensor: The predicted inpainting result (the ``'fake_img'`` entry of
        the model output).
    """
    # Fixed preprocessing recipe used for inference.
    pipeline_steps = [
        {'type': 'LoadImageFromFile', 'key': 'masked_img'},
        {'type': 'LoadMask', 'mask_mode': 'file', 'mask_config': {}},
        {'type': 'Pad', 'keys': ['masked_img', 'mask'], 'mode': 'reflect'},
        {
            'type': 'Normalize',
            'keys': ['masked_img'],
            'mean': [127.5] * 3,
            'std': [127.5] * 3,
            'to_rgb': False,
        },
        {'type': 'GetMaskedImage', 'img_name': 'masked_img'},
        {
            'type': 'Collect',
            'keys': ['masked_img', 'mask'],
            'meta_keys': ['masked_img_path'],
        },
        {'type': 'ImageToTensor', 'keys': ['masked_img', 'mask']},
    ]
    preprocess_fn = Compose(pipeline_steps)

    # The batch is scattered to whichever device holds the model weights.
    model_device = next(model.parameters()).device

    sample = preprocess_fn({'masked_img_path': masked_img, 'mask_path': mask})
    batch = collate([sample], samples_per_gpu=1)
    batch = scatter(batch, [model_device])[0]

    with torch.no_grad():
        output = model(test_mode=True, **batch)
    return output['fake_img']

예제 #4

0

파일 보기

파일: mpt.py 프로젝트: ingeniousfrog/multi-person-trecker

def prepare_image(model, img):
    """Run ``img`` through the model's test pipeline and scatter the batch.

    The model itself is never called here; only the preprocessed batch is
    produced.

    Args:
        model: Object exposing ``cfg`` and ``parameters()``. The device of its
            first parameter is the scatter target.
        img: Value stored under the ``'img'`` key of the dict fed to the
            pipeline.

    Returns:
        The first element returned by ``scatter`` for the collated sample.
    """
    # NOTE(review): the pipeline built below instantiates ``LoadImagee``
    # (double "e"). The only definition of that name visible here is the
    # commented-out class that follows, and the local ``LoadImage`` class is
    # never used in this function. Confirm that ``LoadImagee`` is defined at
    # module level; otherwise the call raises NameError.
    # class LoadImagee(object):

    #     def __call__(self, results):
    #         if isinstance(results['img'], str):
    #             results['filename'] = ''#results['img']
    #         else:
    #             results['filename'] = ''
    #         # img = mmcv.imread(results['img'])
    #         # img = np.random.randint(0, 255, (720, 1280, 3))
    #         results['img'] = np.float32(results['img'].cpu().numpy())
    #         img = results['img']
    #         results['img_shape'] = img.shape
    #         results['ori_shape'] = img.shape
    #         return results

    class LoadImage(object):
        """Pipeline step that loads ``results['img']`` via ``mmcv.imread``."""

        def __call__(self, results):
            """Fill ``filename``, ``img``, ``img_shape`` and ``ori_shape``."""
            # Only a string input carries a file name worth recording.
            if isinstance(results['img'], str):
                results['filename'] = results['img']
            else:
                results['filename'] = None
            img = mmcv.imread(results['img'])
            results['img'] = img
            results['img_shape'] = img.shape
            results['ori_shape'] = img.shape
            return results

    # img = '/content/gdrive/My Drive/catapulta/Overhead_train_images/frame10000.jpg'
    cfg = model.cfg
    device = next(model.parameters()).device  # model device
    # build the data pipeline: the first configured step is replaced by a
    # custom loader, the remaining steps are taken from the config
    test_pipeline = [LoadImagee()] + cfg.data.test.pipeline[1:]
    test_pipeline = Compose(test_pipeline)
    # prepare data
    data = dict(img=img)
    data = test_pipeline(data)
    data = scatter(collate([data], samples_per_gpu=1), [device])[0]
    # the prepared batch is returned as-is; no forward pass happens here
    return data

예제 #5

0

파일 보기

파일: inference.py 프로젝트: reinforcementdriving/BRNet

def inference_detector(model, pcd):
    """Run the 3D detector on a point cloud file.

    Args:
        model (nn.Module): The loaded detector.
        pcd (str): Point cloud files.

    Returns:
        tuple: Predicted results and data from pipeline.
    """
    cfg = model.cfg
    first_param = next(model.parameters())
    device = first_param.device  # model device

    # Build the pipeline from a private copy of the configured steps.
    pipeline = Compose(deepcopy(cfg.data.test.pipeline))
    box_type_3d, box_mode_3d = get_box_type(cfg.data.test.box_type_3d)

    sample = dict(
        pts_filename=pcd,
        box_type_3d=box_type_3d,
        box_mode_3d=box_mode_3d)
    # Every bookkeeping field starts out as its own empty list.
    for field_name in ('img_fields', 'bbox3d_fields', 'pts_mask_fields',
                       'pts_seg_fields', 'bbox_fields', 'mask_fields',
                       'seg_fields'):
        sample[field_name] = []

    data = collate([pipeline(sample)], samples_per_gpu=1)
    if first_param.is_cuda:
        # scatter expects the GPU index here
        data = scatter(data, [device.index])[0]
    else:
        # CPU path: unwrap the DataContainers manually (workaround for
        # MMDataParallel)
        for key in ('img_metas', 'points'):
            data[key] = data[key][0].data

    with torch.no_grad():
        result = model(return_loss=False, rescale=True, **data)
    return result, data

예제 #6

0

파일 보기

def inference():
    """Continuously score frame windows and publish averaged top labels.

    Relies on names that are not defined in this function: ``frame_queue``,
    ``sample_length``, ``data``, ``test_pipeline``, ``model``, ``device``,
    ``label``, ``average_size``, ``result_queue`` and ``camera``.
    """
    # Per-window scores currently contributing to the running sum.
    score_cache = deque()
    # Running sum of everything held in ``score_cache``.
    scores_sum = 0
    while True:
        cur_windows = []

        # Spin until the queue holds exactly ``sample_length`` frames, then
        # snapshot it.
        while len(cur_windows) == 0:
            if len(frame_queue) == sample_length:
                cur_windows = list(np.array(frame_queue))
                # The image shape is filled in once, from a frame popped off
                # the queue.
                if data['img_shape'] is None:
                    data['img_shape'] = frame_queue.popleft().shape[:2]

        # Work on a copy so the shared ``data`` template stays untouched.
        cur_data = data.copy()
        cur_data['imgs'] = cur_windows
        cur_data = test_pipeline(cur_data)
        cur_data = collate([cur_data], samples_per_gpu=1)
        if next(model.parameters()).is_cuda:
            cur_data = scatter(cur_data, [device])[0]

        with torch.no_grad():
            scores = model(return_loss=False, **cur_data)[0]

        score_cache.append(scores)
        scores_sum += scores

        # Once ``average_size`` windows are cached, publish the averaged
        # top labels and drop the oldest window from the running sum.
        if len(score_cache) == average_size:
            scores_avg = scores_sum / average_size
            # At most five labels are reported.
            num_selected_labels = min(len(label), 5)

            scores_tuples = tuple(zip(label, scores_avg))
            # Highest average score first.
            scores_sorted = sorted(scores_tuples,
                                   key=itemgetter(1),
                                   reverse=True)
            results = scores_sorted[:num_selected_labels]

            result_queue.append(results)
            scores_sum -= score_cache.popleft()

    # NOTE(review): the ``while True`` loop above contains no ``break`` or
    # ``return``, so the cleanup below is only reached if the loop is left
    # some other way — confirm whether it is intended to run.
    camera.release()
    cv2.destroyAllWindows()

예제 #7

0

파일 보기

파일: inference.py 프로젝트: DragonGongY/mmdet-ui

def inference_detector(model, img):
    """Run the detector on a single image.

    Args:
        model (nn.Module): The loaded detector.
        img: Image stored under the ``'img'`` key of the pipeline input and
            handled by the ``LoadImage`` step.

    Returns:
        The output of ``model(return_loss=False, rescale=True, ...)``.
    """
    cfg = model.cfg
    first_param = next(model.parameters())
    device = first_param.device  # model device

    # Replace the configured loading step with LoadImage, keep the rest.
    pipeline = Compose([LoadImage()] + cfg.data.test.pipeline[1:])
    batch = collate([pipeline(dict(img=img))], samples_per_gpu=1)

    if first_param.is_cuda:
        # scatter to specified GPU
        batch = scatter(batch, [device])[0]
    else:
        # CPU mode: fall back to the torchvision ops.
        for module in model.modules():
            # aligned=False is not implemented on CPU, so the flag is
            # flipped on the fly for such modules.
            if isinstance(module, (RoIPool, RoIAlign)) and not module.aligned:
                module.use_torchvision = True
        warnings.warn('We set use_torchvision=True in CPU mode.')
        # Unwrap the DataContainer by hand.
        batch['img_metas'] = batch['img_metas'][0].data

    with torch.no_grad():
        return model(return_loss=False, rescale=True, **batch)

예제 #8

0

파일 보기

파일: pred_single.py 프로젝트: zengzhaoyang/tooth

    def _inference(self, filename):
        """Predict a binary mask and the filtered boxes for one image.

        Args:
            filename: Value stored under the ``'img'`` key of the dict passed
                to ``self.test_pipeline``.

        Returns:
            tuple: ``(res, bbox)``. ``res`` is a ``uint8`` array holding 255
            wherever at least one predicted mask is non-zero and 0 elsewhere.
            ``bbox`` holds the predicted boxes whose area exceeds 100.

        Note:
            When no mask is predicted, an all-zero mask of shape
            ``(self.h, self.w)`` is returned. Within this method those
            attributes are only assigned after a prediction that produced
            masks, so they presumably need to be initialised elsewhere —
            TODO confirm.
        """
        data = dict(img=filename)
        data = self.test_pipeline(data)
        data = collate([data], samples_per_gpu=1)
        data = scatter(data, [self.device])[0]

        with torch.no_grad():
            pred = self.model(return_loss=False, rescale=True, **data)

        # Keep only the boxes whose area is larger than 100.
        bbox = pred[0][0][1]
        area = (bbox[:, 2] - bbox[:, 0]) * (bbox[:, 3] - bbox[:, 1])
        bbox = bbox[area > 100]

        masks = pred[0][1][1]
        tot = len(masks)
        if tot != 0:
            # Sum all masks as int32 so overlapping pixels cannot overflow.
            res = masks[0].astype(np.int32)
            for i in range(1, tot):
                res = res + masks[i].astype(np.int32)
            # Remember the mask size for later calls without any mask.
            self.h, self.w = res.shape
        else:
            res = np.zeros((self.h, self.w), dtype=np.int32)

        # Binarise: any covered pixel becomes 255.
        res = (res > 0).astype(np.uint8) * 255

        return res, bbox

예제 #9

0

파일 보기

def model_inference(model, img):
    """Run the detector on one image file.

    Args:
        model (nn.Module): The loaded detector.
        img (str): Image file.

    Returns:
        result (dict): Detection results (first element of the model output).
    """
    assert isinstance(img, str)

    cfg = model.cfg
    first_param = next(model.parameters())
    device = first_param.device  # model device

    # Preprocess the file and wrap it as a batch of one.
    pipeline = Compose(cfg.data.test.pipeline)
    sample = pipeline(dict(img_info=dict(filename=img), img_prefix=None))
    batch = collate([sample], samples_per_gpu=1)

    # img_metas arrives either as a list of containers or as one container;
    # unwrap it in both cases.
    metas = batch['img_metas']
    batch['img_metas'] = (
        metas[0].data if isinstance(metas, list) else metas.data[0])

    if first_param.is_cuda:
        # scatter to specified GPU
        batch = scatter(batch, [device])[0]
    else:
        assert not any(
            isinstance(module, RoIPool) for module in model.modules()
        ), 'CPU inference with RoIPool is not supported currently.'

    with torch.no_grad():
        return model(return_loss=False, rescale=True, **batch)[0]

예제 #10

0

파일 보기

    def predict(self, model, epoch):
        """Run ``model`` over ``self.dataset`` and gather predictions.

        Args:
            model: Callable invoked as
                ``model(**batch, return_loss=False, rescale=True)``.
            epoch: Stored on ``self.epoch``.

        Returns:
            tuple: ``(y_true, y_pred, names)``. ``y_true`` holds one
            ``{'bboxes', 'labels'}`` dict per sample unless
            ``self.predict_mode`` is set, ``y_pred`` the raw model outputs and
            ``names`` the sample file names.
        """
        self.epoch = epoch

        y_true, y_pred, names = [], [], []
        num_samples = len(self.dataset)

        with torch.no_grad():
            for ind in tqdm(range(num_samples), total=num_samples):
                # Debug runs stop after index 100.
                if self.debug and ind > 100:
                    break

                sample = self.dataset[ind]
                # The model expects img / img_meta wrapped in lists; wrap
                # them only when neither is a list already.
                if (type(sample['img']) is not list
                        and type(sample['img_meta']) is not list):
                    sample['img'] = [sample['img']]
                    sample['img_meta'] = [sample['img_meta']]
                batch = collate([sample], samples_per_gpu=1)

                if not self.predict_mode:
                    # Ground truth is only collected outside predict mode.
                    ann = self.dataset.get_ann_info(ind)
                    y_true.append(
                        {'bboxes': ann['bboxes'], 'labels': ann['labels']})
                names.append(self.dataset.img_infos[ind]['filename'])

                # NMS parameters could be tuned through
                # model.module.test_cfg if needed.
                # The output is a list with length = num_classes - 1; each
                # element holds the predicted boxes for that class.
                y_pred.append(
                    model(**batch, return_loss=False, rescale=True))

        return y_true, y_pred, names

예제 #11

0

파일 보기

def restoration_inference(model, img, ref=None):
    """Restore an image with the given model.

    Ground-truth entries (``'gt'`` and ``'gt_path'``) are stripped from
    ``model.cfg.test_pipeline`` in place before the pipeline is built.

    Args:
        model (nn.Module): The loaded model.
        img (str): File path of input image.
        ref (str | None): File path of reference image. Default: None.

    Returns:
        Tensor: The predicted restoration result.
    """
    cfg = model.cfg
    model_device = next(model.parameters()).device

    for unwanted in ('gt', 'gt_path'):
        # Walk a snapshot because steps get removed from the live list.
        for step in tuple(cfg.test_pipeline):
            if 'key' in step and unwanted == step['key']:
                cfg.test_pipeline.remove(step)
            if 'keys' in step and unwanted in step['keys']:
                step['keys'].remove(unwanted)
                # A step left without any key is dropped entirely.
                if not step['keys']:
                    cfg.test_pipeline.remove(step)
            if 'meta_keys' in step and unwanted in step['meta_keys']:
                step['meta_keys'].remove(unwanted)

    pipeline = Compose(cfg.test_pipeline)

    # Reference-based SR also passes the reference path; single-image SR
    # passes only the low-quality input.
    sample = dict(lq_path=img, ref_path=ref) if ref else dict(lq_path=img)
    batch = collate([pipeline(sample)], samples_per_gpu=1)
    batch = scatter(batch, [model_device])[0]

    with torch.no_grad():
        output = model(test_mode=True, **batch)
    return output['output']

예제 #12

0

파일 보기

파일: onnx_runtime.py 프로젝트: ObKsEm/mmdetection

def run_with_onnx_runtime(model_path, w, h):
    """Feed the first readable test image to an ONNX Runtime session.

    Relies on names not defined in this function: ``test_cfg``, ``test_path``
    and ``test_img_path``.

    Args:
        model_path: Path handed to ``onnxruntime.InferenceSession``.
        w: Not used in this function.
        h: Not used in this function.
    """

    session = onnxruntime.InferenceSession(model_path, None)
    input_name = session.get_inputs()[0].name
    # test_pipeline = [LoadImage()] + cfg.data.test.pipeline[1:]
    test_pipeline = [LoadImage()] + test_cfg
    test_pipeline = Compose(test_pipeline)
    # The batch is scattered to CUDA device 0.
    device = torch.device(0)
    # Each line of the list file is an image stem without extension.
    with open(test_path, "r") as f:
        filenames = f.readlines()
        for filename in filenames:
            img_file = filename.strip() + ".jpg"
            # NOTE(review): xml_file is computed but never read below.
            xml_file = filename.strip() + ".xml"
            img = cv2.imread(os.path.join(test_img_path, img_file))
            # Images that could not be read are skipped.
            if img is not None:
                # prepare data
                data = dict(img=img)
                data = test_pipeline(data)
                data = scatter(collate([data], samples_per_gpu=1), [device])[0]
                # NOTE(review): the whole scattered batch is passed as the
                # value for the single input name, and ``.shape`` is read
                # directly off the value returned by ``session.run`` —
                # verify both against the ONNX Runtime API.
                result = session.run([], {input_name: data})
                print(f'Output y.shape: {result.shape}')
                # Only the first readable image is processed.
                break

예제 #13

0

파일 보기

def preprocess(model, img):
    """Prepare one image as a model-ready batch without running the model.

    Args:
        model (nn.Module): The loaded detector; supplies the test pipeline
            config and the target device.
        img: Image stored under the ``'img'`` key of the pipeline input and
            handled by the ``LoadImage`` step.

    Returns:
        The collated batch scattered to the device of the model parameters.
    """
    # The configured loading step is swapped for LoadImage.
    steps = [LoadImage()] + model.cfg.data.test.pipeline[1:]
    pipeline = Compose(steps)
    target_device = next(model.parameters()).device

    sample = pipeline(dict(img=img))
    batch = collate([sample], samples_per_gpu=1)
    return scatter(batch, [target_device])[0]

예제 #14

0

파일 보기

파일: mymmdetection2d.py 프로젝트: lkk688/WaymoObjectDetection

def myinferencedetector(model, img):
    """Run the detector on an image file opened through PIL.

    Basic facts about the image (format, size, mode, array type and shape)
    are printed along the way.

    Args:
        model (nn.Module): The loaded detector; supplies the test pipeline
            config and the target device.
        img: Path or file object accepted by ``Image.open``.

    Returns:
        The first element of the model output.
    """
    # The context manager closes the underlying file once the pixel data has
    # been copied into the array.
    with Image.open(img) as image:
        # summarize some details about the image
        print(image.format)
        print(image.size)
        print(image.mode)
        # convert image to numpy array
        image_np = np.asarray(image)
    print(type(image_np))
    # summarize shape
    print(image_np.shape)  # e.g. (1280, 1920, 3)

    cfg = model.cfg
    device = next(model.parameters()).device  # model device
    cfg = cfg.copy()
    # The image is already in memory, so use the loader that accepts arrays.
    cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam'
    cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)
    test_pipeline = Compose(cfg.data.test.pipeline)

    # np.asarray always yields an ndarray, so the image is passed directly.
    datas = [test_pipeline(dict(img=image_np))]

    data = collate(datas, samples_per_gpu=1)
    # just get the actual data from DataContainer
    data['img_metas'] = [img_metas.data[0] for img_metas in data['img_metas']]
    data['img'] = [img.data[0] for img in data['img']]

    data = scatter(data, [device])[0]
    # forward the model
    with torch.no_grad():
        results = model(return_loss=False, rescale=True, **data)
    return results[0]

예제 #15

0

파일 보기

def inference_model(model, img):
    """Classify a single image.

    The first step of ``model.cfg.data.test.pipeline`` is adjusted in place so
    that it matches the input kind: a ``LoadImageFromFile`` step is inserted
    for file names and removed for loaded images.

    Args:
        model (nn.Module): The loaded classifier.
        img (str/ndarray): The image filename or loaded image.

    Returns:
        result (dict): The classification results that contains
            `pred_label`, `pred_score` and `pred_class`.
    """
    cfg = model.cfg
    first_param = next(model.parameters())
    device = first_param.device  # model device

    steps = cfg.data.test.pipeline
    starts_with_file_loader = steps[0]['type'] == 'LoadImageFromFile'
    if not isinstance(img, str):
        # Loaded image: the file-loading step must not run.
        if starts_with_file_loader:
            steps.pop(0)
        sample = dict(img=img)
    else:
        # File name: make sure the pipeline starts by loading the file.
        if not starts_with_file_loader:
            steps.insert(0, dict(type='LoadImageFromFile'))
        sample = dict(img_info=dict(filename=img), img_prefix=None)

    pipeline = Compose(cfg.data.test.pipeline)
    batch = collate([pipeline(sample)], samples_per_gpu=1)
    if first_param.is_cuda:
        # scatter to specified GPU
        batch = scatter(batch, [device])[0]

    with torch.no_grad():
        scores = model(return_loss=False, **batch)
        best_score = np.max(scores, axis=1)[0]
        best_label = np.argmax(scores, axis=1)[0]

    return {
        'pred_label': best_label,
        'pred_score': float(best_score),
        'pred_class': model.CLASSES[best_label],
    }

예제 #16

0

파일 보기

def inference_sot(model, image, init_bbox, frame_id):
    """Run the single object tracker on one frame.

    Args:
        model (nn.Module): The loaded tracker.
        image (ndarray): Loaded images.
        init_bbox (ndarray): The target needs to be tracked.
        frame_id (int): frame id.

    Returns:
        dict[str : ndarray]: The tracking results.
    """
    cfg = model.cfg
    first_param = next(model.parameters())
    device = first_param.device  # model device

    sample = {
        'img': image.astype(np.float32),
        'gt_bboxes': np.array(init_bbox).astype(np.float32),
        'img_info': {'frame_id': frame_id},
    }
    # The first two configured steps ("LoadImageFromFile" and
    # "LoadAnnotations") are skipped because the data is already in memory.
    pipeline = Compose(cfg.data.test.pipeline[2:])
    batch = collate([pipeline(sample)], samples_per_gpu=1)

    if first_param.is_cuda:
        # scatter to specified GPU
        batch = scatter(batch, [device])[0]
    else:
        assert not any(
            isinstance(module, RoIPool) for module in model.modules()
        ), 'CPU inference with RoIPool is not supported currently.'
        # Unwrap the DataContainer by hand on the CPU path.
        batch['img_metas'] = batch['img_metas'][0].data

    with torch.no_grad():
        return model(return_loss=False, rescale=True, **batch)

예제 #17

0

파일 보기

파일: inference.py 프로젝트: dyy0205/SOLO

def inference_detector(model, img):
    """Run the detector on a single image and time the forward pass.

    Args:
        model (nn.Module): The loaded detector.
        img: Image stored under the ``'img'`` key of the pipeline input and
            handled by the ``LoadImage`` step.

    Returns:
        tuple: ``(result, cost_time)``. ``result`` is the model output, or
        ``None`` when the forward pass ran out of memory. ``cost_time`` is
        the elapsed wall-clock time in seconds between the two CUDA
        synchronisations around the forward pass.

    Raises:
        RuntimeError: Any runtime error from the forward pass whose message
            does not contain ``'out of memory'``.
    """
    cfg = model.cfg
    device = next(model.parameters()).device  # model device
    # build the data pipeline
    test_pipeline = [LoadImage()] + cfg.data.test.pipeline[1:]
    test_pipeline = Compose(test_pipeline)
    # prepare data
    data = dict(img=img)
    data = test_pipeline(data)
    data = scatter(collate([data], samples_per_gpu=1), [device])[0]

    # Pre-bind the result so the out-of-memory path below still has something
    # to return instead of failing with UnboundLocalError.
    result = None
    # forward the model
    with torch.no_grad():
        # Synchronise on both sides so the timing covers the GPU work.
        torch.cuda.synchronize()
        st = time.time()
        try:
            result = model(return_loss=False, rescale=True, **data)
        except RuntimeError as exception:
            if 'out of memory' not in str(exception):
                raise
            # Best effort: report the condition and release cached memory.
            print('WARNING: out of memory')
            if hasattr(torch.cuda, 'empty_cache'):
                torch.cuda.empty_cache()
        torch.cuda.synchronize()
        cost_time = time.time() - st
    return result, cost_time

예제 #18

0

파일 보기

파일: interface.py 프로젝트: tyunist/Kaggle_PKU_Baidu

def inference_detector(cfg, model, img):
    """Run the detector on a single image using an explicit config.

    Args:
        cfg: Config whose ``data.test.pipeline`` supplies the preprocessing
            steps; the first two entries are replaced by ``LoadImage``.
        model (nn.Module): The loaded detector.
        img: Image stored under the ``'img'`` key of the pipeline input.

    Returns:
        The output of ``model(return_loss=False, rescale=True, ...)``.
    """
    steps = [LoadImage()] + cfg.data.test.pipeline[2:]
    pipeline = Compose(steps)

    sample = pipeline({'img': img})
    batch = collate([sample], samples_per_gpu=1)

    with torch.no_grad():
        return model(return_loss=False, rescale=True, **batch)

예제 #19

0

파일 보기

파일: extract.py 프로젝트: hnp0411/mmcaptioning

def extract_encoder_feat(model, tokenizer, img):
    """Extract encoder features of caption model.

    Args:
        model (nn.Module): Image Captioning Model
        tokenizer: For preprocess pipeline
        img (str): img file path

    Returns:
        Extracted feature result, i.e. the value of ``model.extract_feat``.
    """
    cfg = model.cfg
    first_param = next(model.parameters())
    device = first_param.device  # model device

    # The pipeline is fed a caption entry; an empty dummy caption is used.
    sample = {
        'img_info': {'filename': img},
        'img_prefix': None,
        'cap_info': {'caption': '', 'tokenizer': tokenizer},
    }

    pipeline = Compose(cfg.data.test.pipeline)
    batch = collate([pipeline(sample)], samples_per_gpu=1)
    if first_param.is_cuda:
        # scatter to specified GPU
        batch = scatter(batch, [device])[0]

    # Only the feature extractor is run, not the full forward pass.
    with torch.no_grad():
        return model.extract_feat(batch['img'][0], batch['img_mask'][0])

예제 #20

0

파일 보기

파일: matting_inference.py 프로젝트: star0071/mmediting

def matting_inference(model, img, trimap):
    """Predict the alpha matte for an image / trimap pair.

    Alpha entries (``'alpha'`` and ``'ori_alpha'``) are stripped from
    ``model.cfg.test_pipeline`` in place before the pipeline is built.

    Args:
        model (nn.Module): The loaded model.
        img (str): Image file path.
        trimap (str): Trimap file path.

    Returns:
        np.ndarray: The predicted alpha matte.
    """
    cfg = model.cfg
    model_device = next(model.parameters()).device

    for unwanted in ('alpha', 'ori_alpha'):
        # Walk a snapshot because steps get removed from the live list.
        for step in tuple(cfg.test_pipeline):
            if 'key' in step and unwanted == step['key']:
                cfg.test_pipeline.remove(step)
            if 'keys' in step and unwanted in step['keys']:
                step['keys'].remove(unwanted)
                # A step left without any key is dropped entirely.
                if not step['keys']:
                    cfg.test_pipeline.remove(step)
            if 'meta_keys' in step and unwanted in step['meta_keys']:
                step['meta_keys'].remove(unwanted)

    pipeline = Compose(cfg.test_pipeline)

    sample = pipeline({'merged_path': img, 'trimap_path': trimap})
    batch = collate([sample], samples_per_gpu=1)
    batch = scatter(batch, [model_device])[0]

    with torch.no_grad():
        output = model(test_mode=True, **batch)
    return output['pred_alpha']

예제 #21

0

파일 보기

파일: test_video.py 프로젝트: yuxulingche/TPN

def inference_recognizer(model, frames):
    """Run the recognizer on a group of frames.

    Args:
        model (nn.Module): The loaded recognizer; ``model.cfg.data.test``
            provides ``input_size`` and ``img_scale``.
        frames: Frames handed to ``GroupImageTransform``.

    Returns:
        The output of ``model(return_loss=False, rescale=True, ...)``.
    """
    test_cfg = model.cfg.data.test
    model_device = next(model.parameters()).device

    # Preprocessing transform with fixed normalisation constants.
    transform = GroupImageTransform(
        crop_size=test_cfg.input_size,
        oversample=None,
        resize_crop=False,
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True)

    # Only the first returned item (the transformed frames) is used.
    target_scale = (test_cfg.img_scale, test_cfg.img_scale)
    frames, *_ = transform(
        frames,
        target_scale,
        crop_history=None,
        flip=False,
        keep_ratio=False,
        div_255=False,
        is_flow=False)

    sample = {'img_group_0': frames, 'num_modalities': 1, 'img_meta': {}}
    batch = collate([sample], samples_per_gpu=1)
    batch = scatter(batch, [model_device])[0]

    with torch.no_grad():
        return model(return_loss=False, rescale=True, **batch)

예제 #22

0

파일 보기

    def after_train_epoch(self, runner):
        """Evaluate a (possibly shuffled) subset of the dataset.

        At most ``self.num_evals`` samples are evaluated; when
        ``self.shuffle`` is set they are drawn in random order.

        Args:
            runner: Training runner providing ``model``.
        """
        if not self.every_n_epochs(runner, self.interval):
            return
        runner.model.eval()

        # Pick which sample indices to evaluate.
        eval_indices = list(range(len(self.dataset)))
        if self.shuffle:
            np.random.shuffle(eval_indices)
        eval_indices = eval_indices[:self.num_evals]

        prog_bar = mmcv.ProgressBar(len(eval_indices))
        results = []
        for sample_idx in eval_indices:
            batch = collate([self.dataset[sample_idx]], samples_per_gpu=1)
            data_gpu = scatter(batch, [torch.cuda.current_device()])[0]

            # The model returns a pair; only the first item is collected.
            with torch.no_grad():
                result, _ = runner.model(return_loss=False,
                                         rescale=True,
                                         **data_gpu)
            results.extend(result)
            prog_bar.update()

        self.evaluate(runner, results, range_idxs=eval_indices)

예제 #23

0

파일 보기

    def after_train_epoch(self, runner):
        """Evaluate every dataset sample on the current GPU.

        Args:
            runner: Training runner providing ``model``.
        """
        if not self.every_n_epochs(runner, self.interval):
            return
        runner.model.eval()

        num_samples = len(self.dataset)
        results = [None] * num_samples
        prog_bar = mmcv.ProgressBar(num_samples)
        for idx in range(num_samples):
            batch = collate([self.dataset[idx]], samples_per_gpu=1)
            data_gpu = scatter(batch, [torch.cuda.current_device()])[0]

            # compute output
            with torch.no_grad():
                results[idx] = runner.model(return_loss=False,
                                            rescale=True,
                                            **data_gpu)

            # One sample per step, so the bar advances by one.
            prog_bar.update()

        print('\n')
        self.evaluate(runner, results)

예제 #24

0

파일 보기

파일: inference.py 프로젝트: Smallflyfly/mmpose

def inference_bottom_up_pose_model(model, img_or_path):
    """Run the bottom-up pose model on a single image.

    num_people: P
    num_keypoints: K

    Args:
        model (nn.Module): The loaded pose model.
        img_or_path (str| np.ndarray): Image or image path.

    Returns:
        list[dict]: The predicted pose info.

            The length of the list is the number of people (P). Each item
            is a dict whose ``'keypoints'`` entry holds the first three
            columns of that person's prediction (x, y, score).
    """
    cfg = model.cfg
    first_param = next(model.parameters())
    device = first_param.device

    # The configured loading step is swapped for LoadImage.
    pipeline = Compose([LoadImage()] + cfg.test_pipeline[1:])

    # Annotation info required by the pipeline.
    ann_info = {
        'image_size': cfg.data_cfg['image_size'],
        'num_joints': cfg.data_cfg['num_joints'],
        'flip_index':
        [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15],
    }
    sample = {
        'img_or_path': img_or_path,
        'dataset': 'coco',
        'ann_info': ann_info,
    }

    batch = collate([pipeline(sample)], samples_per_gpu=1)
    if first_param.is_cuda:
        # scatter to specified GPU
        batch = scatter(batch, [device])[0]
    else:
        # Unwrap the DataContainer by hand on the CPU path.
        batch['img_metas'] = batch['img_metas'].data[0]

    # Only the first of the three returned values is used.
    with torch.no_grad():
        all_preds, _, _ = model(
            return_loss=False, img=batch['img'], img_metas=batch['img_metas'])

    return [{'keypoints': pred[:, :3]} for pred in all_preds]

예제 #25

0

파일 보기

파일: bounding_box_generation_opti.py 프로젝트: ia-flash/lab

def _data_func(data, device_id):
    """Build test-mode forward kwargs for a single sample.

    The sample is collated as a batch of one, scattered to ``device_id`` and
    merged with ``return_loss=False`` and ``rescale=True``.
    """
    batch = collate([data], samples_per_gpu=1)
    on_device = scatter(batch, [device_id])[0]
    return dict(return_loss=False, rescale=True, **on_device)

예제 #26

0

파일 보기

파일: inference.py 프로젝트: Smallflyfly/mmpose

def _inference_single_pose_model(model, img_or_path, bbox, dataset):
    """Run a top-down pose model on a single bounding box.

    Args:
        model (nn.Module): The loaded pose model. Its ``cfg`` supplies
            ``test_pipeline`` and ``data_cfg``.
        img_or_path (str | np.ndarray): Image file name or loaded image.
        bbox (list | np.ndarray): Bounding box of length 4 or 5:
            (left, top, width, height, [score]). Without a score, 1 is used.
        dataset (str): Dataset class name; selects the flip-pair table.

    Returns:
        The first element of the predictions returned by the model
        (per the upstream docstring, ndarray[Kx3]: x, y, score).

    Raises:
        NotImplementedError: If ``dataset`` is not one of the supported
            dataset names.
    """
    cfg = model.cfg
    first_param = next(model.parameters())
    device = first_param.device

    # The first configured transform is replaced by LoadImage().
    test_pipeline = Compose([LoadImage()] + cfg.test_pipeline[1:])

    assert len(bbox) in [4, 5]
    center, scale = _box2cs(cfg, bbox)

    # Keypoint index pairs that swap under horizontal flipping.
    if dataset in ('TopDownCocoDataset', 'TopDownOCHumanDataset'):
        # [1, 2], [3, 4], ..., [15, 16]
        flip_pairs = [[left, left + 1] for left in range(1, 16, 2)]
    elif dataset == 'TopDownCocoWholeBodyDataset':
        # [1, 2], [3, 4], ..., [15, 16]
        body = [[left, left + 1] for left in range(1, 16, 2)]
        # [17, 20], [18, 21], [19, 22]
        foot = [[idx, idx + 3] for idx in range(17, 20)]
        face = [[23, 39], [24, 38], [25, 37], [26, 36], [27, 35], [28, 34],
                [29, 33], [30, 32], [40, 49], [41, 48], [42, 47], [43, 46],
                [44, 45], [54, 58], [55, 57], [59, 68], [60, 67], [61, 66],
                [62, 65], [63, 70], [64, 69], [71, 77], [72, 76], [73, 75],
                [78, 82], [79, 81], [83, 87], [84, 86], [88, 90]]
        # [91, 112], [92, 113], ..., [111, 132]
        hand = [[idx, idx + 21] for idx in range(91, 112)]
        flip_pairs = body + foot + face + hand
    elif dataset == 'TopDownAicDataset':
        flip_pairs = [[0, 3], [1, 4], [2, 5], [6, 9], [7, 10], [8, 11]]
    elif dataset in ('TopDownOneHand10KDataset', 'TopDownFreiHandDataset'):
        flip_pairs = []
    else:
        raise NotImplementedError()

    num_joints = cfg.data_cfg.num_joints
    sample = {
        'img_or_path': img_or_path,
        'center': center,
        'scale': scale,
        'bbox_score': bbox[4] if len(bbox) == 5 else 1,
        'dataset': dataset,
        # placeholder annotations; two independent zero arrays
        'joints_3d': np.zeros((num_joints, 3), dtype=np.float32),
        'joints_3d_visible': np.zeros((num_joints, 3), dtype=np.float32),
        'rotation': 0,
        'ann_info': {
            'image_size': cfg.data_cfg['image_size'],
            'num_joints': cfg.data_cfg['num_joints'],
            'flip_pairs': flip_pairs
        }
    }

    batch = collate([test_pipeline(sample)], samples_per_gpu=1)
    if first_param.is_cuda:
        # scatter to the GPU the model lives on
        batch = scatter(batch, [device])[0]
    else:
        # unwrap the DataContainer to get the plain meta information
        batch['img_metas'] = batch['img_metas'].data[0]

    with torch.no_grad():
        all_preds, _, _ = model(
            return_loss=False, img=batch['img'], img_metas=batch['img_metas'])

    return all_preds[0]

예제 #27

0

파일 보기

파일: inference.py 프로젝트: XiaoWen-AI/CIKM2020AnalytiCup-adversarial-attack-for-general-object-detectors

def attact_detector(model, img0, dstimg, gt_bboxes, gt_labels, filename=None,
                    attack_roi=None, at_times=50, e=30.0, image_size=800,
                    mode='frcnn', random_begin=False, return_grad=False,
                    rpn=False):
    """Iteratively perturb ``dstimg`` using gradients of the detector loss.

    Each iteration runs the model in training mode (``return_loss=True``),
    back-propagates a selected loss to the input image, resizes the gradient
    to 500x500, folds it into a momentum buffer and subtracts the clipped
    momentum from the pixels selected by ``attack_roi``.

    Args:
        model (nn.Module): The loaded detector; ``model.cfg`` supplies the
            test pipeline.
        img0: Unused by this function.
        dstimg (np.ndarray): Image to perturb. Presumably 500x500x3, since
            gradients are resized to (500, 500) before being applied --
            TODO confirm against callers.
        gt_bboxes: Ground-truth boxes; rescaled by ``image_size / 500``.
        gt_labels: Ground-truth labels.
        filename: Only used in the progress print-out.
        attack_roi: Index (e.g. the result of ``np.where``) selecting the
            pixels that may be modified.
        at_times (int): Number of iterations (forced to 1 when
            ``return_grad`` is set).
        e (float): Step size; halved after iteration index 199.
        image_size (int): Side length the image is resized to before it is
            fed to the model.
        mode (str): ``'frcnn'`` or ``'ssd'``; selects normalisation std and
            the loss that is back-propagated.
        random_begin (bool): If true, ``dstimg[attack_roi]`` is overwritten
            in place with random integers in [0, 256) before the attack.
        return_grad (bool): If true, return after the first backward pass.
        rpn (bool): In non-ssd mode, back-propagate the negated summed RPN
            classification loss instead of ``loss['loss_cls']``.

    Returns:
        np.ndarray: The perturbed image, or, when ``return_grad`` is set, the
        tuple ``(data_grad, gt_bboxes)`` holding the scaled 500x500 gradient
        and the rescaled ground-truth box tensor.

    Raises:
        ValueError: If ``mode`` is neither ``'frcnn'`` nor ``'ssd'``.
    """
    cfg = model.cfg
    device = next(model.parameters()).device  # model device
    # build the data pipeline
    test_pipeline = [LoadImage()] + cfg.data.test.pipeline[1:]
    test_pipeline = Compose(test_pipeline)

    gt_labels = np.array(gt_labels)
    gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
    gt_bboxes = gt_bboxes / 500 * image_size
    gt_labels = torch.tensor(gt_labels, device=device)
    gt_bboxes = torch.tensor(gt_bboxes, device=device)

    momentom = np.zeros_like(dstimg)
    if mode == 'frcnn':
        mean = [123.675, 116.28, 103.53]
        std = [58.395, 57.12, 57.375]
    elif mode == 'ssd':
        mean = [123.675, 116.28, 103.53]
        std = [1, 1, 1]
    else:
        # Fail early instead of hitting an undefined `std` inside the loop.
        raise ValueError(
            "mode must be 'frcnn' or 'ssd', got {!r}".format(mode))
    if random_begin:
        dstimg[attack_roi] = np.random.randint(
            0, 256, dstimg[attack_roi].shape)
    if return_grad:
        at_times = 1

    # np.float64 is the dtype the removed `np.float` alias referred to.
    adv_x = np.array(dstimg[..., ::-1], dtype=np.float64)
    res = np.array(adv_x)
    data = dict(img=adv_x)
    # Flip the channel axis back: adv_x is now a view in dstimg's order.
    adv_x = adv_x[..., ::-1]
    data = test_pipeline(data)
    data = collate([data], samples_per_gpu=1)
    if next(model.parameters()).is_cuda:
        # scatter to specified GPU
        data = scatter(data, [device])[0]
    else:
        # Use torchvision ops for CPU mode instead
        for m in model.modules():
            if isinstance(m, (RoIPool, RoIAlign)):
                if not m.aligned:
                    # aligned=False is not implemented on CPU
                    # set use_torchvision on-the-fly
                    m.use_torchvision = True
        warnings.warn('We set use_torchvision=True in CPU mode.')
        # just get the actual data from DataContainer
        data['img_metas'] = data['img_metas'][0].data
    data['img'] = data['img'][0].clone().detach().to(device)
    data['img_metas'] = data['img_metas'][0]
    data['gt_bboxes'] = [gt_bboxes]
    data['gt_labels'] = [gt_labels]
    loss_last = 100
    for k in range(at_times):
        data['img'] = torch.autograd.Variable(data['img'])
        data['img'].requires_grad = True
        loss = model(
            return_loss=True,
            img=data['img'],
            img_metas=data['img_metas'],
            gt_bboxes=data['gt_bboxes'],
            gt_labels=data['gt_labels'])
        # NOTE(review): 'loss_rpn_cls' is read in every mode, including
        # 'ssd' -- confirm the ssd model actually returns that key.
        loss_rpn_cls = sum(loss['loss_rpn_cls'])
        if mode == 'ssd':
            loss_back = loss['loss_cls'][0]
        elif rpn:
            loss_back = -loss_rpn_cls
        else:
            loss_back = loss['loss_cls']

        model.zero_grad()
        loss_back.backward()
        data_grad = data['img'].grad.data

        data_grad = data_grad.cpu().numpy()
        data_grad = data_grad.squeeze()
        data_grad = data_grad.transpose([1, 2, 0])
        # NOTE(review): the third positional parameter of cv2.resize is
        # `dst`, not `interpolation`, so INTER_AREA is likely not applied
        # here. Left unchanged to preserve the numerical behaviour.
        data_grad = cv2.resize(data_grad, (500, 500), cv2.INTER_AREA)
        if loss_back <= 0.1:
            # take half-size steps once the loss is already small
            momentom = 0.9 * momentom + e / 2 * data_grad
        else:
            momentom = 0.9 * momentom + e * data_grad

        # Scale to pixel units for the update; undone again further below.
        momentom = momentom * std
        if mode == 'ssd':
            data_grad = 100 * e * data_grad * std
        else:
            data_grad = e * data_grad * std
        momentom = np.clip(momentom, -10, 10)
        if return_grad:
            return data_grad, gt_bboxes
        adv_x[attack_roi] = adv_x[attack_roi] - momentom[attack_roi]
        momentom = momentom / std
        adv_x[attack_roi] = np.clip(adv_x[attack_roi], 0, 255)
        data['img'] = (adv_x - mean) / std
        data['img'] = cv2.resize(data['img'], (image_size, image_size))
        data['img'] = torch.from_numpy(
            data['img'].transpose(2, 0, 1)).float().unsqueeze(0)
        # Follow the model's device rather than the current CUDA device.
        data['img'] = data['img'].to(device)

        if loss_back < loss_last:
            # NOTE(review): `res` is bound to adv_x itself, not a copy, and
            # adv_x keeps being updated in place, so `res` always reflects
            # the latest adv_x rather than a best-so-far snapshot. Confirm
            # whether a copy was intended.
            res = adv_x
            loss_last = loss_back

        if (k + 1) % 5 == 0 or (k + 1) == at_times:
            print(filename, 'frcn step:%d' % (k + 1), loss_back,
                  loss['loss_cls'], loss_rpn_cls)
        if k == 199:
            e = e / 2

    return res

예제 #28

0

파일 보기

파일: inference.py 프로젝트: XiaoWen-AI/CIKM2020AnalytiCup-adversarial-attack-for-general-object-detectors

def Dpatch_detector(model, img, dstimg, patch, gt_bboxes, gt_labels,
                    filename=None, at_times=50, e=10.0, image_size=800,
                    mode='frcnn'):
    """Optimise a random patch pasted into ``dstimg`` against the detector.

    A random patch is written into rows/columns 230:270 of a uint8 copy of
    ``dstimg``. The pixels that then differ from ``img`` form the attack
    region. Each iteration back-propagates the detector loss to the input,
    resizes the gradient to 500x500, scales and clips it to [-10, 10], and
    subtracts it from the attack region.

    Args:
        model (nn.Module): The loaded detector; ``model.cfg`` supplies the
            test pipeline.
        img (np.ndarray): Reference image; compared element-wise with the
            patched image to locate the pixels to attack.
        dstimg (np.ndarray): Image the patch is pasted into. Presumably
            500x500x3, since gradients are resized to (500, 500) -- TODO
            confirm against callers.
        patch: Ignored; a fresh random patch is always generated.
        gt_bboxes: Ground-truth boxes; rescaled by ``image_size / 500``.
        gt_labels: Ground-truth labels.
        filename: Only used in the progress print-out.
        at_times (int): Number of iterations.
        e (float): Step size.
        image_size (int): Side length the image is resized to before it is
            fed to the model.
        mode (str): ``'ssd'`` back-propagates ``loss['loss_cls'][0]`` and
            uses a 100x larger step; any other value back-propagates
            ``loss['loss_cls'] + loss['loss_bbox']``.

    Returns:
        np.ndarray: The uint8 image after the last iteration.
    """
    cfg = model.cfg
    device = next(model.parameters()).device  # model device
    # build the data pipeline
    test_pipeline = [LoadImage()] + cfg.data.test.pipeline[1:]
    test_pipeline = Compose(test_pipeline)

    gt_labels = np.array(gt_labels)
    gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
    gt_bboxes = gt_bboxes / 500 * image_size
    gt_labels = torch.tensor(gt_labels, device=device)
    gt_bboxes = torch.tensor(gt_bboxes, device=device)

    adv_x = np.array(dstimg, dtype=np.uint8)
    patch = np.random.randint(0, 256, size=adv_x.shape)
    adv_x[230:270, 230:270] = patch[230:270, 230:270]
    attack_roi = np.where(adv_x != img)
    mean = [123.675, 116.28, 103.53]
    std = [58.395, 57.12, 57.375]

    # Feed the pipeline a channel-reversed view, then flip back so adv_x is
    # again in dstimg's channel order for the update loop.
    adv_x = adv_x[..., ::-1]
    data = dict(img=adv_x)
    data = test_pipeline(data)
    data = collate([data], samples_per_gpu=1)
    adv_x = adv_x[..., ::-1]
    if next(model.parameters()).is_cuda:
        # scatter to specified GPU
        data = scatter(data, [device])[0]
    else:
        # Use torchvision ops for CPU mode instead
        for m in model.modules():
            if isinstance(m, (RoIPool, RoIAlign)):
                if not m.aligned:
                    # aligned=False is not implemented on CPU
                    # set use_torchvision on-the-fly
                    m.use_torchvision = True
        warnings.warn('We set use_torchvision=True in CPU mode.')
        # just get the actual data from DataContainer
        data['img_metas'] = data['img_metas'][0].data
    data['img'] = data['img'][0].clone().detach().to(device)
    data['img_metas'] = data['img_metas'][0]
    data['gt_bboxes'] = [gt_bboxes]
    data['gt_labels'] = [gt_labels]
    # Value range of the initial normalised input; later inputs are clamped
    # to it.
    data_min = torch.min(data['img'])
    data_max = torch.max(data['img'])
    for k in range(at_times):
        data['img'] = torch.autograd.Variable(data['img'])
        data['img'].requires_grad = True
        loss = model(return_loss=True, **data)
        # NOTE(review): 'loss_rpn_cls' is read in every mode, including
        # 'ssd' -- confirm the ssd model actually returns that key.
        loss_rpn_cls = sum(loss['loss_rpn_cls'])
        if mode == 'ssd':
            loss_back = loss['loss_cls'][0]
        else:
            loss_back = loss['loss_cls'] + loss['loss_bbox']

        model.zero_grad()
        loss_back.backward()
        data_grad = data['img'].grad.data

        data_grad = data_grad.cpu().numpy()
        data_grad = data_grad.squeeze()
        data_grad = data_grad.transpose([1, 2, 0])
        data_grad = cv2.resize(data_grad, (500, 500))

        if mode == 'ssd':
            data_grad = 100 * e * data_grad * std
        else:
            data_grad = e * data_grad * std
        data_grad = np.clip(data_grad, -10, 10)

        # adv_x is uint8: clip the float result to [0, 255] *before* it is
        # written back, otherwise out-of-range values wrap around on the
        # cast and a later clip can no longer repair them.
        adv_x[attack_roi] = np.clip(
            adv_x[attack_roi] - data_grad[attack_roi], 0, 255)

        data['img'] = (adv_x - mean) / std
        data['img'] = cv2.resize(data['img'], (image_size, image_size))
        data['img'] = torch.from_numpy(
            data['img'].transpose(2, 0, 1)).float().unsqueeze(0)
        # Move to the model's device first so the clamp bounds (which live
        # on that device) and the input agree.
        data['img'] = data['img'].to(device)
        data['img'] = torch.clamp(data['img'], data_min, data_max)

        if (k + 1) % 5 == 0 or (k + 1) == at_times:
            print(filename, 'frcn step:%d' % (k + 1), loss_back,
                  loss['loss_cls'], loss_rpn_cls)
    return adv_x

예제 #29

0

파일 보기

파일: pytorch2onnx.py 프로젝트: Pandinosaurus/mmocr

def _prepare_data(cfg, imgs):
    """Inference image(s) with the detector.

    Args:
        model (nn.Module): The loaded detector.
        imgs (str/ndarray or list[str/ndarray] or tuple[str/ndarray]):
            Either image files or loaded images.
    Returns:
        result (dict): Predicted results.
    """
    if isinstance(imgs, (list, tuple)):
        if not isinstance(imgs[0], (np.ndarray, str)):
            raise AssertionError('imgs must be strings or numpy arrays')

    elif isinstance(imgs, (np.ndarray, str)):
        imgs = [imgs]
    else:
        raise AssertionError('imgs must be strings or numpy arrays')

    is_ndarray = isinstance(imgs[0], np.ndarray)

    if is_ndarray:
        cfg = cfg.copy()
        # set loading pipeline type
        cfg.data.test.pipeline[0].type = 'LoadImageFromNdarray'

    cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)
    test_pipeline = Compose(cfg.data.test.pipeline)

    data = []
    for img in imgs:
        # prepare data
        if is_ndarray:
            # directly add img
            datum = dict(img=img)
        else:
            # add information into dict
            datum = dict(img_info=dict(filename=img), img_prefix=None)

        # build the data pipeline
        datum = test_pipeline(datum)
        # get tensor from list to stack for batch mode (text detection)
        data.append(datum)

    if isinstance(data[0]['img'], list) and len(data) > 1:
        raise Exception('aug test does not support '
                        f'inference with batch size '
                        f'{len(data)}')

    data = collate(data, samples_per_gpu=len(imgs))

    # process img_metas
    if isinstance(data['img_metas'], list):
        data['img_metas'] = [
            img_metas.data[0] for img_metas in data['img_metas']
        ]
    else:
        data['img_metas'] = data['img_metas'].data

    if isinstance(data['img'], list):
        data['img'] = [img.data for img in data['img']]
        if isinstance(data['img'][0], list):
            data['img'] = [img[0] for img in data['img']]
    else:
        data['img'] = data['img'].data
    return data

예제 #30

0

파일 보기

def inference_bottom_up_pose_model(model,
                                   img_or_path,
                                   return_heatmap=False,
                                   outputs=None):
    """Run a bottom-up pose model on one image and collect layer outputs.

    Args:
        model (nn.Module): The loaded pose model. Its ``cfg`` supplies
            ``test_pipeline`` and ``data_cfg``.
        img_or_path (str | np.ndarray): Image filename or loaded image.
        return_heatmap (bool): Forwarded to the model; when true, the
            model's ``'output_heatmap'`` is stored under ``'heatmap'`` in
            the returned layer outputs. Default: False.
        outputs (list(str) | tuple(str)): Names of layers whose outputs
            should be captured by the output hook. Default: None.

    Returns:
        tuple:
            - list[dict]: One entry per item in the model's ``'preds'``.
              Each entry has a single ``'keypoints'`` key holding the first
              three columns of that prediction (x, y, score per keypoint).
            - list: A one-element list containing the hook's
              ``layer_outputs`` (including ``'heatmap'`` when requested).
    """
    cfg = model.cfg
    first_param = next(model.parameters())
    device = first_param.device

    # The first configured transform is replaced by LoadImage(), keeping the
    # configured channel order ('rgb' when not specified).
    channel_order = cfg.test_pipeline[0].get('channel_order', 'rgb')
    loader = LoadImage(channel_order=channel_order)
    test_pipeline = Compose([loader] + cfg.test_pipeline[1:])

    ann_info = {
        'image_size': cfg.data_cfg['image_size'],
        'num_joints': cfg.data_cfg['num_joints'],
        'flip_index':
        [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15],
    }
    sample = {
        'img_or_path': img_or_path,
        'dataset': 'coco',
        'ann_info': ann_info,
    }

    batch = collate([test_pipeline(sample)], samples_per_gpu=1)
    if first_param.is_cuda:
        # scatter to the GPU the model lives on
        batch = scatter(batch, [device])[0]
    else:
        # unwrap the DataContainer to get the plain meta information
        batch['img_metas'] = batch['img_metas'].data[0]

    with OutputHook(model, outputs=outputs, as_tensor=False) as hook:
        with torch.no_grad():
            result = model(
                img=batch['img'],
                img_metas=batch['img_metas'],
                return_loss=False,
                return_heatmap=return_heatmap)

        if return_heatmap:
            hook.layer_outputs['heatmap'] = result['output_heatmap']

        returned_outputs = [hook.layer_outputs]
        pose_results = [{'keypoints': pred[:, :3]} for pred in result['preds']]

    return pose_results, returned_outputs