Example #1
    def prepare_test_img(self, idx):
        sample_id = self.sample_ids[idx]
        # load image
        img = mmcv.imread(self.img_filenames[idx])
        img, img_shape, pad_shape, scale_factor = self.img_transform(
            img, 1, False)

        data = dict(img=DC(to_tensor(img), stack=True),
                    img_shape=DC(img_shape, cpu_only=True),
                    sample_idx=DC(sample_id, cpu_only=True),
                    calib=DC(self.calib, cpu_only=True))

        if self.with_mask:
            raise NotImplementedError

        if self.with_point:
            points = read_lidar(self.lidar_filenames[idx])
            points = get_lidar_in_image_fov(points,
                                            self.calib,
                                            0,
                                            0,
                                            img_shape[1],
                                            img_shape[0],
                                            clip_distance=0.1)

        # `points` is only defined when with_point is True; this branch
        # implicitly assumes that flag is set whenever a generator is used
        if self.generator is not None:
            voxels, coordinates, num_points = self.generator.generate(points)
            data['voxels'] = DC(to_tensor(voxels))
            data['coordinates'] = DC(to_tensor(coordinates))
            data['num_points'] = DC(to_tensor(num_points))
            data['anchors'] = DC(to_tensor(self.anchors))

        return data
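A usage note: a minimal sketch (not from this source) of how such prepare_test_img/prepare_train_img methods are typically wired into an mmdet-style dataset's __getitem__; the test_mode attribute is an assumption here.

def __getitem__(self, idx):
    if self.test_mode:
        return self.prepare_test_img(idx)
    # in training mode, resample until an index with valid ground truth
    # is found (prepare_train_img returns None otherwise)
    while True:
        data = self.prepare_train_img(idx)
        if data is not None:
            return data
        idx = np.random.randint(len(self.sample_ids))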
Example #2
        def prepare_single(img, scale, flip):
            _img, img_shape, pad_shape, scale_factor = self.img_transform(
                img, scale, flip, keep_ratio=self.resize_keep_ratio)
            _img = to_tensor(_img)
            _img_meta = dict(
                ori_shape=(img_info['height'], img_info['width'], 3),
                img_shape=img_shape,
                pad_shape=pad_shape,
                scale_factor=scale_factor,
                flip=flip,
                img_name=row['img_name'],
                path=row['path'])
            return _img, _img_meta
Example #3
        def prepare_single(img, scale, flip, proposal=None):
            _img, img_shape, pad_shape, scale_factor = self.img_transform(
                img, scale, flip, keep_ratio=self.resize_keep_ratio)
            _img = to_tensor(_img)
            _img_meta = dict(
                ori_shape=(img_info['height'], img_info['width'], 3),
                img_shape=img_shape,
                pad_shape=pad_shape,
                scale_factor=scale_factor,
                flip=flip)
            if proposal is not None:
                if proposal.shape[1] == 5:
                    score = proposal[:, 4, None]
                    proposal = proposal[:, :4]
                else:
                    score = None
                _proposal = self.bbox_transform(proposal, img_shape,
                                                scale_factor, flip)
                _proposal = np.hstack(
                    [_proposal, score]) if score is not None else _proposal
                _proposal = to_tensor(_proposal)
            else:
                _proposal = None
            return _img, _img_meta, _proposal
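For reference, a minimal sketch of what the bbox_transform call above is assumed to do (mmdet-style semantics: scale the boxes by scale_factor, then mirror them horizontally inside the resized image when flip=True); the helper name is hypothetical.

import numpy as np

def bbox_transform_sketch(bboxes, img_shape, scale_factor, flip):
    bboxes = bboxes * scale_factor
    if flip:
        w = img_shape[1]
        flipped = bboxes.copy()
        flipped[..., 0] = w - bboxes[..., 2] - 1
        flipped[..., 2] = w - bboxes[..., 0] - 1
        bboxes = flipped
    return bboxes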
Example #4
    def prepare_test_img(self, idx):
        """Prepare an image for testing (multi-scale and flipping)"""
        sample_id = self.sample_ids[idx]

        # load image
        img = mmcv.imread(osp.join(self.img_prefix, '%06d.png' % sample_id))
        img, img_shape, pad_shape, scale_factor = self.img_transform(
            img, 1, False)

        calib = Calibration(osp.join(self.calib_prefix,
                                     '%06d.txt' % sample_id))

        if self.with_label:
            objects = read_label(
                osp.join(self.label_prefix, '%06d.txt' % sample_id))
            gt_bboxes = [
                obj.box3d for obj in objects
                if obj.type in self.class_name
            ]

            if len(gt_bboxes) != 0:
                gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
                gt_labels = np.ones(len(gt_bboxes), dtype=np.int64)
                # transfer from cam to lidar coordinates
                gt_bboxes[:, :3] = project_rect_to_velo(
                    gt_bboxes[:, :3], calib)
            else:
                gt_bboxes = None
                gt_labels = None

        img_meta = dict(img_shape=img_shape, sample_idx=sample_id, calib=calib)

        data = dict(img=to_tensor(img), img_meta=DC(img_meta, cpu_only=True))

        if self.anchors is not None:
            data['anchors'] = DC(to_tensor(self.anchors.astype(np.float32)))

        if self.with_mask:
            raise NotImplementedError

        if self.with_point:
            points = read_lidar(
                osp.join(self.lidar_prefix, '%06d.bin' % sample_id))

        if isinstance(self.generator, VoxelGenerator):
            #voxels, coordinates, num_points = self.generator.generate(points)

            voxel_size = self.generator.voxel_size
            pc_range = self.generator.point_cloud_range
            grid_size = self.generator.grid_size

            keep = points_op_cpu.points_bound_kernel(points, pc_range[:3],
                                                     pc_range[3:])
            voxels = points[keep, :]
            coordinates = (
                (voxels[:, [2, 1, 0]] -
                 np.array(pc_range[[2, 1, 0]], dtype=np.float32)) /
                np.array(voxel_size[::-1], dtype=np.float32)).astype(np.int32)
            num_points = np.ones(len(voxels), dtype=np.int32)

            data['voxels'] = DC(to_tensor(voxels.astype(np.float32)))
            data['coordinates'] = DC(to_tensor(coordinates))
            data['num_points'] = DC(to_tensor(num_points))

            if self.anchor_area_threshold >= 0 and self.anchors is not None:
                dense_voxel_map = sparse_sum_for_anchors_mask(
                    coordinates, tuple(grid_size[::-1][1:]))
                dense_voxel_map = dense_voxel_map.cumsum(0)
                dense_voxel_map = dense_voxel_map.cumsum(1)
                anchors_area = fused_get_anchors_area(dense_voxel_map,
                                                      self.anchors_bv,
                                                      voxel_size, pc_range,
                                                      grid_size)
                anchors_mask = anchors_area > self.anchor_area_threshold
                data['anchors_mask'] = DC(
                    to_tensor(anchors_mask.astype(np.uint8)))

        if self.with_label:
            data['gt_labels'] = DC(to_tensor(gt_labels), cpu_only=True)
            data['gt_bboxes'] = DC(to_tensor(gt_bboxes), cpu_only=True)
        else:
            data['gt_labels'] = DC(None, cpu_only=True)
            data['gt_bboxes'] = DC(None, cpu_only=True)

        return data
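The anchors_mask computation above builds a 2D integral image: after the double cumsum, the number of occupied BEV voxels under any axis-aligned rectangle can be read off with four lookups. A standalone sketch of that trick (helper name hypothetical):

import numpy as np

def rect_sum(integral, y0, x0, y1, x1):
    # occupancy sum over the half-open rectangle [y0:y1, x0:x1]
    total = integral[y1 - 1, x1 - 1]
    if y0 > 0:
        total -= integral[y0 - 1, x1 - 1]
    if x0 > 0:
        total -= integral[y1 - 1, x0 - 1]
    if y0 > 0 and x0 > 0:
        total += integral[y0 - 1, x0 - 1]
    return total

occupancy = np.zeros((4, 4), dtype=np.int32)
occupancy[1, 1] = occupancy[2, 2] = 1
integral = occupancy.cumsum(0).cumsum(1)
assert rect_sum(integral, 0, 0, 3, 3) == 2  # both occupied voxels inside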
Example #5
    def prepare_train_img(self, idx):
        sample_id = self.sample_ids[idx]

        # load image
        img = mmcv.imread(osp.join(self.img_prefix, '%06d.png' % sample_id))

        img, img_shape, pad_shape, scale_factor = self.img_transform(
            img, 1, False)

        objects = read_label(
            osp.join(self.label_prefix, '%06d.txt' % sample_id))
        calib = Calibration(osp.join(self.calib_prefix,
                                     '%06d.txt' % sample_id))

        gt_bboxes = [
            obj.box3d for obj in objects
            if obj.type not in ["DontCare"]
        ]
        gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
        gt_types = [
            obj.type for obj in objects
            if obj.type not in ["DontCare"]
        ]

        # transfer from cam to lidar coordinates
        if len(gt_bboxes) != 0:
            gt_bboxes[:, :3] = project_rect_to_velo(gt_bboxes[:, :3], calib)

        img_meta = dict(img_shape=img_shape, sample_idx=sample_id, calib=calib)

        data = dict(img=to_tensor(img), img_meta=DC(img_meta, cpu_only=True))

        if self.anchors is not None:
            data['anchors'] = DC(to_tensor(self.anchors.astype(np.float32)))

        if self.with_mask:
            raise NotImplementedError

        if self.with_point:
            points = read_lidar(
                osp.join(self.lidar_prefix, '%06d.bin' % sample_id))

        if self.augmentor is not None and not self.test_mode:
            sampled_gt_boxes, sampled_gt_types, sampled_points = \
                self.augmentor.sample_all(gt_bboxes, gt_types)
            assert sampled_points.dtype == np.float32
            gt_bboxes = np.concatenate([gt_bboxes, sampled_gt_boxes])
            gt_types = gt_types + sampled_gt_types
            assert len(gt_types) == len(gt_bboxes)

            # to avoid overlapping point (option)
            masks = points_in_rbbox(points, sampled_gt_boxes)
            #masks = points_op_cpu.points_in_bbox3d_np(points[:,:3], sampled_gt_boxes)

            points = points[np.logical_not(masks.any(-1))]

            # paste sampled points to the scene
            points = np.concatenate([sampled_points, points], axis=0)

            # select the classes of interest
            selected = [
                i for i in range(len(gt_types))
                if gt_types[i] in self.class_names
            ]
            gt_bboxes = gt_bboxes[selected, :]
            gt_types = [gt_types[i] for i in selected]

            # force van to have same label as car
            gt_types = ['Car' if n == 'Van' else n for n in gt_types]
            gt_labels = np.array(
                [self.class_names.index(n) + 1 for n in gt_types],
                dtype=np.int64)

            self.augmentor.noise_per_object_(gt_bboxes, points, num_try=100)
            gt_bboxes, points = self.augmentor.random_flip(gt_bboxes, points)
            gt_bboxes, points = self.augmentor.global_rotation(
                gt_bboxes, points)
            gt_bboxes, points = self.augmentor.global_scaling(
                gt_bboxes, points)

        if isinstance(self.generator, VoxelGenerator):
            #voxels, coordinates, num_points = self.generator.generate(points)
            voxel_size = self.generator.voxel_size
            pc_range = self.generator.point_cloud_range
            grid_size = self.generator.grid_size

            keep = points_op_cpu.points_bound_kernel(points, pc_range[:3],
                                                     pc_range[3:])
            voxels = points[keep, :]
            coordinates = (
                (voxels[:, [2, 1, 0]] -
                 np.array(pc_range[[2, 1, 0]], dtype=np.float32)) /
                np.array(voxel_size[::-1], dtype=np.float32)).astype(np.int32)
            num_points = np.ones(len(voxels), dtype=np.int32)

            data['voxels'] = DC(to_tensor(voxels.astype(np.float32)))
            data['coordinates'] = DC(to_tensor(coordinates))
            data['num_points'] = DC(to_tensor(num_points))

            if self.anchor_area_threshold >= 0 and self.anchors is not None:
                dense_voxel_map = sparse_sum_for_anchors_mask(
                    coordinates, tuple(grid_size[::-1][1:]))
                dense_voxel_map = dense_voxel_map.cumsum(0)
                dense_voxel_map = dense_voxel_map.cumsum(1)
                anchors_area = fused_get_anchors_area(dense_voxel_map,
                                                      self.anchors_bv,
                                                      voxel_size, pc_range,
                                                      grid_size)
                anchors_mask = anchors_area > self.anchor_area_threshold
                data['anchors_mask'] = DC(
                    to_tensor(anchors_mask.astype(np.uint8)))

            # filter gt_bbox out of range
            bv_range = self.generator.point_cloud_range[[0, 1, 3, 4]]
            mask = filter_gt_box_outside_range(gt_bboxes, bv_range)
            gt_bboxes = gt_bboxes[mask]
            gt_labels = gt_labels[mask]

        else:
            raise NotImplementedError

        # skip the image if there is no valid gt bbox
        if len(gt_bboxes) == 0:
            return None

        # limit rad to [-pi, pi]
        gt_bboxes[:, 6] = limit_period(gt_bboxes[:, 6],
                                       offset=0.5,
                                       period=2 * np.pi)

        if self.with_label:
            data['gt_labels'] = DC(to_tensor(gt_labels))
            data['gt_bboxes'] = DC(to_tensor(gt_bboxes))

        return data
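A minimal sketch of limit_period as used above (formula assumed from SECOND-style codebases): it wraps a value into [-offset * period, (1 - offset) * period), so offset=0.5 with period=2*pi yields [-pi, pi).

import numpy as np

def limit_period(val, offset=0.5, period=np.pi):
    return val - np.floor(val / period + offset) * period

assert np.isclose(limit_period(1.5 * np.pi, period=2 * np.pi), -0.5 * np.pi)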
Example #6
    def prepare_train_img(self, idx):

        try:
            img_info = self.img_infos[idx]
            # load image
            img = img_info.image()  # mmcv.imread(osp.join(self.img_prefix, img_info['filename']))
            # load proposals if necessary
            if self.proposals is not None:
                proposals = self.proposals[idx][:self.num_max_proposals]
                # TODO: Handle empty proposals properly. Currently images with
                # no proposals are just ignored, but they can be used for
                # training in concept.
                if len(proposals) == 0:
                    return None
                if not (proposals.shape[1] == 4 or proposals.shape[1] == 5):
                    raise AssertionError(
                        'proposals should have shape (n, 4) or (n, 5), '
                        'but found {}'.format(proposals.shape))
                if proposals.shape[1] == 5:
                    scores = proposals[:, 4, None]
                    proposals = proposals[:, :4]
                else:
                    scores = None

            ann = self.get_ann_info(idx)
            gt_bboxes = ann['bboxes']
            gt_labels = ann['labels']
            gt_masks = None
            gt_bboxes_ignore = None
            if self.with_mask:
                gt_masks = ann['masks']
            if self.with_crowd:
                gt_bboxes_ignore = ann['bboxes_ignore']

            # dumpData(f"d:/ttt/{img_info.id}_tmp_bbox.jpg", f"d:/ttt/{img_info.id}_tmp_mask.jpg", img, gt_labels-1,
            #          gt_bboxes, gt_masks, self.CLASSES)

            img, gt_bboxes, gt_masks, gt_bboxes_ignore = self.applyAugmentations(img, gt_bboxes, gt_masks,
                                                                                 gt_bboxes_ignore, True)

            # dumpData(f"d:/ttt/{img_info.id}_tmp_bbox_aug.jpg", f"d:/ttt/{img_info.id}_tmp_mask_aug.jpg", img, gt_labels-1,
            #          gt_bboxes, gt_masks, self.CLASSES)

            # skip the image if there is no valid gt bbox
            if len(gt_bboxes) == 0:
                return None

            # extra augmentation
            if self.extra_aug is not None:
                # img = self.extra_aug(img)
                img, gt_bboxes, gt_labels = self.extra_aug(img, gt_bboxes,
                                                           gt_labels)

            # apply transforms
            flip = np.random.rand() < self.flip_ratio
            # randomly sample a scale

            img_scale = random_scale(self.img_scales, self.multiscale_mode)
            img, img_shape, pad_shape, scale_factor = self.img_transform(
                img, img_scale, flip, keep_ratio=self.resize_keep_ratio)
            img = img.copy()
            if self.with_seg:
                # gt_seg = mmcv.imread(
                #     osp.join(self.seg_prefix, img_info['file_name'].replace(
                #         'jpg', 'png')),
                #     flag='unchanged')
                # gt_seg = self.seg_transform(gt_seg.squeeze(), img_scale, flip)
                # gt_seg = mmcv.imrescale(
                #     gt_seg, self.seg_scale_factor, interpolation='nearest')
                # gt_seg = gt_seg[None, ...]
                pass
            if self.proposals is not None:
                proposals = self.bbox_transform(proposals, img_shape, scale_factor,
                                                flip)
                proposals = np.hstack(
                    [proposals, scores]) if scores is not None else proposals
            gt_bboxes = self.bbox_transform(gt_bboxes, img_shape, scale_factor,
                                            flip)
            if self.with_crowd:
                gt_bboxes_ignore = self.bbox_transform(gt_bboxes_ignore, img_shape,
                                                       scale_factor, flip)
            if self.with_mask:
                gt_masks = self.mask_transform(gt_masks, pad_shape,
                                               scale_factor, flip)

            ori_shape = (img_info['height'], img_info['width'], 3)
            img_meta = dict(
                id=img_info['id'],
                ori_shape=ori_shape,
                img_shape=img_shape,
                pad_shape=pad_shape,
                scale_factor=scale_factor,
                flip=flip)

            # imgt = img.transpose(1, 2, 0)
            # imgt -= np.min(imgt)
            # imgt *= (255 / np.max(imgt))
            # imgt = imgt.astype(np.uint8)
            # dumpData(f"d:/ttt/{img_info.id}_tmp_bbox_aug1.jpg", f"d:/ttt/{img_info.id}_tmp_mask_aug1.jpg", imgt,
            #          gt_labels - 1,
            #          gt_bboxes, gt_masks, self.CLASSES)

            data = dict(
                img=DC(to_tensor(img), stack=True),
                img_meta=DC(img_meta, cpu_only=True),
                gt_bboxes=DC(to_tensor(gt_bboxes)))
            if self.proposals is not None:
                data['proposals'] = DC(to_tensor(proposals))
            if self.with_label:
                data['gt_labels'] = DC(to_tensor(gt_labels))
            if self.with_crowd:
                data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore))
            if self.with_mask:
                data['gt_masks'] = DC(gt_masks, cpu_only=True)
            # if self.with_seg:
            #     data['gt_semantic_seg'] = DC(to_tensor(gt_seg), stack=True)

            return data
        finally:
            img_info.dispose()
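A rough sketch of the DataContainer (DC) contract these examples rely on, assuming mmcv.parallel semantics: fields marked stack=True are batched into a single tensor by the collate function, while cpu_only fields are passed through as plain Python objects and never moved to the GPU.

import torch
from mmcv.parallel import DataContainer as DC, collate

samples = [dict(img=DC(torch.zeros(3, 32, 32), stack=True),
                img_meta=DC(dict(idx=i), cpu_only=True))
           for i in range(2)]
batch = collate(samples, samples_per_gpu=2)
# batch['img'].data[0] is one stacked (2, 3, 32, 32) tensor;
# batch['img_meta'].data[0] is the list of meta dicts, kept on CPU.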
Example #7
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    if args.ckpt:
        cfg.resume_from = args.ckpt

    cfg.test_cfg.rcnn.score_thr = 0.5

    FOCAL_LENGTH = cfg.get('FOCAL_LENGTH', 1000)

    model = build_detector(cfg.model,
                           train_cfg=cfg.train_cfg,
                           test_cfg=cfg.test_cfg)
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(mmdet_version=__version__,
                                          config=cfg.text,
                                          CLASSES=('Human', ))
    # add an attribute for visualization convenience
    model.CLASSES = ('Human', )

    model = MMDataParallel(model, device_ids=[0]).cuda()

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)

    runner = Runner(model, lambda x: x, optimizer, cfg.work_dir, cfg.log_level)
    runner.resume(cfg.resume_from)
    model = runner.model
    model.eval()
    # necessary for headless rendering
    os.environ['PYOPENGL_PLATFORM'] = 'egl'
    render = Renderer(focal_length=FOCAL_LENGTH)
    img_transform = ImageTransform(size_divisor=32, **img_norm_cfg)
    img_scale = cfg.common_val_cfg.img_scale

    with torch.no_grad():
        folder_name = args.image_folder
        output_folder = args.output_folder
        os.makedirs(output_folder, exist_ok=True)
        images = os.listdir(folder_name)
        for image in images:
            file_name = osp.join(folder_name, image)
            img = cv2.imread(file_name)
            ori_shape = img.shape

            img, img_shape, pad_shape, scale_factor = img_transform(
                img, img_scale)

            # pad to the fixed input size (works around multi-GPU batching)
            padded_img = np.zeros((img.shape[0], img_scale[1], img_scale[0]),
                                  dtype=img.dtype)
            padded_img[:, :img.shape[-2], :img.shape[-1]] = img
            img = padded_img

            assert img.shape[1] == 512 and img.shape[2] == 832, \
                'Image shape incorrect'

            data_batch = dict(
                img=DC([to_tensor(img[None, ...])], stack=True),
                img_meta=DC([{
                    'img_shape': img_shape,
                    'scale_factor': scale_factor,
                    'flip': False,
                    'ori_shape': ori_shape
                }],
                            cpu_only=True),
            )
            bbox_results, pred_results = model(**data_batch, return_loss=False)

            if pred_results is not None:
                pred_results['bboxes'] = bbox_results[0]
                img = denormalize(img)
                img_viz = prepare_dump(pred_results, img, render, bbox_results,
                                       FOCAL_LENGTH)
                cv2.imwrite(
                    f'{file_name.replace(folder_name, output_folder)}.output.jpg',
                    img_viz[:, :, ::-1])
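denormalize is not shown in this snippet; a hypothetical counterpart to ImageTransform's normalization (mean/std values assumed from the usual ImageNet img_norm_cfg) might look like:

import numpy as np

def denormalize(img,
                mean=(123.675, 116.28, 103.53),
                std=(58.395, 57.12, 57.375)):
    img = img.transpose(1, 2, 0)  # CHW -> HWC
    img = img * np.array(std) + np.array(mean)
    return np.clip(img, 0, 255).astype(np.uint8)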
Example #8
    def prepare_train_img(self, idx):

        img_info = self.img_infos[idx]
        # load image
        assert osp.isfile(osp.join(self.img_prefix, img_info['filename']))
        img = mmcv.imread(osp.join(self.img_prefix, img_info['filename']))
        assert len(img.shape) == 3
        # load proposals if necessary
        if self.proposals is not None:
            proposals = self.proposals[idx][:self.num_max_proposals]
            # TODO: Handle empty proposals properly. Currently images with
            # no proposals are just ignored, but they can be used for
            # training in concept.
            if len(proposals) == 0:
                return None
            if not (proposals.shape[1] == 4 or proposals.shape[1] == 5):
                raise AssertionError(
                    'proposals should have shape (n, 4) or (n, 5), '
                    'but found {}'.format(proposals.shape))
            if proposals.shape[1] == 5:
                scores = proposals[:, 4, None]
                proposals = proposals[:, :4]
            else:
                scores = None
        # get ann here.
        ann = self.get_ann_info(idx)
        gt_bboxes = ann['bboxes']
        gt_labels = ann['labels']
        if self.with_crowd:
            gt_bboxes_ignore = ann['bboxes_ignore']

        # skip the image if there is no valid gt bbox
        if len(gt_bboxes) == 0:
            return None

        # extra augmentation for photometric
        if self.extra_aug is not None:
            img = self.extra_aug.first_transform(img)
        img = img.copy()

        # apply transforms
        flip = np.random.rand() < self.flip_ratio
        # sample a rescale size and pad the image; scale_factor is used
        # to guide the transformation of bboxes and masks below.
        img_scale = random_scale(self.img_scales, mode=self.resize_mode)
        img, img_shape, pad_shape, scale_factor = self.img_transform(
            img, img_scale, flip, keep_ratio=self.resize_keep_ratio)
        img = img.copy()
        if self.proposals is not None:
            proposals = self.bbox_transform(proposals, img_shape, scale_factor,
                                            flip)
            proposals = np.hstack(
                [proposals, scores]) if scores is not None else proposals
        gt_bboxes = self.bbox_transform(gt_bboxes, img_shape, scale_factor,
                                        flip)
        if self.with_crowd:
            gt_bboxes_ignore = self.bbox_transform(gt_bboxes_ignore, img_shape,
                                                   scale_factor, flip)
        if self.with_mask:
            gt_masks = self.mask_transform(ann['masks'], pad_shape,
                                           scale_factor, flip)
            gt_ignore_masks = ann['ignore_masks']
            if len(gt_ignore_masks) > 1:
                gt_ignore_masks = self.mask_transform(gt_ignore_masks,
                                                      pad_shape, scale_factor,
                                                      flip)
            else:
                gt_ignore_masks = np.array([], dtype=np.uint8)
        # extra augmentation
        # different from the original ones.
        # there use mask to generate img, gt_bboxes and gt_labels.
        # extra_aug only supports random crop.
        if self.extra_aug is not None and self.with_mask:
            """ crop by masks, select gt_masks and gt_bboxes. 
                the crop size is the ori_shape/img_shape/pad_shape
                scale_factor use the scale_factor.
                assert the input image shape: [3, H, W] and the output image shape
                [3, H, W]
            """
            if self.with_crowd:
                img, gt_bboxes, gt_labels, gt_bboxes_ignore, gt_masks, img_shape, pad_shape = \
                    self.extra_aug(img=img, boxes=gt_bboxes, labels=gt_labels, masks=gt_masks, ignore_bboxes=gt_bboxes_ignore, ignore_masks=gt_ignore_masks,
                                   img_shape=img_shape, pad_shape=pad_shape)

            else:
                img, gt_bboxes, gt_labels, gt_bboxes_ignore, gt_masks, img_shape, pad_shape = \
                    self.extra_aug(img=img, boxes=gt_bboxes, labels=gt_labels, masks=gt_masks, ignore_bboxes=np.zeros((0, 4), dtype=np.float32), ignore_masks=gt_ignore_masks,
                                   img_shape=img_shape, pad_shape=pad_shape)
            img = img.copy()
            # self.debug_rc(img, gt_bboxes, gt_masks, img_info["filename"])

        if len(gt_bboxes) == 0:
            return None
        # after mask-based cropping, ori_shape becomes the crop size
        if self.extra_aug is not None and self.with_mask:
            ori_shape = img_shape
        else:
            ori_shape = (img_info['height'], img_info['width'], 3)

        img_meta = dict(ori_shape=ori_shape,
                        img_shape=img_shape,
                        pad_shape=pad_shape,
                        scale_factor=scale_factor,
                        flip=flip)

        data = dict(img=DC(to_tensor(img), stack=True),
                    img_meta=DC(img_meta, cpu_only=True),
                    gt_bboxes=DC(to_tensor(gt_bboxes)))
        if self.proposals is not None:
            data['proposals'] = DC(to_tensor(proposals))
        if self.with_label:
            data['gt_labels'] = DC(to_tensor(gt_labels))
        if self.with_crowd:
            data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore))
        if self.with_mask:
            data['gt_masks'] = DC(gt_masks, cpu_only=True)
        return data
Example #9
    def prepare_test_img(self, idx):
        """Prepare an image for testing (multi-scale and flipping)"""
        img_info = self.img_infos[idx]
        img = mmcv.imread(osp.join(self.img_prefix, img_info['filename']))
        if self.proposals is not None:
            proposal = self.proposals[idx][:self.num_max_proposals]
            if not (proposal.shape[1] == 4 or proposal.shape[1] == 5):
                raise AssertionError(
                    'proposals should have shape (n, 4) or (n, 5), '
                    'but found {}'.format(proposal.shape))
        else:
            proposal = None

        def prepare_single(img, scale, flip, proposal=None):
            _img, img_shape, pad_shape, scale_factor = self.img_transform(
                img, scale, flip, keep_ratio=self.resize_keep_ratio)
            _img = to_tensor(_img)
            _img_meta = dict(
                ori_shape=(img_info['height'], img_info['width'], 3),
                img_shape=img_shape,
                pad_shape=pad_shape,
                scale_factor=scale_factor,
                flip=flip)
            if proposal is not None:
                if proposal.shape[1] == 5:
                    score = proposal[:, 4, None]
                    proposal = proposal[:, :4]
                else:
                    score = None
                _proposal = self.bbox_transform(proposal, img_shape,
                                                scale_factor, flip)
                _proposal = np.hstack(
                    [_proposal, score]) if score is not None else _proposal
                _proposal = to_tensor(_proposal)
            else:
                _proposal = None
            return _img, _img_meta, _proposal

        imgs = []
        img_metas = []
        proposals = []
        for scale in self.img_scales:
            _img, _img_meta, _proposal = prepare_single(
                img, scale, False, proposal)
            imgs.append(_img)
            img_metas.append(DC(_img_meta, cpu_only=True))
            proposals.append(_proposal)
            if self.flip_ratio > 0:
                _img, _img_meta, _proposal = prepare_single(
                    img, scale, True, proposal)
                imgs.append(_img)
                img_metas.append(DC(_img_meta, cpu_only=True))
                proposals.append(_proposal)
        data = dict(img=imgs, img_meta=img_metas)
        # if self.proposals is not None:
        #     data['proposals'] = proposals
        # if self.with_cap is not None:
        #     data['gt_caps'] = self.enc_captions[idx*self.cpi:(idx+1)*self.cpi]
        #     data['gt_caplens'] = self.caplens[idx*self.cpi:(idx+1)*self.cpi]
        if self.with_seg:
            gt_seg = mmcv.imread(
                osp.join(self.seg_prefix,
                         img_info['file_name'].replace('jpg', 'png')),
                flag='unchanged')
            gt_seg = self.seg_transform(gt_seg.squeeze(), self.img_scales[0],
                                        False)
            # gt_seg = mmcv.imrescale(
            #     gt_seg, self.seg_scale_factor, interpolation='nearest')
            # gt_seg = resize_label(gt_seg, self.size_divisor)
            gt_seg = gt_seg[None, ...]
            # np.long is deprecated/removed in recent NumPy; use np.int64
            data['gt_seg'] = DC(to_tensor(gt_seg.astype(np.int64)), stack=True)

        if self.with_cap:
            rnd_idx = idx * self.cpi + np.random.randint(self.cpi)
            # data['gt_caps'] = DC(to_tensor(self.enc_captions[rnd_idx]))
            data['gt_caps'] = to_tensor(self.enc_captions[rnd_idx])
            # data['gt_caplens'] = DC(to_tensor(self.caplens[rnd_idx]))
            data['gt_caplens'] = to_tensor(np.array(self.caplens[rnd_idx]))
            data['allcaps'] = to_tensor(
                self.enc_captions[idx * self.cpi:(idx + 1) * self.cpi])
        return data
Example #10
    def prepare_train_img(self, idx):
        img_info = self.img_infos[idx]
        # load image
        img = mmcv.imread(osp.join(self.img_prefix, img_info['filename']))
        # load proposals if necessary
        if self.proposals is not None:
            proposals = self.proposals[idx][:self.num_max_proposals]
            # TODO: Handle empty proposals properly. Currently images with
            # no proposals are just ignored, but they can be used for
            # training in concept.
            if len(proposals) == 0:
                return None
            if not (proposals.shape[1] == 4 or proposals.shape[1] == 5):
                raise AssertionError(
                    'proposals should have shape (n, 4) or (n, 5), '
                    'but found {}'.format(proposals.shape))
            if proposals.shape[1] == 5:
                scores = proposals[:, 4, None]
                proposals = proposals[:, :4]
            else:
                scores = None

        ann = self.get_ann_info(idx)
        gt_bboxes = ann['bboxes']
        gt_labels = ann['labels']
        if self.with_crowd:
            gt_bboxes_ignore = ann['bboxes_ignore']

        # skip the image if there is no valid gt bbox
        if len(gt_bboxes) == 0:
            return None

        # extra augmentation
        if self.extra_aug is not None:
            img, gt_bboxes, gt_labels = self.extra_aug(img, gt_bboxes,
                                                       gt_labels)

        # apply transforms
        flip = np.random.rand() < self.flip_ratio
        # randomly sample a scale
        img_scale = random_scale(self.img_scales, self.multiscale_mode)
        img, img_shape, pad_shape, scale_factor = self.img_transform(
            img, img_scale, flip, keep_ratio=self.resize_keep_ratio)
        img = img.copy()
        if self.with_seg:
            gt_seg = mmcv.imread(
                osp.join(self.seg_prefix,
                         img_info['file_name'].replace('jpg', 'png')),
                flag='unchanged')
            gt_seg = self.seg_transform(gt_seg.squeeze(), img_scale, flip)
            gt_seg = gt_seg[None, ...]
            # gt_seg = mmcv.imrescale(
            #     gt_seg, self.seg_scale_factor, interpolation='nearest')
            # gt_seg = resize_label(gt_seg, self.size_divisor)
            # gt_seg = gt_seg[None, ...]
        if self.proposals is not None:
            proposals = self.bbox_transform(proposals, img_shape, scale_factor,
                                            flip)
            proposals = np.hstack(
                [proposals, scores]) if scores is not None else proposals
        gt_bboxes = self.bbox_transform(gt_bboxes, img_shape, scale_factor,
                                        flip)
        if self.with_crowd:
            gt_bboxes_ignore = self.bbox_transform(gt_bboxes_ignore, img_shape,
                                                   scale_factor, flip)
        if self.with_mask:
            gt_masks = self.mask_transform(ann['masks'], pad_shape,
                                           scale_factor, flip)

        ori_shape = (img_info['height'], img_info['width'], 3)
        img_meta = dict(ori_shape=ori_shape,
                        img_shape=img_shape,
                        pad_shape=pad_shape,
                        scale_factor=scale_factor,
                        flip=flip)

        data = dict(img=DC(to_tensor(img), stack=True),
                    img_meta=DC(img_meta, cpu_only=True),
                    gt_bboxes=DC(to_tensor(gt_bboxes)))
        if self.proposals is not None:
            data['proposals'] = DC(to_tensor(proposals))
        if self.with_label:
            data['gt_labels'] = DC(to_tensor(gt_labels))
        if self.with_crowd:
            data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore))
        if self.with_mask:
            data['gt_masks'] = DC(gt_masks, cpu_only=True)
        if self.with_seg:
            # np.long is deprecated/removed in recent NumPy; use np.int64
            data['gt_seg'] = DC(to_tensor(gt_seg.astype(np.int64)), stack=True)
        if self.with_cap:
            rnd_idx = idx * self.cpi + np.random.randint(self.cpi)
            # data['gt_caps'] = DC(to_tensor(self.enc_captions[rnd_idx]))
            data['gt_caps'] = to_tensor(self.enc_captions[rnd_idx])
            # data['gt_caplens'] = DC(to_tensor(self.caplens[rnd_idx]))
            data['gt_caplens'] = to_tensor(np.array(self.caplens[rnd_idx]))
        return data
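Finally, a minimal sketch of what random_scale is assumed to do in these examples (old-mmdet convention; the body here is an illustration, not the library source): 'value' mode picks one of the given scales at random, while 'range' mode samples each edge uniformly between the extremes.

import numpy as np

def random_scale_sketch(img_scales, mode='range'):
    if len(img_scales) == 1 or mode == 'value':
        return img_scales[np.random.randint(len(img_scales))]
    long_edges = [max(s) for s in img_scales]
    short_edges = [min(s) for s in img_scales]
    long_edge = np.random.randint(min(long_edges), max(long_edges) + 1)
    short_edge = np.random.randint(min(short_edges), max(short_edges) + 1)
    return (long_edge, short_edge)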