Example 1
import time

import mmcv

# `model` and `inference_detector` are assumed to have been set up earlier
# (e.g. a text detector built from a config file and a checkpoint).

# Datalist of SCUT-CTW1500
test_dataset = './datalist/ctw1500_test_datalist.json'
img_prefix = '/path/to/Img_prefix/CTW1500/'

out_dir = 'result'

test_file = mmcv.load(test_dataset)
cnt = 0
time_sum = 0.0
out_dict = {}

# Inference and visualize image one by one
for filename in test_file:
    # Load images
    img_path = img_prefix + filename
    img = mmcv.imread(img_path)
    img_copy = img.copy()
    img_name = img_path.split("/")[-1]

    # Inference
    print('predicting {} - {}'.format(cnt, img_path))
    time_start = time.time()
    result = inference_detector(model, img_path)
    time_end = time.time()
    time_sum += (time_end - time_start)
    print(result)

    # Results visualization
    bboxes = []
    for i in range(len(result["points"])):
        points2 = result["points"][i]
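The loop above is cut off at this point. As a hedged sketch (not the original author's code), a small helper like the one below could render each predicted polygon and save the visualization; it assumes every entry in result['points'] is a flat [x1, y1, x2, y2, ...] coordinate list, which matches how points2 is indexed above.

import cv2
import mmcv
import numpy as np


def draw_polygons(img, polygons, out_path, color=(0, 255, 0), thickness=2):
    """Draw flat [x1, y1, x2, y2, ...] polygons on a copy of `img` and save it."""
    canvas = img.copy()
    for poly in polygons:
        # Assumed layout: alternating x/y coordinates.
        pts = np.array(poly, dtype=np.int32).reshape(-1, 2)
        cv2.polylines(canvas, [pts], isClosed=True, color=color,
                      thickness=thickness)
    mmcv.imwrite(canvas, out_path)
    return canvas


# Hypothetical use inside the loop above:
# draw_polygons(img_copy, result['points'], out_dir + '/' + img_name)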
Example 2
def test_flip():
    # test assertion for invalid flip_ratio
    with pytest.raises(AssertionError):
        transform = dict(type='RandomFlip', flip_ratio=1.5)
        build_from_cfg(transform, PIPELINES)
    # test assertion for 0 <= sum(flip_ratio) <= 1
    with pytest.raises(AssertionError):
        transform = dict(type='RandomFlip',
                         flip_ratio=[0.7, 0.8],
                         direction=['horizontal', 'vertical'])
        build_from_cfg(transform, PIPELINES)

    # test assertion for mismatch between number of flip_ratio and direction
    with pytest.raises(AssertionError):
        transform = dict(type='RandomFlip', flip_ratio=[0.4, 0.5])
        build_from_cfg(transform, PIPELINES)

    # test assertion for invalid direction
    with pytest.raises(AssertionError):
        transform = dict(type='RandomFlip',
                         flip_ratio=1.,
                         direction='horizonta')
        build_from_cfg(transform, PIPELINES)

    transform = dict(type='RandomFlip', flip_ratio=1.)
    flip_module = build_from_cfg(transform, PIPELINES)

    results = dict()
    img = mmcv.imread(osp.join(osp.dirname(__file__), '../data/color.jpg'),
                      'color')
    original_img = copy.deepcopy(img)
    results['img'] = img
    results['img2'] = copy.deepcopy(img)
    results['img_shape'] = img.shape
    results['ori_shape'] = img.shape
    # Set initial values for default meta_keys
    results['pad_shape'] = img.shape
    results['scale_factor'] = 1.0
    results['img_fields'] = ['img', 'img2']

    results = flip_module(results)
    assert np.equal(results['img'], results['img2']).all()

    flip_module = build_from_cfg(transform, PIPELINES)
    results = flip_module(results)
    assert np.equal(results['img'], results['img2']).all()
    assert np.equal(original_img, results['img']).all()

    # test flip_ratio is float, direction is list
    transform = dict(type='RandomFlip',
                     flip_ratio=0.9,
                     direction=['horizontal', 'vertical', 'diagonal'])
    flip_module = build_from_cfg(transform, PIPELINES)

    results = dict()
    img = mmcv.imread(osp.join(osp.dirname(__file__), '../data/color.jpg'),
                      'color')
    original_img = copy.deepcopy(img)
    results['img'] = img
    results['img_shape'] = img.shape
    results['ori_shape'] = img.shape
    # Set initial values for default meta_keys
    results['pad_shape'] = img.shape
    results['scale_factor'] = 1.0
    results['img_fields'] = ['img']
    results = flip_module(results)
    if results['flip']:
        assert np.array_equal(
            mmcv.imflip(original_img, results['flip_direction']),
            results['img'])
    else:
        assert np.array_equal(original_img, results['img'])

    # test flip_ratio is list, direction is list
    transform = dict(type='RandomFlip',
                     flip_ratio=[0.3, 0.3, 0.2],
                     direction=['horizontal', 'vertical', 'diagonal'])
    flip_module = build_from_cfg(transform, PIPELINES)

    results = dict()
    img = mmcv.imread(osp.join(osp.dirname(__file__), '../data/color.jpg'),
                      'color')
    original_img = copy.deepcopy(img)
    results['img'] = img
    results['img_shape'] = img.shape
    results['ori_shape'] = img.shape
    # Set initial values for default meta_keys
    results['pad_shape'] = img.shape
    results['scale_factor'] = 1.0
    results['img_fields'] = ['img']
    results = flip_module(results)
    if results['flip']:
        assert np.array_equal(
            mmcv.imflip(original_img, results['flip_direction']),
            results['img'])
    else:
        assert np.array_equal(original_img, results['img'])
Example 3
    def show_result2(self,
                     img,
                     result,
                     score_thr=0.3,
                     bbox_color='green',
                     text_color='green',
                     thickness=1,
                     font_scale=0.5,
                     win_name='',
                     show=False,
                     wait_time=0,
                     out_file=None):
        """Draw `result` over `img`.

        Args:
            img (str or Tensor): The image to be displayed.
            result (Tensor or tuple): The results to draw over `img`
                bbox_result or (bbox_result, segm_result).
            score_thr (float, optional): Minimum score of bboxes to be shown.
                Default: 0.3.
            bbox_color (str or tuple or :obj:`Color`): Color of bbox lines.
            text_color (str or tuple or :obj:`Color`): Color of texts.
            thickness (int): Thickness of lines.
            font_scale (float): Font scales of texts.
            win_name (str): The window name.
            wait_time (int): Value of waitKey param.
                Default: 0.
            show (bool): Whether to show the image.
                Default: False.
            out_file (str or None): The filename to write the image.
                Default: None.

        Returns:
            img (Tensor): Only if not `show` or `out_file`
        """
        img_o = mmcv.imread(img)

        # Draw the masks on a blank canvas whose shape matches the loaded
        # image (and therefore the predicted masks), rather than a hardcoded
        # 500x500 size that would break the mask indexing below.
        img = np.zeros(img_o.shape)
        if isinstance(result, tuple):
            bbox_result, segm_result = result
            if isinstance(segm_result, tuple):
                segm_result = segm_result[0]  # ms rcnn
        else:
            bbox_result, segm_result = result, None
        bboxes = np.vstack(bbox_result)
        labels = [
            np.full(bbox.shape[0], i, dtype=np.int32)
            for i, bbox in enumerate(bbox_result)
        ]
        labels = np.concatenate(labels)
        # draw segmentation masks
        if segm_result is not None and len(labels) > 0:  # non empty
            segms = mmcv.concat_list(segm_result)
            inds = np.where(bboxes[:, -1] > score_thr)[0]
            np.random.seed(42)
            color_masks = [
                np.random.randint(40, 256, (1, 3), dtype=np.uint8)
                for _ in range(max(labels) + 1)
            ]
            for i in inds:
                i = int(i)
                color_mask = color_masks[labels[i]]
                mask = segms[i]
                img[mask] = img[mask] * 0.5 + color_mask * 0.5
        # if out_file is specified, save the image there instead of showing it
        if out_file is not None:
            show = False
            from PIL import Image
            im = Image.fromarray(np.uint8(img))
            im.save(out_file)

        # draw bounding boxes
        # mmcv.imshow_det_bboxes(
        #     img_o,
        #     bboxes,
        #     labels,
        #     class_names=self.CLASSES,
        #     score_thr=score_thr,
        #     bbox_color=bbox_color,
        #     text_color=text_color,
        #     thickness=thickness,
        #     font_scale=font_scale,
        #     win_name=win_name,
        #     show=show,
        #     wait_time=wait_time,
        #     out_file='/disk2/mask_bbox_100/'+out_file.split('/')[-1])

        if not (show or out_file):
            return img
Example 4
def test_resize():
    # test assertion if img_scale is a list
    with pytest.raises(AssertionError):
        transform = dict(type='Resize', img_scale=[1333, 800], keep_ratio=True)
        build_from_cfg(transform, PIPELINES)

    # test assertion for len(img_scale) == 1 when ratio_range is not None
    with pytest.raises(AssertionError):
        transform = dict(type='Resize',
                         img_scale=[(1333, 800), (1333, 600)],
                         ratio_range=(0.9, 1.1),
                         keep_ratio=True)
        build_from_cfg(transform, PIPELINES)

    # test assertion for invalid multiscale_mode
    with pytest.raises(AssertionError):
        transform = dict(type='Resize',
                         img_scale=[(1333, 800), (1333, 600)],
                         keep_ratio=True,
                         multiscale_mode='2333')
        build_from_cfg(transform, PIPELINES)

    # test assertion if both scale and scale_factor are set
    with pytest.raises(AssertionError):
        results = dict(img_prefix=osp.join(osp.dirname(__file__), '../data'),
                       img_info=dict(filename='color.jpg'))
        load = dict(type='LoadImageFromFile')
        load = build_from_cfg(load, PIPELINES)
        transform = dict(type='Resize', img_scale=(1333, 800), keep_ratio=True)
        transform = build_from_cfg(transform, PIPELINES)
        results = load(results)
        results['scale'] = (1333, 800)
        results['scale_factor'] = 1.0
        results = transform(results)

    transform = dict(type='Resize', img_scale=(1333, 800), keep_ratio=True)
    resize_module = build_from_cfg(transform, PIPELINES)

    results = dict()
    img = mmcv.imread(osp.join(osp.dirname(__file__), '../data/color.jpg'),
                      'color')
    results['img'] = img
    results['img2'] = copy.deepcopy(img)
    results['img_shape'] = img.shape
    results['ori_shape'] = img.shape
    # Set initial values for default meta_keys
    results['pad_shape'] = img.shape
    results['img_fields'] = ['img', 'img2']

    results = resize_module(results)
    assert np.equal(results['img'], results['img2']).all()

    results.pop('scale')
    results.pop('scale_factor')
    transform = dict(type='Resize',
                     img_scale=(1280, 800),
                     multiscale_mode='value',
                     keep_ratio=False)
    resize_module = build_from_cfg(transform, PIPELINES)
    results = resize_module(results)
    assert np.equal(results['img'], results['img2']).all()
    assert results['img_shape'] == (800, 1280, 3)
Example 5
def test_multi_scale_flip_aug():
    # test assertion if both scale_factor and img_scale are given
    with pytest.raises(AssertionError):
        transform = dict(type='MultiScaleFlipAug',
                         scale_factor=1.0,
                         img_scale=[(1333, 800)],
                         transforms=[dict(type='Resize')])
        build_from_cfg(transform, PIPELINES)

    # test assertion if both scale_factor and img_scale are None
    with pytest.raises(AssertionError):
        transform = dict(type='MultiScaleFlipAug',
                         scale_factor=None,
                         img_scale=None,
                         transforms=[dict(type='Resize')])
        build_from_cfg(transform, PIPELINES)

    # test assertion if img_scale is not tuple or list of tuple
    with pytest.raises(AssertionError):
        transform = dict(type='MultiScaleFlipAug',
                         img_scale=[1333, 800],
                         transforms=[dict(type='Resize')])
        build_from_cfg(transform, PIPELINES)

    # test assertion if flip_direction is not str or list of str
    with pytest.raises(AssertionError):
        transform = dict(type='MultiScaleFlipAug',
                         img_scale=[(1333, 800)],
                         flip_direction=1,
                         transforms=[dict(type='Resize')])
        build_from_cfg(transform, PIPELINES)

    scale_transform = dict(type='MultiScaleFlipAug',
                           img_scale=[(1333, 800), (1333, 640)],
                           transforms=[dict(type='Resize', keep_ratio=True)])
    transform = build_from_cfg(scale_transform, PIPELINES)

    results = dict()
    img = mmcv.imread(osp.join(osp.dirname(__file__), '../data/color.jpg'),
                      'color')
    results['img'] = img
    results['img_shape'] = img.shape
    results['ori_shape'] = img.shape
    # Set initial values for default meta_keys
    results['pad_shape'] = img.shape
    results['img_fields'] = ['img']

    scale_results = transform(copy.deepcopy(results))
    assert len(scale_results['img']) == 2
    assert scale_results['img'][0].shape == (750, 1333, 3)
    assert scale_results['img_shape'][0] == (750, 1333, 3)
    assert scale_results['img'][1].shape == (640, 1138, 3)
    assert scale_results['img_shape'][1] == (640, 1138, 3)

    scale_factor_transform = dict(
        type='MultiScaleFlipAug',
        scale_factor=[0.8, 1.0, 1.2],
        transforms=[dict(type='Resize', keep_ratio=False)])
    transform = build_from_cfg(scale_factor_transform, PIPELINES)
    scale_factor_results = transform(copy.deepcopy(results))
    assert len(scale_factor_results['img']) == 3
    assert scale_factor_results['img'][0].shape == (230, 409, 3)
    assert scale_factor_results['img_shape'][0] == (230, 409, 3)
    assert scale_factor_results['img'][1].shape == (288, 512, 3)
    assert scale_factor_results['img_shape'][1] == (288, 512, 3)
    assert scale_factor_results['img'][2].shape == (345, 614, 3)
    assert scale_factor_results['img_shape'][2] == (345, 614, 3)

    # test pipeline of coco_detection
    results = dict(img_prefix=osp.join(osp.dirname(__file__), '../data'),
                   img_info=dict(filename='color.jpg'))
    load_cfg, multi_scale_cfg = mmcv.Config.fromfile(
        'configs/_base_/datasets/coco_detection.py').test_pipeline
    load = build_from_cfg(load_cfg, PIPELINES)
    transform = build_from_cfg(multi_scale_cfg, PIPELINES)
    results = transform(load(results))
    assert len(results['img']) == 1
    assert len(results['img_metas']) == 1
    assert isinstance(results['img'][0], torch.Tensor)
    assert isinstance(results['img_metas'][0], mmcv.parallel.DataContainer)
    assert results['img_metas'][0].data['ori_shape'] == (288, 512, 3)
    assert results['img_metas'][0].data['img_shape'] == (750, 1333, 3)
    assert results['img_metas'][0].data['pad_shape'] == (768, 1344, 3)
    assert results['img_metas'][0].data['scale_factor'].tolist() == [
        2.603515625, 2.6041667461395264, 2.603515625, 2.6041667461395264
    ]
Example 6
def visualize_bbox_act(img, bboxes, labels, act_preds,
                       classes=None, thickness=1,
                       font_scale=0.4, show=False,
                       wait_time=0, out_file=None):
    """Show the tracks with OpenCV."""
    assert bboxes.ndim == 2
    assert labels.ndim == 1
    assert bboxes.shape[0] == labels.shape[0]
    assert bboxes.shape[1] == 5
    if isinstance(img, str):
        img = mmcv.imread(img)

    img_shape = img.shape
    bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, img_shape[1])
    bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, img_shape[0])

    text_width, text_height = 8, 15
    for i, (bbox, label) in enumerate(zip(bboxes, labels), 0):
        x1, y1, x2, y2 = bbox[:4].astype(np.int32)
        score = float(bbox[-1])

        # bbox
        bbox_color = random_color(label)
        bbox_color = [int(255 * _c) for _c in bbox_color][::-1]
        cv2.rectangle(img, (x1, y1), (x2, y2), bbox_color, thickness=thickness)

        # score
        text = '{:.02f}'.format(score)
        width = len(text) * text_width
        img[y1 - text_height:y1, x1:x1 + width, :] = bbox_color
        cv2.putText(
            img,
            text, (x1, y1 - 2),
            cv2.FONT_HERSHEY_COMPLEX,
            font_scale,
            color=(0, 0, 0))

        classes_color = random_color(label + 1)
        text = classes[label]
        width = len(text) * text_width
        img[y1:y1 + text_height, x1:x1 + width, :] = bbox_color
        cv2.putText(img, text,
                    (x1, y1 + text_height - 2),
                    cv2.FONT_HERSHEY_COMPLEX,
                    font_scale, color=classes_color)

        # background_color = random_color(label + 5)
        background_color = [255, 204, 153]
        if (act_preds is not None
                and len(bboxes) == len(labels) == len(act_preds)):
            for j, act_pred in enumerate(act_preds[i]):
                text = '{}: {:.02f}'.format(act_pred[0], act_pred[1])
                width = len(text) * text_width
                img[y1 + text_height * (j + 2):y1 + text_height * (j + 3),
                    x1:x1 + width, :] = background_color
                cv2.putText(img, text,
                            (x1, y1 + text_height * (j + 3) - 2),
                            cv2.FONT_HERSHEY_COMPLEX,
                            font_scale, color=classes_color)

    if show:
        mmcv.imshow(img, wait_time=wait_time)
    if out_file is not None:
        mmcv.imwrite(img, out_file)

    return img
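visualize_bbox_act relies on a random_color helper that is not shown here. Below is a minimal deterministic stand-in, written under one assumption: the helper returns RGB floats in [0, 1], which is what the int(255 * _c) conversion above expects; the real project may use a different palette.

import numpy as np


def random_color(seed):
    """Hypothetical stand-in: deterministic pseudo-random RGB color in [0, 1]
    keyed by a label id."""
    rng = np.random.RandomState(int(seed) % (2**31))
    return rng.uniform(0.2, 1.0, size=3)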
Example 7
def convert_lasot_test(lasot_test, ann_dir, save_dir):
    """Convert lasot dataset to COCO style.

    Args:
        lasot_test (dict): The converted COCO style annotations.
        ann_dir (str): The path of lasot test dataset
        save_dir (str): The path to save `lasot_test`.
    """
    records = dict(vid_id=1, img_id=1, ann_id=1, global_instance_id=1)
    videos_list = osp.join(ann_dir, 'testing_set.txt')
    videos_list = mmcv.list_from_file(videos_list)

    lasot_test['categories'] = [dict(id=0, name=0)]

    for video_name in tqdm(videos_list):
        video_path = osp.join(ann_dir, video_name)
        video = dict(id=records['vid_id'], name=video_name)
        lasot_test['videos'].append(video)

        gt_bboxes = mmcv.list_from_file(osp.join(video_path,
                                                 'groundtruth.txt'))
        full_occlusion = mmcv.list_from_file(
            osp.join(video_path, 'full_occlusion.txt'))
        full_occlusion = full_occlusion[0].split(',')
        out_of_view = mmcv.list_from_file(
            osp.join(video_path, 'out_of_view.txt'))
        out_of_view = out_of_view[0].split(',')

        img = mmcv.imread(osp.join(video_path, 'img/00000001.jpg'))
        height, width, _ = img.shape
        for frame_id, gt_bbox in enumerate(gt_bboxes):
            file_name = '%08d' % (frame_id + 1) + '.jpg'
            file_name = osp.join(video_name, 'img', file_name)
            image = dict(file_name=file_name,
                         height=height,
                         width=width,
                         id=records['img_id'],
                         frame_id=frame_id,
                         video_id=records['vid_id'])
            lasot_test['images'].append(image)

            x1, y1, w, h = gt_bbox.split(',')
            ann = dict(id=records['ann_id'],
                       image_id=records['img_id'],
                       instance_id=records['global_instance_id'],
                       category_id=0,
                       bbox=[int(x1), int(y1),
                             int(w), int(h)],
                       area=int(w) * int(h),
                       full_occlusion=full_occlusion[frame_id] == '1',
                       out_of_view=out_of_view[frame_id] == '1')
            lasot_test['annotations'].append(ann)

            records['ann_id'] += 1
            records['img_id'] += 1
        records['global_instance_id'] += 1
        records['vid_id'] += 1

    mmcv.dump(lasot_test, osp.join(save_dir, 'lasot_test.json'))
    print('-----LaSOT Test Dataset------')
    print(f'{records["vid_id"]- 1} videos')
    print(f'{records["global_instance_id"]- 1} instances')
    print(f'{records["img_id"]- 1} images')
    print(f'{records["ann_id"] - 1} objects')
    print('-----------------------------')
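A possible driver for the converter above, sketched under the assumption that lasot_test starts out as an empty container with list values (the function appends to its 'videos', 'images' and 'annotations' keys); the paths are placeholders, and the real script likely reads them from argparse.

from collections import defaultdict

ann_dir = '/path/to/LaSOT/test'    # placeholder
save_dir = '/path/to/annotations'  # placeholder

lasot_test = defaultdict(list)
convert_lasot_test(lasot_test, ann_dir, save_dir)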
Example 8
def convert_det(data_dir, mode=None, adjust_center=True):
    kitti = defaultdict(list)

    img_dir = osp.join(data_dir, 'image_2')
    label_dir = osp.join(data_dir, 'label_2')
    cali_dir = osp.join(data_dir, 'calib')

    if not osp.exists(img_dir):
        print(f"Folder {img_dir} is not found")
        return None

    if not osp.exists(label_dir):
        label_dir = None

    img_names = sorted(os.listdir(img_dir))
    for k, v in cats_mapping.items():
        kitti['categories'].append(dict(id=v, name=k))

    pose_dict = dict(rotation=[0, 0, 0], position=[0, 0, 0])
    ann_id = 0
    vid_info = dict(id=0, name='', n_frames=len(img_names))
    kitti['videos'].append(vid_info)
    for img_id, img_name in enumerate(img_names):
        if mode == 'train':
            if osp.splitext(img_name)[0] in det_val_sets:
                continue
        elif mode == 'val':
            if osp.splitext(img_name)[0] not in det_val_sets:
                continue
        print('DET ID: {}'.format(img_id))
        img = mmcv.imread(osp.join(img_dir, img_name))
        height, width, _ = img.shape
        projection = ku.read_calib_det(cali_dir, img_id)

        index = int(img_name.split('.')[0])
        img_info = dict(file_name=osp.join(img_dir, img_name),
                        cali=projection.tolist(),
                        pose=pose_dict,
                        height=height,
                        width=width,
                        fov=60,
                        near_clip=0.15,
                        id=img_id,
                        video_id=0,
                        index=index)
        kitti['images'].append(img_info)

        if label_dir:
            label_file = osp.join(label_dir,
                                  '{}.txt'.format(img_name.split('.')[0]))
            labels = mmcv.list_from_file(label_file)
            track_id = 0
            for label in labels:
                label = label.split()
                cat = label[0]
                if cat in ['DontCare']:
                    continue
                # if cat not in kitti_cats.keys():
                #     continue
                x1, y1, x2, y2 = float(label[4]), float(label[5]), float(
                    label[6]), float(label[7])

                if adjust_center:
                    # KITTI GT uses the bottom of the car as center (x, 0, z).
                    # Prediction uses center of the bbox as center (x, y, z).
                    # So we align them to the bottom center as GT does
                    y_cen_adjust = float(label[8]) / 2.0
                else:
                    y_cen_adjust = 0.0

                center_2d = tu.cameratoimage(
                    np.array([[
                        float(label[11]),
                        float(label[12]) - y_cen_adjust,
                        float(label[13])
                    ]]), projection).flatten().tolist()

                ann = dict(id=ann_id,
                           image_id=img_id,
                           instance_id=track_id,
                           category_id=cats_mapping[kitti_cats[cat]],
                           alpha=float(label[3]),
                           roty=float(label[14]),
                           dimension=[
                               float(label[8]),
                               float(label[9]),
                               float(label[10])
                           ],
                           translation=[
                               float(label[11]),
                               float(label[12]) - y_cen_adjust,
                               float(label[13])
                           ],
                           is_occluded=int(label[2]),
                           is_truncated=float(label[1]),
                           center_2d=center_2d,
                           delta_2d=[
                               center_2d[0] - (x1 + x2) / 2.0,
                               center_2d[1] - (y1 + y2) / 2.0
                           ],
                           bbox=[x1, y1, x2 - x1, y2 - y1],
                           area=(x2 - x1) * (y2 - y1),
                           iscrowd=False,
                           ignore=False,
                           segmentation=[[x1, y1, x1, y2, x2, y2, x2, y1]])
                kitti['annotations'].append(ann)
                ann_id += 1
                track_id += 1
    return kitti
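A hypothetical driver for convert_det: the KITTI directory layout and output file names below are assumptions, and mmcv.dump simply serializes the returned COCO-style dict to JSON.

import os.path as osp

import mmcv

data_dir = '/path/to/KITTI/object/training'  # placeholder
for split in ('train', 'val'):
    coco_style = convert_det(data_dir, mode=split, adjust_center=True)
    if coco_style is not None:
        mmcv.dump(coco_style, osp.join(data_dir, f'kitti_det_{split}.json'))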
Example 9
def convert_track(data_dir, mode=None, adjust_center=True):
    kitti = defaultdict(list)

    img_dir = osp.join(data_dir, 'image_02')
    label_dir = osp.join(data_dir, 'label_02')
    cali_dir = osp.join(data_dir, 'calib')
    oxt_dir = osp.join(data_dir, 'oxts')

    if not osp.exists(img_dir):
        print(f"Folder {img_dir} is not found")
        return None

    if not osp.exists(label_dir):
        label_dir = None

    vid_names = sorted(os.listdir(img_dir))
    print(f"{data_dir} with {len(vid_names)} sequences")

    for k, v in cats_mapping.items():
        kitti['categories'].append(dict(id=v, name=k))

    img_id = 0
    global_track_id = 0
    ann_id = 0

    for vid_id, vid_name in enumerate(vid_names):
        if mode == 'train':
            if vid_name in val_sets:
                continue
        elif mode == 'val':
            if vid_name not in val_sets:
                continue
        elif mode == 'mini':
            if vid_name not in mini_sets:
                continue
        print("VID ID: {}".format(vid_id))
        ind2id = dict()
        trackid_maps = dict()
        img_names = sorted([
            f.path for f in os.scandir(osp.join(img_dir, vid_name))
            if f.is_file() and f.name.endswith('png')
        ])
        vid_info = dict(id=vid_id, name=vid_name, n_frames=len(img_names))
        kitti['videos'].append(vid_info)

        projection = ku.read_calib(cali_dir, vid_id)

        # The oxts file is per-sequence, so read and parse it once per video
        # rather than once per frame.
        fields = ku.read_oxts(oxt_dir, vid_id)
        poses = [ku.KittiPoseParser(fields[i]) for i in range(len(fields))]

        for fr, img_name in enumerate(sorted(img_names)):
            img = mmcv.imread(img_name)

            rotation = R.from_matrix(poses[fr].rotation).as_euler('xyz')
            position = poses[fr].position - poses[0].position
            pose_dict = dict(rotation=rotation.tolist(),
                             position=position.tolist())

            height, width, _ = img.shape
            index = fr
            img_info = dict(file_name=img_name,
                            cali=projection.tolist(),
                            pose=pose_dict,
                            height=height,
                            width=width,
                            fov=60,
                            near_clip=0.15,
                            id=img_id,
                            video_id=vid_id,
                            index=index)
            kitti['images'].append(img_info)
            ind2id[index] = img_id
            img_id += 1

        if label_dir:
            label_file = osp.join(label_dir, '{}.txt'.format(vid_name))
            labels = mmcv.list_from_file(label_file)
            for label in labels:
                label = label.split()
                cat = label[2]
                if cat in ['DontCare']:
                    continue
                # if cat not in kitti_cats.keys():
                #     continue
                image_id = ind2id[int(label[0])]
                if label[1] in trackid_maps.keys():
                    track_id = trackid_maps[label[1]]
                else:
                    track_id = global_track_id
                    trackid_maps[label[1]] = track_id
                    global_track_id += 1
                x1, y1, x2, y2 = float(label[6]), float(label[7]), float(
                    label[8]), float(label[9])

                if adjust_center:
                    # KITTI GT uses the bottom of the car as center (x, 0, z).
                    # Prediction uses center of the bbox as center (x, y, z).
                    # So we align them to the bottom center as GT does
                    y_cen_adjust = float(label[10]) / 2.0
                else:
                    y_cen_adjust = 0.0

                center_2d = tu.cameratoimage(
                    np.array([[
                        float(label[13]),
                        float(label[14]) - y_cen_adjust,
                        float(label[15])
                    ]]), projection).flatten().tolist()

                ann = dict(id=ann_id,
                           image_id=image_id,
                           category_id=cats_mapping[kitti_cats[cat]],
                           instance_id=track_id,
                           alpha=float(label[5]),
                           roty=float(label[16]),
                           dimension=[
                               float(label[10]),
                               float(label[11]),
                               float(label[12])
                           ],
                           translation=[
                               float(label[13]),
                               float(label[14]) - y_cen_adjust,
                               float(label[15])
                           ],
                           is_occluded=int(label[4]),
                           is_truncated=float(label[3]),
                           center_2d=center_2d,
                           delta_2d=[
                               center_2d[0] - (x1 + x2) / 2.0,
                               center_2d[1] - (y1 + y2) / 2.0
                           ],
                           bbox=[x1, y1, x2 - x1, y2 - y1],
                           area=(x2 - x1) * (y2 - y1),
                           iscrowd=False,
                           ignore=False,
                           segmentation=[[x1, y1, x1, y2, x2, y2, x2, y1]])
                kitti['annotations'].append(ann)
                ann_id += 1
    return kitti
Example 10
    def show_result(self,
                    img,
                    result,
                    palette=None,
                    win_name='',
                    show=False,
                    wait_time=0,
                    out_file=None,
                    opacity=0.5):
        """Draw `result` over `img`.

        Args:
            img (str or Tensor): The image to be displayed.
            result (Tensor): The semantic segmentation results to draw over
                `img`.
            palette (list[list[int]] | np.ndarray | None): The palette of
                the segmentation map. If None is given, a random palette will
                be generated. Default: None.
            win_name (str): The window name.
            wait_time (int): Value of waitKey param.
                Default: 0.
            show (bool): Whether to show the image.
                Default: False.
            out_file (str or None): The filename to write the image.
                Default: None.
            opacity (float): Opacity of the painted segmentation map.
                Must be in the (0, 1] range. Default: 0.5.
        Returns:
            img (Tensor): Only if not `show` or `out_file`
        """
        img = mmcv.imread(img)
        img = img.copy()
        seg = result[0]
        if palette is None:
            if self.PALETTE is None:
                palette = np.random.randint(0,
                                            255,
                                            size=(len(self.CLASSES), 3))
            else:
                palette = self.PALETTE
        palette = np.array(palette)
        assert palette.shape[0] == len(self.CLASSES)
        assert palette.shape[1] == 3
        assert len(palette.shape) == 2
        assert 0 < opacity <= 1.0
        color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8)
        for label, color in enumerate(palette):
            color_seg[seg == label, :] = color
        # convert to BGR
        color_seg = color_seg[..., ::-1]

        img = img * (1 - opacity) + color_seg * opacity
        img = img.astype(np.uint8)
        # if out_file specified, do not show image in window
        if out_file is not None:
            show = False

        if show:
            mmcv.imshow(img, win_name, wait_time)
        if out_file is not None:
            mmcv.imwrite(img, out_file)

        if not (show or out_file):
            warnings.warn('show==False and out_file is not specified, only '
                          'result image will be returned')
            return img
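A brief usage sketch, assuming this show_result belongs to an MMSegmentation-style segmentor (in that case init_segmentor and inference_segmentor come from mmseg.apis); the config and checkpoint paths are placeholders.

from mmseg.apis import inference_segmentor, init_segmentor

config_file = 'configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py'  # placeholder
checkpoint_file = 'checkpoints/pspnet_r50.pth'                           # placeholder

model = init_segmentor(config_file, checkpoint_file, device='cuda:0')
result = inference_segmentor(model, 'demo/demo.png')

# result[0] is the per-pixel label map; blend it over the image and save it.
model.show_result('demo/demo.png', result, opacity=0.5,
                  out_file='work_dirs/demo_seg.jpg')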
Example 11
    def __call__(self):
        """
        Load light-weight instance annotations of all images into a list of dicts in Detectron2 format.
        Do not load heavy data into memory in this file,
        since we will load the annotations of all images into memory.
        """
        # cache the dataset_dicts to avoid loading masks from files
        hashed_file_name = hashlib.md5(
            ("".join([str(fn) for fn in self.objs]) +
             "dataset_dicts_{}_{}_{}_{}_{}_{}".format(
                 self.name, self.dataset_root,
                 self.with_masks, self.with_depth, self.with_xyz,
                 osp.abspath(__file__))).encode("utf-8")).hexdigest()
        cache_path = osp.join(
            self.dataset_root,
            "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name))

        if osp.exists(cache_path) and self.use_cache:
            logger.info("load cached dataset dicts from {}".format(cache_path))
            return mmcv.load(cache_path)

        t_start = time.perf_counter()
        dataset_dicts = []
        self.num_instances_without_valid_segmentation = 0
        self.num_instances_without_valid_box = 0
        logger.info("loading dataset dicts: {}".format(self.name))
        # it is slow because of loading and converting masks to rle

        for scene in self.scenes:
            scene_id = int(scene)
            scene_root = osp.join(self.dataset_root, scene)

            gt_dict = mmcv.load(osp.join(scene_root, 'scene_gt.json'))
            gt_info_dict = mmcv.load(osp.join(scene_root,
                                              'scene_gt_info.json'))
            cam_dict = mmcv.load(osp.join(scene_root, 'scene_camera.json'))

            for str_im_id in tqdm(gt_dict, postfix=f"{scene_id}"):
                int_im_id = int(str_im_id)
                rgb_path = osp.join(scene_root,
                                    "rgb/{:06d}.jpg").format(int_im_id)
                assert osp.exists(rgb_path), rgb_path

                depth_path = osp.join(scene_root,
                                      "depth/{:06d}.png".format(int_im_id))
                K = np.array(cam_dict[str_im_id]['cam_K'],
                             dtype=np.float32).reshape(3, 3)
                depth_factor = 1000.0 / cam_dict[str_im_id][
                    'depth_scale']  # 10000

                record = {
                    "dataset_name": self.name,
                    'file_name': osp.relpath(rgb_path, PROJ_ROOT),
                    'depth_file': osp.relpath(depth_path, PROJ_ROOT),
                    'height': self.height,
                    'width': self.width,
                    'image_id': int_im_id,
                    "scene_im_id": "{}/{}".format(scene_id,
                                                  int_im_id),  # for evaluation
                    "cam": K,
                    "depth_factor": depth_factor,
                    "img_type": 'syn_pbr'  # NOTE: has background
                }
                insts = []
                for anno_i, anno in enumerate(gt_dict[str_im_id]):
                    obj_id = anno['obj_id']
                    if obj_id not in self.cat_ids:
                        continue
                    cur_label = self.cat2label[obj_id]  # 0-based label
                    R = np.array(anno['cam_R_m2c'],
                                 dtype='float32').reshape(3, 3)
                    t = np.array(anno['cam_t_m2c'], dtype='float32') / 1000.0
                    pose = np.hstack([R, t.reshape(3, 1)])
                    quat = mat2quat(R).astype('float32')
                    allo_q = mat2quat(egocentric_to_allocentric(pose)
                                      [:3, :3]).astype('float32')

                    proj = (record["cam"] @ t.T).T
                    proj = proj[:2] / proj[2]

                    bbox_visib = gt_info_dict[str_im_id][anno_i]['bbox_visib']
                    bbox_obj = gt_info_dict[str_im_id][anno_i]['bbox_obj']
                    x1, y1, w, h = bbox_visib
                    if self.filter_invalid:
                        if h <= 1 or w <= 1:
                            self.num_instances_without_valid_box += 1
                            continue

                    mask_file = osp.join(
                        scene_root,
                        "mask/{:06d}_{:06d}.png".format(int_im_id, anno_i))
                    mask_visib_file = osp.join(
                        scene_root, "mask_visib/{:06d}_{:06d}.png".format(
                            int_im_id, anno_i))
                    assert osp.exists(mask_file), mask_file
                    assert osp.exists(mask_visib_file), mask_visib_file
                    # load mask visib  TODO: load both mask_visib and mask_full
                    mask_single = mmcv.imread(mask_visib_file, "unchanged")
                    area = mask_single.sum()
                    if area < 3:  # filter out too small or nearly invisible instances
                        self.num_instances_without_valid_segmentation += 1
                        continue
                    mask_rle = binary_mask_to_rle(mask_single, compressed=True)

                    inst = {
                        'category_id': cur_label,  # 0-based label
                        'bbox':
                        bbox_visib,  # TODO: load both bbox_obj and bbox_visib
                        'bbox_mode': BoxMode.XYWH_ABS,
                        'pose': pose,
                        "quat": quat,
                        "trans": t,
                        "allo_quat": allo_q,
                        "centroid_2d": proj,  # absolute (cx, cy)
                        "segmentation": mask_rle,
                        "mask_full_file":
                        mask_file,  # TODO: load as mask_full, rle
                    }
                    if self.with_xyz:
                        xyz_crop_path = mask_file.replace(
                            "/mask/", "/xyz_crop/").replace(".png", ".pkl")
                        assert osp.exists(xyz_crop_path), xyz_crop_path
                        inst["xyz_crop_path"] = xyz_crop_path

                    insts.append(inst)
                if len(insts) == 0:  # filter im without anno
                    continue
                record['annotations'] = insts
                dataset_dicts.append(record)

        if self.num_instances_without_valid_segmentation > 0:
            logger.warning(
                "Filtered out {} instances without valid segmentation. "
                "There might be issues in your dataset generation process.".
                format(self.num_instances_without_valid_segmentation))
        if self.num_instances_without_valid_box > 0:
            logger.warning(
                "Filtered out {} instances without valid box. "
                "There might be issues in your dataset generation process.".
                format(self.num_instances_without_valid_box))
        ##########################
        if self.num_to_load > 0:
            self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
            dataset_dicts = dataset_dicts[:self.num_to_load]
        logger.info("loaded {} dataset dicts, using {}s".format(
            len(dataset_dicts),
            time.perf_counter() - t_start))

        mkdir_p(osp.dirname(cache_path))
        mmcv.dump(dataset_dicts, cache_path, protocol=4)
        logger.info("Dumped dataset_dicts to {}".format(cache_path))
        return dataset_dicts
Example 12
    def prepare_train_img(self, idx):
        img_info = self.img_infos[idx]
        # load image
        img = mmcv.imread(osp.join(self.img_prefix, img_info['filename']))
        # load proposals if necessary
        if self.proposals is not None:
            proposals = self.proposals[idx][:self.num_max_proposals]
            # TODO: Handle empty proposals properly. Currently images with
            # no proposals are simply ignored, although in principle they
            # could still be used for training.
            if len(proposals) == 0:
                return None
            if not (proposals.shape[1] == 4 or proposals.shape[1] == 5):
                raise AssertionError(
                    'proposals should have shapes (n, 4) or (n, 5), '
                    'but found {}'.format(proposals.shape))
            if proposals.shape[1] == 5:
                scores = proposals[:, 4, None]
                proposals = proposals[:, :4]
            else:
                scores = None

        ann = self.get_ann_info(idx)
        gt_bboxes = ann['bboxes']
        gt_labels = ann['labels']
        if self.with_crowd:
            gt_bboxes_ignore = ann['bboxes_ignore']

        assert len(self.img_scales[0]) == 2 and isinstance(
            self.img_scales[0][0], int)

        img, gt_bboxes, gt_labels, gt_bboxes_ignore = augment(
            img, gt_bboxes, gt_labels, gt_bboxes_ignore, self.img_scales[0])
        ori_shape = img.shape[:2]
        img, img_shape, pad_shape, scale_factor = self.img_transform(
            img, img.shape[:2], False, keep_ratio=self.resize_keep_ratio)
        assert (scale_factor == 1)
        img_meta = dict(
            ori_shape=ori_shape,
            img_shape=ori_shape,
            pad_shape=(0, 0),
            scale_factor=1,
            flip=False,
            name=img_info['filename'],
        )

        pos_maps = []
        scale_maps = []
        offset_maps = []
        if not self.with_crowd:
            gt_bboxes_ignore = None
        for i, stride in enumerate(self.strides):
            pos_map, scale_map, offset_map = self.calc_gt_center(
                gt_bboxes,
                gt_bboxes_ignore,
                stride=stride,
                regress_range=self.regress_ranges[i],
                image_shape=ori_shape)
            pos_maps.append(pos_map)
            scale_maps.append(scale_map)
            offset_maps.append(offset_map)

        data = dict(img=DC(to_tensor(img), stack=True),
                    img_meta=DC(img_meta, cpu_only=True),
                    gt_bboxes=DC(to_tensor(gt_bboxes)))
        if self.proposals is not None:
            data['proposals'] = DC(to_tensor(proposals))
        if self.with_label:
            data['gt_labels'] = DC(to_tensor(gt_labels))
        if self.with_crowd:
            data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore))

        data['classification_maps'] = DC(
            [to_tensor(pos_map) for pos_map in pos_maps])
        data['scale_maps'] = DC(
            [to_tensor(scale_map) for scale_map in scale_maps])
        data['offset_maps'] = DC(
            [to_tensor(offset_map) for offset_map in offset_maps])
        return data
Example 13
def test_readtext(mock_kiedataset):
    # Fix the random seeds so that the randomly initialized model weights do
    # not produce invalid results and trigger unrelated assertion errors
    torch.manual_seed(4)
    random.seed(4)
    mmocr = MMOCR_testobj()
    mmocr_det = MMOCR_testobj(kie='', recog='')
    mmocr_recog = MMOCR_testobj(kie='', det='', recog='CRNN_TPS')
    mmocr_det_recog = MMOCR_testobj(kie='')

    def readtext(imgs, ocr_obj=mmocr, **kwargs):
        # The filename can differ depending on how
        # the image was loaded.
        e2e_res = ocr_obj.readtext(imgs, **kwargs)
        for res in e2e_res:
            res.pop('filename')
        return e2e_res

    def kiedataset_with_test_dict(**kwargs):
        kwargs['dict_file'] = 'tests/data/kie_toy_dataset/dict.txt'
        return KIEDataset(**kwargs)

    mock_kiedataset.side_effect = kiedataset_with_test_dict

    # Single image
    toy_dir = 'tests/data/toy_dataset/imgs/test/'
    toy_img1_path = toy_dir + 'img_1.jpg'
    str_e2e_res = readtext(toy_img1_path)
    toy_img1 = mmcv.imread(toy_img1_path)
    np_e2e_res = readtext(toy_img1)
    assert str_e2e_res == np_e2e_res

    # Multiple images
    toy_img2_path = toy_dir + 'img_2.jpg'
    toy_img2 = mmcv.imread(toy_img2_path)
    toy_imgs = [toy_img1, toy_img2]
    toy_img_paths = [toy_img1_path, toy_img2_path]
    np_e2e_results = readtext(toy_imgs)
    str_e2e_results = readtext(toy_img_paths)
    str_tuple_e2e_results = readtext(tuple(toy_img_paths))
    assert np_e2e_results == str_e2e_results
    assert str_e2e_results == str_tuple_e2e_results

    # Batch mode test
    toy_imgs.append(toy_dir + 'img_3.jpg')
    e2e_res = readtext(toy_imgs)
    full_batch_e2e_res = readtext(toy_imgs, batch_mode=True)
    assert full_batch_e2e_res == e2e_res
    batch_e2e_res = readtext(toy_imgs,
                             batch_mode=True,
                             recog_batch_size=2,
                             det_batch_size=2)
    assert batch_e2e_res == full_batch_e2e_res

    # Batch mode test with DBNet only
    full_batch_det_res = mmocr_det.readtext(toy_imgs, batch_mode=True)
    det_res = mmocr_det.readtext(toy_imgs)
    batch_det_res = mmocr_det.readtext(toy_imgs,
                                       batch_mode=True,
                                       single_batch_size=2)
    assert len(full_batch_det_res) == len(det_res)
    assert len(batch_det_res) == len(det_res)
    assert all([
        np.allclose(full_batch_det_res[i]['boundary_result'],
                    det_res[i]['boundary_result'])
        for i in range(len(full_batch_det_res))
    ])
    assert all([
        np.allclose(batch_det_res[i]['boundary_result'],
                    det_res[i]['boundary_result'])
        for i in range(len(batch_det_res))
    ])

    # Batch mode test with CRNN_TPS only (CRNN doesn't support batch inference)
    full_batch_recog_res = mmocr_recog.readtext(toy_imgs, batch_mode=True)
    recog_res = mmocr_recog.readtext(toy_imgs)
    batch_recog_res = mmocr_recog.readtext(toy_imgs,
                                           batch_mode=True,
                                           single_batch_size=2)
    full_batch_recog_res.sort(key=lambda x: x['text'])
    batch_recog_res.sort(key=lambda x: x['text'])
    recog_res.sort(key=lambda x: x['text'])
    assert np.all([
        np.allclose(full_batch_recog_res[i]['score'], recog_res[i]['score'])
        for i in range(len(full_batch_recog_res))
    ])
    assert np.all([
        np.allclose(batch_recog_res[i]['score'], recog_res[i]['score'])
        for i in range(len(full_batch_recog_res))
    ])

    # Test export
    with tempfile.TemporaryDirectory() as tmpdirname:
        mmocr.readtext(toy_imgs, export=tmpdirname)
        assert len(os.listdir(tmpdirname)) == len(toy_imgs)
    with tempfile.TemporaryDirectory() as tmpdirname:
        mmocr_det.readtext(toy_imgs, export=tmpdirname)
        assert len(os.listdir(tmpdirname)) == len(toy_imgs)
    with tempfile.TemporaryDirectory() as tmpdirname:
        mmocr_recog.readtext(toy_imgs, export=tmpdirname)
        assert len(os.listdir(tmpdirname)) == len(toy_imgs)

    # Test output
    # Single image
    with tempfile.TemporaryDirectory() as tmpdirname:
        tmp_output = os.path.join(tmpdirname, '1.jpg')
        mmocr.readtext(toy_imgs[0], output=tmp_output)
        assert os.path.exists(tmp_output)
    # Multiple images
    with tempfile.TemporaryDirectory() as tmpdirname:
        mmocr.readtext(toy_imgs, output=tmpdirname)
        assert len(os.listdir(tmpdirname)) == len(toy_imgs)

    # Test imshow
    with mock.patch('mmocr.utils.ocr.mmcv.imshow') as mock_imshow:
        mmocr.readtext(toy_img1_path, imshow=True)
        mock_imshow.assert_called_once()
        mock_imshow.reset_mock()
        mmocr.readtext(toy_imgs, imshow=True)
        assert mock_imshow.call_count == len(toy_imgs)

    # Test print_result
    with io.StringIO() as capturedOutput:
        sys.stdout = capturedOutput
        res = mmocr.readtext(toy_imgs, print_result=True)
        assert json.loads('[%s]' % capturedOutput.getvalue().strip().replace(
            '\n\n', ',').replace("'", '"')) == res
        sys.stdout = sys.__stdout__
    with io.StringIO() as capturedOutput:
        sys.stdout = capturedOutput
        res = mmocr.readtext(toy_imgs, details=True, print_result=True)
        assert json.loads('[%s]' % capturedOutput.getvalue().strip().replace(
            '\n\n', ',').replace("'", '"')) == res
        sys.stdout = sys.__stdout__

    # Test merge
    with mock.patch('mmocr.utils.ocr.stitch_boxes_into_lines') as mock_merge:
        mmocr_det_recog.readtext(toy_imgs, merge=True)
        assert mock_merge.call_count == len(toy_imgs)
Example 14
    def show_result(self,
                    result,
                    img=None,
                    skeleton=None,
                    kpt_score_thr=0.3,
                    radius=8,
                    bbox_color='green',
                    thickness=2,
                    pose_kpt_color=None,
                    pose_link_color=None,
                    vis_height=400,
                    num_instances=-1,
                    win_name='',
                    show=False,
                    wait_time=0,
                    out_file=None):
        """Visualize 3D pose estimation results.

        Args:
            result (list[dict]): The pose estimation results containing:

                - "keypoints_3d" ([K,4]): 3D keypoints
                - "keypoints" ([K,3] or [T,K,3]): Optional for visualizing
                    2D inputs. If a sequence is given, only the last frame
                    will be used for visualization
                - "bbox" ([4,] or [T,4]): Optional for visualizing 2D inputs
                - "title" (str): title for the subplot
            img (str or Tensor): Optional. The image to visualize 2D inputs on.
            skeleton (list of [idx_i,idx_j]): Skeleton described by a list of
                links, each is a pair of joint indices.
            kpt_score_thr (float, optional): Minimum score of keypoints
                to be shown. Default: 0.3.
            radius (int): Radius of circles.
            bbox_color (str or tuple or :obj:`Color`): Color of bbox lines.
            thickness (int): Thickness of lines.
            pose_kpt_color (np.array[Nx3]): Color of N keypoints.
                If None, do not draw keypoints.
            pose_link_color (np.array[Mx3]): Color of M limbs.
                If None, do not draw limbs.
            vis_height (int): The image height of the visualization. The width
                will be N*vis_height depending on the number of visualized
                items.
            num_instances (int): Number of instances to be shown in 3D. If
                smaller than 0, all the instances in the pose_result will be
                shown. Otherwise, pad or truncate the pose_result to a length
                of num_instances.
            win_name (str): The window name.
            show (bool): Whether to show the image. Default: False.
            wait_time (int): Value of waitKey param.
                Default: 0.
            out_file (str or None): The filename to write the image.
                Default: None.

        Returns:
            Tensor: Visualized img, only if not `show` or `out_file`.
        """
        if num_instances < 0:
            assert len(result) > 0
        result = sorted(result, key=lambda x: x.get('track_id', 0))

        # draw image and 2d poses
        if img is not None:
            img = mmcv.imread(img)

            bbox_result = []
            pose_2d = []
            for res in result:
                if 'bbox' in res:
                    bbox = np.array(res['bbox'])
                    if bbox.ndim != 1:
                        assert bbox.ndim == 2
                        bbox = bbox[-1]  # Get bbox from the last frame
                    bbox_result.append(bbox)
                if 'keypoints' in res:
                    kpts = np.array(res['keypoints'])
                    if kpts.ndim != 2:
                        assert kpts.ndim == 3
                        kpts = kpts[-1]  # Get 2D keypoints from the last frame
                    pose_2d.append(kpts)

            if len(bbox_result) > 0:
                bboxes = np.vstack(bbox_result)
                mmcv.imshow_bboxes(img,
                                   bboxes,
                                   colors=bbox_color,
                                   top_k=-1,
                                   thickness=2,
                                   show=False)
            if len(pose_2d) > 0:
                imshow_keypoints(img,
                                 pose_2d,
                                 skeleton,
                                 kpt_score_thr=kpt_score_thr,
                                 pose_kpt_color=pose_kpt_color,
                                 pose_link_color=pose_link_color,
                                 radius=radius,
                                 thickness=thickness)
            img = mmcv.imrescale(img, scale=vis_height / img.shape[0])

        img_vis = imshow_keypoints_3d(result,
                                      img,
                                      skeleton,
                                      pose_kpt_color,
                                      pose_link_color,
                                      vis_height,
                                      axis_limit=300,
                                      axis_azimuth=-115,
                                      axis_elev=15,
                                      kpt_score_thr=kpt_score_thr,
                                      num_instances=num_instances)

        if show:
            mmcv.visualization.imshow(img_vis, win_name, wait_time)

        if out_file is not None:
            mmcv.imwrite(img_vis, out_file)

        return img_vis
Esempio n. 15
0
def test_multi_scale_flip_aug():
    # test assertion if give both scale_factor and img_scale
    with pytest.raises(AssertionError):
        transform = dict(type='MultiScaleFlipAug',
                         scale_factor=1.0,
                         img_scale=[(1333, 800)],
                         transforms=[dict(type='Resize')])
        build_from_cfg(transform, PIPELINES)

    # test assertion if both scale_factor and img_scale are None
    with pytest.raises(AssertionError):
        transform = dict(type='MultiScaleFlipAug',
                         scale_factor=None,
                         img_scale=None,
                         transforms=[dict(type='Resize')])
        build_from_cfg(transform, PIPELINES)

    # test assertion if img_scale is not tuple or list of tuple
    with pytest.raises(AssertionError):
        transform = dict(type='MultiScaleFlipAug',
                         img_scale=[1333, 800],
                         transforms=[dict(type='Resize')])
        build_from_cfg(transform, PIPELINES)

    # test assertion if flip_direction is not str or list of str
    with pytest.raises(AssertionError):
        transform = dict(type='MultiScaleFlipAug',
                         img_scale=[(1333, 800)],
                         flip_direction=1,
                         transforms=[dict(type='Resize')])
        build_from_cfg(transform, PIPELINES)

    scale_transform = dict(type='MultiScaleFlipAug',
                           img_scale=[(1333, 800), (1333, 640)],
                           transforms=[dict(type='Resize', keep_ratio=True)])
    transform = build_from_cfg(scale_transform, PIPELINES)

    results = dict()
    img = mmcv.imread(osp.join(osp.dirname(__file__), '../data/color.jpg'),
                      'color')
    results['img'] = img
    results['img_shape'] = img.shape
    results['ori_shape'] = img.shape
    # Set initial values for default meta_keys
    results['pad_shape'] = img.shape
    results['img_fields'] = ['img']

    scale_results = transform(copy.deepcopy(results))
    assert len(scale_results['img']) == 2
    assert scale_results['img'][0].shape == (750, 1333, 3)
    assert scale_results['img_shape'][0] == (750, 1333, 3)
    assert scale_results['img'][1].shape == (640, 1138, 3)
    assert scale_results['img_shape'][1] == (640, 1138, 3)

    scale_factor_transform = dict(
        type='MultiScaleFlipAug',
        scale_factor=[0.8, 1.0, 1.2],
        transforms=[dict(type='Resize', keep_ratio=False)])
    transform = build_from_cfg(scale_factor_transform, PIPELINES)
    scale_factor_results = transform(copy.deepcopy(results))
    assert len(scale_factor_results['img']) == 3
    assert scale_factor_results['img'][0].shape == (230, 409, 3)
    assert scale_factor_results['img_shape'][0] == (230, 409, 3)
    assert scale_factor_results['img'][1].shape == (288, 512, 3)
    assert scale_factor_results['img_shape'][1] == (288, 512, 3)
    assert scale_factor_results['img'][2].shape == (345, 614, 3)
    assert scale_factor_results['img_shape'][2] == (345, 614, 3)
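The transform exercised by the test above is normally used as the test-time wrapper of an mmdet data pipeline. A minimal sketch, assuming the standard mmdet transforms are registered; the scales and normalization values are illustrative only, not taken from the test.

img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],
                    std=[58.395, 57.12, 57.375], to_rgb=True)
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='MultiScaleFlipAug',
         img_scale=[(1333, 800), (1333, 640)],  # multi-scale testing
         flip=False,
         transforms=[
             dict(type='Resize', keep_ratio=True),
             dict(type='RandomFlip'),
             dict(type='Normalize', **img_norm_cfg),
             dict(type='Pad', size_divisor=32),
             dict(type='ImageToTensor', keys=['img']),
             dict(type='Collect', keys=['img']),
         ])
]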
Esempio n. 16
0
import matplotlib.image as img
from mmdet.apis import inference_detector, init_detector, show_result_pyplot
import os
from tqdm import tqdm
import glob
import cv2
import mmcv

test_root = args.input_images_folder
test_ids = os.listdir(test_root)
model.cfg = cfg

res_size=(1080,1440)

for it in tqdm(range(len(test_ids))):
  id = test_ids[it]
  image = mmcv.imread(test_root+"/"+id)
  orig_shape = image.shape[0:2]
  image = cv2.resize(image, res_size[::-1],interpolation=cv2.INTER_NEAREST)

  result = inference_detector(model,image)
  count = 1
  
  print(len(result[1][0]))

  for i,mask in enumerate(result[1][0]):
    if mask.sum()<500:
      continue
    mask = cv2.resize(mask, orig_shape[::-1],interpolation=cv2.INTER_NEAREST)
    img.imsave(args.save_path+"/"+id[:-4]+"_{}".format(count)+".bmp",mask)
    count+=1
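The loop above relies on `args`, `cfg` and `model` being defined earlier in the script. A minimal sketch of that setup, assuming an mmdet-style config and checkpoint; the argument names and all paths are placeholders, not values from the snippet.

import argparse
import mmcv
from mmdet.apis import init_detector

parser = argparse.ArgumentParser()
parser.add_argument('--input_images_folder', default='data/test_images')  # hypothetical default
parser.add_argument('--save_path', default='results/masks')               # hypothetical default
args = parser.parse_args()

cfg = mmcv.Config.fromfile('configs/mask_rcnn_r50_fpn_1x.py')             # placeholder config path
model = init_detector(cfg, 'checkpoints/mask_rcnn_r50_fpn_1x.pth',        # placeholder checkpoint
                      device='cuda:0')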
Esempio n. 17
0
    def prepare_train_img(self, idx):
        img_info = self.img_infos[idx]
        # load image
        img = mmcv.imread(osp.join(self.img_prefix, img_info['filename']))
        # load proposals if necessary
        if self.proposals is not None:
            proposals = self.proposals[idx][:self.num_max_proposals]
            # TODO: Handle empty proposals properly. Currently images with
            # no proposals are just ignored, but they can be used for
            # training in concept.
            if len(proposals) == 0:
                return None
            if not (proposals.shape[1] == 4 or proposals.shape[1] == 5):
                raise AssertionError(
                    'proposals should have shapes (n, 4) or (n, 5), '
                    'but found {}'.format(proposals.shape))
            if proposals.shape[1] == 5:
                scores = proposals[:, 4, None]
                proposals = proposals[:, :4]
            else:
                scores = None

        ann = self.get_ann_info(idx)
        gt_bboxes = ann['bboxes']
        gt_labels = ann['labels']
        if self.with_crowd:
            gt_bboxes_ignore = ann['bboxes_ignore']

        # skip the image if there is no valid gt bbox
        if len(gt_bboxes) == 0:
            return None

        # extra augmentation
        if self.extra_aug is not None:
            img, gt_bboxes, gt_labels = self.extra_aug(img, gt_bboxes,
                                                       gt_labels)

        # apply transforms
        flip = True if np.random.rand() < self.flip_ratio else False
        img_scale = random_scale(self.img_scales)  # sample a scale
        img, img_shape, pad_shape, scale_factor = self.img_transform(
            img, img_scale, flip, keep_ratio=self.resize_keep_ratio)
        img = img.copy()
        if self.proposals is not None:
            proposals = self.bbox_transform(proposals, img_shape, scale_factor,
                                            flip)
            proposals = np.hstack([proposals, scores
                                   ]) if scores is not None else proposals
        gt_bboxes = self.bbox_transform(gt_bboxes, img_shape, scale_factor,
                                        flip)
        if self.with_crowd:
            gt_bboxes_ignore = self.bbox_transform(gt_bboxes_ignore, img_shape,
                                                   scale_factor, flip)
        if self.with_mask:
            gt_masks = self.mask_transform(ann['masks'], pad_shape,
                                           scale_factor, flip)

        ori_shape = (img_info['height'], img_info['width'], 3)
        img_meta = dict(ori_shape=ori_shape,
                        img_shape=img_shape,
                        pad_shape=pad_shape,
                        scale_factor=scale_factor,
                        flip=flip)

        data = dict(img=DC(to_tensor(img), stack=True),
                    img_meta=DC(img_meta, cpu_only=True),
                    gt_bboxes=DC(to_tensor(gt_bboxes)))
        if self.proposals is not None:
            data['proposals'] = DC(to_tensor(proposals))
        if self.with_label:
            data['gt_labels'] = DC(to_tensor(gt_labels))
        if self.with_crowd:
            data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore))
        if self.with_mask:
            data['gt_masks'] = DC(gt_masks, cpu_only=True)
        return data
Esempio n. 18
0
def make_lmdb(data_path,
              lmdb_path,
              img_path_list,
              keys,
              batch=5000,
              compress_level=1,
              multiprocessing_read=False,
              n_thread=40):
    """Make lmdb.

    Contents of lmdb. The file structure is:
    example.lmdb
    ├── data.mdb
    ├── lock.mdb
    ├── meta_info.txt

    The data.mdb and lock.mdb are standard lmdb files and you can refer to
    https://lmdb.readthedocs.io/en/release/ for more details.

    The meta_info.txt is a specified txt file to record the meta information
    of our datasets. It will be automatically created when preparing
    datasets by our provided dataset tools.
    Each line in the txt file records 1) image name (with extension),
    2) image shape, and 3) compression level, separated by a white space.

    For example, the meta information could be:
    `000_00000000.png (720,1280,3) 1`, which means:
    1) image name (with extension): 000_00000000.png;
    2) image shape: (720,1280,3);
    3) compression level: 1

    We use the image name without extension as the lmdb key.

    If `multiprocessing_read` is True, it will read all the images to memory
    using multiprocessing. Thus, your server needs to have enough memory.

    Args:
        data_path (str): Data path for reading images.
        lmdb_path (str): Lmdb save path.
        img_path_list (list[str]): Image path list.
        keys (list[str]): Keys used for lmdb.
        batch (int): After processing batch images, lmdb commits.
            Default: 5000.
        compress_level (int): Compress level when encoding images. Default: 1.
        multiprocessing_read (bool): Whether to use multiprocessing to read
            all the images to memory. Default: False.
        n_thread (int): Number of threads used for the multiprocessing read.
            Default: 40.
    """
    assert len(img_path_list) == len(keys), (
        'img_path_list and keys should have the same length, '
        f'but got {len(img_path_list)} and {len(keys)}')
    print(f'Create lmdb for {data_path}, save to {lmdb_path}...')
    print(f'Total images: {len(img_path_list)}')
    if not lmdb_path.endswith('.lmdb'):
        raise ValueError("lmdb_path must end with '.lmdb'.")
    if osp.exists(lmdb_path):
        print(f'Folder {lmdb_path} already exists. Exit.')
        sys.exit(1)

    if multiprocessing_read:
        # read all the images to memory (multiprocessing)
        dataset = {}  # use dict to keep the order for multiprocessing
        shapes = {}
        print(f'Read images with multiprocessing, #thread: {n_thread} ...')
        prog_bar = mmcv.ProgressBar(len(img_path_list))

        def callback(arg):
            """get the image data and update prog_bar."""
            key, dataset[key], shapes[key] = arg
            prog_bar.update()

        pool = Pool(n_thread)
        for path, key in zip(img_path_list, keys):
            pool.apply_async(read_img_worker,
                             args=(osp.join(data_path,
                                            path), key, compress_level),
                             callback=callback)
        pool.close()
        pool.join()
        print(f'Finish reading {len(img_path_list)} images.')

    # create lmdb environment
    # obtain data size for one image
    img = mmcv.imread(osp.join(data_path, img_path_list[0]), flag='unchanged')
    _, img_byte = cv2.imencode('.png', img,
                               [cv2.IMWRITE_PNG_COMPRESSION, compress_level])
    data_size_per_img = img_byte.nbytes
    print('Data size per image is: ', data_size_per_img)
    data_size = data_size_per_img * len(img_path_list)
    env = lmdb.open(lmdb_path, map_size=data_size * 10)

    # write data to lmdb
    prog_bar = mmcv.ProgressBar(len(img_path_list))
    txn = env.begin(write=True)
    txt_file = open(osp.join(lmdb_path, 'meta_info.txt'), 'w')
    for idx, (path, key) in enumerate(zip(img_path_list, keys)):
        prog_bar.update()
        key_byte = key.encode('ascii')
        if multiprocessing_read:
            img_byte = dataset[key]
            h, w, c = shapes[key]
        else:
            _, img_byte, img_shape = read_img_worker(osp.join(data_path, path),
                                                     key, compress_level)
            h, w, c = img_shape

        txn.put(key_byte, img_byte)
        # write meta information
        txt_file.write(f'{key}.png ({h},{w},{c}) {compress_level}\n')
        if idx % batch == 0:
            txn.commit()
            txn = env.begin(write=True)
    txn.commit()
    env.close()
    txt_file.close()
    print('\nFinish writing lmdb.')
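A hedged usage sketch for make_lmdb; the dataset folder, key scheme, and lmdb path below are assumptions chosen for illustration, not values required by the function itself.

import os
import os.path as osp

data_path = 'datasets/train_images'                        # hypothetical image folder
img_path_list = sorted(os.listdir(data_path))              # relative paths inside data_path
keys = [osp.splitext(name)[0] for name in img_path_list]   # lmdb keys: names without extension
make_lmdb(data_path, 'datasets/train_images.lmdb', img_path_list, keys,
          batch=5000, compress_level=1, multiprocessing_read=False)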
Esempio n. 19
0
    def show_result(self,
                    img,
                    result,
                    skeleton=None,
                    kpt_score_thr=0.3,
                    bbox_color='green',
                    pose_kpt_color=None,
                    pose_limb_color=None,
                    radius=4,
                    text_color=(255, 0, 0),
                    thickness=1,
                    font_scale=0.5,
                    win_name='',
                    show=False,
                    wait_time=0,
                    out_file=None):
        """Draw `result` over `img`.

        Args:
            img (str or Tensor): The image to be displayed.
            result (list[dict]): The results to draw over `img`
                (bbox_result, pose_result).
            kpt_score_thr (float, optional): Minimum score of keypoints
                to be shown. Default: 0.3.
            bbox_color (str or tuple or :obj:`Color`): Color of bbox lines.
            pose_kpt_color (np.array[Nx3]): Color of N keypoints.
            pose_limb_color (np.array[Mx3]): Color of M limbs.
            text_color (str or tuple or :obj:`Color`): Color of texts.
            thickness (int): Thickness of lines.
            font_scale (float): Font scales of texts.
            win_name (str): The window name.
            wait_time (int): Value of waitKey param.
                Default: 0.
            out_file (str or None): The filename to write the image.
                Default: None.

        Returns:
            np.ndarray: The visualized image.
        """

        img = mmcv.imread(img)
        img = img.copy()
        img_h, img_w, _ = img.shape

        bbox_result = []
        pose_result = []
        for res in result:
            bbox_result.append(res['bbox'])
            pose_result.append(res['keypoints'])

        if len(bbox_result) > 0:
            bboxes = np.vstack(bbox_result)
            # draw bounding boxes
            mmcv.imshow_bboxes(img,
                               bboxes,
                               colors=bbox_color,
                               top_k=-1,
                               thickness=thickness,
                               show=False,
                               win_name=win_name,
                               wait_time=wait_time,
                               out_file=None)

            for person_id, kpts in enumerate(pose_result):
                # draw each point on image
                for kid, kpt in enumerate(kpts):
                    x_coord, y_coord, kpt_score = int(kpt[0]), int(
                        kpt[1]), kpt[2]
                    if kpt_score > kpt_score_thr:
                        # cv2.circle(img, (x_coord, y_coord), radius,
                        #            pose_kpt_color, thickness)
                        img_copy = img.copy()
                        r, g, b = pose_kpt_color[kid]
                        cv2.circle(img_copy, (int(x_coord), int(y_coord)),
                                   radius, (int(r), int(g), int(b)), -1)
                        transparency = max(0, min(1, kpt_score))
                        cv2.addWeighted(img_copy,
                                        transparency,
                                        img,
                                        1 - transparency,
                                        0,
                                        dst=img)

                # draw limbs
                if skeleton is not None:
                    for sk_id, sk in enumerate(skeleton):
                        pos1 = (int(kpts[sk[0] - 1, 0]), int(kpts[sk[0] - 1,
                                                                  1]))
                        pos2 = (int(kpts[sk[1] - 1, 0]), int(kpts[sk[1] - 1,
                                                                  1]))
                        if (pos1[0] > 0 and pos1[0] < img_w and pos1[1] > 0
                                and pos1[1] < img_h and pos2[0] > 0
                                and pos2[0] < img_w and pos2[1] > 0
                                and pos2[1] < img_h
                                and kpts[sk[0] - 1, 2] > kpt_score_thr
                                and kpts[sk[1] - 1, 2] > kpt_score_thr):
                            # cv2.line(img, pos1, pos2, pose_kpt_color, 2, 8)
                            img_copy = img.copy()
                            X = (pos1[0], pos2[0])
                            Y = (pos1[1], pos2[1])
                            mX = np.mean(X)
                            mY = np.mean(Y)
                            length = ((Y[0] - Y[1])**2 + (X[0] - X[1])**2)**0.5
                            angle = math.degrees(
                                math.atan2(Y[0] - Y[1], X[0] - X[1]))
                            stickwidth = 2
                            polygon = cv2.ellipse2Poly(
                                (int(mX), int(mY)),
                                (int(length / 2), int(stickwidth)), int(angle),
                                0, 360, 1)

                            r, g, b = pose_limb_color[sk_id]
                            cv2.fillConvexPoly(img_copy, polygon,
                                               (int(r), int(g), int(b)))
                            transparency = max(
                                0,
                                min(
                                    1, 0.5 *
                                    (kpts[sk[0] - 1, 2] + kpts[sk[1] - 1, 2])))
                            cv2.addWeighted(img_copy,
                                            transparency,
                                            img,
                                            1 - transparency,
                                            0,
                                            dst=img)

        if show:
            imshow(img, win_name, wait_time)

        if out_file is not None:
            imwrite(img, out_file)

        return img
Esempio n. 20
0
def _inference_single(model, img, img_transform, device):
    img = mmcv.imread(img)
    data = _prepare_data(img, img_transform, model.cfg, device)
    with torch.no_grad():
        result = model(return_loss=False, rescale=True, **data)
    return result
Esempio n. 21
0
    def _load_semantic_seg(self, results):
        results['gt_semantic_seg'] = mmcv.imread(
            osp.join(results['seg_prefix'], results['ann_info']['seg_map']),
            flag='unchanged').squeeze()
        return results
Esempio n. 22
0
def convert_svhn_to_coco_test(out_file, image_prefix):

    import re

    def convert(text):
        return int(text) if text.isdigit() else text.lower()

    def Sort(key):
        return [convert(c) for c in re.split('([0-9]+)', key)]

    def sorted_alphanumeric(data):
        return sorted(data, key=Sort)

    sorted_file = sorted_alphanumeric(os.listdir(image_prefix))

    annotations = []
    images = []
    obj_count = 0
    for file in sorted_file:
        if file[-4:] != ".png":
            continue

        idx = file[:-4]
        img_name = file

        filename = img_name
        img_path = osp.join(image_prefix, img_name)
        height, width = mmcv.imread(img_path).shape[:2]

        images.append(
            dict(id=idx, file_name=filename, height=height, width=width))

        for e in range(1):
            label = int(0)
            left, top, width, height = 0, 0, 0, 0

            data_anno = dict(image_id=idx,
                             id=obj_count,
                             category_id=label,
                             bbox=[left, top, width, height],
                             area=width * height,
                             segmentation=[],
                             iscrowd=0)
            annotations.append(data_anno)
            obj_count += 1

    coco_format_json = dict(images=images,
                            annotations=annotations,
                            categories=[{
                                'id': 0,
                                'name': '10'
                            }, {
                                'id': 1,
                                'name': '1'
                            }, {
                                'id': 2,
                                'name': '2'
                            }, {
                                'id': 3,
                                'name': '3'
                            }, {
                                'id': 4,
                                'name': '4'
                            }, {
                                'id': 5,
                                'name': '5'
                            }, {
                                'id': 6,
                                'name': '6'
                            }, {
                                'id': 7,
                                'name': '7'
                            }, {
                                'id': 8,
                                'name': '8'
                            }, {
                                'id': 9,
                                'name': '9'
                            }])
    print("DONE")
    mmcv.dump(coco_format_json, out_file)
Esempio n. 23
0
def test_random_crop():
    # test assertion for invalid random crop
    with pytest.raises(AssertionError):
        transform = dict(type='RandomCrop', crop_size=(-1, 0))
        build_from_cfg(transform, PIPELINES)

    results = dict()
    img = mmcv.imread(osp.join(osp.dirname(__file__), '../data/color.jpg'),
                      'color')
    results['img'] = img

    results['img_shape'] = img.shape
    results['ori_shape'] = img.shape
    # TODO: add img_fields test
    results['bbox_fields'] = ['gt_bboxes', 'gt_bboxes_ignore']
    # Set initial values for default meta_keys
    results['pad_shape'] = img.shape
    results['scale_factor'] = 1.0

    def create_random_bboxes(num_bboxes, img_w, img_h):
        bboxes_left_top = np.random.uniform(0, 0.5, size=(num_bboxes, 2))
        bboxes_right_bottom = np.random.uniform(0.5, 1, size=(num_bboxes, 2))
        bboxes = np.concatenate((bboxes_left_top, bboxes_right_bottom), 1)
        bboxes = (bboxes * np.array([img_w, img_h, img_w, img_h])).astype(
            np.int64)
        return bboxes

    h, w, _ = img.shape
    gt_bboxes = create_random_bboxes(8, w, h)
    gt_bboxes_ignore = create_random_bboxes(2, w, h)
    results['gt_bboxes'] = gt_bboxes
    results['gt_bboxes_ignore'] = gt_bboxes_ignore
    transform = dict(type='RandomCrop', crop_size=(h - 20, w - 20))
    crop_module = build_from_cfg(transform, PIPELINES)
    results = crop_module(results)
    assert results['img'].shape[:2] == (h - 20, w - 20)
    # All bboxes should be reserved after crop
    assert results['img_shape'][:2] == (h - 20, w - 20)
    assert results['gt_bboxes'].shape[0] == 8
    assert results['gt_bboxes_ignore'].shape[0] == 2

    def area(bboxes):
        return np.prod(bboxes[:, 2:4] - bboxes[:, 0:2], axis=1)

    assert (area(results['gt_bboxes']) <= area(gt_bboxes)).all()
    assert (area(results['gt_bboxes_ignore']) <= area(gt_bboxes_ignore)).all()

    # test assertion for invalid crop_type
    with pytest.raises(ValueError):
        transform = dict(type='RandomCrop',
                         crop_size=(1, 1),
                         crop_type='unknown')
        build_from_cfg(transform, PIPELINES)

    # test assertion for invalid crop_size
    with pytest.raises(AssertionError):
        transform = dict(type='RandomCrop',
                         crop_type='relative',
                         crop_size=(0, 0))
        build_from_cfg(transform, PIPELINES)

    def _construct_toy_data():
        img = np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=np.uint8)
        img = np.stack([img, img, img], axis=-1)
        results = dict()
        # image
        results['img'] = img
        results['img_shape'] = img.shape
        results['img_fields'] = ['img']
        # bboxes
        results['bbox_fields'] = ['gt_bboxes', 'gt_bboxes_ignore']
        results['gt_bboxes'] = np.array([[0., 0., 2., 1.]], dtype=np.float32)
        results['gt_bboxes_ignore'] = np.array([[2., 0., 3., 1.]],
                                               dtype=np.float32)
        # labels
        results['gt_labels'] = np.array([1], dtype=np.int64)
        return results

    # test crop_type "relative_range"
    results = _construct_toy_data()
    transform = dict(type='RandomCrop',
                     crop_type='relative_range',
                     crop_size=(0.3, 0.7),
                     allow_negative_crop=True)
    transform_module = build_from_cfg(transform, PIPELINES)
    results_transformed = transform_module(copy.deepcopy(results))
    h, w = results_transformed['img_shape'][:2]
    assert int(2 * 0.3 + 0.5) <= h <= int(2 * 1 + 0.5)
    assert int(4 * 0.7 + 0.5) <= w <= int(4 * 1 + 0.5)

    # test crop_type "relative"
    transform = dict(type='RandomCrop',
                     crop_type='relative',
                     crop_size=(0.3, 0.7),
                     allow_negative_crop=True)
    transform_module = build_from_cfg(transform, PIPELINES)
    results_transformed = transform_module(copy.deepcopy(results))
    h, w = results_transformed['img_shape'][:2]
    assert h == int(2 * 0.3 + 0.5) and w == int(4 * 0.7 + 0.5)

    # test crop_type "absolute"
    transform = dict(type='RandomCrop',
                     crop_type='absolute',
                     crop_size=(1, 2),
                     allow_negative_crop=True)
    transform_module = build_from_cfg(transform, PIPELINES)
    results_transformed = transform_module(copy.deepcopy(results))
    h, w = results_transformed['img_shape'][:2]
    assert h == 1 and w == 2

    # test crop_type "absolute_range"
    transform = dict(type='RandomCrop',
                     crop_type='absolute_range',
                     crop_size=(1, 20),
                     allow_negative_crop=True)
    transform_module = build_from_cfg(transform, PIPELINES)
    results_transformed = transform_module(copy.deepcopy(results))
    h, w = results_transformed['img_shape'][:2]
    assert 1 <= h <= 2 and 1 <= w <= 4
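Beyond the unit test, RandomCrop is typically configured as one entry of a training pipeline. A minimal sketch, assuming the standard mmdet transforms; the crop size and image scale are illustrative values, not taken from the test above.

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomCrop', crop_type='relative_range', crop_size=(0.75, 0.75)),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]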
Esempio n. 24
0
def convert_svhn_to_coco_train(ann_file, out_file, image_prefix):
    data_infos = h5py.File(ann_file, "r")

    annotations = []
    images = []
    obj_count = 0
    for idx in range(len(data_infos['/digitStruct/name'])):
        img_name = load_data.get_name(idx, data_infos)
        anno = load_data.get_bbox(idx, data_infos)

        filename = img_name
        img_path = osp.join(image_prefix, img_name)
        height, width = mmcv.imread(img_path).shape[:2]

        images.append(
            dict(id=idx, file_name=filename, height=height, width=width))

        for e in range(len(anno['label'])):
            label = int(anno['label'][e])
            left = anno['left'][e]
            top = anno['top'][e]
            width = anno['width'][e]
            height = anno['height'][e]

            if label == 10:
                label = 0
            data_anno = dict(image_id=idx,
                             id=obj_count,
                             category_id=label,
                             bbox=[left, top, width, height],
                             area=width * height,
                             segmentation=[],
                             iscrowd=0)
            annotations.append(data_anno)
            obj_count += 1

    coco_format_json = dict(images=images,
                            annotations=annotations,
                            categories=[{
                                'id': 0,
                                'name': '10'
                            }, {
                                'id': 1,
                                'name': '1'
                            }, {
                                'id': 2,
                                'name': '2'
                            }, {
                                'id': 3,
                                'name': '3'
                            }, {
                                'id': 4,
                                'name': '4'
                            }, {
                                'id': 5,
                                'name': '5'
                            }, {
                                'id': 6,
                                'name': '6'
                            }, {
                                'id': 7,
                                'name': '7'
                            }, {
                                'id': 8,
                                'name': '8'
                            }, {
                                'id': 9,
                                'name': '9'
                            }])
    print(coco_format_json)
    mmcv.dump(coco_format_json, out_file)
Esempio n. 25
0
def test_cutout():
    # test n_holes
    with pytest.raises(AssertionError):
        transform = dict(type='CutOut', n_holes=(5, 3), cutout_shape=(8, 8))
        build_from_cfg(transform, PIPELINES)
    with pytest.raises(AssertionError):
        transform = dict(type='CutOut', n_holes=(3, 4, 5), cutout_shape=(8, 8))
        build_from_cfg(transform, PIPELINES)
    # test cutout_shape and cutout_ratio
    with pytest.raises(AssertionError):
        transform = dict(type='CutOut', n_holes=1, cutout_shape=8)
        build_from_cfg(transform, PIPELINES)
    with pytest.raises(AssertionError):
        transform = dict(type='CutOut', n_holes=1, cutout_ratio=0.2)
        build_from_cfg(transform, PIPELINES)
    # either of cutout_shape and cutout_ratio should be given
    with pytest.raises(AssertionError):
        transform = dict(type='CutOut', n_holes=1)
        build_from_cfg(transform, PIPELINES)
    with pytest.raises(AssertionError):
        transform = dict(type='CutOut',
                         n_holes=1,
                         cutout_shape=(2, 2),
                         cutout_ratio=(0.4, 0.4))
        build_from_cfg(transform, PIPELINES)

    results = dict()
    img = mmcv.imread(osp.join(osp.dirname(__file__), '../data/color.jpg'),
                      'color')

    results['img'] = img
    results['img_shape'] = img.shape
    results['ori_shape'] = img.shape
    results['pad_shape'] = img.shape
    results['img_fields'] = ['img']

    transform = dict(type='CutOut', n_holes=1, cutout_shape=(10, 10))
    cutout_module = build_from_cfg(transform, PIPELINES)
    cutout_result = cutout_module(copy.deepcopy(results))
    assert cutout_result['img'].sum() < img.sum()

    transform = dict(type='CutOut', n_holes=1, cutout_ratio=(0.8, 0.8))
    cutout_module = build_from_cfg(transform, PIPELINES)
    cutout_result = cutout_module(copy.deepcopy(results))
    assert cutout_result['img'].sum() < img.sum()

    transform = dict(type='CutOut',
                     n_holes=(2, 4),
                     cutout_shape=[(10, 10), (15, 15)],
                     fill_in=(255, 255, 255))
    cutout_module = build_from_cfg(transform, PIPELINES)
    cutout_result = cutout_module(copy.deepcopy(results))
    assert cutout_result['img'].sum() > img.sum()

    transform = dict(type='CutOut',
                     n_holes=1,
                     cutout_ratio=(0.8, 0.8),
                     fill_in=(255, 255, 255))
    cutout_module = build_from_cfg(transform, PIPELINES)
    cutout_result = cutout_module(copy.deepcopy(results))
    assert cutout_result['img'].sum() > img.sum()
Esempio n. 26
0
def get_class_bboxes(input_path,
                     model,
                     cfg,
                     dataset='coco',
                     class_int=0,
                     score_thr=0.78,
                     show_result=False):
    '''Collect bounding boxes of a single class from detections.

    :param input_path: Path to a single image file or a directory of .jpg images.
    :param model: The detection model passed to `inference_detector`.
    :param cfg: The model config passed to `inference_detector`.
    :param dataset: Dataset name used to look up class names. Default: 'coco'.
    :param class_int: Index of the class whose boxes are kept. Default: 0.
    :param score_thr: Minimum bbox score to keep a detection. Default: 0.78.
    :param show_result: If True, also visualize the filtered detections.
    :return: Dict with the common image size and per-image bbox results;
        the same dict is dumped to a timestamped JSON file.
    '''
    if os.path.isdir(input_path):
        img_fnames = glob.glob('{}/*.jpg'.format(input_path))
        detections = inference_detector(model, img_fnames, cfg)
    elif os.path.isfile(input_path):
        img_fnames = [input_path]
        detections = [inference_detector(model, input_path, cfg)]
    else:
        raise Exception('Provided image path is not a file or directory.')

    img_sizes = [mmcv.imread(img).shape for img in img_fnames]

    if not all(size == img_sizes[0] for size in img_sizes):
        raise Exception('Not all images are of the same size!')

    class_names = get_classes(dataset)

    result_dict = dict()
    result_dict['image_size'] = img_sizes[0][:2]
    result_dict['results'] = dict()

    for idx, det in enumerate(list(detections)):
        if isinstance(det, tuple):
            bbox_result, segm_result = det
        else:
            bbox_result, segm_result = det, None

        bboxes = np.vstack(bbox_result)

        if bbox_result is not None:
            # segms = mmcv.concat_list(segm_result)

            labels = [
                np.full(bbox.shape[0], i, dtype=np.int32)
                for i, bbox in enumerate(bbox_result)
            ]

            labels = np.concatenate(labels)

            filter_thr = np.where(bboxes[:, -1] > score_thr)[0]
            filter_class = np.where(labels == class_int)[0]
            filter_idxs = np.intersect1d(filter_thr, filter_class)

            bboxes = bboxes[filter_idxs]
            labels = labels[filter_idxs]

            data = list()
            for bbox, label in zip(bboxes, labels):
                left_top = [int(bbox[0]), int(bbox[1])]
                right_bottom = [int(bbox[2]), int(bbox[3])]
                label_name = class_names[
                    label] if class_names is not None else 'cls {}'.format(
                        label)
                data.append({
                    'label': label_name,
                    'bbox': {
                        'lt': left_top,
                        'rb': right_bottom
                    }
                })

            result_dict['results'][os.path.basename(
                img_fnames[idx])] = data.copy()
            data.clear()

            ## Debug
            if show_result:
                img = mmcv.imread(img_fnames[idx])
                mmcv.imshow_det_bboxes(img.copy(),
                                       bboxes,
                                       labels,
                                       class_names=class_names,
                                       score_thr=score_thr,
                                       show=show_result)
            ##

    with open('{}_detection_bboxes.json'.format(time.strftime("%Y%m%d%H%M%S")),
              'w') as out_file:
        json.dump(result_dict, out_file)

    # print(json.dumps(out))  # debug
    return result_dict
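A hedged usage sketch for get_class_bboxes, assuming the older mmdet API in which the config is passed to inference_detector (as in the function above); the config path, checkpoint path, and image directory are placeholders.

import mmcv
from mmcv.runner import load_checkpoint
from mmdet.models import build_detector

cfg = mmcv.Config.fromfile('configs/faster_rcnn_r50_fpn_1x.py')      # placeholder config
cfg.model.pretrained = None
model = build_detector(cfg.model, test_cfg=cfg.test_cfg)
load_checkpoint(model, 'checkpoints/faster_rcnn_r50_fpn_1x.pth')     # placeholder checkpoint
result = get_class_bboxes('demo/images', model, cfg,
                          dataset='coco', class_int=0, score_thr=0.78)
print(result['image_size'], list(result['results'].keys()))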
Esempio n. 27
0
def show_result(img,
                result,
                class_names,
                score_thr=0.3,
                wait_time=0,
                show=True,
                out_file=None):
    """Visualize the detection results on the image.

    Args:
        img (str or np.ndarray): Image filename or loaded image.
        result (tuple[list] or list): The detection result, can be either
            (bbox, segm) or just bbox.
        class_names (list[str] or tuple[str]): A list of class names.
        score_thr (float): The threshold to visualize the bboxes and masks.
        wait_time (int): Value of waitKey param.
        show (bool, optional): Whether to show the image with opencv or not.
        out_file (str, optional): If specified, the visualization result will
            be written to the out file instead of shown in a window.

    Returns:
        np.ndarray or None: If neither `show` nor `out_file` is specified, the
            visualized image is returned, otherwise None is returned.
    """
    assert isinstance(class_names, (tuple, list))
    img = mmcv.imread(img)
    img = img.copy()
    if isinstance(result, tuple):
        bbox_result, segm_result = result
    else:
        bbox_result, segm_result = result, None
    bboxes = np.vstack(bbox_result)
    labels = [
        np.full(bbox.shape[0], i, dtype=np.int32)
        for i, bbox in enumerate(bbox_result)
    ]
    labels = np.concatenate(labels)
    # draw segmentation masks
    if segm_result is not None:
        segms = mmcv.concat_list(segm_result)
        inds = np.where(bboxes[:, -1] > score_thr)[0]
        np.random.seed(42)
        color_masks = [
            np.random.randint(0, 256, (1, 3), dtype=np.uint8)
            for _ in range(max(labels) + 1)
        ]
        for i in inds:
            i = int(i)
            color_mask = color_masks[labels[i]]
            mask = maskUtils.decode(segms[i]).astype(bool)
            img[mask] = img[mask] * 0.5 + color_mask * 0.5
    # draw bounding boxes
    mmcv.imshow_det_bboxes(img,
                           bboxes,
                           labels,
                           class_names=class_names,
                           score_thr=score_thr,
                           show=show,
                           wait_time=wait_time,
                           out_file=out_file)
    if not (show or out_file):
        return img
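A hedged usage sketch for this show_result; the model setup and all paths are placeholders, and the import location of get_classes is an assumption since it has moved between mmdet versions.

from mmdet.apis import inference_detector, init_detector
from mmdet.core import get_classes  # module path may differ between mmdet versions

model = init_detector('configs/mask_rcnn_r50_fpn_1x.py',
                      'checkpoints/mask_rcnn_r50_fpn_1x.pth')  # placeholder paths
result = inference_detector(model, 'demo/demo.jpg')
show_result('demo/demo.jpg', result, get_classes('coco'),
            score_thr=0.3, show=False, out_file='demo/demo_result.jpg')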
Esempio n. 28
0
    def prepare_train_img(self, idx):
        # prepare a pair of image in a sequence
        vid,  frame_id = idx
        vid_idx = self.vid_ids.index(vid)
        vid_info = self.vid_infos[vid_idx]
        basename = osp.basename(vid_info['filenames'][frame_id])
        clip_frame_ids = self.sample_ref(idx) + [frame_id]
        clip_frame_ids.sort()
        imgs = []
        for frame_id in clip_frame_ids:
            imgs.append(mmcv.imread(osp.join(self.img_prefix, vid_info['filenames'][frame_id])))
        imgs = np.stack(imgs, axis=0)
        # load proposals if necessary
        if self.proposals is not None:
            proposals = self.proposals[idx][:self.num_max_proposals]
            # TODO: Handle empty proposals properly. Currently images with
            # no proposals are just ignored, but they can be used for
            # training in concept.
            if len(proposals) == 0:
                return None
            if not (proposals.shape[1] == 4 or proposals.shape[1] == 5):
                raise AssertionError(
                    'proposals should have shapes (n, 4) or (n, 5), '
                    'but found {}'.format(proposals.shape))
            if proposals.shape[1] == 5:
                scores = proposals[:, 4, None]
                proposals = proposals[:, :4]
            else:
                scores = None

        # load annotation of ref_frames
        bboxes, labels, ids, masks, bboxes_ignore = [], [], [], [], []
        for frame_id in clip_frame_ids:
            ann = self.get_ann_info(vid, frame_id)
            bboxes.append(ann['bboxes'])
            labels.append(ann['labels'])
            # obj ids attribute does not exist in current annotation
            # need to add it
            ids.append(ann['obj_ids'])
            if self.with_mask:
                masks.append(ann['masks'])
            # compute matching of reference frame with current frame
            # 0 denote there is no matching
            # gt_pids = [ref_ids.index(i)+1 if i in ref_ids else 0 for i in gt_ids]
            if self.with_crowd:
                bboxes_ignore.append(ann['bboxes_ignore'])

        # extra augmentation
        if self.extra_aug is not None and self.with_mask:
            for i in range(len(clip_frame_ids)):
                imgs[i], bboxes[i], labels[i], masks[i], ids[i] = self.extra_aug(imgs[i], bboxes[i], labels[i], masks[i], ids[i])

        # apply transforms
        flip = True if np.random.rand() < self.flip_ratio else False
        img_scale = random_scale(self.img_scales, mode='range_keep_ratio')  # sample a scale
        temp_imgs = []
        for i in range(len(clip_frame_ids)):
            img_cur, img_shape, pad_shape, scale_factor = self.img_transform(
                imgs[i], img_scale, flip, keep_ratio=self.resize_keep_ratio)
            temp_imgs.append(img_cur)
        imgs = np.stack(temp_imgs, axis=0)
        imgs = imgs.copy()
        if self.proposals is not None:
            proposals = self.bbox_transform(proposals, img_shape, scale_factor, flip)
            proposals = np.hstack([proposals, scores]) if scores is not None else proposals

        for i in range(len(clip_frame_ids)):
            bboxes[i] = self.bbox_transform(bboxes[i], img_shape, pad_shape, scale_factor, flip)
        if self.aug_ref_bbox_param is not None:
            for i in range(len(clip_frame_ids)):
                bboxes[i] = self.bbox_aug(bboxes[i], img_shape)
        if self.with_crowd:
            for i in range(len(clip_frame_ids)):
                bboxes_ignore[i] = self.bbox_transform(bboxes_ignore[i], img_shape, pad_shape, scale_factor, flip)
        if self.with_mask:
            for i in range(len(clip_frame_ids)):
                masks[i] = self.mask_transform(masks[i], pad_shape, img_scale, flip, keep_ratio=self.resize_keep_ratio)

        ori_shape = (vid_info['height'], vid_info['width'], 3)
        img_meta = dict(
            ori_shape=ori_shape,
            img_shape=img_shape,
            pad_shape=pad_shape,
            video_id=vid,
            frame_id=frame_id,
            is_first=(frame_id == 0),
            scale_factor=scale_factor,
            flip=flip)

        data = dict(
            img=DC(to_tensor(imgs), stack=True),
            img_meta=DC(img_meta, cpu_only=True),
            bboxes=DC([to_tensor(bboxes[i]) for i in range(len(clip_frame_ids))]),
        )
        if self.proposals is not None:
            data['proposals'] = DC(to_tensor(proposals))
        if self.with_label:
            data['labels'] = DC([to_tensor(labels[i]) for i in range(len(clip_frame_ids))])
        if self.with_track:
            data['ids'] = DC([to_tensor(np.array(ids[i])) for i in range(len(clip_frame_ids))])
        if self.with_crowd:
            data['bboxes_ignore'] = DC([to_tensor(bboxes_ignore[i]) for i in range(len(clip_frame_ids))])
        if self.with_mask:
            data['masks'] = DC([to_tensor(masks[i]) for i in range(len(clip_frame_ids))], cpu_only=True)
        return data
Esempio n. 29
0
def imshow_keypoints_3d(
    pose_result,
    img=None,
    skeleton=None,
    pose_kpt_color=None,
    pose_limb_color=None,
    vis_height=400,
    kpt_score_thr=0.3,
    *,
    axis_azimuth=70,
    axis_limit=1.7,
    axis_dist=10.0,
    axis_elev=15.0,
):
    """Draw 3D keypoints and limbs in 3D coordinates.

    Args:
        pose_result (list[dict]): 3D pose results containing:
            - "keypoints_3d" ([K,4]): 3D keypoints
            - "title" (str): Optional. A string to specify the title of the
                visualization of this pose result
        img (str|np.ndarray): Optional. The image or image path to show the
            input image and/or 2D pose. Note that the image should be given
            in BGR channel order.
        skeleton (list of [idx_i,idx_j]): Skeleton described by a list of
            limbs, each is a pair of joint indices.
        pose_kpt_color (np.ndarray[Nx3]): Color of N keypoints. If None, do
            not draw keypoints.
        pose_limb_color (np.array[Mx3]): Color of M limbs. If None, do not
            draw limbs.
        vis_height (int): The image height of the visualization. The width
                will be N*vis_height depending on the number of visualized
                items.
        kpt_score_thr (float): Minimum score of keypoints to be shown.
            Default: 0.3.
        axis_azimuth (float): axis azimuth angle for 3D visualizations.
        axis_dist (float): axis distance for 3D visualizations.
        axis_elev (float): axis elevation view angle for 3D visualizations.
        axis_limit (float): The axis limit to visualize 3d pose. The xyz
            range will be set as:
            - x: [x_c - axis_limit/2, x_c + axis_limit/2]
            - y: [y_c - axis_limit/2, y_c + axis_limit/2]
            - z: [0, axis_limit]
            where x_c, y_c are the mean values of the x and y coordinates.
    """

    show_img = img is not None
    num_axis = len(pose_result) + 1 if show_img else len(pose_result)

    plt.ioff()
    fig = plt.figure(figsize=(vis_height * num_axis * 0.01, vis_height * 0.01))

    if show_img:
        img = mmcv.imread(img, channel_order='bgr')
        img = mmcv.bgr2rgb(img)
        img = mmcv.imrescale(img, scale=vis_height / img.shape[0])

        ax_img = fig.add_subplot(1, num_axis, 1)
        ax_img.get_xaxis().set_visible(False)
        ax_img.get_yaxis().set_visible(False)
        ax_img.set_axis_off()
        ax_img.set_title('Input')
        ax_img.imshow(img, aspect='equal')

    for idx, res in enumerate(pose_result):
        kpts = res['keypoints_3d']
        valid = kpts[:, 3] >= kpt_score_thr

        ax_idx = idx + 2 if show_img else idx + 1
        ax = fig.add_subplot(1, num_axis, ax_idx, projection='3d')
        ax.view_init(
            elev=axis_elev,
            azim=axis_azimuth,
        )
        x_c = np.mean(kpts[valid, 0]) if sum(valid) > 0 else 0
        y_c = np.mean(kpts[valid, 1]) if sum(valid) > 0 else 0
        ax.set_xlim3d([x_c - axis_limit / 2, x_c + axis_limit / 2])
        ax.set_ylim3d([y_c - axis_limit / 2, y_c + axis_limit / 2])
        ax.set_zlim3d([0, axis_limit])
        ax.set_aspect('auto')
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_zticks([])
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set_zticklabels([])
        ax.dist = axis_dist

        if pose_kpt_color is not None:
            pose_kpt_color = np.array(pose_kpt_color)
            assert len(pose_kpt_color) == len(kpts)
            x_3d, y_3d, z_3d = np.split(kpts[:, :3], [1, 2], axis=1)
            # matplotlib uses RGB color in [0, 1] value range
            _color = pose_kpt_color[..., ::-1] / 255.
            ax.scatter(
                x_3d[valid],
                y_3d[valid],
                z_3d[valid],
                marker='o',
                color=_color[valid],
            )

        if skeleton is not None and pose_limb_color is not None:
            pose_limb_color = np.array(pose_limb_color)
            assert len(pose_limb_color) == len(skeleton)
            for limb, limb_color in zip(skeleton, pose_limb_color):
                limb_indices = [_i - 1 for _i in limb]
                xs_3d = kpts[limb_indices, 0]
                ys_3d = kpts[limb_indices, 1]
                zs_3d = kpts[limb_indices, 2]
                kpt_score = kpts[limb_indices, 3]
                if kpt_score.min() > kpt_score_thr:
                    # matplotlib uses RGB color in [0, 1] value range
                    _color = limb_color[::-1] / 255.
                    ax.plot(xs_3d, ys_3d, zs_3d, color=_color, zdir='z')

        if 'title' in res:
            ax.set_title(res['title'])

    # convert figure to numpy array
    fig.tight_layout()
    fig.canvas.draw()
    img_w, img_h = fig.canvas.get_width_height()
    img_vis = np.frombuffer(fig.canvas.tostring_rgb(),
                            dtype=np.uint8).reshape(img_h, img_w, -1)
    img_vis = mmcv.rgb2bgr(img_vis)

    plt.close(fig)

    return img_vis
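A hedged usage sketch for imshow_keypoints_3d with synthetic data; the keypoint count, skeleton, and colors below are made up purely for illustration and do not correspond to any particular dataset.

import numpy as np

kpts = np.random.rand(17, 4).astype(np.float32)       # (K, 4): x, y, z, score
kpts[:, 3] = 1.0                                       # mark all keypoints as confident
pose_result = [dict(keypoints_3d=kpts, title='demo pose')]
skeleton = [[1, 2], [2, 3]]                            # 1-based joint index pairs
kpt_color = np.tile([0, 255, 0], (17, 1))              # one BGR color per keypoint
limb_color = np.tile([255, 0, 0], (len(skeleton), 1))  # one BGR color per limb

vis = imshow_keypoints_3d(pose_result, img=None, skeleton=skeleton,
                          pose_kpt_color=kpt_color, pose_limb_color=limb_color,
                          vis_height=400, kpt_score_thr=0.3)
print(vis.shape)  # visualization returned as a BGR numpy array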
Esempio n. 30
0
import mmcv
import numpy as np

flag_1 = False  # read an image, save an image
flag_2 = False  # read an image from bytes
flag_3 = False  # show an image
flag_4 = False  # image color space conversion
flag_5 = False  # Resize
flag_6 = False  # Rotate
flag_7 = False  # Flip
flag_8 = False  # Crop
flag_9 = False  # Padding

if flag_1:
    # To read or write images files, use imread or imwrite.
    img = mmcv.imread("asset/a.jpg")
    img = mmcv.imread("asset/a.jpg", flag='grayscale')
    img_ = mmcv.imread(img)  # nothing will happen, img_ = img
    mmcv.imwrite(img, 'out.jpg')

if flag_2:
    # To read images from bytes
    with open("asset/a.jpg", 'rb') as f:
        data = f.read()
    img = mmcv.imfrombytes(data)
    print(img)

if flag_3:
    # To show an image file or a loaded image
    mmcv.imshow("asset/a.jpg")
    # this is equivalent to