# Datalist of SCUT-CTW1500
test_dataset = './datalist/ctw1500_test_datalist.json'
img_prefix = '/path/to/Img_prefix/CTW1500/'
out_dir = 'result'

test_file = mmcv.load(test_dataset)
cnt = 0
time_sum = 0.0
out_dict = {}

# Inference and visualize image one by one
for filename in test_file:
    # Load images
    img_path = img_prefix + filename
    img = mmcv.imread(img_path)
    img_copy = img.copy()
    img_name = img_path.split("/")[-1]

    # Inference
    print('predicting {} - {}'.format(cnt, img_path))
    time_start = time.time()
    result = inference_detector(model, img_path)
    time_end = time.time()
    time_sum += (time_end - time_start)
    print(result)

    # Results visualization
    bboxes = []
    for i in range(len(result["points"])):
        points2 = result["points"][i]
def test_flip(): # test assertion for invalid flip_ratio with pytest.raises(AssertionError): transform = dict(type='RandomFlip', flip_ratio=1.5) build_from_cfg(transform, PIPELINES) # test assertion for 0 <= sum(flip_ratio) <= 1 with pytest.raises(AssertionError): transform = dict(type='RandomFlip', flip_ratio=[0.7, 0.8], direction=['horizontal', 'vertical']) build_from_cfg(transform, PIPELINES) # test assertion for mismatch between number of flip_ratio and direction with pytest.raises(AssertionError): transform = dict(type='RandomFlip', flip_ratio=[0.4, 0.5]) build_from_cfg(transform, PIPELINES) # test assertion for invalid direction with pytest.raises(AssertionError): transform = dict(type='RandomFlip', flip_ratio=1., direction='horizonta') build_from_cfg(transform, PIPELINES) transform = dict(type='RandomFlip', flip_ratio=1.) flip_module = build_from_cfg(transform, PIPELINES) results = dict() img = mmcv.imread(osp.join(osp.dirname(__file__), '../data/color.jpg'), 'color') original_img = copy.deepcopy(img) results['img'] = img results['img2'] = copy.deepcopy(img) results['img_shape'] = img.shape results['ori_shape'] = img.shape # Set initial values for default meta_keys results['pad_shape'] = img.shape results['scale_factor'] = 1.0 results['img_fields'] = ['img', 'img2'] results = flip_module(results) assert np.equal(results['img'], results['img2']).all() flip_module = build_from_cfg(transform, PIPELINES) results = flip_module(results) assert np.equal(results['img'], results['img2']).all() assert np.equal(original_img, results['img']).all() # test flip_ratio is float, direction is list transform = dict(type='RandomFlip', flip_ratio=0.9, direction=['horizontal', 'vertical', 'diagonal']) flip_module = build_from_cfg(transform, PIPELINES) results = dict() img = mmcv.imread(osp.join(osp.dirname(__file__), '../data/color.jpg'), 'color') original_img = copy.deepcopy(img) results['img'] = img results['img_shape'] = img.shape results['ori_shape'] = img.shape # Set initial values for default meta_keys results['pad_shape'] = img.shape results['scale_factor'] = 1.0 results['img_fields'] = ['img'] results = flip_module(results) if results['flip']: assert np.array_equal( mmcv.imflip(original_img, results['flip_direction']), results['img']) else: assert np.array_equal(original_img, results['img']) # test flip_ratio is list, direction is list transform = dict(type='RandomFlip', flip_ratio=[0.3, 0.3, 0.2], direction=['horizontal', 'vertical', 'diagonal']) flip_module = build_from_cfg(transform, PIPELINES) results = dict() img = mmcv.imread(osp.join(osp.dirname(__file__), '../data/color.jpg'), 'color') original_img = copy.deepcopy(img) results['img'] = img results['img_shape'] = img.shape results['ori_shape'] = img.shape # Set initial values for default meta_keys results['pad_shape'] = img.shape results['scale_factor'] = 1.0 results['img_fields'] = ['img'] results = flip_module(results) if results['flip']: assert np.array_equal( mmcv.imflip(original_img, results['flip_direction']), results['img']) else: assert np.array_equal(original_img, results['img'])
def show_result2(self, img, result, score_thr=0.3, bbox_color='green', text_color='green', thickness=1, font_scale=0.5, win_name='', show=False, wait_time=0, out_file=None): """Draw `result` over `img`. Args: img (str or Tensor): The image to be displayed. result (Tensor or tuple): The results to draw over `img` bbox_result or (bbox_result, segm_result). score_thr (float, optional): Minimum score of bboxes to be shown. Default: 0.3. bbox_color (str or tuple or :obj:`Color`): Color of bbox lines. text_color (str or tuple or :obj:`Color`): Color of texts. thickness (int): Thickness of lines. font_scale (float): Font scales of texts. win_name (str): The window name. wait_time (int): Value of waitKey param. Default: 0. show (bool): Whether to show the image. Default: False. out_file (str or None): The filename to write the image. Default: None. Returns: img (Tensor): Only if not `show` or `out_file` """ img_o = mmcv.imread(img) img = np.zeros((500, 500, 3)) img = img.copy() if isinstance(result, tuple): bbox_result, segm_result = result if isinstance(segm_result, tuple): segm_result = segm_result[0] # ms rcnn else: bbox_result, segm_result = result, None bboxes = np.vstack(bbox_result) labels = [ np.full(bbox.shape[0], i, dtype=np.int32) for i, bbox in enumerate(bbox_result) ] labels = np.concatenate(labels) # draw segmentation masks if segm_result is not None and len(labels) > 0: # non empty segms = mmcv.concat_list(segm_result) inds = np.where(bboxes[:, -1] > score_thr)[0] np.random.seed(42) color_masks = [ np.random.randint(40, 256, (1, 3), dtype=np.uint8) for _ in range(max(labels) + 1) ] for i in inds: i = int(i) color_mask = color_masks[labels[i]] mask = segms[i] img[mask] = img[mask] * 0.5 + color_mask * 0.5 # if out_file specified, do not show image in window if out_file is not None: show = False from PIL import Image im = Image.fromarray(np.uint8(img)) im.save(out_file) # draw bounding boxes # mmcv.imshow_det_bboxes( # img_o, # bboxes, # labels, # class_names=self.CLASSES, # score_thr=score_thr, # bbox_color=bbox_color, # text_color=text_color, # thickness=thickness, # font_scale=font_scale, # win_name=win_name, # show=show, # wait_time=wait_time, # out_file='/disk2/mask_bbox_100/'+out_file.split('/')[-1]) if not (show or out_file): return img
def test_resize(): # test assertion if img_scale is a list with pytest.raises(AssertionError): transform = dict(type='Resize', img_scale=[1333, 800], keep_ratio=True) build_from_cfg(transform, PIPELINES) # test assertion if len(img_scale) while ratio_range is not None with pytest.raises(AssertionError): transform = dict(type='Resize', img_scale=[(1333, 800), (1333, 600)], ratio_range=(0.9, 1.1), keep_ratio=True) build_from_cfg(transform, PIPELINES) # test assertion for invalid multiscale_mode with pytest.raises(AssertionError): transform = dict(type='Resize', img_scale=[(1333, 800), (1333, 600)], keep_ratio=True, multiscale_mode='2333') build_from_cfg(transform, PIPELINES) # test assertion if both scale and scale_factor are setted with pytest.raises(AssertionError): results = dict(img_prefix=osp.join(osp.dirname(__file__), '../data'), img_info=dict(filename='color.jpg')) load = dict(type='LoadImageFromFile') load = build_from_cfg(load, PIPELINES) transform = dict(type='Resize', img_scale=(1333, 800), keep_ratio=True) transform = build_from_cfg(transform, PIPELINES) results = load(results) results['scale'] = (1333, 800) results['scale_factor'] = 1.0 results = transform(results) transform = dict(type='Resize', img_scale=(1333, 800), keep_ratio=True) resize_module = build_from_cfg(transform, PIPELINES) results = dict() img = mmcv.imread(osp.join(osp.dirname(__file__), '../data/color.jpg'), 'color') results['img'] = img results['img2'] = copy.deepcopy(img) results['img_shape'] = img.shape results['ori_shape'] = img.shape # Set initial values for default meta_keys results['pad_shape'] = img.shape results['img_fields'] = ['img', 'img2'] results = resize_module(results) assert np.equal(results['img'], results['img2']).all() results.pop('scale') results.pop('scale_factor') transform = dict(type='Resize', img_scale=(1280, 800), multiscale_mode='value', keep_ratio=False) resize_module = build_from_cfg(transform, PIPELINES) results = resize_module(results) assert np.equal(results['img'], results['img2']).all() assert results['img_shape'] == (800, 1280, 3)
def test_multi_scale_flip_aug(): # test assertion if give both scale_factor and img_scale with pytest.raises(AssertionError): transform = dict(type='MultiScaleFlipAug', scale_factor=1.0, img_scale=[(1333, 800)], transforms=[dict(type='Resize')]) build_from_cfg(transform, PIPELINES) # test assertion if both scale_factor and img_scale are None with pytest.raises(AssertionError): transform = dict(type='MultiScaleFlipAug', scale_factor=None, img_scale=None, transforms=[dict(type='Resize')]) build_from_cfg(transform, PIPELINES) # test assertion if img_scale is not tuple or list of tuple with pytest.raises(AssertionError): transform = dict(type='MultiScaleFlipAug', img_scale=[1333, 800], transforms=[dict(type='Resize')]) build_from_cfg(transform, PIPELINES) # test assertion if flip_direction is not str or list of str with pytest.raises(AssertionError): transform = dict(type='MultiScaleFlipAug', img_scale=[(1333, 800)], flip_direction=1, transforms=[dict(type='Resize')]) build_from_cfg(transform, PIPELINES) scale_transform = dict(type='MultiScaleFlipAug', img_scale=[(1333, 800), (1333, 640)], transforms=[dict(type='Resize', keep_ratio=True)]) transform = build_from_cfg(scale_transform, PIPELINES) results = dict() img = mmcv.imread(osp.join(osp.dirname(__file__), '../data/color.jpg'), 'color') results['img'] = img results['img_shape'] = img.shape results['ori_shape'] = img.shape # Set initial values for default meta_keys results['pad_shape'] = img.shape results['img_fields'] = ['img'] scale_results = transform(copy.deepcopy(results)) assert len(scale_results['img']) == 2 assert scale_results['img'][0].shape == (750, 1333, 3) assert scale_results['img_shape'][0] == (750, 1333, 3) assert scale_results['img'][1].shape == (640, 1138, 3) assert scale_results['img_shape'][1] == (640, 1138, 3) scale_factor_transform = dict( type='MultiScaleFlipAug', scale_factor=[0.8, 1.0, 1.2], transforms=[dict(type='Resize', keep_ratio=False)]) transform = build_from_cfg(scale_factor_transform, PIPELINES) scale_factor_results = transform(copy.deepcopy(results)) assert len(scale_factor_results['img']) == 3 assert scale_factor_results['img'][0].shape == (230, 409, 3) assert scale_factor_results['img_shape'][0] == (230, 409, 3) assert scale_factor_results['img'][1].shape == (288, 512, 3) assert scale_factor_results['img_shape'][1] == (288, 512, 3) assert scale_factor_results['img'][2].shape == (345, 614, 3) assert scale_factor_results['img_shape'][2] == (345, 614, 3) # test pipeline of coco_detection results = dict(img_prefix=osp.join(osp.dirname(__file__), '../data'), img_info=dict(filename='color.jpg')) load_cfg, multi_scale_cfg = mmcv.Config.fromfile( 'configs/_base_/datasets/coco_detection.py').test_pipeline load = build_from_cfg(load_cfg, PIPELINES) transform = build_from_cfg(multi_scale_cfg, PIPELINES) results = transform(load(results)) assert len(results['img']) == 1 assert len(results['img_metas']) == 1 assert isinstance(results['img'][0], torch.Tensor) assert isinstance(results['img_metas'][0], mmcv.parallel.DataContainer) assert results['img_metas'][0].data['ori_shape'] == (288, 512, 3) assert results['img_metas'][0].data['img_shape'] == (750, 1333, 3) assert results['img_metas'][0].data['pad_shape'] == (768, 1344, 3) assert results['img_metas'][0].data['scale_factor'].tolist() == [ 2.603515625, 2.6041667461395264, 2.603515625, 2.6041667461395264 ]
def visualize_bbox_act(img, bboxes,labels, act_preds, classes=None,thickness=1, font_scale=0.4,show=False, wait_time=0,out_file=None): """Show the tracks with opencv.""" assert bboxes.ndim == 2 assert labels.ndim == 1 assert bboxes.shape[0] == labels.shape[0] assert bboxes.shape[1] == 5 if isinstance(img, str): img = mmcv.imread(img) img_shape = img.shape bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, img_shape[1]) bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, img_shape[0]) text_width, text_height = 8, 15 for i, (bbox, label) in enumerate(zip(bboxes, labels), 0): x1, y1, x2, y2 = bbox[:4].astype(np.int32) score = float(bbox[-1]) # bbox bbox_color = random_color(label) bbox_color = [int(255 * _c) for _c in bbox_color][::-1] cv2.rectangle(img, (x1, y1), (x2, y2), bbox_color, thickness=thickness) # score text = '{:.02f}'.format(score) width = len(text) * text_width img[y1 - text_height:y1, x1:x1 + width, :] = bbox_color cv2.putText( img, text, (x1, y1 - 2), cv2.FONT_HERSHEY_COMPLEX, font_scale, color=(0, 0, 0)) classes_color = random_color(label + 1) text = classes[label] width = len(text) * text_width img[y1:y1 + text_height, x1:x1 + width, :] = bbox_color cv2.putText(img,text, (x1, y1 + text_height - 2), cv2.FONT_HERSHEY_COMPLEX, font_scale,color=classes_color) #background_color = random_color(label + 5) background_color = [255, 204, 153] if (act_preds is not None) and (len(bboxes)==len(labels)==len(act_preds)): for j, act_pred in enumerate(act_preds[i]): text = '{}: {:.02f}'.format(act_pred[0], act_pred[1]) width = len(text) * (text_width) img[y1+text_height*(j+2) :y1 + text_height*(j+3), x1:x1 + width, :] = background_color cv2.putText(img, text, (x1, y1 + text_height*(j+3) - 2), cv2.FONT_HERSHEY_COMPLEX, font_scale, color=classes_color) if show: mmcv.imshow(img, wait_time=wait_time) if out_file is not None: mmcv.imwrite(img, out_file) return img
def convert_lasot_test(lasot_test, ann_dir, save_dir):
    """Convert lasot dataset to COCO style.

    Args:
        lasot_test (dict): The converted COCO style annotations.
        ann_dir (str): The path of lasot test dataset.
        save_dir (str): The path to save `lasot_test`.
    """
    records = dict(vid_id=1, img_id=1, ann_id=1, global_instance_id=1)
    videos_list = osp.join(ann_dir, 'testing_set.txt')
    videos_list = mmcv.list_from_file(videos_list)
    lasot_test['categories'] = [dict(id=0, name=0)]

    for video_name in tqdm(videos_list):
        video_path = osp.join(ann_dir, video_name)
        video = dict(id=records['vid_id'], name=video_name)
        lasot_test['videos'].append(video)

        gt_bboxes = mmcv.list_from_file(
            osp.join(video_path, 'groundtruth.txt'))
        full_occlusion = mmcv.list_from_file(
            osp.join(video_path, 'full_occlusion.txt'))
        full_occlusion = full_occlusion[0].split(',')
        out_of_view = mmcv.list_from_file(
            osp.join(video_path, 'out_of_view.txt'))
        out_of_view = out_of_view[0].split(',')

        img = mmcv.imread(osp.join(video_path, 'img/00000001.jpg'))
        height, width, _ = img.shape
        for frame_id, gt_bbox in enumerate(gt_bboxes):
            file_name = '%08d' % (frame_id + 1) + '.jpg'
            file_name = osp.join(video_name, 'img', file_name)
            image = dict(file_name=file_name,
                         height=height,
                         width=width,
                         id=records['img_id'],
                         frame_id=frame_id,
                         video_id=records['vid_id'])
            lasot_test['images'].append(image)

            x1, y1, w, h = gt_bbox.split(',')
            ann = dict(id=records['ann_id'],
                       image_id=records['img_id'],
                       instance_id=records['global_instance_id'],
                       category_id=0,
                       bbox=[int(x1), int(y1), int(w), int(h)],
                       area=int(w) * int(h),
                       full_occlusion=full_occlusion[frame_id] == '1',
                       out_of_view=out_of_view[frame_id] == '1')
            lasot_test['annotations'].append(ann)
            records['ann_id'] += 1
            records['img_id'] += 1
        records['global_instance_id'] += 1
        records['vid_id'] += 1

    mmcv.dump(lasot_test, osp.join(save_dir, 'lasot_test.json'))
    print('-----LaSOT Test Dataset------')
    print(f'{records["vid_id"] - 1} videos')
    print(f'{records["global_instance_id"] - 1} instances')
    print(f'{records["img_id"] - 1} images')
    print(f'{records["ann_id"] - 1} objects')
    print('-----------------------------')
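# Minimal, hypothetical driver for convert_lasot_test() above (not part of the original
# snippet): the directory layout and output path are assumptions. The function only
# appends to 'videos', 'images' and 'annotations' and overwrites 'categories', so an
# empty COCO-style skeleton is enough to start from.
if __name__ == '__main__':
    lasot_test = dict(videos=[], images=[], annotations=[], categories=[])
    convert_lasot_test(lasot_test,
                       ann_dir='data/lasot/LaSOTBenchmark',  # assumed dataset root
                       save_dir='data/lasot/annotations')  # assumed output dir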
def convert_det(data_dir, mode=None, adjust_center=True): kitti = defaultdict(list) img_dir = osp.join(data_dir, 'image_2') label_dir = osp.join(data_dir, 'label_2') cali_dir = osp.join(data_dir, 'calib') if not osp.exists(img_dir): print(f"Folder {img_dir} is not found") return None if not osp.exists(label_dir): label_dir = None img_names = sorted(os.listdir(img_dir)) for k, v in cats_mapping.items(): kitti['categories'].append(dict(id=v, name=k)) pose_dict = dict(rotation=[0, 0, 0], position=[0, 0, 0]) ann_id = 0 vid_info = dict(id=0, name='', n_frames=len(img_names)) kitti['videos'].append(vid_info) for img_id, img_name in enumerate(img_names): if mode == 'train': if osp.splitext(img_name)[0] in det_val_sets: continue elif mode == 'val': if osp.splitext(img_name)[0] not in det_val_sets: continue print('DET ID: {}'.format(img_id)) img = mmcv.imread(osp.join(img_dir, img_name)) height, width, _ = img.shape projection = ku.read_calib_det(cali_dir, img_id) index = int(img_name.split('.')[0]) img_info = dict(file_name=osp.join(img_dir, img_name), cali=projection.tolist(), pose=pose_dict, height=height, width=width, fov=60, near_clip=0.15, id=img_id, video_id=0, index=index) kitti['images'].append(img_info) if label_dir: label_file = osp.join(label_dir, '{}.txt'.format(img_name.split('.')[0])) labels = mmcv.list_from_file(label_file) track_id = 0 for label in labels: label = label.split() cat = label[0] if cat in ['DontCare']: continue # if cat not in kitti_cats.keys(): # continue x1, y1, x2, y2 = float(label[4]), float(label[5]), float( label[6]), float(label[7]) if adjust_center: # KITTI GT uses the bottom of the car as center (x, 0, z). # Prediction uses center of the bbox as center (x, y, z). # So we align them to the bottom center as GT does y_cen_adjust = float(label[8]) / 2.0 else: y_cen_adjust = 0.0 center_2d = tu.cameratoimage( np.array([[ float(label[11]), float(label[12]) - y_cen_adjust, float(label[13]) ]]), projection).flatten().tolist() ann = dict(id=ann_id, image_id=img_id, instance_id=track_id, category_id=cats_mapping[kitti_cats[cat]], alpha=float(label[3]), roty=float(label[14]), dimension=[ float(label[8]), float(label[9]), float(label[10]) ], translation=[ float(label[11]), float(label[12]) - y_cen_adjust, float(label[13]) ], is_occluded=int(label[2]), is_truncated=float(label[1]), center_2d=center_2d, delta_2d=[ center_2d[0] - (x1 + x2) / 2.0, center_2d[1] - (y1 + y2) / 2.0 ], bbox=[x1, y1, x2 - x1, y2 - y1], area=(x2 - x1) * (y2 - y1), iscrowd=False, ignore=False, segmentation=[[x1, y1, x1, y2, x2, y2, x2, y1]]) kitti['annotations'].append(ann) ann_id += 1 track_id += 1 return kitti
def convert_track(data_dir, mode=None, adjust_center=True): kitti = defaultdict(list) img_dir = osp.join(data_dir, 'image_02') label_dir = osp.join(data_dir, 'label_02') cali_dir = osp.join(data_dir, 'calib') oxt_dir = osp.join(data_dir, 'oxts') if not osp.exists(img_dir): print(f"Folder {img_dir} is not found") return None if not osp.exists(label_dir): label_dir = None vid_names = sorted(os.listdir(img_dir)) print(f"{data_dir} with {len(vid_names)} sequences") for k, v in cats_mapping.items(): kitti['categories'].append(dict(id=v, name=k)) img_id = 0 global_track_id = 0 ann_id = 0 for vid_id, vid_name in enumerate(vid_names): if mode == 'train': if vid_name in val_sets: continue elif mode == 'val': if vid_name not in val_sets: continue elif mode == 'mini': if vid_name not in mini_sets: continue print("VID ID: {}".format(vid_id)) ind2id = dict() trackid_maps = dict() img_names = sorted([ f.path for f in os.scandir(osp.join(img_dir, vid_name)) if f.is_file() and f.name.endswith('png') ]) vid_info = dict(id=vid_id, name=vid_name, n_frames=len(img_names)) kitti['videos'].append(vid_info) projection = ku.read_calib(cali_dir, vid_id) for fr, img_name in enumerate(sorted(img_names)): img = mmcv.imread(img_name) fields = ku.read_oxts(oxt_dir, vid_id) poses = [ku.KittiPoseParser(fields[i]) for i in range(len(fields))] rotation = R.from_matrix(poses[fr].rotation).as_euler('xyz') position = poses[fr].position - poses[0].position pose_dict = dict(rotation=rotation.tolist(), position=position.tolist()) height, width, _ = img.shape index = fr img_info = dict(file_name=img_name, cali=projection.tolist(), pose=pose_dict, height=height, width=width, fov=60, near_clip=0.15, id=img_id, video_id=vid_id, index=index) kitti['images'].append(img_info) ind2id[index] = img_id img_id += 1 if label_dir: label_file = osp.join(label_dir, '{}.txt'.format(vid_name)) labels = mmcv.list_from_file(label_file) for label in labels: label = label.split() cat = label[2] if cat in ['DontCare']: continue # if cat not in kitti_cats.keys(): # continue image_id = ind2id[int(label[0])] if label[1] in trackid_maps.keys(): track_id = trackid_maps[label[1]] else: track_id = global_track_id trackid_maps[label[1]] = track_id global_track_id += 1 x1, y1, x2, y2 = float(label[6]), float(label[7]), float( label[8]), float(label[9]) if adjust_center: # KITTI GT uses the bottom of the car as center (x, 0, z). # Prediction uses center of the bbox as center (x, y, z). # So we align them to the bottom center as GT does y_cen_adjust = float(label[10]) / 2.0 else: y_cen_adjust = 0.0 center_2d = tu.cameratoimage( np.array([[ float(label[13]), float(label[14]) - y_cen_adjust, float(label[15]) ]]), projection).flatten().tolist() ann = dict(id=ann_id, image_id=image_id, category_id=cats_mapping[kitti_cats[cat]], instance_id=track_id, alpha=float(label[5]), roty=float(label[16]), dimension=[ float(label[10]), float(label[11]), float(label[12]) ], translation=[ float(label[13]), float(label[14]) - y_cen_adjust, float(label[15]) ], is_occluded=int(label[4]), is_truncated=float(label[3]), center_2d=center_2d, delta_2d=[ center_2d[0] - (x1 + x2) / 2.0, center_2d[1] - (y1 + y2) / 2.0 ], bbox=[x1, y1, x2 - x1, y2 - y1], area=(x2 - x1) * (y2 - y1), iscrowd=False, ignore=False, segmentation=[[x1, y1, x1, y2, x2, y2, x2, y1]]) kitti['annotations'].append(ann) ann_id += 1 return kitti
def show_result(self,
                img,
                result,
                palette=None,
                win_name='',
                show=False,
                wait_time=0,
                out_file=None,
                opacity=0.5):
    """Draw `result` over `img`.

    Args:
        img (str or Tensor): The image to be displayed.
        result (Tensor): The semantic segmentation results to draw over
            `img`.
        palette (list[list[int]]] | np.ndarray | None): The palette of
            segmentation map. If None is given, random palette will be
            generated. Default: None
        win_name (str): The window name.
        wait_time (int): Value of waitKey param. Default: 0.
        show (bool): Whether to show the image. Default: False.
        out_file (str or None): The filename to write the image.
            Default: None.
        opacity(float): Opacity of painted segmentation map.
            Default 0.5. Must be in (0, 1] range.

    Returns:
        img (Tensor): Only if not `show` or `out_file`
    """
    img = mmcv.imread(img)
    img = img.copy()
    seg = result[0]
    if palette is None:
        if self.PALETTE is None:
            palette = np.random.randint(0, 255, size=(len(self.CLASSES), 3))
        else:
            palette = self.PALETTE
    palette = np.array(palette)
    assert palette.shape[0] == len(self.CLASSES)
    assert palette.shape[1] == 3
    assert len(palette.shape) == 2
    assert 0 < opacity <= 1.0
    color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8)
    for label, color in enumerate(palette):
        color_seg[seg == label, :] = color
    # convert to BGR
    color_seg = color_seg[..., ::-1]

    img = img * (1 - opacity) + color_seg * opacity
    img = img.astype(np.uint8)
    # if out_file specified, do not show image in window
    if out_file is not None:
        show = False

    if show:
        mmcv.imshow(img, win_name, wait_time)
    if out_file is not None:
        mmcv.imwrite(img, out_file)

    if not (show or out_file):
        warnings.warn('show==False and out_file is not specified, only '
                      'result image will be returned')
        return img
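# Hedged usage sketch for show_result() above; it is not taken from the original file.
# The config, checkpoint and image paths are placeholders, and init_segmentor /
# inference_segmentor are assumed to be the usual MMSegmentation entry points for
# building the segmentor (`self`) and producing `result`.
from mmseg.apis import inference_segmentor, init_segmentor

segmentor = init_segmentor('configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py',
                           'checkpoints/pspnet_r50.pth',  # placeholder checkpoint
                           device='cuda:0')
seg_result = inference_segmentor(segmentor, 'demo/demo.png')
# Returns the blended image when neither show nor out_file is set; writes it when out_file is given.
segmentor.show_result('demo/demo.png', seg_result,
                      out_file='demo/demo_seg.png', opacity=0.5)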
def __call__(self):
    """Load light-weight instance annotations of all images into a list of
    dicts in Detectron2 format.

    Do not load heavy data into memory in this file, since we will load the
    annotations of all images into memory.
    """
    # cache the dataset_dicts to avoid loading masks from files
    hashed_file_name = hashlib.md5(
        ("".join([str(fn) for fn in self.objs]) +
         "dataset_dicts_{}_{}_{}_{}_{}_{}".format(
             self.name, self.dataset_root, self.with_masks, self.with_depth,
             self.with_xyz, osp.abspath(__file__))).encode("utf-8")).hexdigest()
    cache_path = osp.join(
        self.dataset_root,
        "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name))

    if osp.exists(cache_path) and self.use_cache:
        logger.info("load cached dataset dicts from {}".format(cache_path))
        return mmcv.load(cache_path)

    t_start = time.perf_counter()

    dataset_dicts = []
    self.num_instances_without_valid_segmentation = 0
    self.num_instances_without_valid_box = 0
    logger.info("loading dataset dicts: {}".format(self.name))
    # it is slow because of loading and converting masks to rle
    for scene in self.scenes:
        scene_id = int(scene)
        scene_root = osp.join(self.dataset_root, scene)

        gt_dict = mmcv.load(osp.join(scene_root, 'scene_gt.json'))
        gt_info_dict = mmcv.load(osp.join(scene_root, 'scene_gt_info.json'))
        cam_dict = mmcv.load(osp.join(scene_root, 'scene_camera.json'))

        for str_im_id in tqdm(gt_dict, postfix=f"{scene_id}"):
            int_im_id = int(str_im_id)
            rgb_path = osp.join(scene_root, "rgb/{:06d}.jpg").format(int_im_id)
            assert osp.exists(rgb_path), rgb_path

            depth_path = osp.join(scene_root,
                                  "depth/{:06d}.png".format(int_im_id))

            K = np.array(cam_dict[str_im_id]['cam_K'],
                         dtype=np.float32).reshape(3, 3)
            depth_factor = 1000.0 / cam_dict[str_im_id]['depth_scale']  # 10000

            record = {
                "dataset_name": self.name,
                'file_name': osp.relpath(rgb_path, PROJ_ROOT),
                'depth_file': osp.relpath(depth_path, PROJ_ROOT),
                'height': self.height,
                'width': self.width,
                'image_id': int_im_id,
                "scene_im_id": "{}/{}".format(scene_id, int_im_id),  # for evaluation
                "cam": K,
                "depth_factor": depth_factor,
                "img_type": 'syn_pbr'  # NOTE: has background
            }
            insts = []
            for anno_i, anno in enumerate(gt_dict[str_im_id]):
                obj_id = anno['obj_id']
                if obj_id not in self.cat_ids:
                    continue
                cur_label = self.cat2label[obj_id]  # 0-based label
                R = np.array(anno['cam_R_m2c'], dtype='float32').reshape(3, 3)
                t = np.array(anno['cam_t_m2c'], dtype='float32') / 1000.0
                pose = np.hstack([R, t.reshape(3, 1)])
                quat = mat2quat(R).astype('float32')
                allo_q = mat2quat(
                    egocentric_to_allocentric(pose)[:3, :3]).astype('float32')

                proj = (record["cam"] @ t.T).T
                proj = proj[:2] / proj[2]

                bbox_visib = gt_info_dict[str_im_id][anno_i]['bbox_visib']
                bbox_obj = gt_info_dict[str_im_id][anno_i]['bbox_obj']
                x1, y1, w, h = bbox_visib
                if self.filter_invalid:
                    if h <= 1 or w <= 1:
                        self.num_instances_without_valid_box += 1
                        continue

                mask_file = osp.join(
                    scene_root,
                    "mask/{:06d}_{:06d}.png".format(int_im_id, anno_i))
                mask_visib_file = osp.join(
                    scene_root,
                    "mask_visib/{:06d}_{:06d}.png".format(int_im_id, anno_i))
                assert osp.exists(mask_file), mask_file
                assert osp.exists(mask_visib_file), mask_visib_file
                # load mask visib  TODO: load both mask_visib and mask_full
                mask_single = mmcv.imread(mask_visib_file, "unchanged")
                area = mask_single.sum()
                if area < 3:  # filter out too small or nearly invisible instances
                    self.num_instances_without_valid_segmentation += 1
                    continue
                mask_rle = binary_mask_to_rle(mask_single, compressed=True)

                inst = {
                    'category_id': cur_label,  # 0-based label
                    'bbox': bbox_visib,  # TODO: load both bbox_obj and bbox_visib
                    'bbox_mode': BoxMode.XYWH_ABS,
                    'pose': pose,
                    "quat": quat,
                    "trans": t,
                    "allo_quat": allo_q,
                    "centroid_2d": proj,  # absolute (cx, cy)
                    "segmentation": mask_rle,
                    "mask_full_file": mask_file,  # TODO: load as mask_full, rle
                }
                if self.with_xyz:
                    xyz_crop_path = mask_file.replace(
                        "/mask/", "/xyz_crop/").replace(".png", ".pkl")
                    assert osp.exists(xyz_crop_path), xyz_crop_path
                    inst["xyz_crop_path"] = xyz_crop_path

                insts.append(inst)
            if len(insts) == 0:  # filter im without anno
                continue
            record['annotations'] = insts
            dataset_dicts.append(record)

    if self.num_instances_without_valid_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid segmentation. "
            "There might be issues in your dataset generation process.".format(
                self.num_instances_without_valid_segmentation))
    if self.num_instances_without_valid_box > 0:
        logger.warning(
            "Filtered out {} instances without valid box. "
            "There might be issues in your dataset generation process.".format(
                self.num_instances_without_valid_box))
    ##########################
    if self.num_to_load > 0:
        self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
        dataset_dicts = dataset_dicts[:self.num_to_load]
    logger.info("loaded {} dataset dicts, using {}s".format(
        len(dataset_dicts), time.perf_counter() - t_start))

    mkdir_p(osp.dirname(cache_path))
    mmcv.dump(dataset_dicts, cache_path, protocol=4)
    logger.info("Dumped dataset_dicts to {}".format(cache_path))
    return dataset_dicts
def prepare_train_img(self, idx): img_info = self.img_infos[idx] # load image img = mmcv.imread(osp.join(self.img_prefix, img_info['filename'])) # load proposals if necessary if self.proposals is not None: proposals = self.proposals[idx][:self.num_max_proposals] # TODO: Handle empty proposals properly. Currently images with # no proposals are just ignored, but they can be used for # training in concept. if len(proposals) == 0: return None if not (proposals.shape[1] == 4 or proposals.shape[1] == 5): raise AssertionError( 'proposals should have shapes (n, 4) or (n, 5), ' 'but found {}'.format(proposals.shape)) if proposals.shape[1] == 5: scores = proposals[:, 4, None] proposals = proposals[:, :4] else: scores = None ann = self.get_ann_info(idx) gt_bboxes = ann['bboxes'] gt_labels = ann['labels'] if self.with_crowd: gt_bboxes_ignore = ann['bboxes_ignore'] assert len(self.img_scales[0]) == 2 and isinstance( self.img_scales[0][0], int) img, gt_bboxes, gt_labels, gt_bboxes_ignore = augment( img, gt_bboxes, gt_labels, gt_bboxes_ignore, self.img_scales[0]) ori_shape = img.shape[:2] img, img_shape, pad_shape, scale_factor = self.img_transform( img, img.shape[:2], False, keep_ratio=self.resize_keep_ratio) assert (scale_factor == 1) img_meta = dict( ori_shape=ori_shape, img_shape=ori_shape, pad_shape=(0, 0), scale_factor=1, flip=False, name=img_info['filename'], ) pos_maps = [] scale_maps = [] offset_maps = [] if not self.with_crowd: gt_bboxes_ignore = None for i, stride in enumerate(self.strides): pos_map, scale_map, offset_map = self.calc_gt_center(gt_bboxes, gt_bboxes_ignore, \ stride=stride, regress_range=self.regress_ranges[i], image_shape=ori_shape) pos_maps.append(pos_map) scale_maps.append(scale_map) offset_maps.append(offset_map) data = dict(img=DC(to_tensor(img), stack=True), img_meta=DC(img_meta, cpu_only=True), gt_bboxes=DC(to_tensor(gt_bboxes))) if self.proposals is not None: data['proposals'] = DC(to_tensor(proposals)) if self.with_label: data['gt_labels'] = DC(to_tensor(gt_labels)) if self.with_crowd: data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore)) data['classification_maps'] = DC( [to_tensor(pos_map) for pos_map in pos_maps]) data['scale_maps'] = DC( [to_tensor(scale_map) for scale_map in scale_maps]) data['offset_maps'] = DC( [to_tensor(offset_map) for offset_map in offset_maps]) return data
def test_readtext(mock_kiedataset):
    # Fix the weights of models to prevent them from
    # generating invalid results and triggering other assertion errors
    torch.manual_seed(4)
    random.seed(4)
    mmocr = MMOCR_testobj()
    mmocr_det = MMOCR_testobj(kie='', recog='')
    mmocr_recog = MMOCR_testobj(kie='', det='', recog='CRNN_TPS')
    mmocr_det_recog = MMOCR_testobj(kie='')

    def readtext(imgs, ocr_obj=mmocr, **kwargs):
        # filename can be different depending on how
        # the image was loaded
        e2e_res = ocr_obj.readtext(imgs, **kwargs)
        for res in e2e_res:
            res.pop('filename')
        return e2e_res

    def kiedataset_with_test_dict(**kwargs):
        kwargs['dict_file'] = 'tests/data/kie_toy_dataset/dict.txt'
        return KIEDataset(**kwargs)

    mock_kiedataset.side_effect = kiedataset_with_test_dict

    # Single image
    toy_dir = 'tests/data/toy_dataset/imgs/test/'
    toy_img1_path = toy_dir + 'img_1.jpg'
    str_e2e_res = readtext(toy_img1_path)
    toy_img1 = mmcv.imread(toy_img1_path)
    np_e2e_res = readtext(toy_img1)
    assert str_e2e_res == np_e2e_res

    # Multiple images
    toy_img2_path = toy_dir + 'img_2.jpg'
    toy_img2 = mmcv.imread(toy_img2_path)
    toy_imgs = [toy_img1, toy_img2]
    toy_img_paths = [toy_img1_path, toy_img2_path]
    np_e2e_results = readtext(toy_imgs)
    str_e2e_results = readtext(toy_img_paths)
    str_tuple_e2e_results = readtext(tuple(toy_img_paths))
    assert np_e2e_results == str_e2e_results
    assert str_e2e_results == str_tuple_e2e_results

    # Batch mode test
    toy_imgs.append(toy_dir + 'img_3.jpg')
    e2e_res = readtext(toy_imgs)
    full_batch_e2e_res = readtext(toy_imgs, batch_mode=True)
    assert full_batch_e2e_res == e2e_res
    batch_e2e_res = readtext(toy_imgs,
                             batch_mode=True,
                             recog_batch_size=2,
                             det_batch_size=2)
    assert batch_e2e_res == full_batch_e2e_res

    # Batch mode test with DBNet only
    full_batch_det_res = mmocr_det.readtext(toy_imgs, batch_mode=True)
    det_res = mmocr_det.readtext(toy_imgs)
    batch_det_res = mmocr_det.readtext(toy_imgs,
                                       batch_mode=True,
                                       single_batch_size=2)
    assert len(full_batch_det_res) == len(det_res)
    assert len(batch_det_res) == len(det_res)
    assert all([
        np.allclose(full_batch_det_res[i]['boundary_result'],
                    det_res[i]['boundary_result'])
        for i in range(len(full_batch_det_res))
    ])
    assert all([
        np.allclose(batch_det_res[i]['boundary_result'],
                    det_res[i]['boundary_result'])
        for i in range(len(batch_det_res))
    ])

    # Batch mode test with CRNN_TPS only (CRNN doesn't support batch inference)
    full_batch_recog_res = mmocr_recog.readtext(toy_imgs, batch_mode=True)
    recog_res = mmocr_recog.readtext(toy_imgs)
    batch_recog_res = mmocr_recog.readtext(toy_imgs,
                                           batch_mode=True,
                                           single_batch_size=2)
    full_batch_recog_res.sort(key=lambda x: x['text'])
    batch_recog_res.sort(key=lambda x: x['text'])
    recog_res.sort(key=lambda x: x['text'])
    assert np.all([
        np.allclose(full_batch_recog_res[i]['score'], recog_res[i]['score'])
        for i in range(len(full_batch_recog_res))
    ])
    assert np.all([
        np.allclose(batch_recog_res[i]['score'], recog_res[i]['score'])
        for i in range(len(full_batch_recog_res))
    ])

    # Test export
    with tempfile.TemporaryDirectory() as tmpdirname:
        mmocr.readtext(toy_imgs, export=tmpdirname)
        assert len(os.listdir(tmpdirname)) == len(toy_imgs)
    with tempfile.TemporaryDirectory() as tmpdirname:
        mmocr_det.readtext(toy_imgs, export=tmpdirname)
        assert len(os.listdir(tmpdirname)) == len(toy_imgs)
    with tempfile.TemporaryDirectory() as tmpdirname:
        mmocr_recog.readtext(toy_imgs, export=tmpdirname)
        assert len(os.listdir(tmpdirname)) == len(toy_imgs)

    # Test output
    # Single image
    with tempfile.TemporaryDirectory() as tmpdirname:
        tmp_output = os.path.join(tmpdirname, '1.jpg')
        mmocr.readtext(toy_imgs[0], output=tmp_output)
        assert os.path.exists(tmp_output)
    # Multiple images
    with tempfile.TemporaryDirectory() as tmpdirname:
        mmocr.readtext(toy_imgs, output=tmpdirname)
        assert len(os.listdir(tmpdirname)) == len(toy_imgs)

    # Test imshow
    with mock.patch('mmocr.utils.ocr.mmcv.imshow') as mock_imshow:
        mmocr.readtext(toy_img1_path, imshow=True)
        mock_imshow.assert_called_once()
        mock_imshow.reset_mock()
        mmocr.readtext(toy_imgs, imshow=True)
        assert mock_imshow.call_count == len(toy_imgs)

    # Test print_result
    with io.StringIO() as capturedOutput:
        sys.stdout = capturedOutput
        res = mmocr.readtext(toy_imgs, print_result=True)
        assert json.loads('[%s]' % capturedOutput.getvalue().strip().replace(
            '\n\n', ',').replace("'", '"')) == res
        sys.stdout = sys.__stdout__
    with io.StringIO() as capturedOutput:
        sys.stdout = capturedOutput
        res = mmocr.readtext(toy_imgs, details=True, print_result=True)
        assert json.loads('[%s]' % capturedOutput.getvalue().strip().replace(
            '\n\n', ',').replace("'", '"')) == res
        sys.stdout = sys.__stdout__

    # Test merge
    with mock.patch('mmocr.utils.ocr.stitch_boxes_into_lines') as mock_merge:
        mmocr_det_recog.readtext(toy_imgs, merge=True)
        assert mock_merge.call_count == len(toy_imgs)
def show_result(self, result, img=None, skeleton=None, kpt_score_thr=0.3, radius=8, bbox_color='green', thickness=2, pose_kpt_color=None, pose_link_color=None, vis_height=400, num_instances=-1, win_name='', show=False, wait_time=0, out_file=None): """Visualize 3D pose estimation results. Args: result (list[dict]): The pose estimation results containing: - "keypoints_3d" ([K,4]): 3D keypoints - "keypoints" ([K,3] or [T,K,3]): Optional for visualizing 2D inputs. If a sequence is given, only the last frame will be used for visualization - "bbox" ([4,] or [T,4]): Optional for visualizing 2D inputs - "title" (str): title for the subplot img (str or Tensor): Optional. The image to visualize 2D inputs on. skeleton (list of [idx_i,idx_j]): Skeleton described by a list of links, each is a pair of joint indices. kpt_score_thr (float, optional): Minimum score of keypoints to be shown. Default: 0.3. radius (int): Radius of circles. bbox_color (str or tuple or :obj:`Color`): Color of bbox lines. thickness (int): Thickness of lines. pose_kpt_color (np.array[Nx3]`): Color of N keypoints. If None, do not draw keypoints. pose_link_color (np.array[Mx3]): Color of M limbs. If None, do not draw limbs. vis_height (int): The image height of the visualization. The width will be N*vis_height depending on the number of visualized items. num_instances (int): Number of instances to be shown in 3D. If smaller than 0, all the instances in the pose_result will be shown. Otherwise, pad or truncate the pose_result to a length of num_instances. win_name (str): The window name. show (bool): Whether to show the image. Default: False. wait_time (int): Value of waitKey param. Default: 0. out_file (str or None): The filename to write the image. Default: None. Returns: Tensor: Visualized img, only if not `show` or `out_file`. """ if num_instances < 0: assert len(result) > 0 result = sorted(result, key=lambda x: x.get('track_id', 0)) # draw image and 2d poses if img is not None: img = mmcv.imread(img) bbox_result = [] pose_2d = [] for res in result: if 'bbox' in res: bbox = np.array(res['bbox']) if bbox.ndim != 1: assert bbox.ndim == 2 bbox = bbox[-1] # Get bbox from the last frame bbox_result.append(bbox) if 'keypoints' in res: kpts = np.array(res['keypoints']) if kpts.ndim != 2: assert kpts.ndim == 3 kpts = kpts[-1] # Get 2D keypoints from the last frame pose_2d.append(kpts) if len(bbox_result) > 0: bboxes = np.vstack(bbox_result) mmcv.imshow_bboxes(img, bboxes, colors=bbox_color, top_k=-1, thickness=2, show=False) if len(pose_2d) > 0: imshow_keypoints(img, pose_2d, skeleton, kpt_score_thr=kpt_score_thr, pose_kpt_color=pose_kpt_color, pose_link_color=pose_link_color, radius=radius, thickness=thickness) img = mmcv.imrescale(img, scale=vis_height / img.shape[0]) img_vis = imshow_keypoints_3d(result, img, skeleton, pose_kpt_color, pose_link_color, vis_height, axis_limit=300, axis_azimuth=-115, axis_elev=15, kpt_score_thr=kpt_score_thr, num_instances=num_instances) if show: mmcv.visualization.imshow(img_vis, win_name, wait_time) if out_file is not None: mmcv.imwrite(img_vis, out_file) return img_vis
import glob
import os

import cv2
import matplotlib.image as img
import mmcv
from tqdm import tqdm

from mmdet.apis import inference_detector, init_detector, show_result_pyplot

test_root = args.input_images_folder
test_ids = os.listdir(test_root)
model.cfg = cfg
res_size = (1080, 1440)

for it in tqdm(range(len(test_ids))):
    id = test_ids[it]
    image = mmcv.imread(test_root + "/" + id)
    orig_shape = image.shape[0:2]
    image = cv2.resize(image, res_size[::-1], interpolation=cv2.INTER_NEAREST)
    result = inference_detector(model, image)
    count = 1
    print(len(result[1][0]))
    for i, mask in enumerate(result[1][0]):
        if mask.sum() < 500:
            continue
        mask = cv2.resize(mask, orig_shape[::-1],
                          interpolation=cv2.INTER_NEAREST)
        img.imsave(args.save_path + "/" + id[:-4] + "_{}".format(count) + ".bmp",
                   mask)
        count += 1
def prepare_train_img(self, idx): img_info = self.img_infos[idx] # load image img = mmcv.imread(osp.join(self.img_prefix, img_info['filename'])) # load proposals if necessary if self.proposals is not None: proposals = self.proposals[idx][:self.num_max_proposals] # TODO: Handle empty proposals properly. Currently images with # no proposals are just ignored, but they can be used for # training in concept. if len(proposals) == 0: return None if not (proposals.shape[1] == 4 or proposals.shape[1] == 5): raise AssertionError( 'proposals should have shapes (n, 4) or (n, 5), ' 'but found {}'.format(proposals.shape)) if proposals.shape[1] == 5: scores = proposals[:, 4, None] proposals = proposals[:, :4] else: scores = None ann = self.get_ann_info(idx) gt_bboxes = ann['bboxes'] gt_labels = ann['labels'] if self.with_crowd: gt_bboxes_ignore = ann['bboxes_ignore'] # skip the image if there is no valid gt bbox if len(gt_bboxes) == 0: return None # extra augmentation if self.extra_aug is not None: img, gt_bboxes, gt_labels = self.extra_aug(img, gt_bboxes, gt_labels) # apply transforms flip = True if np.random.rand() < self.flip_ratio else False img_scale = random_scale(self.img_scales) # sample a scale img, img_shape, pad_shape, scale_factor = self.img_transform( img, img_scale, flip, keep_ratio=self.resize_keep_ratio) img = img.copy() if self.proposals is not None: proposals = self.bbox_transform(proposals, img_shape, scale_factor, flip) proposals = np.hstack([proposals, scores ]) if scores is not None else proposals gt_bboxes = self.bbox_transform(gt_bboxes, img_shape, scale_factor, flip) if self.with_crowd: gt_bboxes_ignore = self.bbox_transform(gt_bboxes_ignore, img_shape, scale_factor, flip) if self.with_mask: gt_masks = self.mask_transform(ann['masks'], pad_shape, scale_factor, flip) ori_shape = (img_info['height'], img_info['width'], 3) img_meta = dict(ori_shape=ori_shape, img_shape=img_shape, pad_shape=pad_shape, scale_factor=scale_factor, flip=flip) data = dict(img=DC(to_tensor(img), stack=True), img_meta=DC(img_meta, cpu_only=True), gt_bboxes=DC(to_tensor(gt_bboxes))) if self.proposals is not None: data['proposals'] = DC(to_tensor(proposals)) if self.with_label: data['gt_labels'] = DC(to_tensor(gt_labels)) if self.with_crowd: data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore)) if self.with_mask: data['gt_masks'] = DC(gt_masks, cpu_only=True) return data
def make_lmdb(data_path,
              lmdb_path,
              img_path_list,
              keys,
              batch=5000,
              compress_level=1,
              multiprocessing_read=False,
              n_thread=40):
    """Make lmdb.

    Contents of lmdb. The file structure is:
    example.lmdb
    ├── data.mdb
    ├── lock.mdb
    ├── meta_info.txt

    The data.mdb and lock.mdb are standard lmdb files and you can refer to
    https://lmdb.readthedocs.io/en/release/ for more details.

    The meta_info.txt is a specified txt file to record the meta information
    of our datasets. It will be automatically created when preparing
    datasets by our provided dataset tools.
    Each line in the txt file records 1)image name (with extension),
    2)image shape, and 3)compression level, separated by a white space.

    For example, the meta information could be:
    `000_00000000.png (720,1280,3) 1`, which means:
    1) image name (with extension): 000_00000000.png;
    2) image shape: (720,1280,3);
    3) compression level: 1

    We use the image name without extension as the lmdb key.

    If `multiprocessing_read` is True, it will read all the images to memory
    using multiprocessing. Thus, your server needs to have enough memory.

    Args:
        data_path (str): Data path for reading images.
        lmdb_path (str): Lmdb save path.
        img_path_list (str): Image path list.
        keys (str): Used for lmdb keys.
        batch (int): After processing batch images, lmdb commits.
            Default: 5000.
        compress_level (int): Compress level when encoding images.
            Default: 1.
        multiprocessing_read (bool): Whether use multiprocessing to read all
            the images to memory. Default: False.
        n_thread (int): For multiprocessing.
    """
    assert len(img_path_list) == len(keys), (
        'img_path_list and keys should have the same length, '
        f'but got {len(img_path_list)} and {len(keys)}')
    print(f'Create lmdb for {data_path}, save to {lmdb_path}...')
    print(f'Total images: {len(img_path_list)}')
    if not lmdb_path.endswith('.lmdb'):
        raise ValueError("lmdb_path must end with '.lmdb'.")
    if osp.exists(lmdb_path):
        print(f'Folder {lmdb_path} already exists. Exit.')
        sys.exit(1)

    if multiprocessing_read:
        # read all the images to memory (multiprocessing)
        dataset = {}  # use dict to keep the order for multiprocessing
        shapes = {}
        print(f'Read images with multiprocessing, #thread: {n_thread} ...')
        prog_bar = mmcv.ProgressBar(len(img_path_list))

        def callback(arg):
            """get the image data and update prog_bar."""
            key, dataset[key], shapes[key] = arg
            prog_bar.update()

        pool = Pool(n_thread)
        for path, key in zip(img_path_list, keys):
            pool.apply_async(read_img_worker,
                             args=(osp.join(data_path, path), key,
                                   compress_level),
                             callback=callback)
        pool.close()
        pool.join()
        print(f'Finish reading {len(img_path_list)} images.')

    # create lmdb environment
    # obtain data size for one image
    img = mmcv.imread(osp.join(data_path, img_path_list[0]), flag='unchanged')
    _, img_byte = cv2.imencode('.png', img,
                               [cv2.IMWRITE_PNG_COMPRESSION, compress_level])
    data_size_per_img = img_byte.nbytes
    print('Data size per image is: ', data_size_per_img)
    data_size = data_size_per_img * len(img_path_list)
    env = lmdb.open(lmdb_path, map_size=data_size * 10)

    # write data to lmdb
    prog_bar = mmcv.ProgressBar(len(img_path_list))
    txn = env.begin(write=True)
    txt_file = open(osp.join(lmdb_path, 'meta_info.txt'), 'w')
    for idx, (path, key) in enumerate(zip(img_path_list, keys)):
        prog_bar.update()
        key_byte = key.encode('ascii')
        if multiprocessing_read:
            img_byte = dataset[key]
            h, w, c = shapes[key]
        else:
            _, img_byte, img_shape = read_img_worker(
                osp.join(data_path, path), key, compress_level)
            h, w, c = img_shape
        txn.put(key_byte, img_byte)
        # write meta information
        txt_file.write(f'{key}.png ({h},{w},{c}) {compress_level}\n')
        if idx % batch == 0:
            txn.commit()
            txn = env.begin(write=True)
    txn.commit()
    env.close()
    txt_file.close()
    print('\nFinish writing lmdb.')
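# Hypothetical call to make_lmdb() above; all paths are placeholders and the key scheme
# (file name without extension) simply follows the docstring's convention.
import os
import os.path as osp

data_path = 'data/DIV2K/DIV2K_train_HR'  # assumed image folder
img_path_list = sorted(os.listdir(data_path))
keys = [osp.splitext(name)[0] for name in img_path_list]  # lmdb keys without extension
make_lmdb(data_path, 'data/DIV2K/DIV2K_train_HR.lmdb', img_path_list, keys,
          multiprocessing_read=True, n_thread=8)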
def show_result(self, img, result, skeleton=None, kpt_score_thr=0.3, bbox_color='green', pose_kpt_color=None, pose_limb_color=None, radius=4, text_color=(255, 0, 0), thickness=1, font_scale=0.5, win_name='', show=False, wait_time=0, out_file=None): """Draw `result` over `img`. Args: img (str or Tensor): The image to be displayed. result (list[dict]): The results to draw over `img` (bbox_result, pose_result). kpt_score_thr (float, optional): Minimum score of keypoints to be shown. Default: 0.3. bbox_color (str or tuple or :obj:`Color`): Color of bbox lines. pose_kpt_color (np.array[Nx3]`): Color of N keypoints. pose_limb_color (np.array[Mx3]): Color of M limbs. text_color (str or tuple or :obj:`Color`): Color of texts. thickness (int): Thickness of lines. font_scale (float): Font scales of texts. win_name (str): The window name. wait_time (int): Value of waitKey param. Default: 0. out_file (str or None): The filename to write the image. Default: None. Returns: img (Tensor): Only if not `show` or `out_file` """ img = mmcv.imread(img) img = img.copy() img_h, img_w, _ = img.shape bbox_result = [] pose_result = [] for res in result: bbox_result.append(res['bbox']) pose_result.append(res['keypoints']) if len(bbox_result) > 0: bboxes = np.vstack(bbox_result) # draw bounding boxes mmcv.imshow_bboxes(img, bboxes, colors=bbox_color, top_k=-1, thickness=thickness, show=False, win_name=win_name, wait_time=wait_time, out_file=None) for person_id, kpts in enumerate(pose_result): # draw each point on image for kid, kpt in enumerate(kpts): x_coord, y_coord, kpt_score = int(kpt[0]), int( kpt[1]), kpt[2] if kpt_score > kpt_score_thr: # cv2.circle(img, (x_coord, y_coord), radius, # pose_kpt_color, thickness) img_copy = img.copy() r, g, b = pose_kpt_color[kid] cv2.circle(img_copy, (int(x_coord), int(y_coord)), radius, (int(r), int(g), int(b)), -1) transparency = max(0, min(1, kpt_score)) cv2.addWeighted(img_copy, transparency, img, 1 - transparency, 0, dst=img) # draw limbs if skeleton is not None: for sk_id, sk in enumerate(skeleton): pos1 = (int(kpts[sk[0] - 1, 0]), int(kpts[sk[0] - 1, 1])) pos2 = (int(kpts[sk[1] - 1, 0]), int(kpts[sk[1] - 1, 1])) if (pos1[0] > 0 and pos1[0] < img_w and pos1[1] > 0 and pos1[1] < img_h and pos2[0] > 0 and pos2[0] < img_w and pos2[1] > 0 and pos2[1] < img_h and kpts[sk[0] - 1, 2] > kpt_score_thr and kpts[sk[1] - 1, 2] > kpt_score_thr): # cv2.line(img, pos1, pos2, pose_kpt_color, 2, 8) img_copy = img.copy() X = (pos1[0], pos2[0]) Y = (pos1[1], pos2[1]) mX = np.mean(X) mY = np.mean(Y) length = ((Y[0] - Y[1])**2 + (X[0] - X[1])**2)**0.5 angle = math.degrees( math.atan2(Y[0] - Y[1], X[0] - X[1])) stickwidth = 2 polygon = cv2.ellipse2Poly( (int(mX), int(mY)), (int(length / 2), int(stickwidth)), int(angle), 0, 360, 1) r, g, b = pose_limb_color[sk_id] cv2.fillConvexPoly(img_copy, polygon, (int(r), int(g), int(b))) transparency = max( 0, min( 1, 0.5 * (kpts[sk[0] - 1, 2] + kpts[sk[1] - 1, 2]))) cv2.addWeighted(img_copy, transparency, img, 1 - transparency, 0, dst=img) if show: imshow(img, win_name, wait_time) if out_file is not None: imwrite(img, out_file) return img
def _inference_single(model, img, img_transform, device):
    img = mmcv.imread(img)
    data = _prepare_data(img, img_transform, model.cfg, device)
    with torch.no_grad():
        result = model(return_loss=False, rescale=True, **data)
    return result
def _load_semantic_seg(self, results):
    results['gt_semantic_seg'] = mmcv.imread(
        osp.join(results['seg_prefix'], results['ann_info']['seg_map']),
        flag='unchanged').squeeze()
    return results
def convert_svhn_to_coco_test(out_file, image_prefix): import re def convert(text): return int(text) if text.isdigit() else text.lower() def Sort(key): return [convert(c) for c in re.split('([0-9]+)', key)] def sorted_alphanumeric(data): return sorted(data, key=Sort) sorted_file = sorted_alphanumeric(os.listdir(image_prefix)) annotations = [] images = [] obj_count = 0 for file in sorted_file: if file[-4:] != ".png": continue idx = file[:-4] img_name = file filename = img_name img_path = osp.join(image_prefix, img_name) height, width = mmcv.imread(img_path).shape[:2] images.append( dict(id=idx, file_name=filename, height=height, width=width)) for e in range(1): label = int(0) left, top, width, height = 0, 0, 0, 0 data_anno = dict(image_id=idx, id=obj_count, category_id=label, bbox=[left, top, width, height], area=width * height, segmentation=[], iscrowd=0) annotations.append(data_anno) obj_count += 1 coco_format_json = dict(images=images, annotations=annotations, categories=[{ 'id': 0, 'name': '10' }, { 'id': 1, 'name': '1' }, { 'id': 2, 'name': '2' }, { 'id': 3, 'name': '3' }, { 'id': 4, 'name': '4' }, { 'id': 5, 'name': '5' }, { 'id': 6, 'name': '6' }, { 'id': 7, 'name': '7' }, { 'id': 8, 'name': '8' }, { 'id': 9, 'name': '9' }]) print("DONE") mmcv.dump(coco_format_json, out_file)
def test_random_crop():
    # test assertion for invalid random crop
    with pytest.raises(AssertionError):
        transform = dict(type='RandomCrop', crop_size=(-1, 0))
        build_from_cfg(transform, PIPELINES)

    results = dict()
    img = mmcv.imread(
        osp.join(osp.dirname(__file__), '../data/color.jpg'), 'color')
    results['img'] = img

    results['img_shape'] = img.shape
    results['ori_shape'] = img.shape
    # TODO: add img_fields test
    results['bbox_fields'] = ['gt_bboxes', 'gt_bboxes_ignore']
    # Set initial values for default meta_keys
    results['pad_shape'] = img.shape
    results['scale_factor'] = 1.0

    def create_random_bboxes(num_bboxes, img_w, img_h):
        bboxes_left_top = np.random.uniform(0, 0.5, size=(num_bboxes, 2))
        bboxes_right_bottom = np.random.uniform(0.5, 1, size=(num_bboxes, 2))
        bboxes = np.concatenate((bboxes_left_top, bboxes_right_bottom), 1)
        bboxes = (bboxes * np.array([img_w, img_h, img_w, img_h])).astype(
            np.int)
        return bboxes

    h, w, _ = img.shape
    gt_bboxes = create_random_bboxes(8, w, h)
    gt_bboxes_ignore = create_random_bboxes(2, w, h)
    results['gt_bboxes'] = gt_bboxes
    results['gt_bboxes_ignore'] = gt_bboxes_ignore
    transform = dict(type='RandomCrop', crop_size=(h - 20, w - 20))
    crop_module = build_from_cfg(transform, PIPELINES)
    results = crop_module(results)
    assert results['img'].shape[:2] == (h - 20, w - 20)
    # All bboxes should be reserved after crop
    assert results['img_shape'][:2] == (h - 20, w - 20)
    assert results['gt_bboxes'].shape[0] == 8
    assert results['gt_bboxes_ignore'].shape[0] == 2

    def area(bboxes):
        return np.prod(bboxes[:, 2:4] - bboxes[:, 0:2], axis=1)

    assert (area(results['gt_bboxes']) <= area(gt_bboxes)).all()
    assert (area(results['gt_bboxes_ignore']) <= area(gt_bboxes_ignore)).all()

    # test assertion for invalid crop_type
    with pytest.raises(ValueError):
        transform = dict(
            type='RandomCrop', crop_size=(1, 1), crop_type='unknown')
        build_from_cfg(transform, PIPELINES)

    # test assertion for invalid crop_size
    with pytest.raises(AssertionError):
        transform = dict(
            type='RandomCrop', crop_type='relative', crop_size=(0, 0))
        build_from_cfg(transform, PIPELINES)

    def _construct_toy_data():
        img = np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=np.uint8)
        img = np.stack([img, img, img], axis=-1)
        results = dict()
        # image
        results['img'] = img
        results['img_shape'] = img.shape
        results['img_fields'] = ['img']
        # bboxes
        results['bbox_fields'] = ['gt_bboxes', 'gt_bboxes_ignore']
        results['gt_bboxes'] = np.array([[0., 0., 2., 1.]], dtype=np.float32)
        results['gt_bboxes_ignore'] = np.array([[2., 0., 3., 1.]],
                                               dtype=np.float32)
        # labels
        results['gt_labels'] = np.array([1], dtype=np.int64)
        return results

    # test crop_type "relative_range"
    results = _construct_toy_data()
    transform = dict(
        type='RandomCrop',
        crop_type='relative_range',
        crop_size=(0.3, 0.7),
        allow_negative_crop=True)
    transform_module = build_from_cfg(transform, PIPELINES)
    results_transformed = transform_module(copy.deepcopy(results))
    h, w = results_transformed['img_shape'][:2]
    assert int(2 * 0.3 + 0.5) <= h <= int(2 * 1 + 0.5)
    assert int(4 * 0.7 + 0.5) <= w <= int(4 * 1 + 0.5)

    # test crop_type "relative"
    transform = dict(
        type='RandomCrop',
        crop_type='relative',
        crop_size=(0.3, 0.7),
        allow_negative_crop=True)
    transform_module = build_from_cfg(transform, PIPELINES)
    results_transformed = transform_module(copy.deepcopy(results))
    h, w = results_transformed['img_shape'][:2]
    assert h == int(2 * 0.3 + 0.5) and w == int(4 * 0.7 + 0.5)

    # test crop_type "absolute"
    transform = dict(
        type='RandomCrop',
        crop_type='absolute',
        crop_size=(1, 2),
        allow_negative_crop=True)
    transform_module = build_from_cfg(transform, PIPELINES)
    results_transformed = transform_module(copy.deepcopy(results))
    h, w = results_transformed['img_shape'][:2]
    assert h == 1 and w == 2

    # test crop_type "absolute_range"
    transform = dict(
        type='RandomCrop',
        crop_type='absolute_range',
        crop_size=(1, 20),
        allow_negative_crop=True)
    transform_module = build_from_cfg(transform, PIPELINES)
    results_transformed = transform_module(copy.deepcopy(results))
    h, w = results_transformed['img_shape'][:2]
    assert 1 <= h <= 2 and 1 <= w <= 4
def convert_svhn_to_coco_train(ann_file, out_file, image_prefix): data_infos = h5py.File(ann_file, "r") annotations = [] images = [] obj_count = 0 for idx in range(len(data_infos['/digitStruct/name'])): img_name = load_data.get_name(idx, data_infos) anno = load_data.get_bbox(idx, data_infos) filename = img_name img_path = osp.join(image_prefix, img_name) height, width = mmcv.imread(img_path).shape[:2] images.append( dict(id=idx, file_name=filename, height=height, width=width)) for e in range(len(anno['label'])): label = int(anno['label'][e]) left = anno['left'][e] top = anno['top'][e] width = anno['width'][e] height = anno['height'][e] if label == 10: label = 0 data_anno = dict(image_id=idx, id=obj_count, category_id=label, bbox=[left, top, width, height], area=width * height, segmentation=[], iscrowd=0) annotations.append(data_anno) obj_count += 1 coco_format_json = dict(images=images, annotations=annotations, categories=[{ 'id': 0, 'name': '10' }, { 'id': 1, 'name': '1' }, { 'id': 2, 'name': '2' }, { 'id': 3, 'name': '3' }, { 'id': 4, 'name': '4' }, { 'id': 5, 'name': '5' }, { 'id': 6, 'name': '6' }, { 'id': 7, 'name': '7' }, { 'id': 8, 'name': '8' }, { 'id': 9, 'name': '9' }]) print(coco_format_json) mmcv.dump(coco_format_json, out_file)
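# Hypothetical invocation of convert_svhn_to_coco_train() above; the paths are
# placeholders assuming the stock SVHN "Format 1" layout, where digitStruct.mat sits
# next to the full-numbers training images.
convert_svhn_to_coco_train(ann_file='data/svhn/train/digitStruct.mat',
                           out_file='data/svhn/annotations/instances_train.json',
                           image_prefix='data/svhn/train')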
def test_cutout(): # test n_holes with pytest.raises(AssertionError): transform = dict(type='CutOut', n_holes=(5, 3), cutout_shape=(8, 8)) build_from_cfg(transform, PIPELINES) with pytest.raises(AssertionError): transform = dict(type='CutOut', n_holes=(3, 4, 5), cutout_shape=(8, 8)) build_from_cfg(transform, PIPELINES) # test cutout_shape and cutout_ratio with pytest.raises(AssertionError): transform = dict(type='CutOut', n_holes=1, cutout_shape=8) build_from_cfg(transform, PIPELINES) with pytest.raises(AssertionError): transform = dict(type='CutOut', n_holes=1, cutout_ratio=0.2) build_from_cfg(transform, PIPELINES) # either of cutout_shape and cutout_ratio should be given with pytest.raises(AssertionError): transform = dict(type='CutOut', n_holes=1) build_from_cfg(transform, PIPELINES) with pytest.raises(AssertionError): transform = dict(type='CutOut', n_holes=1, cutout_shape=(2, 2), cutout_ratio=(0.4, 0.4)) build_from_cfg(transform, PIPELINES) results = dict() img = mmcv.imread(osp.join(osp.dirname(__file__), '../data/color.jpg'), 'color') results['img'] = img results['img_shape'] = img.shape results['ori_shape'] = img.shape results['pad_shape'] = img.shape results['img_fields'] = ['img'] transform = dict(type='CutOut', n_holes=1, cutout_shape=(10, 10)) cutout_module = build_from_cfg(transform, PIPELINES) cutout_result = cutout_module(copy.deepcopy(results)) assert cutout_result['img'].sum() < img.sum() transform = dict(type='CutOut', n_holes=1, cutout_ratio=(0.8, 0.8)) cutout_module = build_from_cfg(transform, PIPELINES) cutout_result = cutout_module(copy.deepcopy(results)) assert cutout_result['img'].sum() < img.sum() transform = dict(type='CutOut', n_holes=(2, 4), cutout_shape=[(10, 10), (15, 15)], fill_in=(255, 255, 255)) cutout_module = build_from_cfg(transform, PIPELINES) cutout_result = cutout_module(copy.deepcopy(results)) assert cutout_result['img'].sum() > img.sum() transform = dict(type='CutOut', n_holes=1, cutout_ratio=(0.8, 0.8), fill_in=(255, 255, 255)) cutout_module = build_from_cfg(transform, PIPELINES) cutout_result = cutout_module(copy.deepcopy(results)) assert cutout_result['img'].sum() > img.sum()
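# Hedged sketch of how CutOut (exercised by the test above) is normally used:
# as one entry in a training pipeline config rather than called directly.
# The surrounding transform names are standard mmdet pipeline steps.
train_pipeline_example = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='CutOut', n_holes=(2, 4), cutout_shape=[(10, 10), (15, 15)]),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]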
def get_class_bboxes(input_path, model, cfg, dataset='coco', class_int=0, score_thr=0.78, show_result=False): ''' Run detection on an image or a directory of .jpg images and collect the bounding boxes of a single class. :param input_path: path to an image file or to a directory of .jpg images. :param model: detector used with the legacy inference_detector(model, img, cfg) API. :param cfg: mmcv Config object passed to inference_detector. :param dataset: dataset name used by get_classes, e.g. 'coco'. :param class_int: integer index of the class whose boxes are kept. :param score_thr: minimum detection score for a box to be kept. :param show_result: if True, visualize the filtered boxes with mmcv.imshow_det_bboxes. :return: dict with 'image_size' and, under 'results', the kept boxes per image filename; the same dict is also dumped to a timestamped JSON file. ''' if os.path.isdir(input_path): img_fnames = glob.glob('{}/*.jpg'.format(input_path)) detections = inference_detector(model, img_fnames, cfg) elif os.path.isfile(input_path): img_fnames = [input_path] detections = [inference_detector(model, input_path, cfg)] else: raise Exception('Provided image path is not a file or directory.') img_sizes = [mmcv.imread(img).shape for img in img_fnames] if not all(size == img_sizes[0] for size in img_sizes): raise Exception('Not all images are of the same size!') class_names = get_classes(dataset) result_dict = dict() result_dict['image_size'] = img_sizes[0][:2] result_dict['results'] = dict() for idx, det in enumerate(list(detections)): if isinstance(det, tuple): bbox_result, segm_result = det else: bbox_result, segm_result = det, None bboxes = np.vstack(bbox_result) if bbox_result is not None: # segms = mmcv.concat_list(segm_result) labels = [ np.full(bbox.shape[0], i, dtype=np.int32) for i, bbox in enumerate(bbox_result) ] labels = np.concatenate(labels) filter_thr = np.where(bboxes[:, -1] > score_thr)[0] filter_class = np.where(labels == class_int)[0] filter_idxs = np.intersect1d(filter_thr, filter_class) bboxes = bboxes[filter_idxs] labels = labels[filter_idxs] data = list() for bbox, label in zip(bboxes, labels): left_top = [int(bbox[0]), int(bbox[1])] right_bottom = [int(bbox[2]), int(bbox[3])] label_name = class_names[ label] if class_names is not None else 'cls {}'.format( label) data.append({ 'label': label_name, 'bbox': { 'lt': left_top, 'rb': right_bottom } }) result_dict['results'][os.path.basename( img_fnames[idx])] = data.copy() data.clear() ## Debug if show_result: img = mmcv.imread(img_fnames[idx]) mmcv.imshow_det_bboxes(img.copy(), bboxes, labels, class_names=class_names, score_thr=score_thr, show=show_result) ## with open('{}_detection_bboxes.json'.format(time.strftime("%Y%m%d%H%M%S")), 'w') as out_file: json.dump(result_dict, out_file) # print(json.dumps(out)) # debug return result_dict
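# Hedged usage sketch for get_class_bboxes, assuming the legacy mmdetection
# API in which inference_detector accepts a config object. The config and
# checkpoint paths are hypothetical placeholders; class 0 is 'person' in COCO.
import mmcv
from mmcv.runner import load_checkpoint
from mmdet.models import build_detector

cfg = mmcv.Config.fromfile('configs/faster_rcnn_r50_fpn_1x.py')    # placeholder
cfg.model.pretrained = None
model = build_detector(cfg.model, test_cfg=cfg.test_cfg)
load_checkpoint(model, 'checkpoints/faster_rcnn_r50_fpn_1x.pth')   # placeholder
model.eval()

person_boxes = get_class_bboxes('demo/', model, cfg, dataset='coco',
                                class_int=0, score_thr=0.5)
print(person_boxes['image_size'])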
def show_result(img, result, class_names, score_thr=0.3, wait_time=0, show=True, out_file=None): """Visualize the detection results on the image. Args: img (str or np.ndarray): Image filename or loaded image. result (tuple[list] or list): The detection result, can be either (bbox, segm) or just bbox. class_names (list[str] or tuple[str]): A list of class names. score_thr (float): The threshold to visualize the bboxes and masks. wait_time (int): Value of waitKey param. show (bool, optional): Whether to show the image with opencv or not. out_file (str, optional): If specified, the visualization result will be written to the out file instead of shown in a window. Returns: np.ndarray or None: If neither `show` nor `out_file` is specified, the visualized image is returned, otherwise None is returned. """ assert isinstance(class_names, (tuple, list)) img = mmcv.imread(img) img = img.copy() if isinstance(result, tuple): bbox_result, segm_result = result else: bbox_result, segm_result = result, None bboxes = np.vstack(bbox_result) labels = [ np.full(bbox.shape[0], i, dtype=np.int32) for i, bbox in enumerate(bbox_result) ] labels = np.concatenate(labels) # draw segmentation masks if segm_result is not None: segms = mmcv.concat_list(segm_result) inds = np.where(bboxes[:, -1] > score_thr)[0] np.random.seed(42) color_masks = [ np.random.randint(0, 256, (1, 3), dtype=np.uint8) for _ in range(max(labels) + 1) ] for i in inds: i = int(i) color_mask = color_masks[labels[i]] mask = maskUtils.decode(segms[i]).astype(bool) img[mask] = img[mask] * 0.5 + color_mask * 0.5 # draw bounding boxes mmcv.imshow_det_bboxes(img, bboxes, labels, class_names=class_names, score_thr=score_thr, show=show, wait_time=wait_time, out_file=out_file) if not (show or out_file): return img
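# Hedged example of calling the show_result helper above on a single image.
# Assumes init_detector/inference_detector from mmdet.apis and get_classes
# from mmdet.core; the config, checkpoint and image paths are placeholders.
from mmdet.apis import init_detector, inference_detector
from mmdet.core import get_classes

model = init_detector('configs/mask_rcnn_r50_fpn_1x.py',        # placeholder
                      'checkpoints/mask_rcnn_r50_fpn_1x.pth',   # placeholder
                      device='cuda:0')
result = inference_detector(model, 'demo/demo.jpg')              # placeholder image
show_result('demo/demo.jpg', result, get_classes('coco'),
            score_thr=0.3, show=False, out_file='demo/demo_out.jpg')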
def prepare_train_img(self, idx): # prepare a pair of image in a sequence vid, frame_id = idx vid_idx = self.vid_ids.index(vid) vid_info = self.vid_infos[vid_idx] basename = osp.basename(vid_info['filenames'][frame_id]) clip_frame_ids = self.sample_ref(idx) + [frame_id] clip_frame_ids.sort() imgs = [] for frame_id in clip_frame_ids: imgs.append(mmcv.imread(osp.join(self.img_prefix, vid_info['filenames'][frame_id]))) imgs = np.stack(imgs, axis=0) # load proposals if necessary if self.proposals is not None: proposals = self.proposals[idx][:self.num_max_proposals] # TODO: Handle empty proposals properly. Currently images with # no proposals are just ignored, but they can be used for # training in concept. if len(proposals) == 0: return None if not (proposals.shape[1] == 4 or proposals.shape[1] == 5): raise AssertionError( 'proposals should have shapes (n, 4) or (n, 5), ' 'but found {}'.format(proposals.shape)) if proposals.shape[1] == 5: scores = proposals[:, 4, None] proposals = proposals[:, :4] else: scores = None # load annotation of ref_frames bboxes, labels, ids, masks, bboxes_ignore = [], [], [], [], [] for frame_id in clip_frame_ids: ann = self.get_ann_info(vid, frame_id) bboxes.append(ann['bboxes']) labels.append(ann['labels']) # obj ids attribute does not exist in current annotation # need to add it ids.append(ann['obj_ids']) if self.with_mask: masks.append(ann['masks']) # compute matching of reference frame with current frame # 0 denote there is no matching # gt_pids = [ref_ids.index(i)+1 if i in ref_ids else 0 for i in gt_ids] if self.with_crowd: bboxes_ignore.append(ann['bboxes_ignore']) # extra augmentation if self.extra_aug is not None and self.with_mask: for i in range(len(clip_frame_ids)): imgs[i], bboxes[i], labels[i], masks[i], ids[i] = self.extra_aug(imgs[i], bboxes[i], labels[i], masks[i], ids[i]) # apply transforms flip = True if np.random.rand() < self.flip_ratio else False img_scale = random_scale(self.img_scales, mode='range_keep_ratio') # sample a scale temp_imgs = [] for i in range(len(clip_frame_ids)): img_cur, img_shape, pad_shape, scale_factor = self.img_transform( imgs[i], img_scale, flip, keep_ratio=self.resize_keep_ratio) temp_imgs.append(img_cur) imgs = np.stack(temp_imgs, axis=0) imgs = imgs.copy() if self.proposals is not None: proposals = self.bbox_transform(proposals, img_shape, scale_factor, flip) proposals = np.hstack([proposals, scores]) if scores is not None else proposals for i in range(len(clip_frame_ids)): bboxes[i] = self.bbox_transform(bboxes[i], img_shape, pad_shape, scale_factor, flip) if self.aug_ref_bbox_param is not None: for i in range(len(clip_frame_ids)): bboxes[i] = self.bbox_aug(bboxes[i], img_shape) if self.with_crowd: for i in range(len(clip_frame_ids)): bboxes_ignore[i] = self.bbox_transform(bboxes_ignore[i], img_shape, pad_shape, scale_factor, flip) if self.with_mask: for i in range(len(clip_frame_ids)): masks[i] = self.mask_transform(masks[i], pad_shape, img_scale, flip, keep_ratio=self.resize_keep_ratio) ori_shape = (vid_info['height'], vid_info['width'], 3) img_meta = dict( ori_shape=ori_shape, img_shape=img_shape, pad_shape=pad_shape, video_id=vid, frame_id=frame_id, is_first=(frame_id == 0), scale_factor=scale_factor, flip=flip) data = dict( img=DC(to_tensor(imgs), stack=True), img_meta=DC(img_meta, cpu_only=True), bboxes=DC([to_tensor(bboxes[i]) for i in range(len(clip_frame_ids))]), ) if self.proposals is not None: data['proposals'] = DC(to_tensor(proposals)) if self.with_label: data['labels'] = DC([to_tensor(labels[i]) for i 
in range(len(clip_frame_ids))]) if self.with_track: data['ids'] = DC([to_tensor(np.array(ids[i])) for i in range(len(clip_frame_ids))]) if self.with_crowd: data['bboxes_ignore'] = DC([to_tensor(bboxes_ignore[i]) for i in range(len(clip_frame_ids))]) if self.with_mask: data['masks'] = DC([to_tensor(masks[i]) for i in range(len(clip_frame_ids))], cpu_only=True) return data
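# Side note (hedged): DC used above is mmcv.parallel.DataContainer. A minimal
# illustration of what the wrappers mean: stack=True items are batched into a
# tensor by the mmcv collate function, while cpu_only=True items stay on CPU
# as plain Python objects (typical for image metas).
import torch
from mmcv.parallel import DataContainer as DC

img_dc = DC(torch.zeros(3, 224, 224), stack=True)    # stacked along a new batch dim
meta_dc = DC(dict(flip=False), cpu_only=True)         # kept as-is, never moved to GPU
print(img_dc.stack, meta_dc.cpu_only)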
def imshow_keypoints_3d( pose_result, img=None, skeleton=None, pose_kpt_color=None, pose_limb_color=None, vis_height=400, kpt_score_thr=0.3, *, axis_azimuth=70, axis_limit=1.7, axis_dist=10.0, axis_elev=15.0, ): """Draw 3D keypoints and limbs in 3D coordinates. Args: pose_result (list[dict]): 3D pose results containing: - "keypoints_3d" ([K,4]): 3D keypoints - "title" (str): Optional. A string to specify the title of the visualization of this pose result img (str|np.ndarray): Optional. The image or image path to show the input image and/or 2D pose. Note that the image should be given in BGR channel order. skeleton (list of [idx_i,idx_j]): Skeleton described by a list of limbs, each being a pair of joint indices. pose_kpt_color (np.ndarray[Nx3]): Color of N keypoints. If None, do not draw keypoints. pose_limb_color (np.array[Mx3]): Color of M limbs. If None, do not draw limbs. vis_height (int): The image height of the visualization. The width will be N*vis_height depending on the number of visualized items. kpt_score_thr (float): Minimum score of keypoints to be shown. Default: 0.3. axis_azimuth (float): axis azimuth angle for 3D visualizations. axis_dist (float): axis distance for 3D visualizations. axis_elev (float): axis elevation view angle for 3D visualizations. axis_limit (float): The axis limit to visualize 3d pose. The xyz range will be set as: - x: [x_c - axis_limit/2, x_c + axis_limit/2] - y: [y_c - axis_limit/2, y_c + axis_limit/2] - z: [0, axis_limit] where x_c, y_c are the mean values of the x and y coordinates. """ show_img = img is not None num_axis = len(pose_result) + 1 if show_img else len(pose_result) plt.ioff() fig = plt.figure(figsize=(vis_height * num_axis * 0.01, vis_height * 0.01)) if show_img: img = mmcv.imread(img, channel_order='bgr') img = mmcv.bgr2rgb(img) img = mmcv.imrescale(img, scale=vis_height / img.shape[0]) ax_img = fig.add_subplot(1, num_axis, 1) ax_img.get_xaxis().set_visible(False) ax_img.get_yaxis().set_visible(False) ax_img.set_axis_off() ax_img.set_title('Input') ax_img.imshow(img, aspect='equal') for idx, res in enumerate(pose_result): kpts = res['keypoints_3d'] valid = kpts[:, 3] >= kpt_score_thr ax_idx = idx + 2 if show_img else idx + 1 ax = fig.add_subplot(1, num_axis, ax_idx, projection='3d') ax.view_init( elev=axis_elev, azim=axis_azimuth, ) x_c = np.mean(kpts[valid, 0]) if sum(valid) > 0 else 0 y_c = np.mean(kpts[valid, 1]) if sum(valid) > 0 else 0 ax.set_xlim3d([x_c - axis_limit / 2, x_c + axis_limit / 2]) ax.set_ylim3d([y_c - axis_limit / 2, y_c + axis_limit / 2]) ax.set_zlim3d([0, axis_limit]) ax.set_aspect('auto') ax.set_xticks([]) ax.set_yticks([]) ax.set_zticks([]) ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_zticklabels([]) ax.dist = axis_dist if pose_kpt_color is not None: pose_kpt_color = np.array(pose_kpt_color) assert len(pose_kpt_color) == len(kpts) x_3d, y_3d, z_3d = np.split(kpts[:, :3], [1, 2], axis=1) # matplotlib uses RGB color in [0, 1] value range _color = pose_kpt_color[..., ::-1] / 255. 
ax.scatter( x_3d[valid], y_3d[valid], z_3d[valid], marker='o', color=_color[valid], ) if skeleton is not None and pose_limb_color is not None: pose_limb_color = np.array(pose_limb_color) assert len(pose_limb_color) == len(skeleton) for limb, limb_color in zip(skeleton, pose_limb_color): limb_indices = [_i - 1 for _i in limb] xs_3d = kpts[limb_indices, 0] ys_3d = kpts[limb_indices, 1] zs_3d = kpts[limb_indices, 2] kpt_score = kpts[limb_indices, 3] if kpt_score.min() > kpt_score_thr: # matplotlib uses RGB color in [0, 1] value range _color = limb_color[::-1] / 255. ax.plot(xs_3d, ys_3d, zs_3d, color=_color, zdir='z') if 'title' in res: ax.set_title(res['title']) # convert figure to numpy array fig.tight_layout() fig.canvas.draw() img_w, img_h = fig.canvas.get_width_height() img_vis = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8).reshape(img_h, img_w, -1) img_vis = mmcv.rgb2bgr(img_vis) plt.close(fig) return img_vis
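# Hedged usage sketch for imshow_keypoints_3d with a single random pose.
# The 17 keypoints, their scores and the uniform white keypoint colors are
# made up purely for illustration; a real call would pass model predictions.
import numpy as np

fake_pose = dict(
    keypoints_3d=np.random.rand(17, 4).astype(np.float32),  # columns: x, y, z, score
    title='random pose')
canvas = imshow_keypoints_3d(
    [fake_pose],
    img=None,
    skeleton=None,
    pose_kpt_color=np.full((17, 3), 255, dtype=np.uint8),
    pose_limb_color=None,
    vis_height=400)
print(canvas.shape)  # BGR image of the rendered 3D plot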
import mmcv import numpy as np flag_1 = False # read and write images flag_2 = False # read an image from bytes flag_3 = False # show an image flag_4 = False # image color space conversion flag_5 = False # Resize flag_6 = False # Rotate flag_7 = False # Flip flag_8 = False # Crop flag_9 = False # Padding if flag_1: # To read or write image files, use imread or imwrite. img = mmcv.imread("asset/a.jpg") img = mmcv.imread("asset/a.jpg", flag='grayscale') img_ = mmcv.imread(img) # nothing will happen, img_ = img mmcv.imwrite(img, 'out.jpg') if flag_2: # To read images from bytes with open("asset/a.jpg", 'rb') as f: data = f.read() img = mmcv.imfrombytes(data) print(img) if flag_3: # To show an image file or a loaded image mmcv.imshow("asset/a.jpg") # this is equivalent to