def prepare_test_img(self, idx):
    """Build the test-time input dict for the sample at position ``idx``.

    The image at ``self.img_filenames[idx]`` is read and passed through
    ``self.img_transform`` with scale ``1`` and no flip.  When
    ``self.with_point`` is set, the lidar file at
    ``self.lidar_filenames[idx]`` is read and filtered with
    ``get_lidar_in_image_fov`` against the transformed image extent; if a
    generator is configured the remaining points are voxelised.

    Args:
        idx (int): position of the sample in the dataset's file lists.

    Returns:
        dict: always holds ``img``, ``img_shape``, ``sample_idx`` and
        ``calib``; ``voxels``, ``coordinates``, ``num_points`` and
        ``anchors`` are added when the points were voxelised.
    """
    sample_id = self.sample_ids[idx]

    # load image
    image = mmcv.imread(self.img_filenames[idx])
    image, img_shape, _pad_shape, _scale_factor = self.img_transform(
        image, 1, False)

    data = {
        'img': DC(to_tensor(image), stack=True),
        'img_shape': DC(img_shape, cpu_only=True),
        'sample_idx': DC(sample_id, cpu_only=True),
        'calib': DC(self.calib, cpu_only=True),
    }

    if self.with_mask:
        # Masks are not produced here.  The original placeholder was a bare
        # ``NotImplemented`` expression, which does nothing at runtime.
        pass

    if self.with_point:
        cloud = read_lidar(self.lidar_filenames[idx])
        # img_shape[1] / img_shape[0] are passed as the upper x / y bounds
        cloud = get_lidar_in_image_fov(
            cloud, self.calib, 0, 0, img_shape[1], img_shape[0],
            clip_distance=0.1)

        if self.generator is not None:
            voxels, coordinates, num_points = self.generator.generate(cloud)
            data['voxels'] = DC(to_tensor(voxels))
            data['coordinates'] = DC(to_tensor(coordinates))
            data['num_points'] = DC(to_tensor(num_points))
            data['anchors'] = DC(to_tensor(self.anchors))

    return data
def prepare_single(img, scale, flip):
    """Transform one image for a single (scale, flip) test configuration.

    Relies on ``self``, ``img_info`` and ``row`` from the enclosing scope.

    Args:
        img: image to transform.
        scale: target scale handed to ``self.img_transform``.
        flip: whether ``self.img_transform`` should flip the image.

    Returns:
        tuple: ``(image_tensor, meta_dict)`` where the meta dict records the
        original/transformed shapes, the scale factor, the flip flag and the
        ``img_name`` / ``path`` entries of ``row``.
    """
    transformed, img_shape, pad_shape, scale_factor = self.img_transform(
        img, scale, flip, keep_ratio=self.resize_keep_ratio)
    tensor = to_tensor(transformed)

    meta = {
        'ori_shape': (img_info['height'], img_info['width'], 3),
        'img_shape': img_shape,
        'pad_shape': pad_shape,
        'scale_factor': scale_factor,
        'flip': flip,
        'img_name': row['img_name'],
        'path': row['path'],
    }
    return tensor, meta
def prepare_single(img, scale, flip, proposal=None):
    """Transform one image (and optional proposals) for one test setting.

    Relies on ``self`` and ``img_info`` from the enclosing scope.

    Args:
        img: image to transform.
        scale: target scale handed to ``self.img_transform``.
        flip: whether the image and the proposals are flipped.
        proposal: optional 2-D array of proposals; when it has five columns
            the fifth one is treated as a score and re-attached unchanged
            after the box transform.

    Returns:
        tuple: ``(image_tensor, meta_dict, proposal_tensor_or_None)``.
    """
    transformed, img_shape, pad_shape, scale_factor = self.img_transform(
        img, scale, flip, keep_ratio=self.resize_keep_ratio)
    tensor = to_tensor(transformed)

    meta = {
        'ori_shape': (img_info['height'], img_info['width'], 3),
        'img_shape': img_shape,
        'pad_shape': pad_shape,
        'scale_factor': scale_factor,
        'flip': flip,
    }

    if proposal is None:
        return tensor, meta, None

    # split the score column off before the boxes are transformed
    score = None
    boxes = proposal
    if proposal.shape[1] == 5:
        score = proposal[:, 4, None]
        boxes = proposal[:, :4]

    boxes = self.bbox_transform(boxes, img_shape, scale_factor, flip)
    if score is not None:
        boxes = np.hstack([boxes, score])

    return tensor, meta, to_tensor(boxes)
def prepare_test_img(self, idx):
    """Prepare one sample for testing (single scale, no flipping).

    Reads the image, calibration and (optionally) labels and lidar points
    that belong to ``self.sample_ids[idx]``.  The image goes through
    ``self.img_transform`` with scale ``1`` and no flip.  Points inside the
    generator's point-cloud range are turned into one-point pseudo voxels
    instead of calling ``self.generator.generate``.

    Args:
        idx (int): position of the sample in ``self.sample_ids``.

    Returns:
        dict: ``img`` and ``img_meta`` are always present, ``gt_labels`` and
        ``gt_bboxes`` too (wrapping ``None`` when labels are disabled or no
        object of ``self.class_name`` exists).  ``anchors``, ``voxels``,
        ``coordinates``, ``num_points`` and ``anchors_mask`` are added when
        the corresponding options are active.
    """
    sample_id = self.sample_ids[idx]

    # load image
    img = mmcv.imread(osp.join(self.img_prefix, '%06d.png' % sample_id))
    img, img_shape, pad_shape, scale_factor = self.img_transform(
        img, 1, False)

    calib = Calibration(osp.join(self.calib_prefix, '%06d.txt' % sample_id))

    gt_bboxes = None
    gt_labels = None
    if self.with_label:
        objects = read_label(
            osp.join(self.label_prefix, '%06d.txt' % sample_id))
        boxes = [obj.box3d for obj in objects if obj.type in self.class_name]
        if boxes:
            gt_bboxes = np.array(boxes, dtype=np.float32)
            gt_labels = np.ones(len(gt_bboxes), dtype=np.int64)
            # transfer from cam to lidar coordinates
            gt_bboxes[:, :3] = project_rect_to_velo(gt_bboxes[:, :3], calib)

    img_meta = dict(img_shape=img_shape, sample_idx=sample_id, calib=calib)
    data = dict(img=to_tensor(img), img_meta=DC(img_meta, cpu_only=True))

    if self.anchors is not None:
        data['anchors'] = DC(to_tensor(self.anchors.astype(np.float32)))

    if self.with_mask:
        # Masks are not implemented; the original placeholder was a bare
        # ``NotImplemented`` expression, i.e. a no-op.
        pass

    if self.with_point:
        points = read_lidar(
            osp.join(self.lidar_prefix, '%06d.bin' % sample_id))

        # Voxelisation consumes ``points``, so it only runs when a point
        # cloud was actually loaded.
        if isinstance(self.generator, VoxelGenerator):
            voxel_size = self.generator.voxel_size
            pc_range = self.generator.point_cloud_range
            grid_size = self.generator.grid_size

            keep = points_op_cpu.points_bound_kernel(points, pc_range[:3],
                                                     pc_range[3:])
            voxels = points[keep, :]
            # columns are reordered to (2, 1, 0) before the offset is
            # removed and the result is divided by the reversed voxel size
            coordinates = (
                (voxels[:, [2, 1, 0]] -
                 np.array(pc_range[[2, 1, 0]], dtype=np.float32)) /
                np.array(voxel_size[::-1], dtype=np.float32)).astype(np.int32)
            # NOTE(review): sized by len(keep); this matches len(voxels)
            # only if ``keep`` is an index array rather than a boolean
            # mask - confirm against points_bound_kernel.
            num_points = np.ones(len(keep)).astype(np.int32)

            data['voxels'] = DC(to_tensor(voxels.astype(np.float32)))
            data['coordinates'] = DC(to_tensor(coordinates))
            data['num_points'] = DC(to_tensor(num_points))

            if self.anchor_area_threshold >= 0 and self.anchors is not None:
                dense_voxel_map = sparse_sum_for_anchors_mask(
                    coordinates, tuple(grid_size[::-1][1:]))
                # cumulative sums along both axes, consumed by
                # fused_get_anchors_area below
                dense_voxel_map = dense_voxel_map.cumsum(0)
                dense_voxel_map = dense_voxel_map.cumsum(1)
                anchors_area = fused_get_anchors_area(
                    dense_voxel_map, self.anchors_bv, voxel_size, pc_range,
                    grid_size)
                anchors_mask = anchors_area > self.anchor_area_threshold
                data['anchors_mask'] = DC(
                    to_tensor(anchors_mask.astype(np.uint8)))

    if self.with_label and gt_labels is not None:
        data['gt_labels'] = DC(to_tensor(gt_labels), cpu_only=True)
        data['gt_bboxes'] = DC(to_tensor(gt_bboxes), cpu_only=True)
    else:
        # No labels requested, or no object of the wanted class in this
        # sample: wrap None directly instead of handing None to to_tensor.
        data['gt_labels'] = DC(None, cpu_only=True)
        data['gt_bboxes'] = DC(None, cpu_only=True)

    return data
def prepare_train_img(self, idx):
    """Load, augment and voxelise one training sample.

    Reads image, labels, calibration and lidar points of
    ``self.sample_ids[idx]``, optionally pastes sampled ground-truth objects
    into the scene and applies the augmentor's noise / flip / rotation /
    scaling, then turns the in-range points into one-point pseudo voxels.

    Args:
        idx (int): position of the sample in ``self.sample_ids``.

    Returns:
        dict or None: the packed sample, or ``None`` when no ground-truth
        box is left after filtering.
    """
    sample_id = self.sample_ids[idx]
    # load image
    img = mmcv.imread(osp.join(self.img_prefix, '%06d.png' % sample_id))
    img, img_shape, pad_shape, scale_factor = self.img_transform(
        img, 1, False)
    objects = read_label(
        osp.join(self.label_prefix, '%06d.txt' % sample_id))
    calib = Calibration(osp.join(self.calib_prefix, '%06d.txt' % sample_id))
    # every labelled object except "DontCare" entries is kept at this stage
    gt_bboxes = [
        object.box3d for object in objects
        if object.type not in ["DontCare"]
    ]
    gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
    gt_types = [
        object.type for object in objects
        if object.type not in ["DontCare"]
    ]
    # transfer from cam to lidar coordinates
    if len(gt_bboxes) != 0:
        gt_bboxes[:, :3] = project_rect_to_velo(gt_bboxes[:, :3], calib)
    img_meta = dict(img_shape=img_shape, sample_idx=sample_id, calib=calib)
    data = dict(img=to_tensor(img), img_meta=DC(img_meta, cpu_only=True))
    if self.anchors is not None:
        data['anchors'] = DC(to_tensor(self.anchors.astype(np.float32)))
    if self.with_mask:
        # NOTE(review): bare expression, has no effect at runtime
        NotImplemented
    if self.with_point:
        points = read_lidar(
            osp.join(self.lidar_prefix, '%06d.bin' % sample_id))
    # NOTE(review): everything below reads ``points``, which is only bound
    # when self.with_point is set - confirm callers always enable it.
    if self.augmentor is not None and self.test_mode is False:
        sampled_gt_boxes, sampled_gt_types, sampled_points = self.augmentor.sample_all(
            gt_bboxes, gt_types)
        assert sampled_points.dtype == np.float32
        gt_bboxes = np.concatenate([gt_bboxes, sampled_gt_boxes])
        gt_types = gt_types + sampled_gt_types
        assert len(gt_types) == len(gt_bboxes)
        # to avoid overlapping point (option): drop scene points that fall
        # inside any of the sampled boxes
        masks = points_in_rbbox(points, sampled_gt_boxes)
        #masks = points_op_cpu.points_in_bbox3d_np(points[:,:3], sampled_gt_boxes)
        points = points[np.logical_not(masks.any(-1))]
        # paste sampled points to the scene
        points = np.concatenate([sampled_points, points], axis=0)
        # select the interest classes
        selected = [
            i for i in range(len(gt_types))
            if gt_types[i] in self.class_names
        ]
        gt_bboxes = gt_bboxes[selected, :]
        gt_types = [
            gt_types[i] for i in range(len(gt_types))
            if gt_types[i] in self.class_names
        ]
        # force van to have same label as car
        gt_types = ['Car' if n == 'Van' else n for n in gt_types]
        # labels are 1-based positions in self.class_names
        gt_labels = np.array(
            [self.class_names.index(n) + 1 for n in gt_types],
            dtype=np.int64)
        # the return value is not used, so this call presumably edits the
        # boxes and points in place - TODO confirm
        self.augmentor.noise_per_object_(gt_bboxes, points, num_try=100)
        gt_bboxes, points = self.augmentor.random_flip(gt_bboxes, points)
        gt_bboxes, points = self.augmentor.global_rotation(
            gt_bboxes, points)
        gt_bboxes, points = self.augmentor.global_scaling(
            gt_bboxes, points)
    # NOTE(review): ``gt_labels`` is only assigned inside the augmentor
    # branch above; without an augmentor (or in test mode) the code below
    # would hit an unbound name - verify the intended nesting.
    if isinstance(self.generator, VoxelGenerator):
        #voxels, coordinates, num_points = self.generator.generate(points)
        voxel_size = self.generator.voxel_size
        pc_range = self.generator.point_cloud_range
        grid_size = self.generator.grid_size
        keep = points_op_cpu.points_bound_kernel(points, pc_range[:3],
                                                 pc_range[3:])
        # every kept point becomes its own single-point voxel
        voxels = points[keep, :]
        # columns are reordered to (2, 1, 0) before the range offset is
        # subtracted and the result divided by the reversed voxel size
        coordinates = (
            (voxels[:, [2, 1, 0]] -
             np.array(pc_range[[2, 1, 0]], dtype=np.float32)) /
            np.array(voxel_size[::-1], dtype=np.float32)).astype(np.int32)
        num_points = np.ones(len(keep)).astype(np.int32)
        data['voxels'] = DC(to_tensor(voxels.astype(np.float32)))
        data['coordinates'] = DC(to_tensor(coordinates))
        data['num_points'] = DC(to_tensor(num_points))
        if self.anchor_area_threshold >= 0 and self.anchors is not None:
            dense_voxel_map = sparse_sum_for_anchors_mask(
                coordinates, tuple(grid_size[::-1][1:]))
            # cumulative sums along both axes, consumed by
            # fused_get_anchors_area
            dense_voxel_map = dense_voxel_map.cumsum(0)
            dense_voxel_map = dense_voxel_map.cumsum(1)
            anchors_area = fused_get_anchors_area(dense_voxel_map,
                                                  self.anchors_bv,
                                                  voxel_size, pc_range,
                                                  grid_size)
            anchors_mask = anchors_area > self.anchor_area_threshold
            data['anchors_mask'] = DC(
                to_tensor(anchors_mask.astype(np.uint8)))
        # filter gt_bbox out of range
        bv_range = self.generator.point_cloud_range[[0, 1, 3, 4]]
        mask = filter_gt_box_outside_range(gt_bboxes, bv_range)
        gt_bboxes = gt_bboxes[mask]
        gt_labels = gt_labels[mask]
    else:
        # NOTE(review): the exception class is named but never raised, so
        # other generator types silently fall through
        NotImplementedError
    # skip the image if there is no valid gt bbox
    if len(gt_bboxes) == 0:
        return None
    # limit rad to [-pi, pi]
    gt_bboxes[:, 6] = limit_period(gt_bboxes[:, 6], offset=0.5,
                                   period=2 * np.pi)
    if self.with_label:
        data['gt_labels'] = DC(to_tensor(gt_labels))
        data['gt_bboxes'] = DC(to_tensor(gt_bboxes))
    return data
def prepare_train_img(self, idx):
    """Load, augment and pack one training sample.

    The image comes from ``img_info.image()``; annotations come from
    ``self.get_ann_info(idx)``.  ``self.applyAugmentations`` and the optional
    ``self.extra_aug`` run before the usual resize / flip transforms.
    ``img_info.dispose()`` is always called before returning, also on early
    exits and errors.

    Args:
        idx (int): position of the sample in ``self.img_infos``.

    Returns:
        dict or None: the packed sample (``img``, ``img_meta``,
        ``gt_bboxes`` plus optional ``proposals``, ``gt_labels``,
        ``gt_bboxes_ignore``, ``gt_masks``), or ``None`` when the sample has
        no proposals or no ground-truth boxes.

    Raises:
        AssertionError: if the proposals do not have 4 or 5 columns.
    """
    # Resolve the record before entering ``try``: if the lookup itself
    # fails, ``finally`` must not touch an unbound ``img_info`` and hide the
    # real error behind an UnboundLocalError.
    img_info = self.img_infos[idx]
    try:
        # load image
        img = img_info.image()

        # load proposals if necessary
        if self.proposals is not None:
            proposals = self.proposals[idx][:self.num_max_proposals]
            # TODO: Handle empty proposals properly. Currently images with
            # no proposals are just ignored, but they can be used for
            # training in concept.
            if len(proposals) == 0:
                return None
            if proposals.shape[1] not in (4, 5):
                raise AssertionError(
                    'proposals should have shapes (n, 4) or (n, 5), '
                    'but found {}'.format(proposals.shape))
            if proposals.shape[1] == 5:
                scores = proposals[:, 4, None]
                proposals = proposals[:, :4]
            else:
                scores = None

        ann = self.get_ann_info(idx)
        gt_bboxes = ann['bboxes']
        gt_labels = ann['labels']
        gt_masks = ann['masks'] if self.with_mask else None
        gt_bboxes_ignore = ann['bboxes_ignore'] if self.with_crowd else None

        img, gt_bboxes, gt_masks, gt_bboxes_ignore = self.applyAugmentations(
            img, gt_bboxes, gt_masks, gt_bboxes_ignore, True)

        # skip the image if there is no valid gt bbox
        if len(gt_bboxes) == 0:
            return None

        # extra augmentation
        if self.extra_aug is not None:
            img, gt_bboxes, gt_labels = self.extra_aug(img, gt_bboxes,
                                                       gt_labels)

        # apply transforms
        flip = bool(np.random.rand() < self.flip_ratio)
        # randomly sample a scale
        img_scale = random_scale(self.img_scales, self.multiscale_mode)
        img, img_shape, pad_shape, scale_factor = self.img_transform(
            img, img_scale, flip, keep_ratio=self.resize_keep_ratio)
        img = img.copy()

        if self.with_seg:
            # Semantic-segmentation targets are currently not loaded.
            pass

        if self.proposals is not None:
            proposals = self.bbox_transform(proposals, img_shape,
                                            scale_factor, flip)
            if scores is not None:
                proposals = np.hstack([proposals, scores])

        gt_bboxes = self.bbox_transform(gt_bboxes, img_shape, scale_factor,
                                        flip)
        if self.with_crowd:
            gt_bboxes_ignore = self.bbox_transform(gt_bboxes_ignore,
                                                   img_shape, scale_factor,
                                                   flip)
        if self.with_mask:
            # masks are transformed against the padded shape, boxes against
            # the image shape
            gt_masks = self.mask_transform(gt_masks, pad_shape, scale_factor,
                                           flip)

        ori_shape = (img_info['height'], img_info['width'], 3)
        img_meta = dict(
            id=img_info['id'],
            ori_shape=ori_shape,
            img_shape=img_shape,
            pad_shape=pad_shape,
            scale_factor=scale_factor,
            flip=flip)

        data = dict(
            img=DC(to_tensor(img), stack=True),
            img_meta=DC(img_meta, cpu_only=True),
            gt_bboxes=DC(to_tensor(gt_bboxes)))
        if self.proposals is not None:
            data['proposals'] = DC(to_tensor(proposals))
        if self.with_label:
            data['gt_labels'] = DC(to_tensor(gt_labels))
        if self.with_crowd:
            data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore))
        if self.with_mask:
            data['gt_masks'] = DC(gt_masks, cpu_only=True)
        return data
    finally:
        img_info.dispose()
def main():
    """Run the detector on every image of a folder and save visualisations.

    Builds the model from the config given on the command line, restores the
    checkpoint through a ``Runner``, then for each readable image in
    ``args.image_folder`` runs inference and writes
    ``<output_folder>/<image name>.output.jpg`` when the model returns
    predictions.  Directory entries that cannot be decoded as an image are
    skipped.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    if args.ckpt:
        cfg.resume_from = args.ckpt

    cfg.test_cfg.rcnn.score_thr = 0.5

    FOCAL_LENGTH = cfg.get('FOCAL_LENGTH', 1000)

    model = build_detector(cfg.model,
                           train_cfg=cfg.train_cfg,
                           test_cfg=cfg.test_cfg)

    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(mmdet_version=__version__,
                                          config=cfg.text,
                                          CLASSES=('Human', ))
    # add an attribute for visualization convenience
    model.CLASSES = ('Human', )

    model = MMDataParallel(model, device_ids=[0]).cuda()

    # build runner; it is only used here to restore the checkpoint
    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(model, lambda x: x, optimizer, cfg.work_dir,
                    cfg.log_level)
    runner.resume(cfg.resume_from)
    model = runner.model
    model.eval()

    # necessary for headless rendering
    os.environ['PYOPENGL_PLATFORM'] = 'egl'
    render = Renderer(focal_length=FOCAL_LENGTH)

    img_transform = ImageTransform(size_divisor=32, **img_norm_cfg)
    img_scale = cfg.common_val_cfg.img_scale

    with torch.no_grad():
        folder_name = args.image_folder
        output_folder = args.output_folder
        os.makedirs(output_folder, exist_ok=True)

        for image in sorted(os.listdir(folder_name)):
            file_name = osp.join(folder_name, image)
            img = cv2.imread(file_name)
            if img is None:
                # cv2.imread signals "cannot decode" (sub-directory, text
                # file, corrupt image, ...) by returning None
                continue
            ori_shape = img.shape

            img, img_shape, pad_shape, scale_factor = img_transform(
                img, img_scale)

            # Force padding for the issue of multi-GPU training
            padded_img = np.zeros((img.shape[0], img_scale[1], img_scale[0]),
                                  dtype=img.dtype)
            padded_img[:, :img.shape[-2], :img.shape[-1]] = img
            img = padded_img

            assert img.shape[1] == 512 and img.shape[
                2] == 832, "Image shape incorrect"

            data_batch = dict(
                img=DC([to_tensor(img[None, ...])], stack=True),
                img_meta=DC([{
                    'img_shape': img_shape,
                    'scale_factor': scale_factor,
                    'flip': False,
                    'ori_shape': ori_shape
                }], cpu_only=True),
            )
            bbox_results, pred_results = model(**data_batch,
                                               return_loss=False)

            if pred_results is not None:
                pred_results['bboxes'] = bbox_results[0]
                img = denormalize(img)
                img_viz = prepare_dump(pred_results, img, render,
                                       bbox_results, FOCAL_LENGTH)
                # Build the target from the bare file name; a textual
                # replace of the folder name would also rewrite matching
                # substrings inside the file name itself.
                out_path = osp.join(output_folder, f'{image}.output.jpg')
                # channel order is reversed before writing
                cv2.imwrite(out_path, img_viz[:, :, ::-1])
def prepare_train_img(self, idx):
    """Load and pack one training sample, with optional mask-guided crop.

    Pipeline: read the image, slice the proposals, fetch annotations, apply
    the photometric part of ``self.extra_aug``, resize / flip, transform the
    boxes and masks and, when both ``self.extra_aug`` and ``self.with_mask``
    are active, run the crop augmentation on the transformed sample.

    Args:
        idx (int): position of the sample in ``self.img_infos``.

    Returns:
        dict or None: the packed sample, or ``None`` when the sample has no
        proposals or no ground-truth boxes (before or after cropping).

    Raises:
        AssertionError: if the image file is missing, the image is not
            3-dimensional, or the proposals do not have 4 or 5 columns.
    """
    info = self.img_infos[idx]

    # load image
    assert osp.isfile(osp.join(self.img_prefix, info['filename']))
    img = mmcv.imread(osp.join(self.img_prefix, info['filename']))
    assert len(img.shape) == 3

    # load proposals if necessary
    if self.proposals is not None:
        proposals = self.proposals[idx][:self.num_max_proposals]
        # TODO: Handle empty proposals properly. Currently images with
        # no proposals are just ignored, but they can be used for
        # training in concept.
        if len(proposals) == 0:
            return None
        if proposals.shape[1] not in (4, 5):
            raise AssertionError(
                'proposals should have shapes (n, 4) or (n, 5), '
                'but found {}'.format(proposals.shape))
        scores = None
        if proposals.shape[1] == 5:
            scores = proposals[:, 4, None]
            proposals = proposals[:, :4]

    # get ann here.
    ann = self.get_ann_info(idx)
    gt_bboxes = ann['bboxes']
    gt_labels = ann['labels']
    if self.with_crowd:
        gt_bboxes_ignore = ann['bboxes_ignore']

    # skip the image if there is no valid gt bbox
    if len(gt_bboxes) == 0:
        return None

    # photometric part of the extra augmentation
    if self.extra_aug is not None:
        img = self.extra_aug.first_transform(img)
        img = img.copy()

    # apply transforms: pick flip and scale, then resize and pad the image.
    # scale_factor guides the transformation of boxes and masks afterwards.
    flip = bool(np.random.rand() < self.flip_ratio)
    img_scale = random_scale(self.img_scales, mode=self.resize_mode)
    img, img_shape, pad_shape, scale_factor = self.img_transform(
        img, img_scale, flip, keep_ratio=self.resize_keep_ratio)
    img = img.copy()

    if self.proposals is not None:
        proposals = self.bbox_transform(proposals, img_shape, scale_factor,
                                        flip)
        if scores is not None:
            proposals = np.hstack([proposals, scores])

    gt_bboxes = self.bbox_transform(gt_bboxes, img_shape, scale_factor, flip)
    if self.with_crowd:
        gt_bboxes_ignore = self.bbox_transform(gt_bboxes_ignore, img_shape,
                                               scale_factor, flip)

    if self.with_mask:
        gt_masks = self.mask_transform(ann['masks'], pad_shape, scale_factor,
                                       flip)
        gt_ignore_masks = ann['ignore_masks']
        if len(gt_ignore_masks) > 1:
            gt_ignore_masks = self.mask_transform(gt_ignore_masks, pad_shape,
                                                  scale_factor, flip)
        else:
            gt_ignore_masks = np.array([], dtype=np.uint8)

    # Extra augmentation, different from the stock one: the masks drive the
    # crop, which selects gt_masks and gt_bboxes.  Only random crop is
    # supported.  Input and output images are expected as [3, H, W].
    crop_by_mask = self.extra_aug is not None and self.with_mask
    if crop_by_mask:
        if self.with_crowd:
            ignore_boxes = gt_bboxes_ignore
        else:
            ignore_boxes = np.zeros((0, 4), dtype=np.float32)
        (img, gt_bboxes, gt_labels, gt_bboxes_ignore, gt_masks, img_shape,
         pad_shape) = self.extra_aug(img=img,
                                     boxes=gt_bboxes,
                                     labels=gt_labels,
                                     masks=gt_masks,
                                     ignore_bboxes=ignore_boxes,
                                     ignore_masks=gt_ignore_masks,
                                     img_shape=img_shape,
                                     pad_shape=pad_shape)
        img = img.copy()
        if len(gt_bboxes) == 0:
            return None

    # after a crop the "original" shape is reported as the cropped shape
    if crop_by_mask:
        ori_shape = img_shape
    else:
        ori_shape = (info['height'], info['width'], 3)

    img_meta = {
        'ori_shape': ori_shape,
        'img_shape': img_shape,
        'pad_shape': pad_shape,
        'scale_factor': scale_factor,
        'flip': flip,
    }

    data = {
        'img': DC(to_tensor(img), stack=True),
        'img_meta': DC(img_meta, cpu_only=True),
        'gt_bboxes': DC(to_tensor(gt_bboxes)),
    }
    if self.proposals is not None:
        data['proposals'] = DC(to_tensor(proposals))
    if self.with_label:
        data['gt_labels'] = DC(to_tensor(gt_labels))
    if self.with_crowd:
        data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore))
    if self.with_mask:
        data['gt_masks'] = DC(gt_masks, cpu_only=True)
    return data
def prepare_test_img(self, idx):
    """Prepare an image for testing (multi-scale and flipping).

    For every scale in ``self.img_scales`` the image is transformed once
    without flip and, when ``self.flip_ratio > 0``, once more with flip.
    Optionally a segmentation map and caption tensors are attached.

    Args:
        idx (int): position of the sample in ``self.img_infos``.

    Returns:
        dict: ``img`` (list of tensors) and ``img_meta`` (list of
        containers), plus ``gt_seg`` when ``self.with_seg`` is set and
        ``gt_caps`` / ``gt_caplens`` / ``allcaps`` when ``self.with_cap`` is
        set.

    Raises:
        AssertionError: if the proposals do not have 4 or 5 columns.
    """
    img_info = self.img_infos[idx]
    img = mmcv.imread(osp.join(self.img_prefix, img_info['filename']))

    if self.proposals is not None:
        proposal = self.proposals[idx][:self.num_max_proposals]
        if proposal.shape[1] not in (4, 5):
            raise AssertionError(
                'proposals should have shapes (n, 4) or (n, 5), '
                'but found {}'.format(proposal.shape))
    else:
        proposal = None

    def prepare_single(img, scale, flip, proposal=None):
        """Transform the image (and proposals) for one scale/flip setting."""
        _img, img_shape, pad_shape, scale_factor = self.img_transform(
            img, scale, flip, keep_ratio=self.resize_keep_ratio)
        _img = to_tensor(_img)
        _img_meta = dict(ori_shape=(img_info['height'], img_info['width'],
                                    3),
                         img_shape=img_shape,
                         pad_shape=pad_shape,
                         scale_factor=scale_factor,
                         flip=flip)
        if proposal is None:
            return _img, _img_meta, None

        # split the score column off before the boxes are transformed
        if proposal.shape[1] == 5:
            score = proposal[:, 4, None]
            proposal = proposal[:, :4]
        else:
            score = None
        _proposal = self.bbox_transform(proposal, img_shape, scale_factor,
                                        flip)
        if score is not None:
            _proposal = np.hstack([_proposal, score])
        return _img, _img_meta, to_tensor(_proposal)

    imgs = []
    img_metas = []
    # collected for parity with the train path; not exported in ``data``
    proposals = []
    flips = (False, True) if self.flip_ratio > 0 else (False, )
    for scale in self.img_scales:
        for flip in flips:
            _img, _img_meta, _proposal = prepare_single(
                img, scale, flip, proposal)
            imgs.append(_img)
            img_metas.append(DC(_img_meta, cpu_only=True))
            proposals.append(_proposal)

    data = dict(img=imgs, img_meta=img_metas)

    if self.with_seg:
        # NOTE(review): the image is looked up via img_info['filename'] but
        # the segmentation map via img_info['file_name'] - confirm both keys
        # exist in the annotation records.
        gt_seg = mmcv.imread(osp.join(
            self.seg_prefix, img_info['file_name'].replace('jpg', 'png')),
                             flag='unchanged')
        # only the first scale is used for the segmentation map, unflipped
        gt_seg = self.seg_transform(gt_seg.squeeze(), self.img_scales[0],
                                    False)
        gt_seg = gt_seg[None, ...]
        # np.int64 instead of the ``np.long`` alias, which is not available
        # in every NumPy release
        data['gt_seg'] = DC(to_tensor(gt_seg.astype(np.int64)), stack=True)

    if self.with_cap:
        # pick one caption at random from the image's block of self.cpi
        # consecutive entries
        rnd_idx = idx * self.cpi + np.random.randint(self.cpi)
        data['gt_caps'] = to_tensor(self.enc_captions[rnd_idx])
        data['gt_caplens'] = to_tensor(np.array(self.caplens[rnd_idx]))
        data['allcaps'] = to_tensor(
            self.enc_captions[idx * self.cpi:(idx + 1) * self.cpi])

    return data
def prepare_train_img(self, idx):
    """Load, augment and pack one training sample (boxes, seg, captions).

    Args:
        idx (int): position of the sample in ``self.img_infos``.

    Returns:
        dict or None: the packed sample (``img``, ``img_meta``,
        ``gt_bboxes`` plus the optional ``proposals``, ``gt_labels``,
        ``gt_bboxes_ignore``, ``gt_masks``, ``gt_seg``, ``gt_caps`` and
        ``gt_caplens`` entries), or ``None`` when the sample has no
        proposals or no ground-truth boxes.

    Raises:
        AssertionError: if the proposals do not have 4 or 5 columns.
    """
    img_info = self.img_infos[idx]
    # load image
    img = mmcv.imread(osp.join(self.img_prefix, img_info['filename']))

    # load proposals if necessary
    if self.proposals is not None:
        proposals = self.proposals[idx][:self.num_max_proposals]
        # TODO: Handle empty proposals properly. Currently images with
        # no proposals are just ignored, but they can be used for
        # training in concept.
        if len(proposals) == 0:
            return None
        if proposals.shape[1] not in (4, 5):
            raise AssertionError(
                'proposals should have shapes (n, 4) or (n, 5), '
                'but found {}'.format(proposals.shape))
        if proposals.shape[1] == 5:
            scores = proposals[:, 4, None]
            proposals = proposals[:, :4]
        else:
            scores = None

    ann = self.get_ann_info(idx)
    gt_bboxes = ann['bboxes']
    gt_labels = ann['labels']
    if self.with_crowd:
        gt_bboxes_ignore = ann['bboxes_ignore']

    # skip the image if there is no valid gt bbox
    if len(gt_bboxes) == 0:
        return None

    # extra augmentation
    if self.extra_aug is not None:
        img, gt_bboxes, gt_labels = self.extra_aug(img, gt_bboxes, gt_labels)

    # apply transforms
    flip = bool(np.random.rand() < self.flip_ratio)
    # randomly sample a scale
    img_scale = random_scale(self.img_scales, self.multiscale_mode)
    img, img_shape, pad_shape, scale_factor = self.img_transform(
        img, img_scale, flip, keep_ratio=self.resize_keep_ratio)
    img = img.copy()

    if self.with_seg:
        # NOTE(review): the image is looked up via img_info['filename'] but
        # the segmentation map via img_info['file_name'] - confirm both keys
        # exist in the annotation records.
        gt_seg = mmcv.imread(osp.join(
            self.seg_prefix, img_info['file_name'].replace('jpg', 'png')),
                             flag='unchanged')
        # the segmentation map gets the same scale and flip as the image
        gt_seg = self.seg_transform(gt_seg.squeeze(), img_scale, flip)
        gt_seg = gt_seg[None, ...]

    if self.proposals is not None:
        proposals = self.bbox_transform(proposals, img_shape, scale_factor,
                                        flip)
        if scores is not None:
            proposals = np.hstack([proposals, scores])

    gt_bboxes = self.bbox_transform(gt_bboxes, img_shape, scale_factor, flip)
    if self.with_crowd:
        gt_bboxes_ignore = self.bbox_transform(gt_bboxes_ignore, img_shape,
                                               scale_factor, flip)
    if self.with_mask:
        gt_masks = self.mask_transform(ann['masks'], pad_shape, scale_factor,
                                       flip)

    ori_shape = (img_info['height'], img_info['width'], 3)
    img_meta = dict(ori_shape=ori_shape,
                    img_shape=img_shape,
                    pad_shape=pad_shape,
                    scale_factor=scale_factor,
                    flip=flip)

    data = dict(img=DC(to_tensor(img), stack=True),
                img_meta=DC(img_meta, cpu_only=True),
                gt_bboxes=DC(to_tensor(gt_bboxes)))
    if self.proposals is not None:
        data['proposals'] = DC(to_tensor(proposals))
    if self.with_label:
        data['gt_labels'] = DC(to_tensor(gt_labels))
    if self.with_crowd:
        data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore))
    if self.with_mask:
        data['gt_masks'] = DC(gt_masks, cpu_only=True)
    if self.with_seg:
        # np.int64 instead of the ``np.long`` alias, which is not available
        # in every NumPy release
        data['gt_seg'] = DC(to_tensor(gt_seg.astype(np.int64)), stack=True)
    if self.with_cap:
        # pick one caption at random from the image's block of self.cpi
        # consecutive entries
        rnd_idx = idx * self.cpi + np.random.randint(self.cpi)
        data['gt_caps'] = to_tensor(self.enc_captions[rnd_idx])
        data['gt_caplens'] = to_tensor(np.array(self.caplens[rnd_idx]))
    return data