def _pad_img(self, results):
    """Pad ``results['img']`` and record the padding metadata.

    The image is padded to the fixed ``self.size`` when that is set,
    otherwise to a multiple of ``self.size_divisor``. If both are ``None``
    no padded image is produced and the assignment below fails.

    Args:
        results (dict): Result dict; ``'img'`` is read, and ``'img'``,
            ``'pad_shape'``, ``'pad_fixed_size'`` and
            ``'pad_size_divisor'`` are written.
    """
    if self.size is not None:
        # ``shape``/``pad_val`` go by keyword: newer mmcv releases make
        # them keyword-only, and older releases accept the names as well.
        padded_img = mmcv.impad(
            results['img'], shape=self.size, pad_val=self.pad_val)
    elif self.size_divisor is not None:
        padded_img = mmcv.impad_to_multiple(
            results['img'], self.size_divisor, pad_val=self.pad_val)
    results['img'] = padded_img
    results['pad_shape'] = padded_img.shape
    results['pad_fixed_size'] = self.size
    results['pad_size_divisor'] = self.size_divisor
def pad(self, out_shape, pad_val=0):
    """See :func:`BaseInstanceMasks.pad`."""
    if len(self.masks) > 0:
        per_mask = [
            mmcv.impad(single, shape=out_shape, pad_val=pad_val)
            for single in self.masks
        ]
        stacked = np.stack(per_mask)
    else:
        # No masks: build an empty uint8 stack that already has the
        # requested spatial size.
        stacked = np.empty((0, *out_shape), dtype=np.uint8)
    return BitmapMasks(stacked, *out_shape)
def _pad_masks(self, results):
    """Pad every mask named in ``results['mask_fields']`` to ``pad_shape``.

    Args:
        results (dict): Result dict; reads ``'mask_fields'``,
            ``'pad_shape'`` and each listed mask field, and replaces each
            mask field with the stacked padded masks.

    Returns:
        dict | None: ``results`` on the early-exit path (no mask fields,
        or the first mask field is empty); otherwise ``None``.
    """
    mask_fields = results.get('mask_fields', [])
    # Guard the ``[0]`` lookup: with no mask fields there is nothing to
    # inspect and nothing to pad.
    if not mask_fields or len(results[mask_fields[0]]) == 0:
        return results

    pad_shape = results['pad_shape'][:2]
    for key in mask_fields:
        # ``shape`` goes by keyword: newer mmcv releases make it
        # keyword-only, and older releases accept the name as well.
        padded_masks = [
            mmcv.impad(mask, shape=pad_shape, pad_val=self.pad_val)
            for mask in results[key]
        ]
        results[key] = np.stack(padded_masks, axis=0)
def _pad_masks(self, results):
    """Pad the masks of the current (and, if present, reference) sample.

    Mask keys are taken from ``results['mask_fields']`` and, when the
    ``'ref_mask_fields'`` key exists, from that list first.

    Args:
        results (dict): Result dict; reads ``'pad_shape'`` and the listed
            mask fields, and replaces each mask field with the stacked
            padded masks.
    """
    if 'ref_mask_fields' in results:
        field_lists = ['ref_mask_fields', 'mask_fields']
    else:
        field_lists = ['mask_fields']

    # The target size is the same for every field, so compute it once.
    pad_shape = results['pad_shape'][:2]
    for field_list in field_lists:
        for key in results.get(field_list, []):
            # ``shape`` goes by keyword: newer mmcv releases make it
            # keyword-only, and older releases accept the name as well.
            padded_masks = [
                mmcv.impad(mask, shape=pad_shape, pad_val=self.pad_val)
                for mask in results[key]
            ]
            results[key] = np.stack(padded_masks, axis=0)
def _pad_masks(self, results):
    """Pad every mask named in ``results['mask_fields']`` to ``pad_shape``.

    A field holding no masks is replaced by an empty ``uint8`` array of
    shape ``(0, h, w)`` instead of being stacked.

    Args:
        results (dict): Result dict; reads ``'pad_shape'``,
            ``'mask_fields'`` and each listed mask field, and overwrites
            each mask field.
    """
    pad_shape = results['pad_shape'][:2]
    for key in results.get('mask_fields', []):
        # ``shape`` goes by keyword: newer mmcv releases make it
        # keyword-only, and older releases accept the name as well.
        padded_masks = [
            mmcv.impad(mask, shape=pad_shape, pad_val=self.pad_val)
            for mask in results[key]
        ]
        if padded_masks:
            results[key] = np.stack(padded_masks, axis=0)
        else:
            # ``np.stack`` rejects an empty list.
            results[key] = np.empty((0, ) + pad_shape, dtype=np.uint8)
def _pad_img(self, results):
    """Pad ``results['img']`` to a fixed size or to a size multiple.

    ``self.size`` takes precedence over ``self.size_divisor``. The padded
    image and the padding settings are written back into ``results``.
    """
    if self.size is not None:
        padded_img = mmcv.impad(
            results['img'], shape=self.size, pad_val=self.pad_val)
    elif self.size_divisor is not None:
        padded_img = mmcv.impad_to_multiple(
            results['img'], self.size_divisor, pad_val=self.pad_val)

    results.update({
        'img': padded_img,
        'pad_shape': padded_img.shape,
        'pad_fixed_size': self.size,
        'pad_size_divisor': self.size_divisor,
    })
def __call__(self, masks, pad_shape, scale_factor, flip=False):
    """Rescale, optionally flip, zero-pad and stack a list of masks.

    Args:
        masks (iterable): Masks to transform, one 2-D array per mask
            (assumed from the ``mask[:, ::-1]`` flip).
        pad_shape (tuple): Target shape; only the first two entries are
            used.
        scale_factor: Scale passed to ``mmcv.imrescale``.
        flip (bool): Whether to mirror each mask along its second axis.

    Returns:
        ndarray: The padded masks stacked along a new first axis.
    """
    masks = [
        mmcv.imrescale(mask, scale_factor, interpolation='nearest')
        for mask in masks
    ]
    if flip:
        masks = [mask[:, ::-1] for mask in masks]
    # ``shape`` goes by keyword: newer mmcv releases make it keyword-only,
    # and older releases accept the name as well.
    padded_masks = [
        mmcv.impad(mask, shape=pad_shape[:2], pad_val=0) for mask in masks
    ]
    return np.stack(padded_masks, axis=0)
def _pad_img(self, results):
    """Pad images according to ``self.size``.

    Every key in ``results['img_fields']`` (``['img']`` when the entry is
    missing) is padded to ``self.size`` or, failing that, to a multiple of
    ``self.size_divisor``. ``'pad_shape'`` is taken from the last image
    padded.
    """
    for key in results.get('img_fields', ['img']):
        if self.size is not None:
            # ``shape``/``pad_val`` go by keyword: newer mmcv releases
            # make them keyword-only, older ones accept the names too.
            padded_img = mmcv.impad(
                results[key], shape=self.size, pad_val=self.pad_val)
        elif self.size_divisor is not None:
            padded_img = mmcv.impad_to_multiple(
                results[key], self.size_divisor, pad_val=self.pad_val)
        results[key] = padded_img
    results['pad_shape'] = padded_img.shape
    results['pad_fixed_size'] = self.size
    results['pad_size_divisor'] = self.size_divisor
def __call__(self, img, scale, flip=False, pad_val=(0, 0, 0), keep_ratio=True):
    """Rescale, normalize, optionally flip, pad and transpose an image.

    Args:
        img (ndarray): Input image; assumed (h, w, c) because the result
            is transposed with ``(2, 0, 1)``.
        scale: Target size as ``(w, h)``. A single number is forwarded
            unchanged to the rescale and pad calls.
        flip (bool): Whether to flip the image with ``mmcv.imflip``.
        pad_val: Fill value used when padding.
        keep_ratio (bool): Rescale within ``scale`` keeping the aspect
            ratio when true; otherwise resize to exactly ``scale``.

    Returns:
        tuple: ``(img, img_shape, pad_shape, scale_factor)`` where
        ``img_shape`` is the shape after resizing and ``pad_shape`` the
        shape after padding, both taken before the transpose.
    """
    # 1. rescale/resize the image to the expected size
    if keep_ratio:
        # Rescaled as large as possible within ``scale``.
        img, scale_factor = imrescale(
            img=img,
            scale=scale,
            return_scale=True,
            interpolation='bilinear',
        )
    else:
        # Resized to the given size, ignoring the aspect ratio.
        img, w_scale, h_scale = mmcv.imresize(
            img=img,
            size=scale,
            return_scale=True,
            interpolation='bilinear',
        )
        scale_factor = np.array(
            [w_scale, h_scale, w_scale, h_scale], dtype=np.float32)

    # 2. normalize the image
    img_shape = img.shape
    img = imnormalize(img, self.mean, self.std, self.to_rgb)

    # 3. flip the image (if needed)
    if flip:
        img = mmcv.imflip(img)

    # 4. pad the image to the exact scale value
    if isinstance(scale, (int, float)):
        # Scalar scale: forwarded as-is and always padded, as before.
        target_shape = scale
        needs_pad = True
    else:
        # ``scale`` is (w, h) while array shapes are (h, w[, c]); compare
        # only the spatial dims, in matching order. Comparing the full
        # shape tuple with ``scale`` was always unequal.
        target_shape = (scale[1], scale[0])
        needs_pad = tuple(img_shape[:2]) != target_shape

    if needs_pad:
        img = mmcv.impad(img=img, shape=target_shape, pad_val=pad_val)
        pad_shape = img.shape
    else:
        pad_shape = img_shape

    # 5. transpose to (c, h, w)
    img = img.transpose(2, 0, 1)
    return img, img_shape, pad_shape, scale_factor
def _pad_img(self, results):
    """Pad every image named in ``results['img_fields']``.

    Each image is padded to ``self.size`` or, failing that, to a multiple
    of ``self.size_divisor``. ``'pad_shape'`` is taken from the last image
    padded, so at least one image field must be present.
    """
    for key in results.get('img_fields', []):
        if self.size is not None:
            # ``shape``/``pad_val`` go by keyword: newer mmcv releases
            # make them keyword-only, older ones accept the names too.
            padded_img = mmcv.impad(
                results[key], shape=self.size, pad_val=self.pad_val)
        elif self.size_divisor is not None:
            padded_img = mmcv.impad_to_multiple(
                results[key], self.size_divisor, pad_val=self.pad_val)
        results[key] = padded_img
    results['pad_shape'] = padded_img.shape
    results['pad_fixed_size'] = self.size
    results['pad_size_divisor'] = self.size_divisor
def __call__(self, results):
    """Optionally pad, then crop, every image field in ``results``.

    Args:
        results (dict): Result dict; each key in ``'img_fields'``
            (``['img']`` when missing) is replaced by its cropped image.

    Returns:
        dict: The same ``results`` object.
    """
    for key in results.get('img_fields', ['img']):
        img = results[key]

        if self.padding is not None:
            img = mmcv.impad(
                img, padding=self.padding, pad_val=self.pad_val)

        if self.pad_if_needed:
            # Height first: the shortfall is added on both top and bottom.
            short_h = self.size[0] - img.shape[0]
            if short_h > 0:
                img = mmcv.impad(
                    img,
                    padding=(0, short_h, 0, short_h),
                    pad_val=self.pad_val,
                    padding_mode=self.padding_mode)

            # Then width, measured on the (possibly already padded) image.
            short_w = self.size[1] - img.shape[1]
            if short_w > 0:
                img = mmcv.impad(
                    img,
                    padding=(short_w, 0, short_w, 0),
                    pad_val=self.pad_val,
                    padding_mode=self.padding_mode)

        top, left, crop_h, crop_w = self.get_params(img, self.size)
        # The crop box is given as inclusive corner coordinates.
        crop_box = np.array([
            left,
            top,
            left + crop_w - 1,
            top + crop_h - 1,
        ])
        results[key] = mmcv.imcrop(img, crop_box)
    return results
def _pad_img(self, results):
    """Pad the image and, when present, the reference image.

    Each of ``'ref_img'`` (if in ``results``) and ``'img'`` is padded to
    ``self.size`` or, failing that, to a multiple of
    ``self.size_divisor``. ``'pad_shape'`` is taken from the last image
    padded, which is ``'img'``.
    """
    fields = ['ref_img', 'img'] if 'ref_img' in results else ['img']
    for field in fields:
        if self.size is not None:
            # Pad the field being processed. Padding ``results['img']``
            # here would overwrite ``ref_img`` with the main image. The
            # fill value matches the ``size_divisor`` branch.
            padded_img = mmcv.impad(
                results[field], shape=self.size, pad_val=self.pad_val)
        elif self.size_divisor is not None:
            padded_img = mmcv.impad_to_multiple(
                results[field], self.size_divisor, pad_val=self.pad_val)
        results[field] = padded_img
    results['pad_shape'] = padded_img.shape
    results['pad_fixed_size'] = self.size
    results['pad_size_divisor'] = self.size_divisor
def _pad_img(self, results):
    """Pad ``results["img"]`` and store the padding metadata.

    A fixed ``self.size`` wins over ``self.size_divisor``.
    """
    source = results["img"]
    if self.size is not None:
        padded_img = mmcv.impad(source, shape=self.size, pad_val=self.pad_val)
    elif self.size_divisor is not None:
        padded_img = mmcv.impad_to_multiple(
            source, self.size_divisor, pad_val=self.pad_val
        )

    results.update(
        {
            "img": padded_img,
            "pad_shape": padded_img.shape,
            "pad_fixed_size": self.size,
            "pad_size_divisor": self.size_divisor,
        }
    )
def _pad_masks(self, results):
    """Pad every mask named in ``results['mask_fields']`` to ``pad_shape``.

    Each mask field is replaced by its padded masks stacked along a new
    first axis. A field holding no masks makes ``np.stack`` raise.

    Args:
        results (dict): Result dict; reads ``'pad_shape'``,
            ``'mask_fields'`` and each listed mask field.
    """
    pad_shape = results['pad_shape'][:2]
    for key in results.get('mask_fields', []):
        # ``shape`` goes by keyword: newer mmcv releases make it
        # keyword-only, and older releases accept the name as well.
        padded_masks = [
            mmcv.impad(mask, shape=pad_shape, pad_val=self.pad_val)
            for mask in results[key]
        ]
        results[key] = np.stack(padded_masks, axis=0)
def camera_motion_compensation(data,
                               img_meta,
                               bboxes,
                               criteria,
                               warp_mode=cv2.MOTION_EUCLIDEAN,
                               filter_size=1):
    """Warp boxes by the motion estimated between two consecutive frames.

    The previous frame's file name is derived from the current one by
    decrementing the number in its last ``-``-separated part. Both frames
    are read as grayscale and padded, an ECC warp is estimated with
    ``cv2.findTransformECC``, and both corners of every box are
    transformed by it.

    Args:
        data (dict): Provides ``'img_prefix'`` and ``'img_scale'``.
        img_meta (list): ``img_meta[0]['img_info']['filename']`` names the
            current frame.
        bboxes (Tensor): One box per row; columns 0:2 and 2:4 are the two
            corners and the last column is carried over unchanged.
        criteria: Termination criteria passed to ``cv2.findTransformECC``.
        warp_mode: Motion model passed to ``cv2.findTransformECC``.
        filter_size (int): Filter size passed to ``cv2.findTransformECC``.

    Returns:
        Tensor: The warped boxes, or the input boxes unchanged when the
        ECC estimation raises ``cv2.error``.
    """
    cur_file_name = img_meta[0]['img_info']['filename']
    name_parts = cur_file_name.split('-')
    frame_ind = int(name_parts[-1].split('.')[0])
    fill = len(str(frame_ind))
    pre_last_part = name_parts[-1].replace(
        str(frame_ind), str(frame_ind - 1).zfill(fill))
    pre_file_name = '-'.join(name_parts[:-1] + [pre_last_part])

    pre_img = mmcv.imread(
        osp.join(data['img_prefix'], pre_file_name), flag='grayscale')
    cur_img = mmcv.imread(
        osp.join(data['img_prefix'], cur_file_name), flag='grayscale')

    # The frames are transposed so ``img_scale`` can be used directly as
    # the pad shape. ``shape`` goes by keyword: newer mmcv releases make
    # it keyword-only, and older releases accept the name as well.
    pre_img = mmcv.impad(pre_img.T, shape=data['img_scale']).T
    cur_img = mmcv.impad(cur_img.T, shape=data['img_scale']).T

    warp_matrix = np.eye(2, 3, dtype=np.float32)
    try:
        _, warp_matrix = cv2.findTransformECC(pre_img, cur_img, warp_matrix,
                                              warp_mode, criteria, None,
                                              filter_size)
    except cv2.error as e:
        # Best effort: report the failure and return the boxes unwarped.
        print(img_meta[0]['img_info'], e)
    else:
        warp_matrix = torch.from_numpy(warp_matrix).to(bboxes.device)
        # Append a column of ones so the 2x3 matrix applies to each corner.
        dummy = bboxes.new_ones(bboxes.size(0), 1)
        pt1s = torch.cat((bboxes[:, 0:2], dummy), dim=1)
        pt2s = torch.cat((bboxes[:, 2:4], dummy), dim=1)
        new_pt1s = torch.mm(warp_matrix, pt1s.t()).t()
        new_pt2s = torch.mm(warp_matrix, pt2s.t()).t()
        bboxes = torch.cat(
            (new_pt1s, new_pt2s, bboxes[:, -1].view(-1, 1)), dim=1)
    return bboxes
def __call__(self, results):
    """Pad every image field to ``self.size`` or to a square.

    Args:
        results (dict): Result dict; each key in ``'img_fields'``
            (``['img']`` when missing) is replaced by its padded image and
            ``'img_shape'`` is set to that image's shape.

    Returns:
        dict: The same ``results`` object.
    """
    for key in results.get('img_fields', ['img']):
        source = results[key]
        if not self.pad_to_square:
            target_size = self.size
        else:
            # Square whose side is the longer of the first two dims.
            side = max(source.shape[0], source.shape[1])
            target_size = (side, side)
        padded = mmcv.impad(
            source,
            shape=target_size,
            pad_val=self.pad_val,
            padding_mode=self.padding_mode)
        results[key] = padded
        results['img_shape'] = padded.shape
    return results
def _pad_sequence(self, results):
    """Pad every frame in ``results['sequence_imgs']``.

    Frames are padded to the fixed ``self.size`` when set, otherwise to a
    multiple of ``self.size_divisor``; the padded list replaces the
    original entry.
    """
    if self.size is not None:
        padded_frames = [
            mmcv.impad(frame, shape=self.size, pad_val=self.pad_val)
            for frame in results['sequence_imgs']
        ]
    elif self.size_divisor is not None:
        padded_frames = [
            mmcv.impad_to_multiple(
                frame, self.size_divisor, pad_val=self.pad_val)
            for frame in results['sequence_imgs']
        ]
    results['sequence_imgs'] = padded_frames
def __call__(self, masks, pad_shape, scale_factor, flip=False):
    """Rescale, optionally flip, zero-pad and stack a list of masks.

    Args:
        masks (iterable): Masks to transform, one 2-D array per mask
            (assumed from the ``mask[:, ::-1]`` flip).
        pad_shape (tuple): Target shape; only the first two entries are
            used.
        scale_factor: Scale passed to ``mmcv.imrescale``.
        flip (bool): Whether to mirror each mask along its second axis.

    Returns:
        ndarray: The padded masks stacked along a new first axis.
    """
    # Masks are resized with nearest-neighbour only, never bilinear.
    # (Original note: "will deal with gt_ignore_mask together.")
    masks = [
        mmcv.imrescale(mask, scale_factor, interpolation='nearest')
        for mask in masks
    ]
    if flip:
        masks = [mask[:, ::-1] for mask in masks]
    # ``shape`` goes by keyword: newer mmcv releases make it keyword-only,
    # and older releases accept the name as well.
    padded_masks = [
        mmcv.impad(mask, shape=pad_shape[:2], pad_val=0) for mask in masks
    ]
    return np.stack(padded_masks, axis=0)
def __call__(self, masks, pad_shape, scale_factor, flip=False):
    """Rescale, optionally flip, zero-pad and stack a list of masks.

    Args:
        masks (iterable): Masks to transform, one 2-D array per mask
            (assumed from the ``mask[:, ::-1]`` flip).
        pad_shape (tuple): Target shape; only the first two entries are
            used.
        scale_factor: Scale passed to ``mmcv.imrescale``.
        flip (bool): Whether to mirror each mask along its second axis.

    Returns:
        ndarray: The padded masks stacked along a new first axis.
    """
    # Rescale each mask by ``scale_factor`` with nearest-neighbour
    # interpolation.
    masks = [
        mmcv.imrescale(mask, scale_factor, interpolation='nearest')
        for mask in masks
    ]
    if flip:
        masks = [mask[:, ::-1] for mask in masks]
    # ``shape`` goes by keyword: newer mmcv releases make it keyword-only,
    # and older releases accept the name as well.
    padded_masks = [
        mmcv.impad(mask, shape=pad_shape[:2], pad_val=0) for mask in masks
    ]
    return np.stack(padded_masks, axis=0)
def pad(self, out_shape, pad_val=0):
    """Pad masks to the given size of (h, w).

    Args:
        out_shape (tuple[int]): target (h, w) of padded mask
        pad_val (int): the padded value

    Returns:
        BitmapMasks: the padded masks
    """
    if len(self.masks) == 0:
        # No masks: an empty uint8 stack with the requested spatial size.
        padded_masks = np.empty((0, *out_shape), dtype=np.uint8)
    else:
        # ``shape`` goes by keyword: newer mmcv releases make it
        # keyword-only, and older releases accept the name as well.
        padded_masks = np.stack([
            mmcv.impad(mask, shape=out_shape, pad_val=pad_val)
            for mask in self.masks
        ])
    return BitmapMasks(padded_masks, *out_shape)
def __call__(self, results):
    """Apply the image's resize, flip and pad to the semantic seg map.

    Args:
        results (dict): Result dict; reads ``'gt_semantic_seg'``,
            ``'keep_ratio'``, ``'scale'``, ``'flip'`` and ``'pad_shape'``,
            and overwrites ``'gt_semantic_seg'``.

    Returns:
        dict: The same ``results`` object.
    """
    if results['keep_ratio']:
        gt_seg = mmcv.imrescale(
            results['gt_semantic_seg'],
            results['scale'],
            interpolation='nearest')
    else:
        gt_seg = mmcv.imresize(
            results['gt_semantic_seg'],
            results['scale'],
            interpolation='nearest')
    if results['flip']:
        gt_seg = mmcv.imflip(gt_seg)

    # Compare spatial dims only. ``pad_shape`` may carry a channel entry
    # the map lacks, which made the full-shape comparison always unequal.
    pad_hw = results['pad_shape'][:2]
    if tuple(gt_seg.shape[:2]) != tuple(pad_hw):
        # ``shape`` goes by keyword: newer mmcv releases make it
        # keyword-only, and older releases accept the name as well.
        gt_seg = mmcv.impad(gt_seg, shape=pad_hw)

    if self.scale_factor != 1:
        gt_seg = mmcv.imrescale(
            gt_seg, self.scale_factor, interpolation='nearest')
    results['gt_semantic_seg'] = gt_seg
    return results
def __call__(self, masks, pad_shape, scale, flip=False, keep_ratio=True):
    """Resize, optionally flip, zero-pad and stack a list of masks.

    ``keep_ratio`` selects ``mmcv.imrescale`` over ``mmcv.imresize``; both
    use nearest-neighbour interpolation. The padded masks are returned
    stacked along a new first axis.
    """
    resize_fn = mmcv.imrescale if keep_ratio else mmcv.imresize
    resized = [
        resize_fn(single, scale, interpolation='nearest')
        for single in masks
    ]
    if flip:
        # Mirror along the second axis.
        resized = [single[:, ::-1] for single in resized]
    return np.stack(
        [
            mmcv.impad(single, shape=pad_shape[:2], pad_val=0)
            for single in resized
        ],
        axis=0)
def _pad_img(self, results):
    """Pad stacked images slice by slice along their last axis.

    Each field in ``results['img_fields']`` (``['img']`` when missing) is
    indexed as a 4-D array. Every ``[:, :, :, i]`` slice is padded to
    ``self.size`` or, failing that, to a multiple of
    ``self.size_divisor``, and the slices are re-stacked on the last
    axis. ``'pad_shape'`` is the shape of the last re-stacked array.
    """
    for key in results.get('img_fields', ['img']):
        stack = results[key]
        img_num = stack.shape[-1]
        if self.size is not None:
            # ``shape``/``pad_val`` go by keyword: newer mmcv releases
            # make them keyword-only, older ones accept the names too.
            images = [
                mmcv.impad(
                    stack[:, :, :, index],
                    shape=self.size,
                    pad_val=self.pad_val) for index in range(img_num)
            ]
        elif self.size_divisor is not None:
            images = [
                mmcv.impad_to_multiple(
                    stack[:, :, :, index],
                    self.size_divisor,
                    pad_val=self.pad_val) for index in range(img_num)
            ]
        else:
            images = []
        images = np.stack(images, axis=-1)
        results[key] = images
    results['pad_shape'] = images.shape
    results['pad_fixed_size'] = self.size
    results['pad_size_divisor'] = self.size_divisor
def single_call(self, results, img_ref):
    """Apply to ``img_ref`` the transforms recorded in ``results``.

    The steps, in order, are resize, flip, normalization, cropping and
    padding, each driven by the matching entry in ``results``.

    Args:
        results (dict): Reads ``'keep_ratio'``, ``'scale'``, ``'flip'``,
            ``'img_norm_cfg'``, ``'pad_shape'`` and, if present,
            ``'crop_coords'``.
        img_ref (ndarray): Reference image; indexed with three axes when
            cropping.

    Returns:
        ndarray: The transformed reference image.
    """
    if results['keep_ratio']:
        img_ref = mmcv.imrescale(
            img_ref, results['scale'], return_scale=False)
    else:
        img_ref = mmcv.imresize(
            img_ref, results['scale'], return_scale=False)
    if results['flip']:
        img_ref = mmcv.imflip(img_ref)
    if results['img_norm_cfg']:
        img_norm_cfg = results['img_norm_cfg']
        img_ref = mmcv.imnormalize(img_ref, img_norm_cfg['mean'],
                                   img_norm_cfg['std'],
                                   img_norm_cfg['to_rgb'])
    if 'crop_coords' in results:
        # Coordinates are used as (row_start, row_stop, col_start,
        # col_stop).
        crds = results['crop_coords']
        img_ref = img_ref[crds[0]:crds[1], crds[2]:crds[3], :]
    if img_ref.shape != results['pad_shape']:
        # ``shape`` goes by keyword: newer mmcv releases make it
        # keyword-only, and older releases accept the name as well.
        img_ref = mmcv.impad(img_ref, shape=results['pad_shape'][:2])
    return img_ref
def __call__(self, results):
    """Load an image and replace it with the patch in ``patch_win``.

    Args:
        results (dict): ``'img'`` is passed to ``mmcv.imread``; when it is
            a string it is also recorded as the file name.
            ``'patch_win'`` is ``(x_start, y_start, x_stop, y_stop)``.

    Returns:
        dict: ``results`` with ``'img'`` set to the patch, plus
        ``'filename'``, ``'ori_filename'``, ``'img_fields'``,
        ``'img_shape'`` and ``'ori_shape'``.
    """
    if isinstance(results['img'], str):
        results['filename'] = results['img']
        results['ori_filename'] = results['img']
    else:
        results['filename'] = None
        results['ori_filename'] = None
    img = mmcv.imread(results['img'])

    x_start, y_start, x_stop, y_stop = results['patch_win']
    ph_h = y_stop - y_start
    # Width is measured from ``x_start``, not ``y_start``.
    ph_w = x_stop - x_start

    patch = img[y_start:y_stop, x_start:x_stop]
    # A window that runs past the image yields a short slice; pad it back
    # to the requested patch size.
    if ph_h > patch.shape[0] or ph_w > patch.shape[1]:
        patch = mmcv.impad(patch, shape=(ph_h, ph_w), pad_val=self.fill)

    results['img'] = patch
    results['img_fields'] = ['img']
    results['img_shape'] = patch.shape
    results['ori_shape'] = patch.shape
    return results
def main():
    """Demonstrate basic mmcv image operations on ``image_path``.

    Reads the image, converts it to RGB, then resizes, rotates, flips,
    centre-crops and pads it; only the padded result is displayed.
    """
    bgr_img = mmcv.imread(image_path)
    h, w, _ = bgr_img.shape
    # convert color
    rgb_img = mmcv.bgr2rgb(bgr_img)

    # resize
    resize_img = mmcv.imresize(rgb_img, size=(256, 256))

    # rotate
    rotate_img = mmcv.imrotate(rgb_img, angle=45)

    # flip
    flip_img = mmcv.imflip(rgb_img, direction='horizontal')

    # crop the centred square whose side is the shorter image edge
    side = min(h, w)
    if h <= w:
        y_min = 0
        x_min = (w - h) // 2
    else:
        # Portrait image: the square spans the full width.
        x_min = 0
        y_min = (h - w) // 2
    # ``mmcv.imcrop`` treats both corners as inclusive, hence the ``- 1``.
    bbox = np.array([x_min, y_min, x_min + side - 1, y_min + side - 1])
    crop_img = mmcv.imcrop(rgb_img, bbox)

    # padding
    max_size = max(h, w)
    pad_img = mmcv.impad(
        rgb_img, shape=(max_size, max_size), padding_mode='constant')

    mmcv.imshow(mmcv.rgb2bgr(pad_img))
def _scale_bbox_points(self, input_dict):
    """Private function to scale bounding boxes and points.

    Everything is updated in place; nothing is returned.

    Args:
        input_dict (dict): Result dict from loading pipeline. Reads
            'pcd_scale_factor'; updates 'points', every key listed in
            'bbox3d_fields' and, when present, 'bev_seg_image'.
    """
    factor = input_dict['pcd_scale_factor']

    cloud = input_dict['points']
    cloud.scale(factor)
    if self.shift_height:
        assert 'height' in cloud.attribute_dims.keys()
        cloud.tensor[:, cloud.attribute_dims['height']] *= factor
    input_dict['points'] = cloud

    for box_key in input_dict['bbox3d_fields']:
        input_dict[box_key].scale(factor)

    if 'bev_seg_image' in input_dict.keys():
        original = input_dict['bev_seg_image']
        rescaled = mmcv.imrescale(original, factor, interpolation='nearest')
        if factor > 1:
            # Enlarged map: keep the region of the original size that
            # starts at the origin.
            rescaled = rescaled[:original.shape[0], :original.shape[1]]
        elif factor < 1:
            # Shrunk map: zero-pad back to the original shape.
            rescaled = mmcv.impad(rescaled, shape=original.shape, pad_val=0)
        input_dict['bev_seg_image'] = rescaled
def _pad_seg(self, results):
    """Pad every map named in ``results['seg_fields']`` to ``pad_shape``.

    Args:
        results (dict): Result dict; reads ``'pad_shape'`` (first two
            entries only) and overwrites each listed seg field.
    """
    for key in results.get('seg_fields', []):
        # ``shape`` goes by keyword: newer mmcv releases make it
        # keyword-only, and older releases accept the name as well.
        results[key] = mmcv.impad(
            results[key], shape=results['pad_shape'][:2])
def __call__(self, results):
    """Resize the image to the target height and bounded width.

    With ``keep_aspect_ratio`` the width follows the aspect ratio, is
    snapped to a multiple of ``1 / width_downsample_ratio``, clamped by
    the min/max widths and right-padded up to the max width. Otherwise the
    image is resized straight to ``(dst_max_width, dst_height)``.

    Args:
        results (dict): Reads ``'img'`` and ``'img_shape'``; writes
            ``'img'``, ``'resize_shape'``, ``'pad_shape'`` and
            ``'valid_ratio'``.

    Returns:
        dict: The same ``results`` object.
    """
    rank, _ = get_dist_info()
    if isinstance(self.height, int):
        dst_height, dst_min_width, dst_max_width = (
            self.height, self.min_width, self.max_width)
    else:
        # Multi-scale resize used in distributed training: each rank id
        # picks one (height, width) pair.
        idx = rank % len(self.height)
        dst_height = self.height[idx]
        dst_min_width = self.min_width[idx]
        dst_max_width = self.max_width[idx]

    ori_height, ori_width = results['img_shape'][:2]
    valid_ratio = 1.0
    pad_to_max = False

    if not self.keep_aspect_ratio:
        target_width = dst_max_width
    else:
        new_width = math.ceil(float(dst_height) / ori_height * ori_width)
        width_divisor = int(1 / self.width_downsample_ratio)
        # Keep new_width an integral multiple of width_divisor.
        if new_width % width_divisor != 0:
            new_width = round(new_width / width_divisor) * width_divisor
        if dst_min_width is not None:
            new_width = max(dst_min_width, new_width)

        if dst_max_width is None:
            target_width = new_width
        else:
            valid_ratio = min(1.0, 1.0 * new_width / dst_max_width)
            target_width = min(dst_max_width, new_width)
            pad_to_max = new_width < dst_max_width

    img_resize = mmcv.imresize(
        results['img'], (target_width, dst_height), backend=self.backend)
    resize_shape = img_resize.shape
    pad_shape = img_resize.shape
    if pad_to_max:
        img_resize = mmcv.impad(
            img_resize,
            shape=(dst_height, dst_max_width),
            pad_val=self.img_pad_value)
        pad_shape = img_resize.shape

    results['img'] = img_resize
    results['resize_shape'] = resize_shape
    results['pad_shape'] = pad_shape
    results['valid_ratio'] = valid_ratio
    return results
# flip the image vertically
mmcv.imflip(img, direction='vertical')

# ============Crop=============
# crop the region (10, 10, 100, 120)
bboxes = np.array([10, 10, 100, 120])
patch = mmcv.imcrop(img, bboxes)

# crop two regions (10, 10, 100, 120) and (0, 0, 50, 50)
bboxes = np.array([[10, 10, 100, 120], [0, 0, 50, 50]])
patches = mmcv.imcrop(img, bboxes)

# crop two regions, and rescale the patches by 1.2x
patches = mmcv.imcrop(img, bboxes, scale_ratio=1.2)

# =============Padding=============
# ``shape`` is passed by keyword: newer mmcv releases make it keyword-only,
# and older releases accept the name as well.
# pad the image to (1000, 1200) with all zeros
img_ = mmcv.impad(img, shape=(1000, 1200), pad_val=0)

# pad the image to (1000, 1200) with different values for three channels.
img_ = mmcv.impad(img, shape=(1000, 1200), pad_val=[100, 50, 200])

# pad an image so that each edge is a multiple of some value.
img_ = mmcv.impad_to_multiple(img, 32)