def augmentation(self, img, mask, hcoords, height, width, img_render=None, mask_render=None):
    """Apply random occlusion / rotation / crop-resize / flip augmentation.

    `hcoords` are keypoints in homogeneous coordinates and are warped
    alongside the image.  `img_render`/`mask_render` are an optional second
    (rendered) view that receives the same augmentations; both default to
    None.  Returns the augmented (img, mask, hcoords, img_render, mask_render).
    """
    foreground = np.sum(mask)
    # randomly mask out part of the instance to add occlusion
    if self.cfg['mask'] and np.random.random() < 0.5:
        img, mask = mask_out_instance(img, mask, self.cfg['min_mask'], self.cfg['max_mask'])
        # BUGFIX: was `img_render.any() != None and mask_render.any() != None`,
        # which (a) raises AttributeError when the default None is passed and
        # (b) is otherwise always True, since `.any()` returns a bool, never None.
        if img_render is not None and mask_render is not None:
            img_render, mask_render = mask_out_instance(
                img_render, mask_render, self.cfg['min_mask'], self.cfg['max_mask'])
    if foreground > 0:
        # randomly rotate around the center of the instance
        if self.cfg['rotation']:
            img, mask, hcoords, img_render, mask_render = rotate_instance(
                img, mask, hcoords, self.cfg['rot_ang_min'], self.cfg['rot_ang_max'],
                img_render, mask_render)
        # randomly crop and resize
        if self.cfg['crop']:
            if not self.cfg['use_old']:
                # 1. Under 80% probability, resize the image so the instance size
                #    falls within [hmin, hmax] x [wmin, wmax]; otherwise keep the
                #    image unchanged.
                # 2. Crop or pad the image to a fixed size.
                img, mask, hcoords, img_render, mask_render = crop_resize_instance_v2(
                    img, mask, hcoords, height, width, self.cfg['overlap_ratio'],
                    self.cfg['resize_hmin'], self.cfg['resize_hmax'],
                    self.cfg['resize_wmin'], self.cfg['resize_wmax'],
                    img_render, mask_render)
            else:
                # 1. First crop a region of [scale_min, scale_max] * [height, width],
                #    ensuring the intersection between the cropped region and the
                #    instance region covers at least overlap_ratio**2 of the instance.
                # 2. If the region is larger than the original image, pad with 0.
                # 3. Then resize the cropped image to [height, width]
                #    (bilinear for image, nearest for mask).
                img, mask, hcoords, img_render, mask_render = crop_resize_instance_v1(
                    img, mask, hcoords, height, width, self.cfg['overlap_ratio'],
                    self.cfg['resize_ratio_min'], self.cfg['resize_ratio_max'],
                    img_render, mask_render)
    else:
        # no foreground left after mask-out: just bring everything to target size
        img, mask, img_render, mask_render = crop_or_padding_to_fixed_size(
            img, mask, height, width, img_render, mask_render)
    # randomly flip
    if self.cfg['flip'] and np.random.random() < 0.5:
        img, mask, hcoords, img_render, mask_render = flip(
            img, mask, hcoords, img_render, mask_render)
    return img, mask, hcoords, img_render, mask_render
def augment(self, img, mask, kpt_2d, height, width):
    """Randomly rotate then crop/resize one sample; keypoints follow the warps.

    Keypoints are lifted to homogeneous coordinates so the geometric
    transforms can be applied to them, and are projected back to 2D at the end.
    """
    # Lift the 9 2D keypoints to homogeneous coordinates (append a ones column).
    hcoords = np.concatenate((kpt_2d, np.ones((9, 1))), axis=-1)
    img = np.asarray(img).astype(np.uint8)
    if np.sum(mask) > 0:
        train_cfg = self.cfg.train
        img, mask, hcoords = rotate_instance(
            img, mask, hcoords, train_cfg.rotate_min, train_cfg.rotate_max)
        img, mask, hcoords = crop_resize_instance_v1(
            img, mask, hcoords, height, width,
            train_cfg.overlap_ratio,
            train_cfg.resize_ratio_min,
            train_cfg.resize_ratio_max)
    else:
        # No visible foreground: only bring the image to the target size.
        img, mask = crop_or_padding_to_fixed_size(img, mask, height, width)
    kpt_2d = hcoords[:, :2]
    return img, kpt_2d, mask
def __getitem__(self, idx):
    """Load one real-image sample addressed by a flat multi-object index.

    `idx` is either a plain int or a `(local_idx, height, width)` tuple;
    the tuple form carries the augmentation target size and is required
    when `self.augment` is set (the plain-int path asserts it is not).
    The flat index is resolved against the per-object split lengths to
    find which object it belongs to.
    NOTE(review): the non-augmented branches below also reference
    `height`/`width`, which are undefined for a plain-int `idx` —
    presumably this dataset is always indexed with tuples; confirm.
    """
    if isinstance(idx, tuple):
        local_idx, height, width = idx
    else:
        local_idx = idx
        assert not self.augment
    # Walk the objects in order; the flat index falls into the range of
    # exactly one object (otherwise it is invalid).
    for object_name in self.object_names:
        if local_idx < self.lengths[object_name]:
            # translate the split-local index into the on-disk frame index
            local_idx = self.split_indices[object_name][local_idx]
            # image
            image_name = os.path.join(self.base_dir, 'original_dataset', object_name,
                                      'data', 'color{}.jpg'.format(local_idx))
            image = cv2.imread(image_name)
            # mask
            mask_name = os.path.join(self.base_dir, 'masks', object_name,
                                     'mask{}.png'.format(local_idx))
            mask = cv2.imread(mask_name, cv2.IMREAD_GRAYSCALE)
            # online occlusion: paste another object over this instance
            if self.occlude:
                image, mask = self.occlude_with_another_object(image, mask)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = np.float32(image) / 255.
            # binarize the mask (any non-zero label -> foreground)
            mask = np.float32(mask)
            mask[mask != 0.] = 1.
            # keypoints (2D per frame, 3D per object)
            pts2d = self.pts2d[object_name][local_idx]
            pts3d = self.pts3d[object_name]
            # symmetry correspondences
            sym_cor_name = os.path.join(self.base_dir, 'correspondences', object_name,
                                        'cor{}.npy'.format(local_idx))
            sym_cor = np.float32(np.load(sym_cor_name))
            normal = self.normals[object_name]
            # data augmentation: rotate, then crop/resize (or pad if the
            # instance vanished)
            if self.augment:
                foreground = np.sum(mask)
                if foreground > 0:
                    image, mask, pts2d, sym_cor = \
                        rotate_instance(image, mask, pts2d, sym_cor,
                                        self.rot_ang_min, self.rot_ang_max)
                    # rotation may move the instance (partially) out of view
                    foreground = np.sum(mask)
                if foreground > 0:
                    image, mask, pts2d, sym_cor = \
                        crop_resize_instance_v1(image, mask, pts2d, sym_cor,
                                                height, width)
                else:
                    image, mask, sym_cor = \
                        crop_or_padding_to_fixed_size(image, mask, sym_cor,
                                                      height, width)
            else:
                image, mask, sym_cor = \
                    crop_or_padding_to_fixed_size(image, mask, sym_cor,
                                                  height, width)
            image = image.transpose((2, 0, 1))  # (H, W, C) -> (C, H, W)
            image = torch.from_numpy(image)
            image = self.img_transform(image)
            mask = mask.reshape((1, mask.shape[0], mask.shape[1]))
            sym_cor = sym_cor.transpose([2, 0, 1])
            # keypoint map
            pts2d_map = self.keypoints_to_map(mask, pts2d)
            # graph
            graph = self.keypoints_to_graph(mask, pts2d)
            # pose (rotation and translation files alongside the image)
            R_name = os.path.join(self.base_dir, 'original_dataset', object_name,
                                  'data', 'rot{}.rot'.format(local_idx))
            R = self.read_rotation(R_name)
            t_name = os.path.join(self.base_dir, 'original_dataset', object_name,
                                  'data', 'tra{}.tra'.format(local_idx))
            t = self.read_translation(t_name)
            if self.split == 'train':
                return {
                    'image': image,
                    'image_name': image_name,
                    'pts2d': pts2d,
                    'pts2d_map': pts2d_map,
                    'sym_cor': sym_cor,
                    'mask': mask,
                    'graph': graph
                }
            else:
                # evaluation additionally needs pose / 3D data
                return {
                    'object_name': object_name,
                    'local_idx': local_idx,
                    'image_name': image_name,
                    'image': image,
                    'pts2d': pts2d,
                    'pts2d_map': pts2d_map,
                    'pts3d': pts3d,
                    'R': R,
                    't': t,
                    'sym_cor': sym_cor,
                    'normal': normal,
                    'mask': mask,
                    'graph': graph
                }
        else:
            # index belongs to a later object: shift into its local range
            local_idx -= self.lengths[object_name]
    raise ValueError('Invalid index: {}'.format(idx))
def __getitem__(self, idx):
    """Load one synthetic sample.

    `idx` is either a plain int or a `(idx, height, width)` tuple supplied
    by the training sampler; `height`/`width` give the augmentation target
    size and are only needed when `self.augment` is set.
    NOTE(review): the non-augmented branch below also references
    `height`/`width`, which are undefined for a plain-int `idx` —
    presumably this dataset is always indexed with tuples; confirm.

    Returns a dict of image/label tensors; the non-train split additionally
    carries pose (R, t), 3D keypoints and the symmetry normal.
    """
    if isinstance(idx, tuple):
        idx, height, width = idx
    else:
        assert not self.augment  # Because not a batch -> not training?
    # Re-map the local index onto a randomly chosen replica block of the
    # dataset (self.length total frames, self.size per block).
    multiplier = np.random.randint(0, self.length // self.size)
    idx = multiplier * self.size + idx
    # image
    image_name = os.path.join(self.data_dir, '{}.jpg'.format(idx))
    image = cv2.imread(image_name)
    # mask
    mask_name = os.path.join(self.labels_dir, 'mask{}.png'.format(idx))
    mask = cv2.imread(mask_name, cv2.IMREAD_GRAYSCALE)
    # online occlusion is not supported for this dataset
    if self.occlude:
        # FIX: the original had an unreachable call to
        # self.occlude_with_another_object(image, mask) *after* this raise;
        # that dead statement has been removed.
        raise NotImplementedError
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = np.float32(image) / 255.
    # binarize the mask (any non-zero label -> foreground)
    mask = np.float32(mask)
    mask[mask != 0.] = 1.
    # keypoints
    pts2d = self.pts2d[idx]
    # symmetry correspondences
    sym_cor_name = os.path.join(self.labels_dir, 'cor{}.npy'.format(idx))
    sym_cor = np.float32(np.load(sym_cor_name))
    # data augmentation: rotate, then crop/resize (or pad if the instance
    # vanished)
    if self.augment:
        foreground = np.sum(mask)
        if foreground > 0:
            image, mask, pts2d, sym_cor = \
                rotate_instance(image, mask, pts2d, sym_cor,
                                self.rot_ang_min, self.rot_ang_max)
            # rotation may move the instance (partially) out of view
            foreground = np.sum(mask)
        if foreground > 0:
            image, mask, pts2d, sym_cor = \
                crop_resize_instance_v1(image, mask, pts2d, sym_cor,
                                        height, width)
        else:
            image, mask, sym_cor = \
                crop_or_padding_to_fixed_size(image, mask, sym_cor,
                                              height, width)
    else:
        image, mask, sym_cor = \
            crop_or_padding_to_fixed_size(image, mask, sym_cor, height, width)
    image = image.transpose((2, 0, 1))  # (H, W, C) -> (C, H, W)
    image = torch.from_numpy(image)
    image = self.img_transform(image)
    mask = mask.reshape((1, mask.shape[0], mask.shape[1]))
    sym_cor = sym_cor.transpose([2, 0, 1])
    # keypoint map
    pts2d_map = self.keypoints_to_map(mask, pts2d)
    # graph
    graph = self.keypoints_to_graph(mask, pts2d)
    if self.split == 'train':
        return {
            'image': image,
            'image_name': image_name,
            'pts2d': pts2d,
            'pts2d_map': pts2d_map,
            'sym_cor': sym_cor,
            'mask': mask,
            'graph': graph
        }
    else:
        R, t = self.read_pose(idx)
        pts3d = self.pts3d
        normal = self.normal
        # object_name / local_idx included for parity with the real-image dataset
        return {
            'object_name': self.object_name,
            'local_idx': idx,
            'image': image,
            'image_name': image_name,
            'pts2d': pts2d,
            'pts2d_map': pts2d_map,
            'sym_cor': sym_cor,
            'mask': mask,
            'graph': graph,
            'R': R,
            't': t,
            'pts3d': pts3d,
            'normal': normal
        }
def __getitem__(self, idx):
    """Return one sample from the fused (cut-and-paste) dataset.

    `idx` may be a plain int or a `(idx, height, width)` tuple; the tuple
    form carries the augmentation target size used during training.
    """
    if isinstance(idx, tuple):
        idx, height, width = idx
    else:
        assert not self.augment
    # Re-map the index onto a randomly chosen replica block of the dataset.
    multiplier = np.random.randint(0, self.length // self.size)
    idx = multiplier * self.size + idx

    # Load RGB image and the shared multi-object instance mask.
    fuse_dir = os.path.join(self.base_dir, 'fuse')
    image_name = os.path.join(fuse_dir, '{}_rgb.jpg'.format(idx))
    image = cv2.imread(image_name)
    mask_name = os.path.join(fuse_dir, '{}_mask.png'.format(idx))
    mask = cv2.imread(mask_name, cv2.IMREAD_GRAYSCALE)

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = np.float32(image) / 255.

    # Binarize: keep only this object's id inside the fused mask.
    mask = np.float32(mask)
    mask[mask != self.mask_idx] = 0.
    mask[mask == self.mask_idx] = 1.

    # Keypoints and symmetry correspondences for this object.
    pts2d = self.pts2d[idx]
    sym_cor_name = os.path.join(self.base_dir,
                                '{}_fuse_labels'.format(self.object_name),
                                'cor{}.npy'.format(idx))
    sym_cor = np.float32(np.load(sym_cor_name))

    # Augmentation: rotate, then crop/resize — or pad when no foreground
    # remains (e.g. the instance was rotated out of view).
    if self.augment:
        if np.sum(mask) > 0:
            image, mask, pts2d, sym_cor = rotate_instance(
                image, mask, pts2d, sym_cor, self.rot_ang_min, self.rot_ang_max)
        if np.sum(mask) > 0:
            image, mask, pts2d, sym_cor = crop_resize_instance_v1(
                image, mask, pts2d, sym_cor, height, width)
        else:
            image, mask, sym_cor = crop_or_padding_to_fixed_size(
                image, mask, sym_cor, height, width)
    else:
        image, mask, sym_cor = crop_or_padding_to_fixed_size(
            image, mask, sym_cor, height, width)

    # Convert to training layout.
    image = torch.from_numpy(image.transpose((2, 0, 1)))  # (H, W, C) -> (C, H, W)
    image = self.img_transform(image)
    mask = mask.reshape((1, mask.shape[0], mask.shape[1]))
    sym_cor = sym_cor.transpose([2, 0, 1])

    pts2d_map = self.keypoints_to_map(mask, pts2d)
    graph = self.keypoints_to_graph(mask, pts2d)
    return {
        'image': image,
        'image_name': image_name,
        'pts2d': pts2d,
        'pts2d_map': pts2d_map,
        'sym_cor': sym_cor,
        'mask': mask,
        'graph': graph
    }