Code example #1
File: linemod_dataset.py Project: leeshd/pvnet
    def augmentation(self,
                     img,
                     mask,
                     hcoords,
                     height,
                     width,
                     img_render=None,
                     mask_render=None):
        foreground = np.sum(mask)
        # randomly mask out to add occlusion
        if self.cfg['mask'] and np.random.random() < 0.5:
            img, mask = mask_out_instance(img, mask, self.cfg['min_mask'],
                                          self.cfg['max_mask'])
            if img_render is not None and mask_render is not None:
                img_render, mask_render = mask_out_instance(
                    img_render, mask_render, self.cfg['min_mask'],
                    self.cfg['max_mask'])

        if foreground > 0:
            # randomly rotate around the center of the instance
            if self.cfg['rotation']:
                img, mask, hcoords, img_render, mask_render = rotate_instance(
                    img, mask, hcoords, self.cfg['rot_ang_min'],
                    self.cfg['rot_ang_max'], img_render, mask_render)

            # randomly crop and resize
            if self.cfg['crop']:
                if not self.cfg['use_old']:
                    # 1. With 80% probability, resize the image so that the
                    #    instance height falls in [hmin, hmax] and its width in
                    #    [wmin, wmax]; otherwise keep the image unchanged.
                    # 2. Crop or pad the result to a fixed size.
                    img, mask, hcoords, img_render, mask_render = crop_resize_instance_v2(
                        img, mask, hcoords, height, width,
                        self.cfg['overlap_ratio'], self.cfg['resize_hmin'],
                        self.cfg['resize_hmax'], self.cfg['resize_wmin'],
                        self.cfg['resize_wmax'], img_render, mask_render)
                else:
                    # 1. First crop a region of size [scale_min, scale_max] *
                    #    [height, width], ensuring the intersection of the crop
                    #    with the instance covers at least overlap_ratio**2 of
                    #    the instance area.
                    # 2. If the region extends beyond the original image, pad
                    #    with zeros.
                    # 3. Resize the crop to [height, width] (bilinear for the
                    #    image, nearest for the mask).
                    img, mask, hcoords, img_render, mask_render = crop_resize_instance_v1(
                        img, mask, hcoords, height, width,
                        self.cfg['overlap_ratio'],
                        self.cfg['resize_ratio_min'],
                        self.cfg['resize_ratio_max'], img_render, mask_render)
        else:
            img, mask, img_render, mask_render = crop_or_padding_to_fixed_size(
                img, mask, height, width, img_render, mask_render)

        # randomly flip
        if self.cfg['flip'] and np.random.random() < 0.5:
            img, mask, hcoords, img_render, mask_render = flip(
                img, mask, hcoords, img_render, mask_render)

        return img, mask, hcoords, img_render, mask_render
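
The helpers called above (mask_out_instance, rotate_instance, crop_resize_instance_v1/v2, crop_or_padding_to_fixed_size, flip) live in pvnet's augmentation utilities and are not shown on this page. As a rough idea of what mask_out_instance might do, here is a minimal sketch that zeroes out a random rectangle inside the instance bounding box to simulate occlusion; the function body, and the reading of min_mask/max_mask as bounds on the occluded fraction, are assumptions rather than the project's actual code:

import numpy as np

def mask_out_instance(img, mask, min_mask, max_mask):
    # Hypothetical sketch, not pvnet's implementation: occlude a random
    # rectangle whose side lengths are a [min_mask, max_mask] fraction of
    # the instance bounding box.
    ys, xs = np.nonzero(mask)
    if len(ys) == 0:
        return img, mask
    y0, y1, x0, x1 = ys.min(), ys.max(), xs.min(), xs.max()
    h, w = y1 - y0 + 1, x1 - x0 + 1
    oh = int(h * np.random.uniform(min_mask, max_mask))
    ow = int(w * np.random.uniform(min_mask, max_mask))
    oy = np.random.randint(y0, max(y0 + 1, y1 - oh + 1))
    ox = np.random.randint(x0, max(x0 + 1, x1 - ow + 1))
    img, mask = img.copy(), mask.copy()
    img[oy:oy + oh, ox:ox + ow] = 0   # black out the occluded pixels
    mask[oy:oy + oh, ox:ox + ow] = 0  # drop them from the instance mask
    return img, mask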
Code example #2
File: pvnet.py Project: sherwoac/clean-pvnet
    def augment(self, img, mask, kpt_2d, height, width):
        # append a column of ones to the nine keypoints to form homogeneous
        # coordinates, so every affine warp applies as one matrix product
        hcoords = np.concatenate((kpt_2d, np.ones((9, 1))), axis=-1)
        img = np.asarray(img).astype(np.uint8)
        foreground = np.sum(mask)
        # rotate and crop/resize around the instance while it is visible
        if foreground > 0:
            img, mask, hcoords = rotate_instance(img, mask, hcoords, self.cfg.train.rotate_min, self.cfg.train.rotate_max)
            img, mask, hcoords = crop_resize_instance_v1(img, mask, hcoords, height, width,
                                                         self.cfg.train.overlap_ratio,
                                                         self.cfg.train.resize_ratio_min,
                                                         self.cfg.train.resize_ratio_max)
        else:
            img, mask = crop_or_padding_to_fixed_size(img, mask, height, width)
        kpt_2d = hcoords[:, :2]

        return img, kpt_2d, mask
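
The hcoords trick is the key detail here: with a trailing column of ones, every 2x3 affine warp used by these pipelines (rotation, crop, resize, flip) applies to all keypoints in a single matrix product, which is why the snippet converts kpt_2d before augmenting and strips the last column afterwards. A small self-contained illustration (the angle and translation values are arbitrary):

import numpy as np

kpt_2d = np.random.rand(9, 2) * 100  # nine 2D keypoints
# homogeneous coordinates: (x, y) -> (x, y, 1)
hcoords = np.concatenate((kpt_2d, np.ones((9, 1))), axis=-1)  # (9, 3)

theta = np.deg2rad(15.0)
M = np.array([[np.cos(theta), -np.sin(theta),  5.0],
              [np.sin(theta),  np.cos(theta), -3.0]])  # 2x3 affine warp

warped = hcoords @ M.T  # (9, 2): all keypoints rotated and translated at once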
Code example #3
File: linemod.py Project: onegul/HybridPose
 def __getitem__(self, idx):
     if isinstance(idx, tuple):
         local_idx, height, width = idx
     else:
         local_idx = idx
         assert not self.augment
     for object_name in self.object_names:
         if local_idx < self.lengths[object_name]:
             local_idx = self.split_indices[object_name][local_idx]
             # image
             image_name = os.path.join(self.base_dir, 'original_dataset',
                                       object_name, 'data',
                                       'color{}.jpg'.format(local_idx))
             image = cv2.imread(image_name)
             # mask
             mask_name = os.path.join(self.base_dir, 'masks',
                                      object_name, 'mask{}.png'.format(local_idx))
             mask = cv2.imread(mask_name, cv2.IMREAD_GRAYSCALE)
             # online occlusion
             if self.occlude:
                 image, mask = self.occlude_with_another_object(image, mask)
             image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
             image = np.float32(image) / 255.
             mask = np.float32(mask)
             mask[mask != 0.] = 1.
             # keypoints
             pts2d = self.pts2d[object_name][local_idx]
             pts3d = self.pts3d[object_name]
             # symmetry correspondences
             sym_cor_name = os.path.join(self.base_dir, 'correspondences',
                                         object_name, 'cor{}.npy'.format(local_idx))
             sym_cor = np.float32(np.load(sym_cor_name))
             normal = self.normals[object_name]
             # data augmentation
             if self.augment:
                 foreground = np.sum(mask)
                 if foreground > 0:
                     image, mask, pts2d, sym_cor = \
                             rotate_instance(image, mask, pts2d, sym_cor,
                                             self.rot_ang_min, self.rot_ang_max)
                     foreground = np.sum(mask)
                     if foreground > 0:
                         image, mask, pts2d, sym_cor = \
                                 crop_resize_instance_v1(image, mask, pts2d, sym_cor,
                                                         height, width)
                     else:
                         image, mask, sym_cor = \
                                 crop_or_padding_to_fixed_size(image, mask, sym_cor,
                                                               height, width)
                 else:
                     image, mask, sym_cor = \
                             crop_or_padding_to_fixed_size(image, mask, sym_cor,
                                                           height, width)
             image = image.transpose((2, 0, 1)) # (H, W, C) -> (C, H, W)
             image = torch.from_numpy(image)
             image = self.img_transform(image)
             mask = mask.reshape((1, mask.shape[0], mask.shape[1]))
             sym_cor = sym_cor.transpose([2, 0, 1])
             # keypoint map
             pts2d_map = self.keypoints_to_map(mask, pts2d)
             # graph
             graph = self.keypoints_to_graph(mask, pts2d)
             # pose
             R_name = os.path.join(self.base_dir, 'original_dataset', object_name,
                                   'data', 'rot{}.rot'.format(local_idx))
             R = self.read_rotation(R_name)
             t_name = os.path.join(self.base_dir, 'original_dataset', object_name,
                                   'data', 'tra{}.tra'.format(local_idx))
             t = self.read_translation(t_name)
             if self.split == 'train':
                 return {
                         'image': image,
                         'image_name': image_name,
                         'pts2d': pts2d,
                         'pts2d_map': pts2d_map,
                         'sym_cor': sym_cor,
                         'mask': mask,
                         'graph': graph
                         }
             else:
                 return {
                         'object_name': object_name,
                         'local_idx': local_idx,
                         'image_name': image_name,
                         'image': image,
                         'pts2d': pts2d,
                         'pts2d_map': pts2d_map,
                         'pts3d': pts3d,
                         'R': R,
                         't': t,
                         'sym_cor': sym_cor,
                         'normal': normal,
                         'mask': mask,
                         'graph': graph
                         }
         else:
             local_idx -= self.lengths[object_name]
     raise ValueError('Invalid index: {}'.format(idx))
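
Note the tuple-shaped index in this __getitem__: during training a custom sampler supplies (idx, height, width) so that every sample in a batch is cropped and resized to the same random resolution, which keeps default collation working. A hypothetical sketch of such a batch sampler follows; the class name, size grid, and defaults are assumptions, not HybridPose's actual sampler:

import numpy as np
from torch.utils.data import Sampler

class RandomSizeBatchSampler(Sampler):
    # Hypothetical sketch: group indices into batches and attach one random
    # (height, width) per batch so dataset[(idx, h, w)] sees a shared size.
    def __init__(self, dataset_len, batch_size, hmin=256, hmax=480,
                 wmin=256, wmax=640, step=32):
        self.dataset_len = dataset_len
        self.batch_size = batch_size
        self.heights = np.arange(hmin, hmax + 1, step)
        self.widths = np.arange(wmin, wmax + 1, step)

    def __iter__(self):
        order = np.random.permutation(self.dataset_len)
        for start in range(0, self.dataset_len, self.batch_size):
            h = int(np.random.choice(self.heights))
            w = int(np.random.choice(self.widths))
            yield [(int(i), h, w) for i in order[start:start + self.batch_size]]

    def __len__(self):
        return (self.dataset_len + self.batch_size - 1) // self.batch_size

It would plug in as DataLoader(dataset, batch_sampler=RandomSizeBatchSampler(len(dataset), 16)); at test time the dataset is indexed with plain integers, which is what the assert on the else branch enforces.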
Code example #4
 def __getitem__(self, idx):
     if isinstance(idx, tuple):
         idx, height, width = idx
     else:
         assert not self.augment  # augmentation needs the (idx, height, width) form
     # the dataset exposes `size` indices but stores `length` samples;
     # map the local index into a randomly chosen block of `size` samples
     multiplier = np.random.randint(0, self.length // self.size)
     idx = multiplier * self.size + idx
     # image
     image_name = os.path.join(self.data_dir, '{}.jpg'.format(idx))
     image = cv2.imread(image_name)
     # mask
     mask_name = os.path.join(self.labels_dir, 'mask{}.png'.format(idx))
     mask = cv2.imread(mask_name, cv2.IMREAD_GRAYSCALE)
     # online occlusion
     if self.occlude:
         # online occlusion is not implemented for this dataset; the
         # intended call would be:
         # image, mask = self.occlude_with_another_object(image, mask)
         raise NotImplementedError
     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
     image = np.float32(image) / 255.
     mask = np.float32(mask)
     mask[mask != 0.] = 1.
     # keypoints
     pts2d = self.pts2d[idx]
     # symmetry correspondences
     sym_cor_name = os.path.join(self.labels_dir, 'cor{}.npy'.format(idx))
     sym_cor = np.float32(np.load(sym_cor_name))
     # data augmentation
     if self.augment:
         foreground = np.sum(mask)
         if foreground > 0:
             image, mask, pts2d, sym_cor = \
                     rotate_instance(image, mask, pts2d, sym_cor,
                                     self.rot_ang_min, self.rot_ang_max)
             foreground = np.sum(mask)
             if foreground > 0:
                 image, mask, pts2d, sym_cor = \
                         crop_resize_instance_v1(image, mask, pts2d, sym_cor,
                                                 height, width)
             else:
                 image, mask, sym_cor = \
                         crop_or_padding_to_fixed_size(image, mask, sym_cor,
                                                       height, width)
         else:
             image, mask, sym_cor = \
                     crop_or_padding_to_fixed_size(image, mask, sym_cor,
                                                   height, width)
     image = image.transpose((2, 0, 1))  # (H, W, C) -> (C, H, W)
     image = torch.from_numpy(image)
     image = self.img_transform(image)
     mask = mask.reshape((1, mask.shape[0], mask.shape[1]))
     sym_cor = sym_cor.transpose([2, 0, 1])
     # keypoint map
     pts2d_map = self.keypoints_to_map(mask, pts2d)
     # graph
     graph = self.keypoints_to_graph(mask, pts2d)
     if self.split == 'train':
         return {
             'image': image,
             'image_name': image_name,
             'pts2d': pts2d,
             'pts2d_map': pts2d_map,
             'sym_cor': sym_cor,
             'mask': mask,
             'graph': graph
         }
     else:
         R, t = self.read_pose(idx)
         pts3d = self.pts3d
         normal = self.normal
         return {
             'object_name': self.object_name,
             'local_idx': idx,
             'image': image,
             'image_name': image_name,
             'pts2d': pts2d,
             'pts2d_map': pts2d_map,
             'sym_cor': sym_cor,
             'mask': mask,
             'graph': graph,
             'R': R,
             't': t,
             'pts3d': pts3d,
             'normal': normal
         }
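
When the instance is rotated entirely out of frame, every example falls back to crop_or_padding_to_fixed_size. A simplified sketch of that fallback, assuming its job is exactly "random-crop dimensions that are too large, zero-pad dimensions that are too small" (this two-array variant drops the sym_cor argument the HybridPose version also shifts):

import numpy as np

def crop_or_padding_to_fixed_size(img, mask, height, width):
    # Simplified sketch, not the library's implementation.
    h, w = img.shape[:2]
    # random crop offset along each axis (0 when the source is smaller)
    y0 = np.random.randint(0, max(h - height, 0) + 1)
    x0 = np.random.randint(0, max(w - width, 0) + 1)
    img = img[y0:y0 + height, x0:x0 + width]
    mask = mask[y0:y0 + height, x0:x0 + width]
    # zero-pad any axis where the source was smaller than the target
    pad_h = max(0, height - img.shape[0])
    pad_w = max(0, width - img.shape[1])
    if pad_h > 0 or pad_w > 0:
        img = np.pad(img, ((0, pad_h), (0, pad_w), (0, 0)))
        mask = np.pad(mask, ((0, pad_h), (0, pad_w)))
    return img, mask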
Code example #5
 def __getitem__(self, idx):
     if isinstance(idx, tuple):
         idx, height, width = idx
     else:
         assert not self.augment
     multiplier = np.random.randint(0, self.length // self.size)
     idx = multiplier * self.size + idx
     # image
     image_name = os.path.join(self.base_dir, 'fuse',
                               '{}_rgb.jpg'.format(idx))
     image = cv2.imread(image_name)
     # mask
     mask_name = os.path.join(self.base_dir, 'fuse',
                              '{}_mask.png'.format(idx))
     mask = cv2.imread(mask_name, cv2.IMREAD_GRAYSCALE)
     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
     image = np.float32(image) / 255.
     mask = np.float32(mask)
     mask[mask != self.mask_idx] = 0.
     mask[mask == self.mask_idx] = 1.
     # keypoints
     pts2d = self.pts2d[idx]
     # symmetry correspondences
     sym_cor_name = os.path.join(self.base_dir,
                                 '{}_fuse_labels'.format(self.object_name),
                                 'cor{}.npy'.format(idx))
     sym_cor = np.float32(np.load(sym_cor_name))
     # data augmentation
     if self.augment:
         foreground = np.sum(mask)
         if foreground > 0:
             image, mask, pts2d, sym_cor = \
                     rotate_instance(image, mask, pts2d, sym_cor,
                                     self.rot_ang_min, self.rot_ang_max)
             foreground = np.sum(mask)
             if foreground > 0:
                 image, mask, pts2d, sym_cor = \
                         crop_resize_instance_v1(image, mask, pts2d, sym_cor,
                                                 height, width)
             else:
                 image, mask, sym_cor = \
                         crop_or_padding_to_fixed_size(image, mask, sym_cor,
                                                       height, width)
         else:
             image, mask, sym_cor = \
                     crop_or_padding_to_fixed_size(image, mask, sym_cor,
                                                   height, width)
     image = image.transpose((2, 0, 1))  # (H, W, C) -> (C, H, W)
     image = torch.from_numpy(image)
     image = self.img_transform(image)
     mask = mask.reshape((1, mask.shape[0], mask.shape[1]))
     sym_cor = sym_cor.transpose([2, 0, 1])
     # keypoint map
     pts2d_map = self.keypoints_to_map(mask, pts2d)
     # graph
     graph = self.keypoints_to_graph(mask, pts2d)
     return {
         'image': image,
         'image_name': image_name,
         'pts2d': pts2d,
         'pts2d_map': pts2d_map,
         'sym_cor': sym_cor,
         'mask': mask,
         'graph': graph
     }
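
Finally, rotate_instance appears in all five snippets. As a reference for how such a helper is typically built on OpenCV, here is a hedged sketch; the centroid-based rotation center, the interpolation choices, and in particular the handling of the sym_cor offset field are assumptions, not HybridPose's exact code:

import cv2
import numpy as np

def rotate_instance(img, mask, pts2d, sym_cor, rot_ang_min, rot_ang_max):
    # Sketch only; assumes a non-empty mask (callers check foreground > 0).
    ys, xs = np.nonzero(mask)
    center = (float(xs.mean()), float(ys.mean()))      # instance centroid
    angle = np.random.uniform(rot_ang_min, rot_ang_max)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)    # 2x3 affine
    h, w = img.shape[:2]
    img = cv2.warpAffine(img, M, (w, h), flags=cv2.INTER_LINEAR)
    mask = cv2.warpAffine(mask, M, (w, h), flags=cv2.INTER_NEAREST)
    # keypoints transform as homogeneous coordinates
    pts2d = np.concatenate([pts2d, np.ones((len(pts2d), 1))], axis=1) @ M.T
    # move the per-pixel offset field, then rotate the stored vectors by
    # the linear part of the warp (assumed behavior)
    sym_cor = cv2.warpAffine(sym_cor, M, (w, h), flags=cv2.INTER_NEAREST)
    sym_cor = sym_cor @ M[:, :2].T
    return img, mask, pts2d, sym_cor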