def __getitem__(self, idx):
    """Load one OSD sample: color image, labels and (optionally) a point cloud.

    Args:
        idx: Position of the sample in ``self.image_files``.

    Returns:
        dict with the keys

        * ``'image_color'``: image divided by 255 with ``self._pixel_mean``
          subtracted, axes permuted with ``(2, 0, 1)``.
        * ``'image_color_bgr'``: the same divided-by-255 image without the
          mean subtraction, axes permuted with ``(2, 0, 1)``.
        * ``'label'``: output of ``self.process_label`` with a leading axis
          added.
        * ``'filename'``: the part of the image path that follows the
          ``'OSD'`` marker and one separator character (the full path when
          the marker is absent).
        * ``'depth'``: only when ``cfg.INPUT`` is ``'DEPTH'`` or ``'RGBD'``;
          the point cloud reshaped to ``(self._height, self._width, 3)`` and
          permuted with ``(2, 0, 1)``.

    Raises:
        OSError: if ``cv2.imread`` cannot read the image file.
    """
    # BGR image
    filename = str(self.image_files[idx])
    im = cv2.imread(filename)
    if im is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the path instead of deep inside torch.from_numpy.
        raise OSError('cv2.imread failed to read image: %s' % filename)

    # Augmentations are only applied in training mode, each skipped when the
    # random draw is <= 0.1.
    if cfg.TRAIN.CHROMATIC and cfg.MODE == 'TRAIN' and np.random.rand(1) > 0.1:
        im = chromatic_transform(im)
    if cfg.TRAIN.ADD_NOISE and cfg.MODE == 'TRAIN' and np.random.rand(1) > 0.1:
        im = add_noise(im)

    im_tensor = torch.from_numpy(im) / 255.0
    # Keep an un-normalized copy before the in-place mean subtraction below.
    im_tensor_bgr = im_tensor.clone().permute(2, 0, 1)
    im_tensor -= self._pixel_mean
    image_blob = im_tensor.permute(2, 0, 1)

    # Label
    labels_filename = filename.replace('image_color', 'annotation')
    foreground_labels = util_.imread_indexed(labels_filename)
    foreground_labels = self.process_label(foreground_labels)
    label_blob = torch.from_numpy(foreground_labels).unsqueeze(0)

    # Path relative to the dataset root: skip 'OSD' plus the separator after
    # it. str.find returns -1 when the marker is missing, which would make
    # the slice start at an arbitrary offset, so fall back to the full path.
    marker = filename.find('OSD')
    relative_name = filename[marker + 4:] if marker >= 0 else filename

    sample = {
        'image_color': image_blob,
        'image_color_bgr': im_tensor_bgr,
        'label': label_blob,
        'filename': relative_name,
    }

    # Depth image
    if cfg.INPUT == 'DEPTH' or cfg.INPUT == 'RGBD':
        pcd_filename = filename.replace('image_color', 'pcd')
        # Swap only the file extension; a blanket replace('png', 'pcd')
        # would also rewrite any 'png' inside directory names.
        if pcd_filename.endswith('.png'):
            pcd_filename = pcd_filename[:-len('.png')] + '.pcd'
        # TODO: python-pcl could be replaced by open3d
        # (o3d.io.read_point_cloud); an earlier attempt was left disabled.
        pcloud = pcl.load(pcd_filename).to_array()
        pcloud[np.isnan(pcloud)] = 0
        xyz_img = pcloud.reshape((self._height, self._width, 3))
        sample['depth'] = torch.from_numpy(xyz_img).permute(2, 0, 1)

    return sample
def __getitem__(self, idx):
    """Load one OCID sample: color image, labels and (optionally) a point cloud.

    Args:
        idx: Position of the sample in ``self.image_paths``.

    Returns:
        dict with the keys

        * ``'image_color'``: image divided by 255 with ``self._pixel_mean``
          subtracted, axes permuted with ``(2, 0, 1)``.
        * ``'image_color_bgr'``: the same divided-by-255 image without the
          mean subtraction, axes permuted with ``(2, 0, 1)``.
        * ``'label'``: output of ``self.process_label`` (after the table
          masking below) with a leading axis added.
        * ``'filename'``: the part of the image path that follows the
          ``'OCID'`` marker and one more character (the full path when the
          marker is absent).
        * ``'depth'``: only when ``cfg.INPUT`` is ``'DEPTH'`` or ``'RGBD'``;
          the point cloud reshaped to ``(self._height, self._width, 3)`` and
          permuted with ``(2, 0, 1)``.

    Raises:
        OSError: if ``cv2.imread`` cannot read the image file.
    """
    # BGR image
    filename = str(self.image_paths[idx])
    im = cv2.imread(filename)
    if im is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the path instead of deep inside torch.from_numpy.
        raise OSError('cv2.imread failed to read image: %s' % filename)

    # Augmentations are only applied in training mode, each skipped when the
    # random draw is <= 0.1.
    if cfg.TRAIN.CHROMATIC and cfg.MODE == 'TRAIN' and np.random.rand(1) > 0.1:
        im = chromatic_transform(im)
    if cfg.TRAIN.ADD_NOISE and cfg.MODE == 'TRAIN' and np.random.rand(1) > 0.1:
        im = add_noise(im)

    im_tensor = torch.from_numpy(im) / 255.0
    # Keep an un-normalized copy before the in-place mean subtraction below.
    im_tensor_bgr = im_tensor.clone().permute(2, 0, 1)
    im_tensor -= self._pixel_mean
    image_blob = im_tensor.permute(2, 0, 1)

    # Label
    labels_filename = filename.replace('rgb', 'label')
    foreground_labels = util_.imread_indexed(labels_filename)
    # mask table as background: id 1 always, id 2 as well for paths that
    # contain 'table'
    foreground_labels[foreground_labels == 1] = 0
    if 'table' in labels_filename:
        foreground_labels[foreground_labels == 2] = 0
    foreground_labels = self.process_label(foreground_labels)
    label_blob = torch.from_numpy(foreground_labels).unsqueeze(0)

    # Path relative to the dataset root: skip 'OCID' plus one character.
    # str.find returns -1 when the marker is missing, which would make the
    # slice start at an arbitrary offset, so fall back to the full path.
    marker = filename.find('OCID')
    relative_name = filename[marker + 5:] if marker >= 0 else filename

    sample = {
        'image_color': image_blob,
        'image_color_bgr': im_tensor_bgr,
        'label': label_blob,
        'filename': relative_name,
    }

    # Depth image
    if cfg.INPUT == 'DEPTH' or cfg.INPUT == 'RGBD':
        pcd_filename = filename.replace('rgb', 'pcd')
        # Swap only the file extension; a blanket replace('png', 'pcd')
        # would also rewrite any 'png' inside directory names.
        if pcd_filename.endswith('.png'):
            pcd_filename = pcd_filename[:-len('.png')] + '.pcd'
        pcloud = pcl.load(pcd_filename).to_array()
        pcloud[np.isnan(pcloud)] = 0
        xyz_img = pcloud.reshape((self._height, self._width, 3))
        sample['depth'] = torch.from_numpy(xyz_img).permute(2, 0, 1)

    return sample
from transforms3d.quaternions import mat2quat, quat2mat
import numpy as np  # used by the statistics below
import _init_paths
from datasets import OCIDObject, OSDObject
import matplotlib.pyplot as plt
from utils import mask as util_


def main():
    """Print the object count of every OSD test image plus min/max/mean."""
    dataset = OSDObject('test')
    num = dataset._size
    num_objects = []
    for i in range(num):
        filename = str(dataset.image_files[i])
        # For OCID the label path is filename.replace('rgb', 'label').
        labels_filename = filename.replace('image_color', 'annotation')
        foreground_labels = util_.imread_indexed(labels_filename)

        # mask table as background
        # NOTE(review): ids 1 (and 2 for paths containing 'table') are zeroed
        # before counting -- confirm this convention applies to OSD
        # annotations.
        foreground_labels[foreground_labels == 1] = 0
        if 'table' in labels_filename:
            foreground_labels[foreground_labels == 2] = 0
        foreground_labels = dataset.process_label(foreground_labels)

        # One id is discounted as background -- assumes the background id is
        # always present in the processed labels; TODO confirm.
        n = len(np.unique(foreground_labels)) - 1
        num_objects.append(n)
        print(labels_filename, n)

    if not num_objects:
        # np.min / np.max raise ValueError on an empty array.
        print('no images found')
        return

    nums = np.array(num_objects)
    print('min: %d' % (np.min(nums)))
    print('max: %d' % (np.max(nums)))
    print('mean: %f' % (np.mean(nums)))


if __name__ == '__main__':
    main()
def __getitem__(self, idx):
    """Load one view of a synthetic scene: labels, color image, optional depth.

    ``idx`` is split into a scene index (``idx // NUM_VIEWS_PER_SCENE``) and
    a view number (``idx % NUM_VIEWS_PER_SCENE``).

    Args:
        idx: Flat sample index over all scenes and views.

    Returns:
        dict with the keys

        * ``'label'``: processed (and, when ``cfg.TRAIN.EMBEDDING_SAMPLING``
          is set, pixel-sampled) labels with a leading axis added.
        * ``'image_color'``: image divided by 255 with ``self._pixel_mean``
          subtracted, axes permuted with ``(2, 0, 1)``.
        * ``'depth'``: only when ``cfg.INPUT`` is ``'DEPTH'`` or ``'RGBD'``;
          output of ``self.process_depth`` permuted with ``(2, 0, 1)``.

    Raises:
        OSError: if ``cv2.imread`` cannot read the color image.
    """
    # Get scene directory; crop does not use background
    scene_idx = idx // self.NUM_VIEWS_PER_SCENE
    scene_dir = self.scene_dirs[scene_idx]

    # Get view number
    view_num = idx % self.NUM_VIEWS_PER_SCENE
    if cfg.TRAIN.SYN_CROP:
        # presumably skips leading background-only views -- TODO confirm
        view_num += 2

    # Label
    foreground_labels_filename = os.path.join(
        scene_dir, 'segmentation_%05d.png' % view_num)
    foreground_labels = util_.imread_indexed(foreground_labels_filename)
    # mask table as background
    foreground_labels[foreground_labels == 1] = 0
    foreground_labels = self.process_label(foreground_labels)

    # BGR image
    filename = os.path.join(scene_dir, 'rgb_%05d.jpeg' % view_num)
    im = cv2.imread(filename)
    if im is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the path instead of deep inside later processing.
        raise OSError('cv2.imread failed to read image: %s' % filename)

    if cfg.INPUT == 'DEPTH' or cfg.INPUT == 'RGBD':
        # Depth image
        depth_img_filename = os.path.join(scene_dir,
                                          'depth_%05d.png' % view_num)
        # This reads a 16-bit single-channel image. Shape: [H x W]
        # NOTE(review): cv2.imread can return None here too; process_depth
        # is not visible from this block, so no check is added -- confirm.
        depth_img = cv2.imread(depth_img_filename, cv2.IMREAD_ANYDEPTH)
        xyz_img = self.process_depth(depth_img)
    else:
        xyz_img = None

    # crop
    if cfg.TRAIN.SYN_CROP:
        im, foreground_labels, xyz_img = self.pad_crop_resize(
            im, foreground_labels, xyz_img)
        # Labels are processed again after cropping.
        foreground_labels = self.process_label(foreground_labels)

    # sample labels
    if cfg.TRAIN.EMBEDDING_SAMPLING:
        foreground_labels = self.sample_pixels(
            foreground_labels, cfg.TRAIN.EMBEDDING_SAMPLING_NUM)

    label_blob = torch.from_numpy(foreground_labels).unsqueeze(0)
    sample = {'label': label_blob}

    # Augmentations are only applied in training mode, each skipped when the
    # random draw is <= 0.1.
    if cfg.TRAIN.CHROMATIC and cfg.MODE == 'TRAIN' and np.random.rand(1) > 0.1:
        im = chromatic_transform(im)
    if cfg.TRAIN.ADD_NOISE and cfg.MODE == 'TRAIN' and np.random.rand(1) > 0.1:
        im = add_noise(im)

    im_tensor = torch.from_numpy(im) / 255.0
    im_tensor -= self._pixel_mean
    sample['image_color'] = im_tensor.permute(2, 0, 1)

    if cfg.INPUT == 'DEPTH' or cfg.INPUT == 'RGBD':
        sample['depth'] = torch.from_numpy(xyz_img).permute(2, 0, 1)

    return sample
def __getitem__(self, idx):
    """Load one sample from ``self.data``: labels, color image, optional depth.

    Each entry of ``self.data`` is indexed as ``[rgb, depth, segmentation]``
    file paths.

    Args:
        idx: Index into ``self.data``.

    Returns:
        dict with the keys

        * ``'label'``: processed (and, when ``cfg.TRAIN.EMBEDDING_SAMPLING``
          is set, pixel-sampled) labels with a leading axis added.
        * ``'image_color'``: image divided by 255 with ``self._pixel_mean``
          subtracted, axes permuted with ``(2, 0, 1)``.
        * ``'depth'``: only when ``cfg.INPUT`` is ``'DEPTH'`` or ``'RGBD'``;
          output of ``self.process_depth`` permuted with ``(2, 0, 1)``.

    Raises:
        OSError: if ``cv2.imread`` cannot read the color image.
    """
    # (idx: [rgb, d, seg]); kept under its own name so the returned dict
    # below does not rebind the same variable.
    record = self.data[idx]
    rgb_path = record[0]
    depth_path = record[1]
    segmentation_path = record[2]

    # Label
    foreground_labels = util_.imread_indexed(segmentation_path)
    # mask table as background
    foreground_labels[foreground_labels == 1] = 0
    foreground_labels = self.process_label(foreground_labels)

    # BGR image
    im = cv2.imread(rgb_path)
    if im is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the path instead of deep inside later processing.
        raise OSError('cv2.imread failed to read image: %s' % rgb_path)

    if cfg.INPUT == 'DEPTH' or cfg.INPUT == 'RGBD':
        # Depth image
        # This reads a 16-bit single-channel image. Shape: [H x W]
        # NOTE(review): cv2.imread can return None here too; process_depth
        # is not visible from this block, so no check is added -- confirm.
        depth_img = cv2.imread(depth_path, cv2.IMREAD_ANYDEPTH)
        xyz_img = self.process_depth(depth_img)
    else:
        xyz_img = None

    # crop
    if cfg.TRAIN.SYN_CROP:
        im, foreground_labels, xyz_img = self.pad_crop_resize(
            im, foreground_labels, xyz_img)
        # Labels are processed again after cropping.
        foreground_labels = self.process_label(foreground_labels)

    # sample labels
    if cfg.TRAIN.EMBEDDING_SAMPLING:
        foreground_labels = self.sample_pixels(
            foreground_labels, cfg.TRAIN.EMBEDDING_SAMPLING_NUM)

    label_blob = torch.from_numpy(foreground_labels).unsqueeze(0)
    sample = {'label': label_blob}

    # Augmentations are only applied in training mode, each skipped when the
    # random draw is <= 0.1.
    if cfg.TRAIN.CHROMATIC and cfg.MODE == 'TRAIN' and np.random.rand(1) > 0.1:
        im = chromatic_transform(im)
    if cfg.TRAIN.ADD_NOISE and cfg.MODE == 'TRAIN' and np.random.rand(1) > 0.1:
        im = add_noise(im)

    im_tensor = torch.from_numpy(im) / 255.0
    im_tensor -= self._pixel_mean
    sample['image_color'] = im_tensor.permute(2, 0, 1)

    if cfg.INPUT == 'DEPTH' or cfg.INPUT == 'RGBD':
        sample['depth'] = torch.from_numpy(xyz_img).permute(2, 0, 1)

    return sample