def __getitem__(self, index):
    """
    Fetch one training sample as a dict of network-ready tensors.

    The cropped RGB patch is normalized (with per-sample color
    randomization), converted to a single-channel grayscale map, and
    optionally stacked with a normalized depth channel. Keypoint pixel
    coordinates are rescaled to [-0.5, 0.5] and keypoint depth is
    normalized by the configured mean/scale.

    :param index: index into self._entry_list
    :return: dict keyed by the names in `parameter`, all float32
    """
    entry = self.get_processed_entry(self._entry_list[index])

    # Image normalization helpers (local import keeps module load light)
    from mankey.utils.imgproc import rgb_image_normalize, depth_image_normalize

    # Normalize RGB with randomized color augmentation, then collapse to
    # grayscale using the standard luminance weights.
    aug_scale = self._get_color_randomization_parameter()
    rgb_normalized = rgb_image_normalize(entry.cropped_rgb, self._config.rgb_mean, aug_scale)
    gray = np.dot(np.transpose(rgb_normalized, (1, 2, 0)), [0.2989, 0.5870, 0.1140])
    gray = np.expand_dims(gray, axis=0)  # (1, h, w)
    n_gray, height, width = gray.shape

    # One extra channel for depth, when the entry provides it.
    n_total = n_gray + (1 if entry.has_depth else 0)
    stacked = np.zeros(shape=(n_total, height, width), dtype=np.float32)
    stacked[0:n_gray, :, :] = gray

    offset = n_gray
    if entry.has_depth:
        # Depth is normalized deterministically — no randomization.
        depth_normalized = depth_image_normalize(
            entry.cropped_depth,
            self._config.depth_image_clip,
            self._config.depth_image_mean,
            self._config.depth_image_scale)
        stacked[offset, :, :] = depth_normalized
        offset += 1

    # Rescale keypoints: x/y into [-0.5, 0.5], depth by mean/scale.
    kp = entry.keypoint_xy_depth.copy()
    kp[0, :] = (entry.keypoint_xy_depth[0, :] / float(width)) - 0.5
    kp[1, :] = (entry.keypoint_xy_depth[1, :] / float(height)) - 0.5
    kp[2, :] = (entry.keypoint_xy_depth[2, :] - self._config.depth_image_mean) \
        / float(self._config.depth_image_scale)
    kp = np.transpose(kp, (1, 0))  # (n_keypoint, 3)

    validity = np.transpose(entry.keypoint_validity, (1, 0))
    return {
        parameter.rgbd_image_key: stacked,
        parameter.keypoint_xyd_key: kp.astype(np.float32),
        parameter.keypoint_validity_key: validity.astype(np.float32),
        parameter.target_heatmap_key: entry.target_heatmap.astype(np.float32),
    }
def proc_input_img_internal(
        rgb,
        depth,
        is_path_input,  # type: bool
        bbox_topleft,  # type: PixelCoord
        bbox_bottomright,  # type: PixelCoord
):
    """
    The worker for image processing.

    Crops rgb/depth using the (scaled) tight bounding box, normalizes
    both, and stacks them into a single (channels + 1, h, w) float32
    tensor (RGB channels first, depth last).

    :param rgb: numpy ndarray if is_path_input==False, str if is_path_input==True
    :param depth: The same as rgb
    :param is_path_input: binary flag for whether the input is path or actual image
    :param bbox_topleft: Tight bounding box by maskrcnn
    :param bbox_bottomright: Tight bounding box by maskrcnn
    :return: ImageProcOut with stacked_rgbd, bbox2patch, warped_rgb, warped_depth
    """
    # The path and raw branches are identical except for the crop routine,
    # so select it once instead of duplicating all four calls.
    crop_fn = (imgproc.get_bbox_cropped_image_path
               if is_path_input
               else imgproc.get_bbox_cropped_image_raw)

    # Crop both images with the same bbox; only the rgb call needs the
    # bbox-to-patch transform.
    warped_rgb, bbox2patch = crop_fn(
        rgb, True,
        bbox_topleft, bbox_bottomright,
        patch_width=parameter.default_patch_size_input,
        patch_height=parameter.default_patch_size_input,
        bbox_scale=parameter.bbox_scale)
    warped_depth, _ = crop_fn(
        depth, False,
        bbox_topleft, bbox_bottomright,
        patch_width=parameter.default_patch_size_input,
        patch_height=parameter.default_patch_size_input,
        bbox_scale=parameter.bbox_scale)

    # Perform normalization. Color scale [1.0, 1.0, 1.0] means no
    # color randomization at inference time.
    normalized_rgb = imgproc.rgb_image_normalize(warped_rgb, parameter.rgb_mean, [1.0, 1.0, 1.0])
    normalized_depth = imgproc.depth_image_normalize(
        warped_depth,
        parameter.depth_image_clip,
        parameter.depth_image_mean,
        parameter.depth_image_scale)

    # Construct the stacked tensor. Index by `channels` (instead of a
    # hardcoded 3) so the slicing stays consistent with the allocation.
    channels, height, width = normalized_rgb.shape
    stacked_rgbd = np.zeros(shape=(channels + 1, height, width), dtype=np.float32)
    stacked_rgbd[0:channels, :, :] = normalized_rgb
    stacked_rgbd[channels, :, :] = normalized_depth

    # OK
    imgproc_out = ImageProcOut()
    imgproc_out.stacked_rgbd = stacked_rgbd
    imgproc_out.bbox2patch = bbox2patch
    imgproc_out.warped_rgb = warped_rgb
    imgproc_out.warped_depth = warped_depth
    return imgproc_out
def __getitem__(self, index):
    """
    Fetch one training sample as a dict of network-ready tensors.

    Normalizes the cropped RGB patch (with per-sample color
    randomization), optionally stacks a normalized depth channel, and
    rescales keypoint x/y into [-0.5, 0.5] and keypoint depth by the
    configured mean/scale. Also emits the delta-rotation /
    delta-translation / gripper-pose / step-size supervision targets.

    :param index: index into self._entry_list
    :return: dict keyed by the names in `parameter`
    """
    processed_entry = self.get_processed_entry(self._entry_list[index])

    # Do normalization on images (local import keeps module load light)
    from mankey.utils.imgproc import rgb_image_normalize, depth_image_normalize

    # The randomization on rgb
    color_aug_scale = self._get_color_randomization_parameter()
    normalized_rgb = rgb_image_normalize(processed_entry.cropped_rgb, self._config.rgb_mean, color_aug_scale)
    rgb_channels, height, width = normalized_rgb.shape

    # Check the total size of tensor
    tensor_channels = rgb_channels
    if processed_entry.has_depth:
        tensor_channels += 1

    # Construct the tensor
    stacked_tensor = np.zeros(shape=(tensor_channels, height, width), dtype=np.float32)
    stacked_tensor[0:rgb_channels, :, :] = normalized_rgb

    # Process other channels
    channel_offset = rgb_channels
    if processed_entry.has_depth:
        # The depth should not be randomized
        normalized_depth = depth_image_normalize(
            processed_entry.cropped_depth,
            self._config.depth_image_clip,
            self._config.depth_image_mean,
            self._config.depth_image_scale)
        stacked_tensor[channel_offset, :, :] = normalized_depth
        channel_offset += 1

    # Do scale on keypoint xy and depth
    normalized_keypoint_xy_depth = processed_entry.keypoint_xy_depth.copy()
    normalized_keypoint_xy_depth[0, :] = (processed_entry.keypoint_xy_depth[0, :] / float(width)) - 0.5
    normalized_keypoint_xy_depth[1, :] = (processed_entry.keypoint_xy_depth[1, :] / float(height)) - 0.5
    normalized_keypoint_xy_depth[2, :] = \
        (processed_entry.keypoint_xy_depth[2, :] - self._config.depth_image_mean) \
        / float(self._config.depth_image_scale)
    normalized_keypoint_xy_depth = np.transpose(normalized_keypoint_xy_depth, (1, 0))

    # OK
    validity = np.transpose(processed_entry.keypoint_validity, (1, 0))
    return {
        parameter.rgbd_image_key: stacked_tensor,
        parameter.keypoint_xyd_key: normalized_keypoint_xy_depth.astype(np.float32),
        parameter.keypoint_validity_key: validity.astype(np.float32),
        parameter.target_heatmap_key: processed_entry.target_heatmap.astype(np.float32),
        parameter.delta_rot_key: processed_entry.delta_rotation_matrix.astype(np.float32),
        # BUGFIX: np.int was deprecated in NumPy 1.20 and removed in 1.24;
        # it was simply an alias of the builtin int, so astype(int) is the
        # exact drop-in replacement.
        parameter.delta_rot_cls_key: processed_entry.delta_rot_cls.astype(int),
        parameter.delta_xyz_key: processed_entry.delta_translation.astype(np.float32),
        parameter.gripper_pose_key: processed_entry.gripper_pose.astype(np.float32),
        parameter.step_size_key: processed_entry.step_size.astype(np.float32),
    }