Example 1
    def __getitem__(self, index):
        processed_entry = self.get_processed_entry(self._entry_list[index])

        # Normalize the images
        from mankey.utils.imgproc import rgb_image_normalize, depth_image_normalize
        # Color randomization is applied to rgb only
        color_aug_scale = self._get_color_randomization_parameter()
        normalized_rgb = rgb_image_normalize(processed_entry.cropped_rgb,
                                             self._config.rgb_mean,
                                             color_aug_scale)
        # Convert rgb to grayscale using the ITU-R BT.601 luma weights
        normalized_gray = np.dot(np.transpose(normalized_rgb, (1, 2, 0)),
                                 [0.2989, 0.5870, 0.1140])
        normalized_gray = np.expand_dims(normalized_gray, axis=0)  # 1xhxw

        gray_channels, height, width = normalized_gray.shape

        # Compute the total channel count of the tensor
        tensor_channels = gray_channels
        if processed_entry.has_depth:
            tensor_channels += 1

        # Construct the tensor
        stacked_tensor = np.zeros(shape=(tensor_channels, height, width),
                                  dtype=np.float32)
        stacked_tensor[0:gray_channels, :, :] = normalized_gray

        # Process other channels
        channel_offset = gray_channels
        if processed_entry.has_depth:
            # The depth should not be randomized
            normalized_depth = depth_image_normalize(
                processed_entry.cropped_depth, self._config.depth_image_clip,
                self._config.depth_image_mean, self._config.depth_image_scale)
            stacked_tensor[channel_offset, :, :] = normalized_depth
            channel_offset += 1

        # Normalize keypoint xy and depth
        normalized_keypoint_xy_depth = processed_entry.keypoint_xy_depth.copy()
        normalized_keypoint_xy_depth[0, :] = (
            processed_entry.keypoint_xy_depth[0, :] / float(width)) - 0.5
        normalized_keypoint_xy_depth[1, :] = (
            processed_entry.keypoint_xy_depth[1, :] / float(height)) - 0.5
        normalized_keypoint_xy_depth[2, :] = \
            (processed_entry.keypoint_xy_depth[2, :] - self._config.depth_image_mean) / float(self._config.depth_image_scale)
        normalized_keypoint_xy_depth = np.transpose(
            normalized_keypoint_xy_depth, (1, 0))

        # Transpose validity to match the (num_keypoint, 3) keypoint layout
        validity = np.transpose(processed_entry.keypoint_validity, (1, 0))
        return {
            parameter.rgbd_image_key:
            stacked_tensor,
            parameter.keypoint_xyd_key:
            normalized_keypoint_xy_depth.astype(np.float32),
            parameter.keypoint_validity_key:
            validity.astype(np.float32),
            parameter.target_heatmap_key:
            processed_entry.target_heatmap.astype(np.float32)
        }
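
For reference, the grayscale step above contracts the channel axis with the ITU-R BT.601 luma weights (0.2989, 0.5870, 0.1140). A minimal standalone sketch of the same operation (only NumPy is assumed; the function name is illustrative):

import numpy as np

def rgb_chw_to_gray(rgb_chw):
    # Move channels last so the dot product contracts over rgb: HxWx3 . 3 -> HxW
    hwc = np.transpose(rgb_chw, (1, 2, 0))
    gray = np.dot(hwc, [0.2989, 0.5870, 0.1140])
    return np.expand_dims(gray, axis=0)  # 1xHxW

rgb = np.random.rand(3, 256, 256).astype(np.float32)
assert rgb_chw_to_gray(rgb).shape == (1, 256, 256)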
Example 2
def proc_input_img_internal(
        rgb, depth,
        is_path_input,  # type: bool
        bbox_topleft,  # type: PixelCoord
        bbox_bottomright,  # type: PixelCoord
    ):
    """
    Worker that crops, normalizes, and stacks the rgb and depth images.
    :param rgb: str (file path) if is_path_input is True, otherwise a numpy ndarray
    :param depth: same convention as rgb
    :param is_path_input: whether the inputs are file paths or in-memory images
    :param bbox_topleft: tight bounding box from Mask R-CNN
    :param bbox_bottomright: tight bounding box from Mask R-CNN
    :return: an ImageProcOut holding the stacked rgbd tensor and crop metadata
    """
    # Get the image and crop them using tight bounding box
    if is_path_input:
        warped_rgb, bbox2patch = imgproc.get_bbox_cropped_image_path(
            rgb, True,
            bbox_topleft, bbox_bottomright,
            patch_width=parameter.default_patch_size_input, patch_height=parameter.default_patch_size_input,
            bbox_scale=parameter.bbox_scale)
        warped_depth, _ = imgproc.get_bbox_cropped_image_path(
            depth, False,
            bbox_topleft, bbox_bottomright,
            patch_width=parameter.default_patch_size_input, patch_height=parameter.default_patch_size_input,
            bbox_scale=parameter.bbox_scale)
    else:  # Image input
        warped_rgb, bbox2patch = imgproc.get_bbox_cropped_image_raw(
            rgb, True,
            bbox_topleft, bbox_bottomright,
            patch_width=parameter.default_patch_size_input, patch_height=parameter.default_patch_size_input,
            bbox_scale=parameter.bbox_scale)
        warped_depth, _ = imgproc.get_bbox_cropped_image_raw(
            depth, False,
            bbox_topleft, bbox_bottomright,
            patch_width=parameter.default_patch_size_input, patch_height=parameter.default_patch_size_input,
            bbox_scale=parameter.bbox_scale)

    # Perform normalization
    normalized_rgb = imgproc.rgb_image_normalize(warped_rgb, parameter.rgb_mean, [1.0, 1.0, 1.0])
    normalized_depth = imgproc.depth_image_normalize(
        warped_depth,
        parameter.depth_image_clip,
        parameter.depth_image_mean,
        parameter.depth_image_scale)

    # Construct the tensor
    channels, height, width = normalized_rgb.shape
    stacked_rgbd = np.zeros(shape=(channels + 1, height, width), dtype=np.float32)
    stacked_rgbd[0:3, :, :] = normalized_rgb
    stacked_rgbd[3, :, :] = normalized_depth

    # Assemble the output
    imgproc_out = ImageProcOut()
    imgproc_out.stacked_rgbd = stacked_rgbd
    imgproc_out.bbox2patch = bbox2patch
    imgproc_out.warped_rgb = warped_rgb
    imgproc_out.warped_depth = warped_depth
    return imgproc_out
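
A hypothetical call of the worker above (the file paths and bounding-box values are made up; PixelCoord is assumed to expose writable x/y fields, as the type hints suggest):

# Hypothetical usage sketch -- paths and bbox coordinates are invented.
top_left = PixelCoord()
top_left.x, top_left.y = 120, 80
bottom_right = PixelCoord()
bottom_right.x, bottom_right.y = 340, 260
imgproc_out = proc_input_img_internal(
    '/path/to/rgb.png', '/path/to/depth.png',
    is_path_input=True,
    bbox_topleft=top_left,
    bbox_bottomright=bottom_right)
# imgproc_out.stacked_rgbd is a 4xHxW float32 array; imgproc_out.bbox2patch
# maps bounding-box coordinates to the warped patch.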
Example 3
    def __getitem__(self, index):
        processed_entry = self.get_processed_entry(self._entry_list[index])
        # Normalize the images
        from mankey.utils.imgproc import rgb_image_normalize, depth_image_normalize
        # Color randomization is applied to rgb only
        color_aug_scale = self._get_color_randomization_parameter()
        normalized_rgb = rgb_image_normalize(processed_entry.cropped_rgb, self._config.rgb_mean, color_aug_scale)
        rgb_channels, height, width = normalized_rgb.shape

        # Compute the total channel count of the tensor
        tensor_channels = rgb_channels
        if processed_entry.has_depth:
            tensor_channels += 1

        # Construct the tensor
        stacked_tensor = np.zeros(shape=(tensor_channels, height, width), dtype=np.float32)
        stacked_tensor[0:rgb_channels, :, :] = normalized_rgb

        # Process other channels
        channel_offset = rgb_channels
        if processed_entry.has_depth:
            # The depth should not be randomized
            normalized_depth = depth_image_normalize(
                processed_entry.cropped_depth,
                self._config.depth_image_clip,
                self._config.depth_image_mean,
                self._config.depth_image_scale)
            stacked_tensor[channel_offset, :, :] = normalized_depth
            channel_offset += 1

        # Normalize keypoint xy and depth
        normalized_keypoint_xy_depth = processed_entry.keypoint_xy_depth.copy()
        normalized_keypoint_xy_depth[0, :] = (processed_entry.keypoint_xy_depth[0, :] / float(width)) - 0.5
        normalized_keypoint_xy_depth[1, :] = (processed_entry.keypoint_xy_depth[1, :] / float(height)) - 0.5
        normalized_keypoint_xy_depth[2, :] = \
            (processed_entry.keypoint_xy_depth[2, :] - self._config.depth_image_mean) / float(self._config.depth_image_scale)
        normalized_keypoint_xy_depth = np.transpose(normalized_keypoint_xy_depth, (1, 0))

        # Transpose validity to match the (num_keypoint, 3) keypoint layout
        validity = np.transpose(processed_entry.keypoint_validity, (1, 0))
        return {
            parameter.rgbd_image_key: stacked_tensor,
            parameter.keypoint_xyd_key: normalized_keypoint_xy_depth.astype(np.float32),
            parameter.keypoint_validity_key: validity.astype(np.float32),
            parameter.target_heatmap_key: processed_entry.target_heatmap.astype(np.float32),
            parameter.delta_rot_key: processed_entry.delta_rotation_matrix.astype(np.float32),
            parameter.delta_rot_cls_key: processed_entry.delta_rot_cls.astype(np.int64),  # np.int was removed in NumPy 1.24
            parameter.delta_xyz_key: processed_entry.delta_translation.astype(np.float32),
            parameter.gripper_pose_key: processed_entry.gripper_pose.astype(np.float32),
            parameter.step_size_key: processed_entry.step_size.astype(np.float32)
        }
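
Because __getitem__ returns a dict of numpy arrays, the default PyTorch collate function can batch the samples directly, stacking each key across the batch. A sketch, assuming the surrounding class is a torch.utils.data.Dataset named SupervisedKeypointDataset (the class name and constructor are assumptions):

# Sketch: batching the dict returned by __getitem__ above.
from torch.utils.data import DataLoader

dataset = SupervisedKeypointDataset(config)  # assumed constructor
loader = DataLoader(dataset, batch_size=16, shuffle=True)
batch = next(iter(loader))
rgbd = batch[parameter.rgbd_image_key]            # (16, C, H, W) float tensor
keypoint_xyd = batch[parameter.keypoint_xyd_key]  # (16, num_keypoint, 3)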