Example #1
    from typing import Tuple  # needed for the return type annotation below

    def _get_transformed_keypoint(
            transform: np.ndarray, entry: SupervisedKeypointDBEntry,
            patch_width: int, patch_height: int) -> Tuple[np.ndarray, np.ndarray]:
        """
        Given the bounding-box-to-patch transform, compute the transformed keypoints
        and their validity. Note that the transformed pixel coordinates might not be integers.
        :param transform: 3x3 homogeneous transform matrix
        :param entry: the database entry whose keypoints are transformed
        :param patch_width: width of the target patch in pixels
        :param patch_height: height of the target patch in pixels
        :return: A tuple containing the transformed pixelxy_depth array and the validity weights
        """
        from mankey.utils.imgproc import transform_2d, PixelCoord, pixel_in_bbox

        # Allocate the space
        n_keypoint = entry.keypoint_pixelxy_depth.shape[1]
        transformed_pixelxy_depth = np.zeros((3, n_keypoint))
        transformed_validity_weight = np.ones((3, n_keypoint))

        # Construct bounding box
        top_left = PixelCoord()
        top_left.x = 0
        top_left.y = 0
        bottom_right = PixelCoord()
        bottom_right.x = patch_width
        bottom_right.y = patch_height

        # Do transform
        pixel = PixelCoord()
        for i in range(n_keypoint):
            transformed_pixelxy_depth[0:2, i] = transform_2d(
                entry.keypoint_pixelxy_depth[0:2, i], transform)
            # The depth channel is unchanged by the 2D transform
            transformed_pixelxy_depth[2, i] = entry.keypoint_pixelxy_depth[2, i]

            # Check validity
            pixel.x = int(transformed_pixelxy_depth[0, i])
            pixel.y = int(transformed_pixelxy_depth[1, i])
            if not pixel_in_bbox(pixel, top_left, bottom_right):
                transformed_validity_weight[0, i] = 0
                transformed_validity_weight[1, i] = 0
                transformed_validity_weight[2, i] = 0

        # OK
        return transformed_pixelxy_depth, transformed_validity_weight
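
A minimal, hedged usage sketch for the helper above. The identity transform, the 256x256 patch size, and the pre-populated `entry` are illustrative assumptions, not values from the original project; the real caller derives the transform from the entry's bounding box.

    # Illustrative call only; `entry` is assumed to be a populated
    # SupervisedKeypointDBEntry, and the patch size is made up.
    identity_transform = np.eye(3)  # 3x3 homogeneous transform (a no-op here)
    pixelxy_depth, validity = _get_transformed_keypoint(
        identity_transform, entry, patch_width=256, patch_height=256)
    # Columns of `validity` are all zero for keypoints that land outside the patch.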
Example #2
    def _get_image_entry(self, image_map, scene_root: str) -> SupervisedKeypointDBEntry:
        entry = SupervisedKeypointDBEntry()
        # The paths for the rgb images: multi-view data, collect one path per view
        rgb_path = []
        for rgb_name in image_map['rgb_image_filename']:
            rgb_path.append(os.path.join(scene_root, 'processed/images/' + rgb_name))
        entry.rgb_image_path = rgb_path

        # The paths for the depth images: same multi-view layout as the rgb images
        depth_path = []
        for depth_name in image_map['depth_image_filename']:
            path = os.path.join(scene_root, 'processed/images/' + depth_name)
            assert os.path.exists(path)  # Spartan must have depth image
            depth_path.append(path)
        entry.depth_image_path = depth_path
        
        # The path for pcd
        ''' old version
        pcd_name = depth_name.split('.')[0] + '.npy'
        pcd_path = os.path.join(scene_root, 'processed/pcd/' + pcd_name)
        assert os.path.exists(pcd_path)
        entry.pcd_path = pcd_path
        '''
        pcd_name = image_map['pcd']
        pcd_path = os.path.join(scene_root, 'processed/pcd_seg_heatmap_3kpt/' + pcd_name)
        assert os.path.exists(pcd_path)
        entry.pcd_path = pcd_path
        
        # pcd centroid & pcd mean
        entry.pcd_centroid = np.array(image_map['pcd_centroid'])
        entry.pcd_mean = np.array(image_map['pcd_mean'])
        
        '''
        # The path for mask image
        mask_name = depth_name[0:6] + '_mask.png'
        mask_path = os.path.join(scene_root, 'processed/image_masks/' + mask_name)
        assert os.path.exists(mask_path)
        entry.binary_mask_path = mask_path
        '''
        # Delta pose of the gripper (rotation and translation) and the gripper pose itself
        entry.delta_rotation_matrix = np.array(image_map['delta_rotation_matrix']).reshape((3, 3))
        #entry.delta_rot_cls = np.array(image_map['cls']).reshape((3,))
        entry.delta_translation = np.array(image_map['delta_translation']).reshape((3,))
        entry.gripper_pose = np.array(image_map['gripper_pose']).reshape((4, 4))

        # The step size is the norm of the delta translation; the unit direction is stored separately
        #step_size_value = max(min(image_map['step_size'], 1.0), 0.0)
        step_size_value = np.linalg.norm(entry.delta_translation)
        if step_size_value == 0:
            entry.unit_delta_translation = entry.delta_translation
        else:
            entry.unit_delta_translation = entry.delta_translation / step_size_value
        # Rescale the step size and clamp it to [0, 1]
        step_size_value = step_size_value * 100
        if step_size_value >= 1.0:
            entry.step_size = np.array([1.0]).reshape((1,))
        else:
            entry.step_size = np.array([step_size_value]).reshape((1,))

        # The camera pose in world
        camera2world_map = image_map['camera_to_world']
        entry.camera_in_world = camera2world_from_map(camera2world_map)

        # The bounding box
        top_left = PixelCoord()
        bottom_right = PixelCoord()
        top_left.x, top_left.y = image_map['bbox_top_left_xy'][0], image_map['bbox_top_left_xy'][1]
        bottom_right.x, bottom_right.y = image_map['bbox_bottom_right_xy'][0], image_map['bbox_bottom_right_xy'][1]
        entry.bbox_top_left = top_left
        entry.bbox_bottom_right = bottom_right

        # The number of keypoints must be consistent across entries
        keypoint_camera_frame_list = image_map['3d_keypoint_camera_frame']
        n_keypoint = len(keypoint_camera_frame_list)
        if self._num_keypoint < 0:
            self._num_keypoint = n_keypoint
        else:
            assert self._num_keypoint == n_keypoint

        # The keypoint in camera frame
        entry.keypoint_camera = np.zeros((3, n_keypoint))
        for i in range(n_keypoint):
            for j in range(3):
                entry.keypoint_camera[j, i] = keypoint_camera_frame_list[i][j]

        # The pixel coordinate and depth of keypoint
        keypoint_pixelxy_depth_list = image_map['keypoint_pixel_xy_depth']
        assert n_keypoint == len(keypoint_pixelxy_depth_list)
        entry.keypoint_pixelxy_depth = np.zeros((3, n_keypoint), dtype=int)
        for i in range(n_keypoint):
            for j in range(3):
                entry.keypoint_pixelxy_depth[j, i] = keypoint_pixelxy_depth_list[i][j]

        # Check the validity
        entry.keypoint_validity_weight = np.ones((3, n_keypoint))
        for i in range(n_keypoint):
            pixel = PixelCoord()
            pixel.x = entry.keypoint_pixelxy_depth[0, i]
            pixel.y = entry.keypoint_pixelxy_depth[1, i]
            depth_mm = entry.keypoint_pixelxy_depth[2, i]
            valid = True
            if depth_mm < 0:  # The depth cannot be negative
                valid = False

            # The pixel must be in bounding box
            if not pixel_in_bbox(pixel, entry.bbox_top_left, entry.bbox_bottom_right):
                valid = False

            # Invalidate all dimensions of this keypoint
            if not valid:
                entry.keypoint_validity_weight[0, i] = 0
                entry.keypoint_validity_weight[1, i] = 0
                entry.keypoint_validity_weight[2, i] = 0
                entry.on_boundary = True

        # OK
        return entry

    def _check_image_entry(self, entry: SupervisedKeypointDBEntry) -> bool:
        # Check the bounding box
        if entry.bbox_top_left.x is None or entry.bbox_top_left.y is None:
            return False

        if entry.bbox_bottom_right.x is None or entry.bbox_bottom_right.y is None:
            return False

        # OK
        return True
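
A hedged sketch of how `_get_image_entry` and `_check_image_entry` could be chained when assembling the database. The driver loop, the `scene_image_maps` variable, and the surrounding method are assumptions for illustration; the scene-loading code of the original file is not shown here.

    # Hypothetical driver loop, assumed to run inside another method of the same class.
    # `scene_image_maps` is an assumed list of per-image annotation maps for one scene.
    entries = []
    for image_map in scene_image_maps:
        entry = self._get_image_entry(image_map, scene_root)
        if self._check_image_entry(entry):  # skip entries whose bounding box is missing
            entries.append(entry)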
Example #3
    def _get_image_entry(self, image_map,
                         scene_root: str) -> SupervisedKeypointDBEntry:
        entry = SupervisedKeypointDBEntry()
        # The path for rgb image
        rgb_name = image_map['rgb_image_filename']
        rgb_path = os.path.join(scene_root, 'processed/images/' + rgb_name)
        assert os.path.exists(rgb_path)
        entry.rgb_image_path = rgb_path

        # The path for depth image
        depth_name = image_map['depth_image_filename']
        depth_path = os.path.join(scene_root, 'processed/images/' + depth_name)
        assert os.path.exists(depth_path)  # Spartan must have depth image
        entry.depth_image_path = depth_path
        '''
        # The path for mask image
        mask_name = depth_name[0:6] + '_mask.png'
        mask_path = os.path.join(scene_root, 'processed/image_masks/' + mask_name)
        assert os.path.exists(mask_path)
        entry.binary_mask_path = mask_path
        '''
        # Delta pose of the gripper, the gripper pose, and the commanded step size
        entry.delta_rotation_matrix = np.array(
            image_map['delta_rotation_matrix']).reshape((3, 3))
        entry.delta_translation = np.array(
            image_map['delta_translation']).reshape((3, ))
        entry.gripper_pose = np.array(image_map['gripper_pose']).reshape(
            (4, 4))
        step_size_value = max(min(image_map['step_size'], 1.0), 0.0)
        entry.step_size = np.array([step_size_value]).reshape((1, ))

        # The camera pose in world
        camera2world_map = image_map['camera_to_world']
        entry.camera_in_world = camera2world_from_map(camera2world_map)

        # The bounding box
        top_left = PixelCoord()
        bottom_right = PixelCoord()
        top_left.x, top_left.y = image_map['bbox_top_left_xy'][0], image_map['bbox_top_left_xy'][1]
        bottom_right.x, bottom_right.y = image_map['bbox_bottom_right_xy'][0], image_map['bbox_bottom_right_xy'][1]
        entry.bbox_top_left = top_left
        entry.bbox_bottom_right = bottom_right

        # The number of keypoints must be consistent across entries
        keypoint_camera_frame_list = image_map['3d_keypoint_camera_frame']
        n_keypoint = len(keypoint_camera_frame_list)
        if self._num_keypoint < 0:
            self._num_keypoint = n_keypoint
        else:
            assert self._num_keypoint == n_keypoint

        # The keypoint in camera frame
        entry.keypoint_camera = np.zeros((3, n_keypoint))
        for i in range(n_keypoint):
            for j in range(3):
                entry.keypoint_camera[j, i] = keypoint_camera_frame_list[i][j]

        # The pixel coordinate and depth of keypoint
        keypoint_pixelxy_depth_list = image_map['keypoint_pixel_xy_depth']
        assert n_keypoint == len(keypoint_pixelxy_depth_list)
        entry.keypoint_pixelxy_depth = np.zeros((3, n_keypoint), dtype=int)
        for i in range(n_keypoint):
            for j in range(3):
                entry.keypoint_pixelxy_depth[j, i] = keypoint_pixelxy_depth_list[i][j]

        # Check the validity
        entry.keypoint_validity_weight = np.ones((3, n_keypoint))
        for i in range(n_keypoint):
            pixel = PixelCoord()
            pixel.x = entry.keypoint_pixelxy_depth[0, i]
            pixel.y = entry.keypoint_pixelxy_depth[1, i]
            depth_mm = entry.keypoint_pixelxy_depth[2, i]
            valid = True
            if depth_mm < 0:  # The depth cannot be negative
                valid = False

            # The pixel must be in bounding box
            if not pixel_in_bbox(pixel, entry.bbox_top_left,
                                 entry.bbox_bottom_right):
                valid = False

            # Invalidate all dimensions of this keypoint
            if not valid:
                entry.keypoint_validity_weight[0, i] = 0
                entry.keypoint_validity_weight[1, i] = 0
                entry.keypoint_validity_weight[2, i] = 0
                entry.on_boundary = True

        # OK
        return entry
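
The main difference from Example #2 is the step size: here it is read directly from `image_map['step_size']` and clamped to [0, 1], whereas Example #2 derives it from the norm of the delta translation. A tiny illustration of the clamp, with made-up values:

    # Made-up inputs, for illustration of max(min(x, 1.0), 0.0) only
    print(max(min(1.7, 1.0), 0.0))   # 1.0  (values above 1.0 are clamped down)
    print(max(min(-0.2, 1.0), 0.0))  # 0.0  (negative values are clamped up)
    print(max(min(0.35, 1.0), 0.0))  # 0.35 (in-range values pass through)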