def _get_transformed_keypoint(
        transform: np.ndarray,
        entry: SupervisedKeypointDBEntry,
        patch_width: int,
        patch_height: int) -> (np.ndarray, np.ndarray):
    """
    Given the bounding box to patch transform, compute the transformed
    keypoints and their validity.
    Note that the transformed pixel coordinates are in general not integers.
    :param transform: 3x3 homogeneous transform matrix
    :param entry: The database entry; only entry.keypoint_pixelxy_depth is
                  read. It is indexed as (3, n_keypoint) with rows (x, y, depth).
    :param patch_width: x coordinate used for the bottom-right corner of the patch box
    :param patch_height: y coordinate used for the bottom-right corner of the patch box
    :return: A tuple of the transformed pixelxy_depth, (3, n_keypoint) float,
             and the validity weight, (3, n_keypoint) where a column is all
             one for a keypoint inside the patch box and all zero otherwise.
    """
    from mankey.utils.imgproc import transform_2d, PixelCoord, pixel_in_bbox

    # Allocate the space
    n_keypoint = entry.keypoint_pixelxy_depth.shape[1]
    transformed_pixelxy_depth = np.zeros((3, n_keypoint))
    transformed_validity_weight = np.ones((3, n_keypoint))

    # Construct the bounding box of the patch: (0, 0) -> (width, height).
    # NOTE(review): whether the bottom-right corner is inclusive is decided
    # by pixel_in_bbox, which is not visible here.
    top_left = PixelCoord()
    top_left.x = 0
    top_left.y = 0
    bottom_right = PixelCoord()
    bottom_right.x = patch_width
    bottom_right.y = patch_height

    # Do transform
    pixel = PixelCoord()
    for i in range(n_keypoint):
        # Only the xy part is transformed, the depth is copied as is
        transformed_pixelxy_depth[0:2, i] = transform_2d(
            entry.keypoint_pixelxy_depth[0:2, i], transform)
        transformed_pixelxy_depth[2, i] = entry.keypoint_pixelxy_depth[2, i]

        # Check validity on the pixel that contains the transformed point.
        # Floor instead of int(): int() truncates toward zero, which would map
        # coordinates in (-1, 0) onto pixel 0 and accept an out-of-patch point.
        pixel.x = int(np.floor(transformed_pixelxy_depth[0, i]))
        pixel.y = int(np.floor(transformed_pixelxy_depth[1, i]))
        if not pixel_in_bbox(pixel, top_left, bottom_right):
            # Invalidate x, y and depth together
            transformed_validity_weight[:, i] = 0

    # OK
    return transformed_pixelxy_depth, transformed_validity_weight
def _get_image_entry(self, image_map, scene_root: str) -> SupervisedKeypointDBEntry:
    """
    Build one database entry from the annotation of a (multi-view) frame.

    The image paths, point cloud path, motion labels, camera pose, bounding
    box and keypoints are read from image_map and stored on a new
    SupervisedKeypointDBEntry.

    :param image_map: Indexable annotation of one frame. The keys read here
                      are 'rgb_image_filename', 'depth_image_filename', 'pcd',
                      'pcd_centroid', 'pcd_mean', 'delta_rotation_matrix',
                      'delta_translation', 'gripper_pose', 'camera_to_world',
                      'bbox_top_left_xy', 'bbox_bottom_right_xy',
                      '3d_keypoint_camera_frame' and 'keypoint_pixel_xy_depth'.
    :param scene_root: Root directory of the scene; all paths are built under
                       its 'processed' sub-directory.
    :return: The populated entry.
    :raises AssertionError: if a depth image or the point cloud file does not
                            exist, or the keypoint count is inconsistent.

    NOTE(review): 'depth_image_filename' is assumed to be a sequence of file
    names parallel to 'rgb_image_filename', and entry.depth_image_path is a
    list like entry.rgb_image_path. Confirm against the consumers of the entry.
    """
    entry = SupervisedKeypointDBEntry()

    # The paths of the rgb images, one per view. Existence is not checked.
    entry.rgb_image_path = [
        os.path.join(scene_root, 'processed/images/' + rgb_name)
        for rgb_name in image_map['rgb_image_filename']
    ]

    # The paths of the depth images, one per view.
    # The previous code looped over the rgb names and read an undefined
    # depth_name, which raised NameError on every non-empty entry.
    depth_path = []
    for depth_name in image_map['depth_image_filename']:
        view_depth_path = os.path.join(scene_root, 'processed/images/' + depth_name)
        assert os.path.exists(view_depth_path)  # Spartan must have depth image
        depth_path.append(view_depth_path)
    entry.depth_image_path = depth_path

    # The path for pcd
    pcd_name = image_map['pcd']
    pcd_path = os.path.join(scene_root, 'processed/pcd_seg_heatmap_3kpt/' + pcd_name)
    assert os.path.exists(pcd_path)
    entry.pcd_path = pcd_path

    # pcd centroid & pcd mean
    entry.pcd_centroid = np.array(image_map['pcd_centroid'])
    entry.pcd_mean = np.array(image_map['pcd_mean'])

    # xyzrot
    entry.delta_rotation_matrix = np.array(image_map['delta_rotation_matrix']).reshape((3, 3))
    entry.delta_translation = np.array(image_map['delta_translation']).reshape((3,))
    entry.gripper_pose = np.array(image_map['gripper_pose']).reshape((4, 4))

    # Split the translation into a direction and a step size.
    # A zero translation has no direction, it is stored unchanged.
    step_size_value = np.linalg.norm(entry.delta_translation)
    if step_size_value == 0:
        entry.unit_delta_translation = entry.delta_translation
    else:
        entry.unit_delta_translation = entry.delta_translation / step_size_value

    # The step size is the norm scaled by 100 and saturated at 1.0
    step_size_value = step_size_value * 100
    if step_size_value >= 1.0:
        entry.step_size = np.array([1.0]).reshape((1,))
    else:
        entry.step_size = np.array([step_size_value]).reshape((1,))

    # The camera pose in world
    camera2world_map = image_map['camera_to_world']
    entry.camera_in_world = camera2world_from_map(camera2world_map)

    # The bounding box
    top_left = PixelCoord()
    bottom_right = PixelCoord()
    top_left.x, top_left.y = image_map['bbox_top_left_xy'][0], image_map['bbox_top_left_xy'][1]
    bottom_right.x, bottom_right.y = \
        image_map['bbox_bottom_right_xy'][0], image_map['bbox_bottom_right_xy'][1]
    entry.bbox_top_left = top_left
    entry.bbox_bottom_right = bottom_right

    # The size of keypoint: the first entry fixes it, later ones must agree
    keypoint_camera_frame_list = image_map['3d_keypoint_camera_frame']
    n_keypoint = len(keypoint_camera_frame_list)
    if self._num_keypoint < 0:
        self._num_keypoint = n_keypoint
    else:
        assert self._num_keypoint == n_keypoint

    # The keypoint in camera frame, one column per keypoint
    entry.keypoint_camera = np.zeros((3, n_keypoint))
    for i in range(n_keypoint):
        for j in range(3):
            entry.keypoint_camera[j, i] = keypoint_camera_frame_list[i][j]

    # The pixel coordinate and depth of keypoint.
    # Builtin int replaces np.int, an alias that was removed from NumPy.
    keypoint_pixelxy_depth_list = image_map['keypoint_pixel_xy_depth']
    assert n_keypoint == len(keypoint_pixelxy_depth_list)
    entry.keypoint_pixelxy_depth = np.zeros((3, n_keypoint), dtype=int)
    for i in range(n_keypoint):
        for j in range(3):
            entry.keypoint_pixelxy_depth[j, i] = keypoint_pixelxy_depth_list[i][j]

    # Check the validity
    entry.keypoint_validity_weight = np.ones((3, n_keypoint))
    for i in range(n_keypoint):
        pixel = PixelCoord()
        pixel.x = entry.keypoint_pixelxy_depth[0, i]
        pixel.y = entry.keypoint_pixelxy_depth[1, i]
        depth_mm = entry.keypoint_pixelxy_depth[2, i]

        # The depth cannot be negative
        valid = not depth_mm < 0

        # The pixel must be in bounding box
        if not pixel_in_bbox(pixel, entry.bbox_top_left, entry.bbox_bottom_right):
            valid = False

        # Invalid all the dimension and flag the entry
        if not valid:
            entry.keypoint_validity_weight[:, i] = 0
            entry.on_boundary = True

    # OK
    return entry


def _check_image_entry(self, entry: SupervisedKeypointDBEntry) -> bool:
    """
    Check that the bounding box of an entry is fully specified.

    :param entry: The entry to check; only its two bounding box corners are read.
    :return: False if any of the four corner coordinates is None, else True.
    """
    # Check the bounding box
    if entry.bbox_top_left.x is None or entry.bbox_top_left.y is None:
        return False
    if entry.bbox_bottom_right.x is None or entry.bbox_bottom_right.y is None:
        return False

    # OK
    return True
def _get_image_entry(self, image_map, scene_root: str) -> SupervisedKeypointDBEntry:
    """
    Build one database entry from the annotation of a single-view frame.

    :param image_map: Indexable annotation of one frame. The keys read here
                      are 'rgb_image_filename', 'depth_image_filename',
                      'delta_rotation_matrix', 'delta_translation',
                      'gripper_pose', 'step_size', 'camera_to_world',
                      'bbox_top_left_xy', 'bbox_bottom_right_xy',
                      '3d_keypoint_camera_frame' and 'keypoint_pixel_xy_depth'.
    :param scene_root: Root directory of the scene; the image paths are built
                       under its 'processed/images' sub-directory.
    :return: The populated entry.
    :raises AssertionError: if the rgb or depth image does not exist, or the
                            keypoint count is inconsistent.
    """
    entry = SupervisedKeypointDBEntry()

    # The path for rgb image
    rgb_name = image_map['rgb_image_filename']
    rgb_path = os.path.join(scene_root, 'processed/images/' + rgb_name)
    assert os.path.exists(rgb_path)
    entry.rgb_image_path = rgb_path

    # The path for depth image
    depth_name = image_map['depth_image_filename']
    depth_path = os.path.join(scene_root, 'processed/images/' + depth_name)
    assert os.path.exists(depth_path)  # Spartan must have depth image
    entry.depth_image_path = depth_path

    # xyzrot
    entry.delta_rotation_matrix = np.array(
        image_map['delta_rotation_matrix']).reshape((3, 3))
    entry.delta_translation = np.array(
        image_map['delta_translation']).reshape((3, ))
    entry.gripper_pose = np.array(image_map['gripper_pose']).reshape((4, 4))

    # The step size is clamped into [0, 1]
    step_size_value = max(min(image_map['step_size'], 1.0), 0.0)
    entry.step_size = np.array([step_size_value]).reshape((1, ))

    # The camera pose in world
    camera2world_map = image_map['camera_to_world']
    entry.camera_in_world = camera2world_from_map(camera2world_map)

    # The bounding box
    top_left = PixelCoord()
    bottom_right = PixelCoord()
    top_left.x, top_left.y = image_map['bbox_top_left_xy'][0], image_map['bbox_top_left_xy'][1]
    bottom_right.x, bottom_right.y = \
        image_map['bbox_bottom_right_xy'][0], image_map['bbox_bottom_right_xy'][1]
    entry.bbox_top_left = top_left
    entry.bbox_bottom_right = bottom_right

    # The size of keypoint: the first entry fixes it, later ones must agree
    keypoint_camera_frame_list = image_map['3d_keypoint_camera_frame']
    n_keypoint = len(keypoint_camera_frame_list)
    if self._num_keypoint < 0:
        self._num_keypoint = n_keypoint
    else:
        assert self._num_keypoint == n_keypoint

    # The keypoint in camera frame, one column per keypoint
    entry.keypoint_camera = np.zeros((3, n_keypoint))
    for i in range(n_keypoint):
        for j in range(3):
            entry.keypoint_camera[j, i] = keypoint_camera_frame_list[i][j]

    # The pixel coordinate and depth of keypoint.
    # Builtin int replaces np.int, an alias that was removed from NumPy.
    keypoint_pixelxy_depth_list = image_map['keypoint_pixel_xy_depth']
    assert n_keypoint == len(keypoint_pixelxy_depth_list)
    entry.keypoint_pixelxy_depth = np.zeros((3, n_keypoint), dtype=int)
    for i in range(n_keypoint):
        for j in range(3):
            entry.keypoint_pixelxy_depth[j, i] = keypoint_pixelxy_depth_list[i][j]

    # Check the validity
    entry.keypoint_validity_weight = np.ones((3, n_keypoint))
    for i in range(n_keypoint):
        pixel = PixelCoord()
        pixel.x = entry.keypoint_pixelxy_depth[0, i]
        pixel.y = entry.keypoint_pixelxy_depth[1, i]
        depth_mm = entry.keypoint_pixelxy_depth[2, i]

        # The depth cannot be negative
        valid = not depth_mm < 0

        # The pixel must be in bounding box
        if not pixel_in_bbox(pixel, entry.bbox_top_left, entry.bbox_bottom_right):
            valid = False

        # Invalid all the dimension and flag the entry
        if not valid:
            entry.keypoint_validity_weight[:, i] = 0
            entry.on_boundary = True

    # OK
    return entry