def transform_world_to_image_coordinate(world_coord_array, camera_token: str, lyftd: LyftDataset):
    sd_record = lyftd.get("sample_data", camera_token)
    cs_record = lyftd.get("calibrated_sensor", sd_record["calibrated_sensor_token"])
    pose_record = lyftd.get("ego_pose", sd_record["ego_pose_token"])

    cam_intrinsic_mtx = np.array(cs_record["camera_intrinsic"])

    pose_to_sense_rot_mtx = Quaternion(cs_record['rotation']).rotation_matrix.T
    world_to_pose_rot_mtx = Quaternion(pose_record['rotation']).rotation_matrix.T

    # World -> ego: subtract the ego translation first, then rotate into the ego frame.
    t = np.array(pose_record['translation'])
    ego_coord_array = np.dot(world_to_pose_rot_mtx, world_coord_array - t.reshape(3, 1))

    # Ego -> sensor: same pattern with the calibrated-sensor extrinsics.
    t = np.array(cs_record['translation'])
    sense_coord_array = np.dot(pose_to_sense_rot_mtx, ego_coord_array - t.reshape(3, 1))

    # view_points normalizes the homogeneous coordinates down to pixel coordinates.
    return view_points(sense_coord_array, cam_intrinsic_mtx, normalize=True)
def transform_pc_to_camera_coord(cam: dict, pointsensor: dict, point_cloud_3d: LidarPointCloud,
                                 lyftd: LyftDataset):
    warnings.warn("The point cloud is transformed to camera coordinates in place", UserWarning)

    # Points live in the point sensor frame, so they need to be transformed via global to the image plane.
    # First step: transform the point cloud to the ego vehicle frame for the timestamp of the sweep.
    cs_record = lyftd.get("calibrated_sensor", pointsensor["calibrated_sensor_token"])
    point_cloud_3d.rotate(Quaternion(cs_record["rotation"]).rotation_matrix)
    point_cloud_3d.translate(np.array(cs_record["translation"]))

    # Second step: transform to the global frame.
    poserecord = lyftd.get("ego_pose", pointsensor["ego_pose_token"])
    point_cloud_3d.rotate(Quaternion(poserecord["rotation"]).rotation_matrix)
    point_cloud_3d.translate(np.array(poserecord["translation"]))

    # Third step: transform into the ego vehicle frame for the timestamp of the image.
    poserecord = lyftd.get("ego_pose", cam["ego_pose_token"])
    point_cloud_3d.translate(-np.array(poserecord["translation"]))
    point_cloud_3d.rotate(Quaternion(poserecord["rotation"]).rotation_matrix.T)

    # Fourth step: transform into the camera.
    cs_record = lyftd.get("calibrated_sensor", cam["calibrated_sensor_token"])
    point_cloud_3d.translate(-np.array(cs_record["translation"]))
    point_cloud_3d.rotate(Quaternion(cs_record["rotation"]).rotation_matrix.T)

    # Take the actual picture (matrix multiplication with camera matrix + renormalization).
    point_cloud_2d = view_points(point_cloud_3d.points[:3, :],
                                 np.array(cs_record["camera_intrinsic"]), normalize=True)

    return point_cloud_3d, point_cloud_2d
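# Usage sketch for transform_pc_to_camera_coord. Illustrative only: it assumes `lyftd`
# is an already-loaded LyftDataset and `sample` is one of its sample records; neither
# name comes from this file, and the module-level imports (LidarPointCloud etc.) are
# assumed to be in scope as they are for the functions above.
def example_project_lidar_to_front_camera(lyftd, sample):
    cam = lyftd.get("sample_data", sample["data"]["CAM_FRONT"])
    pointsensor = lyftd.get("sample_data", sample["data"]["LIDAR_TOP"])
    pc = LidarPointCloud.from_file(lyftd.data_path / pointsensor["filename"])
    # Note: the point cloud is mutated in place by the call below.
    pc_cam, pc_2d = transform_pc_to_camera_coord(cam, pointsensor, pc, lyftd)
    return pc_cam, pc_2d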
def project_pts_to_image(self, pointcloud: LidarPointCloud, token: str) -> np.ndarray:
    """Project lidar points into image.

    Args:
        pointcloud: The LidarPointCloud in nuScenes lidar frame.
        token: Unique KITTI token.

    Returns:
        <np.float: N, 3.> X, Y are points in image pixel coordinates. Z is depth in image.
    """
    # Copy and convert pointcloud.
    pc_image = LidarPointCloud(points=pointcloud.points.copy())
    pc_image.rotate(self.kitti_to_nu_lidar_inv.rotation_matrix)  # Rotate to KITTI lidar.

    # Transform pointcloud to camera frame.
    transforms = self.get_transforms(token, root=self.root)
    pc_image.rotate(transforms["velo_to_cam"]["R"])
    pc_image.translate(transforms["velo_to_cam"]["T"])

    # Project to image.
    depth = pc_image.points[2, :]
    points_fov = view_points(pc_image.points[:3, :], transforms["p_combined"], normalize=True)
    points_fov[2, :] = depth

    return points_fov
def transform_bounding_box_to_sensor_coord_and_get_corners(box: Box, sample_data_token: str,
                                                           lyftd: LyftDataset,
                                                           frustum_pointnet_convention=False):
    """Transform a bounding box to sensor coordinates and get its corners.

    :param box: bounding box in world coordinates
    :param sample_data_token: camera data token
    :param lyftd: LyftDataset instance
    :param frustum_pointnet_convention: whether to rearrange corners to Frustum-PointNet's convention
    :return: (corners projected onto the image, corners in camera coordinates)
    """
    transformed_box = transform_box_from_world_to_sensor_coordinates(box, sample_data_token, lyftd)

    sd_record = lyftd.get("sample_data", sample_data_token)
    cs_record = lyftd.get("calibrated_sensor", sd_record["calibrated_sensor_token"])
    sensor_record = lyftd.get("sensor", cs_record["sensor_token"])

    if sensor_record['modality'] == 'camera':
        cam_intrinsic_mtx = np.array(cs_record["camera_intrinsic"])
    else:
        cam_intrinsic_mtx = None

    box_corners_on_cam_coord = transformed_box.corners()

    # Rearrange to conform to Frustum-PointNet's corner convention.
    if frustum_pointnet_convention:
        rearranged_idx = [0, 3, 7, 4, 1, 2, 6, 5]
        box_corners_on_cam_coord = box_corners_on_cam_coord[:, rearranged_idx]

        assert np.allclose((box_corners_on_cam_coord[:, 0] + box_corners_on_cam_coord[:, 6]) / 2,
                           np.array(transformed_box.center))

    # For perspective transformation, normalization should be set to True.
    box_corners_on_image = view_points(box_corners_on_cam_coord, view=cam_intrinsic_mtx, normalize=True)

    return box_corners_on_image, box_corners_on_cam_coord
def get_mapped_points(self):
    pointsensor = self.level5data.get('sample_data', self.my_sample['data'][self.ldName])
    cam = self.level5data.get("sample_data", self.my_sample['data'][self.caName])
    pc = self.origin

    # First step: transform the point cloud to the ego vehicle frame for the timestamp of the sweep.
    # TODO: the sensor type still needs to be checked here.
    cs_record = self.level5data.get("calibrated_sensor", pointsensor["calibrated_sensor_token"])
    pc.rotate(Quaternion(cs_record["rotation"]).rotation_matrix)
    pc.translate(np.array(cs_record["translation"]))

    # Second step: transform to the global frame.
    poserecord = self.level5data.get("ego_pose", pointsensor["ego_pose_token"])
    pc.rotate(Quaternion(poserecord["rotation"]).rotation_matrix)
    pc.translate(np.array(poserecord["translation"]))

    # Third step: transform into the ego vehicle frame for the timestamp of the image.
    poserecord = self.level5data.get("ego_pose", cam["ego_pose_token"])
    pc.translate(-np.array(poserecord["translation"]))
    pc.rotate(Quaternion(poserecord["rotation"]).rotation_matrix.T)

    # Fourth step: transform into the camera.
    cs_record = self.level5data.get("calibrated_sensor", cam["calibrated_sensor_token"])
    pc.translate(-np.array(cs_record["translation"]))
    pc.rotate(Quaternion(cs_record["rotation"]).rotation_matrix.T)

    # Retrieve the color from the depth.
    depths = pc.points[2, :]
    coloring = depths

    # Take the actual picture (matrix multiplication with camera matrix + renormalization).
    points = view_points(pc.points[:3, :], np.array(cs_record["camera_intrinsic"]), normalize=True)

    # Remove points that are either outside or behind the camera. Leave a margin of 1 pixel
    # for aesthetic reasons. The 1920 x 1080 image size is hard-coded here.
    mask = np.ones(depths.shape[0], dtype=bool)
    mask = np.logical_and(mask, depths > 0)
    mask = np.logical_and(mask, points[0, :] > 1)
    mask = np.logical_and(mask, points[0, :] < 1920 - 1)
    mask = np.logical_and(mask, points[1, :] > 1)
    mask = np.logical_and(mask, points[1, :] < 1080 - 1)
    temp_points = points[:, mask]
    coloring = coloring[mask]

    self.cam_points = []
    for i in range(temp_points[0].size):
        point = [temp_points[0][i], temp_points[1][i], temp_points[2][i]]
        self.cam_points.append(point)
def render_point_cloud_top_view(self, ax,
                                view_matrix=np.array([[1, 0, 0], [0, 0, 1], [0, 0, 0]]),
                                show_angle_text=False):
    # self.box_in_sensor_coord.render(ax, view=view_matrix, normalize=False)
    projected_pts = view_points(np.transpose(self.point_cloud_in_box), view=view_matrix, normalize=False)
    ax.scatter(projected_pts[0, :], projected_pts[1, :], s=0.1)
    if projected_pts.shape[1] > 0:
        if show_angle_text:
            ax.text(np.mean(projected_pts[0, :]), np.mean(projected_pts[1, :]),
                    "{:.2f}".format(self.frustum_angle * 180 / np.pi))
def render_cv2(
    self,
    image: np.ndarray,
    view: np.ndarray = np.eye(3),
    normalize: bool = False,
    colors: Tuple = ((0, 0, 255), (255, 0, 0), (155, 155, 155)),
    linewidth: int = 2,
) -> None:
    """Renders box using OpenCV2.

    Args:
        image: <np.array: height, width, 3>. Image array. Channels are in BGR order.
        view: <np.array: 3, 3>. Define a projection if needed (e.g. for drawing projection in an image).
        normalize: Whether to normalize the remaining coordinate.
        colors: ((R, G, B), (R, G, B), (R, G, B)). Colors for front, side and rear.
        linewidth: Linewidth for plot.
    """
    corners = view_points(self.corners(), view, normalize=normalize)[:2, :]

    def draw_rect(selected_corners, color):
        prev = selected_corners[-1]
        for corner in selected_corners:
            cv2.line(image, (int(prev[0]), int(prev[1])), (int(corner[0]), int(corner[1])),
                     color, linewidth)
            prev = corner

    # Draw the sides.
    for i in range(4):
        cv2.line(
            image,
            (int(corners.T[i][0]), int(corners.T[i][1])),
            (int(corners.T[i + 4][0]), int(corners.T[i + 4][1])),
            colors[2][::-1],
            linewidth,
        )

    # Draw front (first 4 corners) and rear (last 4 corners) rectangles (3d) / lines (2d).
    draw_rect(corners.T[:4], colors[0][::-1])
    draw_rect(corners.T[4:], colors[1][::-1])

    # Draw line indicating the front.
    center_bottom_forward = np.mean(corners.T[2:4], axis=0)
    center_bottom = np.mean(corners.T[[2, 3, 7, 6]], axis=0)
    cv2.line(
        image,
        (int(center_bottom[0]), int(center_bottom[1])),
        (int(center_bottom_forward[0]), int(center_bottom_forward[1])),
        colors[0][::-1],
        linewidth,
    )
def getSampleData(sample_token, dataLyft3D):
    data_path, boxes, camera_intrinsic = dataLyft3D.get_sample_data(sample_token,
                                                                    selected_anntokens=None)
    newBoxes = []
    im = Image.open(data_path)
    labels = list(map(lambda x: x.name, boxes))
    for box in boxes:
        corners = view_points(box.corners(), camera_intrinsic, normalize=True)[:2, :]
        corners = clipDetections(corners, im.size)
        newBoxes.append(box3Dto2D(corners))
    return data_path, newBoxes, boxes, camera_intrinsic
def render(
    self,
    axis: Axes,
    view: np.ndarray = np.eye(3),
    normalize: bool = False,
    colors: Tuple = ("b", "r", "k"),
    linewidth: float = 2,
):
    """Renders the box in the provided Matplotlib axis.

    Args:
        axis: Axis onto which the box should be drawn.
        view: <np.array: 3, 3>. Define a projection if needed (e.g. for drawing projection in an image).
        normalize: Whether to normalize the remaining coordinate.
        colors: (<Matplotlib.colors>: 3). Valid Matplotlib colors (<str> or normalized RGB tuple) for
            front, back and sides.
        linewidth: Width in pixel of the box sides.
    """
    corners = view_points(self.corners(), view, normalize=normalize)[:2, :]

    def draw_rect(selected_corners, color):
        prev = selected_corners[-1]
        for corner in selected_corners:
            axis.plot([prev[0], corner[0]], [prev[1], corner[1]], color=color, linewidth=linewidth)
            prev = corner

    # Draw the sides.
    for i in range(4):
        axis.plot(
            [corners.T[i][0], corners.T[i + 4][0]],
            [corners.T[i][1], corners.T[i + 4][1]],
            color=colors[2],
            linewidth=linewidth,
        )

    # Draw front (first 4 corners) and rear (last 4 corners) rectangles (3d) / lines (2d).
    draw_rect(corners.T[:4], colors[0])
    draw_rect(corners.T[4:], colors[1])

    # Draw line indicating the front.
    center_bottom_forward = np.mean(corners.T[2:4], axis=0)
    center_bottom = np.mean(corners.T[[2, 3, 7, 6]], axis=0)
    axis.plot(
        [center_bottom[0], center_bottom_forward[0]],
        [center_bottom[1], center_bottom_forward[1]],
        color=colors[0],
        linewidth=linewidth,
    )
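# Usage sketch for Box.render. Illustrative only: assumes `lyftd` is a loaded
# LyftDataset and `cam_token` a camera sample_data token; get_sample_data returns
# the boxes already transformed into the camera frame.
def example_render_boxes_on_image(lyftd, cam_token):
    import matplotlib.pyplot as plt
    data_path, boxes, cam_intrinsic = lyftd.get_sample_data(cam_token)
    fig, ax = plt.subplots(1, 1, figsize=(16, 9))
    ax.imshow(Image.open(data_path))
    for box in boxes:
        # normalize=True performs the perspective divide after applying the intrinsics.
        box.render(ax, view=cam_intrinsic, normalize=True)
    plt.show()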
def __getitem__(self, item):
    sample_idx = item // len(CAMS)
    sample = self.lyft_data.sample[sample_idx]

    sd_lidar = self.lyft_data.get('sample_data', sample['data']['LIDAR_TOP'])
    cs_lidar = self.lyft_data.get('calibrated_sensor', sd_lidar['calibrated_sensor_token'])
    pc = LidarPointCloud.from_file(self.lyft_data.data_path / sd_lidar['filename'])

    cam = CAMS[item % len(CAMS)]
    cam_token = sample['data'][cam]
    sd_cam = self.lyft_data.get('sample_data', cam_token)
    cs_cam = self.lyft_data.get('calibrated_sensor', sd_cam['calibrated_sensor_token'])

    # `cam` is just the channel name; the image path comes from the sample_data record.
    img = Image.open(str(self.lyft_data.data_path / sd_cam['filename']))
    img = transform(img)

    lidar_2_ego = transform_matrix(cs_lidar['translation'], Quaternion(cs_lidar['rotation']),
                                   inverse=False)
    ego_2_cam = transform_matrix(cs_cam['translation'], Quaternion(cs_cam['rotation']),
                                 inverse=True)
    cam_2_bev = Quaternion(axis=[1, 0, 0], angle=-np.pi / 2).transformation_matrix
    # lidar_2_cam = ego_2_cam @ lidar_2_ego
    lidar_2_bevcam = cam_2_bev @ ego_2_cam @ lidar_2_ego

    points = view_points(pc.points[:3, :], lidar_2_bevcam, normalize=False)
    points = clip_points(points)

    points = pd.DataFrame(np.swapaxes(points, 0, 1), columns=['x', 'y', 'z'])
    points = PyntCloud(points)
    voxelgrid_id = points.add_structure("voxelgrid", size_x=0.1, size_y=0.1, size_z=0.2,
                                        regular_bounding_box=False)
    voxelgrid = points.structures[voxelgrid_id]
    occ_map = voxelgrid.get_feature_vector(mode='binary')

    padded_occ = np.zeros((800, 700, 30))
    padded_occ[:occ_map.shape[0], :occ_map.shape[1], :occ_map.shape[2]] = occ_map

    return img, padded_occ
def project_kitti_box_to_image(box: Box, p_left: np.ndarray,
                               imsize: Tuple[int, int]) -> Union[None, Tuple[int, int, int, int]]:
    """Projects 3D box into KITTI image FOV.

    Args:
        box: 3D box in KITTI reference frame.
        p_left: <np.float: 3, 4>. Projection matrix.
        imsize: (width, height). Image size.

    Returns:
        (xmin, ymin, xmax, ymax). Bounding box in image plane or None if box is not in the image.
    """
    # Create a new box.
    box = box.copy()

    # KITTI defines the box center as the bottom center of the object.
    # We use the true center, so we need to adjust half height in negative y direction.
    box.translate(np.array([0, -box.wlh[2] / 2, 0]))

    # Check that some corners are inside the image.
    corners = np.array([corner for corner in box.corners().T if corner[2] > 0]).T
    if len(corners) == 0:
        return None

    # Project corners that are in front of the camera to 2d to get bbox in pixel coords.
    imcorners = view_points(corners, p_left, normalize=True)[:2]
    bbox = (np.min(imcorners[0]), np.min(imcorners[1]), np.max(imcorners[0]), np.max(imcorners[1]))

    # Crop bbox to prevent it extending outside the image: clamp x by width, y by height.
    bbox_crop = tuple(max(0, b) for b in bbox)
    bbox_crop = (
        min(imsize[0], bbox_crop[0]),
        min(imsize[1], bbox_crop[1]),
        min(imsize[0], bbox_crop[2]),
        min(imsize[1], bbox_crop[3]),
    )

    # Detect if a cropped box is empty.
    if bbox_crop[0] >= bbox_crop[2] or bbox_crop[1] >= bbox_crop[3]:
        return None

    return bbox_crop
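# Usage sketch for project_kitti_box_to_image. Illustrative only: `box` is assumed
# to already be in the KITTI camera frame and `p_left` to be the corresponding 3x4
# left projection matrix; the 1242x375 image size is the usual KITTI resolution,
# an assumption rather than something taken from this file.
def example_kitti_box_to_2d(box, p_left):
    bbox = project_kitti_box_to_image(box, p_left, imsize=(1242, 375))
    if bbox is None:  # The box does not overlap the image at all.
        return None
    xmin, ymin, xmax, ymax = bbox
    return xmin, ymin, xmax, ymax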
def get_box_corners(transformed_box: Box, cam_intrinsic_mtx: np.array,
                    frustum_pointnet_convention=True):
    box_corners_on_cam_coord = transformed_box.corners()

    # Rearrange to conform to Frustum-PointNet's corner convention.
    if frustum_pointnet_convention:
        rearranged_idx = [0, 3, 7, 4, 1, 2, 6, 5]
        box_corners_on_cam_coord = box_corners_on_cam_coord[:, rearranged_idx]

        assert np.allclose((box_corners_on_cam_coord[:, 0] + box_corners_on_cam_coord[:, 6]) / 2,
                           np.array(transformed_box.center))

    # For perspective transformation, normalization should be set to True.
    box_corners_on_image = view_points(box_corners_on_cam_coord, view=cam_intrinsic_mtx,
                                       normalize=True)

    return box_corners_on_image
def _render_helper(self, color_channel: int, ax: Axes, view: np.ndarray, x_lim: Tuple,
                   y_lim: Tuple, marker_size: float) -> None:
    """Helper function for rendering.

    Args:
        color_channel: Point channel to use as color.
        ax: Axes on which to render the points.
        view: <np.float: n, n>. Defines an arbitrary projection (n <= 4).
        x_lim: (min <float>, max <float>).
        y_lim: (min <float>, max <float>).
        marker_size: Marker size.
    """
    points = view_points(self.points[:3, :], view, normalize=False)
    ax.scatter(points[0, :], points[1, :], c=self.points[color_channel, :], s=marker_size)
    ax.set_xlim(x_lim[0], x_lim[1])
    ax.set_ylim(y_lim[0], y_lim[1])
def project_to_2d(box, p_left, height, width):
    box = box.copy()

    # KITTI defines the box center as the bottom center of the object.
    # We use the true center, so we need to adjust half height in negative y direction.
    box.translate(np.array([0, -box.wlh[2] / 2, 0]))

    # Check that some corners are inside the image.
    corners = np.array([corner for corner in box.corners().T if corner[2] > 0]).T
    if len(corners) == 0:
        return None

    # Project corners that are in front of the camera to 2d to get bbox in pixel coords.
    imcorners = view_points(corners, p_left, normalize=True)[:2]
    bbox = (np.min(imcorners[0]), np.min(imcorners[1]), np.max(imcorners[0]), np.max(imcorners[1]))

    inside = (0 <= bbox[1] < height and 0 < bbox[3] <= height) and \
             (0 <= bbox[0] < width and 0 < bbox[2] <= width)
    valid = (0 <= bbox[1] < height or 0 < bbox[3] <= height) and \
            (0 <= bbox[0] < width or 0 < bbox[2] <= width)
    if not valid:
        return None

    truncated = valid and not inside
    if truncated:
        _bbox = [0] * 4
        _bbox[0] = max(0, bbox[0])
        _bbox[1] = max(0, bbox[1])
        _bbox[2] = min(width, bbox[2])
        _bbox[3] = min(height, bbox[3])

        # Truncation is the fraction of the projected box area that falls outside the image.
        truncated = 1.0 - ((_bbox[2] - _bbox[0]) * (_bbox[3] - _bbox[1])) \
                    / ((bbox[2] - bbox[0]) * (bbox[3] - bbox[1]))
        bbox = _bbox
    else:
        truncated = 0.0

    return {"bbox": bbox, "truncated": truncated}
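# Usage sketch for project_to_2d, showing the truncation output. Illustrative only:
# `box` is assumed to be a Box in the KITTI camera frame, `p_left` the matching 3x4
# projection matrix, and 1242x375 the usual KITTI image size (an assumption).
def example_box_truncation(box, p_left):
    result = project_to_2d(box, p_left, height=375, width=1242)
    if result is None:
        print("box is entirely outside the image")
    else:
        print("bbox:", result["bbox"], "truncated fraction:", result["truncated"])
    return result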
def test_transform_image_to_camera_coord(self):
    train_df = parse_train_csv()
    sample_token, bounding_box = get_train_data_sample_token_and_box(0, train_df)

    first_train_sample = level5data.get('sample', sample_token)
    cam_token = first_train_sample['data']['CAM_FRONT']

    sd_record = level5data.get("sample_data", cam_token)
    cs_record = level5data.get("calibrated_sensor", sd_record["calibrated_sensor_token"])
    cam_intrinsic_mtx = np.array(cs_record["camera_intrinsic"])

    box_corners = transform_bounding_box_to_sensor_coord_and_get_corners(bounding_box, cam_token)

    # Check against the image.
    cam_image_file = level5data.get_sample_data_path(cam_token)
    cam_image_mtx = imread(cam_image_file)

    xmin, xmax, ymin, ymax = get_2d_corners_from_projected_box_coordinates(box_corners)

    random_depth = 20
    image_center = np.array([[(xmax + xmin) / 2, (ymax + ymin) / 2, random_depth]]).T
    image_center_in_cam_coord = transform_image_to_cam_coordinate(image_center, cam_token)
    self.assertTrue(np.isclose(random_depth, image_center_in_cam_coord[2:]))

    transformed_back_image_center = view_points(image_center_in_cam_coord, cam_intrinsic_mtx,
                                                normalize=True)
    self.assertTrue(np.allclose(image_center[0:2, :], transformed_back_image_center[0:2, :]))
def project_point_clouds_to_image(camera_token: str, pointsensor_token: str, lyftd: LyftDataset,
                                  use_multisweep=False):
    """Project a lidar point cloud into a camera image.

    :param camera_token: camera sample_data token
    :param pointsensor_token: lidar sample_data token
    :param lyftd: LyftDataset instance
    :param use_multisweep: whether to aggregate multiple lidar sweeps
    :return: (image array, 3d point cloud transformed to camera coordinates, 2d point cloud array)
    """
    cam = lyftd.get("sample_data", camera_token)
    pointsensor = lyftd.get("sample_data", pointsensor_token)
    pcl_path = lyftd.data_path / pointsensor["filename"]
    assert pointsensor["sensor_modality"] == "lidar"

    if use_multisweep:
        sample_of_pc_record = lyftd.get("sample", cam['sample_token'])
        pc, _ = LidarPointCloud.from_file_multisweep(lyftd, sample_of_pc_record, chan='LIDAR_TOP',
                                                     ref_chan='LIDAR_TOP', num_sweeps=5)
    else:
        pc = LidarPointCloud.from_file(pcl_path)

    im = Image.open(str(lyftd.data_path / cam["filename"]))

    # Points live in the point sensor frame, so they need to be transformed via global to the image plane.
    # First step: transform the point cloud to the ego vehicle frame for the timestamp of the sweep.
    cs_record = lyftd.get("calibrated_sensor", pointsensor["calibrated_sensor_token"])
    pc.rotate(Quaternion(cs_record["rotation"]).rotation_matrix)
    pc.translate(np.array(cs_record["translation"]))

    # Second step: transform to the global frame.
    poserecord = lyftd.get("ego_pose", pointsensor["ego_pose_token"])
    pc.rotate(Quaternion(poserecord["rotation"]).rotation_matrix)
    pc.translate(np.array(poserecord["translation"]))

    # Third step: transform into the ego vehicle frame for the timestamp of the image.
    poserecord = lyftd.get("ego_pose", cam["ego_pose_token"])
    pc.translate(-np.array(poserecord["translation"]))
    pc.rotate(Quaternion(poserecord["rotation"]).rotation_matrix.T)

    # Fourth step: transform into the camera.
    cs_record = lyftd.get("calibrated_sensor", cam["calibrated_sensor_token"])
    pc.translate(-np.array(cs_record["translation"]))
    pc.rotate(Quaternion(cs_record["rotation"]).rotation_matrix.T)

    # Take the actual picture (matrix multiplication with camera matrix + renormalization).
    point_cloud_2d = view_points(pc.points[:3, :], np.array(cs_record["camera_intrinsic"]),
                                 normalize=True)

    return im, pc, point_cloud_2d
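# Usage sketch: overlay the projected lidar points on the camera image with matplotlib.
# Illustrative only: assumes `lyftd` is a loaded LyftDataset and `sample` is one of
# its sample records.
def example_overlay_lidar_on_image(lyftd, sample):
    import matplotlib.pyplot as plt
    im, pc, pts_2d = project_point_clouds_to_image(sample['data']['CAM_FRONT'],
                                                   sample['data']['LIDAR_TOP'], lyftd)
    depths = pc.points[2, :]
    in_front = depths > 0  # Keep only points in front of the camera.
    fig, ax = plt.subplots(1, 1, figsize=(16, 9))
    ax.imshow(im)
    ax.scatter(pts_2d[0, in_front], pts_2d[1, in_front], c=depths[in_front], s=1)
    plt.show()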
def plot_lidar_with_depth(lyftdata, sample):
    """Plot the given sample."""
    print(f'Plotting sample, token: {sample["token"]}')
    lidar_token = sample["data"]["LIDAR_TOP"]
    pc = get_lidar_points(lyftdata, lidar_token)
    _, boxes, _ = lyftdata.get_sample_data(lidar_token, flat_vehicle_coordinates=True)

    fig = mlab.figure(figure=None, bgcolor=(0, 0, 0), fgcolor=None, engine=None, size=(1000, 500))

    # Plot lidar points.
    draw_lidar(pc.T, fig=fig)

    # Plot boxes one by one.
    for box in boxes:
        corners = view_points(box.corners(), view=np.eye(3), normalize=False)
        draw_gt_boxes3d([corners.T], fig=fig, color=(0, 1, 0))

    mlab.show(1)
def get_sp_points(self, box_area):
    pointsensor = self.level5data.get('sample_data', self.my_sample['data'][self.ldName])
    cam = self.level5data.get("sample_data", self.my_sample['data'][self.caName])
    pc = self.origin
    temp = pc.points[:, :]

    cs_record0 = self.level5data.get("calibrated_sensor", pointsensor["calibrated_sensor_token"])
    cs_record1 = self.level5data.get("calibrated_sensor", cam["calibrated_sensor_token"])
    poserecord0 = self.level5data.get("ego_pose", pointsensor["ego_pose_token"])
    poserecord1 = self.level5data.get("ego_pose", cam["ego_pose_token"])

    # Note: this transforms one point at a time, which is slow; the whole cloud could
    # be transformed in a single pass instead.
    for i in range(self.size):
        pc.points = temp[:4, i:i + 1]

        # First step: transform the point cloud to the ego vehicle frame for the timestamp
        # of the sweep. TODO: the sensor type still needs to be checked here.
        pc.rotate(Quaternion(cs_record0["rotation"]).rotation_matrix)
        pc.translate(np.array(cs_record0["translation"]))

        # Second step: transform to the global frame.
        pc.rotate(Quaternion(poserecord0["rotation"]).rotation_matrix)
        pc.translate(np.array(poserecord0["translation"]))

        # Third step: transform into the ego vehicle frame for the timestamp of the image.
        pc.translate(-np.array(poserecord1["translation"]))
        pc.rotate(Quaternion(poserecord1["rotation"]).rotation_matrix.T)

        # Fourth step: transform into the camera.
        pc.translate(-np.array(cs_record1["translation"]))
        pc.rotate(Quaternion(cs_record1["rotation"]).rotation_matrix.T)

        # Grab the depth before projecting: after view_points with normalize=True the
        # third row is identically 1, so it cannot be used as an in-front-of-camera test.
        depth = pc.points[2, 0]

        # Take the actual picture (matrix multiplication with camera matrix + renormalization).
        points = view_points(pc.points[:3, :], np.array(cs_record1["camera_intrinsic"]),
                             normalize=True)

        # Keep points that project inside the box area and lie in front of the camera.
        if (points[0] > box_area[2] and points[0] < box_area[3]
                and points[1] > box_area[0] and points[1] < box_area[1]
                and depth > 0):
            if self.points[i][0] < 0:
                self.sp_points.append(self.points[i])
def select_annotation_boxes(sample_token, lyftd: LyftDataset,
                            box_vis_level: BoxVisibility = BoxVisibility.ALL,
                            camera_type=['CAM_FRONT', 'CAM_BACK', 'CAM_FRONT_LEFT',
                                         'CAM_FRONT_RIGHT', 'CAM_BACK_RIGHT', 'CAM_BACK_LEFT']):
    """Select annotations in a camera image as defined by box_vis_level.

    :param sample_token: sample token
    :param lyftd: LyftDataset instance
    :param box_vis_level: BoxVisibility.ALL or BoxVisibility.ANY
    :param camera_type: a list of cameras that the token should be selected from
    :return: yields (image_filepath, cam_token, corner_list, boxes_in_sensor_coord,
        img_width, img_height)
    """
    sample_record = lyftd.get('sample', sample_token)

    cams = [key for key in sample_record["data"].keys() if "CAM" in key]
    cams = [cam for cam in cams if cam in camera_type]
    for cam in cams:
        # This step selects all the annotations in a camera image that match box_vis_level.
        cam_token = sample_record["data"][cam]
        image_filepath, boxes_in_sensor_coord, cam_intrinsic = lyftd.get_sample_data(
            cam_token, box_vis_level=box_vis_level, selected_anntokens=sample_record['anns'])

        sd_record = lyftd.get('sample_data', cam_token)
        img_width = sd_record['width']    # typically 1920
        img_height = sd_record['height']  # typically 1080

        CORNER_NUMBER = 4
        corner_list = np.empty((len(boxes_in_sensor_coord), CORNER_NUMBER))
        for idx, box_in_sensor_coord in enumerate(boxes_in_sensor_coord):
            # For perspective transformation, normalization should be set to True.
            box_corners_on_image = view_points(box_in_sensor_coord.corners(), view=cam_intrinsic,
                                               normalize=True)
            corners_2d = get_2d_corners_from_projected_box_coordinates(box_corners_on_image)
            corner_list[idx, :] = corners_2d

        yield image_filepath, cam_token, corner_list, boxes_in_sensor_coord, img_width, img_height
def get_lidar_points(lyftdata, lidar_token):
    """Get lidar point cloud in the frame of the ego vehicle."""
    sd_record = lyftdata.get("sample_data", lidar_token)
    sensor_modality = sd_record["sensor_modality"]

    # Get aggregated point cloud in lidar frame.
    sample_rec = lyftdata.get("sample", sd_record["sample_token"])
    chan = sd_record["channel"]
    ref_chan = "LIDAR_TOP"
    pc, times = LidarPointCloud.from_file_multisweep(lyftdata, sample_rec, chan, ref_chan,
                                                     num_sweeps=1)

    # Compute transformation matrices for lidar point cloud.
    cs_record = lyftdata.get("calibrated_sensor", sd_record["calibrated_sensor_token"])
    pose_record = lyftdata.get("ego_pose", sd_record["ego_pose_token"])
    vehicle_from_sensor = np.eye(4)
    vehicle_from_sensor[:3, :3] = Quaternion(cs_record["rotation"]).rotation_matrix
    vehicle_from_sensor[:3, 3] = cs_record["translation"]

    # Undo the ego roll and pitch so the cloud sits in a "flat vehicle" frame that keeps
    # only the yaw of the ego pose.
    ego_yaw = Quaternion(pose_record["rotation"]).yaw_pitch_roll[0]
    rot_vehicle_flat_from_vehicle = np.dot(
        Quaternion(scalar=np.cos(ego_yaw / 2), vector=[0, 0, np.sin(ego_yaw / 2)]).rotation_matrix,
        Quaternion(pose_record["rotation"]).inverse.rotation_matrix,
    )
    vehicle_flat_from_vehicle = np.eye(4)
    vehicle_flat_from_vehicle[:3, :3] = rot_vehicle_flat_from_vehicle

    points = view_points(pc.points[:3, :],
                         np.dot(vehicle_flat_from_vehicle, vehicle_from_sensor),
                         normalize=False)
    return points
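# Usage sketch for get_lidar_points. Illustrative only: assumes `lyftdata` is a
# loaded LyftDataset and `sample` one of its sample records.
def example_lidar_in_flat_vehicle_frame(lyftdata, sample):
    points = get_lidar_points(lyftdata, sample["data"]["LIDAR_TOP"])  # <np.float: 3, n>
    # Horizontal distance of each point from the ego vehicle in the flat vehicle frame.
    ranges = np.linalg.norm(points[:2, :], axis=0)
    return points, ranges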
def render_box(box, axis, view, normalize, colors, im, linewidth=2):
    corners = view_points(box.corners(), view, normalize=normalize)[:2, :]
    corners = clipDetections(corners, im.size)
    render_corners(axis, corners, linewidth)
def map_pc_to_image(lyft_data, pointsensor_token: str, camera_token: str = None,
                    get_ego=False, get_world=False) -> LidarPointCloud:
    """Given a point sensor (lidar/radar) token and camera sample_data token, load the point
    cloud and map it to the image plane.

    Args:
        pointsensor_token: Lidar/radar sample_data token.
        camera_token: Camera sample_data token.
        get_ego: If True, return the point cloud in the ego frame instead.
        get_world: If True, return the point cloud in the world frame instead.

    Returns:
        <np.float: 7, n> array stacking the ego-frame points (x, y, z, intensity) that
        project into the image with their sampled RGB values.
    """
    pointsensor = lyft_data.get("sample_data", pointsensor_token)
    pcl_path = lyft_data.data_path / pointsensor["filename"]
    if pointsensor["sensor_modality"] == "lidar":
        pc = LidarPointCloud.from_file(pcl_path)
    else:
        pc = RadarPointCloud.from_file(pcl_path)

    # Points live in the point sensor frame, so they need to be transformed via global to the image plane.
    # First step: transform the point cloud to the ego vehicle frame for the timestamp of the sweep.
    cs_record = lyft_data.get("calibrated_sensor", pointsensor["calibrated_sensor_token"])
    pc.rotate(Quaternion(cs_record["rotation"]).rotation_matrix)
    pc.translate(np.array(cs_record["translation"]))
    pc_ego = pc.points.copy()
    if get_ego:
        return pc

    # Second step: transform to the global frame.
    poserecord = lyft_data.get("ego_pose", pointsensor["ego_pose_token"])
    pc.rotate(Quaternion(poserecord["rotation"]).rotation_matrix)
    pc.translate(np.array(poserecord["translation"]))
    if get_world:
        return pc

    # Obtain image.
    assert camera_token is not None, "Must specify a camera token"
    cam = lyft_data.get("sample_data", camera_token)
    im = Image.open(str(lyft_data.data_path / cam["filename"]))

    # Third step: transform into the ego vehicle frame for the timestamp of the image.
    poserecord = lyft_data.get("ego_pose", cam["ego_pose_token"])
    pc.translate(-np.array(poserecord["translation"]))
    pc.rotate(Quaternion(poserecord["rotation"]).rotation_matrix.T)

    # Fourth step: transform into the camera.
    cs_record = lyft_data.get("calibrated_sensor", cam["calibrated_sensor_token"])
    pc.translate(-np.array(cs_record["translation"]))
    pc.rotate(Quaternion(cs_record["rotation"]).rotation_matrix.T)

    # Fifth step: actually take a "picture" of the point cloud.
    # Grab the depths (camera frame z axis points away from the camera).
    depths = pc.points[2, :]

    # Take the actual picture (matrix multiplication with camera matrix + renormalization).
    points = view_points(pc.points[:3, :], np.array(cs_record["camera_intrinsic"]), normalize=True)

    # Remove points that are either outside or behind the camera. Leave a margin of 1 pixel
    # for aesthetic reasons.
    mask = np.ones(depths.shape[0], dtype=bool)
    mask = np.logical_and(mask, depths > 0)
    mask = np.logical_and(mask, points[0, :] > 1)
    mask = np.logical_and(mask, points[0, :] < im.size[0] - 1)
    mask = np.logical_and(mask, points[1, :] > 1)
    mask = np.logical_and(mask, points[1, :] < im.size[1] - 1)
    points = points[:, mask]

    # Map the image points to the lidar points.
    idx = points.round().astype(int)
    col_idx = idx[0, :]
    row_idx = idx[1, :]
    image = np.asarray(im)
    points_rgb = image[row_idx, col_idx].T

    return np.vstack((pc_ego[:, mask], points_rgb))
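# Usage sketch for map_pc_to_image. Illustrative only: assumes `lyftd` is a loaded
# LyftDataset and `sample` one of its sample records.
def example_colored_point_cloud(lyftd, sample):
    xyzirgb = map_pc_to_image(lyftd, sample["data"]["LIDAR_TOP"], sample["data"]["CAM_FRONT"])
    xyz = xyzirgb[:3, :]  # Ego-frame coordinates of the points visible in the image.
    rgb = xyzirgb[4:, :]  # Sampled pixel colors (row 3 is the lidar intensity).
    return xyz, rgb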
def pointcloud_color_from_image(lyftd: LyftDataset, pointsensor_token: str,
                                camera_token: str) -> Tuple[np.array, np.array]:
    """Given a point sensor (lidar/radar) token and camera sample_data token, load the point
    cloud and map it to the image plane, then retrieve the colors of the closest image pixels.

    :param lyftd: LyftDataset instance.
    :param pointsensor_token: Lidar/radar sample_data token.
    :param camera_token: Camera sample_data token.
    :return: (coloring <np.float: 3, n>, mask <np.bool: m>). Returns the colors for n points
        that reproject into the image out of m total points. The mask indicates which points
        are selected.
    """
    cam = lyftd.get('sample_data', camera_token)
    pointsensor = lyftd.get('sample_data', pointsensor_token)

    pc = LidarPointCloud.from_file(osp.join(lyftd.data_path, pointsensor['filename']))
    im = Image.open(osp.join(lyftd.data_path, cam['filename']))

    # Points live in the point sensor frame, so they need to be transformed via global to the image plane.
    # First step: transform the point cloud to the ego vehicle frame for the timestamp of the sweep.
    cs_record = lyftd.get('calibrated_sensor', pointsensor['calibrated_sensor_token'])
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix)
    pc.translate(np.array(cs_record['translation']))

    # Second step: transform to the global frame.
    poserecord = lyftd.get('ego_pose', pointsensor['ego_pose_token'])
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix)
    pc.translate(np.array(poserecord['translation']))

    # Third step: transform into the ego vehicle frame for the timestamp of the image.
    poserecord = lyftd.get('ego_pose', cam['ego_pose_token'])
    pc.translate(-np.array(poserecord['translation']))
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix.T)

    # Fourth step: transform into the camera.
    cs_record = lyftd.get('calibrated_sensor', cam['calibrated_sensor_token'])
    pc.translate(-np.array(cs_record['translation']))
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix.T)

    # Fifth step: actually take a "picture" of the point cloud.
    # Grab the depths (camera frame z axis points away from the camera).
    depths = pc.points[2, :]

    # Take the actual picture (matrix multiplication with camera matrix + renormalization).
    points = view_points(pc.points[:3, :], np.array(cs_record['camera_intrinsic']), normalize=True)

    # Remove points that are either outside or behind the camera. Leave a margin of 1 pixel
    # for aesthetic reasons.
    mask = np.ones(depths.shape[0], dtype=bool)
    mask = np.logical_and(mask, depths > 0)
    mask = np.logical_and(mask, points[0, :] > 1)
    mask = np.logical_and(mask, points[0, :] < im.size[0] - 1)
    mask = np.logical_and(mask, points[1, :] > 1)
    mask = np.logical_and(mask, points[1, :] < im.size[1] - 1)
    points = points[:, mask]

    # Pick the colors of the points.
    im_data = np.array(im)
    coloring = np.zeros(points.shape)
    for i, p in enumerate(points.transpose()):
        point = p[:2].round().astype(np.int32)
        coloring[:, i] = im_data[point[1], point[0], :]

    return coloring, mask
def render_sample_data(
    self,
    token: str,
    sensor_modality: str = "lidar",
    with_anns: bool = True,
    axes_limit: float = 30,
    ax: Axes = None,
    view_3d: np.ndarray = np.eye(4),
    color_func: Any = None,
    augment_previous: bool = False,
    box_linewidth: int = 2,
    filter_classes: List[str] = None,
    max_dist: float = None,
    out_path: str = None,
    render_2d: bool = False,
) -> None:
    """Render sample data onto axis. Visualizes lidar in nuScenes lidar frame and camera in camera frame.

    Args:
        token: KITTI token.
        sensor_modality: The modality to visualize, e.g. lidar or camera.
        with_anns: Whether to draw annotations.
        axes_limit: Axes limit for lidar data (measured in meters).
        ax: Axes onto which to render.
        view_3d: 4x4 view matrix for 3d views.
        color_func: Optional function that defines the render color given the class name.
        augment_previous: Whether to augment an existing plot (does not redraw pointcloud/image).
        box_linewidth: Width of the box lines.
        filter_classes: Optionally filter the classes to render.
        max_dist: Maximum distance in meters to still draw a box.
        out_path: Optional path to save the rendered figure to disk.
        render_2d: Whether to render 2d boxes (only works for camera data).
    """
    # Default settings.
    if color_func is None:
        color_func = LyftDatasetExplorer.get_color

    boxes = self.get_boxes(token, filter_classes=filter_classes,
                           max_dist=max_dist)  # In nuScenes lidar frame.

    if sensor_modality == "lidar":
        # Load pointcloud.
        pc = self.get_pointcloud(token, self.root)  # In KITTI lidar frame.
        pc.rotate(self.kitti_to_nu_lidar.rotation_matrix)  # In nuScenes lidar frame.

        # Alternative options:
        # depth = pc.points[1, :]
        # height = pc.points[2, :]
        intensity = pc.points[3, :]

        # Project points to view.
        points = view_points(pc.points[:3, :], view_3d, normalize=False)
        coloring = intensity

        if ax is None:
            _, ax = plt.subplots(1, 1, figsize=(9, 9))

        if not augment_previous:
            ax.scatter(points[0, :], points[1, :], c=coloring, s=1)
            ax.set_xlim(-axes_limit, axes_limit)
            ax.set_ylim(-axes_limit, axes_limit)

        if with_anns:
            for box in boxes:
                color = np.array(color_func(box.name)) / 255
                box.render(ax, view=view_3d, colors=(color, color, "k"), linewidth=box_linewidth)

    elif sensor_modality == "camera":
        im_path = KittiDB.get_filepath(token, "image_2", root=self.root)
        im = Image.open(im_path)

        if ax is None:
            _, ax = plt.subplots(1, 1, figsize=(16, 9))

        if not augment_previous:
            ax.imshow(im)
            ax.set_xlim(0, im.size[0])
            ax.set_ylim(im.size[1], 0)

        if with_anns:
            if render_2d:
                # Use KITTI's 2d boxes.
                boxes_2d, names = self.get_boxes_2d(token, filter_classes=filter_classes)
                for box, name in zip(boxes_2d, names):
                    color = np.array(color_func(name)) / 255
                    ax.plot([box[0], box[0]], [box[1], box[3]], color=color, linewidth=box_linewidth)
                    ax.plot([box[2], box[2]], [box[1], box[3]], color=color, linewidth=box_linewidth)
                    ax.plot([box[0], box[2]], [box[1], box[1]], color=color, linewidth=box_linewidth)
                    ax.plot([box[0], box[2]], [box[3], box[3]], color=color, linewidth=box_linewidth)
            else:
                # Project 3d boxes to 2d.
                transforms = self.get_transforms(token, self.root)
                for box in boxes:
                    # Undo the transformations in get_boxes() to get back to the camera frame.
                    box.rotate(self.kitti_to_nu_lidar_inv)  # In KITTI lidar frame.
                    box.rotate(Quaternion(matrix=transforms["velo_to_cam"]["R"]))
                    box.translate(transforms["velo_to_cam"]["T"])  # In KITTI camera frame, un-rectified.
                    box.rotate(Quaternion(matrix=transforms["r0_rect"]))  # In KITTI camera frame, rectified.

                    # Filter boxes outside the image (relevant when visualizing nuScenes data
                    # in KITTI format).
                    if not box_in_image(box, transforms["p_left"][:3, :3], im.size,
                                        vis_level=BoxVisibility.ANY):
                        continue

                    # Render.
                    color = np.array(color_func(box.name)) / 255
                    box.render(
                        ax,
                        view=transforms["p_left"][:3, :3],
                        normalize=True,
                        colors=(color, color, "k"),
                        linewidth=box_linewidth,
                    )
    else:
        raise ValueError("Unrecognized modality {}.".format(sensor_modality))

    ax.axis("off")
    ax.set_title(token)
    ax.set_aspect("equal")

    # Render to disk.
    plt.tight_layout()
    if out_path is not None:
        plt.savefig(out_path)
def getXYZRGB(ds: LyftDataset, pc, sample: dict, pointsensor):
    """Implements the extraction of RGB values for each LiDAR point.

    inputs:
        - ds: dataset class (LyftDataset)
        - pc: pointcloud (LiDAR/RADAR)
        - sample: sample (dict)
        - pointsensor: sample_data record of the point sensor
    output:
        - xyzrgb (np.ndarray): point cloud containing xyz coordinates and rgb values

    This function is based on the 'map_pointcloud_to_image' function of the LyftDataset
    class (lyftdataset.py in the SDK).
    """
    ###############################################################
    # This part is exactly the same as 'map_pointcloud_to_image'; #
    # some parts were slightly adapted to work within this        #
    # function.                                                   #
    ###############################################################

    # Load point cloud first; no merging of all 3 LiDARs since they are not available.
    cameras = []
    for sensor in sample["data"]:
        if "CAM" in sensor:
            cameras.append(sensor)

    # Points live in the point sensor frame, so they need to be transformed via global to the image plane.
    # First step: transform the point cloud to the ego vehicle frame for the timestamp of the sweep.
    cs_record = ds.get("calibrated_sensor", pointsensor["calibrated_sensor_token"])
    pc.rotate(Quaternion(cs_record["rotation"]).rotation_matrix)
    pc.translate(np.array(cs_record["translation"]))

    # Second step: transform to the global frame.
    poserecord = ds.get("ego_pose", pointsensor["ego_pose_token"])
    pc.rotate(Quaternion(poserecord["rotation"]).rotation_matrix)
    pc.translate(np.array(poserecord["translation"]))

    # Third step: transform into the ego vehicle frame for the timestamp of the image.
    cam = ds.get("sample_data", sample["data"][cameras[0]])
    poserecord = ds.get("ego_pose", cam["ego_pose_token"])
    pc.translate(-np.array(poserecord["translation"]))
    pc.rotate(Quaternion(poserecord["rotation"]).rotation_matrix.T)

    #############################################################
    # This is the new part                                      #
    #                                                           #
    # for each but the zoomed front camera:                     #
    #                                                           #
    # 1. points are projected into a 2D camera plane            #
    # 2. points are selected (masked) that are within the image #
    # 3. RGB values are extracted                               #
    #                                                           #
    # The point cloud shape is preserved!                       #
    #############################################################

    RGB = np.zeros_like(pc.points[0:3, :], dtype=np.uint8)
    XYZ = copy.deepcopy(pc.points)
    for camera in cameras:
        pcc = copy.deepcopy(pc)
        cam = ds.get("sample_data", sample["data"][camera])
        img = Image.open(str(ds.data_path / cam["filename"]))
        cs_record = ds.get("calibrated_sensor", cam["calibrated_sensor_token"])
        pcc.translate(-np.array(cs_record["translation"]))
        pcc.rotate(Quaternion(cs_record["rotation"]).rotation_matrix.T)
        points = view_points(pcc.points[0:3, :], np.array(cs_record["camera_intrinsic"]),
                             normalize=True)

        # Generate mask.
        mask = np.ones(pcc.points.shape[1], dtype=bool)
        # Filter points that are behind the camera.
        depths = pcc.points[2, :]
        mask = np.logical_and(mask, depths > 0)
        # Filter points outside the image.
        mask = np.logical_and(mask, points[0, :] < img.size[0])  # x coordinate
        mask = np.logical_and(mask, points[0, :] > 0)
        mask = np.logical_and(mask, points[1, :] < img.size[1])  # y coordinate
        mask = np.logical_and(mask, points[1, :] > 0)

        # Get RGB values for each point. Pixel indices need a wide integer type:
        # uint8 would overflow for coordinates above 255.
        img = np.array(img)
        coords = np.floor(points[0:2, mask]).astype(np.int32)
        rgblist = []
        for i in range(coords.shape[1]):
            rgblist.append(img[coords[1][i], coords[0][i], :])
        rgblist = np.array(rgblist)
        RGB[:, mask] = rgblist.T

    return np.vstack((XYZ[0:3, :], RGB))
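# Usage sketch for getXYZRGB. Illustrative only: assumes `lyftd` is a loaded
# LyftDataset and `sample` one of its sample records. Note that getXYZRGB mutates
# `pc` in place while transforming it.
def example_xyzrgb(lyftd, sample):
    pointsensor = lyftd.get("sample_data", sample["data"]["LIDAR_TOP"])
    pc = LidarPointCloud.from_file(lyftd.data_path / pointsensor["filename"])
    xyzrgb = getXYZRGB(lyftd, pc, sample, pointsensor)  # <np.float: 6, n>
    return xyzrgb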
def plot_sample(sample_id):
    """Helper: plot sample of lidar and camera data."""
    sample = ds_lyft.get('sample', sample_id)
    sample_data = ds_lyft.get('sample_data', sample['data']['LIDAR_TOP'])
    pc = LidarPointCloud.from_file(
        Path(os.path.join(os.environ['RAW_DATA_PATH'], 'train', sample_data['filename'])))
    _, boxes, _ = ds_lyft.get_sample_data(sample['data']['LIDAR_TOP'],
                                          flat_vehicle_coordinates=False)

    ds_lyft.render_sample(sample_id)

    df_tmp = pd.DataFrame(pc.points[:3, :].T, columns=['x', 'y', 'z'])
    df_tmp['norm'] = np.sqrt(np.power(df_tmp[['x', 'y', 'z']].values, 2).sum(axis=1))
    scatter = go.Scatter3d(
        x=df_tmp['x'], y=df_tmp['y'], z=df_tmp['z'],
        mode='markers',
        marker=dict(size=1, color=df_tmp['norm'], opacity=0.8),
    )

    x_lines = []
    y_lines = []
    z_lines = []

    def f_lines_add_nones():
        x_lines.append(None)
        y_lines.append(None)
        z_lines.append(None)

    ixs_box_0 = [0, 1, 2, 3, 0]
    ixs_box_1 = [4, 5, 6, 7, 4]

    for box in boxes:
        points = view_points(box.corners(), view=np.eye(3), normalize=False)
        x_lines.extend(points[0, ixs_box_0])
        y_lines.extend(points[1, ixs_box_0])
        z_lines.extend(points[2, ixs_box_0])
        f_lines_add_nones()

        x_lines.extend(points[0, ixs_box_1])
        y_lines.extend(points[1, ixs_box_1])
        z_lines.extend(points[2, ixs_box_1])
        f_lines_add_nones()

        for i in range(4):
            x_lines.extend(points[0, [ixs_box_0[i], ixs_box_1[i]]])
            y_lines.extend(points[1, [ixs_box_0[i], ixs_box_1[i]]])
            z_lines.extend(points[2, [ixs_box_0[i], ixs_box_1[i]]])
            f_lines_add_nones()

    lines = go.Scatter3d(x=x_lines, y=y_lines, z=z_lines, mode='lines', name='lines')

    fig = go.Figure(data=[scatter, lines])
    fig.update_layout(scene_aspectmode='data')
    fig.show()
def render_point_cloud_on_image(self, ax):
    projected_pts = view_points(np.transpose(self.point_cloud_in_box),
                                view=self.camera_intrinsic, normalize=True)
    ax.scatter(projected_pts[0, :], projected_pts[1, :], s=0.1, alpha=0.4)
def render_rotated_point_cloud_top_view(self, ax,
                                        view_matrix=np.array([[1, 0, 0], [0, 0, 1], [0, 0, 0]])):
    pc = self._get_rotated_point_cloud()
    projected_pts = view_points(np.transpose(pc), view=view_matrix, normalize=False)
    ax.scatter(projected_pts[0, :], projected_pts[1, :], s=0.1)