def render_track(self, im: np.ndarray, view: np.ndarray, normalize: bool = False,
                 color: Tuple = (255, 0, 0), linewidth: int = 2) -> None:
    """ Render the axis-aligned (loose) 2D bounding box on the image, with the track name on it. """
    corners = view_points(self.corners(), view, normalize=normalize)[:2, :]

    # Find the axis-aligned bounding box of the projected corners.
    min_size = np.amin(corners, axis=1)
    max_size = np.amax(corners, axis=1)
    cv2.rectangle(im, (int(min_size[0]), int(min_size[1])), (int(max_size[0]), int(max_size[1])),
                  color, linewidth)

    # Draw the label.
    label = self.name + ' ' + str(self.label)
    text_thickness = 1
    font_scale = 1.0
    text_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, font_scale, text_thickness)
    text_x = min_size[0]
    text_y = max(min_size[1] - text_thickness, 0)
    textbox_x = min(min_size[0] + text_size[0][0], im.shape[1])
    textbox_y = max(min_size[1] - 2 * text_thickness - text_size[0][1], 0)
    cv2.rectangle(im, (int(min_size[0]), int(min_size[1])), (int(textbox_x), int(textbox_y)), color, -1)
    cv2.putText(im, label, (int(text_x), int(text_y)), cv2.FONT_HERSHEY_SIMPLEX, font_scale,
                (255, 255, 255), text_thickness)
def get_lidar_pointcloud(self): """ Extracting lidar pointcloud for current timestep in current scene Author: Javier :return Point cloud [Position(x,y,z) X n_points] """ # Select sensor data record channel = 'LIDAR_TOP' sample_data_token = self.sample['data'][channel] sd_record = self.nusc.get('sample_data', sample_data_token) # Get aggregated point cloud in lidar frame. chan = sd_record['channel'] pc, times = LidarPointCloud.from_file_multisweep(self.nusc, self.sample, chan, channel, nsweeps=1) #calibration of LiDAR points ref_sd_record = self.nusc.get('sample_data', sample_data_token) cs_record = self.nusc.get('calibrated_sensor', ref_sd_record['calibrated_sensor_token']) pose_record = self.nusc.get('ego_pose', ref_sd_record['ego_pose_token']) ref_to_ego = transform_matrix(translation=cs_record['translation'], rotation=Quaternion(cs_record["rotation"])) # Compute rotation between 3D vehicle pose and "flat" vehicle pose (parallel to global z plane). ego_yaw = Quaternion(pose_record['rotation']).yaw_pitch_roll[0] rotation_vehicle_flat_from_vehicle = np.dot( Quaternion(scalar=np.cos(ego_yaw / 2), vector=[0, 0, np.sin(ego_yaw / 2)]).rotation_matrix, Quaternion(pose_record['rotation']).inverse.rotation_matrix) vehicle_flat_from_vehicle = np.eye(4) vehicle_flat_from_vehicle[:3, :3] = rotation_vehicle_flat_from_vehicle viewpoint = np.dot(vehicle_flat_from_vehicle, ref_to_ego) points = view_points(pc.points[:3, :], viewpoint, normalize=False) self.points_lidar = points return points
def project_pts_to_image(self, pointcloud: LidarPointCloud, token: str) -> np.ndarray: """ Project lidar points into image. :param pointcloud: The LidarPointCloud in nuScenes lidar frame. :param token: Unique KITTI token. :return: <np.float: N, 3.> X, Y are points in image pixel coordinates. Z is depth in image. """ # Copy and convert pointcloud. pc_image = LidarPointCloud(points=pointcloud.points.copy()) pc_image.rotate(self.kitti_to_nu_lidar_inv) # Rotate to KITTI lidar. # Transform pointcloud to camera frame. transforms = self.get_transforms(token, root=self.root) pc_image.rotate(transforms['velo_to_cam']['R']) pc_image.translate(transforms['velo_to_cam']['T']) # Project to image. depth = pc_image.points[2, :] points_fov = view_points(pc_image.points[:3, :], transforms['p_combined'], normalize=True) points_fov[2, :] = depth return points_fov
def project_kitti_box_to_image( box: Box, p_left: np.ndarray, imsize: Tuple[int, int]) -> Tuple[int, int, int, int]: """ Projects 3D box into KITTI image FOV. :param box: 3D box in KITTI reference frame. :param p_left: <np.float: 3, 4>. Projection matrix. :param imsize: (width , height). Image size. :return: (xmin, ymin, xmax, ymax). Bounding box in image plane. """ # Create a new box. box = box.copy() # KITTI defines the box center as the bottom center of the object. # We use the true center, so we need to adjust half height in negative y direction. box.translate(np.array([0, -box.wlh[2] / 2, 0])) # Project corners to 2d to get bbox in pixel coords. corners = np.array( [corner for corner in box.corners().T if corner[2] > 0]).T imcorners = view_points(corners, p_left, normalize=True)[:2] bbox = (np.min(imcorners[0]), np.min(imcorners[1]), np.max(imcorners[0]), np.max(imcorners[1])) # Crop bbox to prevent it extending outside image bbox_crop = (max(0, bbox[0]), max(0, bbox[1]), min(imsize[0], bbox[2]), min(imsize[1], bbox[3])) return bbox_crop
def map_pointcloud_to_image(pointcloud, cam_intrinsic, img_shape=(1600, 900)):
    """
    Map a point cloud given in camera coordinates onto the image plane.
    :param pointcloud (PointCloud or np.ndarray): point cloud in camera coordinates.
    :param cam_intrinsic (np.ndarray): 3x3 camera intrinsic matrix.
    :param img_shape: shape of the image (width, height).
    :return (points, depths, mask): mapped and filtered points, their depths, and the visibility mask
        over the original points.
    """
    pc = copy.deepcopy(pointcloud)
    if isinstance(pc, LidarPointCloud) or isinstance(pc, RadarPointCloud):
        points = pc.points[:3, :]
    else:
        points = pc

    (width, height) = img_shape
    depths = points[2, :]

    ## Take the actual picture (matrix multiplication with the camera matrix + renormalization).
    points = view_points(points[:3, :], cam_intrinsic, normalize=True)

    ## Remove points that are either outside or behind the camera.
    mask = np.ones(depths.shape[0], dtype=bool)
    mask = np.logical_and(mask, depths > 0)
    mask = np.logical_and(mask, points[0, :] > 1)
    mask = np.logical_and(mask, points[0, :] < width - 1)
    mask = np.logical_and(mask, points[1, :] > 1)
    mask = np.logical_and(mask, points[1, :] < height - 1)
    points = points[:, mask]
    depths = depths[mask]

    return points, depths, mask
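# Usage sketch for map_pointcloud_to_image above -- a hypothetical example, assuming a NuScenes
# handle `nusc`, a camera sample_data token `cam_token`, and a LidarPointCloud `pc_cam` that has
# already been transformed into that camera's frame (these names are illustrative, not from the source).
cam_data = nusc.get('sample_data', cam_token)
cam_cs = nusc.get('calibrated_sensor', cam_data['calibrated_sensor_token'])
K = np.array(cam_cs['camera_intrinsic'])
img_points, img_depths, vis_mask = map_pointcloud_to_image(pc_cam, K, img_shape=(1600, 900))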
def render_pc_in_bev(pc, ax=None, point_size=1, color='k', x_lim=(-20, 20), y_lim=(-20, 20)): """ Render point clouds in Birds Eye View (BEV). pc can be in vehicle or point sensor coordinate system. :param pc (np.float32: m, n): point cloud as a numpy array :param ax (plt ax): Axes on which to render the points :param point_size (int): point size :param color: points color in Matplotlib color format :param x_lim (int, int): x (min, max) range for plotting :param y_lim (int, int): y (min, max) range for plotting """ view = np.eye(4) # bev view if ax is None: fig, ax = plt.subplots(1, 1, figsize=(9, 9)) points = view_points(pc[:3, :], view, normalize=False) ax.scatter(points[0, :], points[1, :], c=color, s=point_size) # Show ego vehicle. ax.plot(0, 0, 'x', color='red') ax.set_xlim(x_lim[0], x_lim[1]) ax.set_ylim(y_lim[0], y_lim[1]) return ax
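# Minimal usage sketch for render_pc_in_bev (assumes `pc` is a 3xN or 4xN numpy point cloud in the
# vehicle or sensor frame; the plotting limits below are illustrative).
ax = render_pc_in_bev(pc, point_size=2, color='k', x_lim=(-40, 40), y_lim=(-40, 40))
plt.show()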
def filter_points(points_orig, cam_cs_record, img_shape=(1600, 900)):
    """
    :param points_orig: pointcloud or box corners in the coordinate system of the camera.
    :param cam_cs_record: calibrated sensor record of the camera to filter to.
    :param img_shape: shape of the image (width, height).
    :return: the input points restricted to those visible in the camera image.
    """
    if isinstance(points_orig, np.ndarray):
        points = NuscenesDataset.pc_to_sensor(points_orig, cam_cs_record)
        viewed_points = view_points(points[:3, :], np.array(cam_cs_record['camera_intrinsic']),
                                    normalize=True)
        visible = np.logical_and(viewed_points[0, :] > 0, viewed_points[0, :] < img_shape[0])
        visible = np.logical_and(visible, viewed_points[1, :] < img_shape[1])
        visible = np.logical_and(visible, viewed_points[1, :] > 0)
        visible = np.logical_and(visible, points[2, :] > 1)
        in_front = points[2, :] > 0.1  # True if a corner is at least 0.1 meter in front of the camera.
        isVisible = np.logical_and(visible, in_front)
        points_orig = points_orig.T[isVisible]
        points_orig = points_orig.T
        return points_orig
    else:
        # Use points_orig here: `points` is only defined inside the ndarray branch.
        raise TypeError('{} is not able to be filtered'.format(type(points_orig)))
def project_camera( corners, cam_model ):
    """
    -------------------------------------------------------------------------------------------------------------
    Project a 3D bounding box into camera (image) space (the origin of the camera space is the top-left corner).
    This function is taken from post_process_coords() in
    nuscenes-devkit/python-sdk/nuscenes/scripts/export_2d_annotations_as_json.py

    corners:    [np.array] the 8 corners of the 3D box
    cam_model:  [np.array] camera intrinsic/projection matrix passed to view_points()
    return:     [tuple of float] (min_x, min_y, max_x, max_y) coordinates of the 2D box,
                None if the object is outside the camera frame
    -------------------------------------------------------------------------------------------------------------
    """
    front = np.argwhere( corners[ 2, :] > 0 )            # check which corners are in front of the camera
    corners = corners[ :, front.flatten() ]              # and take those only
    corners_p = view_points( corners, cam_model, True )  # project the 3D corners in camera space
    corners_p = corners_p.T[ :, : 2 ]                    # take only X and Y in camera space
    poly = MultiPoint( corners_p.tolist() ).convex_hull
    img = box( 0, 0, img_size[ 0 ], img_size[ 1 ] )      # note: relies on a module-level img_size (width, height)

    if not poly.intersects( img ):
        return None                                      # return None if the projection is out of the camera frame

    inters = poly.intersection( img )
    coords = np.array( [ c for c in inters.exterior.coords ] )
    min_x = min( coords[ :, 0 ] )
    min_y = min( coords[ :, 1 ] )
    max_x = max( coords[ :, 0 ] )
    max_y = max( coords[ :, 1 ] )
    return min_x, min_y, max_x, max_y
def render(self,
           axis: Axes,
           view: np.ndarray = np.eye(3),
           normalize: bool = False,
           colors: Tuple = ('b', 'r', 'k'),
           linewidth: float = 2):
    """
    Renders the box in the provided Matplotlib axis.
    :param axis: Axis onto which the box should be drawn.
    :param view: <np.array: 3, 3>. Define a projection if needed (e.g. for drawing projection in an image).
    :param normalize: Whether to normalize the remaining coordinate.
    :param colors: (<Matplotlib.colors>: 3). Valid Matplotlib colors (<str> or normalized RGB tuple) for front,
        back and sides.
    :param linewidth: Width in pixel of the box sides.
    """
    corners = view_points(self.corners(), view, normalize=normalize)[:2, :]

    def draw_rect(selected_corners, color):
        prev = selected_corners[-1]
        for corner in selected_corners:
            axis.plot([prev[0], corner[0]], [prev[1], corner[1]], color=color, linewidth=linewidth)
            prev = corner

    # Draw the sides.
    for i in range(4):
        axis.plot([corners.T[i][0], corners.T[i + 4][0]],
                  [corners.T[i][1], corners.T[i + 4][1]],
                  color=colors[2], linewidth=linewidth)

    # Draw front (first 4 corners) and rear (last 4 corners) rectangles (3d) / lines (2d).
    draw_rect(corners.T[:4], colors[0])
    draw_rect(corners.T[4:], colors[1])

    # Draw line indicating the front.
    center_bottom_forward = np.mean(corners.T[2:4], axis=0)
    center_bottom = np.mean(corners.T[[2, 3, 7, 6]], axis=0)
    axis.plot([center_bottom[0], center_bottom_forward[0]],
              [center_bottom[1], center_bottom_forward[1]],
              color=colors[0], linewidth=linewidth)
def corners3d_to_image(corners, cam_cs_record, img_shape): """ # TODO: check compatibility Return the 2D box corners mapped to the image plane :param corners (np.array <N, 3, 8>) :param cam_cs_record (dict): calibrated sensor record of a camera dictionary from nuscenes dataset :param img_shape (tuple<width, height>) :return (ndarray<N,2,8>, list<N>) """ cornerList = [] mask = [] for box_corners in corners: box_corners = pc_to_sensor(box_corners, cam_cs_record) this_box_corners = view_points(box_corners, np.array( cam_cs_record['camera_intrinsic']), normalize=True)[:2, :] visible = np.logical_and(this_box_corners[0, :] > 0, this_box_corners[0, :] < img_shape[0]) visible = np.logical_and(visible, this_box_corners[1, :] < img_shape[1]) visible = np.logical_and(visible, this_box_corners[1, :] > 0) visible = np.logical_and(visible, box_corners[2, :] > 1) in_front = box_corners[ 2, :] > 0.1 # True if a corner is at least 0.1 meter in front of the camera. isVisible = any(visible) and all(in_front) mask.append(isVisible) if isVisible: cornerList.append(this_box_corners) return np.array(cornerList), mask
def project_kitti_box_to_image(box: Box, p_left: np.ndarray, imsize: Tuple[int, int]) \
        -> Union[None, Tuple[int, int, int, int]]:
    """
    Projects 3D box into image FOV.
    :param box: 3D box in camera coordinates.
    :param p_left: <np.float: 3, 4>. Projection matrix.
    :param imsize: (width, height). Image size.
    :return: (xmin, ymin, xmax, ymax). Bounding box in image plane or None if box is not in the image.
    """
    # Keep only the corners that are in front of the camera.
    corners = np.array([corner for corner in box.corners().T if corner[2] > 0]).T
    if len(corners) == 0:
        return None

    # Project corners that are in front of the camera to 2d to get bbox in pixel coords.
    imcorners = view_points(corners, p_left, normalize=True)[:2]
    bbox = (np.min(imcorners[0]), np.min(imcorners[1]), np.max(imcorners[0]), np.max(imcorners[1]))

    # Crop bbox to prevent it extending outside image (clamp x by the image width, y by the height).
    bbox_crop = tuple(max(0, b) for b in bbox)
    bbox_crop = (min(imsize[0], bbox_crop[0]),
                 min(imsize[1], bbox_crop[1]),
                 min(imsize[0], bbox_crop[2]),
                 min(imsize[1], bbox_crop[3]))

    # Detect if a cropped box is empty.
    if bbox_crop[0] >= bbox_crop[2] or bbox_crop[1] >= bbox_crop[3]:
        return None

    return bbox_crop
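# Hedged usage sketch for the project_kitti_box_to_image variant above (assumes `box_cam` is a
# nuScenes-devkit Box already expressed in the camera frame and `p_left` is a 3x4 projection matrix;
# 1242x375 is only an illustrative KITTI image size, not taken from the source).
bbox = project_kitti_box_to_image(box_cam, p_left, imsize=(1242, 375))
if bbox is not None:
    xmin, ymin, xmax, ymax = bbox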
def get_corners(self,
                view: np.ndarray = np.eye(3),
                normalize: bool = False) -> np.ndarray:
    # Return the projected top-face corners (indices 0, 4, 5, 1 in the nuScenes Box corner ordering)
    # as a 4x2 array.
    corners = view_points(self.corners(), view, normalize=normalize)[:2, :]
    corners = corners.T
    corners = corners[(0, 4, 5, 1), :]
    return corners
def __getitem__(self, index): # gather tokens and samples needed for data extraction tokens = self.token[index] if len(tokens.split('_')) < 2: print(tokens) im_token = tokens.split('_')[0] annotation_token = tokens.split('_')[1] sample_data = self.nusc.get('sample_data', im_token) image_name = sample_data['filename'] sample = self.nusc.get('sample', sample_data['sample_token']) lidar_token = sample['data']['LIDAR_TOP'] # get the sample_data for the image batch #image_path = '/data/sets/nuscenes/' + image_name img = imread('/home/fengjia/data/sets/nuscenes/' + image_name) im = np.array(img) # get ground truth boxes _, boxes, camera_intrinsic = self.nusc.get_sample_data(im_token, box_vis_level=BoxVisibility.ALL) for box in boxes: corners = view_points(box.corners(), view=camera_intrinsic, normalize=True) if box.token == annotation_token: # Find the crop area of the box width = corners[0].max() - corners[0].min() height = corners[1].max() - corners[1].min() x_mid = (corners[0].max() + corners[0].min())/2 y_mid = (corners[1].max() + corners[1].min())/2 side = max(width, height)*random.uniform(1.0,1.2) if (x_mid - side/2) < 0: side = x_mid*2 if (y_mid - side/2) < 0: side = y_mid*2 # Crop the image bottom_left = [int(x_mid - side/2), int(y_mid - side/2)] top_right = [int(x_mid + side/2), int(y_mid + side/2)] corners[0]=corners[0] - bottom_left[0] corners[1]=corners[1] - bottom_left[1] crop_img = im[bottom_left[1]:top_right[1],bottom_left[0]:top_right[0]] # Scale to same size scale = 128/ side scaled = cv2.resize(crop_img, (128, 128)) crop_img = np.transpose(scaled, (2,0,1)) crop_img = crop_img.astype(np.float32) crop_img /= 255 # Get corresponding point cloud for the crop pcl, m, offset, camera_intrinsic, box_corners = get_pointcloud(self.nusc, bottom_left, top_right, box, lidar_token, im_token) break pcl = pcl.astype(np.float32) box_corners = box_corners.astype(np.float32) return crop_img, pcl, offset, m, camera_intrinsic, box_corners
def visualize_sample(sample_token, nusc): points, sf = parse_sample(sample_token, nusc) sample_data = nusc.get('sample', sample_token) for cam_name in CamNames: cam_token = sample_data['data'][cam_name] cam_data = nusc.get('sample_data', cam_token) im = Image.open(osp.join(nusc.dataroot, cam_data['filename'])) cam_cs = nusc.get('calibrated_sensor', cam_data['calibrated_sensor_token']) # transform points for ego pose to camera pose cam_points = deepcopy(points) - np.array( cam_cs['translation'])[:, np.newaxis] cam_points = np.dot( Quaternion(cam_cs['rotation']).rotation_matrix.T, cam_points) # use camera intrinsics to project points into image plane. cam_points = view_points(cam_points, np.array(cam_cs['camera_intrinsic']), normalize=True) # filter out points which do not show in this camera mask = np.ones(cam_points.shape[1], dtype=bool) mask = np.logical_and(mask, cam_points[0, :] > 1) mask = np.logical_and(mask, cam_points[0, :] < im.size[0] - 1) mask = np.logical_and(mask, cam_points[1, :] > 1) mask = np.logical_and(mask, cam_points[1, :] < im.size[1] - 1) #print('Mask num', cam_points.shape[1], mask.sum()) cam_points = cam_points[:, mask] cam_sf = sf[:, mask] img = cv2.cvtColor(np.asarray(im), cv2.COLOR_RGB2BGR) prev_point = np.array([0, 0]) for i in range(cam_points.shape[1]): cur_cord = np.array([int(cam_points[0, i]), int(cam_points[1, i])]) cv2.circle(img, (int(cam_points[0, i]), int(cam_points[1, i])), 4, [0, 0, 255], -1) font = cv2.FONT_HERSHEY_SIMPLEX sf_str = '{:.3f} - {:.3f} - {:.3f}'.format(sf[0, i], sf[1, i], sf[2, i]) #print(sf_str) #print(cam_points[:, i]) #if np.sum(np.abs(cur_cord - prev_point)) > 300: # cv2.putText(img, sf_str, (int(cam_points[0, i]), int(cam_points[1, i])), font, 2, (255, 0, 0)) # prev_point = cur_cord _, boxes, _ = nusc.get_sample_data(cam_token, box_vis_level=BoxVisibility.ANY) for box in boxes: c = nusc.colormap[box.name] box.render_cv2(img, view=np.array(cam_cs['camera_intrinsic']), normalize=True, colors=(c, c, c)) im = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) im.save(os.path.join('/home/zhangty/tmp/visual_sf', cam_name + '.png'))
def visualize_sample(sample_token, nusc): ret_dict = parse_sample(sample_token, nusc) sample_data = nusc.get('sample', sample_token) for cam_name in CamNames: if ret_dict[cam_name] is None: # set xxx to None continue cam_token = sample_data['data'][cam_name] cam_data = nusc.get('sample_data', cam_token) im = Image.open(osp.join(nusc.dataroot, cam_data['filename'])) cam_cs = nusc.get('calibrated_sensor', cam_data['calibrated_sensor_token']) ret_points = ret_dict[cam_name] cam_points, cam_sf = ret_points[:3, ], ret_points[3:, ] # use camera intrinsics to project points into image plane. cam_points = view_points(cam_points, np.array(cam_cs['camera_intrinsic']), normalize=True) # filter out points which do not show in this camera mask = np.ones(cam_points.shape[1], dtype=bool) mask = np.logical_and(mask, cam_points[0, :] > 1) mask = np.logical_and(mask, cam_points[0, :] < im.size[0] - 1) mask = np.logical_and(mask, cam_points[1, :] > 1) mask = np.logical_and(mask, cam_points[1, :] < im.size[1] - 1) #print('Mask num', cam_points.shape[1], mask.sum()) cam_points = cam_points[:, mask] cam_sf = cam_sf[:, mask] img = cv2.cvtColor(np.asarray(im),cv2.COLOR_RGB2BGR) prev_point = np.array([0,0]) for i in range(cam_points.shape[1]): cur_cord = np.array([int(cam_points[0, i]), int(cam_points[1, i])]) cv2.circle(img, (int(cam_points[0, i]), int(cam_points[1, i])), 4, [0,0,255], -1) font = cv2.FONT_HERSHEY_SIMPLEX sf_str = '{:.3f} - {:.3f} - {:.3f}'.format(cam_sf[0, i], cam_sf[1, i], cam_sf[2, i]) direct = np.array([cam_sf[0, i], cam_sf[1, i]]) #direct = direct / (np.sum(np.abs(direct)) + 1e-4) * 80 direct = direct * 100 end_cord = cur_cord + direct.astype(np.int) print(sf_str, direct, end_cord) if np.sum(np.abs(cur_cord - prev_point)) > 300: cv2.arrowedLine(img, cur_cord, end_cord, [0, 255, 0], 5) prev_point = cur_cord _, boxes, _ = nusc.get_sample_data(cam_token, box_vis_level=BoxVisibility.ANY) for box in boxes: c = nusc.colormap[box.name] box.render_cv2(img, view=np.array(cam_cs['camera_intrinsic']), normalize=True, colors=(c, c, c)) im = Image.fromarray(cv2.cvtColor(img,cv2.COLOR_BGR2RGB)) im.save(os.path.join('/home/zhangty/tmp/visual_sf', cam_name+'.png'))
def render_annotation(self, anntoken: str, margin: float = 10, view: np.ndarray = np.eye(4), box_vis_level: BoxVisibility = BoxVisibility.ANY) -> None: """ Render selected annotation. :param anntoken: Sample_annotation token. :param margin: How many meters in each direction to include in LIDAR view. :param view: LIDAR view point. :param box_vis_level: If sample_data is an image, this sets required visibility for boxes. """ ann_record = self.nusc.get('sample_annotation', anntoken) sample_record = self.nusc.get('sample', ann_record['sample_token']) assert 'LIDAR_TOP' in sample_record['data'].keys(), 'No LIDAR_TOP in data, cant render' fig, axes = plt.subplots(1, 2, figsize=(18, 9)) # Figure out which camera the object is fully visible in (this may return nothing) boxes, cam = [], [] cams = [key for key in sample_record['data'].keys() if 'CAM' in key] for cam in cams: _, boxes, _ = self.nusc.get_sample_data(sample_record['data'][cam], box_vis_level=box_vis_level, selected_anntokens=[anntoken]) if len(boxes) > 0: break # We found an image that matches. Let's abort. assert len(boxes) > 0, "Could not find image where annotation is visible. Try using e.g. BoxVisibility.ANY." assert len(boxes) < 2, "Found multiple annotations. Something is wrong!" cam = sample_record['data'][cam] # Plot LIDAR view lidar = sample_record['data']['LIDAR_TOP'] data_path, boxes, camera_intrinsic = self.nusc.get_sample_data(lidar, selected_anntokens=[anntoken]) LidarPointCloud.from_file(data_path).render_height(axes[0], view=view) for box in boxes: c = np.array(self.get_color(box.name)) / 255.0 box.render(axes[0], view=view, colors=(c, c, c)) corners = view_points(boxes[0].corners(), view, False)[:2, :] axes[0].set_xlim([np.min(corners[0, :]) - margin, np.max(corners[0, :]) + margin]) axes[0].set_ylim([np.min(corners[1, :]) - margin, np.max(corners[1, :]) + margin]) axes[0].axis('off') axes[0].set_aspect('equal') # Plot CAMERA view data_path, boxes, camera_intrinsic = self.nusc.get_sample_data(cam, selected_anntokens=[anntoken]) im = Image.open(data_path) axes[1].imshow(im) axes[1].set_title(self.nusc.get('sample_data', cam)['channel']) axes[1].axis('off') axes[1].set_aspect('equal') for box in boxes: c = np.array(self.get_color(box.name)) / 255.0 box.render(axes[1], view=camera_intrinsic, normalize=True, colors=(c, c, c))
def parse_labels( data: NuScenes, boxes: List[Box], ego_pose: DictStrAny, calib_sensor: DictStrAny, img_size: Optional[Tuple[int, int]] = None, ) -> Optional[List[Label]]: """Parse NuScenes labels into sensor frame.""" if len(boxes): labels = [] # transform into the sensor coord system transform_boxes(boxes, ego_pose, calib_sensor) intrinsic_matrix: NDArrayF64 = np.array( calib_sensor["camera_intrinsic"], dtype=np.float64) for box in boxes: box_class = category_to_detection_name(box.name) in_image = True if img_size is not None: in_image = box_in_image(box, intrinsic_matrix, img_size) if in_image and box_class is not None: xyz = tuple(box.center.tolist()) w, l, h = box.wlh roty = quaternion_to_yaw(box.orientation) box2d = None if img_size is not None: # Project 3d box to 2d. corners = box.corners() corner_coords = (view_points(corners, intrinsic_matrix, True).T[:, :2].tolist()) # Keep only corners that fall within the image, transform box2d = xyxy_to_box2d(*post_process_coords(corner_coords)) instance_data = data.get("sample_annotation", box.token) # Attributes can be retrieved via instance_data and also the # category is more fine-grained than box_class. # This information could be stored in attributes if needed in # the future label = Label( id=instance_data["instance_token"], category=box_class, box2d=box2d, box3d=Box3D( location=xyz, dimension=(h, w, l), orientation=(0, roty, 0), alpha=rotation_y_to_alpha(roty, xyz), # type: ignore ), ) labels.append(label) return labels return None
def _anno_to_2d_bbox(self, anno, pc_file, cam_front, lidar_top, ego_pose_cam, ego_pose_lidar, cam_intrinsic): # Make pixel indexes 0-based dists = [] nusc_box = self.nusc.get_box(anno['token']) # Move them to the ego-pose frame. nusc_box.translate(-np.array(ego_pose_cam['translation'])) nusc_box.rotate(Quaternion(ego_pose_cam['rotation']).inverse) # Move them to the calibrated sensor frame. nusc_box.translate(-np.array(cam_front['translation'])) nusc_box.rotate(Quaternion(cam_front['rotation']).inverse) dists.append(np.linalg.norm(nusc_box.center)) # Filter out the corners that are not in front of the calibrated sensor. #Corners is a 3x8 matrix, first four corners are the ones facing forward, last 4 are ons facing backward #(0,1) top, forward #(2,3) bottom, forward #(4,5) top, backward #(6,7) bottom, backward corners_3d = nusc_box.corners() #Getting first 4 values of Z dists.append(np.mean(corners_3d[2, :4])) # z is height of object for ego pose or lidar # y is height of object for camera frame #TODO: Discover why this is taking the Z axis in_front = np.argwhere(corners_3d[2, :] > 0).flatten() corners_3d = corners_3d[:, in_front] #print(corners_3d) #above = np.argwhere(corners_3d[2, :] > 0).flatten() #corners_3d = corners_3d[:, above] # Project 3d box to 2d. corner_coords = view_points(corners_3d, cam_intrinsic, True).T[:, :2].tolist() #print(corner_coords) # Keep only corners that fall within the image. polygon_from_2d_box = MultiPoint(corner_coords).convex_hull img_canvas = box(0, 0, self._imwidth - 1, self._imheight - 1) if polygon_from_2d_box.intersects(img_canvas): img_intersection = polygon_from_2d_box.intersection(img_canvas) intersection_coords = np.array( [coord for coord in img_intersection.exterior.coords]) min_x = min(intersection_coords[:, 0]) min_y = min(intersection_coords[:, 1]) max_x = max(intersection_coords[:, 0]) max_y = max(intersection_coords[:, 1]) #print('contained pts {}'.format(contained_points)) return [min_x, min_y, max_x, max_y], dists else: return None, dists
def show_annotation(nusc, sample_data_token): data_path, boxes, camera_intrinsic = nusc.get_sample_data( sample_data_token, box_vis_level=BoxVisibility.ANY) data = Image.open(data_path) _, ax = plt.subplots(1, 1, figsize=(9, 16)) ax.imshow(data) for box in boxes: corners = view_points(box.corners(), camera_intrinsic, normalize=True)[:2, :] c = np.array(nusc.explorer.get_color(box.name)) / 255.0 color = (c, c, c) draw_box_matlab(ax, corners, color)
def generate_sf_for_one_sample(sample_token, nusc, save_dir, meta=None): ret_dict = parse_sample(sample_token, nusc) sample_data = nusc.get('sample', sample_token) for cam_name in CamNames: ret = {} cam_token = sample_data['data'][cam_name] cam_data = nusc.get('sample_data', cam_token) filename = cam_data['filename'] ret['img_path'] = filename if ret_dict[cam_name] is None: raise NotImplementedError ret['points_path'] = None meta[cam_token] = ret continue cam_cs = nusc.get('calibrated_sensor', cam_data['calibrated_sensor_token']) ret_points_list = ret_dict[cam_name] i = 0 for ret_points in ret_points_list: if ret_points is None: ret['points_path_{}'.format(SFNameList[i])] = None i += 1 continue cam_points, cam_sf = ret_points[:3, ], ret_points[3:, ] cam_points = view_points(cam_points, np.array(cam_cs['camera_intrinsic']), normalize=True) # filter out points which do not show in this camera mask = np.ones(cam_points.shape[1], dtype=bool) mask = np.logical_and(mask, cam_points[0, :] > 1) mask = np.logical_and(mask, cam_points[0, :] < 1599) mask = np.logical_and(mask, cam_points[1, :] > 1) mask = np.logical_and(mask, cam_points[1, :] < 899) cam_points = cam_points[:, mask] cam_sf = cam_sf[:, mask] save_points = np.concatenate([cam_points, cam_sf], axis=0) img_name = osp.split(filename)[-1].split('.')[0] np.save(osp.join(save_dir, img_name + SFNameList[i]), save_points) # will add .npy postfix automaticlly save_path = osp.join(img_name + SFNameList[i] +'.npy') ret['points_path_{}'.format(SFNameList[i])] = save_path i += 1 meta[cam_token] = ret return meta
def get_the_two_dimensional_annotations(self, scene_token: str, channel: str = 'CAM_FRONT' ): # Get records from DB scene_rec = self.nusc.get('scene', scene_token) sample_rec = self.nusc.get('sample', scene_rec['first_sample_token']) sd_rec = self.nusc.get('sample_data', sample_rec['data'][channel]) scene_list = [] has_more_frames = True while has_more_frames: # Get data from DB impath, boxes, camera_intrinsic = self.nusc.get_sample_data(sd_rec['token'], box_vis_level=BoxVisibility.ANY) reprojections = {} annotation_list = [] for three_d_box in boxes: entry = {} cs_rec = self.nusc.get('calibrated_sensor', sd_rec['calibrated_sensor_token']) camera_intrinsic = np.array(cs_rec['camera_intrinsic']) corner_coords = view_points(three_d_box.corners(), camera_intrinsic, True).T[:, :2].tolist() boundaries = [] min_x = int(min(coord[0] for coord in corner_coords)) min_y = int(min(coord[1] for coord in corner_coords)) max_x = int(max(coord[0] for coord in corner_coords)) max_y = int(max(coord[1] for coord in corner_coords)) boundaries.append((min_x, min_y)) boundaries.append((max_x, min_y)) boundaries.append((max_x, max_y)) boundaries.append((min_x, max_y)) entry['corners'] = boundaries entry['category_name'] = three_d_box.name sample_annotation = self.nusc.get('sample_annotation', three_d_box.token) entry['instance_token'] = sample_annotation['instance_token'] annotation_list.append(entry) reprojections["annotation_list"] = annotation_list reprojections["filename"] = impath reprojections['sample_data_token'] = sd_rec['token'] reprojections['timestamp'] = sd_rec['timestamp'] reprojections['next'] = sd_rec['next'] scene_list.append(reprojections) if not sd_rec['next'] == "": sd_rec = self.nusc.get('sample_data', sd_rec['next']) else: has_more_frames = False return scene_list
def get_gt(nusc, corners, camera_token, pointsensor_token):
    cam = nusc.get('sample_data', camera_token)
    pointsensor = nusc.get('sample_data', pointsensor_token)
    pcl_path = os.path.join(nusc.dataroot, pointsensor['filename'])
    # `im` is returned below, so the image must actually be loaded here.
    im = Image.open(os.path.join(nusc.dataroot, cam['filename']))
    pc = LidarPointCloud(corners)

    # Points live in the point sensor frame. So they need to be transformed via global to the image plane.
    # First step: transform the point-cloud to the ego vehicle frame for the timestamp of the sweep.
    cs_record = nusc.get('calibrated_sensor', pointsensor['calibrated_sensor_token'])
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix)
    pc.translate(np.array(cs_record['translation']))

    # Second step: transform to the global frame.
    poserecord = nusc.get('ego_pose', pointsensor['ego_pose_token'])
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix)
    pc.translate(np.array(poserecord['translation']))

    # Third step: transform into the ego vehicle frame for the timestamp of the image.
    poserecord = nusc.get('ego_pose', cam['ego_pose_token'])
    pc.translate(-np.array(poserecord['translation']))
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix.T)

    # Fourth step: transform into the camera.
    cs_record = nusc.get('calibrated_sensor', cam['calibrated_sensor_token'])
    pc.translate(-np.array(cs_record['translation']))
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix.T)

    # Fifth step: actually take a "picture" of the point cloud.
    # Grab the depths (camera frame z axis points away from the camera).
    depths = pc.points[2, :]

    # Retrieve the color from the depth.
    coloring = depths

    # Take the actual picture (matrix multiplication with camera-matrix + renormalization).
    points = view_points(pc.points[:3, :], np.array(cs_record['camera_intrinsic']), normalize=True)

    # Remove points that are either outside or behind the camera. Leave a margin of 1 pixel for aesthetic reasons.
    # Also make sure points are at least 1m in front of the camera to avoid seeing the lidar points on the camera
    # casing for non-keyframes which are slightly out of sync.
    # mask = np.ones(depths.shape[0], dtype=bool)
    # mask = np.logical_and(mask, depths > min_dist)
    # mask = np.logical_and(mask, points[0, :] > 1)
    # mask = np.logical_and(mask, points[0, :] < im.size[0] - 1)
    # mask = np.logical_and(mask, points[1, :] > 1)
    # mask = np.logical_and(mask, points[1, :] < im.size[1] - 1)
    # points = points[:, mask]
    # coloring = coloring[mask]

    return points, coloring, im
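# Alternative sketch (not from the source): the four-step chain in get_gt can be composed into a
# single 4x4 lidar-to-camera transform with nuscenes.utils.geometry_utils.transform_matrix.
# Here `cs_lidar`, `pose_lidar`, `pose_cam` and `cs_cam` stand for the same records fetched above.
from nuscenes.utils.geometry_utils import transform_matrix

car_from_lidar = transform_matrix(cs_lidar['translation'], Quaternion(cs_lidar['rotation']), inverse=False)
global_from_car = transform_matrix(pose_lidar['translation'], Quaternion(pose_lidar['rotation']), inverse=False)
car_from_global = transform_matrix(pose_cam['translation'], Quaternion(pose_cam['rotation']), inverse=True)
cam_from_car = transform_matrix(cs_cam['translation'], Quaternion(cs_cam['rotation']), inverse=True)
cam_from_lidar = cam_from_car @ car_from_global @ global_from_car @ car_from_lidar  # 4x4 homogeneous transform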
def get_radar_pointcloud(self, channel='RADAR_FRONT'): """ Extracting radar detection pointcloud with velocities for current timestep in current scene for specified radar channel. Author: Gabor :param channel: Radar channel selection. Front radar as default :return (Point cloud [Position(x,y,z) X n_points], Point cloud [Velocity(x,y,z) X n_points]) """ # Check for correct radar channel selection assert channel in self.radar_sensor_channels, " [!] Radar channel \"{}\" not found.".format(channel) # Select sensor data record sample_data_token = self.sample['data'][channel] sd_record = self.nusc.get('sample_data', sample_data_token) lidar_token = self.sample['data']['LIDAR_TOP'] # The point cloud is transformed to the lidar frame for visualization purposes. ref_chan = 'LIDAR_TOP' pc, times = RadarPointCloud.from_file_multisweep(self.nusc, self.sample, channel, ref_chan, nsweeps=1) # Transform radar velocities (x is front, y is left), as these are not transformed when loading the point # cloud. radar_cs_record = self.nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token']) lidar_sd_record = self.nusc.get('sample_data', lidar_token) lidar_cs_record = self.nusc.get('calibrated_sensor', lidar_sd_record['calibrated_sensor_token']) velocities = pc.points[8:10, :] # Compensated velocity velocities = np.vstack((velocities, np.zeros(pc.points.shape[1]))) velocities = np.dot(Quaternion(radar_cs_record['rotation']).rotation_matrix, velocities) velocities = np.dot(Quaternion(lidar_cs_record['rotation']).rotation_matrix.T, velocities) velocities[2, :] = np.zeros(pc.points.shape[1]) # Show point cloud. points = view_points(pc.points[:3, :], np.eye(4), normalize=False) points_vel = view_points(pc.points[:3, :] + velocities, np.eye(4), normalize=False) print(" [*] Radar point cloud extracted from channel \"" + channel + "\". Shape: " + str(points.shape)) return points, points_vel
def extremePoints(self, view: np.ndarray = np.eye(3), normalize: bool = False): corners = view_points(self.corners(), view, normalize=normalize)[:2, :] # corners.sort(key = lambda x : abs(x[0] - self.center[0]), reverse = True) corners = corners.T corners = corners[corners[:, 0].argsort()] corners = np.array([corners[0], corners[1], corners[-1], corners[-2]]) corners = corners.T l = np.min(corners[0]) # left limit r = np.max(corners[0]) # right limit t = np.max(corners[1]) # top limit b = np.min(corners[1]) # bottom limit return np.array([[l, b], [r, b], [r, t], [l, t]])
def _render_helper(self, color_channel: int, ax: Axes, view: np.ndarray, x_lim: Tuple, y_lim: Tuple, marker_size: float) -> None: """ Helper function for rendering. :param color_channel: Point channel to use as color. :param ax: Axes on which to render the points. :param view: <np.float: n, n>. Defines an arbitrary projection (n <= 4). :param x_lim: (min <float>, max <float>). :param y_lim: (min <float>, max <float>). :param marker_size: Marker size. """ points = view_points(self.points[:3, :], view, normalize=False) ax.scatter(points[0, :], points[1, :], c=self.points[color_channel, :], s=marker_size) ax.set_xlim(x_lim[0], x_lim[1]) ax.set_ylim(y_lim[0], y_lim[1])
def map_lidar_to_imgidx(pts, info):
    # Param: pts=(3, N) points in the lidar frame (the matrix products below assume the 3xN layout).
    # Return: pts_img=(N, 2); (col, row)
    pts = pts.copy()
    pts = Quaternion(info['lidar2ego_rotation']).rotation_matrix @ pts
    pts = pts + np.array(info['lidar2ego_translation'])[:, np.newaxis]
    pts = Quaternion(info['ego2global_rotation_lidar']).rotation_matrix @ pts
    pts = pts + np.array(info['ego2global_translation_lidar'])[:, np.newaxis]
    pts = pts - np.array(info['ego2global_translation_cam'])[:, np.newaxis]
    pts = Quaternion(info['ego2global_rotation_cam']).rotation_matrix.T @ pts
    pts = pts - np.array(info['cam2ego_translation'])[:, np.newaxis]
    pts = Quaternion(info['cam2ego_rotation']).rotation_matrix.T @ pts
    pts_img = view_points(pts, np.array(info['cam_intrinsic']), normalize=True)
    pts_img = pts_img.T[:, :2].astype(np.int32)
    return pts_img
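# Illustrative call to map_lidar_to_imgidx with placeholder identity calibration -- the `info` key
# layout mirrors the one used above (quaternions as [w, x, y, z], translations as 3-vectors,
# cam_intrinsic as 3x3); all numbers are made up, not real sensor calibration.
identity_q = [1.0, 0.0, 0.0, 0.0]
zero_t = [0.0, 0.0, 0.0]
info = {
    'lidar2ego_rotation': identity_q, 'lidar2ego_translation': zero_t,
    'ego2global_rotation_lidar': identity_q, 'ego2global_translation_lidar': zero_t,
    'ego2global_rotation_cam': identity_q, 'ego2global_translation_cam': zero_t,
    'cam2ego_rotation': identity_q, 'cam2ego_translation': zero_t,
    'cam_intrinsic': [[1000.0, 0.0, 800.0], [0.0, 1000.0, 450.0], [0.0, 0.0, 1.0]],
}
pts = np.array([[0.0, 1.0], [0.0, 0.0], [5.0, 10.0]])  # 3 x N points, 5 m and 10 m in front of the camera
print(map_lidar_to_imgidx(pts, info))                  # pixel (col, row) indices: [[800 450], [900 450]]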
def map_pointcloud_to_image(pc, im, cam_cs_record, radar=False, global_coords=False, ego_pose=None):
    """
    Given a point cloud (lidar/radar) and a camera image, map the point cloud onto the image plane
    and plot it on the image.
    :param pc: point cloud (in sensor, vehicle or global coordinates, see global_coords/ego_pose).
    :param im: camera image as a numpy array.
    :param cam_cs_record: calibrated sensor record of the camera.
    :param radar: whether the point cloud comes from a radar.
    :param global_coords: whether the point cloud is given in global coordinates.
    :param ego_pose: ego pose record, required when global_coords is True.
    :return: the image with the projected points plotted on it.
    """
    ## Transform into the camera.
    pc = NuscenesDataset.pc_to_sensor(pc, cam_cs_record, global_coordinates=global_coords,
                                      ego_pose=ego_pose)

    ## Grab the depths (camera frame z axis points away from the camera).
    depths = pc.points[2, :]

    ## Retrieve the color from the depth.
    coloring = depths

    ## Take the actual picture (matrix multiplication with camera-matrix + renormalization).
    points = view_points(pc.points[:3, :], np.array(cam_cs_record['camera_intrinsic']), normalize=True)

    ## Remove points that are either outside or behind the camera.
    # Leave a margin of 1 pixel for aesthetic reasons.
    mask = np.ones(depths.shape[0], dtype=bool)
    mask = np.logical_and(mask, depths > 0)
    mask = np.logical_and(mask, points[0, :] > 1)
    mask = np.logical_and(mask, points[0, :] < im.shape[1] - 1)
    mask = np.logical_and(mask, points[1, :] > 1)
    mask = np.logical_and(mask, points[1, :] < im.shape[0] - 1)
    points = points[:, mask]
    coloring = coloring[mask]

    if radar:
        im = plot_points_on_image(im, points.T, coloring, 8)
    else:
        im = plot_points_on_image(im, points.T, coloring, 2)
    return im
def control_points_loc(self,
                       view: np.ndarray = np.eye(3),
                       normalize: bool = True,
                       wlh_factor: float = 1.0):
    """
    Return the projected box corners plus their mean as control points.
    :param view: <np.array: 3, 3>. Define a projection if needed (e.g. for projecting into an image).
    :param normalize: Whether to normalize the remaining coordinate.
    :param wlh_factor: Scale factor applied to the projected corner coordinates.
    :return: <np.array: 9, 2>. The 8 projected corners followed by their centroid.
    """
    corners_points = view_points(self.corners(), view, normalize=normalize)[:2, :]
    corners_points = corners_points * wlh_factor
    center_point = np.mean(corners_points.T[:], axis=0)
    control_points = np.vstack((corners_points.T, center_point))
    return control_points
def render_cv2(self, im: np.ndarray, view: np.ndarray = np.eye(3), normalize: bool = False, colors: Tuple = ((0, 0, 255), (255, 0, 0), (155, 155, 155)), linewidth: int = 2) -> None: """ Renders box using OpenCV2. :param im: <np.array: width, height, 3>. Image array. Channels are in BGR order. :param view: <np.array: 3, 3>. Define a projection if needed (e.g. for drawing projection in an image). :param normalize: Whether to normalize the remaining coordinate. :param colors: ((R, G, B), (R, G, B), (R, G, B)). Colors for front, side & rear. :param linewidth: Linewidth for plot. """ corners = view_points(self.corners(), view, normalize=normalize)[:2, :] def draw_rect(selected_corners, color): prev = selected_corners[-1] for corner in selected_corners: cv2.line(im, (int(prev[0]), int(prev[1])), (int(corner[0]), int(corner[1])), color, linewidth) prev = corner # Draw the sides for i in range(4): cv2.line(im, (int(corners.T[i][0]), int(corners.T[i][1])), (int(corners.T[i + 4][0]), int(corners.T[i + 4][1])), colors[2][::-1], linewidth) # Draw front (first 4 corners) and rear (last 4 corners) rectangles(3d)/lines(2d) draw_rect(corners.T[:4], colors[0][::-1]) draw_rect(corners.T[4:], colors[1][::-1]) # Draw line indicating the front center_bottom_forward = np.mean(corners.T[2:4], axis=0) center_bottom = np.mean(corners.T[[2, 3, 7, 6]], axis=0) cv2.line(im, (int(center_bottom[0]), int(center_bottom[1])), (int(center_bottom_forward[0]), int(center_bottom_forward[1])), colors[0][::-1], linewidth)
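# Illustrative call to the render_cv2 method above (assumes `im` is a BGR image array, `box` a Box
# instance in the camera frame and `K` the 3x3 camera intrinsic matrix; names are placeholders).
box.render_cv2(im, view=K, normalize=True)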
def project_kitti_box_to_image(
        box: Box, p_left: np.ndarray, imsize: Tuple[int, int]) -> Union[None, Tuple[int, int, int, int]]:
    """
    Projects 3D box into KITTI image FOV.
    :param box: 3D box in KITTI reference frame.
    :param p_left: <np.float: 3, 4>. Projection matrix.
    :param imsize: (width, height). Image size.
    :return: (xmin, ymin, xmax, ymax). Bounding box in image plane or None if box is not in the image.
    """
    # Create a new box so the caller's box is not modified by the translation below.
    box = box.copy()

    # KITTI defines the box center as the bottom center of the object.
    # We use the true center, so we need to adjust half height in negative y direction.
    box.translate(np.array([0, -box.wlh[2] / 2, 0]))

    # Check that some corners are in front of the camera.
    corners = np.array([corner for corner in box.corners().T if corner[2] > 0]).T
    if len(corners) == 0:
        return None

    # Project corners that are in front of the camera to 2d to get bbox in pixel coords.
    imcorners = view_points(corners, p_left, normalize=True)[:2]
    bbox = (np.min(imcorners[0]), np.min(imcorners[1]), np.max(imcorners[0]), np.max(imcorners[1]))

    # Crop bbox to prevent it extending outside image (clamp x by the image width, y by the height).
    bbox_crop = tuple(max(0, b) for b in bbox)
    bbox_crop = (
        min(imsize[0], bbox_crop[0]),
        min(imsize[1], bbox_crop[1]),
        min(imsize[0], bbox_crop[2]),
        min(imsize[1], bbox_crop[3]),
    )

    # Detect if a cropped box is empty.
    if bbox_crop[0] >= bbox_crop[2] or bbox_crop[1] >= bbox_crop[3]:
        return None

    return bbox_crop