def map_pointcloud_to_image(self, pointsensor_token: str, camera_token: str) -> Tuple:
    """
    Given a point sensor (lidar/radar) token and camera sample_data token, load point-cloud and map it to the image plane.
    :param pointsensor_token: Lidar/radar sample_data token.
    :param camera_token: Camera sample_data token.
    :return (pointcloud <np.float: 2, n)>, coloring <np.float: n>, image <Image>).
    """
    cam = self.nusc.get('sample_data', camera_token)
    pointsensor = self.nusc.get('sample_data', pointsensor_token)
    pcl_path = osp.join(self.nusc.dataroot, pointsensor['filename'])
    if pointsensor['sensor_modality'] == 'lidar':
        pc = LidarPointCloud.from_file(pcl_path)
    else:
        pc = RadarPointCloud.from_file(pcl_path)
    im = Image.open(osp.join(self.nusc.dataroot, cam['filename']))

    # Points live in the point sensor frame, so they need to be transformed via global to the image plane.
    # First step: transform the point-cloud to the ego vehicle frame for the timestamp of the sweep.
    cs_record = self.nusc.get('calibrated_sensor', pointsensor['calibrated_sensor_token'])
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix)
    pc.translate(np.array(cs_record['translation']))

    # Second step: transform to the global frame.
    poserecord = self.nusc.get('ego_pose', pointsensor['ego_pose_token'])
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix)
    pc.translate(np.array(poserecord['translation']))

    # Third step: transform into the ego vehicle frame for the timestamp of the image.
    poserecord = self.nusc.get('ego_pose', cam['ego_pose_token'])
    pc.translate(-np.array(poserecord['translation']))
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix.T)

    # Fourth step: transform into the camera.
    cs_record = self.nusc.get('calibrated_sensor', cam['calibrated_sensor_token'])
    pc.translate(-np.array(cs_record['translation']))
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix.T)

    # Fifth step: actually take a "picture" of the point cloud.
    # Grab the depths (camera frame z axis points away from the camera).
    depths = pc.points[2, :]

    # Use the depth (z in the camera frame) as the coloring.
    coloring = pc.points[2, :]

    # Take the actual picture (matrix multiplication with camera-matrix + renormalization).
    points = view_points(pc.points[:3, :], np.array(cs_record['camera_intrinsic']), normalize=True)

    # Remove points that are either outside or behind the camera. Leave a margin of 1 pixel for aesthetic reasons.
    mask = np.ones(depths.shape[0], dtype=bool)
    mask = np.logical_and(mask, depths > 0)
    mask = np.logical_and(mask, points[0, :] > 1)
    mask = np.logical_and(mask, points[0, :] < im.size[0] - 1)
    mask = np.logical_and(mask, points[1, :] > 1)
    mask = np.logical_and(mask, points[1, :] < im.size[1] - 1)
    points = points[:, mask]
    coloring = coloring[mask]

    return points, coloring, im
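# Usage sketch (not from the original source): `mapper` stands for a hypothetical
# instance of the class that defines map_pointcloud_to_image above, and the
# dataroot path is an assumption.
import matplotlib.pyplot as plt
from nuscenes.nuscenes import NuScenes

nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
sample = nusc.sample[0]
points, coloring, im = mapper.map_pointcloud_to_image(sample['data']['RADAR_FRONT'],
                                                      sample['data']['CAM_FRONT'])

# Overlay the projected radar points on the camera image, colored by depth.
plt.imshow(im)
plt.scatter(points[0, :], points[1, :], c=coloring, s=5)
plt.axis('off')
plt.show()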
def get_radar_pc_from_file(nusc, sample_rec, chan: str,
                           invalid_states=None, dynprop_states=None, ambig_states=None):
    """
    Returns the radar point cloud of a single keyframe.
    Original function can be found at:
    https://github.com/nutonomy/nuscenes-devkit/blob/ae022aba3b37f07202ea402f8278979097738369/python-sdk/nuscenes/utils/data_classes.py#L296.
    Arguments:
        nusc: A NuScenes instance, <NuScenes>.
        sample_rec: The current sample, <dict>.
        chan: The radar channel from which the point cloud is extracted, <str>.
        invalid_states: Radar states to be kept, <int List>.
        dynprop_states: Radar states to be kept, <int List>. Use [0, 2, 6] for moving objects only.
        ambig_states: Radar states to be kept, <int List>.
    """
    # Get the filename.
    sample_data = nusc.get('sample_data', sample_rec['data'][chan])
    pcl_path = os.path.join(nusc.dataroot, sample_data['filename'])

    # Extract the point cloud.
    pc = RadarPointCloud.from_file(pcl_path,
                                   invalid_states=invalid_states,
                                   dynprop_states=dynprop_states,
                                   ambig_states=ambig_states)
    return pc
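# Usage sketch (not from the original source): the dataroot below is an assumption;
# dynprop_states=[0, 2, 6] keeps moving objects only, as the docstring above notes.
from nuscenes.nuscenes import NuScenes

nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
sample_rec = nusc.sample[0]
pc = get_radar_pc_from_file(nusc, sample_rec, 'RADAR_FRONT', dynprop_states=[0, 2, 6])
print(pc.points.shape)  # (18, n): 18 radar fields for the n detections that pass the filters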
def _get_radar_data(self, sample_rec: dict, sample_data: dict, nsweeps: int) -> RadarPointCloud:
    """
    Returns the radar point cloud in vehicle coordinates.
    :param sample_rec: sample record dictionary from nuscenes
    :param sample_data: sample data dictionary from nuscenes
    :param nsweeps: number of sweeps to return for this radar
    :return pc: RadarPointCloud containing this sample and all sweeps
    """
    radar_path = os.path.join(self.nusc_path, sample_data['filename'])
    cs_record = self.nusc.get('calibrated_sensor', sample_data['calibrated_sensor_token'])

    if nsweeps > 1:
        # Aggregated sweeps are returned in the frame of the reference channel
        # (here the radar channel itself, i.e. sensor coordinates).
        pc, _ = RadarPointCloud.from_file_multisweep(self.nusc,
                                                     sample_rec,
                                                     sample_data['channel'],
                                                     sample_data['channel'],
                                                     nsweeps=nsweeps,
                                                     min_distance=self.radar_min_distance)
    else:
        # Returned in sensor coordinates.
        pc = RadarPointCloud.from_file(radar_path)

    # Sensor to vehicle transform.
    rot_matrix = Quaternion(cs_record['rotation']).rotation_matrix
    pc.rotate(rot_matrix)
    pc.translate(np.array(cs_record['translation']))

    return pc
def from_sample_token(self, nusc: 'NuScenes', sample_token: str, sensor: str) -> 'myRadarPointCloud':
    # Trace back the frame data - here we have all the sensor data and all the annotations.
    sample_rec = nusc.get('sample', sample_token)
    # Get the sensor data.
    sd_rec = nusc.get('sample_data', sample_rec['data'][sensor])
    # Get the sensor calibration.
    cs_rec = nusc.get('calibrated_sensor', sd_rec['calibrated_sensor_token'])
    # Ego pose - ego car to global frame.
    ep_rec = nusc.get('ego_pose', sd_rec['ego_pose_token'])

    # A_cs^car: homogeneous transformation matrix from the calibrated sensor frame to the car (ego) frame.
    A_cs_2_car = transform_matrix(cs_rec['translation'], Quaternion(cs_rec['rotation']), inverse=False)
    # A_car^gl: homogeneous transformation matrix from the car (ego) frame to the global frame.
    A_car_2_gl = transform_matrix(ep_rec['translation'], Quaternion(ep_rec['rotation']), inverse=False)

    # Read the radar pointcloud in sensor coordinates.
    filepath = os.path.join(nusc.dataroot, sd_rec['filename'])
    pc_cs = RadarPointCloud.from_file(
        file_name=filepath
    ).points  # <np.float: d, n>: point cloud matrix with d dimensions and n points.

    # Select fields: x, y, vx_comp, vy_comp, RCS.
    pc_cs = pc_cs[self._FEAT_SELECTOR, :]

    # Return an instance.
    return self(nusc, pc_cs, A_cs_2_car, A_car_2_gl)
def test_load_pointclouds(self):
    """
    Loads up lidar and radar pointclouds.
    """
    assert 'NUSCENES' in os.environ, 'Set NUSCENES env. variable to enable tests.'
    dataroot = os.environ['NUSCENES']
    nusc = NuScenes(version='v1.0-mini', dataroot=dataroot, verbose=False)
    sample_rec = nusc.sample[0]
    lidar_name = nusc.get('sample_data', sample_rec['data']['LIDAR_TOP'])['filename']
    radar_name = nusc.get('sample_data', sample_rec['data']['RADAR_FRONT'])['filename']
    lidar_path = os.path.join(dataroot, lidar_name)
    radar_path = os.path.join(dataroot, radar_name)
    pc1 = LidarPointCloud.from_file(lidar_path)
    pc2 = RadarPointCloud.from_file(radar_path)
    pc3, _ = LidarPointCloud.from_file_multisweep(nusc, sample_rec, 'LIDAR_TOP', 'LIDAR_TOP', nsweeps=2)
    pc4, _ = RadarPointCloud.from_file_multisweep(nusc, sample_rec, 'RADAR_FRONT', 'RADAR_FRONT', nsweeps=2)

    # Check for valid dimensions.
    assert pc1.points.shape[0] == pc3.points.shape[0] == 4, 'Error: Invalid dimension for lidar pointcloud!'
    assert pc2.points.shape[0] == pc4.points.shape[0] == 18, 'Error: Invalid dimension for radar pointcloud!'
    assert pc1.points.dtype == pc3.points.dtype, 'Error: Invalid dtype for lidar pointcloud!'
    assert pc2.points.dtype == pc4.points.dtype, 'Error: Invalid dtype for radar pointcloud!'
def tokens_to_data_pairs(nusc: NuScenes, cam_sd_tokens: List[str], rad_sd_tokens: List[str],
                         depth: float, threshold: float) -> List[tuple]:
    """
    This method takes a pair of lists with the camera and radar sample_data tokens,
    filters the RadarPointCloud for detections closer than the parameter depth,
    loads the actual data in two corresponding lists and returns the zipped lists.
    :param nusc: NuScenes instance
    :param cam_sd_tokens: List with all the camera sample_data tokens
    :param rad_sd_tokens: List with all the radar sample_data tokens
    :param depth: Distance from the radar sensor (x value) above which all detections are omitted
    :param threshold: No pair of points in the resulting dataset will have a distance smaller than this threshold
    :return: list(zip(np.array, np.array)) List of zipped array lists with the data
    """
    rgb_images_list = []
    for i in range(len(cam_sd_tokens)):
        cam_sd_path = nusc.get_sample_data_path(cam_sd_tokens[i])
        if not os.path.isfile(cam_sd_path):
            continue
        # im = Image.open(cam_sd_path)
        # im = im.resize((IMAGE_WIDTH, IMAGE_HEIGHT), Image.BILINEAR)
        img = cv2.imread(cam_sd_path)
        # Resize with bilinear interpolation.
        img = cv2.resize(img, (IMAGE_WIDTH, IMAGE_HEIGHT))
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        rgb_images_list.append(img_rgb)  # Append the converted RGB image, not the BGR original.

    radar_pcl_list = []
    for i in range(len(rad_sd_tokens)):
        rad_sd_path = nusc.get_sample_data_path(rad_sd_tokens[i])
        if not os.path.isfile(rad_sd_path):
            continue
        # radar_pcl = RadarPointCloud.from_file(rad_sd_path, invalid_states=range(18), dynprop_states=range(18), ambig_states=range(18))
        # radar_pcl = RadarPointCloud.from_file(rad_sd_path, invalid_states=None, dynprop_states=[0, 2, 6], ambig_states=None)
        radar_pcl = RadarPointCloud.from_file(rad_sd_path)
        # radar_pcl.points.shape is (18, num_points).
        radar_pcl.points, dist_list = filter_pointcloud(radar_pcl.points, threshold)
        # radar_pcl.points.shape became (3, num_points).
        # RADNET expects shape (num_points, 4).
        radar_pcl.points = radar_pcl.points.transpose()
        # radar_pcl.points = radar_pcl.points[:, :3]
        # radar_pcl.points = radar_pcl.points[radar_pcl.points[:, 0] < depth]
        radar_pcl.points = np.hstack((radar_pcl.points,
                                      np.ones((radar_pcl.points.shape[0], 1), dtype=radar_pcl.points.dtype)))
        radar_pcl_list.append(radar_pcl)

    assert len(rgb_images_list) == len(radar_pcl_list)
    image_radar_pairs = list(zip(rgb_images_list, radar_pcl_list))

    del rgb_images_list
    del radar_pcl_list

    return image_radar_pairs
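# Usage sketch (not from the original source): builds the token lists from the
# keyframe sample_data of the mini split. IMAGE_WIDTH, IMAGE_HEIGHT and
# filter_pointcloud() are module-level names the function above relies on;
# the dataroot, depth and threshold values are assumptions.
from nuscenes.nuscenes import NuScenes

nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
cam_sd_tokens = [s['data']['CAM_FRONT'] for s in nusc.sample]
rad_sd_tokens = [s['data']['RADAR_FRONT'] for s in nusc.sample]
pairs = tokens_to_data_pairs(nusc, cam_sd_tokens, rad_sd_tokens, depth=100.0, threshold=0.5)
img, radar_pcl = pairs[0]  # one resized RGB image and its filtered radar cloud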
def load_sample_data(self, sample, sensor_channel, image_target_shape=None, **kwargs):
    """
    This function takes the token of a sample and a sensor channel and returns the corresponding data.

    Radar format:
        - Shape: 18 x n
        - Semantics: x y z dyn_prop id rcs vx vy vx_comp vy_comp is_quality_valid
          ambig_state x_rms y_rms invalid_state pdh0 vx_rms vy_rms

    Image format:
        - Shape: h x w x 3
        - Channels: RGB
        - KWargs:
            - [int] image_target_shape to limit image size
            - [tuple[int]] image_target_shape to limit image size

    # TODO figure out directions
    """
    # Get the filepath.
    sd_rec = self.nusc.get('sample_data', sample['data'][sensor_channel])
    file_name = os.path.join(self.nusc.dataroot, sd_rec['filename'])

    # Check conditions.
    assert os.path.exists(file_name), "nuscenes data must be located in %s" % file_name

    # Read the data.
    if "RADAR" in sensor_channel:
        pc = RadarPointCloud.from_file(file_name)  # Load radar points.
        data = pc.points.astype(self.DATATYPE)
    elif "CAM" in sensor_channel:
        i = Image.open(file_name)

        # Resize if a size is given.
        if image_target_shape is not None:
            try:
                _ = iter(image_target_shape)
            except TypeError:
                # Not an iterable size: limit both dimensions to size, but keep the aspect ratio.
                size = (image_target_shape, image_target_shape)
                i.thumbnail(size=size)
            else:
                # Iterable size.
                size = image_target_shape[::-1]  # Revert dimensions.
                i = i.resize(size=size)

        data = np.array(i, dtype=self.DATATYPE) / 255
    else:
        raise Exception("\"%s\" is not supported" % sensor_channel)

    return data
def get_radar_points(cls, data_path) -> 'RadarPointCloud':
    # To get all points/detections we need to disable the default filter settings (..._states).
    radar_pcls = RadarPointCloud.from_file(data_path,
                                           invalid_states=list(range(17 + 1)),
                                           dynprop_states=list(range(7 + 1)),
                                           ambig_states=list(range(4 + 1)))
    # radar_points_array = radar_pcls.points
    return radar_pcls
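# Usage sketch (not from the original source): compares the default-filtered cloud
# with the unfiltered one. The .pcd path is a hypothetical placeholder, and cls is
# passed explicitly since the snippet above is written classmethod-style.
data_path = '/data/sets/nuscenes/samples/RADAR_FRONT/example.pcd'  # hypothetical file
filtered = RadarPointCloud.from_file(data_path)            # devkit default filters applied
unfiltered = get_radar_points(RadarPointCloud, data_path)  # all states kept
print(filtered.nbr_points(), '<=', unfiltered.nbr_points())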
def tokens_to_data_pairs(nusc: NuScenes, cam_sd_tokens: List[str],
                         rad_sd_tokens: List[str]) -> List[tuple]:
    """
    This method takes a pair of lists with the camera and radar sample_data tokens,
    loads the actual data in two corresponding lists and returns the zipped lists.
    :param nusc: NuScenes instance
    :param cam_sd_tokens: List with all the camera sample_data tokens
    :param rad_sd_tokens: List with all the radar sample_data tokens
    :return: list(zip(np.array, np.array)) List of zipped array lists with the data
    """
    rgb_images_list = []
    for i in range(len(cam_sd_tokens)):
        cam_sd_path = nusc.get_sample_data_path(cam_sd_tokens[i])
        if not os.path.isfile(cam_sd_path):
            continue
        # im = Image.open(cam_sd_path)
        # im = im.resize((IMAGE_WIDTH, IMAGE_HEIGHT), Image.BILINEAR)
        img = cv2.imread(cam_sd_path)
        # Resize with bilinear interpolation.
        img = cv2.resize(img, (IMAGE_WIDTH, IMAGE_HEIGHT))
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        rgb_images_list.append(img_rgb)  # Append the converted RGB image, not the BGR original.

    radar_pcl_list = []
    for i in range(len(rad_sd_tokens)):
        rad_sd_path = nusc.get_sample_data_path(rad_sd_tokens[i])
        if not os.path.isfile(rad_sd_path):
            continue
        # radar_pcl = RadarPointCloud.from_file(rad_sd_path, invalid_states=range(18), dynprop_states=range(18), ambig_states=range(18))
        # radar_pcl = RadarPointCloud.from_file(rad_sd_path, invalid_states=None, dynprop_states=[0, 2, 6], ambig_states=None)
        radar_pcl = RadarPointCloud.from_file(rad_sd_path)
        # nuScenes RadarPointCloud has shape (18, num_points);
        # RADNET expects (num_points, 4).
        radar_pcl.points = radar_pcl.points.transpose()
        radar_pcl.points = radar_pcl.points[:, :3]
        radar_pcl.points = np.hstack((radar_pcl.points,
                                      np.ones((radar_pcl.points.shape[0], 1), dtype=radar_pcl.points.dtype)))
        radar_pcl_list.append(radar_pcl)

    assert len(rgb_images_list) == len(radar_pcl_list)
    image_radar_pairs = list(zip(rgb_images_list, radar_pcl_list))

    del rgb_images_list
    del radar_pcl_list

    return image_radar_pairs
def _read_radar_data(self, sample_data_token):
    sd_record = self.nusc.get("sample_data", sample_data_token)
    cs_record = self.nusc.get("calibrated_sensor", sd_record["calibrated_sensor_token"])
    sensor_record = self.nusc.get("sensor", cs_record["sensor_token"])
    assert sensor_record["modality"] == "radar"
    # Currently using only keyframes.
    assert sd_record["is_key_frame"]

    data_path = self.nusc.get_sample_data_path(sample_data_token)
    radar_point_cloud = RadarPointCloud.from_file(data_path)
    points = tf.convert_to_tensor(radar_point_cloud.points.transpose())
    radar_extrinsics = transform_matrix(cs_record["translation"], Quaternion(cs_record["rotation"]))

    # Note: the key "{}_points" looks like an unformatted template string;
    # the intended channel name is not recoverable from this snippet.
    return {"{}_points": tf.io.serialize_tensor(points)}, radar_extrinsics
def radar_points_from_cloud(filename, result_as_array=False):
    """
    This function reads a radar point cloud for each of the given filenames.
    The points are written into a list (or one stacked array) and returned for further processing.
    :param filename: paths of the PCL files, data_info from get_data_from_sample(.. wanted_sample_data_info="filename")
    :param result_as_array: if True, return one stacked array instead of a list
    """
    number_of_samplefiles = len(filename)

    # First disable the filters so that we get all radar detections.
    RadarPointCloud.disable_filters()

    scene_points_list = []  # List for the point arrays (each array 18 x up to 125).

    for i in range(number_of_samplefiles):
        # print(filename[i]) is used for debugging
        path = dataroot + filename[i]
        # Despite the call to disable_filters, the filters are (redundantly) defined here again.
        point_of_current_file = RadarPointCloud.from_file(path,
                                                          invalid_states=list(range(17 + 1)),
                                                          dynprop_states=list(range(7 + 1)),
                                                          ambig_states=list(range(4 + 1)))
        scene_points_list.append(point_of_current_file.points)

    if result_as_array:
        # Stack the per-file (18 x n) arrays along the point axis
        # (the original array branch called np.vstack() without arguments and was broken).
        return np.hstack(scene_points_list)
    # By default a list is returned.
    return scene_points_list
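# Usage sketch (not from the original source): the function above reads the
# module-level global `dataroot` and prepends it to each relative filename, so
# dataroot must end with a separator; all values below are assumptions.
dataroot = '/data/sets/nuscenes/'
filenames = ['samples/RADAR_FRONT/example.pcd']  # hypothetical relative paths
clouds = radar_points_from_cloud(filenames)
print(clouds[0].shape)  # (18, n) array per file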
def get_radar_with_sweeps(self, index, max_sweeps=1):
    info = self.infos[index]
    radar_chans = [
        'RADAR_FRONT', 'RADAR_FRONT_RIGHT', 'RADAR_FRONT_LEFT',
        'RADAR_BACK_LEFT', 'RADAR_BACK_RIGHT'
    ]
    points_all = []
    for radar_chan in radar_chans:
        radar_path = self.root_path / info['radar_paths'][radar_chan]
        radar_point_cloud = RadarPointCloud.from_file(radar_path)
        # TODO HERE: FIGURE OUT HOW TO USE THIS INFO PROPERLY, INCLUDING THE PROPER TRANSFORM FOR MULTIPLE CHANNELS
        # Assumption (the original left `points` undefined): use the loaded cloud as (num_points, 18).
        points = radar_point_cloud.points.transpose()
        # points = np.fromfile(str(radar_path), dtype=np.float32, count=-1).reshape([-1, 5])[:, :4]

        sweep_points_list = [points]
        sweep_times_list = [np.zeros((points.shape[0], 1))]

        for k in np.random.choice(len(info['sweeps']), max_sweeps - 1, replace=False):  # TODO: why is this random?
            points_sweep, times_sweep = self.get_sweep(info['sweeps'][k])
            sweep_points_list.append(points_sweep)
            sweep_times_list.append(times_sweep)

        points = np.concatenate(sweep_points_list, axis=0)
        times = np.concatenate(sweep_times_list, axis=0).astype(points.dtype)

        # TODO: Will need to merge the points from the different radar sensors. And velocities?
        # Need to understand the pcd format fully.
        points = np.concatenate((points, times), axis=1)

        # TODO: merge/concatenate into points_all for each radar channel, here.
        # RECALL HOW THE MERGING OF RADAR WAS DONE. WHICH AXIS WAS IGNORED? IT WAS HEIGHT, RIGHT?
        points_all.append(points)

    return points_all
def tokens_to_data(nusc: NuScenes, rad_sd_tokens: List[str]) -> List:
    """
    This method takes a list with the radar sample_data tokens and loads the actual data in a new list.
    :param nusc: NuScenes instance
    :param rad_sd_tokens: List with all the radar sample_data tokens
    :return: List of numpy arrays with the radar data
    """
    radar_pcl_list = []
    for i in range(len(rad_sd_tokens)):
        rad_sd_path = nusc.get_sample_data_path(rad_sd_tokens[i])
        if not os.path.isfile(rad_sd_path):
            continue
        radar_pcl = RadarPointCloud.from_file(rad_sd_path)
        # nuScenes RadarPointCloud has shape (18, num_points);
        # RADNET expects (num_points, 4).
        radar_pcl.points = radar_pcl.points.transpose()
        radar_pcl.points = radar_pcl.points[:, :3]
        radar_pcl.points = np.hstack((radar_pcl.points,
                                      np.ones((radar_pcl.points.shape[0], 1), dtype=radar_pcl.points.dtype)))
        radar_pcl_list.append(radar_pcl)

    return radar_pcl_list
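# Usage sketch (not from the original source): the dataroot is an assumption.
from nuscenes.nuscenes import NuScenes

nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
rad_sd_tokens = [s['data']['RADAR_FRONT'] for s in nusc.sample]
radar_pcls = tokens_to_data(nusc, rad_sd_tokens)
print(radar_pcls[0].points.shape)  # (n, 4): x, y, z plus a homogeneous 1 per point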
def map_pointcloud_to_image(self, nusc, pointsensor_token: str, camera_token: str,
                            min_dist: float = 1.0, render_intensity: bool = False,
                            show_lidarseg: bool = False):
    """
    Given a point sensor (lidar/radar) token and camera sample_data token, load pointcloud and map it to the image plane.
    :param nusc: NuScenes instance.
    :param pointsensor_token: Lidar/radar sample_data token.
    :param camera_token: Camera sample_data token.
    :param min_dist: Distance from the camera below which points are discarded.
    :param render_intensity: Whether to render lidar intensity instead of point depth.
    :param show_lidarseg: Whether to render lidarseg labels instead of point depth.
    :return (pointcloud <np.float: 2, n)>, coloring <np.float: n>, image <Image>).
    """
    cam = nusc.get('sample_data', camera_token)
    pointsensor = nusc.get('sample_data', pointsensor_token)
    pcl_path = osp.join(nusc.dataroot, pointsensor['filename'])
    if pointsensor['sensor_modality'] == 'lidar':
        # Ensure that the lidar pointcloud is from a keyframe.
        assert pointsensor['is_key_frame'], \
            'Error: Only pointclouds which are keyframes have lidar segmentation labels. Rendering aborted.'
        pc = LidarPointCloud.from_file(pcl_path)
    else:
        pc = RadarPointCloud.from_file(pcl_path)
    im = Image.open(osp.join(nusc.dataroot, cam['filename']))

    # Points live in the point sensor frame. So they need to be transformed via global to the image plane.
    # First step: transform the pointcloud to the ego vehicle frame for the timestamp of the sweep.
    cs_record = nusc.get('calibrated_sensor', pointsensor['calibrated_sensor_token'])
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix)
    pc.translate(np.array(cs_record['translation']))

    # Second step: transform from ego to the global frame.
    poserecord = nusc.get('ego_pose', pointsensor['ego_pose_token'])
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix)
    pc.translate(np.array(poserecord['translation']))

    # Third step: transform from global into the ego vehicle frame for the timestamp of the image.
    poserecord = nusc.get('ego_pose', cam['ego_pose_token'])
    pc.translate(-np.array(poserecord['translation']))
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix.T)

    # Fourth step: transform from ego into the camera.
    cs_record = nusc.get('calibrated_sensor', cam['calibrated_sensor_token'])
    pc.translate(-np.array(cs_record['translation']))
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix.T)

    # Fifth step: actually take a "picture" of the point cloud.
    # Grab the depths (camera frame z axis points away from the camera).
    depths = pc.points[2, :]

    if render_intensity:
        assert pointsensor['sensor_modality'] == 'lidar', 'Error: Can only render intensity for lidar, ' \
                                                          'not %s!' % pointsensor['sensor_modality']
        # Retrieve the color from the intensities.
        # Performs arbitrary scaling to achieve more visually pleasing results.
        intensities = pc.points[3, :]
        intensities = (intensities - np.min(intensities)) / (np.max(intensities) - np.min(intensities))
        intensities = intensities ** 0.1
        intensities = np.maximum(0, intensities - 0.5)
        coloring = intensities
    else:
        # Retrieve the color from the depth.
        coloring = depths

    # Take the actual picture (matrix multiplication with camera-matrix + renormalization).
    points = view_points(pc.points[:3, :], np.array(cs_record['camera_intrinsic']), normalize=True)

    # Remove points that are either outside or behind the camera. Leave a margin of 1 pixel for aesthetic reasons.
    # Also make sure points are at least 1m in front of the camera to avoid seeing the lidar points on the camera
    # casing for non-keyframes which are slightly out of sync.
    mask = np.ones(depths.shape[0], dtype=bool)
    mask = np.logical_and(mask, depths > min_dist)
    mask = np.logical_and(mask, points[0, :] > 1)
    mask = np.logical_and(mask, points[0, :] < im.size[0] - 1)
    mask = np.logical_and(mask, points[1, :] > 1)
    mask = np.logical_and(mask, points[1, :] < im.size[1] - 1)
    points = points[:, mask]
    coloring = coloring[mask]

    return points, coloring, im
def make_multisweep_radar_data(nusc, nusc_can, sample, radar_version, max_sweeps):
    from nuscenes.utils.data_classes import RadarPointCloud
    from pyquaternion import Quaternion
    from nuscenes.utils.geometry_utils import transform_matrix

    scene_ = nusc.get('scene', sample['scene_token'])
    scenes_first_sample_tk = scene_['first_sample_token']
    root_path = '/mnt/sda/jskim/OpenPCDet/data/nuscenes/v1.0-trainval/'
    all_pc = np.zeros((0, 18))
    lidar_token = sample["data"]["LIDAR_TOP"]
    ref_sd_rec = nusc.get('sample_data', sample['data']["LIDAR_TOP"])
    ref_cs_record = nusc.get('calibrated_sensor', ref_sd_rec['calibrated_sensor_token'])
    ref_pose_record = nusc.get('ego_pose', ref_sd_rec['ego_pose_token'])

    # For using can_bus data.
    scene_rec = nusc.get('scene', sample['scene_token'])
    scene_name = scene_rec['name']
    scene_pose = nusc_can.get_messages(scene_name, 'pose')
    utimes = np.array([m['utime'] for m in scene_pose])
    vel_data = np.array([m['vel'][0] for m in scene_pose])
    yaw_rate = np.array([m['rotation_rate'][2] for m in scene_pose])

    l2e_r = ref_cs_record['rotation']
    l2e_t = ref_cs_record['translation']
    e2g_r = ref_pose_record['rotation']
    e2g_t = ref_pose_record['translation']
    l2e_r_mat = Quaternion(l2e_r).rotation_matrix
    e2g_r_mat = Quaternion(e2g_r).rotation_matrix

    radar_channels = [
        'RADAR_FRONT', 'RADAR_FRONT_RIGHT', 'RADAR_FRONT_LEFT',
        'RADAR_BACK_LEFT', 'RADAR_BACK_RIGHT'
    ]
    if radar_version not in ['version1', 'version2', 'version3', 'versionx3']:
        raise NotImplementedError

    for radar_channel in radar_channels:
        radar_data_token = sample['data'][radar_channel]
        radar_sample_data = nusc.get('sample_data', radar_data_token)
        ref_radar_time = radar_sample_data['timestamp']

        for _ in range(max_sweeps):
            radar_path = root_path + radar_sample_data['filename']
            radar_cs_record = nusc.get('calibrated_sensor', radar_sample_data['calibrated_sensor_token'])
            radar_pose_record = nusc.get('ego_pose', radar_sample_data['ego_pose_token'])
            time_gap = (ref_radar_time - radar_sample_data['timestamp']) * 1e-6

            # Using can_bus data, get the ego vehicle speed at the closest pose message.
            new_time = np.abs(utimes - ref_radar_time)
            time_idx = np.argmin(new_time)
            current_from_car = transform_matrix([0, 0, 0], Quaternion(radar_cs_record['rotation']),
                                                inverse=True)[0:2, 0:2]
            vel_data[time_idx] = np.around(vel_data[time_idx], 10)
            vehicle_v = np.array((vel_data[time_idx] * np.cos(yaw_rate[time_idx]),
                                  vel_data[time_idx] * np.sin(yaw_rate[time_idx])))
            final_v = np.dot(current_from_car, vehicle_v)

            current_pc = RadarPointCloud.from_file(radar_path).points.T

            if radar_version in ['version3', 'versionx3']:
                version3_pc = copy.deepcopy(current_pc)
                if radar_version == 'version3':
                    current_pc[:, 8:10] = final_v[0:2] + current_pc[:, 6:8]
                else:
                    version3_pc[:, 8:10] = final_v[0:2] + version3_pc[:, 6:8]
            versionx3_pc = copy.deepcopy(current_pc)
            if radar_version in ['version2', 'versionx3']:
                current_pc[:, :2] += current_pc[:, 8:10] * time_gap
                versionx3_pc = np.vstack((versionx3_pc, current_pc))
            if radar_version in ['version3', 'versionx3']:
                if radar_version == 'version3':
                    current_pc[:, :2] += current_pc[:, 8:10] * time_gap
                else:
                    version3_pc[:, :2] += version3_pc[:, 8:10] * time_gap
                    versionx3_pc = np.vstack((versionx3_pc, version3_pc))

            # Transform the radar points into the LIDAR_TOP coordinate frame.
            r2e_r_s = radar_cs_record['rotation']
            r2e_t_s = radar_cs_record['translation']
            e2g_r_s = radar_pose_record['rotation']
            e2g_t_s = radar_pose_record['translation']
            r2e_r_s_mat = Quaternion(r2e_r_s).rotation_matrix
            e2g_r_s_mat = Quaternion(e2g_r_s).rotation_matrix
            R = (r2e_r_s_mat.T @ e2g_r_s_mat.T) @ (np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)
            T = (r2e_t_s @ e2g_r_s_mat.T + e2g_t_s) @ (np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)
            T -= e2g_t @ (np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T) + \
                 l2e_t @ np.linalg.inv(l2e_r_mat).T

            if radar_version in ['version1', 'version2', 'version3']:
                current_pc[:, :3] = current_pc[:, :3] @ R
                current_pc[:, [8, 9]] = current_pc[:, [8, 9]] @ R[:2, :2]
                current_pc[:, :3] += T
                all_pc = np.vstack((all_pc, current_pc))
            else:  # 'versionx3'
                versionx3_pc[:, :3] = versionx3_pc[:, :3] @ R
                versionx3_pc[:, [8, 9]] = versionx3_pc[:, [8, 9]] @ R[:2, :2]
                versionx3_pc[:, :3] += T
                all_pc = np.vstack((all_pc, versionx3_pc))

            # Walk forward through the sweeps for the first sample of a scene, backwards otherwise.
            if sample['prev'] == '' or sample['prev'] == scenes_first_sample_tk:
                if radar_sample_data['next'] == '':
                    break
                else:
                    radar_sample_data = nusc.get('sample_data', radar_sample_data['next'])
            else:
                if radar_sample_data['prev'] == '':
                    break
                else:
                    radar_sample_data = nusc.get('sample_data', radar_sample_data['prev'])

    use_idx = [0, 1, 2, 5, 8, 9]  # x, y, z, rcs, vx_comp, vy_comp
    all_pc = all_pc[:, use_idx]

    return all_pc
def run_format(pcd_file, formatted_data_path, normalized_data_path):
    pcd = RadarPointCloud.from_file(pcd_file)
    radar_points = pcd.points.T
    print("radar_points.shape:", radar_points.shape)
    positions = radar_points[:, 0:3]
    print("positions.shape:", positions.shape)
    print("positions:", positions)
    velocities = radar_points[:, 6:8]
    print("velocities.shape:", velocities.shape)
    print("velocities:", velocities)

    # Column layout: t, X, Y, Z, V_X, V_Y, V_Z (t and V_Z are zero-filled).
    formatted_data = np.concatenate((np.zeros((radar_points.shape[0], 1)),
                                     positions,
                                     velocities,
                                     np.zeros((radar_points.shape[0], 1))), axis=1)
    print("formatted_data.shape:", formatted_data.shape)
    header = "t,X,Y,Z,V_X,V_Y,V_Z"

    formatted_data_dir = "/" + os.path.join(*formatted_data_path.split("/")[:-1])
    print("formatted_data_dir:", formatted_data_dir)
    if not os.path.isdir(formatted_data_dir):
        os.makedirs(formatted_data_dir)
    np.savetxt(formatted_data_path, formatted_data, delimiter=",", header=header, comments="")

    # Normalize each column to [-1, 1].
    for col in range(1, formatted_data.shape[1]):
        min_val = np.amin(formatted_data[:, col])
        max_val = np.amax(formatted_data[:, col])
        print("\nmin_val:", min_val)
        print("max_val:", max_val)
        if min_val != 0 or max_val != 0:  # Leave columns with only 0s alone.
            formatted_data[:, col] -= min_val
            formatted_data[:, col] /= (max_val - min_val)
            formatted_data[:, col] *= 2
            formatted_data[:, col] -= 1

    # Print the per-column ranges again to verify the normalization.
    for col in range(1, formatted_data.shape[1]):
        min_val = np.amin(formatted_data[:, col])
        max_val = np.amax(formatted_data[:, col])
        print("\nmin_val:", min_val)
        print("max_val:", max_val)

    normalized_data_dir = "/" + os.path.join(*normalized_data_path.split("/")[:-1])
    print("normalized_data_dir:", normalized_data_dir)
    if not os.path.isdir(normalized_data_dir):
        os.makedirs(normalized_data_dir)
    np.savetxt(normalized_data_path, formatted_data, delimiter=",", header=header, comments="")
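# Usage sketch (not from the original source): all paths are assumptions; the function
# writes a raw and a [-1, 1]-normalized CSV with columns t,X,Y,Z,V_X,V_Y,V_Z.
pcd_file = '/data/sets/nuscenes/samples/RADAR_FRONT/example.pcd'  # hypothetical file
run_format(pcd_file,
           '/tmp/radar_csv/formatted/example.csv',
           '/tmp/radar_csv/normalized/example.csv')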
def get_pc_from_file_multisweep(nusc,
                                sample_rec,
                                chan: str,
                                ref_chan: str,
                                nsweeps: int = 3,
                                min_distance: float = 1.0,
                                invalid_states=None,
                                dynprop_states=None,
                                ambig_states=None):
    """
    Returns a point cloud of multiple aggregated sweeps.
    Original function can be found at:
    https://github.com/nutonomy/nuscenes-devkit/blob/ae022aba3b37f07202ea402f8278979097738369/python-sdk/nuscenes/utils/data_classes.py#L56.
    The function is modified to accept additional input parameters, just like the get_pc_from_file function.
    This enables filtering by invalid_states, dynprop_states and ambig_states.
    Arguments:
        nusc: A NuScenes instance, <NuScenes>.
        sample_rec: The current sample, <dict>.
        chan: The radar/lidar channel from which we track back n sweeps to aggregate the point cloud, <str>.
        ref_chan: The reference channel of the current sample_rec that the point clouds are mapped to, <str>.
        nsweeps: Number of sweeps to aggregate, <int>.
        min_distance: Distance below which points are discarded, <float>.
        invalid_states: Radar states to be kept, <int List>.
        dynprop_states: Radar states to be kept, <int List>. Use [0, 2, 6] for moving objects only.
        ambig_states: Radar states to be kept, <int List>.
    """
    # Init.
    if chan == 'LIDAR_TOP':
        points = np.zeros((get_number_of_lidar_pc_dimensions(), 0))
        all_pc = LidarPointCloud(points)
    else:
        points = np.zeros((get_number_of_radar_pc_dimensions(), 0))
        all_pc = RadarPointCloud(points)
    all_times = np.zeros((1, 0))

    # Get reference pose and timestamp.
    ref_sd_token = sample_rec['data'][ref_chan]
    ref_sd_rec = nusc.get('sample_data', ref_sd_token)
    ref_pose_rec = nusc.get('ego_pose', ref_sd_rec['ego_pose_token'])
    ref_cs_rec = nusc.get('calibrated_sensor', ref_sd_rec['calibrated_sensor_token'])
    ref_time = 1e-6 * ref_sd_rec['timestamp']

    # Homogeneous transform from ego car frame to reference frame.
    ref_from_car = transform_matrix(ref_cs_rec['translation'], Quaternion(ref_cs_rec['rotation']), inverse=True)

    # Homogeneous transformation matrix from global to _current_ ego car frame.
    car_from_global = transform_matrix(ref_pose_rec['translation'], Quaternion(ref_pose_rec['rotation']),
                                       inverse=True)

    # Aggregate current and previous sweeps.
    sample_data_token = sample_rec['data'][chan]
    current_sd_rec = nusc.get('sample_data', sample_data_token)
    for _ in range(nsweeps):
        # Load up the pointcloud.
        if chan == 'LIDAR_TOP':
            current_pc = LidarPointCloud.from_file(
                file_name=os.path.join(nusc.dataroot, current_sd_rec['filename']))
        else:
            current_pc = RadarPointCloud.from_file(
                file_name=os.path.join(nusc.dataroot, current_sd_rec['filename']),
                invalid_states=invalid_states,
                dynprop_states=dynprop_states,
                ambig_states=ambig_states)

        # Get past pose.
        current_pose_rec = nusc.get('ego_pose', current_sd_rec['ego_pose_token'])
        global_from_car = transform_matrix(current_pose_rec['translation'],
                                           Quaternion(current_pose_rec['rotation']), inverse=False)

        # Homogeneous transformation matrix from sensor coordinate frame to ego car frame.
        current_cs_rec = nusc.get('calibrated_sensor', current_sd_rec['calibrated_sensor_token'])
        car_from_current = transform_matrix(current_cs_rec['translation'],
                                            Quaternion(current_cs_rec['rotation']), inverse=False)

        # Fuse four transformation matrices into one and perform transform.
        trans_matrix = reduce(np.dot, [ref_from_car, car_from_global, global_from_car, car_from_current])
        current_pc.transform(trans_matrix)

        # Remove close points and add timevector.
        current_pc.remove_close(min_distance)
        time_lag = ref_time - 1e-6 * current_sd_rec['timestamp']  # Positive difference.
        times = time_lag * np.ones((1, current_pc.nbr_points()))
        all_times = np.hstack((all_times, times))

        # Merge with key pc.
        all_pc.points = np.hstack((all_pc.points, current_pc.points))

        # Abort if there are no previous sweeps.
        if current_sd_rec['prev'] == '':
            break
        else:
            current_sd_rec = nusc.get('sample_data', current_sd_rec['prev'])

    return all_pc, all_times
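# Usage sketch (not from the original source): aggregates three RADAR_FRONT sweeps
# into the LIDAR_TOP frame, keeping moving objects only. The dataroot is an
# assumption, and the get_number_of_*_pc_dimensions() helpers must be in scope.
from nuscenes.nuscenes import NuScenes

nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
sample_rec = nusc.sample[0]
pc, times = get_pc_from_file_multisweep(nusc, sample_rec, chan='RADAR_FRONT',
                                        ref_chan='LIDAR_TOP', nsweeps=3,
                                        dynprop_states=[0, 2, 6])
print(pc.points.shape, times.shape)  # (18, n) aggregated points and their (1, n) time lags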
# In[35]:

# `color`, `seg` and `points` come from the lidar cells earlier in the notebook.
color[:, 0] = seg / 32
color[:, 1] = 0.5
color[:, 2] = 0.5
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(points[:, :3])
pcd.colors = o3d.utility.Vector3dVector(color)
o3d.visualization.draw_geometries([pcd])

# In[30]:

# 4.3.(a) Visualizing radar data
radar_pcd_name = '/data/sets/nuscenes/samples/RADAR_FRONT/n015-2018-07-24-11-22-45+0800__RADAR_FRONT__1532402927664178.pcd'
radar_obj = RadarPointCloud.from_file(radar_pcd_name)
radar_points = radar_obj.points.T  # (num_points, 18)
color_radar = np.zeros([len(radar_points), 3])
pcd.points = o3d.utility.Vector3dVector(radar_points[:, :3])
color_radar[:, 0] = radar_points[:, 0] / 32
color_radar[:, 1] = radar_points[:, 1] / 32
pcd.colors = o3d.utility.Vector3dVector(color_radar)
o3d.visualization.draw_geometries([pcd])

# In[14]:

# 4.3.(b) Colorizing points by height and velocity
height_radar = radar_points[:, 2]
vel = radar_points[:, 6]
pcd = o3d.geometry.PointCloud()
def get_sensor_sample_data(nusc, sample, sensor_channel, dtype=np.float32, size=None):
    """
    This function takes the token of a sample and a sensor channel and returns the corresponding data.
    :param sample: the nuscenes sample dict
    :param sensor_channel: the target sensor channel of the given sample to load the data from
    :param dtype: the target numpy type
    :param size: for resizing the image

    Radar format:
        - Shape: 19 x n
        - Semantics:
            [0]: x (1)
            [1]: y (2)
            [2]: z (3)
            [3]: dyn_prop (4)
            [4]: id (5)
            [5]: rcs (6)
            [6]: vx (7)
            [7]: vy (8)
            [8]: vx_comp (9)
            [9]: vy_comp (10)
            [10]: is_quality_valid (11)
            [11]: ambig_state (12)
            [12]: x_rms (13)
            [13]: y_rms (14)
            [14]: invalid_state (15)
            [15]: pdh0 (16)
            [16]: vx_rms (17)
            [17]: vy_rms (18)
            [18]: distance (19)

    Image format:
        - Shape: h x w x 3
        - Channels: RGB
        - size:
            - [int] size to limit image size
            - [tuple[int]] size to limit image size
    """
    # Get the filepath.
    sd_rec = nusc.get('sample_data', sample['data'][sensor_channel])
    file_name = osp.join(nusc.dataroot, sd_rec['filename'])

    # Check conditions.
    if not osp.exists(file_name):
        raise FileNotFoundError("nuscenes data must be located in %s" % file_name)

    # Read the data.
    if "RADAR" in sensor_channel:
        pc = RadarPointCloud.from_file(file_name)  # Load radar points.
        data = pc.points.astype(dtype)
        data = radar.enrich_radar_data(data)  # Enrich the radar data and bring it into the proper format.
    elif "CAM" in sensor_channel:
        i = Image.open(file_name)

        # Resize if a size is given.
        if size is not None:
            try:
                _ = iter(size)
            except TypeError:
                # Not an iterable: limit both dimensions to size, but keep the aspect ratio.
                size = (size, size)
                i.thumbnail(size=size)
            else:
                size = size[::-1]  # Revert dimensions.
                i = i.resize(size=size)

        data = np.array(i, dtype=dtype)
        if np.issubdtype(dtype, np.floating):
            data = data / 255  # Floating-point images are usually on the [0, 1] interval.
    else:
        raise Exception("\"%s\" is not supported" % sensor_channel)

    return data
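# Usage sketch (not from the original source): loads the enriched radar array and a
# resized camera image for the same sample. The dataroot, the target size and the
# radar.enrich_radar_data helper are assumptions of the snippet above.
from nuscenes.nuscenes import NuScenes

nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
sample = nusc.sample[0]
radar_data = get_sensor_sample_data(nusc, sample, 'RADAR_FRONT')               # (19, n)
cam_data = get_sensor_sample_data(nusc, sample, 'CAM_FRONT', size=(450, 800))  # h x w x 3 in [0, 1]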
def map_pointcloud_to_image(self, pointsensor_token: str, camera_token: str,
                            min_dist: float = 1.0) -> Tuple:
    """
    Given a point sensor (lidar/radar) token and camera sample_data token, load point-cloud and map it to the image plane.
    [Recoded from the NuScenesExplorer class so that the image does not have to be loaded.]
    :param pointsensor_token: Lidar/radar sample_data token.
    :param camera_token: Camera sample_data token.
    :param min_dist: Distance from the camera below which points are discarded.
    :return (pointcloud <np.float: 2, n)>, coloring <np.float: n>).
    """
    cam = self.nusc.get('sample_data', camera_token)
    pointsensor = self.nusc.get('sample_data', pointsensor_token)
    pcl_path = os.path.join(self.nusc.dataroot, pointsensor['filename'])
    if pointsensor['sensor_modality'] == 'lidar':
        pc = LidarPointCloud.from_file(pcl_path)
    else:
        pc = RadarPointCloud.from_file(pcl_path)

    # Points live in the point sensor frame. So they need to be transformed via global to the image plane.
    # First step: transform the point-cloud to the ego vehicle frame for the timestamp of the sweep.
    cs_record = self.nusc.get('calibrated_sensor', pointsensor['calibrated_sensor_token'])
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix)
    pc.translate(np.array(cs_record['translation']))

    # Second step: transform to the global frame.
    poserecord = self.nusc.get('ego_pose', pointsensor['ego_pose_token'])
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix)
    pc.translate(np.array(poserecord['translation']))

    # Third step: transform into the ego vehicle frame for the timestamp of the image.
    poserecord = self.nusc.get('ego_pose', cam['ego_pose_token'])
    pc.translate(-np.array(poserecord['translation']))
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix.T)

    # Fourth step: transform into the camera.
    cs_record = self.nusc.get('calibrated_sensor', cam['calibrated_sensor_token'])
    pc.translate(-np.array(cs_record['translation']))
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix.T)

    # Fifth step: actually take a "picture" of the point cloud.
    # Grab the depths (camera frame z axis points away from the camera).
    depths = pc.points[2, :]

    # Retrieve the color from the depth.
    coloring = depths

    # Take the actual picture (matrix multiplication with camera-matrix + renormalization).
    points = view_points(pc.points[:3, :], np.array(cs_record['camera_intrinsic']), normalize=True)

    # Remove points that are either outside or behind the camera. Leave a margin of 1 pixel for aesthetic reasons.
    # Also make sure points are at least 1m in front of the camera to avoid seeing the lidar points on the camera
    # casing for non-keyframes which are slightly out of sync.
    mask = np.ones(depths.shape[0], dtype=bool)
    mask = np.logical_and(mask, depths > min_dist)
    mask = np.logical_and(mask, points[0, :] > 1)
    mask = np.logical_and(mask, points[0, :] < 1600 - 1)  # Hardcoded image width.
    mask = np.logical_and(mask, points[1, :] > 1)
    mask = np.logical_and(mask, points[1, :] < 900 - 1)  # Hardcoded image height.
    points = points[:, mask]
    coloring = coloring[mask]

    return points, coloring
def map_pointcloud_to_image(nusc, sample_token, pointsensor_channel, camera_channel,
                            min_dist: float = 1.0, depth_map=None):
    sample_record = nusc.get('sample', sample_token)

    # Here we just grab the front camera and the point sensor.
    pointsensor_token = sample_record['data'][pointsensor_channel]
    camera_token = sample_record['data'][camera_channel]

    cam = nusc.get('sample_data', camera_token)
    pointsensor = nusc.get('sample_data', pointsensor_token)
    pcl_path = osp.join(nusc.dataroot, pointsensor['filename'])
    if pointsensor['sensor_modality'] == 'lidar':
        pc = LidarPointCloud.from_file(pcl_path)
    else:
        pc = RadarPointCloud.from_file(pcl_path)
    im = Image.open(osp.join(nusc.dataroot, cam['filename']))

    # Points live in the point sensor frame. So they need to be transformed via global to the image plane.
    # First step: transform the pointcloud to the ego vehicle frame for the timestamp of the sweep.
    cs_record = nusc.get('calibrated_sensor', pointsensor['calibrated_sensor_token'])
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix)
    pc.translate(np.array(cs_record['translation']))

    # Second step: transform from ego to the global frame.
    poserecord = nusc.get('ego_pose', pointsensor['ego_pose_token'])
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix)
    pc.translate(np.array(poserecord['translation']))

    # Third step: transform from global into the ego vehicle frame for the timestamp of the image.
    poserecord = nusc.get('ego_pose', cam['ego_pose_token'])
    pc.translate(-np.array(poserecord['translation']))
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix.T)

    # Fourth step: transform from ego into the camera.
    cs_record = nusc.get('calibrated_sensor', cam['calibrated_sensor_token'])
    pc.translate(-np.array(cs_record['translation']))
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix.T)

    # Fifth step: actually take a "picture" of the point cloud.
    # Grab the depths (camera frame z axis points away from the camera).
    depths = pc.points[2, :]
    coloring = depths

    # Take the actual picture (matrix multiplication with camera-matrix + renormalization).
    points = view_points(pc.points[:3, :], np.array(cs_record['camera_intrinsic']), normalize=True)

    # Remove points that are either outside or behind the camera. Leave a margin of 1 pixel for aesthetic reasons.
    # Also make sure points are at least 1m in front of the camera to avoid seeing the lidar points on the camera
    # casing for non-keyframes which are slightly out of sync.
    mask = np.ones(depths.shape[0], dtype=bool)
    mask = np.logical_and(mask, depths > min_dist)
    mask = np.logical_and(mask, points[0, :] > 1)
    mask = np.logical_and(mask, points[0, :] < im.size[0] - 1)
    mask = np.logical_and(mask, points[1, :] > 1)
    mask = np.logical_and(mask, points[1, :] < im.size[1] - 1)
    points = points[:, mask]
    coloring = coloring[mask]

    width = im.size[0]
    height = im.size[1]
    if depth_map is None:
        depth_map = np.zeros((height, width), dtype=np.uint16)

    # Splat each projected point into the depth map as a filled circle whose value encodes depth.
    for x, y, depth in zip(points[0], points[1], coloring):
        depth_val = int(depth.real * 255.0)
        iy = int(y)
        ix = int(x)
        cv2.circle(depth_map, (ix, iy), 5, depth_val, -1)

    return depth_map
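# Usage sketch (not from the original source): renders a sparse radar depth map for
# the front camera, then accumulates a second radar channel into the same buffer via
# the depth_map argument. The dataroot is an assumption.
from nuscenes.nuscenes import NuScenes

nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
sample_token = nusc.sample[0]['token']
depth_map = map_pointcloud_to_image(nusc, sample_token, 'RADAR_FRONT', 'CAM_FRONT')
depth_map = map_pointcloud_to_image(nusc, sample_token, 'RADAR_FRONT_LEFT', 'CAM_FRONT',
                                    depth_map=depth_map)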
def map_pointcloud_to_image_(nusc: NuScenes, bbox, pointsensor_token: str, camera_token: str,
                             min_dist: float = 1.0, dist_thresh: float = 0.1,
                             visualize: bool = False) -> Tuple:
    """
    Given a point sensor (lidar/radar) token and camera sample_data token, load point-cloud and map it to the image plane.
    :param nusc: NuScenes instance.
    :param bbox: object coordinates in the current image.
    :param pointsensor_token: Lidar/radar sample_data token.
    :param camera_token: Camera sample_data token.
    :param min_dist: Distance from the camera below which points are discarded.
    :param dist_thresh: Threshold to consider points within the floor plane.
    :return (points_ann <np.float: 2, n)>, coloring_ann <np.float: n>, ori_points_ann <np.float: 3, n)>, image <Image>).
    """
    cam = nusc.get('sample_data', camera_token)  # Sample camera info.
    pointsensor = nusc.get('sample_data', pointsensor_token)  # Sample point cloud.
    # pcl_path is the path from root to the pointcloud file.
    pcl_path = osp.join(nusc.dataroot, pointsensor['filename'])

    # Open the pointcloud path using the Lidar or Radar class.
    if pointsensor['sensor_modality'] == 'lidar':
        # Read the point cloud with LidarPointCloud (4 x samples) --> X, Y, Z and intensity.
        pc = LidarPointCloud.from_file(pcl_path)  # Access the points via pc.points.
    else:
        # Read the point cloud with RadarPointCloud (18 x samples) -->
        # https://github.com/nutonomy/nuscenes-devkit/blob/master/python-sdk/nuscenes/utils/data_classes.py#L296
        pc = RadarPointCloud.from_file(pcl_path)

    # Open the image of the camera of interest.
    im = Image.open(osp.join(nusc.dataroot, cam['filename']))

    # Save the original point (X, Y and Z) coordinates in the sensor frame.
    ori_points = pc.points[:3, :].copy()

    # Points live in the point sensor frame. So they need to be transformed via global to the image plane.
    # First step: transform the point-cloud to the ego vehicle frame for the timestamp of the sweep.
    cs_record = nusc.get('calibrated_sensor', pointsensor['calibrated_sensor_token'])  # Pointcloud calibration.
    # Turn the quaternion into a rotation matrix and use the rotate method of the PointCloud class:
    # a dot product between cs_record['rotation'] (3 x 3) and the points (3 x n). Maps from the sensor to the ego frame.
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix)
    # Add the translation vector between ego vehicle and sensor;
    # the translate method adds cs_record['translation'] (3,) to the points (3 x n).
    pc.translate(np.array(cs_record['translation']))

    # Second step: transform to the global frame.
    poserecord = nusc.get('ego_pose', pointsensor['ego_pose_token'])
    # Same step as before: map from the ego pose to the world coordinate frame.
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix)
    pc.translate(np.array(poserecord['translation']))

    # Third step: transform into the ego vehicle frame for the timestamp of the image.
    poserecord = nusc.get('ego_pose', cam['ego_pose_token'])
    # Map from the world coordinate frame to the ego vehicle frame for the timestamp of the image.
    pc.translate(-np.array(poserecord['translation']))
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix.T)

    # Fourth step: transform into the camera.
    cs_record = nusc.get('calibrated_sensor', cam['calibrated_sensor_token'])
    # Map from the ego frame at the camera timestamp into the camera frame.
    pc.translate(-np.array(cs_record['translation']))
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix.T)

    # Fifth step: actually take a "picture" of the point cloud.
    # Grab the depths (camera frame z axis points away from the camera).
    depths = pc.points[2, :]

    # Retrieve the color from the depth.
    coloring = depths

    # Take the actual picture (matrix multiplication with camera-matrix + renormalization).
    # Normalization means dividing the X and Y coordinates by the depth;
    # the output is (3 x n_points) where the 3rd row is all ones.
    points = view_points(pc.points[:3, :], np.array(cs_record['camera_intrinsic']), normalize=True)

    # Bounding box coordinates.
    min_x, min_y, max_x, max_y = [int(points_b) for points_b in bbox]

    # Floor segmentation.
    points_img, coloring_img, ori_points_img = segment_floor(points, coloring, ori_points,
                                                             (im.size[0], im.size[1]), dist_thresh, 1.0)

    # Filter the points within the annotation.
    # Create a mask of bools the same size as the depth points.
    mask_ann = np.ones(coloring_img.shape[0], dtype=bool)
    # Keep points inside the horizontal extent of the bounding box (with a 1-pixel margin).
    mask_ann = np.logical_and(mask_ann, points_img[0, :] > min_x + 1)
    mask_ann = np.logical_and(mask_ann, points_img[0, :] < max_x - 1)
    # Keep points inside the vertical extent of the bounding box (with a 1-pixel margin).
    mask_ann = np.logical_and(mask_ann, points_img[1, :] > min_y + 1)
    mask_ann = np.logical_and(mask_ann, points_img[1, :] < max_y - 1)

    # Keep only the points of interest.
    points_ann = points_img[:, mask_ann]
    coloring_ann = coloring_img[mask_ann]
    ori_points_ann = ori_points_img[:, mask_ann]

    if visualize:
        plt.figure(figsize=(9, 16))
        plt.imshow(im)
        plt.scatter(points_ann[0, :], points_ann[1, :], c=coloring_ann, s=5)
        plt.axis('off')

    return points_ann, coloring_ann, ori_points_ann, im