def from_sample_token(self, nusc: 'NuScenes', sample_token: str, sensor: str) -> 'myRadarPointCloud':
    # trace back frame data - here we have all the sensor data and all the annotations
    sample_rec = nusc.get('sample', sample_token)
    # get sensor data
    sd_rec = nusc.get('sample_data', sample_rec['data'][sensor])
    # get sensor calibration
    cs_rec = nusc.get('calibrated_sensor', sd_rec['calibrated_sensor_token'])
    # ego pose - ego car to global frame
    ep_rec = nusc.get('ego_pose', sd_rec['ego_pose_token'])
    # A_cs^car: homogeneous transformation matrix from the sensor frame to the ego car frame
    A_cs_2_car = transform_matrix(cs_rec['translation'], Quaternion(cs_rec['rotation']), inverse=False)
    # A_car^gl: homogeneous transformation matrix from the ego car frame to the global frame
    A_car_2_gl = transform_matrix(ep_rec['translation'], Quaternion(ep_rec['rotation']), inverse=False)
    # read radar pointcloud in sensor coordinates
    filepath = os.path.join(nusc.dataroot, sd_rec['filename'])
    # points: <np.float: d, n>. Point cloud matrix with d dimensions and n points.
    pc_cs = RadarPointCloud.from_file(file_name=filepath).points
    # select fields: x, y, vx_comp, vy_comp, RCS
    pc_cs = pc_cs[self._FEAT_SELECTOR, :]
    # return instance
    return self(nusc, pc_cs, A_cs_2_car, A_car_2_gl)
def get_rad_to_cam(nusc: NuScenes, cam_sd_token: str, rad_sd_token: str):
    """
    Get the extrinsic calibration matrix from radar_front to camera_front for a specific sample.
    Every sample_data has a record of which calibrated_sensor the data was collected from
    ("calibrated_sensor_token" key). The calibrated_sensor record holds the definition of a
    particular sensor (lidar/radar/camera) as calibrated on a particular vehicle.
    :param nusc: NuScenes instance
    :param cam_sd_token: A token of a specific camera_front sample_data
    :param rad_sd_token: A token of a specific radar_front sample_data
    :return: rad_to_cam <np.float: 4, 4> Homogeneous transformation matrix from radar to camera
    """
    cam_cs_token = nusc.get('sample_data', cam_sd_token)["calibrated_sensor_token"]
    cam_cs_rec = nusc.get('calibrated_sensor', cam_cs_token)
    rad_cs_token = nusc.get('sample_data', rad_sd_token)["calibrated_sensor_token"]
    rad_cs_rec = nusc.get('calibrated_sensor', rad_cs_token)
    # Based on how transforms are handled in nuScenes scripts like scripts/export_kitti.py
    rad_to_ego = transform_matrix(rad_cs_rec['translation'], Quaternion(rad_cs_rec['rotation']), inverse=False)
    ego_to_cam = transform_matrix(cam_cs_rec['translation'], Quaternion(cam_cs_rec['rotation']), inverse=True)
    rad_to_cam = np.dot(ego_to_cam, rad_to_ego)
    return rad_to_cam
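# Hedged usage sketch for get_rad_to_cam above: apply the returned 4x4 matrix to radar
# detections given in the radar frame to express them in the camera frame. The variables
# nusc, cam_sd_token and rad_sd_token are assumed to exist already; points_radar is illustrative.
import numpy as np

rad_to_cam = get_rad_to_cam(nusc, cam_sd_token, rad_sd_token)

points_radar = np.zeros((3, 5))                                   # <3, n> (x, y, z) in radar frame
points_hom = np.vstack((points_radar, np.ones((1, points_radar.shape[1]))))
points_cam = rad_to_cam @ points_hom                              # <4, n>, now in the camera frame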
def getPoints(self, index):
    sample = self.nusc.get('sample', self.filtered_sample_tokens[index])
    sample_lidar_token = sample['data']['LIDAR_TOP']
    lidar_data = self.nusc.get('sample_data', sample_lidar_token)
    ego_pose = self.nusc.get('ego_pose', lidar_data['ego_pose_token'])
    calibrated_sensor = self.nusc.get('calibrated_sensor', lidar_data['calibrated_sensor_token'])
    global_from_car = transform_matrix(ego_pose['translation'],
                                       Quaternion(ego_pose['rotation']), inverse=False)
    car_from_sensor = transform_matrix(calibrated_sensor['translation'],
                                       Quaternion(calibrated_sensor['rotation']), inverse=False)
    try:
        lidar_pointcloud, times = LidarPointCloud.from_file_multisweep(
            self.nusc, sample, 'LIDAR_TOP', 'LIDAR_TOP')
        lidar_pointcloud.transform(car_from_sensor)
    except Exception as e:
        print(f"Failed to load Lidar Pointcloud for {sample}: {e}")
        raise  # re-raise, otherwise the code below would use an undefined point cloud
    points = lidar_pointcloud.points
    points[3, :] /= 255
    points[3, :] -= 0.5
    # points_cat = np.concatenate([points, times], axis=0).transpose()
    points_cat = points.transpose()
    points_cat = points_cat[~np.isnan(points_cat).any(axis=1)]
    return points_cat
def return_pose(lidar):
    poss = nusc.get('ego_pose', lidar['ego_pose_token'])
    cs_record_lid = nusc.get('calibrated_sensor', lidar['calibrated_sensor_token'])
    lid_to_ego = transform_matrix(cs_record_lid["translation"], Quaternion(cs_record_lid["rotation"]), inverse=False)
    lid_ego_to_world = transform_matrix(poss["translation"], Quaternion(poss["rotation"]), inverse=False)
    # kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2)
    lid_to_world = np.dot(lid_ego_to_world, lid_to_ego)  # np.dot(lid_to_ego, kitti_to_nu_lidar.transformation_matrix)
    return lid_to_world
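# Hedged sketch (variable names assumed): with return_pose above, the relative motion of the
# lidar between two consecutive sample_data records follows from composing one global pose
# with the inverse of the other. `lidar` is a LIDAR_TOP sample_data record with a non-empty 'prev'.
import numpy as np

lidar_prev = nusc.get('sample_data', lidar['prev'])
delta = np.dot(np.linalg.inv(return_pose(lidar_prev)), return_pose(lidar))
translation_in_prev_frame = delta[:3, 3]   # motion of the lidar expressed in the previous lidar frame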
def get_sweeps(sample, channel, max_sweeps, ref_from_car, car_from_global):
    sample_data_token = sample['data'][channel]
    curr_sd_rec = nusc.get('sample_data', sample_data_token)
    sweeps = []
    while len(sweeps) < max_sweeps - 1:
        if curr_sd_rec['prev'] == '':
            if len(sweeps) == 0:
                sweep = {
                    'file_path': Path(ref_lidar_path).relative_to(data_path).__str__(),
                    'sample_data_token': curr_sd_rec['token'],
                    'transform_matrix': None,
                    'time_lag': curr_sd_rec['timestamp'] * 0,
                }
                sweep['lidar_path'] = sweep['file_path']  # for compatibility with old approach
                sweeps.append(sweep)
            else:
                sweeps.append(sweeps[-1])
        else:
            curr_sd_rec = nusc.get('sample_data', curr_sd_rec['prev'])

            # Get past pose
            current_pose_rec = nusc.get('ego_pose', curr_sd_rec['ego_pose_token'])
            global_from_car = transform_matrix(
                current_pose_rec['translation'],
                Quaternion(current_pose_rec['rotation']),
                inverse=False,
            )

            # Homogeneous transformation matrix from sensor coordinate frame to ego car frame.
            current_cs_rec = nusc.get(
                'calibrated_sensor', curr_sd_rec['calibrated_sensor_token']
            )
            car_from_current = transform_matrix(
                current_cs_rec['translation'],
                Quaternion(current_cs_rec['rotation']),
                inverse=False,
            )

            tm = reduce(np.dot, [ref_from_car, car_from_global, global_from_car, car_from_current])
            lidar_path = nusc.get_sample_data_path(curr_sd_rec['token'])
            time_lag = ref_time - 1e-6 * curr_sd_rec['timestamp']
            sweep = {
                'file_path': Path(lidar_path).relative_to(data_path).__str__(),
                'sample_data_token': curr_sd_rec['token'],
                'transform_matrix': tm,
                'global_from_car': global_from_car,
                'car_from_current': car_from_current,
                'time_lag': time_lag,
            }
            sweep['lidar_path'] = sweep['file_path']  # for compatibility with old approach
            sweeps.append(sweep)
    assert len(sweeps) == max_sweeps - 1, \
        f"sweep {curr_sd_rec['token']} only has {len(sweeps)} sweeps, " \
        f"you should duplicate to sweep num {max_sweeps - 1}"
    return sweeps
def get_global_pose(self, sd_token, inverse=False):
    sd = self.nusc.get("sample_data", sd_token)
    sd_ep = self.nusc.get("ego_pose", sd["ego_pose_token"])
    sd_cs = self.nusc.get("calibrated_sensor", sd["calibrated_sensor_token"])
    if inverse is False:
        global_from_ego = transform_matrix(sd_ep["translation"], Quaternion(sd_ep["rotation"]), inverse=False)
        ego_from_sensor = transform_matrix(sd_cs["translation"], Quaternion(sd_cs["rotation"]), inverse=False)
        pose = global_from_ego.dot(ego_from_sensor)
    else:
        sensor_from_ego = transform_matrix(sd_cs["translation"], Quaternion(sd_cs["rotation"]), inverse=True)
        ego_from_global = transform_matrix(sd_ep["translation"], Quaternion(sd_ep["rotation"]), inverse=True)
        pose = sensor_from_ego.dot(ego_from_global)
    return pose
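# Hedged sketch: composing the inverse pose of one sample_data with the forward pose of another
# gives the transform that maps points from sensor B directly into sensor A's frame. `reader`
# stands in for whatever object exposes get_global_pose; sd_token_a / sd_token_b are assumptions.
a_from_global = reader.get_global_pose(sd_token_a, inverse=True)
global_from_b = reader.get_global_pose(sd_token_b, inverse=False)
a_from_b = a_from_global.dot(global_from_b)   # 4x4: sensor B frame -> sensor A frame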
def veh_pos_to_transform(veh_pos):
    """Convert a 4x4 vehicle pose matrix into the two transformation matrices
    global_from_car and car_from_global."""
    rotation = veh_pos[:3, :3]
    tran = veh_pos[:3, 3]
    global_from_car = transform_matrix(
        tran, Quaternion(matrix=rotation), inverse=False
    )
    car_from_global = transform_matrix(
        tran, Quaternion(matrix=rotation), inverse=True
    )
    return global_from_car, car_from_global
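# Hedged check: the two matrices returned by veh_pos_to_transform are rigid inverses of each
# other, so their product should be (numerically) the identity. `veh_pos` is assumed to be a
# valid 4x4 ego pose matrix with a proper rotation block.
import numpy as np

global_from_car, car_from_global = veh_pos_to_transform(veh_pos)
assert np.allclose(np.dot(global_from_car, car_from_global), np.eye(4), atol=1e-6)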
def _compute_calibration_data(self, frame_sample_data):
    cal_sensor_data = self.nusc.get(
        'calibrated_sensor', frame_sample_data['calibrated_sensor_token'])
    pose_data = self.nusc.get('ego_pose', frame_sample_data['ego_pose_token'])
    car_to_global = geometry_utils.transform_matrix(
        pose_data['translation'], Quaternion(pose_data['rotation']), inverse=False)
    sensor_to_car = geometry_utils.transform_matrix(
        cal_sensor_data['translation'], Quaternion(cal_sensor_data['rotation']), inverse=False)
    trans_matrix = np.dot(car_to_global, sensor_to_car)
    return cal_sensor_data, pose_data, trans_matrix
def get_extrinsics(ego_pose: DictStrAny, car_from_sensor: DictStrAny) -> Extrinsics:
    """Convert NuScenes ego pose / sensor_to_car to global extrinsics."""
    global_from_car = transform_matrix(
        ego_pose["translation"],
        Quaternion(ego_pose["rotation"]),
        inverse=False,
    )
    car_from_sensor_ = transform_matrix(
        car_from_sensor["translation"],
        Quaternion(car_from_sensor["rotation"]),
        inverse=False,
    )
    extrinsics = np.dot(global_from_car, car_from_sensor_)
    return get_extrinsics_from_matrix(extrinsics)
def _read_camera_data(self, sample_data_token):
    sd_record = self.nusc.get("sample_data", sample_data_token)
    cs_record = self.nusc.get("calibrated_sensor", sd_record["calibrated_sensor_token"])
    sensor_record = self.nusc.get("sensor", cs_record["sensor_token"])
    assert sensor_record["modality"] == "camera"
    # currently using only keyframes
    assert sd_record["is_key_frame"]
    data_path = self.nusc.get_sample_data_path(sample_data_token)
    imsize = (sd_record["width"], sd_record["height"])
    cam_intrinsic = np.array(cs_record["camera_intrinsic"])
    cam_extrinsics = transform_matrix(cs_record["translation"], Quaternion(cs_record["rotation"]))
    with open(data_path, "rb") as in_file:
        img_bytes_jpg = in_file.read()
    return (
        {
            "{}_jpg": img_bytes_jpg,
            "{}_size": np.asarray(imsize, dtype=np.int64),
            "{}_intrinsics": cam_intrinsic.astype(np.float32),
        },
        cam_extrinsics,
    )
def project_points_to_image(current_pc, pointsensor, cam):
    pc = copy.deepcopy(current_pc)
    # Points live in the point sensor frame, so they need to be transformed via global to the image plane.
    # First step: transform the point cloud to the ego vehicle frame for the timestamp of the sweep.
    cs_record = nusc.get('calibrated_sensor', pointsensor['calibrated_sensor_token'])
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix)
    pc.translate(np.array(cs_record['translation']))

    # Second step: transform to the global frame.
    poserecord = nusc.get('ego_pose', pointsensor['ego_pose_token'])
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix)
    pc.translate(np.array(poserecord['translation']))
    t2 = transform_matrix(translation=poserecord['translation'],
                          rotation=Quaternion(poserecord['rotation']), inverse=True)
    global_frame_pc = copy.deepcopy(pc)

    # Third step: transform into the ego vehicle frame for the timestamp of the image.
    poserecord = nusc.get('ego_pose', cam['ego_pose_token'])
    pc.translate(-np.array(poserecord['translation']))
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix.T)

    # Fourth step: transform into the camera.
    cs_record = nusc.get('calibrated_sensor', cam['calibrated_sensor_token'])
    pc.translate(-np.array(cs_record['translation']))
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix.T)
    return pc, global_frame_pc
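# Hedged follow-up sketch: project_points_to_image above leaves the points in the camera frame;
# a usual final step is to project them onto the image plane with the camera intrinsic via the
# devkit's view_points. The variables nusc, current_pc, pointsensor and cam are assumed to exist.
import numpy as np
from nuscenes.utils.geometry_utils import view_points

pc_cam, _ = project_points_to_image(current_pc, pointsensor, cam)
cam_cs_record = nusc.get('calibrated_sensor', cam['calibrated_sensor_token'])
depths = pc_cam.points[2, :]
pixels = view_points(pc_cam.points[:3, :],
                     np.array(cam_cs_record['camera_intrinsic']),
                     normalize=True)           # <3, n>: rows are (u, v, 1)
mask = depths > 1.0                            # keep points in front of the camera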
def get_lidar_pointcloud(self):
    """
    Extract the lidar point cloud for the current timestep in the current scene.
    Author: Javier
    :return: Point cloud [Position(x, y, z) x n_points]
    """
    # Select sensor data record
    channel = 'LIDAR_TOP'
    sample_data_token = self.sample['data'][channel]
    sd_record = self.nusc.get('sample_data', sample_data_token)

    # Get aggregated point cloud in lidar frame.
    chan = sd_record['channel']
    pc, times = LidarPointCloud.from_file_multisweep(self.nusc, self.sample, chan, channel, nsweeps=1)

    # Calibration of LiDAR points
    ref_sd_record = self.nusc.get('sample_data', sample_data_token)
    cs_record = self.nusc.get('calibrated_sensor', ref_sd_record['calibrated_sensor_token'])
    pose_record = self.nusc.get('ego_pose', ref_sd_record['ego_pose_token'])
    ref_to_ego = transform_matrix(translation=cs_record['translation'],
                                  rotation=Quaternion(cs_record["rotation"]))

    # Compute rotation between 3D vehicle pose and "flat" vehicle pose (parallel to global z plane).
    ego_yaw = Quaternion(pose_record['rotation']).yaw_pitch_roll[0]
    rotation_vehicle_flat_from_vehicle = np.dot(
        Quaternion(scalar=np.cos(ego_yaw / 2), vector=[0, 0, np.sin(ego_yaw / 2)]).rotation_matrix,
        Quaternion(pose_record['rotation']).inverse.rotation_matrix)
    vehicle_flat_from_vehicle = np.eye(4)
    vehicle_flat_from_vehicle[:3, :3] = rotation_vehicle_flat_from_vehicle
    viewpoint = np.dot(vehicle_flat_from_vehicle, ref_to_ego)
    points = view_points(pc.points[:3, :], viewpoint, normalize=False)
    self.points_lidar = points
    return points
def get_lidar_to_image_transform(nusc, pointsensor, camera_sensor):
    tms = []
    intrinsics = []
    cam_paths = []
    for chan in CAM_CHANS:
        cam = camera_sensor[chan]
        # Points live in the point sensor frame, so they need to be transformed via global to the image plane.
        # First step: transform the point cloud to the ego vehicle frame for the timestamp of the sweep.
        lidar_cs_record = nusc.get('calibrated_sensor', pointsensor['calibrated_sensor_token'])
        car_from_lidar = transform_matrix(
            lidar_cs_record["translation"], Quaternion(lidar_cs_record["rotation"]), inverse=False
        )

        # Second step: transform to the global frame.
        lidar_poserecord = nusc.get('ego_pose', pointsensor['ego_pose_token'])
        global_from_car = transform_matrix(
            lidar_poserecord["translation"],
            Quaternion(lidar_poserecord["rotation"]),
            inverse=False,
        )

        # Third step: transform into the ego vehicle frame for the timestamp of the image.
        cam_poserecord = nusc.get('ego_pose', cam['ego_pose_token'])
        car_from_global = transform_matrix(
            cam_poserecord["translation"],
            Quaternion(cam_poserecord["rotation"]),
            inverse=True,
        )

        # Fourth step: transform into the camera.
        cam_cs_record = nusc.get('calibrated_sensor', cam['calibrated_sensor_token'])
        cam_from_car = transform_matrix(
            cam_cs_record["translation"], Quaternion(cam_cs_record["rotation"]), inverse=True
        )

        tm = reduce(
            np.dot,
            [cam_from_car, car_from_global, global_from_car, car_from_lidar],
        )
        cam_path, _, intrinsic = nusc.get_sample_data(cam['token'])
        tms.append(tm)
        intrinsics.append(intrinsic)
        cam_paths.append(cam_path)
    return tms, intrinsics, cam_paths
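# Hedged usage sketch for get_lidar_to_image_transform: apply one of the returned lidar-to-camera
# matrices to homogeneous lidar points and then project with the matching intrinsic. `lidar_pc`
# (a LidarPointCloud in the lidar frame), nusc, pointsensor and camera_sensor are assumptions.
import numpy as np

tms, intrinsics, cam_paths = get_lidar_to_image_transform(nusc, pointsensor, camera_sensor)
points = lidar_pc.points[:3, :]                                   # <3, n> in the lidar frame
points_h = np.vstack((points, np.ones((1, points.shape[1]))))
cam_points = tms[0] @ points_h                                    # lidar -> first camera frame
uv_h = intrinsics[0] @ (cam_points[:3, :] / cam_points[2:3, :])   # <3, n>: homogeneous pixel coords (u, v, 1)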
def load_position_raw(self, sample_lidar_token):
    lidar_data = self.NuScenes_data.get("sample_data", sample_lidar_token)
    ego_pose = self.NuScenes_data.get("ego_pose", lidar_data["ego_pose_token"])
    calibrated_sensor = self.NuScenes_data.get("calibrated_sensor", lidar_data["calibrated_sensor_token"])
    # Homogeneous transformation matrix from car frame to world frame.
    global_from_car = transform_matrix(ego_pose['translation'],
                                       Quaternion(ego_pose['rotation']), inverse=False)
    return global_from_car
def load_cloud_raw(self, sample_lidar_token):
    lidar_data = self.NuScenes_data.get("sample_data", sample_lidar_token)
    lidar_filepath = self.NuScenes_data.get_sample_data_path(sample_lidar_token)
    ego_pose = self.NuScenes_data.get("ego_pose", lidar_data["ego_pose_token"])
    calibrated_sensor = self.NuScenes_data.get("calibrated_sensor", lidar_data["calibrated_sensor_token"])
    # Homogeneous transformation matrix from sensor coordinate frame to ego car frame.
    car_from_sensor = transform_matrix(calibrated_sensor['translation'],
                                       Quaternion(calibrated_sensor['rotation']), inverse=False)
    lidar_pointcloud = LidarPointCloud.from_file(lidar_filepath)
    # The lidar pointcloud is defined in the sensor's reference frame.
    # We want it in the car's reference frame, so we transform each point.
    lidar_pointcloud.transform(car_from_sensor)
    return lidar_pointcloud.points[:3, :].T
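# Hedged sketch combining the two loaders above: load_cloud_raw returns points in the ego-car
# frame, so applying the car-to-global matrix from load_position_raw expresses them in the
# global frame. `self` refers to the same dataset object; sample_lidar_token is assumed to exist.
import numpy as np

points_car = self.load_cloud_raw(sample_lidar_token)              # <n, 3> in the car frame
global_from_car = self.load_position_raw(sample_lidar_token)
points_h = np.hstack((points_car, np.ones((points_car.shape[0], 1))))
points_global = (global_from_car @ points_h.T)[:3, :].T           # <n, 3> in the global frame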
def _read_radar_data(self, sample_data_token):
    sd_record = self.nusc.get("sample_data", sample_data_token)
    cs_record = self.nusc.get("calibrated_sensor", sd_record["calibrated_sensor_token"])
    sensor_record = self.nusc.get("sensor", cs_record["sensor_token"])
    assert sensor_record["modality"] == "radar"
    # currently using only keyframes
    assert sd_record["is_key_frame"]
    data_path = self.nusc.get_sample_data_path(sample_data_token)
    radar_point_cloud = RadarPointCloud.from_file(data_path)
    points = tf.convert_to_tensor(radar_point_cloud.points.transpose())
    radar_extrinsics = transform_matrix(cs_record["translation"], Quaternion(cs_record["rotation"]))
    return {"{}_points": tf.io.serialize_tensor(points)}, radar_extrinsics
def __init__(self, ego_pose, cam_calibrated, lidar_calibrated):
    """
    :param ego_pose: ego_pose dictionary from NuScenes
    :param cam_calibrated: calibrated_sensor dictionary from NuScenes
    :param lidar_calibrated: calibrated_sensor dictionary from NuScenes
    """
    self.t_global_car = transform_matrix(ego_pose['translation'],
                                         Quaternion(ego_pose['rotation']), inverse=False)
    self.t_car_global = transform_matrix(ego_pose['translation'],
                                         Quaternion(ego_pose['rotation']), inverse=True)
    self.t_car_lidar = transform_matrix(lidar_calibrated['translation'],
                                        Quaternion(lidar_calibrated['rotation']), inverse=False)
    self.t_lidar_car = transform_matrix(lidar_calibrated['translation'],
                                        Quaternion(lidar_calibrated['rotation']), inverse=True)
    self.t_car_cam = transform_matrix(cam_calibrated['translation'],
                                      Quaternion(cam_calibrated['rotation']), inverse=False)
    self.t_cam_car = transform_matrix(cam_calibrated['translation'],
                                      Quaternion(cam_calibrated['rotation']), inverse=True)
    camera_intrinsic = np.array(cam_calibrated['camera_intrinsic'])
    self.t_img_cam = np.eye(4)
    self.t_img_cam[:camera_intrinsic.shape[0], :camera_intrinsic.shape[1]] = camera_intrinsic
    self.ego_pose = ego_pose
    self.lidar_calibrated = lidar_calibrated
    self.cam_calibrated = cam_calibrated
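# Hedged usage sketch for the calibration container above: chaining its matrices maps
# homogeneous lidar points all the way into pixel coordinates. `calib` is an instance of the
# class and `points_lidar` is a <3, n> array in the lidar frame (both assumptions).
import numpy as np

t_img_lidar = calib.t_img_cam @ calib.t_cam_car @ calib.t_car_lidar
points_h = np.vstack((points_lidar, np.ones((1, points_lidar.shape[1]))))
img_points = t_img_lidar @ points_h
uv = img_points[:2, :] / img_points[2:3, :]    # normalize by depth to get pixel coordinates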
current_lidar = first_lidar
lidar_filenames = []
poses = []
if not os.path.exists(velodyne_folder):
    print("Creating output folder: {}".format(velodyne_folder))
    os.makedirs(velodyne_folder)
original = []
while current_lidar != "":
    lidar_data = nusc.get('sample_data', current_lidar)
    calib_data = nusc.get("calibrated_sensor", lidar_data["calibrated_sensor_token"])
    egopose_data = nusc.get('ego_pose', lidar_data["ego_pose_token"])
    car_to_velo = geoutils.transform_matrix(calib_data["translation"], Quaternion(calib_data["rotation"]))
    pose_car = geoutils.transform_matrix(egopose_data["translation"], Quaternion(egopose_data["rotation"]))
    pose = np.dot(pose_car, car_to_velo)
    scan = np.fromfile(os.path.join(dataroot, lidar_data["filename"]), dtype=np.float32)
    points = scan.reshape((-1, 5))[:, :4]
    # ensure that remission is in [0, 1]
    max_remission = np.max(points[:, 3])
    min_remission = np.min(points[:, 3])
    points[:, 3] = (points[:, 3] - min_remission) / (max_remission - min_remission)
    output_filename = os.path.join(velodyne_folder, "{:05d}.bin".format(len(lidar_filenames)))
    points.tofile(output_filename)
def get_pc_from_file_multisweep(nusc,
                                sample_rec,
                                chan: str,
                                ref_chan: str,
                                nsweeps: int = 3,
                                min_distance: float = 1.0,
                                invalid_states=None,
                                dynprop_states=None,
                                ambig_states=None):
    """
    Returns a point cloud of multiple aggregated sweeps.
    Original function can be found at: https://github.com/nutonomy/nuscenes-devkit/blob/ae022aba3b37f07202ea402f8278979097738369/python-sdk/nuscenes/utils/data_classes.py#L56.
    Function is modified to accept additional input parameters, just like the get_pc_from_file function.
    This enables the filtering by invalid_states, dynprop_states and ambig_states.
    Arguments:
        nusc: A NuScenes instance, <NuScenes>.
        sample_rec: The current sample, <dict>.
        chan: The radar/lidar channel from which we track back n sweeps to aggregate the point cloud, <str>.
        ref_chan: The reference channel of the current sample_rec that the point clouds are mapped to, <str>.
        nsweeps: Number of sweeps to aggregate, <int>.
        min_distance: Distance below which points are discarded, <float>.
        invalid_states: Radar states to be kept, <int List>.
        dynprop_states: Radar states to be kept, <int List>. Use [0, 2, 6] for moving objects only.
        ambig_states: Radar states to be kept, <int List>.
    """
    # Define
    if chan == 'LIDAR_TOP':
        points = np.zeros((get_number_of_lidar_pc_dimensions(), 0))
        all_pc = LidarPointCloud(points)
    else:
        points = np.zeros((get_number_of_radar_pc_dimensions(), 0))
        all_pc = RadarPointCloud(points)
    all_times = np.zeros((1, 0))

    # Get reference pose and timestamp.
    ref_sd_token = sample_rec['data'][ref_chan]
    ref_sd_rec = nusc.get('sample_data', ref_sd_token)
    ref_pose_rec = nusc.get('ego_pose', ref_sd_rec['ego_pose_token'])
    ref_cs_rec = nusc.get('calibrated_sensor', ref_sd_rec['calibrated_sensor_token'])
    ref_time = 1e-6 * ref_sd_rec['timestamp']

    # Homogeneous transform from ego car frame to reference frame.
    ref_from_car = transform_matrix(ref_cs_rec['translation'], Quaternion(ref_cs_rec['rotation']), inverse=True)

    # Homogeneous transformation matrix from global to _current_ ego car frame.
    car_from_global = transform_matrix(ref_pose_rec['translation'], Quaternion(ref_pose_rec['rotation']),
                                       inverse=True)

    # Aggregate current and previous sweeps.
    sample_data_token = sample_rec['data'][chan]
    current_sd_rec = nusc.get('sample_data', sample_data_token)
    for _ in range(nsweeps):
        # Load up the pointcloud.
        if chan == 'LIDAR_TOP':
            current_pc = LidarPointCloud.from_file(
                file_name=os.path.join(nusc.dataroot, current_sd_rec['filename']))
        else:
            current_pc = RadarPointCloud.from_file(
                file_name=os.path.join(nusc.dataroot, current_sd_rec['filename']),
                invalid_states=invalid_states,
                dynprop_states=dynprop_states,
                ambig_states=ambig_states)

        # Get past pose.
        current_pose_rec = nusc.get('ego_pose', current_sd_rec['ego_pose_token'])
        global_from_car = transform_matrix(current_pose_rec['translation'],
                                           Quaternion(current_pose_rec['rotation']), inverse=False)

        # Homogeneous transformation matrix from sensor coordinate frame to ego car frame.
        current_cs_rec = nusc.get('calibrated_sensor', current_sd_rec['calibrated_sensor_token'])
        car_from_current = transform_matrix(current_cs_rec['translation'],
                                            Quaternion(current_cs_rec['rotation']), inverse=False)

        # Fuse four transformation matrices into one and perform transform.
        trans_matrix = reduce(np.dot, [ref_from_car, car_from_global, global_from_car, car_from_current])
        current_pc.transform(trans_matrix)

        # Remove close points and add time vector.
        current_pc.remove_close(min_distance)
        time_lag = ref_time - 1e-6 * current_sd_rec['timestamp']  # Positive difference.
        times = time_lag * np.ones((1, current_pc.nbr_points()))
        all_times = np.hstack((all_times, times))

        # Merge with key pc.
        all_pc.points = np.hstack((all_pc.points, current_pc.points))

        # Abort if there are no previous sweeps.
        if current_sd_rec['prev'] == '':
            break
        else:
            current_sd_rec = nusc.get('sample_data', current_sd_rec['prev'])

    return all_pc, all_times
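# Hedged example call for get_pc_from_file_multisweep above (argument values are illustrative):
# aggregate 3 radar sweeps into the LIDAR_TOP reference frame, keeping only moving targets via
# dynprop_states as suggested in the docstring. nusc and sample_token are assumed to exist.
sample_rec = nusc.get('sample', sample_token)
radar_pc, radar_times = get_pc_from_file_multisweep(
    nusc, sample_rec,
    chan='RADAR_FRONT', ref_chan='LIDAR_TOP',
    nsweeps=3, min_distance=1.0,
    invalid_states=None, dynprop_states=[0, 2, 6], ambig_states=None)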
def evaluate(split): params = read_params(FLAGS.param) nusc = NuScenes(version='v1.0-trainval', dataroot=FLAGS.nuscenes, verbose=True) sensor = 'LIDAR_TOP' kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2) meta = { 'use_camera': False, 'use_lidar': True, 'use_radar': False, 'use_map': False, 'use_external': False, } results = {} results_0_3 = {} detection_graph = tf.Graph() with detection_graph.as_default(): od_graph_def = tf.GraphDef() with tf.gfile.GFile(FLAGS.graph, 'rb') as fid: serialized_graph = fid.read() od_graph_def.ParseFromString(serialized_graph) for node in od_graph_def.node: if 'BatchMultiClassNonMaxSuppression' in node.name: node.device = '/device:CPU:0' tf.import_graph_def(od_graph_def, name='') with detection_graph.as_default(): config = tf.ConfigProto() config.gpu_options.allow_growth = False with tf.Session(graph=detection_graph, config=config) as sess: image_tensor = detection_graph.get_tensor_by_name('image_tensor:0') detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0') detection_boxes_inclined = detection_graph.get_tensor_by_name('detection_boxes_3d:0') detection_scores = detection_graph.get_tensor_by_name('detection_scores:0') detection_classes = detection_graph.get_tensor_by_name('detection_classes:0') num_detections = detection_graph.get_tensor_by_name('num_detections:0') scene_splits = create_splits_scenes() # os.system('touch {}'.format()) inf_time_list = [] for scene in nusc.scene: if scene['name'] not in scene_splits[split]: continue current_sample_token = scene['first_sample_token'] last_sample_token = scene['last_sample_token'] sample_in_scene = True while sample_in_scene: if current_sample_token == last_sample_token: sample_in_scene = False sample = nusc.get('sample', current_sample_token) lidar_top_data = nusc.get('sample_data', sample['data'][sensor]) # Get global pose and calibration data ego_pose = nusc.get('ego_pose', lidar_top_data['ego_pose_token']) calib_sensor = nusc.get('calibrated_sensor', lidar_top_data['calibrated_sensor_token']) ego_to_global = transform_matrix(ego_pose['translation'], Quaternion(ego_pose['rotation'])) lidar_to_ego = transform_matrix(calib_sensor['translation'], Quaternion(calib_sensor['rotation'])) # Read input data filename_prefix = os.path.splitext(os.path.splitext(lidar_top_data['filename'])[0])[0] image_stacked, det_mask, image_ground, image_zmax = read_images(FLAGS.data, filename_prefix) # Inference start_time = time.time() (boxes_aligned, boxes_inclined, scores, classes, num) = sess.run( [detection_boxes, detection_boxes_inclined, detection_scores, detection_classes, num_detections], feed_dict={image_tensor: image_stacked}) inf_time = time.time() - start_time print('Inference time:', inf_time) inf_time_list.append(inf_time) # Evaluate object detection label_map = label_map_util.load_labelmap(FLAGS.label_map) categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=10, use_display_name=True) category_index = label_map_util.create_category_index(categories) boxes = [] boxes_0_3 = [] scores = np.squeeze(scores) for i in range(scores.shape[0]): if scores[i] > .230: object_class = category_index[int(np.squeeze(classes)[i])]['name'] box = calculate_object_box(tuple(np.squeeze(boxes_aligned)[i]), tuple(np.squeeze(boxes_inclined)[i]), image_ground, image_zmax, object_class, scores[i], params) # Transformation box coordinate system to nuscenes lidar coordinate system box.rotate(kitti_to_nu_lidar) # Transformation nuscenes lidar coordinate system to ego vehicle frame 
box.rotate(Quaternion(matrix=lidar_to_ego[:3, :3])) box.translate(lidar_to_ego[:3, 3]) # Transformation ego vehicle frame to global frame box.rotate(Quaternion(matrix=ego_to_global[:3, :3])) box.translate(ego_to_global[:3, 3]) boxes.append(box) for i in range(scores.shape[0]): if scores[i] > .225: object_class = category_index[int(np.squeeze(classes)[i])]['name'] box = calculate_object_box(tuple(np.squeeze(boxes_aligned)[i]), tuple(np.squeeze(boxes_inclined)[i]), image_ground, image_zmax, object_class, scores[i], params) # Transformation box coordinate system to nuscenes lidar coordinate system box.rotate(kitti_to_nu_lidar) # Transformation nuscenes lidar coordinate system to ego vehicle frame box.rotate(Quaternion(matrix=lidar_to_ego[:3, :3])) box.translate(lidar_to_ego[:3, 3]) # Transformation ego vehicle frame to global frame box.rotate(Quaternion(matrix=ego_to_global[:3, :3])) box.translate(ego_to_global[:3, 3]) boxes_0_3.append(box) # Convert boxes to nuScenes detection challenge result format. sample_results = [box_to_sample_result(current_sample_token, box) for box in boxes] results[current_sample_token] = sample_results sample_results_0_3 = [box_to_sample_result(current_sample_token, box) for box in boxes_0_3] results_0_3[current_sample_token] = sample_results_0_3 current_sample_token = sample['next'] f_info = open( os.path.join(FLAGS.output, 'inference_time.txt'),'w+') average_inf_time = sum(inf_time_list) / float(len(inf_time_list)) f_info.write('average time:{}\n'.format(average_inf_time)) f_info.write(str(inf_time_list)) submission = { 'meta': meta, 'results': results } submission_path = os.path.join(FLAGS.output, 'submission_0_3nn30.json') with open(submission_path, 'w') as f: json.dump(submission, f, indent=2) submission_0_3 = { 'meta': meta, 'results': results_0_3 } submission_path_0_3 = os.path.join(FLAGS.output, 'submission_0_3nn25.json') with open(submission_path_0_3, 'w') as f: json.dump(submission_0_3, f, indent=2)
def fill_trainval_infos(data_path, nusc, train_scenes, val_scenes, test=False, max_sweeps=10):
    train_nusc_infos = []
    val_nusc_infos = []
    progress_bar = tqdm.tqdm(total=len(nusc.sample), desc='create_info', dynamic_ncols=True)

    ref_chan = 'LIDAR_TOP'  # The reference channel of the current sample_rec that the point clouds are mapped to.
    chan = 'LIDAR_TOP'  # The channel from which we track back n sweeps to aggregate the point cloud.

    for index, sample in enumerate(nusc.sample):
        progress_bar.update()

        ref_sd_token = sample['data'][ref_chan]
        ref_sd_rec = nusc.get('sample_data', ref_sd_token)
        ref_cs_rec = nusc.get('calibrated_sensor', ref_sd_rec['calibrated_sensor_token'])
        ref_pose_rec = nusc.get('ego_pose', ref_sd_rec['ego_pose_token'])
        ref_time = 1e-6 * ref_sd_rec['timestamp']

        ref_lidar_path, ref_boxes, _ = get_sample_data(nusc, ref_sd_token)

        ref_cam_front_token = sample['data']['CAM_FRONT']
        ref_cam_path, _, ref_cam_intrinsic = nusc.get_sample_data(ref_cam_front_token)

        # Homogeneous transform from ego car frame to reference frame
        ref_from_car = transform_matrix(
            ref_cs_rec['translation'], Quaternion(ref_cs_rec['rotation']), inverse=True
        )

        # Homogeneous transformation matrix from global to _current_ ego car frame
        car_from_global = transform_matrix(
            ref_pose_rec['translation'], Quaternion(ref_pose_rec['rotation']), inverse=True,
        )

        info = {
            'lidar_path': Path(ref_lidar_path).relative_to(data_path).__str__(),
            'cam_front_path': Path(ref_cam_path).relative_to(data_path).__str__(),
            'cam_intrinsic': ref_cam_intrinsic,
            'token': sample['token'],
            'sweeps': [],
            'ref_from_car': ref_from_car,
            'car_from_global': car_from_global,
            'timestamp': ref_time,
        }

        sample_data_token = sample['data'][chan]
        curr_sd_rec = nusc.get('sample_data', sample_data_token)
        sweeps = []
        while len(sweeps) < max_sweeps - 1:
            if curr_sd_rec['prev'] == '':
                if len(sweeps) == 0:
                    sweep = {
                        'lidar_path': Path(ref_lidar_path).relative_to(data_path).__str__(),
                        'sample_data_token': curr_sd_rec['token'],
                        'transform_matrix': None,
                        'time_lag': curr_sd_rec['timestamp'] * 0,
                    }
                    sweeps.append(sweep)
                else:
                    sweeps.append(sweeps[-1])
            else:
                curr_sd_rec = nusc.get('sample_data', curr_sd_rec['prev'])

                # Get past pose
                current_pose_rec = nusc.get('ego_pose', curr_sd_rec['ego_pose_token'])
                global_from_car = transform_matrix(
                    current_pose_rec['translation'], Quaternion(current_pose_rec['rotation']), inverse=False,
                )

                # Homogeneous transformation matrix from sensor coordinate frame to ego car frame.
                current_cs_rec = nusc.get(
                    'calibrated_sensor', curr_sd_rec['calibrated_sensor_token']
                )
                car_from_current = transform_matrix(
                    current_cs_rec['translation'], Quaternion(current_cs_rec['rotation']), inverse=False,
                )

                tm = reduce(np.dot, [ref_from_car, car_from_global, global_from_car, car_from_current])

                lidar_path = nusc.get_sample_data_path(curr_sd_rec['token'])
                time_lag = ref_time - 1e-6 * curr_sd_rec['timestamp']

                sweep = {
                    'lidar_path': Path(lidar_path).relative_to(data_path).__str__(),
                    'sample_data_token': curr_sd_rec['token'],
                    'transform_matrix': tm,
                    'global_from_car': global_from_car,
                    'car_from_current': car_from_current,
                    'time_lag': time_lag,
                }
                sweeps.append(sweep)

        info['sweeps'] = sweeps

        assert len(info['sweeps']) == max_sweeps - 1, \
            f"sweep {curr_sd_rec['token']} only has {len(info['sweeps'])} sweeps, " \
            f"you should duplicate to sweep num {max_sweeps - 1}"

        if not test:
            annotations = [nusc.get('sample_annotation', token) for token in sample['anns']]

            # the filtering gives 0.5~1 map improvement
            num_lidar_pts = np.array([anno['num_lidar_pts'] for anno in annotations])
            num_radar_pts = np.array([anno['num_radar_pts'] for anno in annotations])
            mask = (num_lidar_pts + num_radar_pts > 0)

            locs = np.array([b.center for b in ref_boxes]).reshape(-1, 3)
            dims = np.array([b.wlh for b in ref_boxes]).reshape(-1, 3)[:, [1, 0, 2]]  # wlh ==> dxdydz (lwh)
            velocity = np.array([b.velocity for b in ref_boxes]).reshape(-1, 3)
            rots = np.array([quaternion_yaw(b.orientation) for b in ref_boxes]).reshape(-1, 1)
            names = np.array([b.name for b in ref_boxes])
            tokens = np.array([b.token for b in ref_boxes])
            gt_boxes = np.concatenate([locs, dims, rots, velocity[:, :2]], axis=1)

            assert len(annotations) == len(gt_boxes) == len(velocity)

            info['gt_boxes'] = gt_boxes[mask, :]
            info['gt_boxes_velocity'] = velocity[mask, :]
            info['gt_names'] = np.array([map_name_from_general_to_detection[name] for name in names])[mask]
            info['gt_boxes_token'] = tokens[mask]
            info['num_lidar_pts'] = num_lidar_pts[mask]
            info['num_radar_pts'] = num_radar_pts[mask]

        if sample['scene_token'] in train_scenes:
            train_nusc_infos.append(info)
        else:
            val_nusc_infos.append(info)

    progress_bar.close()
    return train_nusc_infos, val_nusc_infos
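# Hedged sketch of how the per-sweep 'transform_matrix' stored by fill_trainval_infos above is
# meant to be used: it maps points from a past sweep's sensor frame directly into the reference
# (keyframe) lidar frame. `info` and `data_path` are assumed to come from that function.
import os
import numpy as np
from nuscenes.utils.data_classes import LidarPointCloud

sweep = info['sweeps'][0]
if sweep['transform_matrix'] is not None:
    sweep_pc = LidarPointCloud.from_file(os.path.join(data_path, sweep['lidar_path']))
    sweep_pc.transform(sweep['transform_matrix'])   # points now live in the reference lidar frame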
def main(): SCENE_SPLITS["mini-val"] = SCENE_SPLITS["val"] if not os.path.exists(DATA_PATH): os.mkdir(DATA_PATH) if not os.path.exists(OUT_PATH): os.mkdir(OUT_PATH) for split in SPLITS: data_path = DATA_PATH # + '{}/'.format(SPLITS[split]) nusc = NuScenes(version=SPLITS[split], dataroot=data_path, verbose=True) out_path = OUT_PATH + "{}.json".format(split) categories_info = [{ "name": CATS[i], "id": i + 1 } for i in range(len(CATS))] ret = { "images": [], "annotations": [], "categories": categories_info, "videos": [], "attributes": ATTRIBUTE_TO_ID, } num_images = 0 num_anns = 0 num_videos = 0 # A "sample" in nuScenes refers to a timestamp with 6 cameras and 1 LIDAR. for sample in nusc.sample: scene_name = nusc.get("scene", sample["scene_token"])["name"] if not (split in ["mini", "test" ]) and not (scene_name in SCENE_SPLITS[split]): continue if sample["prev"] == "": print("scene_name", scene_name) num_videos += 1 ret["videos"].append({ "id": num_videos, "file_name": scene_name }) frame_ids = {k: 0 for k in sample["data"]} track_ids = {} # We decompose a sample into 6 images in our case. for sensor_name in sample["data"]: if sensor_name in USED_SENSOR: image_token = sample["data"][sensor_name] image_data = nusc.get("sample_data", image_token) num_images += 1 # Complex coordinate transform. This will take time to understand. sd_record = nusc.get("sample_data", image_token) cs_record = nusc.get("calibrated_sensor", sd_record["calibrated_sensor_token"]) pose_record = nusc.get("ego_pose", sd_record["ego_pose_token"]) global_from_car = transform_matrix( pose_record["translation"], Quaternion(pose_record["rotation"]), inverse=False, ) car_from_sensor = transform_matrix( cs_record["translation"], Quaternion(cs_record["rotation"]), inverse=False, ) trans_matrix = np.dot(global_from_car, car_from_sensor) _, boxes, camera_intrinsic = nusc.get_sample_data( image_token, box_vis_level=BoxVisibility.ANY) calib = np.eye(4, dtype=np.float32) calib[:3, :3] = camera_intrinsic calib = calib[:3] frame_ids[sensor_name] += 1 # image information in COCO format image_info = { "id": num_images, "file_name": image_data["filename"], "calib": calib.tolist(), "video_id": num_videos, "frame_id": frame_ids[sensor_name], "sensor_id": SENSOR_ID[sensor_name], "sample_token": sample["token"], "trans_matrix": trans_matrix.tolist(), "width": sd_record["width"], "height": sd_record["height"], "pose_record_trans": pose_record["translation"], "pose_record_rot": pose_record["rotation"], "cs_record_trans": cs_record["translation"], "cs_record_rot": cs_record["rotation"], } ret["images"].append(image_info) anns = [] for box in boxes: det_name = category_to_detection_name(box.name) if det_name is None: continue num_anns += 1 v = np.dot(box.rotation_matrix, np.array([1, 0, 0])) yaw = -np.arctan2(v[2], v[0]) box.translate(np.array([0, box.wlh[2] / 2, 0])) category_id = CAT_IDS[det_name] amodel_center = project_to_image( np.array( [ box.center[0], box.center[1] - box.wlh[2] / 2, box.center[2], ], np.float32, ).reshape(1, 3), calib, )[0].tolist() sample_ann = nusc.get("sample_annotation", box.token) instance_token = sample_ann["instance_token"] if not (instance_token in track_ids): track_ids[instance_token] = len(track_ids) + 1 attribute_tokens = sample_ann["attribute_tokens"] attributes = [ nusc.get("attribute", att_token)["name"] for att_token in attribute_tokens ] att = "" if len(attributes) == 0 else attributes[0] if len(attributes) > 1: print(attributes) import pdb pdb.set_trace() track_id = track_ids[instance_token] vel = 
nusc.box_velocity(box.token) # global frame vel = np.dot( np.linalg.inv(trans_matrix), np.array([vel[0], vel[1], vel[2], 0], np.float32), ).tolist() # instance information in COCO format ann = { "id": num_anns, "image_id": num_images, "category_id": category_id, "dim": [box.wlh[2], box.wlh[0], box.wlh[1]], "location": [box.center[0], box.center[1], box.center[2]], "depth": box.center[2], "occluded": 0, "truncated": 0, "rotation_y": yaw, "amodel_center": amodel_center, "iscrowd": 0, "track_id": track_id, "attributes": ATTRIBUTE_TO_ID[att], "velocity": vel, } bbox = KittiDB.project_kitti_box_to_image( copy.deepcopy(box), camera_intrinsic, imsize=(1600, 900)) alpha = _rot_y2alpha( yaw, (bbox[0] + bbox[2]) / 2, camera_intrinsic[0, 2], camera_intrinsic[0, 0], ) ann["bbox"] = [ bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1], ] ann["area"] = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) ann["alpha"] = alpha anns.append(ann) # Filter out bounding boxes outside the image visable_anns = [] for i in range(len(anns)): vis = True for j in range(len(anns)): if anns[i]["depth"] - min(anns[i][ "dim"]) / 2 > anns[j]["depth"] + max( anns[j]["dim"]) / 2 and _bbox_inside( anns[i]["bbox"], anns[j]["bbox"]): vis = False break if vis: visable_anns.append(anns[i]) else: pass for ann in visable_anns: ret["annotations"].append(ann) if DEBUG: img_path = data_path + image_info["file_name"] img = cv2.imread(img_path) img_3d = img.copy() for ann in visable_anns: bbox = ann["bbox"] cv2.rectangle( img, (int(bbox[0]), int(bbox[1])), (int(bbox[2] + bbox[0]), int(bbox[3] + bbox[1])), (0, 0, 255), 3, lineType=cv2.LINE_AA, ) box_3d = compute_box_3d(ann["dim"], ann["location"], ann["rotation_y"]) box_2d = project_to_image(box_3d, calib) img_3d = draw_box_3d(img_3d, box_2d) pt_3d = unproject_2d_to_3d(ann["amodel_center"], ann["depth"], calib) pt_3d[1] += ann["dim"][0] / 2 print("location", ann["location"]) print("loc model", pt_3d) pt_2d = np.array( [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32, ) pt_3d = unproject_2d_to_3d(pt_2d, ann["depth"], calib) pt_3d[1] += ann["dim"][0] / 2 print("loc ", pt_3d) cv2.imshow("img", img) cv2.imshow("img_3d", img_3d) cv2.waitKey() nusc.render_sample_data(image_token) plt.show() print("reordering images") images = ret["images"] video_sensor_to_images = {} for image_info in images: tmp_seq_id = image_info["video_id"] * 20 + image_info["sensor_id"] if tmp_seq_id in video_sensor_to_images: video_sensor_to_images[tmp_seq_id].append(image_info) else: video_sensor_to_images[tmp_seq_id] = [image_info] ret["images"] = [] for tmp_seq_id in sorted(video_sensor_to_images): ret["images"] = ret["images"] + video_sensor_to_images[tmp_seq_id] print("{} {} images {} boxes".format(split, len(ret["images"]), len(ret["annotations"]))) print("out_path", out_path) json.dump(ret, open(out_path, "w"))
def _read_sample(self, sample: typing.Tuple[dict, int]): radars = ["RADAR_FRONT", "RADAR_FRONT_LEFT", "RADAR_FRONT_RIGHT"] cameras = ["CAM_FRONT"] sample_id = self.make_sample_id(sample) sample, sample_index = sample sample_data_token: str = sample["data"]["LIDAR_TOP"] lidar_sample_data = self.nusc.get("sample_data", sample_data_token) assert lidar_sample_data["is_key_frame"] data_path, box_list, cam_intrinsic = self.nusc.get_sample_data( sample_data_token) # POINT CLOUD [(x y z I) x P]. Intensity from 0.0 to 255.0 point_cloud_orig = LidarPointCloud.from_file(data_path) if point_cloud_orig.points.dtype != np.float32: raise RuntimeError("Point cloud has wrong data type.") # -> [P x (x y z I)] point_cloud_orig = point_cloud_orig.points.transpose() camera_data = {} radar_data = {} box_data = {} lidarseg_data = {} if self.read_semantics: lidarseg_data = self._read_lidarseg_data(sample_data_token, point_cloud_orig) if self.read_bounding_boxes: box_data = self._read_bounding_boxes(box_list, point_cloud_orig, sample) # rotate point cloud 90 degrees around z-axis, # so that x-axis faces front of vehicle x = point_cloud_orig[:, 0:1] y = point_cloud_orig[:, 1:2] point_cloud = np.concatenate((y, -x, point_cloud_orig[:, 2:4]), axis=-1) # LiDAR extrinsic calibration calibration_lidar = self.nusc.get( "calibrated_sensor", lidar_sample_data["calibrated_sensor_token"]) tf_lidar = transform_matrix(calibration_lidar["translation"], Quaternion(calibration_lidar["rotation"])) # vehicle -> point cloud coords (turned by 90 degrees) tf_vehicle_pc = np.linalg.inv( np.dot(tf_lidar, NuscenesReader.turn_matrix)) if self.read_camera: # CAMERAS camera_data: [{ str: typing.Any }] = [ self._read_sensor_data_and_extrinsics(sample, cam_name, tf_vehicle_pc, self._read_camera_data) for cam_name in cameras ] camera_data = {k: v for d in camera_data for k, v in d.items()} if self.read_radar: # RADARS radar_data: [{ str: typing.Any }] = [ self._read_sensor_data_and_extrinsics(sample, radar_name, tf_vehicle_pc, self._read_radar_data) for radar_name in radars ] radar_data = {k: v for d in radar_data for k, v in d.items()} assert box_data.keys().isdisjoint(camera_data.keys()) assert box_data.keys().isdisjoint(radar_data.keys()) assert box_data.keys().isdisjoint(lidarseg_data.keys()) # return feature array. Add sample ID. d = { "sample_id": sample_id, "point_cloud": point_cloud, **box_data, **camera_data, **radar_data, **lidarseg_data, } return d
def from_file_multisweep(cls,
                         nusc: 'NuScenes',
                         sample_rec: Dict,
                         chan: str,
                         ref_chan: str,
                         nsweeps: int = 5,
                         min_distance: float = 1.0) -> Tuple['PointCloud', np.ndarray]:
    """
    Return a point cloud that aggregates multiple sweeps.
    As every sweep is in a different coordinate frame, we need to map the coordinates to a single reference frame.
    As every sweep has a different timestamp, we need to account for that in the transformations and timestamps.
    :param nusc: A NuScenes instance.
    :param sample_rec: The current sample.
    :param chan: The lidar/radar channel from which we track back n sweeps to aggregate the point cloud.
    :param ref_chan: The reference channel of the current sample_rec that the point clouds are mapped to.
    :param nsweeps: Number of sweeps to aggregate.
    :param min_distance: Distance below which points are discarded.
    :return: (all_pc, all_times). The aggregated point cloud and timestamps.
    """
    # Init.
    points = np.zeros((cls.nbr_dims(), 0))
    all_pc = cls(points)
    all_times = np.zeros((1, 0))

    # Get reference pose and timestamp.
    ref_sd_token = sample_rec['data'][ref_chan]
    ref_sd_rec = nusc.get('sample_data', ref_sd_token)
    ref_pose_rec = nusc.get('ego_pose', ref_sd_rec['ego_pose_token'])
    ref_cs_rec = nusc.get('calibrated_sensor', ref_sd_rec['calibrated_sensor_token'])
    ref_time = 1e-6 * ref_sd_rec['timestamp']

    # Homogeneous transform from ego car frame to reference frame.
    ref_from_car = transform_matrix(ref_cs_rec['translation'], Quaternion(ref_cs_rec['rotation']), inverse=True)

    # Homogeneous transformation matrix from global to _current_ ego car frame.
    car_from_global = transform_matrix(ref_pose_rec['translation'], Quaternion(ref_pose_rec['rotation']),
                                       inverse=True)

    # Aggregate current and previous sweeps.
    sample_data_token = sample_rec['data'][chan]
    current_sd_rec = nusc.get('sample_data', sample_data_token)
    for _ in range(nsweeps):
        # Load up the pointcloud and remove points close to the sensor.
        current_pc = cls.from_file(osp.join(nusc.dataroot, current_sd_rec['filename']))
        current_pc.remove_close(min_distance)

        # Get past pose.
        current_pose_rec = nusc.get('ego_pose', current_sd_rec['ego_pose_token'])
        global_from_car = transform_matrix(current_pose_rec['translation'],
                                           Quaternion(current_pose_rec['rotation']), inverse=False)

        # Homogeneous transformation matrix from sensor coordinate frame to ego car frame.
        current_cs_rec = nusc.get('calibrated_sensor', current_sd_rec['calibrated_sensor_token'])
        car_from_current = transform_matrix(current_cs_rec['translation'],
                                            Quaternion(current_cs_rec['rotation']), inverse=False)

        # Fuse four transformation matrices into one and perform transform.
        trans_matrix = reduce(np.dot, [ref_from_car, car_from_global, global_from_car, car_from_current])
        current_pc.transform(trans_matrix)

        # Add time vector which can be used as a temporal feature.
        time_lag = ref_time - 1e-6 * current_sd_rec['timestamp']  # Positive difference.
        times = time_lag * np.ones((1, current_pc.nbr_points()))
        all_times = np.hstack((all_times, times))

        # Merge with key pc.
        all_pc.points = np.hstack((all_pc.points, current_pc.points))

        # Abort if there are no previous sweeps.
        if current_sd_rec['prev'] == '':
            break
        else:
            current_sd_rec = nusc.get('sample_data', current_sd_rec['prev'])

    return all_pc, all_times
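# Hedged usage sketch: from_file_multisweep is a classmethod, so it is typically called on the
# devkit's LidarPointCloud or RadarPointCloud. nusc and my_sample (a sample record) are assumed
# to exist; the channel names are illustrative.
from nuscenes.utils.data_classes import LidarPointCloud

pc, times = LidarPointCloud.from_file_multisweep(
    nusc, my_sample, chan='LIDAR_TOP', ref_chan='LIDAR_TOP', nsweeps=5)
print(pc.points.shape, times.shape)   # (4, N) aggregated points and (1, N) time lags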
def load_annotations(self, idx): annotations = [] """ Converts nuScenes GT annotations to KITTI format. """ kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2) kitti_to_nu_lidar_inv = kitti_to_nu_lidar.inverse sample_token = self.sample_tokens[idx] sample = self.nusc.get('sample', sample_token) sample_annotation_tokens = sample['anns'] cam_front_token = sample['data'][self.cam_name] lidar_token = sample['data'][self.lidar_name] # Retrieve sensor records. sd_record_cam = self.nusc.get('sample_data', cam_front_token) sd_record_lid = self.nusc.get('sample_data', lidar_token) cs_record_cam = self.nusc.get('calibrated_sensor', sd_record_cam['calibrated_sensor_token']) cs_record_lid = self.nusc.get('calibrated_sensor', sd_record_lid['calibrated_sensor_token']) # Combine transformations and convert to KITTI format. # Note: cam uses same conventions in KITTI and nuScenes. lid_to_ego = transform_matrix(cs_record_lid['translation'], Quaternion(cs_record_lid['rotation']), inverse=False) ego_to_cam = transform_matrix(cs_record_cam['translation'], Quaternion(cs_record_cam['rotation']), inverse=True) velo_to_cam = np.dot(ego_to_cam, lid_to_ego) # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam. velo_to_cam_kitti = np.dot(velo_to_cam, kitti_to_nu_lidar.transformation_matrix) # Currently not used. imu_to_velo_kitti = np.zeros((3, 4), dtype=np.float32) # Dummy values. r0_rect = Quaternion(axis=[1, 0, 0], angle=0) # Dummy values. # Projection matrix. p_left_kitti = np.zeros((3, 4), dtype=np.float32) p_left_kitti[:3, :3] = cs_record_cam[ 'camera_intrinsic'] # Cameras are always rectified. p_left_kitti = p_left_kitti[:3, :3] # Create KITTI style transforms. velo_to_cam_rot = velo_to_cam_kitti[:3, :3] velo_to_cam_trans = velo_to_cam_kitti[:3, 3] # Check that the rotation has the same format as in KITTI. assert (velo_to_cam_rot.round(0) == np.array([[0, -1, 0], [0, 0, -1], [1, 0, 0]])).all() assert (velo_to_cam_trans[1:3] < 0).all() # Retrieve the token from the lidar. # Note that this may be confusing as the filename of the camera will include the timestamp of the lidar, # not the camera. filename_cam_full = sd_record_cam['filename'] filename_lid_full = sd_record_lid['filename'] if self.is_train: for sample_annotation_token in sample_annotation_tokens: sample_annotation = self.nusc.get('sample_annotation', sample_annotation_token) # Convert nuScenes category to nuScenes detection challenge category. detection_name = category_to_detection_name( sample_annotation['category_name']) if detection_name in self.classes: # Get box in LIDAR frame. _, box_lidar_nusc, _ = self.nusc.get_sample_data( lidar_token, box_vis_level=BoxVisibility.NONE, selected_anntokens=[sample_annotation_token]) box_lidar_nusc = box_lidar_nusc[0] # Convert from nuScenes to KITTI box format. box_cam_kitti = KittiDB.box_nuscenes_to_kitti( box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot), velo_to_cam_trans, r0_rect) # Convert quaternion to yaw angle. v = np.dot(box_cam_kitti.rotation_matrix, np.array([1, 0, 0])) yaw = -np.arctan2(v[2], v[0]) annotations.append({ "class": detection_name, "label": TYPE_ID_CONVERSION[detection_name], "dimensions": [ float(box_cam_kitti.wlh[2]), float(box_cam_kitti.wlh[0]), float(box_cam_kitti.wlh[1]) ], "locations": [ float(box_cam_kitti.center[0]), float(box_cam_kitti.center[1]), float(box_cam_kitti.center[2]) ], "rot_y": float(yaw) }) return annotations, p_left_kitti
def nuscenes_gt_to_kitti(self) -> None: """ Converts nuScenes GT annotations to KITTI format. """ kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2) kitti_to_nu_lidar_inv = kitti_to_nu_lidar.inverse imsize = (1600, 900) token_idx = 0 # Start tokens from 0. # Get assignment of scenes to splits. split_logs = create_splits_logs(self.split, self.nusc) # Create output folders. label_folder = os.path.join(self.nusc_kitti_dir, self.split, 'label_2') calib_folder = os.path.join(self.nusc_kitti_dir, self.split, 'calib') image_folder = os.path.join(self.nusc_kitti_dir, self.split, 'image_2') lidar_folder = os.path.join(self.nusc_kitti_dir, self.split, 'velodyne') for folder in [label_folder, calib_folder, image_folder, lidar_folder]: if not os.path.isdir(folder): os.makedirs(folder) # Use only the samples from the current split. sample_tokens = self._split_to_samples(split_logs) sample_tokens = sample_tokens[:self.image_count] tokens = [] for sample_token in sample_tokens: # Get sample data. sample = self.nusc.get('sample', sample_token) sample_annotation_tokens = sample['anns'] cam_front_token = sample['data'][self.cam_name] lidar_token = sample['data'][self.lidar_name] # Retrieve sensor records. sd_record_cam = self.nusc.get('sample_data', cam_front_token) sd_record_lid = self.nusc.get('sample_data', lidar_token) cs_record_cam = self.nusc.get( 'calibrated_sensor', sd_record_cam['calibrated_sensor_token']) cs_record_lid = self.nusc.get( 'calibrated_sensor', sd_record_lid['calibrated_sensor_token']) # Combine transformations and convert to KITTI format. # Note: cam uses same conventions in KITTI and nuScenes. lid_to_ego = transform_matrix(cs_record_lid['translation'], Quaternion( cs_record_lid['rotation']), inverse=False) ego_to_cam = transform_matrix(cs_record_cam['translation'], Quaternion( cs_record_cam['rotation']), inverse=True) velo_to_cam = np.dot(ego_to_cam, lid_to_ego) # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam. velo_to_cam_kitti = np.dot(velo_to_cam, kitti_to_nu_lidar.transformation_matrix) # Currently not used. imu_to_velo_kitti = np.zeros((3, 4)) # Dummy values. r0_rect = Quaternion(axis=[1, 0, 0], angle=0) # Dummy values. # Projection matrix. p_left_kitti = np.zeros((3, 4)) p_left_kitti[:3, :3] = cs_record_cam[ 'camera_intrinsic'] # Cameras are always rectified. # Create KITTI style transforms. velo_to_cam_rot = velo_to_cam_kitti[:3, :3] velo_to_cam_trans = velo_to_cam_kitti[:3, 3] # Check that the rotation has the same format as in KITTI. assert (velo_to_cam_rot.round(0) == np.array([[0, -1, 0], [0, 0, -1], [1, 0, 0]])).all() assert (velo_to_cam_trans[1:3] < 0).all() # Retrieve the token from the lidar. # Note that this may be confusing as the filename of the camera will include the timestamp of the lidar, # not the camera. filename_cam_full = sd_record_cam['filename'] filename_lid_full = sd_record_lid['filename'] # token = '%06d' % token_idx # Alternative to use KITTI names. token_idx += 1 # Convert image (jpg to png). src_im_path = os.path.join(self.nusc.dataroot, filename_cam_full) dst_im_path = os.path.join(image_folder, sample_token + '.png') if not os.path.exists(dst_im_path): im = Image.open(src_im_path) im.save(dst_im_path, "PNG") # Convert lidar. # Note that we are only using a single sweep, instead of the commonly used n sweeps. 
src_lid_path = os.path.join(self.nusc.dataroot, filename_lid_full) dst_lid_path = os.path.join(lidar_folder, sample_token + '.bin') assert not dst_lid_path.endswith('.pcd.bin') pcl = LidarPointCloud.from_file(src_lid_path) pcl.rotate( kitti_to_nu_lidar_inv.rotation_matrix) # In KITTI lidar frame. with open(dst_lid_path, "w") as lid_file: pcl.points.T.tofile(lid_file) # Add to tokens. tokens.append(sample_token) # Create calibration file. kitti_transforms = dict() kitti_transforms['P0'] = np.zeros((3, 4)) # Dummy values. kitti_transforms['P1'] = np.zeros((3, 4)) # Dummy values. kitti_transforms['P2'] = p_left_kitti # Left camera transform. kitti_transforms['P3'] = np.zeros((3, 4)) # Dummy values. kitti_transforms[ 'R0_rect'] = r0_rect.rotation_matrix # Cameras are already rectified. kitti_transforms['Tr_velo_to_cam'] = np.hstack( (velo_to_cam_rot, velo_to_cam_trans.reshape(3, 1))) kitti_transforms['Tr_imu_to_velo'] = imu_to_velo_kitti calib_path = os.path.join(calib_folder, sample_token + '.txt') with open(calib_path, "w") as calib_file: for (key, val) in kitti_transforms.items(): val = val.flatten() val_str = '%.12e' % val[0] for v in val[1:]: val_str += ' %.12e' % v calib_file.write('%s: %s\n' % (key, val_str)) # Write label file. label_path = os.path.join(label_folder, sample_token + '.txt') if os.path.exists(label_path): print('Skipping existing file: %s' % label_path) continue else: print('Writing file: %s' % label_path) with open(label_path, "w") as label_file: for sample_annotation_token in sample_annotation_tokens: sample_annotation = self.nusc.get('sample_annotation', sample_annotation_token) # Get box in LIDAR frame. _, box_lidar_nusc, _ = self.nusc.get_sample_data( lidar_token, box_vis_level=BoxVisibility.NONE, selected_anntokens=[sample_annotation_token]) box_lidar_nusc = box_lidar_nusc[0] # Truncated: Set all objects to 0 which means untruncated. truncated = 0.0 # Occluded: Set all objects to full visibility as this information is not available in nuScenes. occluded = 0 # Convert nuScenes category to nuScenes detection challenge category. detection_name = category_to_detection_name( sample_annotation['category_name']) # Skip categories that are not part of the nuScenes detection challenge. if detection_name is None: continue # Convert from nuScenes to KITTI box format. box_cam_kitti = KittiDB.box_nuscenes_to_kitti( box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot), velo_to_cam_trans, r0_rect) # Project 3d box to 2d box in image, ignore box if it does not fall inside. bbox_2d = KittiDB.project_kitti_box_to_image(box_cam_kitti, p_left_kitti, imsize=imsize) if bbox_2d is None: continue # Set dummy score so we can use this file as result. box_cam_kitti.score = 0 # Convert box to output string format. output = KittiDB.box_to_string(name=detection_name, box=box_cam_kitti, bbox_2d=bbox_2d, truncation=truncated, occlusion=occluded) # Write to disk. label_file.write(output + '\n')
def main():
    SCENE_SPLITS['mini-val'] = SCENE_SPLITS['val']
    if not os.path.exists(OUT_PATH):
        os.mkdir(OUT_PATH)
    for split in SPLITS:
        data_path = DATA_PATH + '{}/'.format(SPLITS[split])
        nusc = NuScenes(version=SPLITS[split], dataroot=data_path, verbose=True)
        out_path = OUT_PATH + '{}.json'.format(split)
        categories_info = [{'name': CATS[i], 'id': i + 1} for i in range(len(CATS))]
        ret = {'images': [], 'annotations': [], 'categories': categories_info,
               'videos': [], 'attributes': ATTRIBUTE_TO_ID}
        num_images = 0
        num_anns = 0
        num_videos = 0

        # A "sample" in nuScenes refers to a timestamp with 6 cameras and 1 LIDAR.
        for sample in nusc.sample:
            scene_name = nusc.get('scene', sample['scene_token'])['name']
            if not (split in ['mini', 'test']) and \
                    not (scene_name in SCENE_SPLITS[split]):
                continue
            if sample['prev'] == '':
                print('scene_name', scene_name)
                num_videos += 1
                ret['videos'].append({'id': num_videos, 'file_name': scene_name})
                frame_ids = {k: 0 for k in sample['data']}
                track_ids = {}
            # We decompose a sample into 6 images in our case.
            for sensor_name in sample['data']:
                if sensor_name in USED_SENSOR:
                    image_token = sample['data'][sensor_name]
                    image_data = nusc.get('sample_data', image_token)
                    num_images += 1

                    # Complex coordinate transform. This will take time to understand.
                    sd_record = nusc.get('sample_data', image_token)
                    cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
                    pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
                    global_from_car = transform_matrix(pose_record['translation'],
                                                       Quaternion(pose_record['rotation']),
                                                       inverse=False)
                    car_from_sensor = transform_matrix(cs_record['translation'],
                                                       Quaternion(cs_record['rotation']),
                                                       inverse=False)
                    trans_matrix = np.dot(global_from_car, car_from_sensor)
                    _, boxes, camera_intrinsic = nusc.get_sample_data(
                        image_token, box_vis_level=BoxVisibility.ANY)
                    calib = np.eye(4, dtype=np.float32)
                    calib[:3, :3] = camera_intrinsic
                    calib = calib[:3]
                    frame_ids[sensor_name] += 1

                    # image information in COCO format
                    image_info = {'id': num_images,
                                  'file_name': image_data['filename'],
                                  'calib': calib.tolist(),
                                  'video_id': num_videos,
                                  'frame_id': frame_ids[sensor_name],
                                  'sensor_id': SENSOR_ID[sensor_name],
                                  'sample_token': sample['token'],
                                  'trans_matrix': trans_matrix.tolist(),
                                  'width': sd_record['width'],
                                  'height': sd_record['height'],
                                  'pose_record_trans': pose_record['translation'],
                                  'pose_record_rot': pose_record['rotation'],
                                  'cs_record_trans': cs_record['translation'],
                                  'cs_record_rot': cs_record['rotation']}
                    ret['images'].append(image_info)

                    anns = []
                    for box in boxes:
                        det_name = category_to_detection_name(box.name)
                        if det_name is None:
                            continue
                        num_anns += 1
                        v = np.dot(box.rotation_matrix, np.array([1, 0, 0]))
                        yaw = -np.arctan2(v[2], v[0])
                        box.translate(np.array([0, box.wlh[2] / 2, 0]))
                        category_id = CAT_IDS[det_name]

                        amodel_center = project_to_image(
                            np.array([box.center[0],
                                      box.center[1] - box.wlh[2] / 2,
                                      box.center[2]], np.float32).reshape(1, 3),
                            calib)[0].tolist()
                        sample_ann = nusc.get('sample_annotation', box.token)
                        instance_token = sample_ann['instance_token']
                        if not (instance_token in track_ids):
                            track_ids[instance_token] = len(track_ids) + 1
                        attribute_tokens = sample_ann['attribute_tokens']
                        attributes = [nusc.get('attribute', att_token)['name']
                                      for att_token in attribute_tokens]
                        att = '' if len(attributes) == 0 else attributes[0]
                        if len(attributes) > 1:
                            print(attributes)
                            import pdb; pdb.set_trace()
                        track_id = track_ids[instance_token]
                        vel = nusc.box_velocity(box.token)  # global frame
                        vel = np.dot(np.linalg.inv(trans_matrix),
                                     np.array([vel[0], vel[1], vel[2], 0], np.float32)).tolist()

                        # instance information in COCO format
                        ann = {
                            'id': num_anns,
                            'image_id': num_images,
                            'category_id': category_id,
                            'dim': [box.wlh[2], box.wlh[0], box.wlh[1]],
                            'location': [box.center[0], box.center[1], box.center[2]],
                            'depth': box.center[2],
                            'occluded': 0,
                            'truncated': 0,
                            'rotation_y': yaw,
                            'amodel_center': amodel_center,
                            'iscrowd': 0,
                            'track_id': track_id,
                            'attributes': ATTRIBUTE_TO_ID[att],
                            'velocity': vel
                        }

                        bbox = KittiDB.project_kitti_box_to_image(
                            copy.deepcopy(box), camera_intrinsic, imsize=(1600, 900))
                        alpha = _rot_y2alpha(yaw, (bbox[0] + bbox[2]) / 2,
                                             camera_intrinsic[0, 2], camera_intrinsic[0, 0])
                        ann['bbox'] = [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]]
                        ann['area'] = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
                        ann['alpha'] = alpha
                        anns.append(ann)

                    # Filter out bounding boxes outside the image
                    visable_anns = []
                    for i in range(len(anns)):
                        vis = True
                        for j in range(len(anns)):
                            if anns[i]['depth'] - min(anns[i]['dim']) / 2 > \
                                    anns[j]['depth'] + max(anns[j]['dim']) / 2 and \
                                    _bbox_inside(anns[i]['bbox'], anns[j]['bbox']):
                                vis = False
                                break
                        if vis:
                            visable_anns.append(anns[i])
                        else:
                            pass

                    for ann in visable_anns:
                        ret['annotations'].append(ann)

                    if DEBUG:
                        img_path = data_path + image_info['file_name']
                        img = cv2.imread(img_path)
                        img_3d = img.copy()
                        for ann in visable_anns:
                            bbox = ann['bbox']
                            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                                          (int(bbox[2] + bbox[0]), int(bbox[3] + bbox[1])),
                                          (0, 0, 255), 3, lineType=cv2.LINE_AA)
                            box_3d = compute_box_3d(ann['dim'], ann['location'], ann['rotation_y'])
                            box_2d = project_to_image(box_3d, calib)
                            img_3d = draw_box_3d(img_3d, box_2d)

                            pt_3d = unproject_2d_to_3d(ann['amodel_center'], ann['depth'], calib)
                            pt_3d[1] += ann['dim'][0] / 2
                            print('location', ann['location'])
                            print('loc model', pt_3d)
                            pt_2d = np.array([(bbox[0] + bbox[2]) / 2,
                                              (bbox[1] + bbox[3]) / 2], dtype=np.float32)
                            pt_3d = unproject_2d_to_3d(pt_2d, ann['depth'], calib)
                            pt_3d[1] += ann['dim'][0] / 2
                            print('loc ', pt_3d)
                        cv2.imshow('img', img)
                        cv2.imshow('img_3d', img_3d)
                        cv2.waitKey()
                        nusc.render_sample_data(image_token)
                        plt.show()

        print('reordering images')
        images = ret['images']
        video_sensor_to_images = {}
        for image_info in images:
            tmp_seq_id = image_info['video_id'] * 20 + image_info['sensor_id']
            if tmp_seq_id in video_sensor_to_images:
                video_sensor_to_images[tmp_seq_id].append(image_info)
            else:
                video_sensor_to_images[tmp_seq_id] = [image_info]
        ret['images'] = []
        for tmp_seq_id in sorted(video_sensor_to_images):
            ret['images'] = ret['images'] + video_sensor_to_images[tmp_seq_id]

        print('{} {} images {} boxes'.format(split, len(ret['images']), len(ret['annotations'])))
        print('out_path', out_path)
        json.dump(ret, open(out_path, 'w'))
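# The COCO conversion above relies on two small helpers, _rot_y2alpha and
# _bbox_inside, that are defined elsewhere in the script. The sketch below is
# an assumption reconstructed only from how they are called here (yaw-to-alpha
# conversion, and an "is box1 fully inside box2" test on [x, y, w, h] boxes);
# the actual implementations may differ.
import numpy as np

def _rot_y2alpha(rot_y, x, cx, fx):
    # Observation angle alpha from global yaw, given the box center column x
    # and the camera principal point cx / focal length fx; wrapped to [-pi, pi].
    alpha = rot_y - np.arctan2(x - cx, fx)
    if alpha > np.pi:
        alpha -= 2 * np.pi
    if alpha < -np.pi:
        alpha += 2 * np.pi
    return alpha

def _bbox_inside(box1, box2):
    # True if box1 ([x, y, w, h]) lies completely inside box2 (same format).
    return box1[0] > box2[0] and box1[0] + box1[2] < box2[0] + box2[2] and \
           box1[1] > box2[1] and box1[1] + box1[3] < box2[1] + box2[3]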
yaw = []
sensor = 'LIDAR_TOP'
nu_to_kitti_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2).inverse
for scene in nusc.scene:
    current_sample_token = scene['first_sample_token']
    last_sample_token = scene['last_sample_token']
    while current_sample_token != last_sample_token:
        sample = nusc.get('sample', current_sample_token)
        lidar_top_data = nusc.get('sample_data', sample['data'][sensor])
        if lidar_top_data['prev']:
            ego_pose = nusc.get('ego_pose', lidar_top_data['ego_pose_token'])
            calib_sensor = nusc.get('calibrated_sensor', lidar_top_data['calibrated_sensor_token'])
            lidar_top_data_prev = nusc.get('sample_data', lidar_top_data['prev'])
            ego_pose_prev = nusc.get('ego_pose', lidar_top_data_prev['ego_pose_token'])
            calib_sensor_prev = nusc.get('calibrated_sensor',
                                         lidar_top_data_prev['calibrated_sensor_token'])

            ego_to_global = transform_matrix(ego_pose['translation'],
                                             Quaternion(ego_pose['rotation']))
            lidar_to_ego = transform_matrix(calib_sensor['translation'],
                                            Quaternion(calib_sensor['rotation']))
            ego_to_global_prev = transform_matrix(ego_pose_prev['translation'],
                                                  Quaternion(ego_pose_prev['rotation']))
            lidar_to_ego_prev = transform_matrix(calib_sensor_prev['translation'],
                                                 Quaternion(calib_sensor_prev['rotation']))

            lidar_to_global = np.dot(ego_to_global, lidar_to_ego)
            lidar_to_global_prev = np.dot(ego_to_global_prev, lidar_to_ego_prev)

            # Relative lidar motion between the previous and the current frame.
            delta = inv(lidar_to_global_prev).dot(lidar_to_global)
            y_translation.append(delta[1, 3])
            delta_angles = rotationMatrixToEulerAngles(delta[0:3, 0:3]) * 180 / np.pi
            yaw.append(delta_angles[2])

        for annotation_token in sample['anns']:
            annotation_metadata = nusc.get('sample_annotation', annotation_token)
            if annotation_metadata['num_lidar_pts'] == 0:
                continue
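# The ego-motion statistics above call rotationMatrixToEulerAngles, which is
# not shown in this listing. A common implementation (a sketch, not necessarily
# the one used here) decomposes a 3x3 rotation matrix into XYZ Euler angles in
# radians, so the multiplication by 180 / pi above yields degrees.
import numpy as np

def rotationMatrixToEulerAngles(R):
    # Returns (roll, pitch, yaw) in radians; handles the near-gimbal-lock case.
    sy = np.sqrt(R[0, 0] ** 2 + R[1, 0] ** 2)
    if sy > 1e-6:
        x = np.arctan2(R[2, 1], R[2, 2])
        y = np.arctan2(-R[2, 0], sy)
        z = np.arctan2(R[1, 0], R[0, 0])
    else:
        x = np.arctan2(-R[1, 2], R[1, 1])
        y = np.arctan2(-R[2, 0], sy)
        z = 0.0
    return np.array([x, y, z])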
def __getitem__(self, idx):
    # set defaults here
    kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2)
    kitti_to_nu_lidar_inv = kitti_to_nu_lidar.inverse
    imsize = (1600, 900)

    # sample_token based on index
    sample_token = self.sample_tokens[idx]

    # Get sample data.
    sample = self.nusc.get('sample', sample_token)
    sample_annotation_tokens = sample['anns']
    cam_front_token = sample['data'][self.cam_name]
    lidar_token = sample['data'][self.lidar_name]

    # Retrieve sensor records.
    sd_record_cam = self.nusc.get('sample_data', cam_front_token)
    sd_record_lid = self.nusc.get('sample_data', lidar_token)
    cs_record_cam = self.nusc.get('calibrated_sensor', sd_record_cam['calibrated_sensor_token'])
    cs_record_lid = self.nusc.get('calibrated_sensor', sd_record_lid['calibrated_sensor_token'])

    # Combine transformations and convert to KITTI format.
    # Note: cam uses same conventions in KITTI and nuScenes.
    lid_to_ego = transform_matrix(cs_record_lid['translation'],
                                  Quaternion(cs_record_lid['rotation']), inverse=False)
    ego_to_cam = transform_matrix(cs_record_cam['translation'],
                                  Quaternion(cs_record_cam['rotation']), inverse=True)
    velo_to_cam = np.dot(ego_to_cam, lid_to_ego)

    # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam.
    velo_to_cam_kitti = np.dot(velo_to_cam, kitti_to_nu_lidar.transformation_matrix)

    r0_rect = Quaternion(axis=[1, 0, 0], angle=0)  # Dummy values.

    # Projection matrix.
    p_left_kitti = np.zeros((3, 4))
    p_left_kitti[:3, :3] = cs_record_cam['camera_intrinsic']  # Cameras are always rectified.

    # Create KITTI style transforms.
    velo_to_cam_rot = velo_to_cam_kitti[:3, :3]
    velo_to_cam_trans = velo_to_cam_kitti[:3, 3]

    # Check that the rotation has the same format as in KITTI.
    assert (velo_to_cam_rot.round(0) == np.array([[0, -1, 0], [0, 0, -1], [1, 0, 0]])).all()
    assert (velo_to_cam_trans[1:3] < 0).all()

    # Retrieve the token from the lidar.
    # Note that this may be confusing as the filename of the camera will include the timestamp
    # of the lidar, not the camera.
    filename_cam_full = sd_record_cam['filename']

    # Load the image for this sample.
    src_im_path = os.path.join(self.nusc.dataroot, filename_cam_full)
    img = Image.open(src_im_path)

    # Create calibration matrix: the 3x3 intrinsics taken from the 3x4 projection matrix.
    K = np.array([float(i) for i in p_left_kitti.flatten()], dtype=np.float32).reshape(3, 4)
    K = K[:3, :3]

    # populate the list of object annotations for this sample
    anns = []
    for sample_annotation_token in sample_annotation_tokens:
        sample_annotation = self.nusc.get('sample_annotation', sample_annotation_token)

        # Get box in LIDAR frame.
        _, box_lidar_nusc, _ = self.nusc.get_sample_data(
            lidar_token, box_vis_level=BoxVisibility.NONE,
            selected_anntokens=[sample_annotation_token])
        box_lidar_nusc = box_lidar_nusc[0]

        # Truncated: Set all objects to 0 which means untruncated.
        truncated = 0.0

        # Occluded: Set all objects to full visibility as this information is not available in nuScenes.
        occluded = 0

        # Convert nuScenes category to nuScenes detection challenge category.
        detection_name = category_to_detection_name(sample_annotation['category_name'])

        # Skip categories that are not part of the nuScenes detection challenge.
        if detection_name is None:
            continue

        # Convert from nuScenes to KITTI box format.
        box_cam_kitti = KittiDB.box_nuscenes_to_kitti(
            box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot), velo_to_cam_trans, r0_rect)

        # Project 3d box to 2d box in image, ignore box if it does not fall inside.
        bbox_2d = KittiDB.project_kitti_box_to_image(box_cam_kitti, p_left_kitti, imsize=imsize)
        if bbox_2d is None:
            continue

        # Set dummy score so we can use this file as result.
        box_cam_kitti.score = 0

        # Convert box to output string format.
        output = KittiDB.box_to_string(name=detection_name, box=box_cam_kitti, bbox_2d=bbox_2d,
                                       truncation=truncated, occlusion=occluded)

        fieldnames = ['type', 'truncated', 'occluded', 'alpha',
                      'xmin', 'ymin', 'xmax', 'ymax',
                      'dh', 'dw', 'dl', 'lx', 'ly', 'lz', 'ry']
        if self.is_train:
            f = StringIO(output)
            reader = csv.DictReader(f, delimiter=' ', fieldnames=fieldnames)
            for line, row in enumerate(reader):
                if row["type"] in self.classes:
                    anns.append({
                        "class": row["type"],
                        "label": TYPE_ID_CONVERSION[row["type"]],
                        "truncation": float(row["truncated"]),
                        "occlusion": float(row["occluded"]),
                        "alpha": float(row["alpha"]),
                        "dimensions": [float(row['dl']), float(row['dh']), float(row['dw'])],
                        "locations": [float(row['lx']), float(row['ly']), float(row['lz'])],
                        "rot_y": float(row["ry"])
                    })

    center = np.array([i / 2 for i in img.size], dtype=np.float32)
    size = np.array([i for i in img.size], dtype=np.float32)

    # Resize, horizontal flip, and affine augmentation are performed here,
    # since it is complicated to compute the heatmap w.r.t. the transform.
    flipped = False
    if (self.is_train) and (random.random() < self.flip_prob):
        flipped = True
        img = img.transpose(Image.FLIP_LEFT_RIGHT)
        center[0] = size[0] - center[0] - 1
        K[0, 2] = size[0] - K[0, 2] - 1

    affine = False
    if (self.is_train) and (random.random() < self.aug_prob):
        affine = True
        shift, scale = self.shift_scale[0], self.shift_scale[1]
        shift_ranges = np.arange(-shift, shift + 0.1, 0.1)
        center[0] += size[0] * random.choice(shift_ranges)
        center[1] += size[1] * random.choice(shift_ranges)
        scale_ranges = np.arange(1 - scale, 1 + scale + 0.1, 0.1)
        size *= random.choice(scale_ranges)

    center_size = [center, size]
    trans_affine = get_transfrom_matrix(center_size, [self.input_width, self.input_height])
    trans_affine_inv = np.linalg.inv(trans_affine)
    img = img.transform(
        (self.input_width, self.input_height),
        method=Image.AFFINE,
        data=trans_affine_inv.flatten()[:6],
        resample=Image.BILINEAR,
    )

    trans_mat = get_transfrom_matrix(center_size, [self.output_width, self.output_height])

    if not self.is_train:
        # for inference we parametrize with original size
        target = ParamsList(image_size=size, is_train=self.is_train)
        target.add_field("trans_mat", trans_mat)
        target.add_field("K", K)
        if self.transforms is not None:
            img, target = self.transforms(img, target)
        return img, target, idx

    heat_map = np.zeros([self.num_classes, self.output_height, self.output_width], dtype=np.float32)
    regression = np.zeros([self.max_objs, 3, 8], dtype=np.float32)
    cls_ids = np.zeros([self.max_objs], dtype=np.int32)
    proj_points = np.zeros([self.max_objs, 2], dtype=np.int32)
    p_offsets = np.zeros([self.max_objs, 2], dtype=np.float32)
    dimensions = np.zeros([self.max_objs, 3], dtype=np.float32)
    locations = np.zeros([self.max_objs, 3], dtype=np.float32)
    rotys = np.zeros([self.max_objs], dtype=np.float32)
    reg_mask = np.zeros([self.max_objs], dtype=np.uint8)
    flip_mask = np.zeros([self.max_objs], dtype=np.uint8)

    for i, a in enumerate(anns):
        a = a.copy()
        cls = a["label"]

        locs = np.array(a["locations"])
        rot_y = np.array(a["rot_y"])
        if flipped:
            locs[0] *= -1
            rot_y *= -1

        point, box2d, box3d = encode_label(K, rot_y, a["dimensions"], locs)
        point = affine_transform(point, trans_mat)
        box2d[:2] = affine_transform(box2d[:2], trans_mat)
        box2d[2:] = affine_transform(box2d[2:], trans_mat)
        box2d[[0, 2]] = box2d[[0, 2]].clip(0, self.output_width - 1)
        box2d[[1, 3]] = box2d[[1, 3]].clip(0, self.output_height - 1)
        h, w = box2d[3] - box2d[1], box2d[2] - box2d[0]

        if (0 < point[0] < self.output_width) and (0 < point[1] < self.output_height):
            point_int = point.astype(np.int32)
            p_offset = point - point_int
            radius = gaussian_radius(h, w)
            radius = max(0, int(radius))
            heat_map[cls] = draw_umich_gaussian(heat_map[cls], point_int, radius)

            cls_ids[i] = cls
            regression[i] = box3d
            proj_points[i] = point_int
            p_offsets[i] = p_offset
            dimensions[i] = np.array(a["dimensions"])
            locations[i] = locs
            rotys[i] = rot_y
            reg_mask[i] = 1 if not affine else 0
            flip_mask[i] = 1 if not affine and flipped else 0

    target = ParamsList(image_size=img.size, is_train=self.is_train)
    target.add_field("hm", heat_map)
    target.add_field("reg", regression)
    target.add_field("cls_ids", cls_ids)
    target.add_field("proj_p", proj_points)
    target.add_field("dimensions", dimensions)
    target.add_field("locations", locations)
    target.add_field("rotys", rotys)
    target.add_field("trans_mat", trans_mat)
    target.add_field("K", K)
    target.add_field("reg_mask", reg_mask)
    target.add_field("flip_mask", flip_mask)

    if self.transforms is not None:
        img, target = self.transforms(img, target)

    return img, target, idx
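# The heatmap construction in __getitem__ uses the CenterNet-style helpers
# gaussian_radius and draw_umich_gaussian. The sketch below matches how they
# are called here (radius from box height/width, Gaussian splat kept by
# elementwise maximum); the project's own utilities may differ in details such
# as the minimum-overlap threshold.
import numpy as np

def gaussian_radius(h, w, min_overlap=0.7):
    # Radius such that a box shifted by it still overlaps the original box
    # with IoU >= min_overlap (CenterNet heuristic, sketch only).
    a1, b1 = 1, h + w
    c1 = w * h * (1 - min_overlap) / (1 + min_overlap)
    r1 = (b1 - np.sqrt(b1 ** 2 - 4 * a1 * c1)) / (2 * a1)

    a2, b2 = 4, 2 * (h + w)
    c2 = (1 - min_overlap) * w * h
    r2 = (b2 - np.sqrt(b2 ** 2 - 4 * a2 * c2)) / (2 * a2)

    a3, b3 = 4 * min_overlap, -2 * min_overlap * (h + w)
    c3 = (min_overlap - 1) * w * h
    r3 = (b3 + np.sqrt(b3 ** 2 - 4 * a3 * c3)) / (2 * a3)
    return min(r1, r2, r3)

def draw_umich_gaussian(heatmap, center, radius, k=1):
    # Splat a 2D Gaussian of the given radius onto heatmap at center (x, y),
    # keeping the elementwise maximum with what is already there.
    diameter = 2 * radius + 1
    sigma = diameter / 6
    y, x = np.ogrid[-radius:radius + 1, -radius:radius + 1]
    gaussian = np.exp(-(x * x + y * y) / (2 * sigma * sigma))

    cx, cy = int(center[0]), int(center[1])
    height, width = heatmap.shape
    left, right = min(cx, radius), min(width - cx, radius + 1)
    top, bottom = min(cy, radius), min(height - cy, radius + 1)

    masked_heatmap = heatmap[cy - top:cy + bottom, cx - left:cx + right]
    masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right]
    if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:
        np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
    return heatmap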
def nusc2coco(data_dir, nusc, scenes, input_seq_id):
    ret = defaultdict(list)
    for k, v in cats_mapping.items():
        ret['categories'].append(dict(id=v, name=k))

    vid_id = 0
    fr_id = 0
    for sensor in USED_SENSOR:
        print(f'converting {sensor}')
        seq_id = input_seq_id
        for sample in tqdm(nusc.sample):
            if not (sample["scene_token"] in scenes):
                continue
            image_token = sample['data'][sensor]
            sd_record = nusc.get('sample_data', image_token)
            img_name = osp.join(data_dir, sd_record['filename'])
            height = sd_record['height']
            width = sd_record['width']

            cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
            pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
            global_from_car = transform_matrix(pose_record['translation'],
                                               Quaternion(pose_record['rotation']),
                                               inverse=False)
            car_from_sensor = transform_matrix(cs_record['translation'],
                                               Quaternion(cs_record['rotation']),
                                               inverse=False)
            trans_matrix = np.dot(global_from_car, car_from_sensor)
            rotation_matrix = trans_matrix[:3, :3]
            position = trans_matrix[:3, 3:].flatten().tolist()
            rotation = R.from_matrix(rotation_matrix).as_euler('xyz').tolist()
            pose_dict = dict(rotation=rotation, position=position)

            _, boxes, camera_intrinsic = nusc.get_sample_data(
                image_token, box_vis_level=BoxVisibility.ANY)
            calib = np.eye(4, dtype=np.float32)
            calib[:3, :3] = camera_intrinsic
            calib = calib[:3].tolist()

            img_info = dict(file_name=img_name,
                            cali=calib,
                            pose=pose_dict,
                            sensor_id=SENSOR_ID[sensor],
                            height=height,
                            width=width,
                            fov=60,
                            near_clip=0.15,
                            id=len(ret['images']),
                            video_id=vid_id,
                            index=fr_id)
            ret['images'].append(img_info)

            anns = []
            for box in boxes:
                cat = general_to_nusc_cats[box.name]
                if cat in ['ignore']:
                    continue
                v = np.dot(box.rotation_matrix, np.array([1, 0, 0]))
                yaw = -np.arctan2(v[2], v[0]).tolist()
                center_2d = project_to_image(
                    np.array([box.center[0], box.center[1], box.center[2]],
                             np.float32).reshape(1, 3), calib)[0].tolist()
                sample_ann = nusc.get('sample_annotation', box.token)
                instance_token = sample_ann['instance_token']
                if instance_token not in trackid_maps.keys():
                    trackid_maps[instance_token] = len(trackid_maps)

                bbox = project_kitti_box_to_image(copy.deepcopy(box), camera_intrinsic,
                                                  imsize=(width, height))
                if bbox is None:
                    continue
                x1, y1, x2, y2 = bbox
                alpha = _rot_y2alpha(yaw, (x1 + x2) / 2,
                                     camera_intrinsic[0, 2],
                                     camera_intrinsic[0, 0]).tolist()
                delta_2d = [center_2d[0] - (x1 + x2) / 2,
                            center_2d[1] - (y1 + y2) / 2]
                ann = dict(
                    image_id=ret['images'][-1]['id'],
                    category_id=cats_mapping[cat],
                    instance_id=trackid_maps[instance_token],
                    alpha=float(alpha),
                    roty=float(yaw),
                    dimension=[box.wlh[2], box.wlh[0], box.wlh[1]],
                    translation=[box.center[0], box.center[1], box.center[2]],
                    is_occluded=0,
                    is_truncated=0,
                    bbox=[x1, y1, x2 - x1, y2 - y1],
                    area=(x2 - x1) * (y2 - y1),
                    delta_2d=delta_2d,
                    center_2d=center_2d,
                    iscrowd=False,
                    ignore=False,
                    segmentation=[[x1, y1, x1, y2, x2, y2, x2, y1]])
                anns.append(ann)

            # Filter out bounding boxes outside the image
            visable_anns = []
            for i in range(len(anns)):
                vis = True
                for j in range(len(anns)):
                    if anns[i]['translation'][2] - min(anns[i]['dimension']) / 2 > \
                            anns[j]['translation'][2] + max(anns[j]['dimension']) / 2 and \
                            _bbox_inside(anns[i]['bbox'], anns[j]['bbox']):
                        vis = False
                        break
                if vis:
                    visable_anns.append(anns[i])
                else:
                    pass

            for ann in visable_anns:
                ann['id'] = len(ret['annotations'])
                ret['annotations'].append(ann)

            fr_id += 1
            if sample['next'] == '':
                vid_info = dict(id=vid_id,
                                name=f'{seq_id:05}_{SENSOR_ID[sensor]}',
                                scene_token=sample["scene_token"],
                                n_frames=fr_id)
                ret['videos'].append(vid_info)
                seq_id += 1
                vid_id += 1
                fr_id = 0

    return ret
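# A minimal driver sketch for nusc2coco. It assumes the module-level tables
# used above (cats_mapping, general_to_nusc_cats, USED_SENSOR, SENSOR_ID,
# trackid_maps) are already defined in this script; the dataroot, version and
# output filename below are placeholders, not values from the original code.
import json
from nuscenes.nuscenes import NuScenes

if __name__ == '__main__':
    data_dir = 'data/nuscenes'  # placeholder dataroot
    nusc = NuScenes(version='v1.0-mini', dataroot=data_dir, verbose=True)

    # Convert every scene of this split; a real run would filter by split here.
    scene_tokens = [scene['token'] for scene in nusc.scene]

    coco_dict = nusc2coco(data_dir, nusc, scene_tokens, input_seq_id=0)
    with open('nuscenes_coco.json', 'w') as f:  # placeholder output name
        json.dump(coco_dict, f)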