def tokens_to_data_pairs(nusc: NuScenes,
                         cam_sd_tokens: List[str],
                         rad_sd_tokens: List[str],
                         depth: float,
                         threshold: float) -> list:
    """
    This method takes a pair of lists with the Camera and Radar sample_data tokens,
    filters the RadarPointCloud for detections closer than the parameter depth,
    loads the actual data in two corresponding lists and returns the zipped lists
    :param nusc: NuScenes instance
    :param cam_sd_tokens: List with all the camera sample_data tokens
    :param rad_sd_tokens: List with all the radar sample_data tokens
    :param depth: Distance from the radar sensor (x value) above which all detections are omitted
    :param threshold: No pair of points in the resulting dataset will have a distance smaller than this threshold
    :return: list(zip(np.array, np.array)) List of zipped image/radar data pairs
    """
    rgb_images_list = []
    for i in range(len(cam_sd_tokens)):
        cam_sd_path = nusc.get_sample_data_path(cam_sd_tokens[i])
        if not os.path.isfile(cam_sd_path):
            continue
        #im = Image.open(cam_sd_path)
        #im = im.resize((IMAGE_WIDTH, IMAGE_HEIGHT), Image.BILINEAR)
        img = cv2.imread(cam_sd_path)
        # Resize with bilinear interpolation
        img = cv2.resize(img, (IMAGE_WIDTH, IMAGE_HEIGHT))
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        rgb_images_list.append(img_rgb)  # append the converted RGB image, not the BGR original

    radar_pcl_list = []
    for i in range(len(rad_sd_tokens)):
        rad_sd_path = nusc.get_sample_data_path(rad_sd_tokens[i])
        if not os.path.isfile(rad_sd_path):
            continue
        #radar_pcl = RadarPointCloud.from_file(rad_sd_path, invalid_states=range(18), dynprop_states=range(18), ambig_states=range(18))
        #radar_pcl = RadarPointCloud.from_file(rad_sd_path, invalid_states=None, dynprop_states=[0, 2, 6], ambig_states=None)
        radar_pcl = RadarPointCloud.from_file(rad_sd_path)
        # radar_pcl.points.shape is (18, num_points)
        radar_pcl.points, dist_list = filter_pointcloud(radar_pcl.points, threshold)
        # radar_pcl.points.shape became (3, num_points)
        # RADNET expects shape (num_points, 4)
        radar_pcl.points = radar_pcl.points.transpose()
        #radar_pcl.points = radar_pcl.points[:, :3]
        #radar_pcl.points = radar_pcl.points[radar_pcl.points[:, 0] < depth]
        radar_pcl.points = np.hstack((radar_pcl.points,
                                      np.ones((radar_pcl.points.shape[0], 1),
                                              dtype=radar_pcl.points.dtype)))
        radar_pcl_list.append(radar_pcl)

    assert len(rgb_images_list) == len(radar_pcl_list)
    image_radar_pairs = list(zip(rgb_images_list, radar_pcl_list))

    del rgb_images_list
    del radar_pcl_list

    return image_radar_pairs
def tokens_to_data_pairs(nusc: NuScenes,
                         cam_sd_tokens: List[str],
                         rad_sd_tokens: List[str]) -> list:
    """
    This method takes a pair of lists with the Camera and Radar sample_data tokens,
    loads the actual data in two corresponding lists and returns the zipped lists
    :param nusc: NuScenes instance
    :param cam_sd_tokens: List with all the camera sample_data tokens
    :param rad_sd_tokens: List with all the radar sample_data tokens
    :return: list(zip(np.array, np.array)) List of zipped image/radar data pairs
    """
    rgb_images_list = []
    for i in range(len(cam_sd_tokens)):
        cam_sd_path = nusc.get_sample_data_path(cam_sd_tokens[i])
        if not os.path.isfile(cam_sd_path):
            continue
        #im = Image.open(cam_sd_path)
        #im = im.resize((IMAGE_WIDTH, IMAGE_HEIGHT), Image.BILINEAR)
        img = cv2.imread(cam_sd_path)
        # Resize with bilinear interpolation
        img = cv2.resize(img, (IMAGE_WIDTH, IMAGE_HEIGHT))
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        rgb_images_list.append(img_rgb)  # append the converted RGB image, not the BGR original

    radar_pcl_list = []
    for i in range(len(rad_sd_tokens)):
        rad_sd_path = nusc.get_sample_data_path(rad_sd_tokens[i])
        if not os.path.isfile(rad_sd_path):
            continue
        #radar_pcl = RadarPointCloud.from_file(rad_sd_path, invalid_states=range(18), dynprop_states=range(18), ambig_states=range(18))
        #radar_pcl = RadarPointCloud.from_file(rad_sd_path, invalid_states=None, dynprop_states=[0, 2, 6], ambig_states=None)
        radar_pcl = RadarPointCloud.from_file(rad_sd_path)
        # nuScenes RadarPointCloud has shape (18, num_points)
        # RADNET expects (num_points, 4)
        radar_pcl.points = radar_pcl.points.transpose()
        radar_pcl.points = radar_pcl.points[:, :3]
        radar_pcl.points = np.hstack((radar_pcl.points,
                                      np.ones((radar_pcl.points.shape[0], 1),
                                              dtype=radar_pcl.points.dtype)))
        radar_pcl_list.append(radar_pcl)

    assert len(rgb_images_list) == len(radar_pcl_list)
    image_radar_pairs = list(zip(rgb_images_list, radar_pcl_list))

    del rgb_images_list
    del radar_pcl_list

    return image_radar_pairs
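# Usage sketch (not from the original source): gather per-sample camera and
# radar sample_data tokens and build the zipped pairs. The dataroot path and
# the CAM_FRONT/RADAR_FRONT channel choice are assumptions for illustration.
def _example_build_pairs(dataroot='/data/nuscenes'):
    nusc = NuScenes(version='v1.0-mini', dataroot=dataroot, verbose=False)
    cam_sd_tokens, rad_sd_tokens = [], []
    for sample in nusc.sample:
        cam_sd_tokens.append(sample['data']['CAM_FRONT'])
        rad_sd_tokens.append(sample['data']['RADAR_FRONT'])
    pairs = tokens_to_data_pairs(nusc, cam_sd_tokens, rad_sd_tokens)
    img, radar_pcl = pairs[0]  # RGB array and RadarPointCloud with (num_points, 4) points
    return pairs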
def convert(split='val', data_path='data/nuscenes/', save_path='data/nuscenes/depth_maps'):
    nusc = NuScenes(version=SPLITS[split], dataroot=data_path, verbose=True)
    nusc_exp = NuScenesExplorer(nusc)
    save_dir = os.path.join(save_path, split)
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)  # was a bare `pass`, which left the output directory missing

    ret = []
    for sample in nusc.sample:
        sample_token = sample['token']
        print(sample_token, len(ret))
        lidar_token = sample['data'][LidarName]
        for cam_name in CamNames:
            cam_token = sample['data'][cam_name]
            depth_map_path = sample_token + cam_name + '.pts'
            depth_map_path = os.path.join(save_dir, 'depth_data', depth_map_path)
            img_path = nusc.get_sample_data_path(cam_token)
            data_info = {}
            data_info['sample_token'] = sample_token
            data_info['lidar_token'] = lidar_token
            data_info['depth_path'] = depth_map_path
            data_info['img_path'] = img_path
            ret.append(data_info)
            continue  # NOTE: this skips the depth-map generation below; only meta.json is written

            points, coloring, im = nusc_exp.map_pointcloud_to_image(lidar_token, cam_token)
            float_x_cords = points[0]  # < 1600
            float_y_cords = points[1]  # < 900
            float_depth = coloring
            point_with_depth = np.stack([float_x_cords, float_y_cords, float_depth], axis=-1)
            np.save(depth_map_path, point_with_depth)
            #nusc.render_pointcloud_in_image(sample_token, camera_channel='CAM_FRONT', out_path='./render.png', verbose=False)

    meta_file_path = os.path.join(save_dir, 'meta.json')
    with open(meta_file_path, 'w') as f:
        json.dump(ret, f)
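# Reader sketch for the meta file written by convert() above; a sketch under
# the assumption that the `continue` in convert() has been removed so the
# depth arrays actually exist. np.save appends '.npy' to the '.pts' filename.
def _example_load_meta(split='val', save_path='data/nuscenes/depth_maps'):
    with open(os.path.join(save_path, split, 'meta.json')) as f:
        meta = json.load(f)
    entry = meta[0]
    # each row of the saved array is (x_pixel, y_pixel, depth)
    points = np.load(entry['depth_path'] + '.npy')
    return entry['img_path'], points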
def tokens_to_data(nusc: NuScenes, rad_sd_tokens: List[str]) -> List:
    """
    This method takes a list with the Radar sample_data tokens and
    loads the actual data in a new list
    :param nusc: NuScenes instance
    :param rad_sd_tokens: List with all the radar sample_data tokens
    :return: List[np.array] List of numpy arrays with the radar data
    """
    radar_pcl_list = []
    for i in range(len(rad_sd_tokens)):
        rad_sd_path = nusc.get_sample_data_path(rad_sd_tokens[i])
        if not os.path.isfile(rad_sd_path):
            continue
        radar_pcl = RadarPointCloud.from_file(rad_sd_path)
        # nuScenes RadarPointCloud has shape (18, num_points)
        # RADNET expects (num_points, 4)
        radar_pcl.points = radar_pcl.points.transpose()
        radar_pcl.points = radar_pcl.points[:, :3]
        radar_pcl.points = np.hstack((radar_pcl.points,
                                      np.ones((radar_pcl.points.shape[0], 1),
                                              dtype=radar_pcl.points.dtype)))
        radar_pcl_list.append(radar_pcl)
    return radar_pcl_list
            current_recs[channel] = nusc.get(
                'sample_data',
                first_sample_rec['data'][channel],
            )
        current_time = first_sample_rec['timestamp']

        while current_time < last_sample_rec['timestamp']:
            current_time += time_step
            for channel, sd_rec in current_recs.items():
                while sd_rec['timestamp'] < current_time and sd_rec['next'] != '':
                    sd_rec = nusc.get('sample_data', sd_rec['next'])
                    current_recs[channel] = sd_rec
            count_frames = count_frames + 1
            cam_token = sd_rec['token']
            cam_record = nusc.get('sample_data', cam_token)
            cam_path = nusc.get_sample_data_path(cam_token)
            cam_path, boxes, camera_intrinsic = nusc.get_sample_data(
                sd_rec['token'],
            )
            im = Image.open(cam_path)
            im.save(
                data_directory + spl + str(scene_id) + camera_channel +
                '/img1/' + str(count_frames) + '.jpg',
                'JPEG',
            )
            TL_found = False
            bb = []
            patch_radius = 700
            sample_record = nusc.get('sample', sample_token)
            log_record = nusc.get('log', scene_record['log_token'])
            log_location = log_record['location']
            nusc_map = NuScenesMap(
def render_tracking_result(estimation_file: str, scene_name: str, version: str, dataroot: str):
    '''
    Render tracking result onto CAM_FRONT images
    :param estimation_file: name of the estimation file produced by nuscens_tracking_pmbm.py
    :param scene_name: name of the scene whose tracking result is to be rendered
    :param version: version of the NuScenes dataset (mini, trainval, test) the scene belongs to
    :param dataroot: directory containing the NuScenes dataset
    '''
    # Load tracking data
    with open(estimation_file, 'r') as infile:
        all_tracking_result = json.load(infile)

    num_unique_colors = 200
    all_color_indicies = np.linspace(0, 1.0, num=num_unique_colors)  # allow up to 200 unique colors

    # Load NuScenes stuff
    nusc = NuScenes(version=version, dataroot=dataroot, verbose=False)
    my_scene_token = nusc.field2token('scene', 'name', scene_name)[0]
    my_scene = nusc.get('scene', my_scene_token)

    current_time_step = 0
    current_sample_token = my_scene['first_sample_token']
    while True:
        # get the necessary records
        sample_record = nusc.get('sample', current_sample_token)
        camera_token = sample_record['data']['CAM_FRONT']
        sd_record = nusc.get('sample_data', camera_token)
        cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
        pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])

        # get camera information
        cam_intrinsic = np.array(cs_record['camera_intrinsic'])
        imsize = (sd_record['width'], sd_record['height'])
        impath = nusc.get_sample_data_path(camera_token)
        im = cv2.imread(impath)

        # get tracking result
        current_tracks = all_tracking_result[str(current_time_step)]
        for target_id, target in current_tracks.items():
            box = Box4Track(center=target['translation'] + [target['height']],
                            orientation=Quaternion(axis=[0, 0, 1],
                                                   angle=target['orientation']),
                            size=target['size'],
                            name=target['class'],
                            label=int(target_id))
            box.to_camera_frame(pose_record, cs_record)

            # render box on image
            if not box_in_image(box, cam_intrinsic, imsize, BoxVisibility.ANY):
                # print('Box {} not in image'.format(box.name))
                continue

            # get color
            c = np.array(plt.cm.Spectral(box.label % num_unique_colors))
            c = np.round(c * 255)
            box.render_track(im, view=cam_intrinsic, normalize=True, color=(c[0], c[1], c[2]))

        # move on
        current_time_step += 1
        current_sample_token = sample_record['next']
        if current_sample_token == '':
            break

        cv2.imshow('CAM_FRONT', im)
        key = cv2.waitKey(1000)  # wait 1000 ms
        if key == 32:  # if space is pressed, pause
            key = cv2.waitKey()
        if key == 27:  # if ESC is pressed, exit
            cv2.destroyAllWindows()
            break
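# Hypothetical invocation; the estimation file name and scene name below are
# placeholders, not values from the original source.
if __name__ == '__main__':
    render_tracking_result(estimation_file='pmbm_estimation.json',
                           scene_name='scene-0103',
                           version='v1.0-mini',
                           dataroot='/data/nuscenes')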
class NuscenesReader:
    scene_split_lists = {"train": set(split_train), "val": set(split_val)}
    sample_id_template = "ns_{}_{:02d}"
    np_str_to_cat = np.vectorize(nuscenes_category_to_object_class, otypes=[np.int64])
    np_str_to_sem = np.vectorize(nuscenes_category_to_semantic_class, otypes=[np.int64])

    # Nuscenes LiDAR has x-axis to vehicle right and y-axis to front.
    # Turn this 90 degrees to have x-axis facing the front
    turn_matrix = np.linalg.inv(
        np.asarray([[0, 1, 0, 0], [-1, 0, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]],
                   dtype=np.float64))
    turn_quaternion = Quaternion(axis=(0.0, 0.0, 1.0), radians=-np.pi / 2.0)

    def __init__(self, nuscenes_root, version="v1.0-trainval", max_scenes=None, *,
                 read_radar: bool = True, read_camera: bool = True,
                 read_semantics: bool = True, read_bounding_boxes: bool = True):
        self.nusc = NuScenes(version=version, dataroot=nuscenes_root, verbose=False)
        self.root = pathlib.Path(nuscenes_root)

        # global counter to sanity-check if we calculate the same number of points
        # within boxes as the dataset authors
        self.ns_lidar_pts_to_calculated_diff = 0

        # flags defining the data entries to return from 'read'
        self.read_radar = read_radar
        self.read_camera = read_camera
        self.read_semantics = read_semantics
        self.read_bounding_boxes = read_bounding_boxes

        if self.read_semantics and not hasattr(self.nusc, "lidarseg"):
            raise RuntimeError("Error: nuScenes-lidarseg not installed!")

        # assert that the training targets range from 0 - (|mapping| - 1)
        assert len(set(NUSCENES_SEM_CLASSES.values())) == len(NUSCENES_SEM_CLASSES)
        assert all(a == b for a, b in zip(sorted(NUSCENES_SEM_CLASSES.values()),
                                          range(len(NUSCENES_SEM_CLASSES))))

        split_name = {
            "v1.0-trainval": "nuscenes_default",
            "v1.0-mini": "nuscenes_mini",
        }.get(version, "nuscenes_{}".format(version))

        # create split dict
        self.split = {
            "name": split_name,
            "data": {k: [] for k in self.scene_split_lists.keys()},
        }
        for i, scene in enumerate(self.nusc.scene):
            if max_scenes is not None and i > max_scenes:
                break
            name = scene["name"]
            for k, v in self.scene_split_lists.items():
                if name in v:
                    split_list = self.split["data"][k]
                    split_list.extend([
                        self.sample_id_template.format(name, i)
                        for i in range(0, scene["nbr_samples"])
                    ])
                    break
            else:
                raise RuntimeError("Found scene that is not in a split: {}".format(name))

    def _make_path(self, filename: str):
        return str(self.root / filename)

    def __iter__(self):
        self._scene_iter = self.nusc.scene.__iter__()
        self._next_scene()
        return self

    def __next__(self) -> typing.Tuple[dict, int]:
        try:
            return self._sample_iter.__next__()
        except StopIteration:
            self._next_scene()
            return self._sample_iter.__next__()

    def make_sample_id(self, sample: typing.Tuple[dict, int]) -> str:
        scene = self.nusc.get("scene", sample[0]["scene_token"])
        return self.sample_id_template.format(scene["name"], sample[1])

    def _next_scene(self):
        self._current_scene = self._scene_iter.__next__()

        class SampleIteration:
            """Iterate over nuscenes samples of a scene.
            Add an additional sample index.
            """

            def __init__(self, scene, nusc):
                self.nusc = nusc
                self._pos = scene["first_sample_token"]
                self._index = itertools.count().__iter__()

            def __next__(self):
                if not self._pos:
                    raise StopIteration()
                sample = self.nusc.get("sample", self._pos)
                self._pos = sample["next"]
                return sample, self._index.__next__()

        self._sample_iter = SampleIteration(self._current_scene, self.nusc)

    def read(self, sample: typing.Tuple[dict, int], _sample_id: str):
        d = self._read_sample(sample)

        def convert(entry):
            """Convert data types to be compatible with tfrecords features."""
            if isinstance(entry, str):
                return entry.encode("utf-8")
            if isinstance(entry, np.ndarray):
                entry = entry.flatten()
                if not np.issubdtype(entry.dtype, np.number):
                    entry = tf.io.serialize_tensor(tf.convert_to_tensor(entry))
                elif entry.dtype == np.float64:
                    entry = entry.astype(np.float32)
                return entry
            return entry

        return {k: convert(v) for k, v in d.items()}

    def _read_sample(self, sample: typing.Tuple[dict, int]):
        radars = ["RADAR_FRONT", "RADAR_FRONT_LEFT", "RADAR_FRONT_RIGHT"]
        cameras = ["CAM_FRONT"]

        sample_id = self.make_sample_id(sample)
        sample, sample_index = sample
        sample_data_token: str = sample["data"]["LIDAR_TOP"]
        lidar_sample_data = self.nusc.get("sample_data", sample_data_token)
        assert lidar_sample_data["is_key_frame"]

        data_path, box_list, cam_intrinsic = self.nusc.get_sample_data(sample_data_token)

        # POINT CLOUD [(x y z I) x P]. Intensity from 0.0 to 255.0
        point_cloud_orig = LidarPointCloud.from_file(data_path)
        if point_cloud_orig.points.dtype != np.float32:
            raise RuntimeError("Point cloud has wrong data type.")
        # -> [P x (x y z I)]
        point_cloud_orig = point_cloud_orig.points.transpose()

        camera_data = {}
        radar_data = {}
        box_data = {}
        lidarseg_data = {}

        if self.read_semantics:
            lidarseg_data = self._read_lidarseg_data(sample_data_token, point_cloud_orig)

        if self.read_bounding_boxes:
            box_data = self._read_bounding_boxes(box_list, point_cloud_orig, sample)

        # rotate point cloud 90 degrees around z-axis,
        # so that x-axis faces front of vehicle
        x = point_cloud_orig[:, 0:1]
        y = point_cloud_orig[:, 1:2]
        point_cloud = np.concatenate((y, -x, point_cloud_orig[:, 2:4]), axis=-1)

        # LiDAR extrinsic calibration
        calibration_lidar = self.nusc.get("calibrated_sensor",
                                          lidar_sample_data["calibrated_sensor_token"])
        tf_lidar = transform_matrix(calibration_lidar["translation"],
                                    Quaternion(calibration_lidar["rotation"]))
        # vehicle -> point cloud coords (turned by 90 degrees)
        tf_vehicle_pc = np.linalg.inv(np.dot(tf_lidar, NuscenesReader.turn_matrix))

        if self.read_camera:
            # CAMERAS
            camera_data: [{str: typing.Any}] = [
                self._read_sensor_data_and_extrinsics(sample, cam_name, tf_vehicle_pc,
                                                      self._read_camera_data)
                for cam_name in cameras
            ]
            camera_data = {k: v for d in camera_data for k, v in d.items()}

        if self.read_radar:
            # RADARS
            radar_data: [{str: typing.Any}] = [
                self._read_sensor_data_and_extrinsics(sample, radar_name, tf_vehicle_pc,
                                                      self._read_radar_data)
                for radar_name in radars
            ]
            radar_data = {k: v for d in radar_data for k, v in d.items()}

        assert box_data.keys().isdisjoint(camera_data.keys())
        assert box_data.keys().isdisjoint(radar_data.keys())
        assert box_data.keys().isdisjoint(lidarseg_data.keys())

        # return feature array. Add sample ID.
        d = {
            "sample_id": sample_id,
            "point_cloud": point_cloud,
            **box_data,
            **camera_data,
            **radar_data,
            **lidarseg_data,
        }
        return d

    def _read_lidarseg_data(self, sample_data_token: str, point_cloud_orig: np.ndarray):
        # self.nusc.lidarseg_idx2name_mapping
        lidarseg_labels_filename = (
            pathlib.Path(self.nusc.dataroot) /
            self.nusc.get("lidarseg", sample_data_token)["filename"])
        # Load labels from .bin file.
        points_label = np.fromfile(str(lidarseg_labels_filename), dtype=np.uint8)  # [num_points]
        if points_label.shape[0] != point_cloud_orig.shape[0]:
            raise ValueError("Semantic labels do not match point cloud.")
        return {"semantic_labels": tf.io.serialize_tensor(points_label)}

    def _read_bounding_boxes(self, box_list, point_cloud_orig, sample):
        # create transform matrices for all boxes
        dt = np.float32
        if box_list:
            r = np.asarray([x.rotation_matrix for x in box_list])
            c = np.asarray([x.center for x in box_list])
            center_pos = np.asarray([x.center for x in box_list], dtype=dt)
            # NuScenes format: width, length, height. Reorder to l w h
            box_dims_lwh = np.asarray([x.wlh for x in box_list], dtype=np.float64)[:, [1, 0, 2]]
            box_rot = np.asarray([(self.turn_quaternion * x.orientation).q for x in box_list],
                                 dtype=dt)
        else:
            r = np.zeros(shape=[0, 3, 3], dtype=np.float64)
            c = np.zeros(shape=[0, 3], dtype=np.float64)
            center_pos = np.zeros(shape=[0, 3], dtype=np.float64)
            box_dims_lwh = np.zeros(shape=[0, 3], dtype=np.float64)
            box_rot = np.zeros(shape=[0, 4], dtype=np.float64)
        rc = np.concatenate((r, c[:, :, None]), axis=-1)
        tfs = np.concatenate(
            (
                rc,
                np.broadcast_to(
                    tf.constant([0, 0, 0, 1], dtype=rc.dtype)[None, None, :],
                    [rc.shape[0], 1, 4],
                ),
            ),
            axis=1,
        )

        total_points_per_box, mapping = assign_point_cloud_to_bounding_boxes(
            point_cloud_orig,
            bounding_boxes_tfs=tfs,
            bounding_boxes_dims=box_dims_lwh,
        )

        # 3D BOXES IN LIDAR COORDS [(x y z) (w x y z) (l w h)]
        bboxes_spatial = np.empty(shape=(len(box_list), 10), dtype=dt)
        bboxes_spatial[:, 0] = center_pos[:, 1]
        bboxes_spatial[:, 1] = -center_pos[:, 0]
        bboxes_spatial[:, 2] = center_pos[:, 2]
        bboxes_spatial[:, 3:7] = box_rot
        bboxes_spatial[:, 7:10] = box_dims_lwh.astype(dt)

        # np.unicode was removed in NumPy 1.24; np.str_ is the equivalent dtype
        object_str = np.array([x.name for x in box_list], dtype=np.str_)
        object_category = NuscenesReader.np_str_to_cat(object_str).astype(np.int64)
        class_value = NuscenesReader.np_str_to_sem(object_str).astype(np.int64)
        lidar_pts = np.asarray([
            x["num_lidar_pts"] for x in [
                self.nusc.get("sample_annotation", ann_token)
                for ann_token in sample["anns"]
            ]
        ])
        # rotate box transforms (same as bboxes_spatial)
        tfs = np.matmul(np.linalg.inv(self.turn_matrix)[None, ...], tfs)

        # track to see if box calculation gives the same results as the
        # lidar points counter from the nuscenes dataset
        diff = np.sum(np.abs(lidar_pts - total_points_per_box))
        self.ns_lidar_pts_to_calculated_diff += diff

        return {
            "bounding_boxes_3d_spatial": bboxes_spatial,
            "bounding_boxes_3d_transforms": tfs,
            "bounding_boxes_class": class_value,
            "bounding_boxes_category": object_category,
            "bounding_boxes_class_str": object_str,
            "bounding_boxes_point_counter": total_points_per_box.astype(np.int64),
            "bounding_boxes_point_mapping": mapping,
        }

    @staticmethod
    def _read_sensor_data_and_extrinsics(sample, sensor_name: str,
                                         coord_transform: np.ndarray, reader_func):
        data, extrinsics = reader_func(sample["data"][sensor_name])
        extrinsics = np.matmul(coord_transform, extrinsics)
        return {
            **{k.format(sensor_name.lower()): v for k, v in data.items()},
            "{}_extrinsics".format(sensor_name.lower()): extrinsics,
        }

    def _read_camera_data(self, sample_data_token):
        sd_record = self.nusc.get("sample_data", sample_data_token)
        cs_record = self.nusc.get("calibrated_sensor", sd_record["calibrated_sensor_token"])
        sensor_record = self.nusc.get("sensor", cs_record["sensor_token"])
        assert sensor_record["modality"] == "camera"
        # currently using only keyframes
        assert sd_record["is_key_frame"]

        data_path = self.nusc.get_sample_data_path(sample_data_token)
        imsize = (sd_record["width"], sd_record["height"])
        cam_intrinsic = np.array(cs_record["camera_intrinsic"])
        cam_extrinsics = transform_matrix(cs_record["translation"],
                                          Quaternion(cs_record["rotation"]))
        with open(data_path, "rb") as in_file:
            img_bytes_jpg = in_file.read()
        return (
            {
                "{}_jpg": img_bytes_jpg,
                "{}_size": np.asarray(imsize, dtype=np.int64),
                "{}_intrinsics": cam_intrinsic.astype(np.float32),
            },
            cam_extrinsics,
        )

    def _read_radar_data(self, sample_data_token):
        sd_record = self.nusc.get("sample_data", sample_data_token)
        cs_record = self.nusc.get("calibrated_sensor", sd_record["calibrated_sensor_token"])
        sensor_record = self.nusc.get("sensor", cs_record["sensor_token"])
        assert sensor_record["modality"] == "radar"
        # currently using only keyframes
        assert sd_record["is_key_frame"]

        data_path = self.nusc.get_sample_data_path(sample_data_token)
        radar_point_cloud = RadarPointCloud.from_file(data_path)
        points = tf.convert_to_tensor(radar_point_cloud.points.transpose())
        radar_extrinsics = transform_matrix(cs_record["translation"],
                                            Quaternion(cs_record["rotation"]))
        return {"{}_points": tf.io.serialize_tensor(points)}, radar_extrinsics
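# Minimal iteration sketch for NuscenesReader (paths are assumptions; semantics
# disabled so the nuScenes-lidarseg add-on is not required). read() returns a
# dict of tfrecord-ready features, with numeric arrays flattened by convert().
reader = NuscenesReader('/data/nuscenes', version='v1.0-mini', read_semantics=False)
sample = next(iter(reader))                # (sample record, index within scene)
sample_id = reader.make_sample_id(sample)  # e.g. "ns_scene-0061_00"
features = reader.read(sample, sample_id)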
class NuScenes_utils:
    def __init__(self, phase):
        assert phase in ['train', 'validation', 'test']
        self.phase = phase
        self.prep_list_of_sessions()
        self.splits = {
            'train': train_split,
            'validation': val_split,
            'test': test_split
        }

    def is_phase_session(self, phase, session_ind=None):
        if session_ind is None:
            res = np.array([n in self.splits[phase] for n in self.session_names])
        else:
            res = self.session_names[session_ind] in self.splits[phase]
        return res

    def is_location_session(self, location, session_ind=None):
        if session_ind is None:
            res = np.array([location in l for l in self.session_locations])
        else:
            res = location in self.session_locations[session_ind]
        return res

    def load_PC(self, session_ind, cloud_ind, cache_file=None):
        assert session_ind < self.num_sessions
        assert cloud_ind < self.session_lengths[session_ind], \
            f"Requested cloud {cloud_ind}, but session {session_ind} only contains {self.session_lengths[session_ind]} clouds"
        lidar_token = self.cloud_tokens[session_ind][cloud_ind]
        cloud = self.load_cloud_raw(lidar_token)
        if (cache_file is not None) and not os.path.isfile(cache_file):
            np.save(cache_file, cloud)
        return cloud

    def get_relative_motion_A_to_B(self, session_ind, cloud_ind_A, cloud_ind_B):
        token_A = self.cloud_tokens[session_ind][cloud_ind_A]
        posA = self.load_position_raw(token_A)
        token_B = self.cloud_tokens[session_ind][cloud_ind_B]
        posB = self.load_position_raw(token_B)
        mot_A_to_B = np.linalg.inv(posB) @ posA
        return mot_A_to_B

    def prep_list_of_sessions(self):
        if self.phase in ['train', 'validation']:
            version = 'v1.0-trainval'
        elif self.phase == 'test':
            version = 'v1.0-test'
        self.NuScenes_data = NuScenes(version=version, dataroot=DATASET_ROOT, verbose=True)
        self.num_sessions = len(self.NuScenes_data.scene)

        self.cloud_tokens = []
        self.session_lengths = []
        self.session_locations = []
        self.session_names = []

        for session_ind in range(self.num_sessions):
            record = self.NuScenes_data.scene[session_ind]
            session_token = record['token']
            self.session_names.append(record['name'])
            location = self.NuScenes_data.get('log', record['log_token'])['location']
            self.session_locations.append(location)
            sample_token = record["first_sample_token"]
            sample = self.NuScenes_data.get("sample", sample_token)
            lidar_token = sample["data"]["LIDAR_TOP"]
            cur_lidar_tokens = []
            while len(lidar_token) > 0:
                cur_lidar_tokens.append(lidar_token)
                lidar_data = self.NuScenes_data.get("sample_data", lidar_token)
                lidar_token = lidar_data["next"]
            self.cloud_tokens.append(cur_lidar_tokens)
            self.session_lengths.append(len(cur_lidar_tokens))

    def load_position_raw(self, sample_lidar_token):
        lidar_data = self.NuScenes_data.get("sample_data", sample_lidar_token)
        ego_pose = self.NuScenes_data.get("ego_pose", lidar_data["ego_pose_token"])
        calibrated_sensor = self.NuScenes_data.get("calibrated_sensor",
                                                   lidar_data["calibrated_sensor_token"])
        # Homogeneous transformation matrix from car frame to world frame.
        global_from_car = transform_matrix(ego_pose['translation'],
                                           Quaternion(ego_pose['rotation']),
                                           inverse=False)
        return global_from_car

    def load_cloud_raw(self, sample_lidar_token):
        lidar_data = self.NuScenes_data.get("sample_data", sample_lidar_token)
        lidar_filepath = self.NuScenes_data.get_sample_data_path(sample_lidar_token)
        ego_pose = self.NuScenes_data.get("ego_pose", lidar_data["ego_pose_token"])
        calibrated_sensor = self.NuScenes_data.get("calibrated_sensor",
                                                   lidar_data["calibrated_sensor_token"])
        # Homogeneous transformation matrix from sensor coordinate frame to ego car frame.
        car_from_sensor = transform_matrix(calibrated_sensor['translation'],
                                           Quaternion(calibrated_sensor['rotation']),
                                           inverse=False)
        lidar_pointcloud = LidarPointCloud.from_file(lidar_filepath)
        # The lidar pointcloud is defined in the sensor's reference frame.
        # We want it in the car's reference frame, so we transform each point.
        lidar_pointcloud.transform(car_from_sensor)
        return lidar_pointcloud.points[:3, :].T
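# Sketch of how the relative-motion helper composes (not from the original
# source): load_position_raw() returns global_from_car, so inv(posB) @ posA
# maps car-frame-A coordinates into car-frame-B. DATASET_ROOT is configured
# elsewhere in this module.
utils = NuScenes_utils(phase='validation')
cloud_A = utils.load_PC(session_ind=0, cloud_ind=0)     # (N, 3) points in the car frame
mot_A_to_B = utils.get_relative_motion_A_to_B(0, 0, 1)  # 4x4 homogeneous matrix
cloud_A_h = np.hstack([cloud_A, np.ones((cloud_A.shape[0], 1))])
cloud_A_in_B = (mot_A_to_B @ cloud_A_h.T).T[:, :3]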
class NuScenesDataset(Dataset):
    """
    NuScenes dataset loader and producer
    """

    def __init__(self, mode, split='training', img_list='trainval',
                 is_training=True, workers_num=1):
        """
        mode: 'loading', 'preprocessing'
        """
        self.mode = mode
        self.dataset_dir = os.path.join(cfg.ROOT_DIR, cfg.DATASET.KITTI.BASE_DIR_PATH)
        self.max_sweeps = cfg.DATASET.NUSCENES.NSWEEPS
        self.is_training = is_training
        self.img_list = img_list
        self.workers_num = workers_num

        # cast labels from NuScenes names to useful names
        self.useful_cls_dict = {
            'animal': 'ignore',
            'human.pedestrian.personal_mobility': 'ignore',
            'human.pedestrian.stroller': 'ignore',
            'human.pedestrian.wheelchair': 'ignore',
            'movable_object.debris': 'ignore',
            'movable_object.pushable_pullable': 'ignore',
            'static_object.bicycle_rack': 'ignore',
            'vehicle.emergency.ambulance': 'ignore',
            'vehicle.emergency.police': 'ignore',
            'movable_object.barrier': 'barrier',
            'vehicle.bicycle': 'bicycle',
            'vehicle.bus.bendy': 'bus',
            'vehicle.bus.rigid': 'bus',
            'vehicle.car': 'car',
            'vehicle.construction': 'construction_vehicle',
            'vehicle.motorcycle': 'motorcycle',
            'human.pedestrian.adult': 'pedestrian',
            'human.pedestrian.child': 'pedestrian',
            'human.pedestrian.construction_worker': 'pedestrian',
            'human.pedestrian.police_officer': 'pedestrian',
            'movable_object.trafficcone': 'traffic_cone',
            'vehicle.trailer': 'trailer',
            'vehicle.truck': 'truck'
        }
        # cast attribute to index
        self.attribute_idx_list = {
            'vehicle.moving': 0,
            'vehicle.stopped': 1,
            'vehicle.parked': 2,
            'cycle.with_rider': 3,
            'cycle.without_rider': 4,
            'pedestrian.sitting_lying_down': 5,
            'pedestrian.standing': 6,
            'pedestrian.moving': 7,
            'default': -1,
        }
        self.idx_attribute_list = dict([(v, k) for k, v in self.attribute_idx_list.items()])
        self.AttributeIdxLabelMapping = {
            "car": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "truck": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "bus": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "trailer": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "construction_vehicle": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "pedestrian": ['pedestrian.sitting_lying_down', 'pedestrian.standing',
                           'pedestrian.moving'],
            "motorcycle": ['cycle.with_rider', 'cycle.without_rider', ''],
            "bicycle": ['cycle.with_rider', 'cycle.without_rider', ''],
            "traffic_cone": ['', '', ''],
            "barrier": ['', '', ''],
        }
        self.DefaultAttribute = {
            "car": "vehicle.parked",
            "pedestrian": "pedestrian.moving",
            "trailer": "vehicle.parked",
            "truck": "vehicle.parked",
            "bus": "vehicle.parked",
            "motorcycle": "cycle.without_rider",
            "construction_vehicle": "vehicle.parked",
            "bicycle": "cycle.without_rider",
            "barrier": "",
            "traffic_cone": "",
        }

        self.cls_list = cfg.DATASET.KITTI.CLS_LIST
        self.idx2cls_dict = dict([(idx + 1, cls) for idx, cls in enumerate(self.cls_list)])
        self.cls2idx_dict = dict([(cls, idx + 1) for idx, cls in enumerate(self.cls_list)])

        self.sv_npy_path = os.path.join(cfg.ROOT_DIR, cfg.DATASET.KITTI.SAVE_NUMPY_PATH,
                                        'NuScenes', '{}_{}'.format(img_list, self.max_sweeps))
        self.train_list = os.path.join(self.sv_npy_path, 'infos.pkl')

        self.voxel_generator = VoxelGenerator()

        self.test_mode = cfg.TEST.TEST_MODE
        if self.test_mode == 'mAP':
            self.evaluation = self.evaluate_map
            self.logger_and_select_best = self.logger_and_select_best_map
        elif self.test_mode == 'Recall':
            self.evaluation = self.evaluate_recall
            self.logger_and_select_best = self.logger_and_select_best_recall
        else:
            raise Exception('No other evaluation mode.')

        if mode == 'loading':
            # data loader
            with open(self.train_list, 'rb') as f:
                self.train_npy_list = pickle.load(f)
            self.sample_num = len(self.train_npy_list)
            if self.is_training:
                self.data_augmentor = DataAugmentor('NuScenes', workers_num=self.workers_num)
        elif mode == 'preprocessing':
            # preprocess raw data
            if img_list == 'train':
                self.nusc = NuScenes(dataroot=self.dataset_dir, version='v1.0-trainval')
                self.scenes = [scene for scene in self.nusc.scene
                               if scene['name'] in train_scene]
            elif img_list == 'val':
                self.nusc = NuScenes(dataroot=self.dataset_dir, version='v1.0-trainval')
                self.scenes = [scene for scene in self.nusc.scene
                               if scene['name'] in val_scene]
            else:  # test
                self.nusc = NuScenes(dataroot=self.dataset_dir, version='v1.0-test')
                self.scenes = self.nusc.scene

            self.sample_data_token_list = OrderedDict()
            sample_num = 0
            for scene in self.scenes:
                # count the samples and save all sample_data tokens
                self.sample_data_token_list[scene['token']] = []
                all_sample = self.nusc.field2token('sample', 'scene_token', scene['token'])
                sample_num += len(all_sample)
                for sample in all_sample:  # all sample tokens
                    sample = self.nusc.get('sample', sample)
                    cur_token = sample['token']
                    cur_data_token = sample['data']['LIDAR_TOP']
                    self.sample_data_token_list[scene['token']].append(cur_data_token)

            self.sample_num = sample_num

            self.extents = cfg.DATASET.POINT_CLOUD_RANGE
            self.extents = np.reshape(self.extents, [3, 2])

            if not os.path.exists(self.sv_npy_path):
                os.makedirs(self.sv_npy_path)

            # also calculate the mean size here
            self.cls_size_dict = dict([(cls, np.array([0, 0, 0], dtype=np.float32))
                                       for cls in self.cls_list])
            self.cls_num_dict = dict([(cls, 0) for cls in self.cls_list])

            # the save path for MixupDB
            if self.img_list in ['train', 'val', 'trainval'] and cfg.TEST.WITH_GT \
                    and cfg.TRAIN.AUGMENTATIONS.MIXUP.OPEN:
                self.mixup_db_cls_path = dict()
                self.mixup_db_trainlist_path = dict()
                self.mixup_db_class = cfg.TRAIN.AUGMENTATIONS.MIXUP.CLASS
                for cls in self.mixup_db_class:
                    mixup_db_cls_path = os.path.join(
                        cfg.ROOT_DIR, cfg.DATASET.KITTI.SAVE_NUMPY_PATH,
                        cfg.TRAIN.AUGMENTATIONS.MIXUP.SAVE_NUMPY_PATH,
                        cfg.TRAIN.AUGMENTATIONS.MIXUP.PC_LIST, '{}'.format(cls))
                    mixup_db_trainlist_path = os.path.join(mixup_db_cls_path, 'train_list.txt')
                    if not os.path.exists(mixup_db_cls_path):
                        os.makedirs(mixup_db_cls_path)
                    self.mixup_db_cls_path[cls] = mixup_db_cls_path
                    self.mixup_db_trainlist_path[cls] = mixup_db_trainlist_path

    def __len__(self):
        return self.sample_num

    def load_samples(self, sample_idx, pipename):
        """ load data per thread """
        pipename = int(pipename)
        biggest_label_num = 0
        sample_dict = self.train_npy_list[sample_idx]

        points_path = sample_dict[maps_dict.KEY_POINT_CLOUD]
        sweeps = sample_dict[maps_dict.KEY_SWEEPS]
        sample_name = sample_dict[maps_dict.KEY_SAMPLE_NAME]
        cur_transformation_matrix = sample_dict[maps_dict.KEY_TRANSFORMRATION_MATRIX]
        ts = sample_dict[maps_dict.KEY_TIMESTAMPS] / 1e6

        # first read points and stack points from multiple frames
        points = np.fromfile(points_path, dtype=np.float32)
        points = points.reshape((-1, 5))
        points = cast_points_to_kitti(points)
        points[:, 3] /= 255
        points[:, 4] = 0
        sweep_points_list = [points]
        original_cur_sweep_points = points
        cur_sweep_points_num = points.shape[0]
        for sweep in sweeps:
            points_sweep = np.fromfile(sweep['lidar_path'], dtype=np.float32)
            points_sweep = points_sweep.reshape((-1, 5))
            sweep_ts = sweep['timestamp'] / 1e6
            points_sweep[:, 3] /= 255
            points_sweep[:, :3] = points_sweep[:, :3] @ sweep['sweep2lidar_rotation'].T
            points_sweep[:, :3] += sweep['sweep2lidar_translation']
            points_sweep[:, 4] = ts - sweep_ts
            points_sweep = cast_points_to_kitti(points_sweep)
            sweep_points_list.append(points_sweep)

        if cfg.DATASET.NUSCENES.INPUT_FEATURE_CHANNEL == 4:
            points = np.concatenate(sweep_points_list, axis=0)[:, [0, 1, 2, 4]]
        else:
            points = np.concatenate(sweep_points_list, axis=0)

        # then read the groundtruth file if we have one
        if self.is_training or cfg.TEST.WITH_GT:
            label_boxes_3d = sample_dict[maps_dict.KEY_LABEL_BOXES_3D]
            label_boxes_3d = cast_box_3d_to_kitti_format(label_boxes_3d)

            label_classes_name = sample_dict[maps_dict.KEY_LABEL_CLASSES]
            label_classes = np.array([self.cls2idx_dict[label_class]
                                      for label_class in label_classes_name])

            label_attributes = sample_dict[maps_dict.KEY_LABEL_ATTRIBUTES]
            label_velocity = sample_dict[maps_dict.KEY_LABEL_VELOCITY]  # [-1, 2]

            ry_cls_label, residual_angle = encode_angle2class_np(
                label_boxes_3d[:, -1], cfg.MODEL.ANGLE_CLS_NUM)
        else:  # not is_training and no gt
            label_boxes_3d = np.zeros([1, 7], np.float32)
            label_classes = np.zeros([1], np.int32)
            label_attributes = np.zeros([1], np.int32)
            label_velocity = np.zeros([1, 2], np.float32)
            ry_cls_label = np.zeros([1], np.int32)
            residual_angle = np.zeros([1], np.float32)

        if self.is_training:  # data augmentation
            points, label_boxes_3d, label_classes, label_attributes, label_velocity, \
                cur_sweep_points_num = self.data_augmentor.nuscenes_forward(
                    points, label_boxes_3d, label_classes, pipename,
                    label_attributes, label_velocity, cur_sweep_points_num)
            ry_cls_label, residual_angle = encode_angle2class_np(
                label_boxes_3d[:, -1], cfg.MODEL.ANGLE_CLS_NUM)
        cur_label_num = len(label_boxes_3d)

        # then randomly choose some points
        cur_sweep_points = points[:cur_sweep_points_num, :]  # [-1, 4]
        other_sweep_points = points[cur_sweep_points_num:, :]  # [-1, 4]
        if len(other_sweep_points) == 0:
            other_sweep_points = cur_sweep_points.copy()
        np.random.shuffle(cur_sweep_points)
        np.random.shuffle(other_sweep_points)

        input_sample_points, num_points_per_voxel = self.voxel_generator.generate_nusc(
            cur_sweep_points, other_sweep_points,
            cfg.DATASET.NUSCENE.MAX_CUR_SAMPLE_POINTS_NUM
        )  # points, [num_voxels, num_points, 5], sem_labels, [num_voxels, num_points]

        cur_sample_points = input_sample_points[:cfg.DATASET.NUSCENE.MAX_CUR_SAMPLE_POINTS_NUM]
        other_sample_points = input_sample_points[cfg.DATASET.NUSCENE.MAX_CUR_SAMPLE_POINTS_NUM:]

        biggest_label_num = max(biggest_label_num, cur_label_num)
        return (biggest_label_num, input_sample_points, cur_sample_points,
                other_sample_points, label_boxes_3d, ry_cls_label, residual_angle,
                label_classes, label_attributes, label_velocity, sample_name,
                cur_transformation_matrix, sweeps, original_cur_sweep_points)

    def load_batch(self, batch_size):
        perm = np.arange(self.sample_num).tolist()  # a list indexing each datum
        dp = DataFromList(perm, is_train=self.is_training, shuffle=self.is_training)
        dp = MultiProcessMapData(dp, self.load_samples, self.workers_num)

        use_list = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1]
        use_concat = [0, 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0]

        dp = BatchDataNuscenes(dp, batch_size, use_concat=use_concat, use_list=use_list)
        dp.reset_state()
        dp = dp.get_data()
        return dp

    # Preprocess data
    def preprocess_samples(self, cur_scene_key, sample_data_token):
        sample_dicts = []
        biggest_label_num = 0

        cur_sample_data = self.nusc.get('sample_data', sample_data_token)
        cur_sample_token = cur_sample_data['sample_token']
        cur_sample = self.nusc.get('sample', cur_sample_token)

        ego_pose = self.nusc.get('ego_pose', cur_sample_data['ego_pose_token'])
        calibrated_sensor = self.nusc.get('calibrated_sensor',
                                          cur_sample_data['calibrated_sensor_token'])

        l2e_r = calibrated_sensor['rotation']
        l2e_t = calibrated_sensor['translation']
        e2g_r = ego_pose['rotation']
        e2g_t = ego_pose['translation']
        l2e_r_mat = Quaternion(l2e_r).rotation_matrix
        e2g_r_mat = Quaternion(e2g_r).rotation_matrix

        cur_timestamp = cur_sample['timestamp']
        cur_transformation_matrix = {
            'lidar2ego_translation': l2e_t,
            'lidar2ego_rotation': l2e_r,
            'ego2global_translation': e2g_t,
            'ego2global_rotation': e2g_r,
        }

        # get the point clouds from the preceding 0.5 second
        sweeps = []
        while len(sweeps) < self.max_sweeps:
            if not cur_sample_data['prev'] == '':  # has a previous frame
                cur_sample_data = self.nusc.get('sample_data', cur_sample_data['prev'])
                cur_ego_pose = self.nusc.get('ego_pose', cur_sample_data['ego_pose_token'])
                cur_calibrated_sensor = self.nusc.get(
                    'calibrated_sensor', cur_sample_data['calibrated_sensor_token'])
                cur_lidar_path, cur_sweep_boxes, _ = self.nusc.get_sample_data(
                    cur_sample_data['token'])
                sweep = {
                    "lidar_path": cur_lidar_path,
                    "sample_data_token": cur_sample_data['token'],
                    "lidar2ego_translation": cur_calibrated_sensor['translation'],
                    "lidar2ego_rotation": cur_calibrated_sensor['rotation'],
                    "ego2global_translation": cur_ego_pose['translation'],
                    "ego2global_rotation": cur_ego_pose['rotation'],
                    "timestamp": cur_sample_data["timestamp"]
                }
                l2e_r_s = sweep["lidar2ego_rotation"]
                l2e_t_s = sweep["lidar2ego_translation"]
                e2g_r_s = sweep["ego2global_rotation"]
                e2g_t_s = sweep["ego2global_translation"]
                # sweep->ego->global->ego'->lidar
                l2e_r_s_mat = Quaternion(l2e_r_s).rotation_matrix
                e2g_r_s_mat = Quaternion(e2g_r_s).rotation_matrix
                R = (l2e_r_s_mat.T @ e2g_r_s_mat.T) @ (
                    np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)
                T = (l2e_t_s @ e2g_r_s_mat.T + e2g_t_s) @ (
                    np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)
                T -= e2g_t @ (np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T) \
                    + l2e_t @ np.linalg.inv(l2e_r_mat).T
                sweep["sweep2lidar_rotation"] = R.T  # points @ R.T + T
                sweep["sweep2lidar_translation"] = T
                sweeps.append(sweep)
            else:  # prev is none
                break

        # then load gt_boxes_3d
        if self.img_list in ['train', 'val'] and cfg.TEST.WITH_GT:
            cur_data_path, all_boxes, _ = self.nusc.get_sample_data(sample_data_token)

            # first parse box labels
            locs = np.array([box.center for box in all_boxes]).reshape(-1, 3)
            sizes = np.array([box.wlh for box in all_boxes]).reshape(-1, 3)
            rots = np.array([box.orientation.yaw_pitch_roll[0]
                             for box in all_boxes]).reshape(-1, 1)
            all_boxes_3d = np.concatenate([locs, sizes, -rots], axis=-1)

            annos_tokens = cur_sample['anns']
            all_velocity = np.array([
                self.nusc.box_velocity(ann_token)[:2] for ann_token in annos_tokens
            ])  # [-1, 2]
            for i in range(len(all_boxes)):
                velo = np.array([*all_velocity[i], 0.0])
                velo = velo @ np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T
                all_velocity[i] = velo[:2]  # [-1, 2]

            attribute_tokens = [
                self.nusc.get('sample_annotation', ann_token)['attribute_tokens']
                for ann_token in annos_tokens
            ]
            all_attribute = []
            for attribute_token in attribute_tokens:
                if len(attribute_token) == 0:
                    all_attribute.append([])
                else:
                    all_attribute.append(
                        self.nusc.get('attribute', attribute_token[0])['name'])

            # then filter out the ignore labels
            categories = np.array([box.name for box in all_boxes])
            if self.img_list == 'train':
                useful_idx = [index for index, category in enumerate(categories)
                              if self.useful_cls_dict[category] != 'ignore']
            else:
                useful_idx = [index for index, category in enumerate(categories)]
            if len(useful_idx) == 0:
                if self.img_list == 'train':
                    return None, biggest_label_num
                else:
                    all_boxes_3d = np.ones([1, 7], dtype=np.float32)
                    all_boxes_classes = np.array(['ignore'])
                    all_attribute = np.array([-1])
                    all_velocity = np.array([[0, 0]], dtype=np.float32)
            else:
                all_boxes_3d = all_boxes_3d[useful_idx]

                categories = categories[useful_idx]
                all_boxes_classes = np.array([self.useful_cls_dict[cate]
                                              for cate in categories])
                # now calculate the mean size of each box
                for tmp_idx, all_boxes_class in enumerate(all_boxes_classes):
                    cur_mean_size = self.cls_size_dict[all_boxes_class] * \
                        self.cls_num_dict[all_boxes_class]
                    cur_cls_num = self.cls_num_dict[all_boxes_class] + 1
                    cur_total_size = cur_mean_size + all_boxes_3d[tmp_idx, [4, 5, 3]]  # [l, w, h]
                    cur_mean_size = cur_total_size / cur_cls_num
                    self.cls_size_dict[all_boxes_class] = cur_mean_size
                    self.cls_num_dict[all_boxes_class] = cur_cls_num

                all_attribute = [all_attribute[tmp_idx] for tmp_idx in useful_idx]
                tmp_attribute = []
                for attr in all_attribute:
                    if attr == []:
                        tmp_attribute.append(-1)
                    else:
                        tmp_attribute.append(self.attribute_idx_list[attr])
                all_attribute = tmp_attribute
                all_attribute = np.array(all_attribute, dtype=np.int32)
                all_velocity = [all_velocity[tmp_idx] for tmp_idx in useful_idx]
                all_velocity = np.array(all_velocity, dtype=np.float32)
        else:
            cur_data_path = self.nusc.get_sample_data_path(sample_data_token)

        # then generate the bev_maps
        if self.img_list in ['train', 'val', 'trainval'] and cfg.TEST.WITH_GT:
            sample_dict = {
                maps_dict.KEY_LABEL_BOXES_3D: all_boxes_3d,
                maps_dict.KEY_LABEL_CLASSES: all_boxes_classes,
                maps_dict.KEY_LABEL_ATTRIBUTES: all_attribute,
                maps_dict.KEY_LABEL_VELOCITY: all_velocity,
                maps_dict.KEY_LABEL_NUM: len(all_boxes_3d),

                maps_dict.KEY_POINT_CLOUD: cur_data_path,
                maps_dict.KEY_TRANSFORMRATION_MATRIX: cur_transformation_matrix,
                maps_dict.KEY_SAMPLE_NAME: '{}/{}/{}'.format(
                    cur_scene_key, cur_sample_token, sample_data_token),
                maps_dict.KEY_SWEEPS: sweeps,
                maps_dict.KEY_TIMESTAMPS: cur_timestamp,
            }
            biggest_label_num = max(len(all_boxes_3d), biggest_label_num)
        else:
            # img_list is test
            sample_dict = {
                maps_dict.KEY_POINT_CLOUD: cur_data_path,
                maps_dict.KEY_SAMPLE_NAME: '{}/{}/{}'.format(
                    cur_scene_key, cur_sample_token, sample_data_token),
                maps_dict.KEY_TRANSFORMRATION_MATRIX: cur_transformation_matrix,
                maps_dict.KEY_SWEEPS: sweeps,
                maps_dict.KEY_TIMESTAMPS: cur_timestamp,
            }
        return sample_dict, biggest_label_num

    def preprocess_batch(self):
        # if create_gt_dataset, then also create a boxes_numpy, saving all points
        if cfg.TRAIN.AUGMENTATIONS.MIXUP.OPEN:  # also save the mixup database
            mixup_label_dict = dict([(cls, []) for cls in self.mixup_db_class])

        sample_dicts_list = []
        for scene_key, v in tqdm.tqdm(self.sample_data_token_list.items()):
            for sample_data_token in v:
                sample_dict, tmp_biggest_label_num = self.preprocess_samples(
                    scene_key, sample_data_token)
                if sample_dict is None:
                    continue
                # else save the result
                sample_dicts_list.append(sample_dict)

                # create_gt_dataset
                if self.img_list in ['train', 'val', 'trainval'] and cfg.TEST.WITH_GT \
                        and cfg.TRAIN.AUGMENTATIONS.MIXUP.OPEN:
                    mixup_sample_dicts = self.generate_mixup_sample(sample_dict)
                    if mixup_sample_dicts is None:
                        continue
                    for mixup_sample_dict in mixup_sample_dicts:
                        cur_cls = mixup_sample_dict[maps_dict.KEY_SAMPLED_GT_CLSES]
                        mixup_label_dict[cur_cls].append(mixup_sample_dict)

        # save preprocessed data
        with open(self.train_list, 'wb') as f:
            pickle.dump(sample_dicts_list, f)
        for k, v in self.cls_num_dict.items():
            print('class name: %s / class num: %d / mean size: (%f, %f, %f)' %
                  (k, v, self.cls_size_dict[k][0], self.cls_size_dict[k][1],
                   self.cls_size_dict[k][2]))  # [l, w, h]

        if self.img_list in ['train', 'val', 'trainval'] and cfg.TEST.WITH_GT \
                and cfg.TRAIN.AUGMENTATIONS.MIXUP.OPEN:
            print('**** Generating groundtruth database ****')
            for cur_cls_name, mixup_sample_dict in mixup_label_dict.items():
                cur_mixup_db_cls_path = self.mixup_db_cls_path[cur_cls_name]
                cur_mixup_db_trainlist_path = self.mixup_db_trainlist_path[cur_cls_name]
                print('**** Class %s ****' % cur_cls_name)
                with open(cur_mixup_db_trainlist_path, 'w') as f:
                    for tmp_idx, tmp_cur_mixup_sample_dict in tqdm.tqdm(
                            enumerate(mixup_sample_dict)):
                        f.write('%06d.npy\n' % tmp_idx)
                        np.save(os.path.join(cur_mixup_db_cls_path, '%06d.npy' % tmp_idx),
                                tmp_cur_mixup_sample_dict)
        print('Ending of the preprocess !!!')

    def generate_mixup_sample(self, sample_dict):
        """ This function is bound for generating the mixup dataset """
        all_boxes_3d = sample_dict[maps_dict.KEY_LABEL_BOXES_3D]
        all_boxes_classes = sample_dict[maps_dict.KEY_LABEL_CLASSES]
        point_cloud_path = sample_dict[maps_dict.KEY_POINT_CLOUD]

        # first cast all_boxes_3d to kitti format
        all_boxes_3d = cast_box_3d_to_kitti_format(all_boxes_3d)

        # load points
        points = np.fromfile(point_cloud_path, dtype=np.float32).reshape((-1, 5))
        points = cast_points_to_kitti(points)
        points[:, 3] /= 255
        points[:, 4] = 0  # timestamp is zero

        points_mask = check_inside_points(points, all_boxes_3d)  # [pts_num, gt_num]
        points_mask_num = np.sum(points_mask, axis=0)  # [gt_num]; was the undefined `points_masks`
        valid_box_idx = np.where(points_mask_num >= cfg.DATASET.MIN_POINTS_NUM)[0]

        if len(valid_box_idx) == 0:
            return None

        valid_label_boxes_3d = all_boxes_3d[valid_box_idx]
        valid_label_classes = all_boxes_classes[valid_box_idx]

        sample_dicts = []
        for index, i in enumerate(valid_box_idx):
            cur_points_mask = points_mask[:, i]
            cur_points_idx = np.where(cur_points_mask)[0]
            cur_inside_points = points[cur_points_idx, :]
            sample_dict = {
                # 0 timestamp and /255 reflectance; kitti-format points
                maps_dict.KEY_SAMPLED_GT_POINTS: cur_inside_points,
                maps_dict.KEY_SAMPLED_GT_LABELS_3D: valid_label_boxes_3d[index],
                maps_dict.KEY_SAMPLED_GT_CLSES: valid_label_classes[index],
            }
            sample_dicts.append(sample_dict)
        return sample_dicts

    # Evaluation
    def set_evaluation_tensor(self, model):
        # get prediction results, bs = 1
        pred_bbox_3d = tf.squeeze(model.output[maps_dict.PRED_3D_BBOX][-1], axis=0)
        pred_cls_score = tf.squeeze(model.output[maps_dict.PRED_3D_SCORE][-1], axis=0)
        pred_cls_category = tf.squeeze(model.output[maps_dict.PRED_3D_CLS_CATEGORY][-1], axis=0)
        pred_list = [pred_bbox_3d, pred_cls_score, pred_cls_category]

        if len(model.output[maps_dict.PRED_3D_ATTRIBUTE]) > 0:
            pred_attribute = tf.squeeze(model.output[maps_dict.PRED_3D_ATTRIBUTE][-1], axis=0)
            pred_velocity = tf.squeeze(model.output[maps_dict.PRED_3D_VELOCITY][-1], axis=0)
            pred_list.extend([pred_attribute, pred_velocity])
        return pred_list

    def evaluate_map(self, sess, feeddict_producer, pred_list, val_size,
                     cls_thresh, log_dir, placeholders=None):
        submissions = {}
        submissions['meta'] = dict()
        submissions['meta']['use_camera'] = False
        submissions['meta']['use_lidar'] = True
        submissions['meta']['use_radar'] = False
        submissions['meta']['use_map'] = False
        submissions['meta']['use_external'] = False

        submissions_results = dict()
        pred_attr_velo = (len(pred_list) == 5)

        for i in tqdm.tqdm(range(val_size)):
            feed_dict = feeddict_producer.create_feed_dict()

            if pred_attr_velo:
                pred_bbox_3d_op, pred_cls_score_op, pred_cls_category_op, \
                    pred_attr_op, pred_velo_op = sess.run(pred_list, feed_dict=feed_dict)
            else:
                pred_bbox_3d_op, pred_cls_score_op, pred_cls_category_op = sess.run(
                    pred_list, feed_dict=feed_dict)
            pred_cls_category_op += 1  # label from 1 to n

            sample_name, cur_transformation_matrix, sweeps = feeddict_producer.info
            sample_name = sample_name[0]
            cur_transformation_matrix = cur_transformation_matrix[0]
            sweeps = sweeps[0]
            cur_scene_key, cur_sample_token, cur_sample_data_token = sample_name.split('/')

            select_idx = np.where(pred_cls_score_op >= cls_thresh)[0]
            pred_cls_score_op = pred_cls_score_op[select_idx]
            pred_cls_category_op = pred_cls_category_op[select_idx]
            pred_bbox_3d_op = pred_bbox_3d_op[select_idx]
            if pred_attr_velo:
                pred_attr_op = pred_attr_op[select_idx]
                pred_velo_op = pred_velo_op[select_idx]
            else:
                pred_attr_op, pred_velo_op = None, None

            if len(pred_bbox_3d_op) > 500:
                arg_sort_idx = np.argsort(pred_cls_score_op)[::-1]
                arg_sort_idx = arg_sort_idx[:500]
                pred_cls_score_op = pred_cls_score_op[arg_sort_idx]
                pred_cls_category_op = pred_cls_category_op[arg_sort_idx]
                pred_bbox_3d_op = pred_bbox_3d_op[arg_sort_idx]
                if pred_attr_velo:
                    pred_attr_op = pred_attr_op[arg_sort_idx]
                    pred_velo_op = pred_velo_op[arg_sort_idx]

            # then transform pred_bbox_op to nuscenes_box
            boxes = cast_kitti_format_to_nusc_box_3d(
                pred_bbox_3d_op, pred_cls_score_op, pred_cls_category_op,
                cur_attribute=pred_attr_op, cur_velocity=pred_velo_op,
                classes=self.idx2cls_dict)
            for box in boxes:
                velocity = box.velocity[:2].tolist()
                if len(sweeps) == 0:
                    velocity = (np.nan, np.nan)
                box.velocity = np.array([*velocity, 0.0])
            # then cast the box from ego to global
            boxes = _lidar_nusc_box_to_global(cur_transformation_matrix, boxes,
                                              self.idx2cls_dict, eval_version='cvpr_2019')

            annos = []
            for box in boxes:
                name = self.idx2cls_dict[box.label]
                if box.name == -1:
                    attr = self.DefaultAttribute[name]
                else:
                    attr = self.AttributeIdxLabelMapping[name][box.name]
                velocity = box.velocity[:2].tolist()
                nusc_anno = {
                    "sample_token": cur_sample_token,
                    "translation": box.center.tolist(),
                    "size": box.wlh.tolist(),
                    "rotation": box.orientation.elements.tolist(),
                    "velocity": velocity,
                    "detection_name": name,
                    "detection_score": box.score,
                    "attribute_name": attr,
                }
                annos.append(nusc_anno)
            # key by the current sample token (was the undefined `info['sample_token']`)
            submissions_results[cur_sample_token] = annos

        submissions['results'] = submissions_results

        res_path = os.path.join(log_dir, "results_nusc_1.json")
        with open(res_path, "w") as f:
            json.dump(submissions, f)
        eval_main_file = os.path.join(cfg.ROOT_DIR, 'lib/core/nusc_eval.py')
        root_path = self.dataset_dir
        cmd = f"python3 {str(eval_main_file)} --root_path=\"{str(root_path)}\""
        cmd += f" --version={'v1.0-trainval'} --eval_version={'cvpr_2019'}"
        cmd += f" --res_path=\"{str(res_path)}\" --eval_set={'val'}"
        cmd += f" --output_dir=\"{LOG_FOUT_DIR}\""
        # using a subprocess releases all nusc memory after evaluation
        subprocess.check_output(cmd, shell=True)
        os.system('rm \"%s\"' % res_path)  # remove the former result file

        with open(os.path.join(log_dir, "metrics_summary.json"), "r") as f:
            metrics = json.load(f)
        return metrics

    def evaluate_recall(self, sess, feeddict_producer, pred_list, val_size,
                        cls_thresh, log_dir, placeholders=None):
        pass

    def logger_and_select_best_map(self, metrics, log_string):
        detail = {}
        result = "Nusc v1.0-trainval Evaluation\n"
        final_score = []
        for name in self.cls_list:
            detail[name] = {}
            for k, v in metrics["label_aps"][name].items():
                detail[name][f"dist@{k}"] = v
            tp_errs = []
            tp_names = []
            for k, v in metrics["label_tp_errors"][name].items():
                detail[name][k] = v
                tp_errs.append(f"{v:.4f}")
                tp_names.append(k)
            threshs = ', '.join(list(metrics["label_aps"][name].keys()))
            scores = list(metrics["label_aps"][name].values())
            final_score.append(np.mean(scores))
            scores = ', '.join([f"{s * 100:.2f}" for s in scores])
            result += f"{name} Nusc dist AP@{threshs} and TP errors\n"
            result += scores
            result += "\n"
            result += "mAP: %0.2f\n" % (
                np.mean(list(metrics["label_aps"][name].values())) * 100)
            result += ', '.join(tp_names) + ": " + ', '.join(tp_errs)
            result += "\n"
        result += 'NDS score: %0.2f\n' % (metrics['nd_score'] * 100)
        log_string(result)
        cur_result = metrics['nd_score']
        return cur_result

    def logger_and_select_best_recall(self, metrics, log_string):
        pass

    # save prediction results
    def save_predictions(self, sess, feeddict_producer, pred_list, val_size,
                         cls_thresh, log_dir, placeholders=None):
        pass
def test(estimator=None):
    if args.Joint:
        nusc = NuScenes(
            version='v1.0-trainval',
            verbose=True,
            dataroot=args.nuscenes_root,
        )

    dataset_image_folder_format = os.path.join(
        args.nuscenes_data_root, args.type + '/' + '{}/img1',
    )
    detection_file_name_format = os.path.join(
        args.nuscenes_data_root, args.type + '/' + '{}/gt/gt.txt',
    )

    if not os.path.exists(args.log_folder):
        os.mkdir(args.log_folder)

    save_folder = args.log_folder
    if not os.path.exists(save_folder):
        os.mkdir(save_folder)

    saved_file_name_format = os.path.join(save_folder, 'Video' + '{}.txt')
    save_video_name_format = os.path.join(save_folder, 'Video' + '{}.avi')

    test_dataset_index = os.listdir(
        os.path.join(args.nuscenes_data_root, args.type),
    )

    def f(format_str):
        return [format_str.format(index) for index in test_dataset_index]

    for image_folder, detection_file_name, saved_file_name, save_video_name in zip(
            f(dataset_image_folder_format), f(detection_file_name_format),
            f(saved_file_name_format), f(save_video_name_format)):
        if path.exists(image_folder) and os.path.getsize(detection_file_name) > 0:
            tracker = Tracker(estimator)
            reader = DataReader(
                image_folder=image_folder,
                detection_file_name=detection_file_name,
            )
            result = list()
            first_run = True
            for i, item in enumerate(reader):
                if i > len(reader):
                    break
                if item is None:
                    print('item is none')
                    continue
                img = item[0]
                det = item[1]
                if img is None or det is None or len(det) == 0:
                    continue
                if len(det) > config['max_object']:
                    det = det[:config['max_object'], :]
                h, w, _ = img.shape

                if first_run:
                    vw = cv2.VideoWriter(
                        save_video_name,
                        cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                        10, (w, h),
                    )
                    first_run = False

                features = det[:, 6:12].astype(float)
                tokens = det[:, 12:]
                if args.Joint:
                    tokens = tokens[0][1:]
                    frame_token = tokens[0]
                    first_frame_token = tokens[1]

                    current_cam_record = nusc.get('sample_data', frame_token)
                    current_cam_path = nusc.get_sample_data_path(frame_token)
                    current_cam_path, boxes, current_camera_intrinsic = nusc.get_sample_data(
                        current_cam_record['token'],
                    )
                    current_cs_record = nusc.get(
                        'calibrated_sensor',
                        current_cam_record['calibrated_sensor_token'],
                    )
                    current_poserecord = nusc.get(
                        'ego_pose',
                        current_cam_record['ego_pose_token'],
                    )

                    first_cam_record = nusc.get('sample_data', first_frame_token)
                    first_cam_path = nusc.get_sample_data_path(first_frame_token)
                    first_cam_path, boxes, first_camera_intrinsic = nusc.get_sample_data(
                        first_cam_record['token'],
                    )
                    first_cs_record = nusc.get(
                        'calibrated_sensor',
                        first_cam_record['calibrated_sensor_token'],
                    )
                    first_poserecord = nusc.get(
                        'ego_pose',
                        first_cam_record['ego_pose_token'],
                    )

                det[:, [2, 4]] /= float(w)
                det[:, [3, 5]] /= float(h)

                if args.Joint:
                    image_org = tracker.update(
                        args.Joint, img, det[:, 2:6], args.show_image, i,
                        features, tokens, False,
                        current_cs_record, current_poserecord,
                        first_cs_record, first_poserecord,
                        first_camera_intrinsic,
                    )
                else:
                    image_org = tracker.update(
                        args.Joint, img, det[:, 2:6], args.show_image, i,
                    )
                vw.write(image_org)

                # save result
                for t in tracker.tracks:
                    n = t.nodes[-1]
                    if t.age == 1:
                        b = n.get_box(tracker.frame_index - 1, tracker.recorder)
                        # print('n.pose ', n.pose.cpu().data.numpy())
                        if args.Joint:
                            result.append(
                                [i] + [t.id] +
                                [b[0] * w, b[1] * h, b[2] * w, b[3] * h] +
                                list(n.pose.cpu().data.numpy()),
                            )
                        else:
                            result.append(
                                [i] + [t.id] +
                                [b[0] * w, b[1] * h, b[2] * w, b[3] * h],
                            )

            # save data
            np.savetxt(saved_file_name, np.array(result), fmt='%10f')
def create_annotations(self, version, max_sweeps):
    from nuscenes.nuscenes import NuScenes
    from nuscenes.utils import splits
    nusc = NuScenes(version=version, dataroot=self.cfg.DATA.ROOTDIR, verbose=True)
    available_vers = ["v1.0-trainval", "v1.0-test", "v1.0-mini"]
    assert version in available_vers
    if version == "v1.0-trainval":
        train_scenes = splits.train
        val_scenes = splits.val
    elif version == "v1.0-test":
        train_scenes = splits.test
        val_scenes = []
    elif version == "v1.0-mini":
        train_scenes = splits.mini_train
        val_scenes = splits.mini_val
    else:
        raise ValueError("unknown version")
    root_path = Path(self.cfg.DATA.ROOTDIR)
    available_scenes = self._get_available_scenes(nusc)
    available_scene_names = [s["name"] for s in available_scenes]
    train_scenes = list(filter(lambda x: x in available_scene_names, train_scenes))
    val_scenes = list(filter(lambda x: x in available_scene_names, val_scenes))
    train_scenes = set(
        available_scenes[available_scene_names.index(s)]["token"]
        for s in train_scenes
    )
    val_scenes = set(
        available_scenes[available_scene_names.index(s)]["token"]
        for s in val_scenes
    )
    print(f"train scenes: {len(train_scenes)}, val scenes: {len(val_scenes)}")
    self.train_infos = dict()
    self.val_infos = dict()
    train_index = 0
    val_index = 0
    for sample in tqdm(nusc.sample, desc="Generating infos..."):
        lidar_token = sample["data"]["LIDAR_TOP"]
        sd_rec = nusc.get('sample_data', lidar_token)
        cs_record = nusc.get('calibrated_sensor', sd_rec['calibrated_sensor_token'])
        pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token'])
        lidar_path, boxes, _ = nusc.get_sample_data(lidar_token)
        assert Path(lidar_path).exists(), "some lidar file is missing"
        item = {
            "lidar_path": lidar_path,
            "token": sample["token"],
            "sweeps": [],
            "calib": cs_record,
            "ego_pose": pose_record,
            "timestamp": sample["timestamp"],
        }
        l2e_t = cs_record['translation']
        l2e_r = cs_record['rotation']
        e2g_t = pose_record['translation']
        e2g_r = pose_record['rotation']
        l2e_r_mat = Quaternion(l2e_r).rotation_matrix
        e2g_r_mat = Quaternion(e2g_r).rotation_matrix
        sweeps = []
        while len(sweeps) < max_sweeps:
            if sd_rec['prev'] == "":
                break
            sd_rec = nusc.get('sample_data', sd_rec['prev'])
            cs_record = nusc.get('calibrated_sensor', sd_rec['calibrated_sensor_token'])
            pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token'])
            lidar_path = nusc.get_sample_data_path(sd_rec['token'])
            sweep = {
                "lidar_path": lidar_path,
                "sample_data_token": sd_rec['token'],
                "timestamp": sd_rec["timestamp"],
            }
            l2e_r_s = cs_record["rotation"]
            l2e_t_s = cs_record["translation"]
            e2g_r_s = pose_record["rotation"]
            e2g_t_s = pose_record["translation"]
            # Chain the transforms: sweep -> ego -> global -> ego' -> lidar'.
            l2e_r_s_mat = Quaternion(l2e_r_s).rotation_matrix
            e2g_r_s_mat = Quaternion(e2g_r_s).rotation_matrix
            R = (l2e_r_s_mat.T @ e2g_r_s_mat.T) @ (
                np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T
            )
            T = (l2e_t_s @ e2g_r_s_mat.T + e2g_t_s) @ (
                np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T
            )
            T -= e2g_t @ (np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T) \
                + l2e_t @ np.linalg.inv(l2e_r_mat).T
            sweep["sweep2lidar_rotation"] = R.T
            sweep["sweep2lidar_translation"] = T
            sweeps.append(sweep)
        item["sweeps"] = sweeps
        annotations = [
            nusc.get('sample_annotation', token) for token in sample['anns']
        ]
        locs = np.array([b.center for b in boxes]).reshape(-1, 3)
        dims = np.array([b.wlh for b in boxes]).reshape(-1, 3)
        rots = np.array(
            [b.orientation.yaw_pitch_roll[0] for b in boxes]
        ).reshape(-1, 1)
        names = [b.name for b in boxes]
        class_idx = []
        for i in range(len(names)):
            if names[i] in NuscenesDataset.NameMapping:
                names[i] = NuscenesDataset.NameMapping[names[i]]
            class_idx.append(nuscenes_class_name_to_idx(names[i]))
        names = np.array(names)
        class_idx = np.array(class_idx)
        gt_boxes = np.concatenate([locs, dims, rots], axis=1)
        # nuScenes contains rare classes that are not trained on; drop them.
        mask = np.array(
            [NuscenesDataset.Nuscenes_classes.count(s) > 0 for s in names],
            dtype=np.bool_,
        )
        gt_boxes = gt_boxes[mask]
        names = names[mask]
        # Keep the class indices aligned with the filtered boxes.
        class_idx = class_idx[mask]
        assert len(gt_boxes) == len(names), f"{len(gt_boxes)}, {len(names)}"
        assert len(gt_boxes) == len(class_idx), f"{len(gt_boxes)}, {len(class_idx)}"
        item["boxes"] = gt_boxes
        item["class_idx"] = class_idx
        item["names"] = names
        item["num_lidar_pts"] = np.array([a["num_lidar_pts"] for a in annotations])
        item["num_radar_pts"] = np.array([a["num_radar_pts"] for a in annotations])
        if sample["scene_token"] in train_scenes:
            self.train_infos[train_index] = item
            train_index += 1
        else:
            self.val_infos[val_index] = item
            val_index += 1
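# Sketch of how the sweep2lidar_rotation / sweep2lidar_translation fields
# produced above are typically consumed when accumulating sweeps into the
# key-frame lidar frame. The (N, 3) point layout and the right-multiplication
# convention follow common usage of these info fields in SECOND-style loaders
# and are an assumption here; the helper name is hypothetical.
import numpy as np

def map_sweep_to_keyframe(points_xyz: np.ndarray, sweep: dict) -> np.ndarray:
    """points_xyz: (N, 3) xyz from a prior sweep, returned in the key-frame lidar frame."""
    rot = sweep["sweep2lidar_rotation"]       # (3, 3)
    trans = sweep["sweep2lidar_translation"]  # (3,)
    return points_xyz @ rot.T + trans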
def train():
    net.train()
    current_lr = config['learning_rate']
    print('Loading Dataset...')
    dataset = TrainDataset(
        args.nuscenes_data_root,
        SSJAugmentation(config['image_size'], means),
    )
    print('length = ', len(dataset))
    epoch_size = len(dataset) // args.batch_size
    step_index = 0
    batch_iterator = None
    batch_size = 1  # the joint pose branch currently supports batch size 1 only
    data_loader = data.DataLoader(
        dataset, batch_size,
        num_workers=args.num_workers,
        shuffle=True,
        collate_fn=collate_fn,
        pin_memory=False,
    )
    if args.Joint:
        nusc = NuScenes(
            version='v1.0-trainval',
            verbose=True,
            dataroot=args.nuscenes_root,
        )
    for iteration in range(0, 380000):
        if (not batch_iterator) or (iteration % epoch_size == 0):
            # Create a fresh batch iterator at the start of each epoch.
            batch_iterator = iter(data_loader)
            all_epoch_loss = []
        if iteration in stepvalues:
            step_index += 1
            current_lr = current_lr * 0.5
            for param_group in optimizer.param_groups:
                param_group['lr'] = current_lr
        # load train data
        (img_pre, img_next, boxes_pre, boxes_next, labels,
         valid_pre, valid_next, current_poses, next_poses,
         pre_bbox, next_bbox, img_org_pre, img_org_next,
         current_tokens, next_tokens) = next(batch_iterator)
        if args.Joint:
            current_token = current_tokens[0]
            next_token = next_tokens[0]
            first_token = current_tokens[1]
            current_cam_record = nusc.get('sample_data', current_token)
            current_cam_path, boxes, current_camera_intrinsic = nusc.get_sample_data(
                current_cam_record['token'],
            )
            current_cs_record = nusc.get(
                'calibrated_sensor', current_cam_record['calibrated_sensor_token'],
            )
            current_poserecord = nusc.get(
                'ego_pose', current_cam_record['ego_pose_token'],
            )
            next_cam_record = nusc.get('sample_data', next_token)
            next_cam_path, boxes, next_camera_intrinsic = nusc.get_sample_data(
                next_cam_record['token'],
            )
            next_cs_record = nusc.get(
                'calibrated_sensor', next_cam_record['calibrated_sensor_token'],
            )
            next_poserecord = nusc.get(
                'ego_pose', next_cam_record['ego_pose_token'],
            )
            first_cam_record = nusc.get('sample_data', first_token)
            first_cam_path, boxes, first_camera_intrinsic = nusc.get_sample_data(
                first_cam_record['token'],
            )
            first_cs_record = nusc.get(
                'calibrated_sensor', first_cam_record['calibrated_sensor_token'],
            )
            first_poserecord = nusc.get(
                'ego_pose', first_cam_record['ego_pose_token'],
            )
        if args.cuda:
            img_pre = img_pre.cuda()
            img_next = img_next.cuda()
            boxes_pre = boxes_pre.cuda()
            boxes_next = boxes_next.cuda()
            valid_pre = valid_pre.cuda()
            valid_next = valid_next.cuda()
            labels = labels.cuda()
            current_poses = current_poses.cuda()
            next_poses = next_poses.cuda()
            pre_bbox = pre_bbox.cuda()
            next_bbox = next_bbox.cuda()
            img_org_pre = img_org_pre.cuda()
            img_org_next = img_org_next.cuda()
        # forward
        if args.Joint:
            out, pose_loss = net(
                args.Joint, img_pre, img_next, boxes_pre, boxes_next,
                valid_pre, valid_next, current_poses, next_poses,
                pre_bbox, next_bbox, img_org_pre, img_org_next,
                current_cs_record, current_poserecord,
                next_cs_record, next_poserecord,
                first_cs_record, first_poserecord, first_camera_intrinsic,
            )
        else:
            out = net(
                img_pre, img_next, boxes_pre, boxes_next, valid_pre, valid_next,
            )
        optimizer.zero_grad()
        loss_pre, loss_next, loss_similarity, loss, accuracy_pre, accuracy_next, accuracy, predict_indexes = criterion(
            out, labels, valid_pre, valid_next,
        )
        # pose_loss only exists on the joint branch.
        total_loss = loss + 0.1 * pose_loss if args.Joint else loss
        total_loss.backward()
        optimizer.step()
        all_epoch_loss += [loss.data.cpu()]
        print(
            'iter ' + repr(iteration) + ', ' + repr(epoch_size) +
            ' || epoch: %.4f ' % (iteration / float(epoch_size)) +
            ' || Loss: %.4f ||' % loss.data.item(), end=' ',
        )
        if iteration % 1000 == 0:
            result_image = show_batch_circle_image(
                img_pre, img_next, boxes_pre, boxes_next,
                valid_pre, valid_next, predict_indexes, iteration,
            )
            print('Saving state, iter:', iteration)
            torch.save(
                net.state_dict(),
                os.path.join(args.save_folder, 'model_' + repr(iteration) + '.pth'),
            )
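# The learning-rate schedule in train() halves the rate each time the
# iteration counter reaches a milestone in `stepvalues` (defined elsewhere in
# the script; the milestones below are placeholders). A closed-form version of
# the same schedule, handy for sanity-checking a resumed run:
def lr_at(iteration: int, base_lr: float, stepvalues=(200000, 300000)) -> float:
    lr = base_lr
    for milestone in stepvalues:
        if iteration >= milestone:
            lr *= 0.5
    return lr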
def main():
    args = parse_args()
    nusc = NuScenes(version=args.version, dataroot=args.dataroot, verbose=args.verbose)
    ann_dir = os.path.join(args.saveroot, "frames_ann")
    os.makedirs(ann_dir, exist_ok=True)
    ego_pose_dir = os.path.join(args.saveroot, "ego_pose")
    os.makedirs(ego_pose_dir, exist_ok=True)
    scene_count = 0
    for scene in nusc.scene:
        instance_ids = dict()
        first_sample_token = scene["first_sample_token"]
        sample = nusc.get("sample", first_sample_token)
        cam_sample = nusc.get("sample_data", sample["data"]["CAM_FRONT"])
        instances_ann = list()
        ego_poses = list()
        scene_dir = os.path.join(args.saveroot, "frames", scene["name"])
        os.makedirs(scene_dir, exist_ok=True)
        has_more_frames = True
        while has_more_frames:
            frame_path = nusc.get_sample_data_path(cam_sample["token"])
            ego_pose = nusc.get("ego_pose", cam_sample["ego_pose_token"])
            ego_poses.append(
                [ego_pose["timestamp"]] + ego_pose["rotation"] + ego_pose["translation"]
            )
            if cam_sample["is_key_frame"]:
                # The capture timestamp is encoded in the image filename.
                frame_time_stamp = int(frame_path.split("__")[-1].split(".")[0])
                reproj_bboxes = get_2d_boxes(
                    nusc, cam_sample["token"], visibilities=['3', '4'],
                )
                for reproj_bbox in reproj_bboxes:
                    category = reproj_bbox["category_name"]
                    if "human" in category or "vehicle" in category:
                        instance_token = reproj_bbox["instance_token"]
                        if instance_token not in instance_ids:
                            instance_ids[instance_token] = len(instance_ids) + 1
                        x1, y1, x2, y2, depth = reproj_bbox["bbox_corners"]
                        instance_id = instance_ids[instance_token]
                        instances_ann.append(
                            [frame_time_stamp, instance_id, x1, y1, x2, y2, depth]
                        )
            # shutil.copy(frame_path, os.path.join(scene_dir, frame_path.split("/")[-1]))
            if cam_sample["next"] != "":
                cam_sample = nusc.get("sample_data", cam_sample["next"])
            else:
                has_more_frames = False
        df_ins = pd.DataFrame(instances_ann)
        df_ins.to_csv(
            os.path.join(ann_dir, "{}_instances_ann.csv".format(scene["name"])),
            header=False, index=False,
        )
        df_ego_pose = pd.DataFrame(ego_poses)
        df_ego_pose.to_csv(
            os.path.join(ego_pose_dir, "{}_ego_pose.csv".format(scene["name"])),
            header=False, index=False,
        )
        scene_count += 1
    print("{} scenes have been successfully processed!".format(scene_count))
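# The per-scene instance-id bookkeeping in main() boils down to this helper:
# each previously unseen nuScenes instance_token is assigned the next integer
# id, starting at 1, so ids are stable within a scene but not across scenes.
def get_instance_id(instance_ids: dict, instance_token: str) -> int:
    if instance_token not in instance_ids:
        instance_ids[instance_token] = len(instance_ids) + 1
    return instance_ids[instance_token]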
def render_image_annotations(sample_data_token: str,
                             with_anns: bool = True,
                             out_path=None,
                             box_vis_level: BoxVisibility = BoxVisibility.ANY,
                             axes_limit: float = 40,
                             ax: Axes = None,
                             image_annotations_token2ind=None,
                             image_annotations=None,
                             visibilities=['', '1', '2', '3', '4'],
                             cam_type='CAM_FRONT'):
    # Avoid mutable default arguments.
    image_annotations_token2ind = image_annotations_token2ind or {}
    image_annotations = image_annotations or []
    _cam_type = '/' + cam_type + '/'
    # NOTE: instantiating NuScenes here reloads the tables on every call;
    # acceptable for one-off rendering, wasteful in a loop.
    nusc = NuScenes(version='v1.0-mini', dataroot='dataset/nuScenes/v1.0-mini',
                    verbose=True)
    # Get sensor records ('camera' modality for all CAM_* channels).
    sd_record = nusc.get('sample_data', sample_data_token)
    sensor_modality = sd_record['sensor_modality']
    cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
    sensor_record = nusc.get('sensor', cs_record['sensor_token'])
    pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
    data_path = nusc.get_sample_data_path(sample_data_token)
    camera_intrinsic = np.array(cs_record['camera_intrinsic'])
    imsize = (sd_record['width'], sd_record['height'])
    # The re-projected 2D boxes are already in image space, so unlike the
    # devkit's 3D renderer no ego-/sensor-frame transform or box_in_image()
    # visibility check is needed here.
    boxes2d = get_boxes2d(
        sample_data_token=sample_data_token,
        image_annotations_token2ind=image_annotations_token2ind,
        image_annotations=image_annotations,
    )
    data = Image.open(data_path)
    # Init axes.
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(9, 16))
    # Show image.
    ax.imshow(data)
    # Show boxes that are visible enough and belong to this camera's image.
    if with_anns:
        for box2d in boxes2d:
            c = np.array(get_color(box2d.name)) / 255.0
            if box2d.visibility in visibilities and (_cam_type in box2d.filename):
                box2d.render(ax, view=camera_intrinsic, normalize=True,
                             colors=(c, c, c))
    ax.set_title(sd_record['channel'])
    if out_path is not None:
        plt.savefig(out_path)
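# Usage sketch for render_image_annotations(). It assumes the 2D annotations
# were exported with the devkit's export_2d_annotations_as_json.py script
# (which writes image_annotations.json records keyed by sample_data_token);
# the file path and the token2ind construction below are assumptions, since
# the exact index structure depends on get_boxes2d(), defined elsewhere.
import json
from nuscenes.nuscenes import NuScenes

if __name__ == '__main__':
    _nusc = NuScenes(version='v1.0-mini', dataroot='dataset/nuScenes/v1.0-mini',
                     verbose=False)
    sd_token = _nusc.sample[0]['data']['CAM_FRONT']
    with open('dataset/nuScenes/v1.0-mini/image_annotations.json') as f:
        image_annotations = json.load(f)
    # Map each sample_data_token to the indices of its 2D annotation records.
    token2ind = {}
    for i, rec in enumerate(image_annotations):
        token2ind.setdefault(rec['sample_data_token'], []).append(i)
    render_image_annotations(sd_token,
                             image_annotations_token2ind=token2ind,
                             image_annotations=image_annotations,
                             out_path='rendered.png')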