def transform_bounding_box_to_sensor_coord_and_get_corners(box: Box, sample_data_token: str, lyftd: LyftDataset,
                                                           frustum_pointnet_convention=False):
    """
    Transform a bounding box to the sensor coordinate frame and get its corners.

    :param box: bounding box in world coordinates
    :param sample_data_token: camera data token
    :param lyftd: LyftDataset instance
    :return: (box corners projected onto the image, box corners in camera coordinates)
    """
    transformed_box = transform_box_from_world_to_sensor_coordinates(box, sample_data_token, lyftd)
    sd_record = lyftd.get("sample_data", sample_data_token)
    cs_record = lyftd.get("calibrated_sensor", sd_record["calibrated_sensor_token"])
    sensor_record = lyftd.get("sensor", cs_record["sensor_token"])

    if sensor_record['modality'] == 'camera':
        cam_intrinsic_mtx = np.array(cs_record["camera_intrinsic"])
    else:
        cam_intrinsic_mtx = None

    box_corners_on_cam_coord = transformed_box.corners()

    # Rearrange the corners to conform to Frustum-PointNet's convention
    if frustum_pointnet_convention:
        rearranged_idx = [0, 3, 7, 4, 1, 2, 6, 5]
        box_corners_on_cam_coord = box_corners_on_cam_coord[:, rearranged_idx]

        assert np.allclose((box_corners_on_cam_coord[:, 0] + box_corners_on_cam_coord[:, 6]) / 2,
                           np.array(transformed_box.center))

    # For perspective transformation, normalization should be set to True
    box_corners_on_image = view_points(box_corners_on_cam_coord, view=cam_intrinsic_mtx, normalize=True)

    return box_corners_on_image, box_corners_on_cam_coord
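# Usage sketch (not from the original source): project one annotated box into a camera image.
# Assumes `lyftd` is a loaded LyftDataset, `ann_token` is a sample_annotation token, and
# `cam_token` is a sample_data token of a camera; get_box returns the box in world coordinates.
def example_project_box(lyftd: LyftDataset, ann_token: str, cam_token: str):
    box_in_world = lyftd.get_box(ann_token)
    corners_2d, corners_cam = transform_bounding_box_to_sensor_coord_and_get_corners(
        box_in_world, cam_token, lyftd, frustum_pointnet_convention=True)
    print("image-plane corners:", corners_2d.shape)   # 3x8, third row is 1 after normalization
    print("camera-frame corners:", corners_cam.shape)  # 3x8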
def select_annotation_boxes(sample_token, lyftd: LyftDataset, box_vis_level: BoxVisibility = BoxVisibility.ALL,
                            camera_type=('CAM_FRONT', 'CAM_BACK', 'CAM_FRONT_LEFT', 'CAM_FRONT_RIGHT',
                                         'CAM_BACK_RIGHT', 'CAM_BACK_LEFT')) -> (str, str, Box):
    """
    Select annotations that are visible in a camera image, as defined by box_vis_level.

    :param sample_token:
    :param box_vis_level: BoxVisibility.ALL or BoxVisibility.ANY
    :param camera_type: the cameras that the token should be selected from
    :return: yields (sample_token, cam_token, Box)
    """
    sample_record = lyftd.get('sample', sample_token)

    cams = [key for key in sample_record["data"].keys() if "CAM" in key]
    cams = [cam for cam in cams if cam in camera_type]
    for ann_token in sample_record['anns']:
        for cam in cams:
            cam_token = sample_record["data"][cam]
            _, boxes_in_sensor_coord, cam_intrinsic = lyftd.get_sample_data(
                cam_token, box_vis_level=box_vis_level, selected_anntokens=[ann_token]
            )
            if len(boxes_in_sensor_coord) > 0:
                assert len(boxes_in_sensor_coord) == 1
                box_in_world_coord = lyftd.get_box(boxes_in_sensor_coord[0].token)
                yield sample_token, cam_token, box_in_world_coord
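# Usage sketch (assumes `lyftd` and `sample_token` are defined): iterate over all annotations
# fully visible in the front camera only.
for s_token, cam_token, world_box in select_annotation_boxes(sample_token, lyftd,
                                                             box_vis_level=BoxVisibility.ALL,
                                                             camera_type=['CAM_FRONT']):
    print(cam_token, world_box.name, world_box.center)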
def transform_box_from_world_to_flat_sensor_coordinates(first_train_sample_box: Box, sample_data_token: str,
                                                        lyftd: LyftDataset):
    sample_box = first_train_sample_box.copy()
    sd_record = lyftd.get("sample_data", sample_data_token)
    cs_record = lyftd.get("calibrated_sensor", sd_record["calibrated_sensor_token"])
    sensor_record = lyftd.get("sensor", cs_record["sensor_token"])
    pose_record = lyftd.get("ego_pose", sd_record["ego_pose_token"])

    # Move box to ego vehicle coord system parallel to world z plane
    ypr = Quaternion(pose_record["rotation"]).yaw_pitch_roll
    yaw = ypr[0]
    sample_box.translate(-np.array(pose_record["translation"]))
    sample_box.rotate(Quaternion(scalar=np.cos(yaw / 2), vector=[0, 0, np.sin(yaw / 2)]).inverse)

    # Move box to sensor coord system parallel to world z plane.
    # We need two steps, because the camera coordinate frame is x (right), z (front), y (down).
    inv_ypr = Quaternion(cs_record["rotation"]).inverse.yaw_pitch_roll
    angz = inv_ypr[0]
    sample_box.translate(-np.array(cs_record['translation']))

    # rotate around z-axis
    sample_box.rotate(Quaternion(scalar=np.cos(angz / 2), vector=[0, 0, np.sin(angz / 2)]))

    # rotate around x-axis by 90 degrees (the quaternion formula expects radians, not 90)
    angx = np.pi / 2
    sample_box.rotate(Quaternion(scalar=np.cos(angx / 2), vector=[np.sin(angx / 2), 0, 0]))

    return sample_box
def generateXYZRGB(ds: LyftDataset, sample_token: str, config: dict, folder: str):
    sample = ds.get("sample", sample_token)
    sample_lidar_token = sample["data"]["LIDAR_TOP"]
    lidar_data = ds.get("sample_data", sample_lidar_token)
    lidar_filepath = ds.get_sample_data_path(sample_lidar_token)
    ego_pose = ds.get("ego_pose", lidar_data["ego_pose_token"])
    calibrated_sensor = ds.get("calibrated_sensor", lidar_data["calibrated_sensor_token"])

    global_from_car = transform_matrix(ego_pose['translation'],
                                       Quaternion(ego_pose['rotation']), inverse=False)
    car_from_sensor = transform_matrix(calibrated_sensor['translation'],
                                       Quaternion(calibrated_sensor['rotation']), inverse=False)

    try:
        lidar_pointcloud = LidarPointCloud.from_file(lidar_filepath)
        pc = copy.deepcopy(lidar_pointcloud)
        lidar_pointcloud.transform(car_from_sensor)
    except Exception as e:
        print("Failed to load Lidar Pointcloud for {}: {}".format(sample_token, e))
        return  # without a point cloud there is nothing to save

    XYZRGB = getXYZRGB(ds, pc, sample, lidar_data)
    # pass the array explicitly; np.savez with only a filename saves nothing
    np.savez(folder + sample_lidar_token + ".xyzrgb", XYZRGB=XYZRGB)
def transform_pc_to_camera_coord(cam: dict, pointsensor: dict, point_cloud_3d: LidarPointCloud,
                                 lyftd: LyftDataset):
    warnings.warn("The point cloud is transformed to camera coordinates in place", UserWarning)

    # Points live in the point sensor frame, so they need to be transformed via global to the image plane.
    # First step: transform the point cloud to the ego vehicle frame for the timestamp of the sweep.
    cs_record = lyftd.get("calibrated_sensor", pointsensor["calibrated_sensor_token"])
    point_cloud_3d.rotate(Quaternion(cs_record["rotation"]).rotation_matrix)
    point_cloud_3d.translate(np.array(cs_record["translation"]))

    # Second step: transform to the global frame.
    poserecord = lyftd.get("ego_pose", pointsensor["ego_pose_token"])
    point_cloud_3d.rotate(Quaternion(poserecord["rotation"]).rotation_matrix)
    point_cloud_3d.translate(np.array(poserecord["translation"]))

    # Third step: transform into the ego vehicle frame for the timestamp of the image.
    poserecord = lyftd.get("ego_pose", cam["ego_pose_token"])
    point_cloud_3d.translate(-np.array(poserecord["translation"]))
    point_cloud_3d.rotate(Quaternion(poserecord["rotation"]).rotation_matrix.T)

    # Fourth step: transform into the camera frame.
    cs_record = lyftd.get("calibrated_sensor", cam["calibrated_sensor_token"])
    point_cloud_3d.translate(-np.array(cs_record["translation"]))
    point_cloud_3d.rotate(Quaternion(cs_record["rotation"]).rotation_matrix.T)

    # Take the actual picture (matrix multiplication with camera matrix + renormalization).
    point_cloud_2d = view_points(point_cloud_3d.points[:3, :],
                                 np.array(cs_record["camera_intrinsic"]), normalize=True)

    return point_cloud_3d, point_cloud_2d
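# Usage sketch (not from the original source): project a lidar sweep into the front camera and
# keep only points in front of the image plane. Assumes `lyftd` and `sample_token` are valid.
def example_project_lidar(lyftd: LyftDataset, sample_token: str):
    sample = lyftd.get("sample", sample_token)
    lidar_token = sample["data"]["LIDAR_TOP"]
    pointsensor = lyftd.get("sample_data", lidar_token)
    cam = lyftd.get("sample_data", sample["data"]["CAM_FRONT"])
    pc = LidarPointCloud.from_file(lyftd.get_sample_data_path(lidar_token))
    pc_3d, pc_2d = transform_pc_to_camera_coord(cam, pointsensor, pc, lyftd)
    in_front = pc_3d.points[2, :] > 0.5  # keep points at least 0.5 m in front of the camera
    print("points projected into CAM_FRONT:", pc_2d[:, in_front].shape)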
def create_lyft_infos(root_path=Path("/media/zzhou/data/data-lyft-3D-objects/")):
    # ====================================================================
    # A. Creating an index and splitting into train and validation scenes
    # ====================================================================
    # root_path is a Path, so join with / rather than string concatenation
    level5data = LyftDataset(data_path=str(root_path),
                             json_path=str(root_path / 'train_data'), verbose=True)
    classes = ["car", "motorcycle", "bus", "bicycle", "truck", "pedestrian",
               "other_vehicle", "animal", "emergency_vehicle"]

    records = [(level5data.get('sample', record['first_sample_token'])['timestamp'], record)
               for record in level5data.scene]

    entries = []
    for start_time, record in sorted(records):
        start_time = start_time / 1000000  # microseconds to seconds
        token = record['token']
        name = record['name']
        date = datetime.utcfromtimestamp(start_time)
        host = "-".join(record['name'].split("-")[:2])
        first_sample_token = record["first_sample_token"]
        entries.append((host, name, date, token, first_sample_token))

    df = pd.DataFrame(entries, columns=["host", "scene_name", "date", "scene_token", "first_sample_token"])

    validation_hosts = ["host-a007", "host-a008", "host-a009"]
    validation_df = df[df["host"].isin(validation_hosts)]
    vi = validation_df.index
    train_df = df[~df.index.isin(vi)]
    print(len(train_df), len(validation_df), "train/validation split scene counts")

    # ====================================================================
    # B. build SECOND database
    # ====================================================================
    metadata = {
        "version": f"{len(train_df)} train / {len(validation_df)} validation split scene counts",
    }
    train_lyft_infos = _scan_sample_token(level5data, train_df)
    data = {
        "infos": train_lyft_infos,
        "metadata": metadata,
    }
    with open(root_path / "infos_train.pkl", 'wb') as f:
        pickle.dump(data, f)

    val_lyft_infos = _scan_sample_token(level5data, validation_df)
    data["infos"] = val_lyft_infos
    with open(root_path / "infos_val.pkl", 'wb') as f:
        pickle.dump(data, f)
def transform_world_to_image_coordinate(world_coord_array, camera_token: str, lyftd: LyftDataset):
    sd_record = lyftd.get("sample_data", camera_token)
    cs_record = lyftd.get("calibrated_sensor", sd_record["calibrated_sensor_token"])
    sensor_record = lyftd.get("sensor", cs_record["sensor_token"])
    pose_record = lyftd.get("ego_pose", sd_record["ego_pose_token"])

    cam_intrinsic_mtx = np.array(cs_record["camera_intrinsic"])

    pose_to_sense_rot_mtx = Quaternion(cs_record['rotation']).rotation_matrix.T
    world_to_pose_rot_mtx = Quaternion(pose_record['rotation']).rotation_matrix.T

    # World to ego: translate by -t first, then rotate.
    # (Inverting x_world = R x_ego + t gives x_ego = R^T (x_world - t); the rotation must be
    # applied after the translation, matching the other world-to-ego helpers in this file.)
    ego_coord_array = world_coord_array - np.array(pose_record['translation']).reshape(3, 1)
    ego_coord_array = np.dot(world_to_pose_rot_mtx, ego_coord_array)

    # Ego to sensor: same pattern with the calibrated-sensor pose
    sense_coord_array = ego_coord_array - np.array(cs_record['translation']).reshape(3, 1)
    sense_coord_array = np.dot(pose_to_sense_rot_mtx, sense_coord_array)

    return view_points(sense_coord_array, cam_intrinsic_mtx, normalize=True)
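# Usage sketch (assumes `lyftd` and `camera_token` are defined): project a single world-frame
# point into the image. The input must be a 3xN array with one column per point.
world_point = np.array([[100.0], [50.0], [2.0]])  # placeholder world coordinates
uv = transform_world_to_image_coordinate(world_point, camera_token, lyftd)
print(uv[:2, 0])  # pixel (u, v); view_points normalizes so the third row is 1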
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--model_name', type=str, default='mask_512', help='String model name from models dictionary')
    arg('--seed', type=int, default=1234, help='Random seed')
    arg('--fold', type=int, default=0, help='Validation fold')
    arg('--weights_dir', type=str, default='', help='Directory for loading model weights')
    arg('--epochs', type=int, default=12, help='Current epoch')
    arg('--lr', type=float, default=1e-3, help='Initial learning rate')
    # Note: argparse's type=bool treats any non-empty string as True, so use a store_true flag
    arg('--debug', action='store_true', help='Enable debugging mode')
    args = parser.parse_args()

    set_seed(args.seed)

    # get data
    if ON_SERVER:
        level5data = LyftDataset(data_path='.', json_path='../../input/train_data',
                                 verbose=True)  # server
    else:
        level5data = LyftDataset(data_path='../input/', json_path='../input/train_data',
                                 verbose=True)  # local laptop

    classes = ["car", "motorcycle", "bus", "bicycle", "truck", "pedestrian",
               "other_vehicle", "animal", "emergency_vehicle"]

    # "bev" folder
    data_folder = os.path.join(OUTPUT_ROOT, "bev_data")

    # choose model
    model = get_maskrcnn_model(NUM_CLASSES)
    checkpoint = f'{OUTPUT_ROOT}/checkpoints/'

    train(model, model_name='mask_512', data_folder=data_folder, level5data=level5data,
          fold=args.fold, debug=args.debug, img_size=IMG_SIZE, bev_shape=BEV_SHAPE,
          epochs=args.epochs, batch_size=16, num_workers=4, learning_rate=args.lr,
          resume_weights=args.weights_dir, resume_epoch=0)
def __init__(self, dataset_path):
    self.lyft_data = LyftDataset(data_path=dataset_path,
                                 json_path=dataset_path + 'data', verbose=True)
    self.sample_size = len(self.lyft_data.sample)
    self.transform = transforms.Compose([
        transforms.ToTensor(),
    ])
def __init__(self, phase):
    # use != for string comparison; `is not` checks identity, not equality
    df_name = phase if phase != 'test' else 'sample_submission'
    dataset = 'train' if phase != 'test' else 'test'
    self.phase = phase
    self.df = pd.read_csv(os.path.join(cfg.work_dir, 'data', df_name + '.csv'))
    self.lyft_dataset = LyftDataset(
        data_path=os.path.join(cfg.input_dir, dataset),
        json_path=os.path.join(cfg.input_dir, dataset, 'data'),
        verbose=False)
def __init__(self, dataset_type, dir_output, lidar_range, img_size):
    self.dataset_type = dataset_type
    self.dir_output = dir_output
    self.lidar_range = lidar_range
    self.img_size = img_size
    self.level5data = LyftDataset(
        data_path='../../dataset/{}'.format(dataset_type),
        json_path='../../dataset/{}/data/'.format(dataset_type),
        verbose=True)
def __init__(self, root, split=None):
    self.data = LyftDataset(
        data_path=root,
        json_path=os.path.join(root, os.path.basename(os.path.normpath(root))),
        verbose=False)
    self.explorer = LyftDatasetExplorer(self.data)
    super().__init__(root, split)
def transform_box_from_world_to_ego_coordinates(first_train_sample_box: Box, sample_data_token: str,
                                                lyftd: LyftDataset):
    sample_box = first_train_sample_box.copy()
    sd_record = lyftd.get("sample_data", sample_data_token)
    cs_record = lyftd.get("calibrated_sensor", sd_record["calibrated_sensor_token"])
    sensor_record = lyftd.get("sensor", cs_record["sensor_token"])
    pose_record = lyftd.get("ego_pose", sd_record["ego_pose_token"])

    # Move box to ego vehicle coord system
    sample_box.translate(-np.array(pose_record["translation"]))
    sample_box.rotate(Quaternion(pose_record["rotation"]).inverse)

    return sample_box
def get_sensor_to_world_transform_matrix_from_sample_data_token(sample_data_token, lyftd: LyftDataset):
    sd_record = lyftd.get("sample_data", sample_data_token)
    cs_record = lyftd.get("calibrated_sensor", sd_record["calibrated_sensor_token"])
    sensor_record = lyftd.get("sensor", cs_record["sensor_token"])
    pose_record = lyftd.get("ego_pose", sd_record["ego_pose_token"])

    sensor_to_ego_mtx = transform_matrix(translation=np.array(cs_record["translation"]),
                                         rotation=Quaternion(cs_record["rotation"]))
    ego_to_world_mtx = transform_matrix(translation=np.array(pose_record["translation"]),
                                        rotation=Quaternion(pose_record["rotation"]))

    return np.dot(ego_to_world_mtx, sensor_to_ego_mtx)
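# Usage sketch (not from the original source): apply the 4x4 sensor-to-world matrix to a 3xN
# array of points in homogeneous form, and map them back with the matrix inverse as a round trip.
def example_apply_sensor_to_world(lyftd: LyftDataset, sample_data_token: str, points_3xn: np.ndarray):
    mtx = get_sensor_to_world_transform_matrix_from_sample_data_token(sample_data_token, lyftd)
    homo = np.vstack((points_3xn, np.ones((1, points_3xn.shape[1]))))
    world = np.dot(mtx, homo)[0:3, :]
    homo_world = np.vstack((world, np.ones((1, world.shape[1]))))
    sensor_again = np.dot(np.linalg.inv(mtx), homo_world)[0:3, :]
    assert np.allclose(points_3xn, sensor_again)
    return world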
def convert_box_to_world_coord_with_sample_data_token(input_sample_box, sample_data_token, lyftd: LyftDataset):
    sample_box = input_sample_box.copy()
    sd_record = lyftd.get("sample_data", sample_data_token)
    cs_record = lyftd.get("calibrated_sensor", sd_record["calibrated_sensor_token"])
    sensor_record = lyftd.get("sensor", cs_record["sensor_token"])
    pose_record = lyftd.get("ego_pose", sd_record["ego_pose_token"])

    # Move box from sensor to vehicle ego coord system
    sample_box.rotate(Quaternion(cs_record["rotation"]))
    sample_box.translate(np.array(cs_record["translation"]))

    # Move box from ego vehicle to world coord system
    sample_box.rotate(Quaternion(pose_record["rotation"]))
    sample_box.translate(np.array(pose_record["translation"]))

    return sample_box
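# Consistency sketch (not from the original source): a box taken world -> ego with
# transform_box_from_world_to_ego_coordinates and mapped back with the ego-to-world half of the
# transform above should reproduce the original center.
def example_world_ego_round_trip(box: Box, sample_data_token: str, lyftd: LyftDataset):
    ego_box = transform_box_from_world_to_ego_coordinates(box, sample_data_token, lyftd)
    sd_record = lyftd.get("sample_data", sample_data_token)
    pose_record = lyftd.get("ego_pose", sd_record["ego_pose_token"])
    back = ego_box.copy()
    back.rotate(Quaternion(pose_record["rotation"]))
    back.translate(np.array(pose_record["translation"]))
    assert np.allclose(back.center, box.center)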
def read_frustum_pointnet_output(ldt: LyftDataset, inference_pickle_file, token_pickle_file,
                                 from_rgb_detection: bool):
    with open(inference_pickle_file, 'rb') as fp:
        ps_list = pickle.load(fp)
        if not from_rgb_detection:
            seg_list = pickle.load(fp)
        segp_list = pickle.load(fp)
        center_list = pickle.load(fp)
        heading_cls_list = pickle.load(fp)
        heading_res_list = pickle.load(fp)
        size_cls_list = pickle.load(fp)
        size_res_list = pickle.load(fp)
        rot_angle_list = pickle.load(fp)
        score_list = pickle.load(fp)
        if from_rgb_detection:
            onehot_list = pickle.load(fp)

    with open(token_pickle_file, 'rb') as fp:
        sample_token_list = pickle.load(fp)
        annotation_token_list = pickle.load(fp)
        camera_data_token_list = pickle.load(fp)
        type_list = pickle.load(fp)

    # sanity checks: these raise if the tokens are not in the dataset
    ldt.get('sample', sample_token_list[0])
    if annotation_token_list:
        ldt.get('sample_annotation', annotation_token_list[0])

    print("length of sample token list:", len(sample_token_list))
    print("length of ps list:", len(ps_list))
    assert len(sample_token_list) == len(ps_list)

    boxes = []
    gt_boxes = []
    for data_idx in range(len(ps_list)):
        inferred_box = get_box_from_inference(
            lyftd=ldt,
            heading_cls=heading_cls_list[data_idx],
            heading_res=heading_res_list[data_idx],
            rot_angle=rot_angle_list[data_idx],
            size_cls=size_cls_list[data_idx],
            size_res=size_res_list[data_idx],
            center_coord=center_list[data_idx],
            sample_data_token=camera_data_token_list[data_idx],
            score=score_list[data_idx])
        inferred_box.name = type_list[data_idx]
        boxes.append(inferred_box)
        if not from_rgb_detection:
            gt_boxes.append(ldt.get_box(annotation_token_list[data_idx]))

    return boxes, gt_boxes, sample_token_list
def transform_box_from_world_to_flat_vehicle_coordinates(first_train_sample_box: Box, sample_data_token: str,
                                                         lyftd: LyftDataset):
    sample_box = first_train_sample_box.copy()
    sd_record = lyftd.get("sample_data", sample_data_token)
    cs_record = lyftd.get("calibrated_sensor", sd_record["calibrated_sensor_token"])
    sensor_record = lyftd.get("sensor", cs_record["sensor_token"])
    pose_record = lyftd.get("ego_pose", sd_record["ego_pose_token"])

    # Move box to ego vehicle coord system parallel to world z plane
    ypr = Quaternion(pose_record["rotation"]).yaw_pitch_roll
    yaw = ypr[0]
    sample_box.translate(-np.array(pose_record["translation"]))
    sample_box.rotate(Quaternion(scalar=np.cos(yaw / 2), vector=[0, 0, np.sin(yaw / 2)]).inverse)

    return sample_box
def prepare_data_pool(df, data_folder, bev_shape, voxel_size, z_offset, box_scale):
    """Prepare input data

    Args:
        df: train or val tokens
        data_folder: directory to save data
        bev_shape: BEV image shape
        voxel_size: size of voxels for voxelization
        z_offset: offset in the vertical direction before voxelization
        box_scale: rescale of the bounding boxes projections
    """
    NUM_WORKERS = os.cpu_count()
    print('Number of CPU: ', NUM_WORKERS)
    level5data = LyftDataset(data_path='.', json_path='../../input/train_data', verbose=True)
    classes = ["car", "motorcycle", "bus", "bicycle", "truck", "pedestrian",
               "other_vehicle", "animal", "emergency_vehicle"]
    print("Preparing data into {} using {} workers".format(data_folder, NUM_WORKERS))
    first_samples = df.first_sample_token.values
    os.makedirs(data_folder, exist_ok=True)

    process_func = partial(prepare_training_data_for_scene,
                           output_folder=data_folder, level5data=level5data,
                           bev_shape=bev_shape, voxel_size=voxel_size,
                           z_offset=z_offset, box_scale=box_scale)

    pool = Pool(NUM_WORKERS)
    for _ in tqdm(pool.imap_unordered(process_func, first_samples), total=len(first_samples)):
        pass
    pool.close()
    del pool
def create_lyft_infos(root_path, json_path, max_sweeps=10):
    from lyft_dataset_sdk.lyftdataset import LyftDataset

    lyft = LyftDataset(data_path=root_path, json_path=json_path, verbose=True)
    root_path = Path(root_path)

    available_scenes = _get_available_scenes(lyft)
    print(f"available scenes: {len(available_scenes)}")
    available_scene_names = [s["name"] for s in available_scenes]
    available_scene_tokens = [s["token"] for s in available_scenes]

    # all available scenes are used for training
    train_scenes = available_scene_tokens
    print(f"train scenes: {len(train_scenes)}")

    train_lyft_infos = _fill_train_infos(lyft, train_scenes, False, max_sweeps=max_sweeps)

    metadata = {
        "version": "v1.01-train",
    }
    print(f"train sample: {len(train_lyft_infos)}")
    data = {
        "infos": train_lyft_infos,
        "metadata": metadata,
    }
    with open(root_path / "infos_train.pkl", 'wb') as f:
        pickle.dump(data, f)
def convert_box_to_world_coord(box: Box, sample_token, sensor_type, lyftd: LyftDataset):
    sample_box = box.copy()
    sample_record = lyftd.get('sample', sample_token)
    sample_data_token = sample_record['data'][sensor_type]
    # the helper requires the dataset as its third argument
    converted_sample_box = convert_box_to_world_coord_with_sample_data_token(sample_box, sample_data_token, lyftd)
    return converted_sample_box
def select_2d_annotation_boxes(ldf: LyftDataset, classifier, sample_token,
                               camera_type=('CAM_FRONT', 'CAM_BACK', 'CAM_FRONT_LEFT', 'CAM_FRONT_RIGHT',
                                            'CAM_BACK_RIGHT', 'CAM_BACK_LEFT')) -> (str, str, np.ndarray):
    sample_record = ldf.get('sample', sample_token)
    cams = [key for key in sample_record["data"].keys() if "CAM" in key]
    cams = [cam for cam in cams if cam in camera_type]
    for cam in cams:
        cam_token = sample_record["data"][cam]
        image_file_path = ldf.get_sample_data_path(cam_token)
        image_array = imread(image_file_path)
        det_result = classifier.detect_multi_object(image_array, score_threshold=[0.4, 0.4, 0.4])
        for i in range(det_result.shape[0]):
            bounding_2d_box = det_result[i, 0:4]
            score = det_result[i, 4]
            class_idx = det_result[i, 5]
            yield sample_token, cam_token, bounding_2d_box, score, class_idx
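# Usage sketch (assumes `ldf`, `classifier`, and `sample_token` are defined): collect all 2D
# detections for one sample. The detector is any object exposing
# detect_multi_object(image, score_threshold) returning an Nx6 array; the
# [x1, y1, x2, y2, score, class] layout is inferred from the indexing above.
detections = list(select_2d_annotation_boxes(ldf, classifier, sample_token, camera_type=['CAM_FRONT']))
for s_token, cam_token, box_2d, score, class_idx in detections:
    print(cam_token, box_2d, score, class_idx)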
def transform_image_to_world_coordinate(image_array: np.array, camera_token: str, lyftd: LyftDataset):
    """
    :param image_array: 3xN np.ndarray
    :param camera_token:
    :return:
    """
    sd_record = lyftd.get("sample_data", camera_token)
    cs_record = lyftd.get("calibrated_sensor", sd_record["calibrated_sensor_token"])
    sensor_record = lyftd.get("sensor", cs_record["sensor_token"])
    pose_record = lyftd.get("ego_pose", sd_record["ego_pose_token"])

    # invert the viewpoint transformation
    cam_intrinsic_mtx = np.array(cs_record["camera_intrinsic"])
    image_in_cam_coord = np.dot(np.linalg.inv(cam_intrinsic_mtx), image_array)

    # TODO: think of how to do normalization properly
    # image_in_cam_coord = image_in_cam_coord / image_in_cam_coord[3:].ravel()

    # homogeneous coordinate to non-homogeneous one
    image_in_cam_coord = image_in_cam_coord[0:3, :]

    sens_to_pose_rot_mtx = Quaternion(cs_record['rotation']).rotation_matrix
    image_in_pose_coord = np.dot(sens_to_pose_rot_mtx, image_in_cam_coord)
    t = np.array(cs_record['translation'])
    for i in range(3):
        # translate the rotated points (the original added t to the un-rotated camera coordinates)
        image_in_pose_coord[i, :] = image_in_pose_coord[i, :] + t[i]

    pose_to_world_rot_mtx = Quaternion(pose_record['rotation']).rotation_matrix
    image_in_world_coord = np.dot(pose_to_world_rot_mtx, image_in_pose_coord)
    t = np.array(pose_record['translation'])
    for i in range(3):
        image_in_world_coord[i, :] = image_in_world_coord[i, :] + t[i]

    return image_in_world_coord
def create_lyft_info(version, data_path, save_path, split, max_sweeps=10):
    from lyft_dataset_sdk.lyftdataset import LyftDataset
    from . import lyft_utils

    data_path = data_path / version
    save_path = save_path / version
    split_path = data_path.parent / 'ImageSets'
    if split is not None:
        save_path = save_path / split
        split_path = split_path / split
    save_path.mkdir(exist_ok=True)

    assert version in ['trainval', 'one_scene', 'test']

    if version == 'trainval':
        train_split_path = split_path / 'train.txt'
        val_split_path = split_path / 'val.txt'
    elif version == 'test':
        train_split_path = split_path / 'test.txt'
        val_split_path = None
    elif version == 'one_scene':
        train_split_path = split_path / 'one_scene.txt'
        val_split_path = split_path / 'one_scene.txt'
    else:
        raise NotImplementedError

    train_scenes = [x.strip() for x in open(train_split_path).readlines()] \
        if train_split_path.exists() else []
    val_scenes = [x.strip() for x in open(val_split_path).readlines()] \
        if val_split_path is not None and val_split_path.exists() else []

    lyft = LyftDataset(json_path=data_path / 'data', data_path=data_path, verbose=True)

    available_scenes = lyft_utils.get_available_scenes(lyft)
    available_scene_names = [s['name'] for s in available_scenes]
    train_scenes = list(filter(lambda x: x in available_scene_names, train_scenes))
    val_scenes = list(filter(lambda x: x in available_scene_names, val_scenes))
    train_scenes = set([available_scenes[available_scene_names.index(s)]['token'] for s in train_scenes])
    val_scenes = set([available_scenes[available_scene_names.index(s)]['token'] for s in val_scenes])

    print('%s: train scene(%d), val scene(%d)' % (version, len(train_scenes), len(val_scenes)))

    train_lyft_infos, val_lyft_infos = lyft_utils.fill_trainval_infos(
        data_path=data_path, lyft=lyft, train_scenes=train_scenes, val_scenes=val_scenes,
        test='test' in version, max_sweeps=max_sweeps
    )

    if version == 'test':
        print('test sample: %d' % len(train_lyft_infos))
        with open(save_path / 'lyft_infos_test.pkl', 'wb') as f:
            pickle.dump(train_lyft_infos, f)
    else:
        print('train sample: %d, val sample: %d' % (len(train_lyft_infos), len(val_lyft_infos)))
        with open(save_path / 'lyft_infos_train.pkl', 'wb') as f:
            pickle.dump(train_lyft_infos, f)
        with open(save_path / 'lyft_infos_val.pkl', 'wb') as f:
            pickle.dump(val_lyft_infos, f)
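# Usage sketch (assumes create_lyft_info above has already been run for a trainval split and
# `save_path` is the same Path): load the generated info lists back.
def example_load_infos(save_path):
    with open(save_path / 'lyft_infos_train.pkl', 'rb') as f:
        train_infos = pickle.load(f)
    with open(save_path / 'lyft_infos_val.pkl', 'rb') as f:
        val_infos = pickle.load(f)
    print('loaded %d train / %d val infos' % (len(train_infos), len(val_infos)))
    return train_infos, val_infos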
def main(config: dict):
    scriptStart = time.time()
    print("=" * 80)
    seed_everything(1)
    if config["isTrain"]:
        print("Preprocessing train set")
        ll5ds = LyftDataset(data_path=config["PATH"] + "train/",
                            json_path=config["PATH"] + "train/data/", verbose=True)
    else:
        print("Preprocessing test set")
        ll5ds = LyftDataset(data_path=config["PATH"] + "test/",
                            json_path=config["PATH"] + "test/data/", verbose=True)
    checkCreateFolders(config)
    generateDataset(ll5ds, config)
    print("Dataset generated in {:.2f} min.".format((time.time() - scriptStart) / 60))
    print("=" * 80)
def prep_list_of_sessions(self):
    self.level5data = LyftDataset(json_path=DATASET_ROOT + "/%s_data" % self.phase,
                                  data_path=DATASET_ROOT, verbose=True)
    self.num_sessions = len(self.level5data.scene)
    self.cloud_tokens = []
    self.session_lengths = []

    for session_ind in range(self.num_sessions):
        record = self.level5data.scene[session_ind]
        session_token = record['token']
        sample_token = record["first_sample_token"]
        sample = self.level5data.get("sample", sample_token)
        lidar_token = sample["data"]["LIDAR_TOP"]
        cur_lidar_tokens = []
        # walk the linked list of lidar sample_data records until `next` is empty
        while len(lidar_token) > 0:
            cur_lidar_tokens.append(lidar_token)
            lidar_data = self.level5data.get("sample_data", lidar_token)
            lidar_token = lidar_data["next"]
        self.cloud_tokens.append(cur_lidar_tokens)
        self.session_lengths.append(len(cur_lidar_tokens))
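# Usage sketch (not from the original source): after prep_list_of_sessions has run, load every
# lidar sweep of the first session as a LidarPointCloud. `prepped` is the owning object.
def example_iterate_first_session(prepped):
    for token in prepped.cloud_tokens[0]:
        filepath = prepped.level5data.get_sample_data_path(token)
        cloud = LidarPointCloud.from_file(filepath)
        print(token, cloud.points.shape)  # points are 4xN: x, y, z, intensity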
def extract_single_box(train_objects, idx, lyftd: LyftDataset) -> Box:
    first_train_id, first_train_sample_box = get_train_data_sample_token_and_box(idx, train_objects)

    first_train_sample = lyftd.get('sample', first_train_id)

    sample_data_token = first_train_sample['data']['LIDAR_TOP']

    first_train_sample_box = transform_box_from_world_to_sensor_coordinates(first_train_sample_box,
                                                                            sample_data_token, lyftd)

    return first_train_sample_box, sample_data_token
def generating_scenes(train_root_path, test_root_path):
    ############################ Set random seed for list shuffle
    SEED = 42
    random.seed(SEED)

    ############################ load lyft dataset
    level5data = LyftDataset(data_path=train_root_path,
                             json_path=Path(train_root_path) / "data", verbose=True)
    level5data_test = LyftDataset(data_path=test_root_path,
                                  json_path=Path(test_root_path) / "data", verbose=True)

    train_scenes = []
    val_scenes = []
    all_scenes = []
    test_scenes = []
    for sample in level5data.sample:
        all_scenes.append(sample["scene_token"])
    for sample in level5data_test.sample:
        test_scenes.append(sample["scene_token"])
    all_scenes = list(set(all_scenes))
    test_scenes = list(set(test_scenes))
    random.shuffle(all_scenes)
    train_scenes = all_scenes[:int(5 * len(all_scenes) / 6)]
    val_scenes = all_scenes[int(5 * len(all_scenes) / 6):]

    ############################ save splitting result
    with open(Path(train_root_path) / "train_scenes.txt", "w") as f:
        for item in train_scenes:
            f.write("%s\n" % item)
    with open(Path(train_root_path) / "val_scenes.txt", "w") as f:
        for item in val_scenes:
            f.write("%s\n" % item)
    with open(Path(test_root_path) / "test_scenes.txt", "w") as f:
        for item in test_scenes:
            f.write("%s\n" % item)
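# Usage sketch (not from the original source): read one of the split files written above back
# into a list of scene tokens.
def example_load_scene_split(root_path, split_name="train_scenes.txt"):
    with open(Path(root_path) / split_name) as f:
        return [line.strip() for line in f if line.strip()]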
class LyftDataClass(Dataset):
    def __init__(self, data_path, json_path, verbose=True, map_resolution=0.1):
        self._lyftdataset = LyftDataset(data_path, json_path, verbose=verbose, map_resolution=map_resolution)

    def __len__(self):
        return len(self._lyftdataset.scene)

    def __getitem__(self, idx):
        # sample is a list of record dicts, so take the token field before the lookup
        sample_token = self._lyftdataset.sample[idx]['token']
        sample_record = self.getFromLyft("sample", sample_token)
        return Sample(self, sample_record)

    def getFromLyft(self, table_name, token):
        return self._lyftdataset.get(table_name, token)

    def getFromLyftDatapath(self):
        return self._lyftdataset.data_path

    def getFromLyftBoxes(self, sample_data_token):
        # the SDK method is get_boxes; LyftDataset has no getBoxes
        return self._lyftdataset.get_boxes(sample_data_token)
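# Usage sketch (paths are placeholders): instantiate the wrapper and fetch one Sample. Note that
# __getitem__ returns a custom Sample object, so a default PyTorch collate_fn cannot batch it;
# iterate directly or supply a custom collate_fn to DataLoader.
ds = LyftDataClass(data_path='../input/', json_path='../input/train_data', verbose=False)
print(len(ds))
first_sample = ds[0]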
def transform_image_to_cam_coordinate(image_array_p: np.array, camera_token: str, lyftd: LyftDataset):
    sd_record = lyftd.get("sample_data", camera_token)
    cs_record = lyftd.get("calibrated_sensor", sd_record["calibrated_sensor_token"])
    sensor_record = lyftd.get("sensor", cs_record["sensor_token"])
    pose_record = lyftd.get("ego_pose", sd_record["ego_pose_token"])

    # invert the viewpoint transformation
    def normalization(input_array):
        # undo the perspective division: multiply x and y back by depth z (row 2)
        input_array[0:2, :] = input_array[0:2, :] * input_array[2:3, :].repeat(2, 0).reshape(2, input_array.shape[1])
        return input_array

    image_array = normalization(np.copy(image_array_p))
    # note: the ravel + reshape(4, 1) below assumes a single 3x1 input point
    image_array = np.concatenate((image_array.ravel(), np.array([1])))
    image_array = image_array.reshape(4, 1)

    cam_intrinsic_mtx = np.array(cs_record["camera_intrinsic"])
    view = cam_intrinsic_mtx
    viewpad = np.eye(4)
    viewpad[:view.shape[0], :view.shape[1]] = view
    image_in_cam_coord = np.dot(np.linalg.inv(viewpad), image_array)

    return image_in_cam_coord[0:3, :]
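# Consistency sketch (not from the original source): project a camera-frame point with
# view_points, reattach its depth, and map it back; the round trip should recover the original
# point because the function multiplies x and y by depth before inverting the intrinsics.
def example_cam_round_trip(lyftd: LyftDataset, camera_token: str):
    sd_record = lyftd.get("sample_data", camera_token)
    cs_record = lyftd.get("calibrated_sensor", sd_record["calibrated_sensor_token"])
    cam_intrinsic = np.array(cs_record["camera_intrinsic"])
    point_cam = np.array([[1.0], [2.0], [10.0]])  # arbitrary point 10 m in front of the camera
    point_img = view_points(point_cam, cam_intrinsic, normalize=True)  # rows: u, v, 1
    point_img[2, 0] = point_cam[2, 0]  # reattach depth, which the inverse expects in row 2
    recovered = transform_image_to_cam_coordinate(point_img, camera_token, lyftd)
    assert np.allclose(recovered, point_cam)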
def read_frustum_pointnet_output_v2(ldt: LyftDataset, inference_tfrecord_file):
    raw_dataset = tf.data.TFRecordDataset(inference_tfrecord_file)

    for raw_record in raw_dataset:
        example = parse_inference_record(raw_record)
        camera_token = example['camera_token'].numpy().decode('utf8')
        inferred_box = get_box_from_inference(
            lyftd=ldt,
            heading_cls=example['rot_heading_angle_class'].numpy(),
            heading_res=example['rot_heading_angle_residual'].numpy(),
            rot_angle=example['frustum_angle'].numpy(),
            size_cls=example['size_class'].numpy(),
            size_res=example['size_residual'].numpy(),
            center_coord=example['rot_box_center'].numpy(),
            sample_data_token=camera_token,
            score=example['score'].numpy(),
            type_name=example['type_name'].numpy().decode('utf8'))

        pc_record = ldt.get("sample_data", camera_token)
        yield inferred_box, pc_record['sample_token']