Example 1
def transform_bounding_box_to_sensor_coord_and_get_corners(box: Box, sample_data_token: str, lyftd: LyftDataset,
                                                           frustum_pointnet_convention=False):
    """
    Transform the bounding box to Get the bounding box corners

    :param box:
    :param sample_data_token: camera data token
    :param level5data:
    :return:
    """
    transformed_box = transform_box_from_world_to_sensor_coordinates(box, sample_data_token, lyftd)
    sd_record = lyftd.get("sample_data", sample_data_token)
    cs_record = lyftd.get("calibrated_sensor", sd_record["calibrated_sensor_token"])
    sensor_record = lyftd.get("sensor", cs_record["sensor_token"])

    if sensor_record['modality'] == 'camera':
        cam_intrinsic_mtx = np.array(cs_record["camera_intrinsic"])
    else:
        cam_intrinsic_mtx = None

    box_corners_on_cam_coord = transformed_box.corners()

    # Rearrange to conform to Frustum-PointNet's corner ordering convention

    if frustum_pointnet_convention:
        rearranged_idx = [0, 3, 7, 4, 1, 2, 6, 5]
        box_corners_on_cam_coord = box_corners_on_cam_coord[:, rearranged_idx]

        assert np.allclose((box_corners_on_cam_coord[:, 0] + box_corners_on_cam_coord[:, 6]) / 2,
                           np.array(transformed_box.center))

    # For the perspective transformation, normalization should be set to True
    box_corners_on_image = view_points(box_corners_on_cam_coord, view=cam_intrinsic_mtx, normalize=True)

    return box_corners_on_image, box_corners_on_cam_coord
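A minimal usage sketch for the function above, assuming lyftd is an already loaded LyftDataset, box is a Box in world coordinates, and sample_token is a valid sample token (all placeholders, not part of the original snippet):

# Usage sketch: project a world-coordinate box onto the front camera of one sample.
sample = lyftd.get("sample", sample_token)
cam_token = sample["data"]["CAM_FRONT"]
corners_2d, corners_3d = transform_bounding_box_to_sensor_coord_and_get_corners(
    box, cam_token, lyftd, frustum_pointnet_convention=True)
# corners_2d: 3 x 8 projected image coordinates; corners_3d: 3 x 8 corners in camera coordinates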
Example 2
def select_annotation_boxes(sample_token, lyftd: LyftDataset, box_vis_level: BoxVisibility = BoxVisibility.ALL,
                            camera_type=['CAM_FRONT', 'CAM_BACK', 'CAM_FRONT_LEFT', 'CAM_FRONT_RIGHT', 'CAM_BACK_RIGHT',
                                         'CAM_BACK_LEFT']) -> (str, str, Box):
    """
    Select annotations that is a camera image defined by box_vis_level


    :param sample_token:
    :param box_vis_level:BoxVisbility.ALL or BoxVisibility.ANY
    :param camera_type: a list of camera that the token should be selected from
    :return: yield (sample_token,cam_token, Box)
    """
    sample_record = lyftd.get('sample', sample_token)

    cams = [key for key in sample_record["data"].keys() if "CAM" in key]
    cams = [cam for cam in cams if cam in camera_type]
    for ann_token in sample_record['anns']:
        for cam in cams:
            cam_token = sample_record["data"][cam]
            _, boxes_in_sensor_coord, cam_intrinsic = lyftd.get_sample_data(
                cam_token, box_vis_level=box_vis_level, selected_anntokens=[ann_token]
            )
            if len(boxes_in_sensor_coord) > 0:
                assert len(boxes_in_sensor_coord) == 1
                box_in_world_coord = lyftd.get_box(boxes_in_sensor_coord[0].token)
                yield sample_token, cam_token, box_in_world_coord
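A short consumption sketch for the generator above, assuming lyftd is a loaded LyftDataset and sample_token is a valid sample token (placeholders):

# Usage sketch: collect every annotation box fully visible in the front or back camera.
visible_boxes = []
for s_token, cam_token, box in select_annotation_boxes(sample_token, lyftd,
                                                       box_vis_level=BoxVisibility.ALL,
                                                       camera_type=['CAM_FRONT', 'CAM_BACK']):
    visible_boxes.append((cam_token, box))  # the yielded box is in world coordinates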
Example 3
def transform_box_from_world_to_flat_sensor_coordinates(first_train_sample_box: Box, sample_data_token: str,
                                                        lyftd: LyftDataset):
    sample_box = first_train_sample_box.copy()
    sd_record = lyftd.get("sample_data", sample_data_token)
    cs_record = lyftd.get("calibrated_sensor", sd_record["calibrated_sensor_token"])
    sensor_record = lyftd.get("sensor", cs_record["sensor_token"])
    pose_record = lyftd.get("ego_pose", sd_record["ego_pose_token"])

    # Move box to ego vehicle coord system parallel to world z plane
    ypr = Quaternion(pose_record["rotation"]).yaw_pitch_roll
    yaw = ypr[0]

    sample_box.translate(-np.array(pose_record["translation"]))
    sample_box.rotate(Quaternion(scalar=np.cos(yaw / 2), vector=[0, 0, np.sin(yaw / 2)]).inverse)

    # Move box to the sensor coord system, still parallel to the world z plane
    # We need two steps, because the camera coordinate system is x (right), z (front), y (down)

    inv_ypr = Quaternion(cs_record["rotation"]).inverse.yaw_pitch_roll

    angz = inv_ypr[0]
    angx = inv_ypr[2]

    sample_box.translate(-np.array(cs_record['translation']))

    # rotate around z-axis
    sample_box.rotate(Quaternion(scalar=np.cos(angz / 2), vector=[0, 0, np.sin(angz / 2)]))
    # rotate around the x-axis by 90 degrees (the quaternion angle must be in radians)
    angx = np.pi / 2
    sample_box.rotate(Quaternion(scalar=np.cos(angx / 2), vector=[np.sin(angx / 2), 0, 0]))

    return sample_box
Example 4
def generateXYZRGB(ds: LyftDataset, sample_token: str, config: dict,
                   folder: str):

    sample = ds.get("sample", sample_token)

    sample_lidar_token = sample["data"]["LIDAR_TOP"]
    lidar_data = ds.get("sample_data", sample_lidar_token)
    lidar_filepath = ds.get_sample_data_path(sample_lidar_token)

    ego_pose = ds.get("ego_pose", lidar_data["ego_pose_token"])
    calibrated_sensor = ds.get("calibrated_sensor",
                               lidar_data["calibrated_sensor_token"])

    global_from_car = transform_matrix(ego_pose['translation'],
                                       Quaternion(ego_pose['rotation']),
                                       inverse=False)

    car_from_sensor = transform_matrix(calibrated_sensor['translation'],
                                       Quaternion(
                                           calibrated_sensor['rotation']),
                                       inverse=False)

    try:
        lidar_pointcloud = LidarPointCloud.from_file(lidar_filepath)
        pc = copy.deepcopy(lidar_pointcloud)
        lidar_pointcloud.transform(car_from_sensor)

    except Exception as e:
        print("Failed to load Lidar Pointcloud for {}: {}".format(
            sample_token, e))
        return  # without a point cloud there is nothing to save

    XYZRGB = getXYZRGB(ds, pc, sample, lidar_data)
    np.savez(folder + sample_lidar_token + ".xyzrgb", XYZRGB=XYZRGB)
Example 5
def transform_pc_to_camera_coord(cam: dict, pointsensor: dict, point_cloud_3d: LidarPointCloud, lyftd: LyftDataset):
    warnings.warn("The point cloud is transformed to camera coordinates in place", UserWarning)

    # Points live in the point sensor frame. So they need to be transformed via global to the image plane.
    # First step: transform the point-cloud to the ego vehicle frame for the timestamp of the sweep.
    cs_record = lyftd.get("calibrated_sensor", pointsensor["calibrated_sensor_token"])
    point_cloud_3d.rotate(Quaternion(cs_record["rotation"]).rotation_matrix)
    point_cloud_3d.translate(np.array(cs_record["translation"]))
    # Second step: transform to the global frame.
    poserecord = lyftd.get("ego_pose", pointsensor["ego_pose_token"])
    point_cloud_3d.rotate(Quaternion(poserecord["rotation"]).rotation_matrix)
    point_cloud_3d.translate(np.array(poserecord["translation"]))
    # Third step: transform into the ego vehicle frame for the timestamp of the image.
    poserecord = lyftd.get("ego_pose", cam["ego_pose_token"])
    point_cloud_3d.translate(-np.array(poserecord["translation"]))
    point_cloud_3d.rotate(Quaternion(poserecord["rotation"]).rotation_matrix.T)
    # Fourth step: transform into the camera.
    cs_record = lyftd.get("calibrated_sensor", cam["calibrated_sensor_token"])
    point_cloud_3d.translate(-np.array(cs_record["translation"]))
    point_cloud_3d.rotate(Quaternion(cs_record["rotation"]).rotation_matrix.T)

    # Take the actual picture (matrix multiplication with camera-matrix + renormalization).
    point_cloud_2d = view_points(point_cloud_3d.points[:3, :],
                                 np.array(cs_record["camera_intrinsic"]), normalize=True)

    return point_cloud_3d, point_cloud_2d
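A hedged sketch of how the function above might be used to keep only the lidar points that land inside the camera image; it assumes lyftd is loaded and that cam and pointsensor are sample_data records for a camera and LIDAR_TOP from the same sample (the width/height fields of the camera record give the image bounds):

# Usage sketch: project LIDAR_TOP points into the camera and mask points outside the image.
pc = LidarPointCloud.from_file(lyftd.get_sample_data_path(pointsensor["token"]))
pc_3d, pc_2d = transform_pc_to_camera_coord(cam, pointsensor, pc, lyftd)

depths = pc_3d.points[2, :]
mask = depths > 1.0                                        # keep points at least 1 m in front of the camera
mask &= (pc_2d[0, :] > 0) & (pc_2d[0, :] < cam["width"])   # inside the image horizontally
mask &= (pc_2d[1, :] > 0) & (pc_2d[1, :] < cam["height"])  # inside the image vertically
points_on_image = pc_2d[:, mask]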
Example 6
def create_lyft_infos(
        root_path=Path("/media/zzhou/data/data-lyft-3D-objects/")):
    # ====================================================================
    # A. Creating an index and splitting into train and validation scenes
    # ====================================================================
    level5data = LyftDataset(data_path=root_path,
                             json_path=root_path / 'train_data',
                             verbose=True)

    classes = [
        "car", "motorcycle", "bus", "bicycle", "truck", "pedestrian",
        "other_vehicle", "animal", "emergency_vehicle"
    ]
    records = [(level5data.get('sample',
                               record['first_sample_token'])['timestamp'],
                record) for record in level5data.scene]
    entries = []
    for start_time, record in sorted(records):
        start_time = level5data.get(
            'sample', record['first_sample_token'])['timestamp'] / 1000000
        token = record['token']
        name = record['name']
        date = datetime.utcfromtimestamp(start_time)
        host = "-".join(record['name'].split("-")[:2])
        first_sample_token = record["first_sample_token"]
        entries.append((host, name, date, token, first_sample_token))
    df = pd.DataFrame(entries,
                      columns=[
                          "host", "scene_name", "date", "scene_token",
                          "first_sample_token"
                      ])

    validation_hosts = ["host-a007", "host-a008", "host-a009"]

    validation_df = df[df["host"].isin(validation_hosts)]
    vi = validation_df.index
    train_df = df[~df.index.isin(vi)]
    print(len(train_df), len(validation_df),
          "train/validation split scene counts")

    # ====================================================================
    # B. build SECOND database
    # ====================================================================
    metadata = {
        "version":
        f"{len(train_df)} train / {len(validation_df)} validation split scene counts",
    }

    train_lyft_infos = _scan_sample_token(level5data, train_df)
    data = {
        "infos": train_lyft_infos,
        "metadata": metadata,
    }
    with open(root_path / "infos_train.pkl", 'wb') as f:
        pickle.dump(data, f)

    val_lyft_infos = _scan_sample_token(level5data, validation_df)
    data["infos"] = val_lyft_infos
    with open(root_path / "infos_val.pkl", 'wb') as f:
        pickle.dump(data, f)
Example 7
def transform_world_to_image_coordinate(world_coord_array, camera_token: str, lyftd: LyftDataset):
    sd_record = lyftd.get("sample_data", camera_token)
    cs_record = lyftd.get("calibrated_sensor", sd_record["calibrated_sensor_token"])
    sensor_record = lyftd.get("sensor", cs_record["sensor_token"])
    pose_record = lyftd.get("ego_pose", sd_record["ego_pose_token"])

    cam_intrinsic_mtx = np.array(cs_record["camera_intrinsic"])

    pose_to_sense_rot_mtx = Quaternion(cs_record['rotation']).rotation_matrix.T
    world_to_pose_rot_mtx = Quaternion(pose_record['rotation']).rotation_matrix.T

    # world -> ego: subtract the ego translation first, then rotate into the ego frame
    t = np.array(pose_record['translation']).reshape(3, 1)
    ego_coord_array = np.dot(world_to_pose_rot_mtx, world_coord_array - t)

    # ego -> sensor: subtract the sensor translation first, then rotate into the sensor frame
    t = np.array(cs_record['translation']).reshape(3, 1)
    sense_coord_array = np.dot(pose_to_sense_rot_mtx, ego_coord_array - t)

    return view_points(sense_coord_array, cam_intrinsic_mtx, normalize=True)
Example 8
def main():
    parser = argparse.ArgumentParser()                                
    arg = parser.add_argument    
    arg('--model_name', type=str, default='mask_512', help='String model name from models dictionary')
    arg('--seed', type=int, default=1234, help='Random seed')
    arg('--fold', type=int, default=0, help='Validation fold')
    arg('--weights_dir', type=str, default='', help='Directory for loading model weights')
    arg('--epochs', type=int, default=12, help='Number of training epochs')
    arg('--lr', type=float, default=1e-3, help='Initial learning rate')
    arg('--debug', action='store_true', help='Enable debug mode')
    args = parser.parse_args()      
    set_seed(args.seed)

    # get data
    if ON_SERVER:
        level5data = LyftDataset(data_path = '.', json_path='../../input/train_data', verbose=True) # server
    else:
        level5data = LyftDataset(data_path = '../input/', json_path='../input/train_data', verbose=True) # local laptop
    
    classes = ["car", "motorcycle", "bus", "bicycle", "truck", "pedestrian", "other_vehicle", "animal", "emergency_vehicle"]
    
    # "bev" folder
    data_folder = os.path.join(OUTPUT_ROOT, "bev_data")

    # choose model
    model = get_maskrcnn_model(NUM_CLASSES)     
    
    checkpoint = f'{OUTPUT_ROOT}/checkpoints/'

    train(model, model_name=args.model_name, data_folder=data_folder,
          level5data=level5data,
          fold=args.fold, debug=args.debug, img_size=IMG_SIZE, bev_shape=BEV_SHAPE,
          epochs=args.epochs, batch_size=16, num_workers=4,
          learning_rate=args.lr, resume_weights=args.weights_dir, resume_epoch=0)
Example 9
    def __init__(self, dataset_path):
        self.lyft_data = LyftDataset(data_path=dataset_path,
                                     json_path=dataset_path + 'data',
                                     verbose=True)
        self.sample_size = len(self.lyft_data.sample)

        self.transform = transforms.Compose([
            transforms.ToTensor(),
        ])
Example 10
    def __init__(self, phase):
        df_name = phase if phase != 'test' else 'sample_submission'
        dataset = 'train' if phase != 'test' else 'test'
        self.phase = phase
        self.df = pd.read_csv(
            os.path.join(cfg.work_dir, 'data', df_name + '.csv'))
        self.lyft_dataset = LyftDataset(
            data_path=os.path.join(cfg.input_dir, dataset),
            json_path=os.path.join(cfg.input_dir, dataset, 'data'),
            verbose=False)
Example 11
    def __init__(self, dataset_type, dir_output, lidar_range, img_size):

        self.dataset_type = dataset_type
        self.dir_output = dir_output
        self.lidar_range = lidar_range
        self.img_size = img_size
        self.level5data = LyftDataset(
            data_path='../../dataset/{}'.format(dataset_type),
            json_path='../../dataset/{}/data/'.format(dataset_type),
            verbose=True)
Example 12
    def __init__(self, root, split=None):

        self.data = LyftDataset(data_path=root,
                                json_path=os.path.join(
                                    root,
                                    os.path.basename(os.path.normpath(root))),
                                verbose=False)

        self.explorer = LyftDatasetExplorer(self.data)

        super().__init__(root, split)
Example 13
def transform_box_from_world_to_ego_coordinates(first_train_sample_box: Box, sample_data_token: str,
                                                lyftd: LyftDataset):
    sample_box = first_train_sample_box.copy()
    sd_record = lyftd.get("sample_data", sample_data_token)
    cs_record = lyftd.get("calibrated_sensor", sd_record["calibrated_sensor_token"])
    sensor_record = lyftd.get("sensor", cs_record["sensor_token"])
    pose_record = lyftd.get("ego_pose", sd_record["ego_pose_token"])
    # Move box to ego vehicle coord system
    sample_box.translate(-np.array(pose_record["translation"]))
    sample_box.rotate(Quaternion(pose_record["rotation"]).inverse)

    return sample_box
Example 14
def get_sensor_to_world_transform_matrix_from_sample_data_token(sample_data_token, lyftd: LyftDataset):
    sd_record = lyftd.get("sample_data", sample_data_token)
    cs_record = lyftd.get("calibrated_sensor", sd_record["calibrated_sensor_token"])
    sensor_record = lyftd.get("sensor", cs_record["sensor_token"])
    pose_record = lyftd.get("ego_pose", sd_record["ego_pose_token"])

    sensor_to_ego_mtx = transform_matrix(translation=np.array(cs_record["translation"]),
                                         rotation=Quaternion(cs_record["rotation"]))

    ego_to_world_mtx = transform_matrix(translation=np.array(pose_record["translation"]),
                                        rotation=Quaternion(pose_record["rotation"]))

    return np.dot(ego_to_world_mtx, sensor_to_ego_mtx)
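A sketch of applying the resulting 4 x 4 matrix to points, assuming points_in_sensor is a 3 x N array in the sensor frame and that lyftd and sample_data_token are available (placeholders):

# Usage sketch: move sensor-frame points into world coordinates with the matrix above.
sensor_to_world = get_sensor_to_world_transform_matrix_from_sample_data_token(sample_data_token, lyftd)
num_points = points_in_sensor.shape[1]
homogeneous = np.vstack((points_in_sensor, np.ones((1, num_points))))  # 4 x N homogeneous points
points_in_world = np.dot(sensor_to_world, homogeneous)[:3, :]          # back to 3 x N Cartesian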
Example 15
def convert_box_to_world_coord_with_sample_data_token(input_sample_box, sample_data_token, lyftd: LyftDataset):
    sample_box = input_sample_box.copy()

    sd_record = lyftd.get("sample_data", sample_data_token)
    cs_record = lyftd.get("calibrated_sensor", sd_record["calibrated_sensor_token"])
    sensor_record = lyftd.get("sensor", cs_record["sensor_token"])
    pose_record = lyftd.get("ego_pose", sd_record["ego_pose_token"])
    # Move box from sensor to ego vehicle coord system
    sample_box.rotate(Quaternion(cs_record["rotation"]))
    sample_box.translate(np.array(cs_record["translation"]))
    # Move box from ego vehicle to world coord system
    sample_box.rotate(Quaternion(pose_record["rotation"]))
    sample_box.translate(np.array(pose_record["translation"]))

    return sample_box
Example 16
def read_frustum_pointnet_output(ldt: LyftDataset, inference_pickle_file,
                                 token_pickle_file, from_rgb_detection: bool):
    with open(inference_pickle_file, 'rb') as fp:
        ps_list = pickle.load(fp)
        if not from_rgb_detection:
            seg_list = pickle.load(fp)
        segp_list = pickle.load(fp)
        center_list = pickle.load(fp)
        heading_cls_list = pickle.load(fp)
        heading_res_list = pickle.load(fp)
        size_cls_list = pickle.load(fp)
        size_res_list = pickle.load(fp)
        rot_angle_list = pickle.load(fp)
        score_list = pickle.load(fp)
        if from_rgb_detection:
            onehot_list = pickle.load(fp)
    with open(token_pickle_file, 'rb') as fp:
        sample_token_list = pickle.load(fp)
        annotation_token_list = pickle.load(fp)
        camera_data_token_list = pickle.load(fp)
        type_list = pickle.load(fp)
        ldt.get('sample', sample_token_list[0])
        if annotation_token_list:
            ldt.get('sample_annotation', annotation_token_list[0])

    print("lengh of sample token:", len(sample_token_list))
    print("lengh of ps list:", len(ps_list))
    assert len(sample_token_list) == len(ps_list)

    boxes = []
    gt_boxes = []
    for data_idx in range(len(ps_list)):
        inferred_box = get_box_from_inference(
            lyftd=ldt,
            heading_cls=heading_cls_list[data_idx],
            heading_res=heading_res_list[data_idx],
            rot_angle=rot_angle_list[data_idx],
            size_cls=size_cls_list[data_idx],
            size_res=size_res_list[data_idx],
            center_coord=center_list[data_idx],
            sample_data_token=camera_data_token_list[data_idx],
            score=score_list[data_idx])
        inferred_box.name = type_list[data_idx]
        boxes.append(inferred_box)
        if not from_rgb_detection:
            gt_boxes.append(ldt.get_box(annotation_token_list[data_idx]))

    return boxes, gt_boxes, sample_token_list
Example 17
def transform_box_from_world_to_flat_vehicle_coordinates(first_train_sample_box: Box, sample_data_token: str,
                                                         lyftd: LyftDataset):
    sample_box = first_train_sample_box.copy()
    sd_record = lyftd.get("sample_data", sample_data_token)
    cs_record = lyftd.get("calibrated_sensor", sd_record["calibrated_sensor_token"])
    sensor_record = lyftd.get("sensor", cs_record["sensor_token"])
    pose_record = lyftd.get("ego_pose", sd_record["ego_pose_token"])

    # Move box to ego vehicle coord system parallel to world z plane
    ypr = Quaternion(pose_record["rotation"]).yaw_pitch_roll
    yaw = ypr[0]

    sample_box.translate(-np.array(pose_record["translation"]))
    sample_box.rotate(Quaternion(scalar=np.cos(yaw / 2), vector=[0, 0, np.sin(yaw / 2)]).inverse)

    return sample_box
Example 18
def prepare_data_pool(df, data_folder, bev_shape, voxel_size, z_offset, box_scale):
    """Prepare input data 
    Args: 
        df: train or val tokens
        data_folder: diractory to save data
        bev_shape: BEV image shape
        voxel_size: size of voxels for voxelization
        z_offset: offset in the vertical direction before voxelization 
        box_scale: rescale of the bounding boxes projections
    """
    NUM_WORKERS = os.cpu_count() 
    print('Number of CPU: ', NUM_WORKERS)

    level5data = LyftDataset(data_path = '.', json_path='../../input/train_data', verbose=True)
    classes = ["car", "motorcycle", "bus", "bicycle", "truck", "pedestrian", "other_vehicle", "animal", "emergency_vehicle"]
    
    print("Preparing data into {} using {} workers".format(data_folder, NUM_WORKERS))
    first_samples = df.first_sample_token.values
    os.makedirs(data_folder, exist_ok=True)
    
    process_func = partial(prepare_training_data_for_scene,
                           output_folder=data_folder, level5data=level5data, bev_shape=bev_shape, voxel_size=voxel_size, z_offset=z_offset, box_scale=box_scale)

    pool = Pool(NUM_WORKERS)
    for _ in tqdm(pool.imap_unordered(process_func, first_samples), total=len(first_samples)):
        pass
    pool.close()
    del pool
Example 19
def create_lyft_infos(root_path, json_path, max_sweeps=10):
    from lyft_dataset_sdk.lyftdataset import LyftDataset
    # from lyft_dataset_sdk.utils import s
    lyft = LyftDataset(data_path=root_path, json_path=json_path, verbose=True)
    root_path = Path(root_path)
    available_scenes = _get_available_scenes(lyft)
    print(f"available scenes: {len(available_scenes)}")
    available_scene_names = [s["name"] for s in available_scenes]
    available_scene_tokens = [s["token"] for s in available_scenes]
    # print("available_scenes_names:", available_scene_names)
    # train_scenes = list(
    #     filter(lambda x:x in available_scene_names, available_scenes))
    # print(f"train_scenes: {len(train_scenes)}")

    # train_scenes = set([
    # available_scenes[available_scene_names.index(s)]["token"]
    # for s in available_scenes
    # ])
    train_scenes = available_scene_tokens
    print(f"train scenes: {len(train_scenes)}")
    train_lyft_infos = _fill_train_infos(lyft,
                                         train_scenes,
                                         False,
                                         max_sweeps=max_sweeps)
    metadata = {
        "version": "v1.01-train",
    }

    print(f"train sample: {len(train_lyft_infos)}")
    data = {
        "infos": train_lyft_infos,
        "metadata": metadata,
    }
    with open(root_path / "infos_train.pkl", 'wb') as f:
        pickle.dump(data, f)
Example 20
def convert_box_to_world_coord(box: Box, sample_token, sensor_type, lyftd: LyftDataset):
    sample_box = box.copy()
    sample_record = lyftd.get('sample', sample_token)
    sample_data_token = sample_record['data'][sensor_type]

    converted_sample_box = convert_box_to_world_coord_with_sample_data_token(sample_box, sample_data_token, lyftd)

    return converted_sample_box
Example 21
def select_2d_annotation_boxes(ldf: LyftDataset, classifier, sample_token,
                               camera_type=['CAM_FRONT', 'CAM_BACK', 'CAM_FRONT_LEFT', 'CAM_FRONT_RIGHT',
                                            'CAM_BACK_RIGHT', 'CAM_BACK_LEFT']) -> (str, str, np.ndarray):
    sample_record = ldf.get('sample', sample_token)

    cams = [key for key in sample_record["data"].keys() if "CAM" in key]
    cams = [cam for cam in cams if cam in camera_type]
    for cam in cams:
        cam_token = sample_record["data"][cam]
        image_file_path = ldf.get_sample_data_path(cam_token)
        image_array = imread(image_file_path)
        det_result = classifier.detect_multi_object(image_array, score_threshold=[0.4, 0.4, 0.4])
        for i in range(det_result.shape[0]):
            bounding_2d_box = det_result[i, 0:4]
            score = det_result[i, 4]
            class_idx = det_result[i, 5]
            yield sample_token, cam_token, bounding_2d_box, score, class_idx
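A consumption sketch for the generator above; classifier is assumed to be an already constructed 2D detector exposing detect_multi_object as in the snippet, and lyftd a loaded LyftDataset (both placeholders):

# Usage sketch: run 2D detection on the front camera of one sample and keep confident hits.
detections = []
for s_token, cam_token, box_2d, score, class_idx in select_2d_annotation_boxes(
        lyftd, classifier, sample_token, camera_type=['CAM_FRONT']):
    if score > 0.5:
        detections.append((cam_token, box_2d, int(class_idx)))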
Example 22
def transform_image_to_world_coordinate(image_array: np.array, camera_token: str, lyftd: LyftDataset):
    """

    :param image_array: 3xN np.ndarray
    :param camera_token:
    :return:
    """

    sd_record = lyftd.get("sample_data", camera_token)
    cs_record = lyftd.get("calibrated_sensor", sd_record["calibrated_sensor_token"])
    sensor_record = lyftd.get("sensor", cs_record["sensor_token"])
    pose_record = lyftd.get("ego_pose", sd_record["ego_pose_token"])

    # inverse the viewpoint transformation

    cam_intrinsic_mtx = np.array(cs_record["camera_intrinsic"])
    image_in_cam_coord = np.dot(np.linalg.inv(cam_intrinsic_mtx), image_array)

    print(image_in_cam_coord)
    # TODO: think of how to do normalization properly
    # image_in_cam_coord = image_in_cam_coord / image_in_cam_coord[3:].ravel()

    # homogeneous coordinate to non-homogeneous one
    image_in_cam_coord = image_in_cam_coord[0:3, :]

    sens_to_pose_rot_mtx = Quaternion(cs_record['rotation']).rotation_matrix

    image_in_pose_coord = np.dot(sens_to_pose_rot_mtx, image_in_cam_coord)
    t = np.array(cs_record['translation'])
    for i in range(3):
        image_in_pose_coord[i, :] = image_in_pose_coord[i, :] + t[i]

    print(cs_record)

    print("in pose record:", image_in_pose_coord)

    pose_to_world_rot_mtx = Quaternion(pose_record['rotation']).rotation_matrix

    image_in_world_coord = np.dot(pose_to_world_rot_mtx,
                                  image_in_pose_coord)
    t = np.array(pose_record['translation'])
    for i in range(3):
        image_in_world_coord[i, :] = image_in_world_coord[i, :] + t[i]

    return image_in_world_coord
Example 23
def create_lyft_info(version, data_path, save_path, split, max_sweeps=10):
    from lyft_dataset_sdk.lyftdataset import LyftDataset
    from . import lyft_utils
    data_path = data_path / version
    save_path = save_path / version
    split_path = data_path.parent / 'ImageSets'

    if split is not None:
        save_path = save_path / split
        split_path = split_path / split

    save_path.mkdir(exist_ok=True)

    assert version in ['trainval', 'one_scene', 'test']

    if version == 'trainval':
        train_split_path = split_path / 'train.txt'
        val_split_path = split_path / 'val.txt'
    elif version == 'test':
        train_split_path = split_path / 'test.txt'
        val_split_path = None
    elif version == 'one_scene':
        train_split_path = split_path / 'one_scene.txt'
        val_split_path = split_path / 'one_scene.txt'
    else:
        raise NotImplementedError

    train_scenes = [x.strip() for x in open(train_split_path).readlines()] if train_split_path.exists() else []
    val_scenes = [x.strip() for x in open(val_split_path).readlines()] if val_split_path is not None and val_split_path.exists() else []

    lyft = LyftDataset(json_path=data_path / 'data', data_path=data_path, verbose=True)

    available_scenes = lyft_utils.get_available_scenes(lyft)
    available_scene_names = [s['name'] for s in available_scenes]
    train_scenes = list(filter(lambda x: x in available_scene_names, train_scenes))
    val_scenes = list(filter(lambda x: x in available_scene_names, val_scenes))
    train_scenes = set([available_scenes[available_scene_names.index(s)]['token'] for s in train_scenes])
    val_scenes = set([available_scenes[available_scene_names.index(s)]['token'] for s in val_scenes])

    print('%s: train scene(%d), val scene(%d)' % (version, len(train_scenes), len(val_scenes)))

    train_lyft_infos, val_lyft_infos = lyft_utils.fill_trainval_infos(
        data_path=data_path, lyft=lyft, train_scenes=train_scenes, val_scenes=val_scenes,
        test='test' in version, max_sweeps=max_sweeps
    )

    if version == 'test':
        print('test sample: %d' % len(train_lyft_infos))
        with open(save_path / f'lyft_infos_test.pkl', 'wb') as f:
            pickle.dump(train_lyft_infos, f)
    else:
        print('train sample: %d, val sample: %d' % (len(train_lyft_infos), len(val_lyft_infos)))
        with open(save_path / f'lyft_infos_train.pkl', 'wb') as f:
            pickle.dump(train_lyft_infos, f)
        with open(save_path / f'lyft_infos_val.pkl', 'wb') as f:
            pickle.dump(val_lyft_infos, f)
Example 24
def main(config: dict):
    scriptStart = time.time()
    print("=" * 80)
    seed_everything(1)
    if config["isTrain"]:
        print("Preprocessing train set")
        ll5ds = LyftDataset(data_path=config["PATH"] + "train/",
                            json_path=config["PATH"] + "train/data/",
                            verbose=True)
    else:
        print("Preprocessing test set")
        ll5ds = LyftDataset(data_path=config["PATH"] + "test/",
                            json_path=config["PATH"] + "test/data/",
                            verbose=True)
    checkCreateFolders(config)
    generateDataset(ll5ds, config)
    print("Dataset generated in {:.2f} min.".format(
        (time.time() - scriptStart) / 60))
    print("=" * 80)
Example 25
    def prep_list_of_sessions(self):
        self.level5data = LyftDataset(json_path=DATASET_ROOT + "/%s_data" % self.phase, data_path=DATASET_ROOT, verbose=True)
        self.num_sessions = len(self.level5data.scene)
        self.cloud_tokens = []
        self.session_lengths = []

        for session_ind in range(self.num_sessions):
            
            record = self.level5data.scene[session_ind]
            session_token = record['token']            
            sample_token = record["first_sample_token"]
            sample = self.level5data.get("sample", sample_token)
            lidar_token = sample["data"]["LIDAR_TOP"]
            cur_lidar_tokens = []
            while len(lidar_token) > 0:
                cur_lidar_tokens.append(lidar_token)
                lidar_data = self.level5data.get("sample_data", lidar_token)
                lidar_token = lidar_data["next"]
            self.cloud_tokens.append(cur_lidar_tokens)
            self.session_lengths.append(len(cur_lidar_tokens))
Example 26
def extract_single_box(train_objects, idx, lyftd: LyftDataset) -> Box:
    first_train_id, first_train_sample_box = get_train_data_sample_token_and_box(idx, train_objects)

    first_train_sample = lyftd.get('sample', first_train_id)

    sample_data_token = first_train_sample['data']['LIDAR_TOP']

    first_train_sample_box = transform_box_from_world_to_sensor_coordinates(first_train_sample_box,
                                                                            sample_data_token, lyftd)

    return first_train_sample_box, sample_data_token
Example 27
def generating_scenes(train_root_path, test_root_path):
    ############################ Set random seed for list shuffle
    SEED = 42
    random.seed(SEED)

    ############################ load lyft dataset
    level5data = LyftDataset(data_path=train_root_path, \
        json_path=Path(train_root_path) / "data", verbose=True)
    level5data_test = LyftDataset(data_path=test_root_path, \
        json_path=Path(test_root_path) / "data", verbose=True)

    train_scenes = []
    val_scenes = []
    all_scenes = []
    test_scenes = []

    for sample in level5data.sample:
        all_scenes.append(sample["scene_token"])

    for sample in level5data_test.sample:
        test_scenes.append(sample["scene_token"])

    all_scenes = list(set(all_scenes))
    test_scenes = list(set(test_scenes))

    random.shuffle(all_scenes)
    train_scenes = all_scenes[:int(5 * len(all_scenes) / 6)]
    val_scenes = all_scenes[int(5 * len(all_scenes) / 6):]

    ############################ save splitting result
    with open(Path(train_root_path) / "train_scenes.txt", "w") as f:
        for item in train_scenes:
            f.write("%s\n" % item)

    with open(Path(train_root_path) / "val_scenes.txt", "w") as f:
        for item in val_scenes:
            f.write("%s\n" % item)

    with open(Path(test_root_path) / "test_scenes.txt", "w") as f:
        for item in test_scenes:
            f.write("%s\n" % item)
Example 28
class LyftDataClass(Dataset):
    def __init__(self, data_path, json_path, verbose=True, map_resolution=0.1):
        self._lyftdataset = LyftDataset(data_path, json_path, verbose=verbose, map_resolution=map_resolution)

    def __len__(self):
        return len(self._lyftdataset.scene)

    def __getitem__(self, idx):
        sample_token = self._lyftdataset.sample[idx]["token"]
        sample_record = self.getFromLyft("sample", sample_token)

        return Sample(self, sample_record)

    def getFromLyft(self, table_name, token):
        return self._lyftdataset.get(table_name, token)

    def getFromLyftDatapath(self):
        return self._lyftdataset.data_path

    def getFromLyftBoxes(self, sample_data_token):
        return self._lyftdataset.get_boxes(sample_data_token)
Example 29
def transform_image_to_cam_coordinate(image_array_p: np.array, camera_token: str, lyftd: LyftDataset):
    sd_record = lyftd.get("sample_data", camera_token)
    cs_record = lyftd.get("calibrated_sensor", sd_record["calibrated_sensor_token"])
    sensor_record = lyftd.get("sensor", cs_record["sensor_token"])
    pose_record = lyftd.get("ego_pose", sd_record["ego_pose_token"])

    # inverse the viewpoint transformation
    def normalization(input_array):
        input_array[0:2, :] = input_array[0:2, :] * input_array[2:3, :].repeat(2, 0).reshape(2, input_array.shape[1])
        return input_array

    image_array = normalization(np.copy(image_array_p))
    image_array = np.concatenate((image_array.ravel(), np.array([1])))
    image_array = image_array.reshape(4, 1)

    cam_intrinsic_mtx = np.array(cs_record["camera_intrinsic"])
    view = cam_intrinsic_mtx
    viewpad = np.eye(4)
    viewpad[: view.shape[0], : view.shape[1]] = view
    image_in_cam_coord = np.dot(np.linalg.inv(viewpad), image_array)

    return image_in_cam_coord[0:3, :]
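A sketch of the input layout the function above expects: a 3 x 1 array whose rows are pixel u, pixel v and depth in metres (the concrete numbers below are placeholders):

# Usage sketch: map one image pixel with known depth back into camera coordinates.
u, v, depth = 640.0, 360.0, 12.5                    # placeholder pixel coordinates and depth in metres
point_2d = np.array([[u], [v], [depth]])            # 3 x 1 array, as expected by the function
point_in_cam = transform_image_to_cam_coordinate(point_2d, camera_token, lyftd)  # 3 x 1 in camera frame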
Example 30
def read_frustum_pointnet_output_v2(ldt: LyftDataset, inference_tfrecord_file):
    raw_dataset = tf.data.TFRecordDataset(inference_tfrecord_file)
    for raw_record in raw_dataset:
        example = parse_inference_record(raw_record)

        inferred_box = get_box_from_inference(
            lyftd=ldt,
            heading_cls=example['rot_heading_angle_class'].numpy(),
            heading_res=example['rot_heading_angle_residual'].numpy(),
            rot_angle=example['frustum_angle'].numpy(),
            size_cls=example['size_class'].numpy(),
            size_res=example['size_residual'].numpy(),
            center_coord=example['rot_box_center'].numpy(),
            sample_data_token=example['camera_token'].numpy().decode('utf8'),
            score=example['score'].numpy(),
            type_name=example['type_name'].numpy().decode('utf8'))

        pc_record = ldt.get("sample_data",
                            example['camera_token'].numpy().decode('utf8'))
        sample_of_pc_record = ldt.get("sample", pc_record['sample_token'])

        yield inferred_box, pc_record['sample_token']