def from_sample_token(self, nusc: 'NuScenes', sample_token: str,
                       sensor: str) -> 'myRadarPointCloud':
     # trace back frame data - here we have all the sensor data and all the annotations
     sample_rec = nusc.get('sample', sample_token)
     # get sensor data
     sd_rec = nusc.get('sample_data', sample_rec['data'][sensor])
     # get sensor calibration
     cs_rec = nusc.get('calibrated_sensor',
                       sd_rec['calibrated_sensor_token'])
     # ego pose - ego car to global frame
     ep_rec = nusc.get('ego_pose', sd_rec['ego_pose_token'])
      # A_cs^car: Homogeneous transformation matrix from sensor (calibrated_sensor) frame to ego car frame
     A_cs_2_car = transform_matrix(cs_rec['translation'],
                                   Quaternion(cs_rec['rotation']),
                                   inverse=False)
      # A_car^gl: Homogeneous transformation matrix from ego car frame to global frame
     A_car_2_gl = transform_matrix(ep_rec['translation'],
                                   Quaternion(ep_rec['rotation']),
                                   inverse=False)
     # read radar pointcloud in sensor coordinates
     filepath = os.path.join(nusc.dataroot, sd_rec['filename'])
     pc_cs = RadarPointCloud.from_file(
         file_name=filepath
     ).points  # :return: <np.float: d, n>. Point cloud matrix with d dimensions and n points.
     # select fields: x,y,vx_comp,vy_comp,RCS
     pc_cs = pc_cs[self._FEAT_SELECTOR, :]
     # return instance
     return self(nusc, pc_cs, A_cs_2_car, A_car_2_gl)
def get_rad_to_cam(nusc: NuScenes, cam_sd_token: str, rad_sd_token: str):
    """
    Method to get the extrinsic calibration matrix from radar_front to camera_front
    for a specific sample.
    Every sample_data record references the calibrated sensor the data
    was collected from (via the "calibrated_sensor_token" key).
    The calibrated_sensor record holds the definition of a particular
    sensor (lidar/radar/camera) as calibrated on a particular vehicle.
    :param nusc: Nuscenes instance
    :param cam_sd_token : A token of a specific camera_front sample_data
    :param rad_sd_token : A token of a specific radar_front sample_data
    :return: rad_to_cam <np.float: 4, 4> Returns homogeneous transform matrix from radar to camera
    """
    cam_cs_token = nusc.get('sample_data',
                            cam_sd_token)["calibrated_sensor_token"]
    cam_cs_rec = nusc.get('calibrated_sensor', cam_cs_token)

    rad_cs_token = nusc.get('sample_data',
                            rad_sd_token)["calibrated_sensor_token"]
    rad_cs_rec = nusc.get('calibrated_sensor', rad_cs_token)

    # Based on how transforms are handled in nuScenes scripts like scripts/export_kitti.py
    rad_to_ego = transform_matrix(rad_cs_rec['translation'],
                                  Quaternion(rad_cs_rec['rotation']),
                                  inverse=False)
    ego_to_cam = transform_matrix(cam_cs_rec['translation'],
                                  Quaternion(cam_cs_rec['rotation']),
                                  inverse=True)
    rad_to_cam = np.dot(ego_to_cam, rad_to_ego)
    return rad_to_cam
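A minimal usage sketch for the function above, assuming an initialized NuScenes instance; the dataroot path is a placeholder and the token lookups are illustrative. Note that get_rad_to_cam composes only the two calibrated_sensor transforms, so it is exact only when both sample_data records share the same ego pose (as keyframes of the same sample effectively do).

import numpy as np
from nuscenes.nuscenes import NuScenes

nusc = NuScenes(version='v1.0-mini', dataroot='/data/nuscenes', verbose=False)  # placeholder dataroot
sample = nusc.sample[0]
cam_sd_token = sample['data']['CAM_FRONT']
rad_sd_token = sample['data']['RADAR_FRONT']

rad_to_cam = get_rad_to_cam(nusc, cam_sd_token, rad_sd_token)
# Map a homogeneous radar point (x, y, z, 1) into the camera frame.
pt_radar = np.array([10.0, 0.0, 0.5, 1.0])
pt_cam = rad_to_cam @ pt_radar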
Example #3
    def getPoints(self, index):
        sample = self.nusc.get('sample', self.filtered_sample_tokens[index])
        sample_lidar_token = sample['data']['LIDAR_TOP']

        lidar_data = self.nusc.get('sample_data', sample_lidar_token)
        ego_pose = self.nusc.get('ego_pose', lidar_data['ego_pose_token'])
        calibrated_sensor = self.nusc.get(
            'calibrated_sensor', lidar_data['calibrated_sensor_token'])

        global_from_car = transform_matrix(ego_pose['translation'],
                                           Quaternion(ego_pose['rotation']),
                                           inverse=False)
        car_from_sensor = transform_matrix(calibrated_sensor['translation'],
                                           Quaternion(
                                               calibrated_sensor['rotation']),
                                           inverse=False)
        try:
            lidar_pointcloud, times = LidarPointCloud.from_file_multisweep(
                self.nusc, sample, 'LIDAR_TOP', 'LIDAR_TOP')
            lidar_pointcloud.transform(car_from_sensor)
        except Exception as e:
            print(f"Failed to load Lidar Pointcloud for {sample}: {e}")
            raise
        points = lidar_pointcloud.points
        points[3, :] /= 255
        points[3, :] -= 0.5

        # points_cat = np.concatenate([points, times], axis=0).transpose()
        points_cat = points.transpose()
        points_cat = points_cat[~np.isnan(points_cat).any(axis=1)]

        return points_cat
Example #4
def return_pose(lidar):
    poss = nusc.get('ego_pose', lidar['ego_pose_token'])
    cs_record_lid = nusc.get('calibrated_sensor', lidar['calibrated_sensor_token'])
    lid_to_ego = transform_matrix(cs_record_lid["translation"], Quaternion(cs_record_lid["rotation"]), inverse=False)
    lid_ego_to_world = transform_matrix(poss["translation"], Quaternion(poss["rotation"]), inverse=False)
    # kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2)
    lid_to_world = np.dot(lid_ego_to_world, lid_to_ego)#np.dot(lid_to_ego, kitti_to_nu_lidar.transformation_matrix)
    return lid_to_world
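A short sketch of how return_pose can be used to express one lidar sweep in the frame of another, assuming nusc is the module-level NuScenes instance the function closes over and that the first sweep has a successor; the dataroot path is a placeholder.

import numpy as np
from nuscenes.nuscenes import NuScenes

nusc = NuScenes(version='v1.0-mini', dataroot='/data/nuscenes', verbose=False)  # placeholder dataroot
lidar_a = nusc.get('sample_data', nusc.sample[0]['data']['LIDAR_TOP'])
lidar_b = nusc.get('sample_data', lidar_a['next'])  # assumes a following sweep exists

world_from_a = return_pose(lidar_a)
world_from_b = return_pose(lidar_b)
# Points from sweep B expressed in the frame of sweep A.
a_from_b = np.dot(np.linalg.inv(world_from_a), world_from_b)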
Example #5
    def get_sweeps(sample, channel, max_sweeps, ref_from_car, car_from_global):
        sample_data_token = sample['data'][channel]
        curr_sd_rec = nusc.get('sample_data', sample_data_token)
        sweeps = []
        while len(sweeps) < max_sweeps - 1:
            if curr_sd_rec['prev'] == '':
                if len(sweeps) == 0:
                    sweep = {
                        'file_path': Path(ref_lidar_path).relative_to(data_path).__str__(),
                        'sample_data_token': curr_sd_rec['token'],
                        'transform_matrix': None,
                        'time_lag': curr_sd_rec['timestamp'] * 0,
                    }
                    sweep['lidar_path'] = sweep['file_path']  # for compatibility with old approach
                    sweeps.append(sweep)
                else:
                    sweeps.append(sweeps[-1])
            else:
                curr_sd_rec = nusc.get('sample_data', curr_sd_rec['prev'])

                # Get past pose
                current_pose_rec = nusc.get('ego_pose', curr_sd_rec['ego_pose_token'])
                global_from_car = transform_matrix(
                    current_pose_rec['translation'], Quaternion(current_pose_rec['rotation']), inverse=False,
                )

                # Homogeneous transformation matrix from sensor coordinate frame to ego car frame.
                current_cs_rec = nusc.get(
                    'calibrated_sensor', curr_sd_rec['calibrated_sensor_token']
                )
                car_from_current = transform_matrix(
                    current_cs_rec['translation'], Quaternion(current_cs_rec['rotation']), inverse=False,
                )

                tm = reduce(np.dot, [ref_from_car, car_from_global, global_from_car, car_from_current])

                lidar_path = nusc.get_sample_data_path(curr_sd_rec['token'])

                time_lag = ref_time - 1e-6 * curr_sd_rec['timestamp']

                sweep = {
                    'file_path': Path(lidar_path).relative_to(data_path).__str__(),
                    'sample_data_token': curr_sd_rec['token'],
                    'transform_matrix': tm,
                    'global_from_car': global_from_car,
                    'car_from_current': car_from_current,
                    'time_lag': time_lag,
                }
                sweep['lidar_path'] = sweep['file_path']  # for compatibility with old approach
                sweeps.append(sweep)

        assert len(sweeps) == max_sweeps - 1, \
            f"sweep {curr_sd_rec['token']} only has {len(sweeps)} sweeps, " \
            f"you should duplicate to sweep num {max_sweeps - 1}"

        return sweeps
Example #6
 def get_global_pose(self, sd_token, inverse=False):
     sd = self.nusc.get("sample_data", sd_token)
     sd_ep = self.nusc.get("ego_pose", sd["ego_pose_token"])
     sd_cs = self.nusc.get("calibrated_sensor", sd["calibrated_sensor_token"])
     if inverse is False:
         global_from_ego = transform_matrix(sd_ep["translation"], Quaternion(sd_ep["rotation"]), inverse=False)
         ego_from_sensor = transform_matrix(sd_cs["translation"], Quaternion(sd_cs["rotation"]), inverse=False)
         pose = global_from_ego.dot(ego_from_sensor)
     else:
         sensor_from_ego = transform_matrix(sd_cs["translation"], Quaternion(sd_cs["rotation"]), inverse=True)
         ego_from_global = transform_matrix(sd_ep["translation"], Quaternion(sd_ep["rotation"]), inverse=True)
         pose = sensor_from_ego.dot(ego_from_global)
     return pose
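The inverse flag makes chaining poses straightforward. A sketch, where reader stands for an instance exposing get_global_pose as above, and sd_token_a / sd_token_b are hypothetical sample_data tokens:

import numpy as np

a_from_global = reader.get_global_pose(sd_token_a, inverse=True)   # sensor A <- global
global_from_b = reader.get_global_pose(sd_token_b, inverse=False)  # global <- sensor B
a_from_b = np.dot(a_from_global, global_from_b)                    # sensor A <- sensor B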
Example #7
def veh_pos_to_transform(veh_pos):
    "convert vehicle pose to two transformation matrix"
    rotation = veh_pos[:3, :3] 
    tran = veh_pos[:3, 3]

    global_from_car = transform_matrix(
        tran, Quaternion(matrix=rotation), inverse=False
    )

    car_from_global = transform_matrix(
        tran, Quaternion(matrix=rotation), inverse=True
    )

    return global_from_car, car_from_global
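A quick self-check sketch: the two matrices returned above are inverses of each other, so their product should be numerically the identity. The identity pose below is a placeholder; substitute any 4x4 ego pose such as the output of return_pose() above.

import numpy as np

veh_pos = np.eye(4)  # placeholder pose
global_from_car, car_from_global = veh_pos_to_transform(veh_pos)
assert np.allclose(np.dot(global_from_car, car_from_global), np.eye(4), atol=1e-9)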
Example #8
 def _compute_calibration_data(self, frame_sample_data):
     cal_sensor_data = self.nusc.get(
         'calibrated_sensor', frame_sample_data['calibrated_sensor_token'])
     pose_data = self.nusc.get('ego_pose',
                               frame_sample_data['ego_pose_token'])
     car_to_global = geometry_utils.transform_matrix(
         pose_data['translation'],
         Quaternion(pose_data['rotation']),
         inverse=False)
     sensor_to_car = geometry_utils.transform_matrix(
         cal_sensor_data['translation'],
         Quaternion(cal_sensor_data['rotation']),
         inverse=False)
     trans_matrix = np.dot(car_to_global, sensor_to_car)
     return cal_sensor_data, pose_data, trans_matrix
Example #9
def get_extrinsics(ego_pose: DictStrAny,
                   car_from_sensor: DictStrAny) -> Extrinsics:
    """Convert NuScenes ego pose / sensor_to_car to global extrinsics."""
    global_from_car = transform_matrix(
        ego_pose["translation"],
        Quaternion(ego_pose["rotation"]),
        inverse=False,
    )
    car_from_sensor_ = transform_matrix(
        car_from_sensor["translation"],
        Quaternion(car_from_sensor["rotation"]),
        inverse=False,
    )
    extrinsics = np.dot(global_from_car, car_from_sensor_)
    return get_extrinsics_from_matrix(extrinsics)
Example #10
    def _read_camera_data(self, sample_data_token):
        sd_record = self.nusc.get("sample_data", sample_data_token)
        cs_record = self.nusc.get("calibrated_sensor",
                                  sd_record["calibrated_sensor_token"])
        sensor_record = self.nusc.get("sensor", cs_record["sensor_token"])
        assert sensor_record["modality"] == "camera"

        # currently using only keyframes
        assert sd_record["is_key_frame"]

        data_path = self.nusc.get_sample_data_path(sample_data_token)
        imsize = (sd_record["width"], sd_record["height"])

        cam_intrinsic = np.array(cs_record["camera_intrinsic"])
        cam_extrinsics = transform_matrix(cs_record["translation"],
                                          Quaternion(cs_record["rotation"]))

        with open(data_path, "rb") as in_file:
            img_bytes_jpg = in_file.read()

        return (
            {
                "{}_jpg": img_bytes_jpg,
                "{}_size": np.asarray(imsize, dtype=np.int64),
                "{}_intrinsics": cam_intrinsic.astype(np.float32),
            },
            cam_extrinsics,
        )
Example #11
def project_points_to_image(current_pc, pointsensor, cam):

    pc = copy.deepcopy(current_pc)
    # Points live in the point sensor frame. So they need to be transformed via global to the image plane.
    # First step: transform the point-cloud to the ego vehicle frame for the timestamp of the sweep.
    cs_record = nusc.get('calibrated_sensor',
                         pointsensor['calibrated_sensor_token'])
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix)
    pc.translate(np.array(cs_record['translation']))

    #     import pdb; pdb.set_trace()
    # Second step: transform to the global frame.
    poserecord = nusc.get('ego_pose', pointsensor['ego_pose_token'])
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix)
    pc.translate(np.array(poserecord['translation']))

    t2 = transform_matrix(translation=poserecord['translation'],
                          rotation=Quaternion(poserecord['rotation']),
                          inverse=True)
    #     import pdb; pdb.set_trace()
    global_frame_pc = copy.deepcopy(pc)

    # Third step: transform into the ego vehicle frame for the timestamp of the image.
    poserecord = nusc.get('ego_pose', cam['ego_pose_token'])
    pc.translate(-np.array(poserecord['translation']))
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix.T)

    # Fourth step: transform into the camera.
    cs_record = nusc.get('calibrated_sensor', cam['calibrated_sensor_token'])
    pc.translate(-np.array(cs_record['translation']))
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix.T)

    return pc, global_frame_pc
Example #12
    def get_lidar_pointcloud(self):
        """ 
        Extracting lidar pointcloud for current timestep in current scene
        Author: Javier
        :return Point cloud [Position(x,y,z) X n_points] 
        """

        # Select sensor data record
        channel = 'LIDAR_TOP'
        sample_data_token = self.sample['data'][channel]
        sd_record = self.nusc.get('sample_data', sample_data_token)

        # Get aggregated point cloud in lidar frame.
        chan = sd_record['channel']
        pc, times = LidarPointCloud.from_file_multisweep(self.nusc, self.sample, chan, channel, nsweeps=1)

        #calibration of LiDAR points
        ref_sd_record = self.nusc.get('sample_data', sample_data_token)
        cs_record = self.nusc.get('calibrated_sensor', ref_sd_record['calibrated_sensor_token'])
        pose_record = self.nusc.get('ego_pose', ref_sd_record['ego_pose_token'])
        ref_to_ego = transform_matrix(translation=cs_record['translation'],
                                      rotation=Quaternion(cs_record["rotation"]))

        # Compute rotation between 3D vehicle pose and "flat" vehicle pose (parallel to global z plane).
        ego_yaw = Quaternion(pose_record['rotation']).yaw_pitch_roll[0]
        rotation_vehicle_flat_from_vehicle = np.dot(
            Quaternion(scalar=np.cos(ego_yaw / 2), vector=[0, 0, np.sin(ego_yaw / 2)]).rotation_matrix,
            Quaternion(pose_record['rotation']).inverse.rotation_matrix)
        vehicle_flat_from_vehicle = np.eye(4)
        vehicle_flat_from_vehicle[:3, :3] = rotation_vehicle_flat_from_vehicle
        viewpoint = np.dot(vehicle_flat_from_vehicle, ref_to_ego)

        points = view_points(pc.points[:3, :], viewpoint, normalize=False)
        self.points_lidar = points
        return points
Example #13
def get_lidar_to_image_transform(nusc, pointsensor,  camera_sensor):
    tms = []
    intrinsics = []  
    cam_paths = [] 
    for chan in CAM_CHANS:
        cam = camera_sensor[chan]

        # Points live in the point sensor frame. So they need to be transformed via global to the image plane.
        # First step: transform the point-cloud to the ego vehicle frame for the timestamp of the sweep.
        lidar_cs_record = nusc.get('calibrated_sensor', pointsensor['calibrated_sensor_token'])
        car_from_lidar = transform_matrix(
            lidar_cs_record["translation"], Quaternion(lidar_cs_record["rotation"]), inverse=False
        )

        # Second step: transform to the global frame.
        lidar_poserecord = nusc.get('ego_pose', pointsensor['ego_pose_token'])
        global_from_car = transform_matrix(
            lidar_poserecord["translation"],  Quaternion(lidar_poserecord["rotation"]), inverse=False,
        )

        # Third step: transform into the ego vehicle frame for the timestamp of the image.
        cam_poserecord = nusc.get('ego_pose', cam['ego_pose_token'])
        car_from_global = transform_matrix(
            cam_poserecord["translation"],
            Quaternion(cam_poserecord["rotation"]),
            inverse=True,
        )

        # Fourth step: transform into the camera.
        cam_cs_record = nusc.get('calibrated_sensor', cam['calibrated_sensor_token'])
        cam_from_car = transform_matrix(
            cam_cs_record["translation"], Quaternion(cam_cs_record["rotation"]), inverse=True
        )

        tm = reduce(
            np.dot,
            [cam_from_car, car_from_global, global_from_car, car_from_lidar],
        )

        cam_path, _, intrinsic = nusc.get_sample_data(cam['token'])

        tms.append(tm)
        intrinsics.append(intrinsic)
        cam_paths.append(cam_path)

    return tms, intrinsics, cam_paths  
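A sketch of consuming the returned transforms, assuming nusc, pointsensor (a lidar sample_data record) and camera_sensor (a dict of camera sample_data records keyed by channel) as in the function above. Each tm maps homogeneous lidar points directly into the corresponding camera frame, after which view_points applies the intrinsics.

import numpy as np
from nuscenes.utils.data_classes import LidarPointCloud
from nuscenes.utils.geometry_utils import view_points

tms, intrinsics, cam_paths = get_lidar_to_image_transform(nusc, pointsensor, camera_sensor)

pc = LidarPointCloud.from_file(nusc.get_sample_data_path(pointsensor['token']))
points_h = np.vstack((pc.points[:3, :], np.ones(pc.nbr_points())))  # 4 x N homogeneous

for tm, intrinsic in zip(tms, intrinsics):
    cam_points = np.dot(tm, points_h)
    in_front = cam_points[2, :] > 1.0  # keep only points in front of the camera
    pixels = view_points(cam_points[:3, in_front], intrinsic, normalize=True)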
Example #14
    def load_position_raw(self, sample_lidar_token):
        lidar_data = self.NuScenes_data.get("sample_data", sample_lidar_token)

        ego_pose = self.NuScenes_data.get("ego_pose", lidar_data["ego_pose_token"])
        calibrated_sensor = self.NuScenes_data.get("calibrated_sensor", lidar_data["calibrated_sensor_token"])

        # Homogeneous transformation matrix from car frame to world frame.
        global_from_car = transform_matrix(ego_pose['translation'],
                                        Quaternion(ego_pose['rotation']), inverse=False)

        return global_from_car
Example #15
    def load_cloud_raw(self, sample_lidar_token):  
        lidar_data = self.NuScenes_data.get("sample_data", sample_lidar_token)  
        lidar_filepath = self.NuScenes_data.get_sample_data_path(sample_lidar_token)
        ego_pose = self.NuScenes_data.get("ego_pose", lidar_data["ego_pose_token"])
        calibrated_sensor = self.NuScenes_data.get("calibrated_sensor", lidar_data["calibrated_sensor_token"])

        # Homogeneous transformation matrix from sensor coordinate frame to ego car frame.
        car_from_sensor = transform_matrix(calibrated_sensor['translation'], Quaternion(calibrated_sensor['rotation']),
                                            inverse=False)

        lidar_pointcloud = LidarPointCloud.from_file(lidar_filepath)

        # The lidar pointcloud is defined in the sensor's reference frame.
        # We want it in the car's reference frame, so we transform each point
        lidar_pointcloud.transform(car_from_sensor)

        return lidar_pointcloud.points[:3,:].T    
Example #16
    def _read_radar_data(self, sample_data_token):
        sd_record = self.nusc.get("sample_data", sample_data_token)
        cs_record = self.nusc.get("calibrated_sensor",
                                  sd_record["calibrated_sensor_token"])
        sensor_record = self.nusc.get("sensor", cs_record["sensor_token"])
        assert sensor_record["modality"] == "radar"

        # currently using only keyframes
        assert sd_record["is_key_frame"]

        data_path = self.nusc.get_sample_data_path(sample_data_token)
        radar_point_cloud = RadarPointCloud.from_file(data_path)
        points = tf.convert_to_tensor(radar_point_cloud.points.transpose())

        radar_extrinsics = transform_matrix(cs_record["translation"],
                                            Quaternion(cs_record["rotation"]))

        return {"{}_points": tf.io.serialize_tensor(points)}, radar_extrinsics
Example #17
    def __init__(self, ego_pose, cam_calibrated, lidar_calibrated):
        """
        :param ego_pose: ego_pose dictionary from NuScenes
        :param cam_calibrated: calibrated_sensor dictionary from NuScenes
        :param lidar_calibrated: calibrated_sensor dictionary from NuScenes
        """
        self.t_global_car = transform_matrix(ego_pose['translation'],
                                             Quaternion(ego_pose['rotation']),
                                             inverse=False)
        self.t_car_global = transform_matrix(ego_pose['translation'],
                                             Quaternion(ego_pose['rotation']),
                                             inverse=True)
        self.t_car_lidar = transform_matrix(lidar_calibrated['translation'],
                                            Quaternion(
                                                lidar_calibrated['rotation']),
                                            inverse=False)
        self.t_lidar_car = transform_matrix(lidar_calibrated['translation'],
                                            Quaternion(
                                                lidar_calibrated['rotation']),
                                            inverse=True)
        self.t_car_cam = transform_matrix(cam_calibrated['translation'],
                                          Quaternion(
                                              cam_calibrated['rotation']),
                                          inverse=False)
        self.t_cam_car = transform_matrix(cam_calibrated['translation'],
                                          Quaternion(
                                              cam_calibrated['rotation']),
                                          inverse=True)
        camera_intrinsic = np.array(cam_calibrated['camera_intrinsic'])
        self.t_img_cam = np.eye(4)
        self.t_img_cam[:camera_intrinsic.shape[0], :camera_intrinsic.
                       shape[1]] = camera_intrinsic

        self.ego_pose = ego_pose
        self.lidar_calibrated = lidar_calibrated
        self.cam_calibrated = cam_calibrated
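With the matrices stored above, projecting lidar points into image pixels is a chain of matrix products. A sketch, assuming calib is an instance of this class and points is a 3 x N array in the lidar frame (points behind the camera should be filtered out before the perspective divide):

import numpy as np

points_h = np.vstack((points, np.ones(points.shape[1])))      # homogeneous 4 x N
cam_points = calib.t_cam_car @ calib.t_car_lidar @ points_h   # lidar -> car -> camera
img_points = calib.t_img_cam @ cam_points                     # apply intrinsics
pixels = img_points[:2, :] / img_points[2, :]                 # perspective divide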
Example #18
        current_lidar = first_lidar
        lidar_filenames = []
        poses = []

        if not os.path.exists(velodyne_folder):
            print("Creating output folder: {}".format(output_folder))
            os.makedirs(velodyne_folder)

        original = []

        while current_lidar != "":
            lidar_data = nusc.get('sample_data', current_lidar)
            calib_data = nusc.get("calibrated_sensor", lidar_data["calibrated_sensor_token"])
            egopose_data = nusc.get('ego_pose',  lidar_data["ego_pose_token"])
            
            car_to_velo = geoutils.transform_matrix(calib_data["translation"], Quaternion(calib_data["rotation"]))
            pose_car = geoutils.transform_matrix(egopose_data["translation"], Quaternion(egopose_data["rotation"]))
            
            pose = np.dot(pose_car, car_to_velo)
            
            scan = np.fromfile(os.path.join(dataroot, lidar_data["filename"]), dtype=np.float32)
            points = scan.reshape((-1, 5))[:, :4]

            # ensure that remission is in [0,1]
            max_remission = np.max(points[:, 3])
            min_remission = np.min(points[:, 3])
            points[:, 3] = (points[:, 3] - min_remission)  / (max_remission - min_remission)
            
            output_filename = os.path.join(velodyne_folder, "{:05d}.bin".format(len(lidar_filenames)))
            points.tofile(output_filename)
Example #19
def get_pc_from_file_multisweep(nusc,
                                sample_rec,
                                chan: str,
                                ref_chan: str,
                                nsweeps: int = 3,
                                min_distance: float = 1.0,
                                invalid_states=None,
                                dynprop_states=None,
                                ambig_states=None):
    """
    Returns a point cloud of multiple aggregated sweeps.
    Original function can be found at: https://github.com/nutonomy/nuscenes-devkit/blob/ae022aba3b37f07202ea402f8278979097738369/python-sdk/nuscenes/utils/data_classes.py#L56.
    Function is modified to accept additional input parameters, just like the get_pc_from_file function. This enables filtering by invalid_states, dynprop_states and ambig_states.

    Arguments:
        nusc: A NuScenes instance, <NuScenes>.
        sample_rec: The current sample, <dict>.
        chan: The radar/lidar channel from which we track back n sweeps to aggregate the point cloud, <str>.
        ref_chan: The reference channel of the current sample_rec that the point clouds are mapped to, <str>.
        nsweeps: Number of sweeps to aggregate, <int>.
        min_distance: Distance below which points are discarded, <float>.
        invalid_states: Radar states to be kept, <int List>.
        dynprop_states: Radar states to be kept, <int List>. Use [0, 2, 6] for moving objects only.
        ambig_states: Radar states to be kept, <int List>.
    """
    # Define
    if chan == 'LIDAR_TOP':
        points = np.zeros((get_number_of_lidar_pc_dimensions(), 0))
        all_pc = LidarPointCloud(points)
    else:
        points = np.zeros((get_number_of_radar_pc_dimensions(), 0))
        all_pc = RadarPointCloud(points)

    all_times = np.zeros((1, 0))

    # Get reference pose and timestamp.
    ref_sd_token = sample_rec['data'][ref_chan]
    ref_sd_rec = nusc.get('sample_data', ref_sd_token)
    ref_pose_rec = nusc.get('ego_pose', ref_sd_rec['ego_pose_token'])
    ref_cs_rec = nusc.get('calibrated_sensor',
                          ref_sd_rec['calibrated_sensor_token'])
    ref_time = 1e-6 * ref_sd_rec['timestamp']

    # Homogeneous transform from ego car frame to reference frame.
    ref_from_car = transform_matrix(ref_cs_rec['translation'],
                                    Quaternion(ref_cs_rec['rotation']),
                                    inverse=True)

    # Homogeneous transformation matrix from global to _current_ ego car frame.
    car_from_global = transform_matrix(ref_pose_rec['translation'],
                                       Quaternion(ref_pose_rec['rotation']),
                                       inverse=True)

    # Aggregate current and previous sweeps.
    sample_data_token = sample_rec['data'][chan]
    current_sd_rec = nusc.get('sample_data', sample_data_token)
    for _ in range(nsweeps):
        # Load up the pointcloud.
        if chan == 'LIDAR_TOP':
            current_pc = LidarPointCloud.from_file(file_name=os.path.join(
                nusc.dataroot, current_sd_rec['filename']))
        else:
            current_pc = RadarPointCloud.from_file(
                file_name=os.path.join(nusc.dataroot,
                                       current_sd_rec['filename']),
                invalid_states=invalid_states,
                dynprop_states=dynprop_states,
                ambig_states=ambig_states)

        # Get past pose.
        current_pose_rec = nusc.get('ego_pose',
                                    current_sd_rec['ego_pose_token'])
        global_from_car = transform_matrix(current_pose_rec['translation'],
                                           Quaternion(
                                               current_pose_rec['rotation']),
                                           inverse=False)

        # Homogeneous transformation matrix from sensor coordinate frame to ego car frame.
        current_cs_rec = nusc.get('calibrated_sensor',
                                  current_sd_rec['calibrated_sensor_token'])
        car_from_current = transform_matrix(current_cs_rec['translation'],
                                            Quaternion(
                                                current_cs_rec['rotation']),
                                            inverse=False)

        # Fuse four transformation matrices into one and perform transform.
        trans_matrix = reduce(
            np.dot,
            [ref_from_car, car_from_global, global_from_car, car_from_current])
        current_pc.transform(trans_matrix)

        # Remove close points and add timevector.
        current_pc.remove_close(min_distance)
        time_lag = ref_time - 1e-6 * current_sd_rec[
            'timestamp']  # Positive difference.
        times = time_lag * np.ones((1, current_pc.nbr_points()))
        all_times = np.hstack((all_times, times))

        # Merge with key pc.
        all_pc.points = np.hstack((all_pc.points, current_pc.points))

        # Abort if there are no previous sweeps.
        if current_sd_rec['prev'] == '':
            break
        else:
            current_sd_rec = nusc.get('sample_data', current_sd_rec['prev'])

    return all_pc, all_times
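A minimal usage sketch for the aggregation helper above; the dataroot path and the filter values are assumptions (the [0, 2, 6] dynprop filter follows the moving-objects hint in the docstring).

from nuscenes.nuscenes import NuScenes

nusc = NuScenes(version='v1.0-mini', dataroot='/data/nuscenes', verbose=False)  # placeholder dataroot
sample_rec = nusc.sample[0]
radar_pc, times = get_pc_from_file_multisweep(
    nusc, sample_rec,
    chan='RADAR_FRONT', ref_chan='LIDAR_TOP',
    nsweeps=3,
    dynprop_states=[0, 2, 6],  # keep moving objects only
)
print(radar_pc.points.shape, times.shape)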
Example #20
def evaluate(split):
    params = read_params(FLAGS.param)
    nusc = NuScenes(version='v1.0-trainval', dataroot=FLAGS.nuscenes, verbose=True)
    sensor = 'LIDAR_TOP'
    kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2)
    meta = {
        'use_camera': False,
        'use_lidar': True,
        'use_radar': False,
        'use_map': False,
        'use_external': False,
    }
    results = {}
    results_0_3 = {}

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(FLAGS.graph, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            for node in od_graph_def.node:
                if 'BatchMultiClassNonMaxSuppression' in node.name:
                    node.device = '/device:CPU:0'
            tf.import_graph_def(od_graph_def, name='')
    with detection_graph.as_default():
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = False
        with tf.Session(graph=detection_graph, config=config) as sess:
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            detection_boxes_inclined = detection_graph.get_tensor_by_name('detection_boxes_3d:0')
            detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')
            scene_splits = create_splits_scenes()

            # os.system('touch {}'.format())

            inf_time_list = []

            for scene in nusc.scene:
                if scene['name'] not in scene_splits[split]:
                    continue
                current_sample_token = scene['first_sample_token']
                last_sample_token = scene['last_sample_token']
                sample_in_scene = True
                while sample_in_scene:
                    if current_sample_token == last_sample_token:
                        sample_in_scene = False
                    sample = nusc.get('sample', current_sample_token)
                    lidar_top_data = nusc.get('sample_data', sample['data'][sensor])
                    # Get global pose and calibration data
                    ego_pose = nusc.get('ego_pose', lidar_top_data['ego_pose_token'])
                    calib_sensor = nusc.get('calibrated_sensor', lidar_top_data['calibrated_sensor_token'])
                    ego_to_global = transform_matrix(ego_pose['translation'], Quaternion(ego_pose['rotation']))
                    lidar_to_ego = transform_matrix(calib_sensor['translation'], Quaternion(calib_sensor['rotation']))

                    # Read input data
                    filename_prefix = os.path.splitext(os.path.splitext(lidar_top_data['filename'])[0])[0]
                    image_stacked, det_mask, image_ground, image_zmax = read_images(FLAGS.data, filename_prefix)
                    # Inference
                    start_time = time.time()
                    (boxes_aligned, boxes_inclined, scores, classes, num) = sess.run(
                        [detection_boxes, detection_boxes_inclined, detection_scores, detection_classes,
                         num_detections],
                        feed_dict={image_tensor: image_stacked})
                    inf_time = time.time() - start_time
                    print('Inference time:', inf_time)
                    inf_time_list.append(inf_time)



                    # Evaluate object detection
                    label_map = label_map_util.load_labelmap(FLAGS.label_map)
                    categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=10,
                                                                                use_display_name=True)
                    category_index = label_map_util.create_category_index(categories)
                    boxes = []
                    boxes_0_3 = []
                    scores = np.squeeze(scores)
                    for i in range(scores.shape[0]):
                        if scores[i] > .230:
                            object_class = category_index[int(np.squeeze(classes)[i])]['name']
                            box = calculate_object_box(tuple(np.squeeze(boxes_aligned)[i]),
                                                       tuple(np.squeeze(boxes_inclined)[i]), image_ground, image_zmax,
                                                       object_class, scores[i], params)
                            # Transformation box coordinate system to nuscenes lidar coordinate system
                            box.rotate(kitti_to_nu_lidar)
                            # Transformation nuscenes lidar coordinate system to ego vehicle frame
                            box.rotate(Quaternion(matrix=lidar_to_ego[:3, :3]))
                            box.translate(lidar_to_ego[:3, 3])
                            # Transformation ego vehicle frame to global frame
                            box.rotate(Quaternion(matrix=ego_to_global[:3, :3]))
                            box.translate(ego_to_global[:3, 3])
                            boxes.append(box)
                    for i in range(scores.shape[0]):
                        if scores[i] > .225:
                            object_class = category_index[int(np.squeeze(classes)[i])]['name']
                            box = calculate_object_box(tuple(np.squeeze(boxes_aligned)[i]),
                                                       tuple(np.squeeze(boxes_inclined)[i]), image_ground, image_zmax,
                                                       object_class, scores[i], params)
                            # Transformation box coordinate system to nuscenes lidar coordinate system
                            box.rotate(kitti_to_nu_lidar)
                            # Transformation nuscenes lidar coordinate system to ego vehicle frame
                            box.rotate(Quaternion(matrix=lidar_to_ego[:3, :3]))
                            box.translate(lidar_to_ego[:3, 3])
                            # Transformation ego vehicle frame to global frame
                            box.rotate(Quaternion(matrix=ego_to_global[:3, :3]))
                            box.translate(ego_to_global[:3, 3])
                            boxes_0_3.append(box)
                    # Convert boxes to nuScenes detection challenge result format.
                    sample_results = [box_to_sample_result(current_sample_token, box) for box in boxes]
                    results[current_sample_token] = sample_results

                    sample_results_0_3 = [box_to_sample_result(current_sample_token, box) for box in boxes_0_3]
                    results_0_3[current_sample_token] = sample_results_0_3

                    current_sample_token = sample['next']
    f_info = open(os.path.join(FLAGS.output, 'inference_time.txt'), 'w+')
    average_inf_time = sum(inf_time_list) / float(len(inf_time_list))
    f_info.write('average time:{}\n'.format(average_inf_time))
    f_info.write(str(inf_time_list))
    submission = {
        'meta': meta,
        'results': results
    }
    submission_path = os.path.join(FLAGS.output, 'submission_0_3nn30.json')
    with open(submission_path, 'w') as f:
        json.dump(submission, f, indent=2)

    submission_0_3 = {
        'meta': meta,
        'results': results_0_3
    }
    submission_path_0_3 = os.path.join(FLAGS.output, 'submission_0_3nn25.json')
    with open(submission_path_0_3, 'w') as f:
        json.dump(submission_0_3, f, indent=2)
Example #21
def fill_trainval_infos(data_path, nusc, train_scenes, val_scenes, test=False, max_sweeps=10):
    train_nusc_infos = []
    val_nusc_infos = []
    progress_bar = tqdm.tqdm(total=len(nusc.sample), desc='create_info', dynamic_ncols=True)

    ref_chan = 'LIDAR_TOP'  # The reference channel of the current sample_rec that the point clouds are mapped to.
    chan = 'LIDAR_TOP'  # The channel from which we track back n sweeps to aggregate the point cloud.

    for index, sample in enumerate(nusc.sample):
        progress_bar.update()

        ref_sd_token = sample['data'][ref_chan]
        ref_sd_rec = nusc.get('sample_data', ref_sd_token)
        ref_cs_rec = nusc.get('calibrated_sensor', ref_sd_rec['calibrated_sensor_token'])
        ref_pose_rec = nusc.get('ego_pose', ref_sd_rec['ego_pose_token'])
        ref_time = 1e-6 * ref_sd_rec['timestamp']

        ref_lidar_path, ref_boxes, _ = get_sample_data(nusc, ref_sd_token)

        ref_cam_front_token = sample['data']['CAM_FRONT']
        ref_cam_path, _, ref_cam_intrinsic = nusc.get_sample_data(ref_cam_front_token)

        # Homogeneous transform from ego car frame to reference frame
        ref_from_car = transform_matrix(
            ref_cs_rec['translation'], Quaternion(ref_cs_rec['rotation']), inverse=True
        )

        # Homogeneous transformation matrix from global to _current_ ego car frame
        car_from_global = transform_matrix(
            ref_pose_rec['translation'], Quaternion(ref_pose_rec['rotation']), inverse=True,
        )

        info = {
            'lidar_path': Path(ref_lidar_path).relative_to(data_path).__str__(),
            'cam_front_path': Path(ref_cam_path).relative_to(data_path).__str__(),
            'cam_intrinsic': ref_cam_intrinsic,
            'token': sample['token'],
            'sweeps': [],
            'ref_from_car': ref_from_car,
            'car_from_global': car_from_global,
            'timestamp': ref_time,
        }

        sample_data_token = sample['data'][chan]
        curr_sd_rec = nusc.get('sample_data', sample_data_token)
        sweeps = []
        while len(sweeps) < max_sweeps - 1:
            if curr_sd_rec['prev'] == '':
                if len(sweeps) == 0:
                    sweep = {
                        'lidar_path': Path(ref_lidar_path).relative_to(data_path).__str__(),
                        'sample_data_token': curr_sd_rec['token'],
                        'transform_matrix': None,
                        'time_lag': curr_sd_rec['timestamp'] * 0,
                    }
                    sweeps.append(sweep)
                else:
                    sweeps.append(sweeps[-1])
            else:
                curr_sd_rec = nusc.get('sample_data', curr_sd_rec['prev'])

                # Get past pose
                current_pose_rec = nusc.get('ego_pose', curr_sd_rec['ego_pose_token'])
                global_from_car = transform_matrix(
                    current_pose_rec['translation'], Quaternion(current_pose_rec['rotation']), inverse=False,
                )

                # Homogeneous transformation matrix from sensor coordinate frame to ego car frame.
                current_cs_rec = nusc.get(
                    'calibrated_sensor', curr_sd_rec['calibrated_sensor_token']
                )
                car_from_current = transform_matrix(
                    current_cs_rec['translation'], Quaternion(current_cs_rec['rotation']), inverse=False,
                )

                tm = reduce(np.dot, [ref_from_car, car_from_global, global_from_car, car_from_current])

                lidar_path = nusc.get_sample_data_path(curr_sd_rec['token'])

                time_lag = ref_time - 1e-6 * curr_sd_rec['timestamp']

                sweep = {
                    'lidar_path': Path(lidar_path).relative_to(data_path).__str__(),
                    'sample_data_token': curr_sd_rec['token'],
                    'transform_matrix': tm,
                    'global_from_car': global_from_car,
                    'car_from_current': car_from_current,
                    'time_lag': time_lag,
                }
                sweeps.append(sweep)

        info['sweeps'] = sweeps

        assert len(info['sweeps']) == max_sweeps - 1, \
            f"sweep {curr_sd_rec['token']} only has {len(info['sweeps'])} sweeps, " \
            f"you should duplicate to sweep num {max_sweeps - 1}"

        if not test:
            annotations = [nusc.get('sample_annotation', token) for token in sample['anns']]

            # the filtering gives 0.5~1 map improvement
            num_lidar_pts = np.array([anno['num_lidar_pts'] for anno in annotations])
            num_radar_pts = np.array([anno['num_radar_pts'] for anno in annotations])
            mask = (num_lidar_pts + num_radar_pts > 0)

            locs = np.array([b.center for b in ref_boxes]).reshape(-1, 3)
            dims = np.array([b.wlh for b in ref_boxes]).reshape(-1, 3)[:, [1, 0, 2]]  # wlh -> dxdydz (lwh)
            velocity = np.array([b.velocity for b in ref_boxes]).reshape(-1, 3)
            rots = np.array([quaternion_yaw(b.orientation) for b in ref_boxes]).reshape(-1, 1)
            names = np.array([b.name for b in ref_boxes])
            tokens = np.array([b.token for b in ref_boxes])
            gt_boxes = np.concatenate([locs, dims, rots, velocity[:, :2]], axis=1)

            assert len(annotations) == len(gt_boxes) == len(velocity)

            info['gt_boxes'] = gt_boxes[mask, :]
            info['gt_boxes_velocity'] = velocity[mask, :]
            info['gt_names'] = np.array([map_name_from_general_to_detection[name] for name in names])[mask]
            info['gt_boxes_token'] = tokens[mask]
            info['num_lidar_pts'] = num_lidar_pts[mask]
            info['num_radar_pts'] = num_radar_pts[mask]

        if sample['scene_token'] in train_scenes:
            train_nusc_infos.append(info)
        else:
            val_nusc_infos.append(info)

    progress_bar.close()
    return train_nusc_infos, val_nusc_infos
Example #22
def main():
    SCENE_SPLITS["mini-val"] = SCENE_SPLITS["val"]
    if not os.path.exists(DATA_PATH):
        os.mkdir(DATA_PATH)
    if not os.path.exists(OUT_PATH):
        os.mkdir(OUT_PATH)
    for split in SPLITS:
        data_path = DATA_PATH  # + '{}/'.format(SPLITS[split])
        nusc = NuScenes(version=SPLITS[split],
                        dataroot=data_path,
                        verbose=True)
        out_path = OUT_PATH + "{}.json".format(split)
        categories_info = [{
            "name": CATS[i],
            "id": i + 1
        } for i in range(len(CATS))]
        ret = {
            "images": [],
            "annotations": [],
            "categories": categories_info,
            "videos": [],
            "attributes": ATTRIBUTE_TO_ID,
        }
        num_images = 0
        num_anns = 0
        num_videos = 0

        # A "sample" in nuScenes refers to a timestamp with 6 cameras and 1 LIDAR.
        for sample in nusc.sample:
            scene_name = nusc.get("scene", sample["scene_token"])["name"]
            if not (split in ["mini", "test"
                              ]) and not (scene_name in SCENE_SPLITS[split]):
                continue
            if sample["prev"] == "":
                print("scene_name", scene_name)
                num_videos += 1
                ret["videos"].append({
                    "id": num_videos,
                    "file_name": scene_name
                })
                frame_ids = {k: 0 for k in sample["data"]}
                track_ids = {}
            # We decompose a sample into 6 images in our case.
            for sensor_name in sample["data"]:
                if sensor_name in USED_SENSOR:
                    image_token = sample["data"][sensor_name]
                    image_data = nusc.get("sample_data", image_token)
                    num_images += 1

                    # Complex coordinate transform. This will take time to understand.
                    sd_record = nusc.get("sample_data", image_token)
                    cs_record = nusc.get("calibrated_sensor",
                                         sd_record["calibrated_sensor_token"])
                    pose_record = nusc.get("ego_pose",
                                           sd_record["ego_pose_token"])
                    global_from_car = transform_matrix(
                        pose_record["translation"],
                        Quaternion(pose_record["rotation"]),
                        inverse=False,
                    )
                    car_from_sensor = transform_matrix(
                        cs_record["translation"],
                        Quaternion(cs_record["rotation"]),
                        inverse=False,
                    )
                    trans_matrix = np.dot(global_from_car, car_from_sensor)
                    _, boxes, camera_intrinsic = nusc.get_sample_data(
                        image_token, box_vis_level=BoxVisibility.ANY)
                    calib = np.eye(4, dtype=np.float32)
                    calib[:3, :3] = camera_intrinsic
                    calib = calib[:3]
                    frame_ids[sensor_name] += 1

                    # image information in COCO format
                    image_info = {
                        "id": num_images,
                        "file_name": image_data["filename"],
                        "calib": calib.tolist(),
                        "video_id": num_videos,
                        "frame_id": frame_ids[sensor_name],
                        "sensor_id": SENSOR_ID[sensor_name],
                        "sample_token": sample["token"],
                        "trans_matrix": trans_matrix.tolist(),
                        "width": sd_record["width"],
                        "height": sd_record["height"],
                        "pose_record_trans": pose_record["translation"],
                        "pose_record_rot": pose_record["rotation"],
                        "cs_record_trans": cs_record["translation"],
                        "cs_record_rot": cs_record["rotation"],
                    }
                    ret["images"].append(image_info)
                    anns = []
                    for box in boxes:
                        det_name = category_to_detection_name(box.name)
                        if det_name is None:
                            continue
                        num_anns += 1
                        v = np.dot(box.rotation_matrix, np.array([1, 0, 0]))
                        yaw = -np.arctan2(v[2], v[0])
                        box.translate(np.array([0, box.wlh[2] / 2, 0]))
                        category_id = CAT_IDS[det_name]

                        amodel_center = project_to_image(
                            np.array(
                                [
                                    box.center[0],
                                    box.center[1] - box.wlh[2] / 2,
                                    box.center[2],
                                ],
                                np.float32,
                            ).reshape(1, 3),
                            calib,
                        )[0].tolist()
                        sample_ann = nusc.get("sample_annotation", box.token)
                        instance_token = sample_ann["instance_token"]
                        if not (instance_token in track_ids):
                            track_ids[instance_token] = len(track_ids) + 1
                        attribute_tokens = sample_ann["attribute_tokens"]
                        attributes = [
                            nusc.get("attribute", att_token)["name"]
                            for att_token in attribute_tokens
                        ]
                        att = "" if len(attributes) == 0 else attributes[0]
                        if len(attributes) > 1:
                            print(attributes)
                            import pdb

                            pdb.set_trace()
                        track_id = track_ids[instance_token]
                        vel = nusc.box_velocity(box.token)  # global frame
                        vel = np.dot(
                            np.linalg.inv(trans_matrix),
                            np.array([vel[0], vel[1], vel[2], 0], np.float32),
                        ).tolist()

                        # instance information in COCO format
                        ann = {
                            "id":
                            num_anns,
                            "image_id":
                            num_images,
                            "category_id":
                            category_id,
                            "dim": [box.wlh[2], box.wlh[0], box.wlh[1]],
                            "location":
                            [box.center[0], box.center[1], box.center[2]],
                            "depth":
                            box.center[2],
                            "occluded":
                            0,
                            "truncated":
                            0,
                            "rotation_y":
                            yaw,
                            "amodel_center":
                            amodel_center,
                            "iscrowd":
                            0,
                            "track_id":
                            track_id,
                            "attributes":
                            ATTRIBUTE_TO_ID[att],
                            "velocity":
                            vel,
                        }

                        bbox = KittiDB.project_kitti_box_to_image(
                            copy.deepcopy(box),
                            camera_intrinsic,
                            imsize=(1600, 900))
                        alpha = _rot_y2alpha(
                            yaw,
                            (bbox[0] + bbox[2]) / 2,
                            camera_intrinsic[0, 2],
                            camera_intrinsic[0, 0],
                        )
                        ann["bbox"] = [
                            bbox[0],
                            bbox[1],
                            bbox[2] - bbox[0],
                            bbox[3] - bbox[1],
                        ]
                        ann["area"] = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
                        ann["alpha"] = alpha
                        anns.append(ann)

                    # Filter out bounding boxes outside the image
                    visable_anns = []
                    for i in range(len(anns)):
                        vis = True
                        for j in range(len(anns)):
                            if anns[i]["depth"] - min(anns[i][
                                    "dim"]) / 2 > anns[j]["depth"] + max(
                                        anns[j]["dim"]) / 2 and _bbox_inside(
                                            anns[i]["bbox"], anns[j]["bbox"]):
                                vis = False
                                break
                        if vis:
                            visable_anns.append(anns[i])
                        else:
                            pass

                    for ann in visable_anns:
                        ret["annotations"].append(ann)
                    if DEBUG:
                        img_path = data_path + image_info["file_name"]
                        img = cv2.imread(img_path)
                        img_3d = img.copy()
                        for ann in visable_anns:
                            bbox = ann["bbox"]
                            cv2.rectangle(
                                img,
                                (int(bbox[0]), int(bbox[1])),
                                (int(bbox[2] + bbox[0]),
                                 int(bbox[3] + bbox[1])),
                                (0, 0, 255),
                                3,
                                lineType=cv2.LINE_AA,
                            )
                            box_3d = compute_box_3d(ann["dim"],
                                                    ann["location"],
                                                    ann["rotation_y"])
                            box_2d = project_to_image(box_3d, calib)
                            img_3d = draw_box_3d(img_3d, box_2d)

                            pt_3d = unproject_2d_to_3d(ann["amodel_center"],
                                                       ann["depth"], calib)
                            pt_3d[1] += ann["dim"][0] / 2
                            print("location", ann["location"])
                            print("loc model", pt_3d)
                            pt_2d = np.array(
                                [(bbox[0] + bbox[2]) / 2,
                                 (bbox[1] + bbox[3]) / 2],
                                dtype=np.float32,
                            )
                            pt_3d = unproject_2d_to_3d(pt_2d, ann["depth"],
                                                       calib)
                            pt_3d[1] += ann["dim"][0] / 2
                            print("loc      ", pt_3d)
                        cv2.imshow("img", img)
                        cv2.imshow("img_3d", img_3d)
                        cv2.waitKey()
                        nusc.render_sample_data(image_token)
                        plt.show()
        print("reordering images")
        images = ret["images"]
        video_sensor_to_images = {}
        for image_info in images:
            tmp_seq_id = image_info["video_id"] * 20 + image_info["sensor_id"]
            if tmp_seq_id in video_sensor_to_images:
                video_sensor_to_images[tmp_seq_id].append(image_info)
            else:
                video_sensor_to_images[tmp_seq_id] = [image_info]
        ret["images"] = []
        for tmp_seq_id in sorted(video_sensor_to_images):
            ret["images"] = ret["images"] + video_sensor_to_images[tmp_seq_id]

        print("{} {} images {} boxes".format(split, len(ret["images"]),
                                             len(ret["annotations"])))
        print("out_path", out_path)
        json.dump(ret, open(out_path, "w"))
Example #23
    def _read_sample(self, sample: typing.Tuple[dict, int]):

        radars = ["RADAR_FRONT", "RADAR_FRONT_LEFT", "RADAR_FRONT_RIGHT"]
        cameras = ["CAM_FRONT"]

        sample_id = self.make_sample_id(sample)
        sample, sample_index = sample
        sample_data_token: str = sample["data"]["LIDAR_TOP"]

        lidar_sample_data = self.nusc.get("sample_data", sample_data_token)
        assert lidar_sample_data["is_key_frame"]

        data_path, box_list, cam_intrinsic = self.nusc.get_sample_data(
            sample_data_token)

        # POINT CLOUD [(x y z I) x P]. Intensity from 0.0 to 255.0
        point_cloud_orig = LidarPointCloud.from_file(data_path)
        if point_cloud_orig.points.dtype != np.float32:
            raise RuntimeError("Point cloud has wrong data type.")
        # -> [P x (x y z I)]
        point_cloud_orig = point_cloud_orig.points.transpose()

        camera_data = {}
        radar_data = {}
        box_data = {}
        lidarseg_data = {}

        if self.read_semantics:
            lidarseg_data = self._read_lidarseg_data(sample_data_token,
                                                     point_cloud_orig)

        if self.read_bounding_boxes:
            box_data = self._read_bounding_boxes(box_list, point_cloud_orig,
                                                 sample)

        # rotate point cloud 90 degrees around z-axis,
        # so that x-axis faces front of vehicle
        x = point_cloud_orig[:, 0:1]
        y = point_cloud_orig[:, 1:2]
        point_cloud = np.concatenate((y, -x, point_cloud_orig[:, 2:4]),
                                     axis=-1)

        # LiDAR extrinsic calibration
        calibration_lidar = self.nusc.get(
            "calibrated_sensor", lidar_sample_data["calibrated_sensor_token"])
        tf_lidar = transform_matrix(calibration_lidar["translation"],
                                    Quaternion(calibration_lidar["rotation"]))
        # vehicle -> point cloud coords (turned by 90 degrees)
        tf_vehicle_pc = np.linalg.inv(
            np.dot(tf_lidar, NuscenesReader.turn_matrix))

        if self.read_camera:
            # CAMERAS
            camera_data: typing.List[typing.Dict[str, typing.Any]] = [
                self._read_sensor_data_and_extrinsics(sample, cam_name,
                                                      tf_vehicle_pc,
                                                      self._read_camera_data)
                for cam_name in cameras
            ]
            camera_data = {k: v for d in camera_data for k, v in d.items()}

        if self.read_radar:
            # RADARS
            radar_data: typing.List[typing.Dict[str, typing.Any]] = [
                self._read_sensor_data_and_extrinsics(sample, radar_name,
                                                      tf_vehicle_pc,
                                                      self._read_radar_data)
                for radar_name in radars
            ]
            radar_data = {k: v for d in radar_data for k, v in d.items()}

        assert box_data.keys().isdisjoint(camera_data.keys())
        assert box_data.keys().isdisjoint(radar_data.keys())
        assert box_data.keys().isdisjoint(lidarseg_data.keys())
        # return feature array. Add sample ID.
        d = {
            "sample_id": sample_id,
            "point_cloud": point_cloud,
            **box_data,
            **camera_data,
            **radar_data,
            **lidarseg_data,
        }
        return d
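
The (x, y) -> (y, -x) swap in _read_sample above is simply a rotation of the point coordinates by -90 degrees about the z-axis (equivalently, turning the sensor frame by +90 degrees so x points forward). A small standalone check of that equivalence, not part of the reader class:

import numpy as np

pts = np.random.default_rng(0).normal(size=(5, 2))    # some (x, y) coordinates
rot_z_neg90 = np.array([[0.0, 1.0],
                        [-1.0, 0.0]])                 # Rz(-90 deg) acting on column vectors
rotated = (rot_z_neg90 @ pts.T).T
swapped = np.stack([pts[:, 1], -pts[:, 0]], axis=-1)  # the swap used in _read_sample
assert np.allclose(rotated, swapped)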
Example No. 24
    def from_file_multisweep(
            cls,
            nusc: 'NuScenes',
            sample_rec: Dict,
            chan: str,
            ref_chan: str,
            nsweeps: int = 5,
            min_distance: float = 1.0) -> Tuple['PointCloud', np.ndarray]:
        """
        Return a point cloud that aggregates multiple sweeps.
        As every sweep is in a different coordinate frame, we need to map the coordinates to a single reference frame.
        As every sweep has a different timestamp, we need to account for that in the transformations and timestamps.
        :param nusc: A NuScenes instance.
        :param sample_rec: The current sample.
        :param chan: The lidar/radar channel from which we track back n sweeps to aggregate the point cloud.
        :param ref_chan: The reference channel of the current sample_rec that the point clouds are mapped to.
        :param nsweeps: Number of sweeps to aggregate.
        :param min_distance: Distance below which points are discarded.
        :return: (all_pc, all_times). The aggregated point cloud and timestamps.
        """
        # Init.
        points = np.zeros((cls.nbr_dims(), 0))
        all_pc = cls(points)
        all_times = np.zeros((1, 0))

        # Get reference pose and timestamp.
        ref_sd_token = sample_rec['data'][ref_chan]
        ref_sd_rec = nusc.get('sample_data', ref_sd_token)
        ref_pose_rec = nusc.get('ego_pose', ref_sd_rec['ego_pose_token'])
        ref_cs_rec = nusc.get('calibrated_sensor',
                              ref_sd_rec['calibrated_sensor_token'])
        ref_time = 1e-6 * ref_sd_rec['timestamp']

        # Homogeneous transform from ego car frame to reference frame.
        ref_from_car = transform_matrix(ref_cs_rec['translation'],
                                        Quaternion(ref_cs_rec['rotation']),
                                        inverse=True)

        # Homogeneous transformation matrix from global to _current_ ego car frame.
        car_from_global = transform_matrix(ref_pose_rec['translation'],
                                           Quaternion(
                                               ref_pose_rec['rotation']),
                                           inverse=True)

        # Aggregate current and previous sweeps.
        sample_data_token = sample_rec['data'][chan]
        current_sd_rec = nusc.get('sample_data', sample_data_token)
        for _ in range(nsweeps):
            # Load up the pointcloud and remove points close to the sensor.
            current_pc = cls.from_file(
                osp.join(nusc.dataroot, current_sd_rec['filename']))
            current_pc.remove_close(min_distance)

            # Get past pose.
            current_pose_rec = nusc.get('ego_pose',
                                        current_sd_rec['ego_pose_token'])
            global_from_car = transform_matrix(
                current_pose_rec['translation'],
                Quaternion(current_pose_rec['rotation']),
                inverse=False)

            # Homogeneous transformation matrix from sensor coordinate frame to ego car frame.
            current_cs_rec = nusc.get(
                'calibrated_sensor', current_sd_rec['calibrated_sensor_token'])
            car_from_current = transform_matrix(
                current_cs_rec['translation'],
                Quaternion(current_cs_rec['rotation']),
                inverse=False)

            # Fuse four transformation matrices into one and perform transform.
            trans_matrix = reduce(np.dot, [
                ref_from_car, car_from_global, global_from_car,
                car_from_current
            ])
            current_pc.transform(trans_matrix)

            # Add time vector which can be used as a temporal feature.
            time_lag = ref_time - 1e-6 * current_sd_rec[
                'timestamp']  # Positive difference.
            times = time_lag * np.ones((1, current_pc.nbr_points()))
            all_times = np.hstack((all_times, times))

            # Merge with key pc.
            all_pc.points = np.hstack((all_pc.points, current_pc.points))

            # Abort if there are no previous sweeps.
            if current_sd_rec['prev'] == '':
                break
            else:
                current_sd_rec = nusc.get('sample_data',
                                          current_sd_rec['prev'])

        return all_pc, all_times
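
The fused trans_matrix above reads right to left: car_from_current takes points from the sweep sensor to the ego frame at the sweep time, global_from_car lifts them into the global frame, and car_from_global followed by ref_from_car brings them into the reference sensor frame. A hedged usage sketch (the dataroot and version are assumptions, not part of the snippet):

from nuscenes.nuscenes import NuScenes
from nuscenes.utils.data_classes import LidarPointCloud

# assumes the v1.0-mini split has been extracted to ./data/nuscenes
nusc = NuScenes(version='v1.0-mini', dataroot='./data/nuscenes', verbose=False)
sample_rec = nusc.sample[0]
all_pc, all_times = LidarPointCloud.from_file_multisweep(
    nusc, sample_rec, chan='LIDAR_TOP', ref_chan='LIDAR_TOP', nsweeps=5)
print(all_pc.points.shape, all_times.shape)  # (4, N) aggregated points, (1, N) time lags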
Example No. 25
    def load_annotations(self, idx):
        """
        Converts the nuScenes GT annotations of one sample to KITTI format.
        """
        annotations = []
        kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2)
        kitti_to_nu_lidar_inv = kitti_to_nu_lidar.inverse

        sample_token = self.sample_tokens[idx]
        sample = self.nusc.get('sample', sample_token)
        sample_annotation_tokens = sample['anns']

        cam_front_token = sample['data'][self.cam_name]
        lidar_token = sample['data'][self.lidar_name]

        # Retrieve sensor records.
        sd_record_cam = self.nusc.get('sample_data', cam_front_token)
        sd_record_lid = self.nusc.get('sample_data', lidar_token)
        cs_record_cam = self.nusc.get('calibrated_sensor',
                                      sd_record_cam['calibrated_sensor_token'])
        cs_record_lid = self.nusc.get('calibrated_sensor',
                                      sd_record_lid['calibrated_sensor_token'])

        # Combine transformations and convert to KITTI format.
        # Note: cam uses same conventions in KITTI and nuScenes.
        lid_to_ego = transform_matrix(cs_record_lid['translation'],
                                      Quaternion(cs_record_lid['rotation']),
                                      inverse=False)
        ego_to_cam = transform_matrix(cs_record_cam['translation'],
                                      Quaternion(cs_record_cam['rotation']),
                                      inverse=True)
        velo_to_cam = np.dot(ego_to_cam, lid_to_ego)

        # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam.
        velo_to_cam_kitti = np.dot(velo_to_cam,
                                   kitti_to_nu_lidar.transformation_matrix)

        # Currently not used.
        imu_to_velo_kitti = np.zeros((3, 4), dtype=np.float32)  # Dummy values.
        r0_rect = Quaternion(axis=[1, 0, 0], angle=0)  # Dummy values.

        # Projection matrix.
        p_left_kitti = np.zeros((3, 4), dtype=np.float32)
        p_left_kitti[:3, :3] = cs_record_cam[
            'camera_intrinsic']  # Cameras are always rectified.
        p_left_kitti = p_left_kitti[:3, :3]

        # Create KITTI style transforms.
        velo_to_cam_rot = velo_to_cam_kitti[:3, :3]
        velo_to_cam_trans = velo_to_cam_kitti[:3, 3]

        # Check that the rotation has the same format as in KITTI.
        assert (velo_to_cam_rot.round(0) == np.array([[0, -1, 0], [0, 0, -1],
                                                      [1, 0, 0]])).all()
        assert (velo_to_cam_trans[1:3] < 0).all()

        # Retrieve the token from the lidar.
        # Note that this may be confusing as the filename of the camera will include the timestamp of the lidar,
        # not the camera.
        filename_cam_full = sd_record_cam['filename']
        filename_lid_full = sd_record_lid['filename']

        if self.is_train:
            for sample_annotation_token in sample_annotation_tokens:

                sample_annotation = self.nusc.get('sample_annotation',
                                                  sample_annotation_token)
                # Convert nuScenes category to nuScenes detection challenge category.
                detection_name = category_to_detection_name(
                    sample_annotation['category_name'])
                if detection_name in self.classes:
                    # Get box in LIDAR frame.
                    _, box_lidar_nusc, _ = self.nusc.get_sample_data(
                        lidar_token,
                        box_vis_level=BoxVisibility.NONE,
                        selected_anntokens=[sample_annotation_token])
                    box_lidar_nusc = box_lidar_nusc[0]

                    # Convert from nuScenes to KITTI box format.
                    box_cam_kitti = KittiDB.box_nuscenes_to_kitti(
                        box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot),
                        velo_to_cam_trans, r0_rect)

                    # Convert quaternion to yaw angle.
                    v = np.dot(box_cam_kitti.rotation_matrix,
                               np.array([1, 0, 0]))
                    yaw = -np.arctan2(v[2], v[0])

                    annotations.append({
                        "class":
                        detection_name,
                        "label":
                        TYPE_ID_CONVERSION[detection_name],
                        "dimensions": [
                            float(box_cam_kitti.wlh[2]),
                            float(box_cam_kitti.wlh[0]),
                            float(box_cam_kitti.wlh[1])
                        ],
                        "locations": [
                            float(box_cam_kitti.center[0]),
                            float(box_cam_kitti.center[1]),
                            float(box_cam_kitti.center[2])
                        ],
                        "rot_y":
                        float(yaw)
                    })

        return annotations, p_left_kitti
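
The yaw recovered above via v = R * [1, 0, 0]^T and -arctan2(v[2], v[0]) is exactly the KITTI rotation_y for a rotation about the camera y-axis. A quick numeric sanity check, standalone and not part of the dataset class:

import numpy as np
from pyquaternion import Quaternion

ry = 0.3  # some rotation angle about the camera y-axis (KITTI rotation_y)
rot = Quaternion(axis=[0.0, 1.0, 0.0], angle=ry)
v = np.dot(rot.rotation_matrix, np.array([1.0, 0.0, 0.0]))
assert np.isclose(-np.arctan2(v[2], v[0]), ry)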
Example No. 26
    def nuscenes_gt_to_kitti(self) -> None:
        """
        Converts nuScenes GT annotations to KITTI format.
        """
        kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2)
        kitti_to_nu_lidar_inv = kitti_to_nu_lidar.inverse
        imsize = (1600, 900)

        token_idx = 0  # Start tokens from 0.

        # Get assignment of scenes to splits.
        split_logs = create_splits_logs(self.split, self.nusc)

        # Create output folders.
        label_folder = os.path.join(self.nusc_kitti_dir, self.split, 'label_2')
        calib_folder = os.path.join(self.nusc_kitti_dir, self.split, 'calib')
        image_folder = os.path.join(self.nusc_kitti_dir, self.split, 'image_2')
        lidar_folder = os.path.join(self.nusc_kitti_dir, self.split,
                                    'velodyne')
        for folder in [label_folder, calib_folder, image_folder, lidar_folder]:
            if not os.path.isdir(folder):
                os.makedirs(folder)

        # Use only the samples from the current split.
        sample_tokens = self._split_to_samples(split_logs)
        sample_tokens = sample_tokens[:self.image_count]

        tokens = []
        for sample_token in sample_tokens:

            # Get sample data.
            sample = self.nusc.get('sample', sample_token)
            sample_annotation_tokens = sample['anns']
            cam_front_token = sample['data'][self.cam_name]
            lidar_token = sample['data'][self.lidar_name]

            # Retrieve sensor records.
            sd_record_cam = self.nusc.get('sample_data', cam_front_token)
            sd_record_lid = self.nusc.get('sample_data', lidar_token)
            cs_record_cam = self.nusc.get(
                'calibrated_sensor', sd_record_cam['calibrated_sensor_token'])
            cs_record_lid = self.nusc.get(
                'calibrated_sensor', sd_record_lid['calibrated_sensor_token'])

            # Combine transformations and convert to KITTI format.
            # Note: cam uses same conventions in KITTI and nuScenes.
            lid_to_ego = transform_matrix(cs_record_lid['translation'],
                                          Quaternion(
                                              cs_record_lid['rotation']),
                                          inverse=False)
            ego_to_cam = transform_matrix(cs_record_cam['translation'],
                                          Quaternion(
                                              cs_record_cam['rotation']),
                                          inverse=True)
            velo_to_cam = np.dot(ego_to_cam, lid_to_ego)

            # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam.
            velo_to_cam_kitti = np.dot(velo_to_cam,
                                       kitti_to_nu_lidar.transformation_matrix)

            # Currently not used.
            imu_to_velo_kitti = np.zeros((3, 4))  # Dummy values.
            r0_rect = Quaternion(axis=[1, 0, 0], angle=0)  # Dummy values.

            # Projection matrix.
            p_left_kitti = np.zeros((3, 4))
            p_left_kitti[:3, :3] = cs_record_cam[
                'camera_intrinsic']  # Cameras are always rectified.

            # Create KITTI style transforms.
            velo_to_cam_rot = velo_to_cam_kitti[:3, :3]
            velo_to_cam_trans = velo_to_cam_kitti[:3, 3]

            # Check that the rotation has the same format as in KITTI.
            assert (velo_to_cam_rot.round(0) == np.array([[0, -1,
                                                           0], [0, 0, -1],
                                                          [1, 0, 0]])).all()
            assert (velo_to_cam_trans[1:3] < 0).all()

            # Retrieve the token from the lidar.
            # Note that this may be confusing as the filename of the camera will include the timestamp of the lidar,
            # not the camera.
            filename_cam_full = sd_record_cam['filename']
            filename_lid_full = sd_record_lid['filename']
            # token = '%06d' % token_idx # Alternative to use KITTI names.
            token_idx += 1

            # Convert image (jpg to png).
            src_im_path = os.path.join(self.nusc.dataroot, filename_cam_full)
            dst_im_path = os.path.join(image_folder, sample_token + '.png')
            if not os.path.exists(dst_im_path):
                im = Image.open(src_im_path)
                im.save(dst_im_path, "PNG")

            # Convert lidar.
            # Note that we are only using a single sweep, instead of the commonly used n sweeps.
            src_lid_path = os.path.join(self.nusc.dataroot, filename_lid_full)
            dst_lid_path = os.path.join(lidar_folder, sample_token + '.bin')
            assert not dst_lid_path.endswith('.pcd.bin')
            pcl = LidarPointCloud.from_file(src_lid_path)
            pcl.rotate(
                kitti_to_nu_lidar_inv.rotation_matrix)  # In KITTI lidar frame.
            with open(dst_lid_path, "wb") as lid_file:
                pcl.points.T.tofile(lid_file)

            # Add to tokens.
            tokens.append(sample_token)

            # Create calibration file.
            kitti_transforms = dict()
            kitti_transforms['P0'] = np.zeros((3, 4))  # Dummy values.
            kitti_transforms['P1'] = np.zeros((3, 4))  # Dummy values.
            kitti_transforms['P2'] = p_left_kitti  # Left camera transform.
            kitti_transforms['P3'] = np.zeros((3, 4))  # Dummy values.
            kitti_transforms[
                'R0_rect'] = r0_rect.rotation_matrix  # Cameras are already rectified.
            kitti_transforms['Tr_velo_to_cam'] = np.hstack(
                (velo_to_cam_rot, velo_to_cam_trans.reshape(3, 1)))
            kitti_transforms['Tr_imu_to_velo'] = imu_to_velo_kitti
            calib_path = os.path.join(calib_folder, sample_token + '.txt')
            with open(calib_path, "w") as calib_file:
                for (key, val) in kitti_transforms.items():
                    val = val.flatten()
                    val_str = '%.12e' % val[0]
                    for v in val[1:]:
                        val_str += ' %.12e' % v
                    calib_file.write('%s: %s\n' % (key, val_str))

            # Write label file.
            label_path = os.path.join(label_folder, sample_token + '.txt')
            if os.path.exists(label_path):
                print('Skipping existing file: %s' % label_path)
                continue
            else:
                print('Writing file: %s' % label_path)
            with open(label_path, "w") as label_file:
                for sample_annotation_token in sample_annotation_tokens:
                    sample_annotation = self.nusc.get('sample_annotation',
                                                      sample_annotation_token)

                    # Get box in LIDAR frame.
                    _, box_lidar_nusc, _ = self.nusc.get_sample_data(
                        lidar_token,
                        box_vis_level=BoxVisibility.NONE,
                        selected_anntokens=[sample_annotation_token])
                    box_lidar_nusc = box_lidar_nusc[0]

                    # Truncated: Set all objects to 0 which means untruncated.
                    truncated = 0.0

                    # Occluded: Set all objects to full visibility as this information is not available in nuScenes.
                    occluded = 0

                    # Convert nuScenes category to nuScenes detection challenge category.
                    detection_name = category_to_detection_name(
                        sample_annotation['category_name'])

                    # Skip categories that are not part of the nuScenes detection challenge.
                    if detection_name is None:
                        continue

                    # Convert from nuScenes to KITTI box format.
                    box_cam_kitti = KittiDB.box_nuscenes_to_kitti(
                        box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot),
                        velo_to_cam_trans, r0_rect)

                    # Project 3d box to 2d box in image, ignore box if it does not fall inside.
                    bbox_2d = KittiDB.project_kitti_box_to_image(box_cam_kitti,
                                                                 p_left_kitti,
                                                                 imsize=imsize)
                    if bbox_2d is None:
                        continue

                    # Set dummy score so we can use this file as result.
                    box_cam_kitti.score = 0

                    # Convert box to output string format.
                    output = KittiDB.box_to_string(name=detection_name,
                                                   box=box_cam_kitti,
                                                   bbox_2d=bbox_2d,
                                                   truncation=truncated,
                                                   occlusion=occluded)

                    # Write to disk.
                    label_file.write(output + '\n')
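
Because pcl.points is a float32 array of shape (4, N), the .T.tofile(...) call above writes one (x, y, z, intensity) float32 row per point, which is the layout KITTI velodyne readers expect. A small round-trip sketch (the path is a placeholder for one of the files written to lidar_folder):

import numpy as np

dst_lid_path = 'nusc_kitti/train/velodyne/<sample_token>.bin'  # placeholder path
points = np.fromfile(dst_lid_path, dtype=np.float32).reshape(-1, 4)
print(points.shape)  # (num_points, 4): x, y, z, intensity in the KITTI lidar frame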
Example No. 27
def main():
  SCENE_SPLITS['mini-val'] = SCENE_SPLITS['val']
  if not os.path.exists(OUT_PATH):
    os.mkdir(OUT_PATH)
  for split in SPLITS:
    data_path = DATA_PATH + '{}/'.format(SPLITS[split])
    nusc = NuScenes(
      version=SPLITS[split], dataroot=data_path, verbose=True)
    out_path = OUT_PATH + '{}.json'.format(split)
    categories_info = [{'name': CATS[i], 'id': i + 1} for i in range(len(CATS))]
    ret = {'images': [], 'annotations': [], 'categories': categories_info, 
           'videos': [], 'attributes': ATTRIBUTE_TO_ID}
    num_images = 0
    num_anns = 0
    num_videos = 0

    # A "sample" in nuScenes refers to a timestamp with 6 cameras and 1 LIDAR.
    for sample in nusc.sample:
      scene_name = nusc.get('scene', sample['scene_token'])['name']
      if not (split in ['mini', 'test']) and \
        not (scene_name in SCENE_SPLITS[split]):
        continue
      if sample['prev'] == '':
        print('scene_name', scene_name)
        num_videos += 1
        ret['videos'].append({'id': num_videos, 'file_name': scene_name})
        frame_ids = {k: 0 for k in sample['data']}
        track_ids = {}
      # We decompose a sample into 6 images in our case.
      for sensor_name in sample['data']:
        if sensor_name in USED_SENSOR:
          image_token = sample['data'][sensor_name]
          image_data = nusc.get('sample_data', image_token)
          num_images += 1

          # Build the sensor -> global transform by chaining sensor -> ego (calibrated_sensor) and ego -> global (ego_pose).
          sd_record = nusc.get('sample_data', image_token)
          cs_record = nusc.get(
            'calibrated_sensor', sd_record['calibrated_sensor_token'])
          pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
          global_from_car = transform_matrix(pose_record['translation'],
            Quaternion(pose_record['rotation']), inverse=False)
          car_from_sensor = transform_matrix(
            cs_record['translation'], Quaternion(cs_record['rotation']),
            inverse=False)
          trans_matrix = np.dot(global_from_car, car_from_sensor)
          _, boxes, camera_intrinsic = nusc.get_sample_data(
            image_token, box_vis_level=BoxVisibility.ANY)
          calib = np.eye(4, dtype=np.float32)
          calib[:3, :3] = camera_intrinsic
          calib = calib[:3]
          frame_ids[sensor_name] += 1

          # image information in COCO format
          image_info = {'id': num_images,
                        'file_name': image_data['filename'],
                        'calib': calib.tolist(), 
                        'video_id': num_videos,
                        'frame_id': frame_ids[sensor_name],
                        'sensor_id': SENSOR_ID[sensor_name],
                        'sample_token': sample['token'],
                        'trans_matrix': trans_matrix.tolist(),
                        'width': sd_record['width'],
                        'height': sd_record['height'],
                        'pose_record_trans': pose_record['translation'],
                        'pose_record_rot': pose_record['rotation'],
                        'cs_record_trans': cs_record['translation'],
                        'cs_record_rot': cs_record['rotation']}
          ret['images'].append(image_info)
          anns = []
          for box in boxes:
            det_name = category_to_detection_name(box.name)
            if det_name is None:
              continue
            num_anns += 1
            v = np.dot(box.rotation_matrix, np.array([1, 0, 0]))
            yaw = -np.arctan2(v[2], v[0])
            box.translate(np.array([0, box.wlh[2] / 2, 0]))
            category_id = CAT_IDS[det_name]

            amodel_center = project_to_image(
              np.array([box.center[0], box.center[1] - box.wlh[2] / 2, box.center[2]], 
                np.float32).reshape(1, 3), calib)[0].tolist()
            sample_ann = nusc.get(
              'sample_annotation', box.token)
            instance_token = sample_ann['instance_token']
            if not (instance_token in track_ids):
              track_ids[instance_token] = len(track_ids) + 1
            attribute_tokens = sample_ann['attribute_tokens']
            attributes = [nusc.get('attribute', att_token)['name'] \
              for att_token in attribute_tokens]
            att = '' if len(attributes) == 0 else attributes[0]
            if len(attributes) > 1:
              print(attributes)
              import pdb; pdb.set_trace()
            track_id = track_ids[instance_token]
            vel = nusc.box_velocity(box.token) # global frame
            vel = np.dot(np.linalg.inv(trans_matrix), 
              np.array([vel[0], vel[1], vel[2], 0], np.float32)).tolist()
            
            # instance information in COCO format
            ann = {
              'id': num_anns,
              'image_id': num_images,
              'category_id': category_id,
              'dim': [box.wlh[2], box.wlh[0], box.wlh[1]],
              'location': [box.center[0], box.center[1], box.center[2]],
              'depth': box.center[2],
              'occluded': 0,
              'truncated': 0,
              'rotation_y': yaw,
              'amodel_center': amodel_center,
              'iscrowd': 0,
              'track_id': track_id,
              'attributes': ATTRIBUTE_TO_ID[att],
              'velocity': vel
            }

            bbox = KittiDB.project_kitti_box_to_image(
              copy.deepcopy(box), camera_intrinsic, imsize=(1600, 900))
            alpha = _rot_y2alpha(yaw, (bbox[0] + bbox[2]) / 2, 
                                 camera_intrinsic[0, 2], camera_intrinsic[0, 0])
            ann['bbox'] = [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]]
            ann['area'] = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
            ann['alpha'] = alpha
            anns.append(ann)

          # Filter out boxes that are fully occluded by a closer box whose 2D bbox encloses them.
          visable_anns = []
          for i in range(len(anns)):
            vis = True
            for j in range(len(anns)):
              if anns[i]['depth'] - min(anns[i]['dim']) / 2 > \
                 anns[j]['depth'] + max(anns[j]['dim']) / 2 and \
                _bbox_inside(anns[i]['bbox'], anns[j]['bbox']):
                vis = False
                break
            if vis:
              visable_anns.append(anns[i])
            else:
              pass

          for ann in visable_anns:
            ret['annotations'].append(ann)
          if DEBUG:
            img_path = data_path + image_info['file_name']
            img = cv2.imread(img_path)
            img_3d = img.copy()
            for ann in visable_anns:
              bbox = ann['bbox']
              cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), 
                            (int(bbox[2] + bbox[0]), int(bbox[3] + bbox[1])), 
                            (0, 0, 255), 3, lineType=cv2.LINE_AA)
              box_3d = compute_box_3d(ann['dim'], ann['location'], ann['rotation_y'])
              box_2d = project_to_image(box_3d, calib)
              img_3d = draw_box_3d(img_3d, box_2d)

              pt_3d = unproject_2d_to_3d(ann['amodel_center'], ann['depth'], calib)
              pt_3d[1] += ann['dim'][0] / 2
              print('location', ann['location'])
              print('loc model', pt_3d)
              pt_2d = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                                dtype=np.float32)
              pt_3d = unproject_2d_to_3d(pt_2d, ann['depth'], calib)
              pt_3d[1] += ann['dim'][0] / 2
              print('loc      ', pt_3d)
            cv2.imshow('img', img)
            cv2.imshow('img_3d', img_3d)
            cv2.waitKey()
            nusc.render_sample_data(image_token)
            plt.show()
    print('reordering images')
    images = ret['images']
    video_sensor_to_images = {}
    for image_info in images:
      tmp_seq_id = image_info['video_id'] * 20 + image_info['sensor_id']
      if tmp_seq_id in video_sensor_to_images:
        video_sensor_to_images[tmp_seq_id].append(image_info)
      else:
        video_sensor_to_images[tmp_seq_id] = [image_info]
    ret['images'] = []
    for tmp_seq_id in sorted(video_sensor_to_images):
      ret['images'] = ret['images'] + video_sensor_to_images[tmp_seq_id]

    print('{} {} images {} boxes'.format(
      split, len(ret['images']), len(ret['annotations'])))
    print('out_path', out_path)
    json.dump(ret, open(out_path, 'w'))
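
The helper _rot_y2alpha used above is not defined in this excerpt. A common definition (an assumption here, not necessarily the author's exact code) converts the global rotation_y into the KITTI observation angle alpha by subtracting the viewing-ray angle of the box centre column:

import numpy as np

def _rot_y2alpha(rot_y, x, cx, fx):
    # assumed helper: x is the box centre column in pixels, (cx, fx) come from the intrinsics
    alpha = rot_y - np.arctan2(x - cx, fx)
    if alpha > np.pi:
        alpha -= 2 * np.pi
    if alpha < -np.pi:
        alpha += 2 * np.pi
    return alpha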
Example No. 28
y_translation = []
yaw = []
sensor = 'LIDAR_TOP'
nu_to_kitti_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2).inverse
for scene in nusc.scene:
    current_sample_token = scene['first_sample_token']
    last_sample_token = scene['last_sample_token']
    while current_sample_token != last_sample_token:
        sample = nusc.get('sample', current_sample_token)
        lidar_top_data = nusc.get('sample_data', sample['data'][sensor])
        if lidar_top_data['prev']:
            ego_pose = nusc.get('ego_pose', lidar_top_data['ego_pose_token'])
            calib_sensor = nusc.get('calibrated_sensor', lidar_top_data['calibrated_sensor_token'])
            lidar_top_data_prev = nusc.get('sample_data', lidar_top_data['prev'])
            ego_pose_prev = nusc.get('ego_pose', lidar_top_data_prev['ego_pose_token'])
            calib_sensor_prev = nusc.get('calibrated_sensor', lidar_top_data_prev['calibrated_sensor_token'])
            ego_to_global = transform_matrix(ego_pose['translation'], Quaternion(ego_pose['rotation']))
            lidar_to_ego = transform_matrix(calib_sensor['translation'], Quaternion(calib_sensor['rotation']))
            ego_to_global_prev = transform_matrix(ego_pose_prev['translation'], Quaternion(ego_pose_prev['rotation']))
            lidar_to_ego_prev = transform_matrix(calib_sensor_prev['translation'],
                                                 Quaternion(calib_sensor_prev['rotation']))
            lidar_to_global = np.dot(ego_to_global, lidar_to_ego)
            lidar_to_global_prev = np.dot(ego_to_global_prev, lidar_to_ego_prev)
            delta = inv(lidar_to_global_prev).dot(lidar_to_global)
            y_translation.append(delta[1, 3])
            delta_angles = rotationMatrixToEulerAngles(delta[0:3, 0:3]) * 180 / np.pi
            yaw.append(delta_angles[2])

        for annotation_token in sample['anns']:
            annotation_metadata = nusc.get('sample_annotation', annotation_token)
            if annotation_metadata['num_lidar_pts'] == 0:
                continue
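
rotationMatrixToEulerAngles is not defined in this snippet either; a standard roll/pitch/yaw extraction for R = Rz(yaw) @ Ry(pitch) @ Rx(roll) (an assumption about the missing helper, consistent with reading the yaw from index 2) looks like this:

import numpy as np

def rotationMatrixToEulerAngles(R):
    # returns (roll, pitch, yaw) in radians for R = Rz(yaw) @ Ry(pitch) @ Rx(roll)
    sy = np.sqrt(R[0, 0] ** 2 + R[1, 0] ** 2)
    if sy > 1e-6:
        roll = np.arctan2(R[2, 1], R[2, 2])
        pitch = np.arctan2(-R[2, 0], sy)
        yaw = np.arctan2(R[1, 0], R[0, 0])
    else:  # gimbal lock: pitch is close to +/- 90 degrees
        roll = np.arctan2(-R[1, 2], R[1, 1])
        pitch = np.arctan2(-R[2, 0], sy)
        yaw = 0.0
    return np.array([roll, pitch, yaw])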
Example No. 29
    def __getitem__(self, idx):
        # set defaults here
        kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2)
        kitti_to_nu_lidar_inv = kitti_to_nu_lidar.inverse
        imsize = (1600, 900)

        # sample_token based on index
        sample_token = self.sample_tokens[idx]

        # Get sample data.
        sample = self.nusc.get('sample', sample_token)
        sample_annotation_tokens = sample['anns']
        cam_front_token = sample['data'][self.cam_name]
        lidar_token = sample['data'][self.lidar_name]

        # Retrieve sensor records.
        sd_record_cam = self.nusc.get('sample_data', cam_front_token)
        sd_record_lid = self.nusc.get('sample_data', lidar_token)
        cs_record_cam = self.nusc.get('calibrated_sensor',
                                      sd_record_cam['calibrated_sensor_token'])
        cs_record_lid = self.nusc.get('calibrated_sensor',
                                      sd_record_lid['calibrated_sensor_token'])

        # Combine transformations and convert to KITTI format.
        # Note: cam uses same conventions in KITTI and nuScenes.
        lid_to_ego = transform_matrix(cs_record_lid['translation'],
                                      Quaternion(cs_record_lid['rotation']),
                                      inverse=False)
        ego_to_cam = transform_matrix(cs_record_cam['translation'],
                                      Quaternion(cs_record_cam['rotation']),
                                      inverse=True)
        velo_to_cam = np.dot(ego_to_cam, lid_to_ego)

        # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam.
        velo_to_cam_kitti = np.dot(velo_to_cam,
                                   kitti_to_nu_lidar.transformation_matrix)

        r0_rect = Quaternion(axis=[1, 0, 0], angle=0)  # Dummy values.

        # Projection matrix.
        p_left_kitti = np.zeros((3, 4))
        p_left_kitti[:3, :3] = cs_record_cam[
            'camera_intrinsic']  # Cameras are always rectified.

        # Create KITTI style transforms.
        velo_to_cam_rot = velo_to_cam_kitti[:3, :3]
        velo_to_cam_trans = velo_to_cam_kitti[:3, 3]

        # Check that the rotation has the same format as in KITTI.
        assert (velo_to_cam_rot.round(0) == np.array([[0, -1, 0], [0, 0, -1],
                                                      [1, 0, 0]])).all()
        assert (velo_to_cam_trans[1:3] < 0).all()

        # Retrieve the token from the lidar.
        # Note that this may be confusing as the filename of the camera will include the timestamp of the lidar,
        # not the camera.
        filename_cam_full = sd_record_cam['filename']

        # set the img variable to its data here
        src_im_path = os.path.join(self.nusc.dataroot, filename_cam_full)
        img = Image.open(src_im_path)

        # Create calibration matrix.
        K = np.array(p_left_kitti[:3, :3], dtype=np.float32)

        # populate the list of object annotations for this sample
        anns = []
        for sample_annotation_token in sample_annotation_tokens:
            sample_annotation = self.nusc.get('sample_annotation',
                                              sample_annotation_token)

            # Get box in LIDAR frame.
            _, box_lidar_nusc, _ = self.nusc.get_sample_data(
                lidar_token,
                box_vis_level=BoxVisibility.NONE,
                selected_anntokens=[sample_annotation_token])
            box_lidar_nusc = box_lidar_nusc[0]

            # Truncated: Set all objects to 0 which means untruncated.
            truncated = 0.0

            # Occluded: Set all objects to full visibility as this information is not available in nuScenes.
            occluded = 0

            # Convert nuScenes category to nuScenes detection challenge category.
            detection_name = category_to_detection_name(
                sample_annotation['category_name'])

            # Skip categories that are not part of the nuScenes detection challenge.
            if detection_name is None:
                continue

            # Convert from nuScenes to KITTI box format.
            box_cam_kitti = KittiDB.box_nuscenes_to_kitti(
                box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot),
                velo_to_cam_trans, r0_rect)

            # Project 3d box to 2d box in image, ignore box if it does not fall inside.
            bbox_2d = KittiDB.project_kitti_box_to_image(box_cam_kitti,
                                                         p_left_kitti,
                                                         imsize=imsize)
            if bbox_2d is None:
                continue

            # Set dummy score so we can use this file as result.
            box_cam_kitti.score = 0

            # Convert box to output string format.
            output = KittiDB.box_to_string(name=detection_name,
                                           box=box_cam_kitti,
                                           bbox_2d=bbox_2d,
                                           truncation=truncated,
                                           occlusion=occluded)
            fieldnames = [
                'type', 'truncated', 'occluded', 'alpha', 'xmin', 'ymin',
                'xmax', 'ymax', 'dh', 'dw', 'dl', 'lx', 'ly', 'lz', 'ry'
            ]
            if self.is_train:
                f = StringIO(output)
                reader = csv.DictReader(f,
                                        delimiter=' ',
                                        fieldnames=fieldnames)
                for line, row in enumerate(reader):
                    if row["type"] in self.classes:
                        anns.append({
                            "class":
                            row["type"],
                            "label":
                            TYPE_ID_CONVERSION[row["type"]],
                            "truncation":
                            float(row["truncated"]),
                            "occlusion":
                            float(row["occluded"]),
                            "alpha":
                            float(row["alpha"]),
                            "dimensions": [
                                float(row['dl']),
                                float(row['dh']),
                                float(row['dw'])
                            ],
                            "locations": [
                                float(row['lx']),
                                float(row['ly']),
                                float(row['lz'])
                            ],
                            "rot_y":
                            float(row["ry"])
                        })

        center = np.array([i / 2 for i in img.size], dtype=np.float32)
        size = np.array([i for i in img.size], dtype=np.float32)
        """
        resize, horizontal flip, and affine augmentation are performed here.
        since it is complicated to compute heatmap w.r.t transform.
        """
        flipped = False
        if (self.is_train) and (random.random() < self.flip_prob):
            flipped = True
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
            center[0] = size[0] - center[0] - 1
            K[0, 2] = size[0] - K[0, 2] - 1

        affine = False
        if (self.is_train) and (random.random() < self.aug_prob):
            affine = True
            shift, scale = self.shift_scale[0], self.shift_scale[1]
            shift_ranges = np.arange(-shift, shift + 0.1, 0.1)
            center[0] += size[0] * random.choice(shift_ranges)
            center[1] += size[1] * random.choice(shift_ranges)

            scale_ranges = np.arange(1 - scale, 1 + scale + 0.1, 0.1)
            size *= random.choice(scale_ranges)

        center_size = [center, size]
        trans_affine = get_transfrom_matrix(
            center_size, [self.input_width, self.input_height])
        trans_affine_inv = np.linalg.inv(trans_affine)
        img = img.transform(
            (self.input_width, self.input_height),
            method=Image.AFFINE,
            data=trans_affine_inv.flatten()[:6],
            resample=Image.BILINEAR,
        )

        trans_mat = get_transfrom_matrix(
            center_size, [self.output_width, self.output_height])

        if not self.is_train:
            # for inference we parametrize with original size
            target = ParamsList(image_size=size, is_train=self.is_train)
            target.add_field("trans_mat", trans_mat)
            target.add_field("K", K)
            if self.transforms is not None:
                img, target = self.transforms(img, target)

            return img, target, idx

        heat_map = np.zeros(
            [self.num_classes, self.output_height, self.output_width],
            dtype=np.float32)
        regression = np.zeros([self.max_objs, 3, 8], dtype=np.float32)
        cls_ids = np.zeros([self.max_objs], dtype=np.int32)
        proj_points = np.zeros([self.max_objs, 2], dtype=np.int32)
        p_offsets = np.zeros([self.max_objs, 2], dtype=np.float32)
        dimensions = np.zeros([self.max_objs, 3], dtype=np.float32)
        locations = np.zeros([self.max_objs, 3], dtype=np.float32)
        rotys = np.zeros([self.max_objs], dtype=np.float32)
        reg_mask = np.zeros([self.max_objs], dtype=np.uint8)
        flip_mask = np.zeros([self.max_objs], dtype=np.uint8)

        for i, a in enumerate(anns):
            a = a.copy()
            cls = a["label"]

            locs = np.array(a["locations"])
            rot_y = np.array(a["rot_y"])
            if flipped:
                locs[0] *= -1
                rot_y *= -1

            point, box2d, box3d = encode_label(K, rot_y, a["dimensions"], locs)
            point = affine_transform(point, trans_mat)
            box2d[:2] = affine_transform(box2d[:2], trans_mat)
            box2d[2:] = affine_transform(box2d[2:], trans_mat)
            box2d[[0, 2]] = box2d[[0, 2]].clip(0, self.output_width - 1)
            box2d[[1, 3]] = box2d[[1, 3]].clip(0, self.output_height - 1)
            h, w = box2d[3] - box2d[1], box2d[2] - box2d[0]

            if (0 < point[0] < self.output_width) and (0 < point[1] <
                                                       self.output_height):
                point_int = point.astype(np.int32)
                p_offset = point - point_int
                radius = gaussian_radius(h, w)
                radius = max(0, int(radius))
                heat_map[cls] = draw_umich_gaussian(heat_map[cls], point_int,
                                                    radius)

                cls_ids[i] = cls
                regression[i] = box3d
                proj_points[i] = point_int
                p_offsets[i] = p_offset
                dimensions[i] = np.array(a["dimensions"])
                locations[i] = locs
                rotys[i] = rot_y
                reg_mask[i] = 1 if not affine else 0
                flip_mask[i] = 1 if not affine and flipped else 0

        target = ParamsList(image_size=img.size, is_train=self.is_train)
        target.add_field("hm", heat_map)
        target.add_field("reg", regression)
        target.add_field("cls_ids", cls_ids)
        target.add_field("proj_p", proj_points)
        target.add_field("dimensions", dimensions)
        target.add_field("locations", locations)
        target.add_field("rotys", rotys)
        target.add_field("trans_mat", trans_mat)
        target.add_field("K", K)
        target.add_field("reg_mask", reg_mask)
        target.add_field("flip_mask", flip_mask)

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target, idx
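
The flip augmentation above mirrors the image, the principal point K[0, 2], and the object's x-coordinate and yaw together; projecting the mirrored 3D point with the mirrored intrinsics then lands on the mirrored pixel. A standalone consistency check with made-up numbers:

import numpy as np

width = 1600.0
K = np.array([[1266.4, 0.0, 816.3],
              [0.0, 1266.4, 491.5],
              [0.0, 0.0, 1.0]], dtype=np.float32)
loc = np.array([2.0, 1.5, 20.0], dtype=np.float32)  # camera-frame location

def project(K, p):
    uvw = K @ p
    return uvw[:2] / uvw[2]

u, v = project(K, loc)

# mirror the image, the principal point and the object x, as done in __getitem__
K_flip = K.copy()
K_flip[0, 2] = width - K[0, 2] - 1
loc_flip = loc * np.array([-1.0, 1.0, 1.0], dtype=np.float32)

u_flip, v_flip = project(K_flip, loc_flip)
assert np.isclose(u_flip, width - u - 1) and np.isclose(v_flip, v)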
Example No. 30
def nusc2coco(data_dir, nusc, scenes, input_seq_id):

    ret = defaultdict(list)

    for k, v in cats_mapping.items():
        ret['categories'].append(dict(id=v, name=k))

    vid_id = 0
    fr_id = 0

    for sensor in USED_SENSOR:
        print(f'converting {sensor}')
        seq_id = input_seq_id

        for sample in tqdm(nusc.sample):
            if not (sample["scene_token"] in scenes):
                continue

            image_token = sample['data'][sensor]

            sd_record = nusc.get('sample_data', image_token)
            img_name = osp.join(data_dir, sd_record['filename'])
            height = sd_record['height']
            width = sd_record['width']

            cs_record = nusc.get('calibrated_sensor',
                                 sd_record['calibrated_sensor_token'])
            pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
            global_from_car = transform_matrix(pose_record['translation'],
                                               Quaternion(
                                                   pose_record['rotation']),
                                               inverse=False)
            car_from_sensor = transform_matrix(cs_record['translation'],
                                               Quaternion(
                                                   cs_record['rotation']),
                                               inverse=False)
            trans_matrix = np.dot(global_from_car, car_from_sensor)

            rotation_matrix = trans_matrix[:3, :3]
            position = trans_matrix[:3, 3:].flatten().tolist()

            rotation = R.from_matrix(rotation_matrix).as_euler('xyz').tolist()
            pose_dict = dict(rotation=rotation, position=position)

            _, boxes, camera_intrinsic = nusc.get_sample_data(
                image_token, box_vis_level=BoxVisibility.ANY)
            calib = np.eye(4, dtype=np.float32)
            calib[:3, :3] = camera_intrinsic
            calib = calib[:3].tolist()

            img_info = dict(file_name=img_name,
                            cali=calib,
                            pose=pose_dict,
                            sensor_id=SENSOR_ID[sensor],
                            height=height,
                            width=width,
                            fov=60,
                            near_clip=0.15,
                            id=len(ret['images']),
                            video_id=vid_id,
                            index=fr_id)
            ret['images'].append(img_info)

            anns = []
            for box in boxes:
                cat = general_to_nusc_cats[box.name]
                if cat in ['ignore']:
                    continue

                v = np.dot(box.rotation_matrix, np.array([1, 0, 0]))
                yaw = -np.arctan2(v[2], v[0]).tolist()

                center_2d = project_to_image(
                    np.array([box.center[0], box.center[1], box.center[2]],
                             np.float32).reshape(1, 3), calib)[0].tolist()

                sample_ann = nusc.get('sample_annotation', box.token)
                instance_token = sample_ann['instance_token']

                if instance_token not in trackid_maps.keys():
                    trackid_maps[instance_token] = len(trackid_maps)

                bbox = project_kitti_box_to_image(copy.deepcopy(box),
                                                  camera_intrinsic,
                                                  imsize=(width, height))

                if bbox is None:
                    continue

                x1, y1, x2, y2 = bbox
                alpha = _rot_y2alpha(yaw, (x1 + x2) / 2,
                                     camera_intrinsic[0, 2],
                                     camera_intrinsic[0, 0]).tolist()
                delta_2d = [
                    center_2d[0] - (x1 + x2) / 2, center_2d[1] - (y1 + y2) / 2
                ]
                ann = dict(
                    image_id=ret['images'][-1]['id'],
                    category_id=cats_mapping[cat],
                    instance_id=trackid_maps[instance_token],
                    alpha=float(alpha),
                    roty=float(yaw),
                    dimension=[box.wlh[2], box.wlh[0], box.wlh[1]],
                    translation=[box.center[0], box.center[1], box.center[2]],
                    is_occluded=0,
                    is_truncated=0,
                    bbox=[x1, y1, x2 - x1, y2 - y1],
                    area=(x2 - x1) * (y2 - y1),
                    delta_2d=delta_2d,
                    center_2d=center_2d,
                    iscrowd=False,
                    ignore=False,
                    segmentation=[[x1, y1, x1, y2, x2, y2, x2, y1]])
                anns.append(ann)

            # Filter out boxes that are fully occluded by a closer box whose 2D bbox encloses them.
            visable_anns = []
            for i in range(len(anns)):
                vis = True
                for j in range(len(anns)):
                    if anns[i]['translation'][2] - min(anns[i]['dimension']) / 2 > \
                        anns[j]['translation'][2] + max(anns[j]['dimension']) / 2 and \
                        _bbox_inside(anns[i]['bbox'], anns[j]['bbox']):
                        vis = False
                        break
                if vis:
                    visable_anns.append(anns[i])
                else:
                    pass

            for ann in visable_anns:
                ann['id'] = len(ret['annotations'])
                ret['annotations'].append(ann)

            fr_id += 1
            if sample['next'] == '':
                vid_info = dict(id=vid_id,
                                name=f'{seq_id:05}_{SENSOR_ID[sensor]}',
                                scene_token=sample["scene_token"],
                                n_frames=fr_id)
                ret['videos'].append(vid_info)
                seq_id += 1
                vid_id += 1
                fr_id = 0

    return ret
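
A hedged usage sketch for nusc2coco; the dataroot, version and output filename are assumptions, and the module-level globals the function relies on (cats_mapping, USED_SENSOR, SENSOR_ID, trackid_maps, the projection helpers) must already be defined as in the snippet above:

import json
from nuscenes.nuscenes import NuScenes

nusc = NuScenes(version='v1.0-mini', dataroot='data/nuscenes', verbose=False)
scenes = {scene['token'] for scene in nusc.scene}  # convert every scene in the split
coco_dict = nusc2coco('data/nuscenes', nusc, scenes, input_seq_id=0)
with open('nuscenes_mini_coco.json', 'w') as f:
    json.dump(coco_dict, f)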