def tokens_to_data_pairs(nusc: NuScenes, cam_sd_tokens: List[str],
                         rad_sd_tokens: List[str], depth: float,
                         threshold: float) -> list:
    """
    This method takes a pair of lists with the Camera and Radar sample_data tokens,
    filters the RadarPointCloud for detections closer than the parameter depth,
    loads the actual data in two corresponding lists and returns the zipped lists
    :param nusc: Nuscenes instance
    :param cam_sd_tokens: List with all the camera sample_data tokens
    :param rad_sd_tokens: List with all the radar sample_data tokens
    :param depth: Distance from the radar sensor (x value) above which all detections are omitted
    :param threshold: No pair of points in the resulting dataset will be closer than this threshold
    :return: List of (np.ndarray, RadarPointCloud) tuples pairing each image with its radar data
    """
    rgb_images_list = []
    for i in range(len(cam_sd_tokens)):
        cam_sd_path = nusc.get_sample_data_path(cam_sd_tokens[i])
        if not os.path.isfile(cam_sd_path):
            continue
        #im = Image.open(cam_sd_path)
        #im = im.resize((IMAGE_WIDTH, IMAGE_HEIGHT), Image.BILINEAR)
        img = cv2.imread(cam_sd_path)
        #Resize with Bilinear interpolation
        img = cv2.resize(img, (IMAGE_WIDTH, IMAGE_HEIGHT))
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        rgb_images_list.append(img_rgb)

    radar_pcl_list = []
    for i in range(len(rad_sd_tokens)):
        rad_sd_path = nusc.get_sample_data_path(rad_sd_tokens[i])
        if not os.path.isfile(rad_sd_path):
            continue
        #radar_pcl = RadarPointCloud.from_file(rad_sd_path, invalid_states = range(18), dynprop_states = range(18), ambig_states = range(18))
        #radar_pcl = RadarPointCloud.from_file(rad_sd_path, invalid_states = None, dynprop_states = [0,2,6], ambig_states = None)
        radar_pcl = RadarPointCloud.from_file(rad_sd_path)
        # radar_pcl.points.shape is (18, num_points)
        radar_pcl.points, dist_list = filter_pointcloud(
            radar_pcl.points, threshold)
        # radar_pcl.points.shape becomes (3, num_points)
        # RADNET expects shape (num_points, 4)
        radar_pcl.points = radar_pcl.points.transpose()
        #radar_pcl.points = radar_pcl.points[:, :3]
        #radar_pcl.points = radar_pcl.points[radar_pcl.points[:, 0] < depth]
        radar_pcl.points = np.hstack((radar_pcl.points,
                                      np.ones((radar_pcl.points.shape[0], 1),
                                              dtype=radar_pcl.points.dtype)))

        radar_pcl_list.append(radar_pcl)

    assert (len(rgb_images_list) == len(radar_pcl_list))
    image_radar_pairs = list(zip(rgb_images_list, radar_pcl_list))

    del rgb_images_list
    del radar_pcl_list

    return image_radar_pairs
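# A minimal usage sketch (not part of the original example), assuming a standard nuScenes
# install and that IMAGE_WIDTH, IMAGE_HEIGHT and filter_pointcloud are defined elsewhere
# in this module; the depth/threshold values below are illustrative only.
from nuscenes.nuscenes import NuScenes

nusc_demo = NuScenes(version='v1.0-mini', dataroot='data/nuscenes', verbose=False)
cam_sd_tokens_demo = [s['data']['CAM_FRONT'] for s in nusc_demo.sample]
rad_sd_tokens_demo = [s['data']['RADAR_FRONT'] for s in nusc_demo.sample]
demo_pairs = tokens_to_data_pairs(nusc_demo, cam_sd_tokens_demo, rad_sd_tokens_demo,
                                  depth=100.0, threshold=0.05)
demo_image, demo_radar_pcl = demo_pairs[0]  # (H, W, 3) image and its RadarPointCloud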
def tokens_to_data_pairs(nusc: NuScenes, cam_sd_tokens: List[str],
                         rad_sd_tokens: List[str]) -> list:
    """
    This method takes a pair of lists with the Camera and Radar sample_data tokens,
    loads the actual data in two corresponding lists and returns the zipped lists
    :param nusc: Nuscenes instance
    :param cam_sd_tokens: List with all the camera sample_data tokens
    :param rad_sd_tokens: List with all the radar sample_data tokens
    :return: List of (np.ndarray, RadarPointCloud) tuples pairing each image with its radar data
    """
    rgb_images_list = []
    for i in range(len(cam_sd_tokens)):
        cam_sd_path = nusc.get_sample_data_path(cam_sd_tokens[i])
        if not os.path.isfile(cam_sd_path):
            continue
        #im = Image.open(cam_sd_path)
        #im = im.resize((IMAGE_WIDTH, IMAGE_HEIGHT), Image.BILINEAR)
        img = cv2.imread(cam_sd_path)
        #Resize with Bilinear interpolation
        img = cv2.resize(img, (IMAGE_WIDTH, IMAGE_HEIGHT))
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        rgb_images_list.append(img_rgb)

    radar_pcl_list = []
    for i in range(len(rad_sd_tokens)):
        rad_sd_path = nusc.get_sample_data_path(rad_sd_tokens[i])
        if not os.path.isfile(rad_sd_path):
            continue
        #radar_pcl = RadarPointCloud.from_file(rad_sd_path, invalid_states = range(18), dynprop_states = range(18), ambig_states = range(18))
        #radar_pcl = RadarPointCloud.from_file(rad_sd_path, invalid_states = None, dynprop_states = [0,2,6], ambig_states = None)
        radar_pcl = RadarPointCloud.from_file(rad_sd_path)
        #nuScenes RadarPointCloud has shape (18, num_points)
        #RADNET expects (num_points, 4)
        radar_pcl.points = radar_pcl.points.transpose()
        radar_pcl.points = radar_pcl.points[:, :3]
        radar_pcl.points = np.hstack((radar_pcl.points,
                                      np.ones((radar_pcl.points.shape[0], 1),
                                              dtype=radar_pcl.points.dtype)))
        radar_pcl_list.append(radar_pcl)

    assert (len(rgb_images_list) == len(radar_pcl_list))
    image_radar_pairs = list(zip(rgb_images_list, radar_pcl_list))

    del rgb_images_list
    del radar_pcl_list

    return image_radar_pairs
def convert(split='val',
            data_path='data/nuscenes/',
            save_path='data/nuscenes/depth_maps'):
    nusc = NuScenes(version=SPLITS[split], dataroot=data_path, verbose=True)
    nusc_exp = NuScenesExplorer(nusc)

    save_dir = os.path.join(save_path, split)
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    ret = []

    for sample in nusc.sample:

        sample_token = sample['token']
        print(sample_token, len(ret))

        lidar_token = sample['data'][LidarName]

        for cam_name in CamNames:
            cam_token = sample['data'][cam_name]
            depth_map_path = sample_token + cam_name + '.pts'
            depth_map_path = os.path.join(save_dir, 'depth_data',
                                          depth_map_path)
            img_path = nusc.get_sample_data_path(cam_token)

            data_info = {}
            data_info['sample_token'] = sample_token
            data_info['lidar_token'] = lidar_token
            data_info['depth_path'] = depth_map_path
            data_info['img_path'] = img_path

            ret.append(data_info)
            # NOTE: this 'continue' skips the depth-map generation below; remove it to
            # project the LiDAR point cloud into the image and save the projected depths.
            continue
            points, coloring, im = nusc_exp.map_pointcloud_to_image(
                lidar_token, cam_token)

            float_x_cords = points[0]  # pixel x coordinate, < image width (1600)
            float_y_cords = points[1]  # pixel y coordinate, < image height (900)
            float_depth = coloring  # depth of each projected point

            point_with_depth = np.stack(
                [float_x_cords, float_y_cords, float_depth], axis=-1)
            np.save(depth_map_path, point_with_depth)

            #nusc.render_pointcloud_in_image(sample_token, camera_channel='CAM_FRONT', out_path='./render.png', verbose=False)

    meta_file_path = os.path.join(save_dir, 'meta.json')
    with open(meta_file_path, 'w') as f:
        json.dump(ret, f)
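# Hedged usage sketch: SPLITS, LidarName and CamNames are module-level constants of the
# original script that are not shown in this excerpt; plausible values would be
# SPLITS = {'train': 'v1.0-trainval', 'val': 'v1.0-trainval', 'test': 'v1.0-test'},
# LidarName = 'LIDAR_TOP' and CamNames = ['CAM_FRONT', ...].
if __name__ == '__main__':
    convert(split='val',
            data_path='data/nuscenes/',
            save_path='data/nuscenes/depth_maps')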
Example #4
def tokens_to_data(nusc: NuScenes, rad_sd_tokens: List[str]) -> List:
    """
    This method takes a list with the Radar sample_data tokens and
    loads the actual data in a new list
    :param nusc: Nuscenes instance
    :param rad_sd_tokens: List with all the radar sample_data tokens
    :return: List of RadarPointCloud objects holding the radar data
    """

    radar_pcl_list = []
    for i in range(len(rad_sd_tokens)):
        rad_sd_path = nusc.get_sample_data_path(rad_sd_tokens[i])
        if not os.path.isfile(rad_sd_path):
            continue
        radar_pcl = RadarPointCloud.from_file(rad_sd_path)
        #nuScenes RadarPointCloud has shape (18, num_points)
        #RADNET expects (num_points, 4)
        radar_pcl.points = radar_pcl.points.transpose()
        radar_pcl.points = radar_pcl.points[:, :3]
        radar_pcl.points = np.hstack((radar_pcl.points, np.ones((radar_pcl.points.shape[0], 1), dtype=radar_pcl.points.dtype)))
        radar_pcl_list.append(radar_pcl)

    return radar_pcl_list
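# Minimal usage sketch (an addition, not part of the original example): load every
# RADAR_FRONT keyframe of a split into memory with the helper above.
from nuscenes.nuscenes import NuScenes

nusc_radar_demo = NuScenes(version='v1.0-mini', dataroot='data/nuscenes', verbose=False)
radar_tokens_demo = [s['data']['RADAR_FRONT'] for s in nusc_radar_demo.sample]
radar_clouds_demo = tokens_to_data(nusc_radar_demo, radar_tokens_demo)
# Each entry is a RadarPointCloud whose .points array now has shape (num_points, 4).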
                first_sample_rec['data'][channel],
            )
            current_time = first_sample_rec['timestamp']
            while current_time < last_sample_rec['timestamp']:

                current_time += time_step
                for channel, sd_rec in current_recs.items():
                    while sd_rec['timestamp'] < current_time and sd_rec[
                            'next'] != '':
                        sd_rec = nusc.get('sample_data', sd_rec['next'])
                        current_recs[channel] = sd_rec

                count_frames = count_frames + 1
                cam_token = sd_rec['token']
                cam_record = nusc.get('sample_data', cam_token)
                cam_path = nusc.get_sample_data_path(cam_token)
                cam_path, boxes, camera_intrinsic = nusc.get_sample_data(
                    sd_rec['token'], )
                im = Image.open(cam_path)
                im.save(
                    data_directory + spl + str(scene_id) + camera_channel +
                    '/img1/' + str(count_frames) + '.jpg',
                    'JPEG',
                )
                TL_found = False
                bb = []
                patch_radius = 700
                sample_record = nusc.get('sample', sample_token)
                log_record = nusc.get('log', scene_record['log_token'])
                log_location = log_record['location']
                nusc_map = NuScenesMap(
Example #6
def render_tracking_result(estimation_file: str, scene_name: str, version: str,
                           dataroot: str):
    '''
    Render tracking results onto the CAM_FRONT image
    :param estimation_file: name of the estimation file produced by nuscens_tracking_pmbm.py
    :param scene_name: name of the scene whose tracking result is to be rendered
    :param version: version of the NuScenes dataset (mini, trainval, test) that the scene belongs to
    :param dataroot: directory containing the NuScenes dataset
    '''
    # Load tracking data
    with open(estimation_file, 'r') as infile:
        all_tracking_result = json.load(infile)

    num_unique_colors = 200
    all_color_indicies = np.linspace(
        0, 1.0, num=num_unique_colors)  # allow up to 200 unique colors

    # load NuScenes data
    nusc = NuScenes(version=version, dataroot=dataroot, verbose=False)
    my_scene_token = nusc.field2token('scene', 'name', scene_name)[0]
    my_scene = nusc.get('scene', my_scene_token)

    current_time_step = 0
    current_sample_token = my_scene['first_sample_token']
    while True:
        # get necessary record
        sample_record = nusc.get('sample', current_sample_token)
        camera_token = sample_record['data']['CAM_FRONT']
        sd_record = nusc.get('sample_data', camera_token)
        cs_record = nusc.get('calibrated_sensor',
                             sd_record['calibrated_sensor_token'])
        pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])

        # get camera information
        cam_intrinsic = np.array(cs_record['camera_intrinsic'])
        imsize = (sd_record['width'], sd_record['height'])
        impath = nusc.get_sample_data_path(camera_token)
        im = cv2.imread(impath)

        # get tracking result
        current_tracks = all_tracking_result[str(current_time_step)]
        for target_id, target in current_tracks.items():
            box = Box4Track(center=target['translation'] + [target['height']],
                            orientation=Quaternion(
                                axis=[0, 0, 1], angle=target['orientation']),
                            size=target['size'],
                            name=target['class'],
                            label=int(target_id))

            box.to_camera_frame(pose_record, cs_record)

            # render box on image
            if not box_in_image(box, cam_intrinsic, imsize, BoxVisibility.ANY):
                # print('Box {} not in image'.format(box.name))
                continue
            # get color
            c = np.array(plt.cm.Spectral(box.label % num_unique_colors))
            c = np.round(c * 255)
            box.render_track(im,
                             view=cam_intrinsic,
                             normalize=True,
                             color=(c[0], c[1], c[2]))

        # move on
        current_time_step += 1
        current_sample_token = sample_record['next']
        if current_sample_token == '':
            break

        cv2.imshow('CAM_FRONT', im)
        key = cv2.waitKey(1000)  # wait 1000 ms between frames
        if key == 32:  # if space is pressed, pause.
            key = cv2.waitKey()
        if key == 27:  # if ESC is pressed, exit.
            cv2.destroyAllWindows()
            break
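# Hedged usage sketch: the estimation file name and scene are assumptions; the JSON file
# must contain a dict keyed by time step, each value mapping a target id to its track
# state, as expected by the loop above.
render_tracking_result(estimation_file='estimation_scene-0061.json',
                       scene_name='scene-0061',
                       version='v1.0-mini',
                       dataroot='data/nuscenes')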
class NuscenesReader:

    scene_split_lists = {"train": set(split_train), "val": set(split_val)}
    sample_id_template = "ns_{}_{:02d}"
    np_str_to_cat = np.vectorize(nuscenes_category_to_object_class,
                                 otypes=[np.int64])
    np_str_to_sem = np.vectorize(nuscenes_category_to_semantic_class,
                                 otypes=[np.int64])

    # Nuscenes LiDAR has x-axis to vehicle right and y-axis to front.
    # Turn this 90 degrees to have x-axis facing the front
    turn_matrix = np.linalg.inv(
        np.asarray([[0, 1, 0, 0], [-1, 0, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]],
                   dtype=np.float64))
    turn_quaternion = Quaternion(axis=(0.0, 0.0, 1.0), radians=-np.pi / 2.0)

    def __init__(self,
                 nuscenes_root,
                 version="v1.0-trainval",
                 max_scenes=None,
                 *,
                 read_radar: bool = True,
                 read_camera: bool = True,
                 read_semantics: bool = True,
                 read_bounding_boxes: bool = True):
        self.nusc = NuScenes(version=version,
                             dataroot=nuscenes_root,
                             verbose=False)
        self.root = pathlib.Path(nuscenes_root)

        # global counter to sanity-check if we calculate the same number of points
        # within boxes as the dataset authors
        self.ns_lidar_pts_to_calculated_diff = 0

        # flags defining the data entries to return from 'read'
        self.read_radar = read_radar
        self.read_camera = read_camera
        self.read_semantics = read_semantics
        self.read_bounding_boxes = read_bounding_boxes

        if self.read_semantics and not hasattr(self.nusc, "lidarseg"):
            raise RuntimeError("Error: nuScenes-lidarseg not installed!")

        # assert that the training targets range from 0 - (|mapping| - 1)
        assert len(set(
            NUSCENES_SEM_CLASSES.values())) == len(NUSCENES_SEM_CLASSES)
        assert all(a == b
                   for a, b in zip(sorted(NUSCENES_SEM_CLASSES.values()),
                                   range(len(NUSCENES_SEM_CLASSES))))

        split_name = {
            "v1.0-trainval": "nuscenes_default",
            "v1.0-mini": "nuscenes_mini",
        }.get(version, "nuscenes_{}".format(version))

        # create split dict
        self.split = {
            "name": split_name,
            "data": {k: []
                     for k in self.scene_split_lists.keys()},
        }
        for i, scene in enumerate(self.nusc.scene):
            if max_scenes is not None and i >= max_scenes:
                break
            name = scene["name"]
            for k, v in self.scene_split_lists.items():
                if name in v:
                    split_list = self.split["data"][k]
                    split_list.extend([
                        self.sample_id_template.format(name, i)
                        for i in range(0, scene["nbr_samples"])
                    ])
                    break
            else:
                raise RuntimeError(
                    "Found scene that is not in a split: {}".format(name))

    def _make_path(self, filename: str):
        return str(self.root / filename)

    def __iter__(self):
        self._scene_iter = self.nusc.scene.__iter__()
        self._next_scene()
        return self

    def __next__(self) -> typing.Tuple[dict, int]:
        try:
            return self._sample_iter.__next__()
        except StopIteration:
            self._next_scene()
            return self._sample_iter.__next__()

    def make_sample_id(self, sample: typing.Tuple[dict, int]) -> str:
        scene = self.nusc.get("scene", sample[0]["scene_token"])
        return self.sample_id_template.format(scene["name"], sample[1])

    def _next_scene(self):
        self._current_scene = self._scene_iter.__next__()

        class SampleIteration:
            """Iterate over nuscenes samples of a scene.
            Add an additional sample index.
            """
            def __init__(self, scene, nusc):
                self.nusc = nusc
                self._pos = scene["first_sample_token"]
                self._index = itertools.count().__iter__()

            def __next__(self):
                if not self._pos:
                    raise StopIteration()
                sample = self.nusc.get("sample", self._pos)
                self._pos = sample["next"]
                return sample, self._index.__next__()

        self._sample_iter = SampleIteration(self._current_scene, self.nusc)

    def read(self, sample: typing.Tuple[dict, int], _sample_id: str):
        d = self._read_sample(sample)

        def convert(entry):
            """Convert data types to be compatible with tfrecords features."""
            if isinstance(entry, str):
                return entry.encode("utf-8")
            if isinstance(entry, np.ndarray):
                entry = entry.flatten()
                if not np.issubdtype(entry.dtype, np.number):
                    entry = tf.io.serialize_tensor(tf.convert_to_tensor(entry))
                elif entry.dtype == np.float64:
                    entry = entry.astype(np.float32)
                return entry
            return entry

        return {k: convert(v) for k, v in d.items()}

    def _read_sample(self, sample: typing.Tuple[dict, int]):

        radars = ["RADAR_FRONT", "RADAR_FRONT_LEFT", "RADAR_FRONT_RIGHT"]
        cameras = ["CAM_FRONT"]

        sample_id = self.make_sample_id(sample)
        sample, sample_index = sample
        sample_data_token: str = sample["data"]["LIDAR_TOP"]

        lidar_sample_data = self.nusc.get("sample_data", sample_data_token)
        assert lidar_sample_data["is_key_frame"]

        data_path, box_list, cam_intrinsic = self.nusc.get_sample_data(
            sample_data_token)

        # POINT CLOUD [(x y z I) x P]. Intensity from 0.0 to 255.0
        point_cloud_orig = LidarPointCloud.from_file(data_path)
        if point_cloud_orig.points.dtype != np.float32:
            raise RuntimeError("Point cloud has wrong data type.")
        # -> [P x (x y z I)]
        point_cloud_orig = point_cloud_orig.points.transpose()

        camera_data = {}
        radar_data = {}
        box_data = {}
        lidarseg_data = {}

        if self.read_semantics:
            lidarseg_data = self._read_lidarseg_data(sample_data_token,
                                                     point_cloud_orig)

        if self.read_bounding_boxes:
            box_data = self._read_bounding_boxes(box_list, point_cloud_orig,
                                                 sample)

        # rotate point cloud 90 degrees around z-axis,
        # so that x-axis faces front of vehicle
        x = point_cloud_orig[:, 0:1]
        y = point_cloud_orig[:, 1:2]
        point_cloud = np.concatenate((y, -x, point_cloud_orig[:, 2:4]),
                                     axis=-1)

        # LiDAR extrinsic calibration
        calibration_lidar = self.nusc.get(
            "calibrated_sensor", lidar_sample_data["calibrated_sensor_token"])
        tf_lidar = transform_matrix(calibration_lidar["translation"],
                                    Quaternion(calibration_lidar["rotation"]))
        # vehicle -> point cloud coords (turned by 90 degrees)
        tf_vehicle_pc = np.linalg.inv(
            np.dot(tf_lidar, NuscenesReader.turn_matrix))

        if self.read_camera:
            # CAMERAS
            camera_data: typing.List[typing.Dict[str, typing.Any]] = [
                self._read_sensor_data_and_extrinsics(sample, cam_name,
                                                      tf_vehicle_pc,
                                                      self._read_camera_data)
                for cam_name in cameras
            ]
            camera_data = {k: v for d in camera_data for k, v in d.items()}

        if self.read_radar:
            # RADARS
            radar_data: typing.List[typing.Dict[str, typing.Any]] = [
                self._read_sensor_data_and_extrinsics(sample, radar_name,
                                                      tf_vehicle_pc,
                                                      self._read_radar_data)
                for radar_name in radars
            ]
            radar_data = {k: v for d in radar_data for k, v in d.items()}

        assert box_data.keys().isdisjoint(camera_data.keys())
        assert box_data.keys().isdisjoint(radar_data.keys())
        assert box_data.keys().isdisjoint(lidarseg_data.keys())
        # return feature array. Add sample ID.
        d = {
            "sample_id": sample_id,
            "point_cloud": point_cloud,
            **box_data,
            **camera_data,
            **radar_data,
            **lidarseg_data,
        }
        return d

    def _read_lidarseg_data(self, sample_data_token: str,
                            point_cloud_orig: np.ndarray):
        # self.nusc.lidarseg_idx2name_mapping

        lidarseg_labels_filename = (
            pathlib.Path(self.nusc.dataroot) /
            self.nusc.get("lidarseg", sample_data_token)["filename"])
        # Load labels from .bin file.
        points_label = np.fromfile(str(lidarseg_labels_filename),
                                   dtype=np.uint8)  # [num_points]

        if points_label.shape[0] != point_cloud_orig.shape[0]:
            raise ValueError("Semantic labels do not match point cloud.")

        return {"semantic_labels": tf.io.serialize_tensor(points_label)}

    def _read_bounding_boxes(self, box_list, point_cloud_orig, sample):
        # create transform matrices for all boxes
        dt = np.float32
        if box_list:
            r = np.asarray([x.rotation_matrix for x in box_list])
            c = np.asarray([x.center for x in box_list])
            center_pos = np.asarray([x.center for x in box_list], dtype=dt)
            # NuScenes format: width, length, height. Reorder to l w h
            box_dims_lwh = np.asarray([x.wlh for x in box_list],
                                      dtype=np.float64)[:, [1, 0, 2]]
            box_rot = np.asarray([(self.turn_quaternion * x.orientation).q
                                  for x in box_list],
                                 dtype=dt)
        else:
            r = np.zeros(shape=[0, 3, 3], dtype=np.float64)
            c = np.zeros(shape=[0, 3], dtype=np.float64)
            center_pos = np.zeros(shape=[0, 3], dtype=np.float64)
            box_dims_lwh = np.zeros(shape=[0, 3], dtype=np.float64)
            box_rot = np.zeros(shape=[0, 4], dtype=np.float64)

        rc = np.concatenate((r, c[:, :, None]), axis=-1)
        tfs = np.concatenate(
            (
                rc,
                np.broadcast_to(
                    tf.constant([0, 0, 0, 1], dtype=rc.dtype)[None, None, :],
                    [rc.shape[0], 1, 4],
                ),
            ),
            axis=1,
        )

        total_points_per_box, mapping = assign_point_cloud_to_bounding_boxes(
            point_cloud_orig,
            bounding_boxes_tfs=tfs,
            bounding_boxes_dims=box_dims_lwh,
        )

        # 3D BOXES IN LIDAR COORDS [(x y z) (w x y z) (l w h)]
        bboxes_spatial = np.empty(shape=(len(box_list), 10), dtype=dt)
        bboxes_spatial[:, 0] = center_pos[:, 1]
        bboxes_spatial[:, 1] = -center_pos[:, 0]
        bboxes_spatial[:, 2] = center_pos[:, 2]
        bboxes_spatial[:, 3:7] = box_rot
        bboxes_spatial[:, 7:10] = box_dims_lwh.astype(dt)

        object_str = np.array([x.name for x in box_list], dtype=np.str_)
        object_category = NuscenesReader.np_str_to_cat(object_str).astype(
            np.int64)
        class_value = NuscenesReader.np_str_to_sem(object_str).astype(np.int64)
        lidar_pts = np.asarray([
            x["num_lidar_pts"] for x in [
                self.nusc.get("sample_annotation", ann_token)
                for ann_token in sample["anns"]
            ]
        ])
        # rotate box transforms (same as bboxes_spatial)
        tfs = np.matmul(np.linalg.inv(self.turn_matrix)[None, ...], tfs)

        # track to see if box calculation gives the same results as the
        # lidar points counter from the nuscenes dataset
        diff = np.sum(np.abs(lidar_pts - total_points_per_box))
        self.ns_lidar_pts_to_calculated_diff += diff

        return {
            "bounding_boxes_3d_spatial": bboxes_spatial,
            "bounding_boxes_3d_transforms": tfs,
            "bounding_boxes_class": class_value,
            "bounding_boxes_category": object_category,
            "bounding_boxes_class_str": object_str,
            "bounding_boxes_point_counter":
            total_points_per_box.astype(np.int64),
            "bounding_boxes_point_mapping": mapping,
        }

    @staticmethod
    def _read_sensor_data_and_extrinsics(sample, sensor_name: str,
                                         coord_transform: np.ndarray,
                                         reader_func):
        data, extrinsics = reader_func(sample["data"][sensor_name])
        extrinsics = np.matmul(coord_transform, extrinsics)
        return {
            **{k.format(sensor_name.lower()): v
               for k, v in data.items()},
            "{}_extrinsics".format(sensor_name.lower()): extrinsics,
        }

    def _read_camera_data(self, sample_data_token):
        sd_record = self.nusc.get("sample_data", sample_data_token)
        cs_record = self.nusc.get("calibrated_sensor",
                                  sd_record["calibrated_sensor_token"])
        sensor_record = self.nusc.get("sensor", cs_record["sensor_token"])
        assert sensor_record["modality"] == "camera"

        # currently using only keyframes
        assert sd_record["is_key_frame"]

        data_path = self.nusc.get_sample_data_path(sample_data_token)
        imsize = (sd_record["width"], sd_record["height"])

        cam_intrinsic = np.array(cs_record["camera_intrinsic"])
        cam_extrinsics = transform_matrix(cs_record["translation"],
                                          Quaternion(cs_record["rotation"]))

        with open(data_path, "rb") as in_file:
            img_bytes_jpg = in_file.read()

        return (
            {
                "{}_jpg": img_bytes_jpg,
                "{}_size": np.asarray(imsize, dtype=np.int64),
                "{}_intrinsics": cam_intrinsic.astype(np.float32),
            },
            cam_extrinsics,
        )

    def _read_radar_data(self, sample_data_token):
        sd_record = self.nusc.get("sample_data", sample_data_token)
        cs_record = self.nusc.get("calibrated_sensor",
                                  sd_record["calibrated_sensor_token"])
        sensor_record = self.nusc.get("sensor", cs_record["sensor_token"])
        assert sensor_record["modality"] == "radar"

        # currently using only keyframes
        assert sd_record["is_key_frame"]

        data_path = self.nusc.get_sample_data_path(sample_data_token)
        radar_point_cloud = RadarPointCloud.from_file(data_path)
        points = tf.convert_to_tensor(radar_point_cloud.points.transpose())

        radar_extrinsics = transform_matrix(cs_record["translation"],
                                            Quaternion(cs_record["rotation"]))

        return {"{}_points": tf.io.serialize_tensor(points)}, radar_extrinsics
Example #8
class NuScenes_utils():
    def __init__(self, phase):
        assert phase in ['train', 'validation', 'test']
        self.phase = phase
        self.prep_list_of_sessions()
        self.splits = {
            'train': train_split,
            'validation': val_split,
            'test': test_split
        }

    def is_phase_session(self, phase, session_ind=None):
        if session_ind is None:
            res = np.array(
                [n in self.splits[phase] for n in self.session_names])
        else:
            res = self.session_names[session_ind] in self.splits[phase]
        return res

    def is_location_session(self, location, session_ind=None):
        if session_ind is None:
            res = np.array([location in l for l in self.session_locations])
        else:
            res = location in self.session_locations[session_ind]
        return res

    def load_PC(self, session_ind, cloud_ind, cache_file=None):
        assert session_ind < self.num_sessions
        assert cloud_ind < self.session_lengths[
            session_ind], f"Requested cloud {cloud_ind}, but session {session_ind} only contains {self.session_lengths[session_ind]} clouds"
        lidar_token = self.cloud_tokens[session_ind][cloud_ind]
        cloud = self.load_cloud_raw(lidar_token)
        if (cache_file is not None) and not os.path.isfile(cache_file):
            np.save(cache_file, cloud)
        return cloud

    def get_relative_motion_A_to_B(self, session_ind, cloud_ind_A,
                                   cloud_ind_B):
        token_A = self.cloud_tokens[session_ind][cloud_ind_A]
        posA = self.load_position_raw(token_A)
        token_B = self.cloud_tokens[session_ind][cloud_ind_B]
        posB = self.load_position_raw(token_B)
        mot_A_to_B = np.linalg.inv(posB) @ posA
        return mot_A_to_B

    def prep_list_of_sessions(self):
        if self.phase in ['train', 'validation']:
            version = 'v1.0-trainval'
        elif self.phase == 'test':
            version = 'v1.0-test'
        self.NuScenes_data = NuScenes(version=version,
                                      dataroot=DATASET_ROOT,
                                      verbose=True)
        self.num_sessions = len(self.NuScenes_data.scene)

        self.cloud_tokens = []
        self.session_lengths = []
        self.session_locations = []
        self.session_names = []

        for session_ind in range(self.num_sessions):

            record = self.NuScenes_data.scene[session_ind]
            session_token = record['token']
            self.session_names.append(record['name'])
            location = self.NuScenes_data.get('log',
                                              record['log_token'])['location']
            self.session_locations.append(location)
            sample_token = record["first_sample_token"]
            sample = self.NuScenes_data.get("sample", sample_token)
            lidar_token = sample["data"]["LIDAR_TOP"]
            cur_lidar_tokens = []
            while len(lidar_token) > 0:
                cur_lidar_tokens.append(lidar_token)
                lidar_data = self.NuScenes_data.get("sample_data", lidar_token)
                lidar_token = lidar_data["next"]
            self.cloud_tokens.append(cur_lidar_tokens)
            self.session_lengths.append(len(cur_lidar_tokens))

    def load_position_raw(self, sample_lidar_token):
        lidar_data = self.NuScenes_data.get("sample_data", sample_lidar_token)

        ego_pose = self.NuScenes_data.get("ego_pose",
                                          lidar_data["ego_pose_token"])
        calibrated_sensor = self.NuScenes_data.get(
            "calibrated_sensor", lidar_data["calibrated_sensor_token"])

        # Homogeneous transformation matrix from car frame to world frame.
        global_from_car = transform_matrix(ego_pose['translation'],
                                           Quaternion(ego_pose['rotation']),
                                           inverse=False)

        return global_from_car

    def load_cloud_raw(self, sample_lidar_token):
        lidar_data = self.NuScenes_data.get("sample_data", sample_lidar_token)
        lidar_filepath = self.NuScenes_data.get_sample_data_path(
            sample_lidar_token)
        ego_pose = self.NuScenes_data.get("ego_pose",
                                          lidar_data["ego_pose_token"])
        calibrated_sensor = self.NuScenes_data.get(
            "calibrated_sensor", lidar_data["calibrated_sensor_token"])

        # Homogeneous transformation matrix from sensor coordinate frame to ego car frame.
        car_from_sensor = transform_matrix(calibrated_sensor['translation'],
                                           Quaternion(
                                               calibrated_sensor['rotation']),
                                           inverse=False)

        lidar_pointcloud = LidarPointCloud.from_file(lidar_filepath)

        # The lidar pointcloud is defined in the sensor's reference frame.
        # We want it in the car's reference frame, so we transform each point
        lidar_pointcloud.transform(car_from_sensor)

        return lidar_pointcloud.points[:3, :].T
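# Hedged usage sketch: DATASET_ROOT and the train/val/test split lists are module globals
# of the original project and are not shown in this excerpt.
nusc_utils = NuScenes_utils(phase='validation')
first_cloud = nusc_utils.load_PC(session_ind=0, cloud_ind=0)    # (N, 3) points in the ego frame
motion_0_to_1 = nusc_utils.get_relative_motion_A_to_B(0, 0, 1)  # 4x4 relative ego motion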
Example #9
class NuScenesDataset(Dataset):
    """
    NuScenes dataset loader and producer
    """
    def __init__(self,
                 mode,
                 split='training',
                 img_list='trainval',
                 is_training=True,
                 workers_num=1):
        """
        mode: 'loading', 'preprocessing'
        """
        self.mode = mode
        self.dataset_dir = os.path.join(cfg.ROOT_DIR,
                                        cfg.DATASET.KITTI.BASE_DIR_PATH)
        self.max_sweeps = cfg.DATASET.NUSCENES.NSWEEPS
        self.is_training = is_training
        self.img_list = img_list
        self.workers_num = workers_num

        # cast labels from NuScenes name to useful name
        self.useful_cls_dict = {
            'animal': 'ignore',
            'human.pedestrian.personal_mobility': 'ignore',
            'human.pedestrian.stroller': 'ignore',
            'human.pedestrian.wheelchair': 'ignore',
            'movable_object.debris': 'ignore',
            'movable_object.pushable_pullable': 'ignore',
            'static_object.bicycle_rack': 'ignore',
            'vehicle.emergency.ambulance': 'ignore',
            'vehicle.emergency.police': 'ignore',
            'movable_object.barrier': 'barrier',
            'vehicle.bicycle': 'bicycle',
            'vehicle.bus.bendy': 'bus',
            'vehicle.bus.rigid': 'bus',
            'vehicle.car': 'car',
            'vehicle.construction': 'construction_vehicle',
            'vehicle.motorcycle': 'motorcycle',
            'human.pedestrian.adult': 'pedestrian',
            'human.pedestrian.child': 'pedestrian',
            'human.pedestrian.construction_worker': 'pedestrian',
            'human.pedestrian.police_officer': 'pedestrian',
            'movable_object.trafficcone': 'traffic_cone',
            'vehicle.trailer': 'trailer',
            'vehicle.truck': 'truck'
        }
        # cast attribute to index
        self.attribute_idx_list = {
            'vehicle.moving': 0,
            'vehicle.stopped': 1,
            'vehicle.parked': 2,
            'cycle.with_rider': 3,
            'cycle.without_rider': 4,
            'pedestrian.sitting_lying_down': 5,
            'pedestrian.standing': 6,
            'pedestrian.moving': 7,
            'default': -1,
        }
        self.idx_attribute_list = dict([
            (v, k) for k, v in self.attribute_idx_list.items()
        ])
        self.AttributeIdxLabelMapping = {
            "car": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "truck": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "bus": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "trailer": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "construction_vehicle":
            ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "pedestrian": [
                'pedestrian.sitting_lying_down', 'pedestrian.standing',
                'pedestrian.moving'
            ],
            "motorcycle": ['cycle.with_rider', 'cycle.without_rider', ''],
            "bicycle": ['cycle.with_rider', 'cycle.without_rider', ''],
            "traffic_cone": ['', '', ''],
            "barrier": ['', '', ''],
        }

        self.DefaultAttribute = {
            "car": "vehicle.parked",
            "pedestrian": "pedestrian.moving",
            "trailer": "vehicle.parked",
            "truck": "vehicle.parked",
            "bus": "vehicle.parked",
            "motorcycle": "cycle.without_rider",
            "construction_vehicle": "vehicle.parked",
            "bicycle": "cycle.without_rider",
            "barrier": "",
            "traffic_cone": "",
        }

        self.cls_list = cfg.DATASET.KITTI.CLS_LIST
        self.idx2cls_dict = dict([(idx + 1, cls)
                                  for idx, cls in enumerate(self.cls_list)])
        self.cls2idx_dict = dict([(cls, idx + 1)
                                  for idx, cls in enumerate(self.cls_list)])

        self.sv_npy_path = os.path.join(
            cfg.ROOT_DIR, cfg.DATASET.KITTI.SAVE_NUMPY_PATH, 'NuScenes',
            '{}_{}'.format(img_list, self.max_sweeps))
        self.train_list = os.path.join(self.sv_npy_path, 'infos.pkl')

        self.voxel_generator = VoxelGenerator()

        self.test_mode = cfg.TEST.TEST_MODE
        if self.test_mode == 'mAP':
            self.evaluation = self.evaluate_map
            self.logger_and_select_best = self.logger_and_select_best_map
        elif self.test_mode == 'Recall':
            self.evaluation = self.evaluate_recall
            self.logger_and_select_best = self.logger_and_select_best_recall
        else:
            raise Exception('No other evaluation mode.')

        if mode == 'loading':
            # data loader
            with open(self.train_list, 'rb') as f:
                self.train_npy_list = pickle.load(f)
            self.sample_num = len(self.train_npy_list)
            if self.is_training:
                self.data_augmentor = DataAugmentor(
                    'NuScenes', workers_num=self.workers_num)

        elif mode == 'preprocessing':
            # preprocess raw data
            if img_list == 'train':
                self.nusc = NuScenes(dataroot=self.dataset_dir,
                                     version='v1.0-trainval')
                self.scenes = [
                    scene for scene in self.nusc.scene
                    if scene['name'] in train_scene
                ]
            elif img_list == 'val':
                self.nusc = NuScenes(dataroot=self.dataset_dir,
                                     version='v1.0-trainval')
                self.scenes = [
                    scene for scene in self.nusc.scene
                    if scene['name'] in val_scene
                ]
            else:  # test
                self.nusc = NuScenes(dataroot=self.dataset_dir,
                                     version='v1.0-test')
                self.scenes = self.nusc.scene

            self.sample_data_token_list = OrderedDict()
            sample_num = 0
            for scene in self.scenes:
                # count the samples and save every sample_data token
                self.sample_data_token_list[scene['token']] = []
                all_sample = self.nusc.field2token('sample', 'scene_token',
                                                   scene['token'])
                sample_num += len(all_sample)
                for sample in all_sample:  # all sample token
                    sample = self.nusc.get('sample', sample)
                    cur_token = sample['token']
                    cur_data_token = sample['data']['LIDAR_TOP']
                    self.sample_data_token_list[scene['token']].append(
                        cur_data_token)

            self.sample_num = sample_num

            self.extents = cfg.DATASET.POINT_CLOUD_RANGE
            self.extents = np.reshape(self.extents, [3, 2])
            if not os.path.exists(self.sv_npy_path):
                os.makedirs(self.sv_npy_path)

            # also calculate the mean size here
            self.cls_size_dict = dict([(cls,
                                        np.array([0, 0, 0], dtype=np.float32))
                                       for cls in self.cls_list])
            self.cls_num_dict = dict([(cls, 0) for cls in self.cls_list])

            # the save path for MixupDB
            if self.img_list in [
                    'train', 'val', 'trainval'
            ] and cfg.TEST.WITH_GT and cfg.TRAIN.AUGMENTATIONS.MIXUP.OPEN:
                self.mixup_db_cls_path = dict()
                self.mixup_db_trainlist_path = dict()
                self.mixup_db_class = cfg.TRAIN.AUGMENTATIONS.MIXUP.CLASS
                for cls in self.mixup_db_class:
                    mixup_db_cls_path = os.path.join(
                        cfg.ROOT_DIR, cfg.DATASET.KITTI.SAVE_NUMPY_PATH,
                        cfg.TRAIN.AUGMENTATIONS.MIXUP.SAVE_NUMPY_PATH,
                        cfg.TRAIN.AUGMENTATIONS.MIXUP.PC_LIST,
                        '{}'.format(cls))
                    mixup_db_trainlist_path = os.path.join(
                        mixup_db_cls_path, 'train_list.txt')
                    if not os.path.exists(mixup_db_cls_path):
                        os.makedirs(mixup_db_cls_path)
                    self.mixup_db_cls_path[cls] = mixup_db_cls_path
                    self.mixup_db_trainlist_path[cls] = mixup_db_trainlist_path

    def __len__(self):
        return self.sample_num

    def load_samples(self, sample_idx, pipename):
        """ load data per thread """
        pipename = int(pipename)
        biggest_label_num = 0
        sample_dict = self.train_npy_list[sample_idx]

        points_path = sample_dict[maps_dict.KEY_POINT_CLOUD]
        sweeps = sample_dict[maps_dict.KEY_SWEEPS]
        sample_name = sample_dict[maps_dict.KEY_SAMPLE_NAME]
        cur_transformation_matrix = sample_dict[
            maps_dict.KEY_TRANSFORMRATION_MATRIX]
        ts = sample_dict[maps_dict.KEY_TIMESTAMPS] / 1e6

        # first read the current points, then stack points from multiple frames
        points = np.fromfile(points_path, dtype=np.float32)
        points = points.reshape((-1, 5))
        points = cast_points_to_kitti(points)
        points[:, 3] /= 255
        points[:, 4] = 0
        sweep_points_list = [points]
        original_cur_sweep_points = points
        cur_sweep_points_num = points.shape[0]
        for sweep in sweeps:
            points_sweep = np.fromfile(sweep['lidar_path'], dtype=np.float32)
            points_sweep = points_sweep.reshape((-1, 5))
            sweep_ts = sweep['timestamp'] / 1e6
            points_sweep[:, 3] /= 255
            points_sweep[:, :3] = points_sweep[:, :3] @ sweep[
                'sweep2lidar_rotation'].T
            points_sweep[:, :3] += sweep['sweep2lidar_translation']
            points_sweep[:, 4] = ts - sweep_ts
            points_sweep = cast_points_to_kitti(points_sweep)
            sweep_points_list.append(points_sweep)
        if cfg.DATASET.NUSCENES.INPUT_FEATURE_CHANNEL == 4:
            points = np.concatenate(sweep_points_list, axis=0)[:, [0, 1, 2, 4]]
        else:
            points = np.concatenate(sweep_points_list, axis=0)

        # then read the ground-truth labels if available
        if self.is_training or cfg.TEST.WITH_GT:
            label_boxes_3d = sample_dict[maps_dict.KEY_LABEL_BOXES_3D]
            label_boxes_3d = cast_box_3d_to_kitti_format(label_boxes_3d)

            label_classes_name = sample_dict[maps_dict.KEY_LABEL_CLASSES]
            label_classes = np.array([
                self.cls2idx_dict[label_class]
                for label_class in label_classes_name
            ])

            label_attributes = sample_dict[maps_dict.KEY_LABEL_ATTRIBUTES]
            label_velocity = sample_dict[
                maps_dict.KEY_LABEL_VELOCITY]  # [-1, 2]

            ry_cls_label, residual_angle = encode_angle2class_np(
                label_boxes_3d[:, -1], cfg.MODEL.ANGLE_CLS_NUM)
        else:  # not is_training and no_gt
            label_boxes_3d = np.zeros([1, 7], np.float32)
            label_classes = np.zeros([1], np.int32)
            label_attributes = np.zeros([1], np.int32)
            label_velocity = np.zeros([1, 2], np.float32)
            ry_cls_label = np.zeros([1], np.int32)
            residual_angle = np.zeros([1], np.float32)

        if self.is_training:  # data augmentation
            points, label_boxes_3d, label_classes, label_attributes, label_velocity, cur_sweep_points_num = self.data_augmentor.nuscenes_forward(
                points, label_boxes_3d, label_classes, pipename,
                label_attributes, label_velocity, cur_sweep_points_num)
            ry_cls_label, residual_angle = encode_angle2class_np(
                label_boxes_3d[:, -1], cfg.MODEL.ANGLE_CLS_NUM)
        cur_label_num = len(label_boxes_3d)

        # then randomly choose some points
        cur_sweep_points = points[:cur_sweep_points_num, :]  # [-1, 4]
        other_sweep_points = points[cur_sweep_points_num:, :]  # [-1, 4]
        if len(other_sweep_points) == 0:
            other_sweep_points = cur_sweep_points.copy()
        np.random.shuffle(cur_sweep_points)
        np.random.shuffle(other_sweep_points)

        input_sample_points, num_points_per_voxel = self.voxel_generator.generate_nusc(
            cur_sweep_points, other_sweep_points,
            cfg.DATASET.NUSCENE.MAX_CUR_SAMPLE_POINTS_NUM
        )  # points, [num_voxels, num_points, 5], sem_labels, [num_voxels, num_points]
        cur_sample_points = input_sample_points[:cfg.DATASET.NUSCENE.
                                                MAX_CUR_SAMPLE_POINTS_NUM]
        other_sample_points = input_sample_points[cfg.DATASET.NUSCENE.
                                                  MAX_CUR_SAMPLE_POINTS_NUM:]

        biggest_label_num = max(biggest_label_num, cur_label_num)
        return biggest_label_num, input_sample_points, cur_sample_points, other_sample_points, label_boxes_3d, ry_cls_label, residual_angle, label_classes, label_attributes, label_velocity, sample_name, cur_transformation_matrix, sweeps, original_cur_sweep_points

    def load_batch(self, batch_size):
        perm = np.arange(
            self.sample_num).tolist()  # a list indicates each data
        dp = DataFromList(perm,
                          is_train=self.is_training,
                          shuffle=self.is_training)
        dp = MultiProcessMapData(dp, self.load_samples, self.workers_num)

        use_list = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1]
        use_concat = [0, 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0]

        dp = BatchDataNuscenes(dp,
                               batch_size,
                               use_concat=use_concat,
                               use_list=use_list)
        dp.reset_state()
        dp = dp.get_data()
        return dp

    # Preprocess data
    def preprocess_samples(self, cur_scene_key, sample_data_token):
        sample_dicts = []
        biggest_label_num = 0

        cur_sample_data = self.nusc.get('sample_data', sample_data_token)
        cur_sample_token = cur_sample_data['sample_token']
        cur_sample = self.nusc.get('sample', cur_sample_token)

        ego_pose = self.nusc.get('ego_pose', cur_sample_data['ego_pose_token'])
        calibrated_sensor = self.nusc.get(
            'calibrated_sensor', cur_sample_data['calibrated_sensor_token'])

        l2e_r = calibrated_sensor['rotation']
        l2e_t = calibrated_sensor['translation']
        e2g_r = ego_pose['rotation']
        e2g_t = ego_pose['translation']
        l2e_r_mat = Quaternion(l2e_r).rotation_matrix
        e2g_r_mat = Quaternion(e2g_r).rotation_matrix
        cur_timestamp = cur_sample['timestamp']

        cur_transformation_matrix = {
            'lidar2ego_translation': l2e_t,
            'lidar2ego_rotation': l2e_r,
            'ego2global_translation': e2g_t,
            'ego2global_rotation': e2g_r,
        }

        # gather point cloud sweeps from the previous 0.5 seconds
        sweeps = []
        while len(sweeps) < self.max_sweeps:
            if cur_sample_data['prev'] != '':
                # there is a previous frame
                cur_sample_data = self.nusc.get('sample_data',
                                                cur_sample_data['prev'])
                cur_ego_pose = self.nusc.get('ego_pose',
                                             cur_sample_data['ego_pose_token'])
                cur_calibrated_sensor = self.nusc.get(
                    'calibrated_sensor',
                    cur_sample_data['calibrated_sensor_token'])
                cur_lidar_path, cur_sweep_boxes, _ = self.nusc.get_sample_data(
                    cur_sample_data['token'])
                sweep = {
                    "lidar_path": cur_lidar_path,
                    "sample_data_token": cur_sample_data['token'],
                    "lidar2ego_translation":
                    cur_calibrated_sensor['translation'],
                    "lidar2ego_rotation": cur_calibrated_sensor['rotation'],
                    "ego2global_translation": cur_ego_pose['translation'],
                    "ego2global_rotation": cur_ego_pose['rotation'],
                    "timestamp": cur_sample_data["timestamp"]
                }
                l2e_r_s = sweep["lidar2ego_rotation"]
                l2e_t_s = sweep["lidar2ego_translation"]
                e2g_r_s = sweep["ego2global_rotation"]
                e2g_t_s = sweep["ego2global_translation"]
                # sweep->ego->global->ego'->lidar
                l2e_r_s_mat = Quaternion(l2e_r_s).rotation_matrix
                e2g_r_s_mat = Quaternion(e2g_r_s).rotation_matrix

                R = (l2e_r_s_mat.T @ e2g_r_s_mat.T) @ (
                    np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)
                T = (l2e_t_s @ e2g_r_s_mat.T + e2g_t_s) @ (
                    np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)
                T -= e2g_t @ (np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(
                    l2e_r_mat).T) + l2e_t @ np.linalg.inv(l2e_r_mat).T

                sweep["sweep2lidar_rotation"] = R.T  # points @ R.T + T
                sweep["sweep2lidar_translation"] = T
                sweeps.append(sweep)
            else:  # no previous frame
                break

        # then load gt_boxes_3d
        if self.img_list in ['train', 'val'] and cfg.TEST.WITH_GT:
            cur_data_path, all_boxes, _ = self.nusc.get_sample_data(
                sample_data_token)

            # then first parse boxes labels
            locs = np.array([box.center for box in all_boxes]).reshape(-1, 3)
            sizes = np.array([box.wlh for box in all_boxes]).reshape(-1, 3)
            rots = np.array([
                box.orientation.yaw_pitch_roll[0] for box in all_boxes
            ]).reshape(-1, 1)
            all_boxes_3d = np.concatenate([locs, sizes, -rots], axis=-1)

            annos_tokens = cur_sample['anns']
            all_velocity = np.array([
                self.nusc.box_velocity(ann_token)[:2]
                for ann_token in annos_tokens
            ])  # [-1, 2]
            for i in range(len(all_boxes)):
                velo = np.array([*all_velocity[i], 0.0])
                velo = velo @ np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(
                    l2e_r_mat).T
                all_velocity[i] = velo[:2]  # [-1, 2]

            attribute_tokens = [
                self.nusc.get('sample_annotation',
                              ann_token)['attribute_tokens']
                for ann_token in annos_tokens
            ]
            all_attribute = []
            for attribute_token in attribute_tokens:
                if len(attribute_token) == 0:
                    all_attribute.append([])
                else:
                    all_attribute.append(
                        self.nusc.get('attribute', attribute_token[0])['name'])
            # then filter these ignore labels
            categories = np.array([box.name for box in all_boxes])
            if self.img_list == 'train':
                useful_idx = [
                    index for index, category in enumerate(categories)
                    if self.useful_cls_dict[category] != 'ignore'
                ]
            else:
                useful_idx = [
                    index for index, category in enumerate(categories)
                ]
            if len(useful_idx) == 0:
                if self.img_list == 'train':
                    return None, biggest_label_num
                else:
                    all_boxes_3d = np.ones([1, 7], dtype=np.float32)
                    all_boxes_classes = np.array(['ignore'])
                    all_attribute = np.array([-1])
                    all_velocity = np.array([[0, 0]], dtype=np.float32)
            else:
                all_boxes_3d = all_boxes_3d[useful_idx]

                categories = categories[useful_idx]
                all_boxes_classes = np.array(
                    [self.useful_cls_dict[cate] for cate in categories])
                # now calculate the mean size of each box
                for tmp_idx, all_boxes_class in enumerate(all_boxes_classes):
                    cur_mean_size = self.cls_size_dict[
                        all_boxes_class] * self.cls_num_dict[all_boxes_class]
                    cur_cls_num = self.cls_num_dict[all_boxes_class] + 1
                    cur_total_size = cur_mean_size + all_boxes_3d[tmp_idx, [
                        4, 5, 3
                    ]]  # [l, w, h]
                    cur_mean_size = cur_total_size / cur_cls_num
                    self.cls_size_dict[all_boxes_class] = cur_mean_size
                    self.cls_num_dict[all_boxes_class] = cur_cls_num

                all_attribute = [
                    all_attribute[tmp_idx] for tmp_idx in useful_idx
                ]
                tmp_attribute = []
                for attr in all_attribute:
                    if attr == []: tmp_attribute.append(-1)
                    else:
                        tmp_attribute.append(self.attribute_idx_list[attr])
                all_attribute = tmp_attribute
                all_attribute = np.array(all_attribute, dtype=np.int32)
                all_velocity = [
                    all_velocity[tmp_idx] for tmp_idx in useful_idx
                ]
                all_velocity = np.array(all_velocity, dtype=np.float32)
        else:
            cur_data_path = self.nusc.get_sample_data_path(sample_data_token)

        # then generate the bev_maps
        if self.img_list in ['train', 'val', 'trainval'] and cfg.TEST.WITH_GT:
            sample_dict = {
                maps_dict.KEY_LABEL_BOXES_3D:
                all_boxes_3d,
                maps_dict.KEY_LABEL_CLASSES:
                all_boxes_classes,
                maps_dict.KEY_LABEL_ATTRIBUTES:
                all_attribute,
                maps_dict.KEY_LABEL_VELOCITY:
                all_velocity,
                maps_dict.KEY_LABEL_NUM:
                len(all_boxes_3d),
                maps_dict.KEY_POINT_CLOUD:
                cur_data_path,
                maps_dict.KEY_TRANSFORMRATION_MATRIX:
                cur_transformation_matrix,
                maps_dict.KEY_SAMPLE_NAME:
                '{}/{}/{}'.format(cur_scene_key, cur_sample_token,
                                  sample_data_token),
                maps_dict.KEY_SWEEPS:
                sweeps,
                maps_dict.KEY_TIMESTAMPS:
                cur_timestamp,
            }
            biggest_label_num = max(len(all_boxes_3d), biggest_label_num)
        else:
            # img_list is test
            sample_dict = {
                maps_dict.KEY_POINT_CLOUD:
                cur_data_path,
                maps_dict.KEY_SAMPLE_NAME:
                '{}/{}/{}'.format(cur_scene_key, cur_sample_token,
                                  sample_data_token),
                maps_dict.KEY_TRANSFORMRATION_MATRIX:
                cur_transformation_matrix,
                maps_dict.KEY_SWEEPS:
                sweeps,
                maps_dict.KEY_TIMESTAMPS:
                cur_timestamp,
            }
        return sample_dict, biggest_label_num

    def preprocess_batch(self):
        # if create_gt_dataset, then also create a boxes_numpy, saving all points
        if cfg.TRAIN.AUGMENTATIONS.MIXUP.OPEN:  # also save mixup database
            mixup_label_dict = dict([(cls, []) for cls in self.mixup_db_class])

        sample_dicts_list = []
        for scene_key, v in tqdm.tqdm(self.sample_data_token_list.items()):
            for sample_data_token in v:
                sample_dict, tmp_biggest_label_num = self.preprocess_samples(
                    scene_key, sample_data_token)
                if sample_dict is None:
                    continue
                # else save the result
                sample_dicts_list.append(sample_dict)

                # create_gt_dataset
                if self.img_list in [
                        'train', 'val', 'trainval'
                ] and cfg.TEST.WITH_GT and cfg.TRAIN.AUGMENTATIONS.MIXUP.OPEN:
                    mixup_sample_dicts = self.generate_mixup_sample(
                        sample_dict)
                    if mixup_sample_dicts is None:
                        continue
                    for mixup_sample_dict in mixup_sample_dicts:
                        cur_cls = mixup_sample_dict[
                            maps_dict.KEY_SAMPLED_GT_CLSES]
                        mixup_label_dict[cur_cls].append(mixup_sample_dict)

        # save preprocessed data
        with open(self.train_list, 'wb') as f:
            pickle.dump(sample_dicts_list, f)
        for k, v in self.cls_num_dict.items():
            print('class name: %s / class num: %d / mean size: (%f, %f, %f)' %
                  (k, v, self.cls_size_dict[k][0], self.cls_size_dict[k][1],
                   self.cls_size_dict[k][2]))  # [l, w, h]

        if self.img_list in [
                'train', 'val', 'trainval'
        ] and cfg.TEST.WITH_GT and cfg.TRAIN.AUGMENTATIONS.MIXUP.OPEN:
            print('**** Generating groundtruth database ****')
            for cur_cls_name, mixup_sample_dict in mixup_label_dict.items():
                cur_mixup_db_cls_path = self.mixup_db_cls_path[cur_cls_name]
                cur_mixup_db_trainlist_path = self.mixup_db_trainlist_path[
                    cur_cls_name]
                print('**** Class %s ****' % cur_cls_name)
                with open(cur_mixup_db_trainlist_path, 'w') as f:
                    for tmp_idx, tmp_cur_mixup_sample_dict in tqdm.tqdm(
                            enumerate(mixup_sample_dict)):
                        f.write('%06d.npy\n' % tmp_idx)
                        np.save(
                            os.path.join(cur_mixup_db_cls_path,
                                         '%06d.npy' % tmp_idx),
                            tmp_cur_mixup_sample_dict)
        print('Preprocessing finished!')

    def generate_mixup_sample(self, sample_dict):
        """ This function is bound for generating mixup dataset """
        all_boxes_3d = sample_dict[maps_dict.KEY_LABEL_BOXES_3D]
        all_boxes_classes = sample_dict[maps_dict.KEY_LABEL_CLASSES]
        point_cloud_path = sample_dict[maps_dict.KEY_POINT_CLOUD]

        # then we first cast all_boxes_3d to kitti format
        all_boxes_3d = cast_box_3d_to_kitti_format(all_boxes_3d)

        # load points
        points = np.fromfile(point_cloud_path, dtype=np.float32).reshape(
            (-1, 5))
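        # nuScenes LIDAR_TOP .pcd.bin files store 5 floats per point: x, y, z, intensity, ring index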
        points = cast_points_to_kitti(points)
        points[:, 3] /= 255
        points[:, 4] = 0  # timestamp is zero

        points_mask = check_inside_points(points,
                                          all_boxes_3d)  # [pts_num, gt_num]
        points_masks_num = np.sum(points_mask, axis=0)  # [gt_num]
        valid_box_idx = np.where(
            points_masks_num >= cfg.DATASET.MIN_POINTS_NUM)[0]

        if len(valid_box_idx) == 0:
            return None

        valid_label_boxes_3d = all_boxes_3d[valid_box_idx]
        valid_label_classes = all_boxes_classes[valid_box_idx]

        sample_dicts = []
        for index, i in enumerate(valid_box_idx):
            cur_points_mask = points_mask[:, i]
            cur_points_idx = np.where(cur_points_mask)[0]
            cur_inside_points = points[cur_points_idx, :]
            sample_dict = {
                # 0 timestamp and /255 reflectance
                maps_dict.KEY_SAMPLED_GT_POINTS:
                cur_inside_points,  # kitti format points
                maps_dict.KEY_SAMPLED_GT_LABELS_3D:
                valid_label_boxes_3d[index],
                maps_dict.KEY_SAMPLED_GT_CLSES:
                valid_label_classes[index],
            }
            sample_dicts.append(sample_dict)
        return sample_dicts

    # Evaluation
    def set_evaluation_tensor(self, model):
        # get prediction results, bs = 1
        pred_bbox_3d = tf.squeeze(model.output[maps_dict.PRED_3D_BBOX][-1],
                                  axis=0)
        pred_cls_score = tf.squeeze(model.output[maps_dict.PRED_3D_SCORE][-1],
                                    axis=0)
        pred_cls_category = tf.squeeze(
            model.output[maps_dict.PRED_3D_CLS_CATEGORY][-1], axis=0)
        pred_list = [pred_bbox_3d, pred_cls_score, pred_cls_category]

        if len(model.output[maps_dict.PRED_3D_ATTRIBUTE]) > 0:
            pred_attribute = tf.squeeze(
                model.output[maps_dict.PRED_3D_ATTRIBUTE][-1], axis=0)
            pred_velocity = tf.squeeze(
                model.output[maps_dict.PRED_3D_VELOCITY][-1], axis=0)
            pred_list.extend([pred_attribute, pred_velocity])
        return pred_list

    def evaluate_map(self,
                     sess,
                     feeddict_producer,
                     pred_list,
                     val_size,
                     cls_thresh,
                     log_dir,
                     placeholders=None):
        submissions = {}
        submissions['meta'] = dict()
        submissions['meta']['use_camera'] = False
        submissions['meta']['use_lidar'] = True
        submissions['meta']['use_radar'] = False
        submissions['meta']['use_map'] = False
        submissions['meta']['use_external'] = False

        submissions_results = dict()
        pred_attr_velo = (len(pred_list) == 5)
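        # five output tensors means the attribute and velocity heads are present (see set_evaluation_tensor)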

        for i in tqdm.tqdm(range(val_size)):
            feed_dict = feeddict_producer.create_feed_dict()

            if pred_attr_velo:
                pred_bbox_3d_op, pred_cls_score_op, pred_cls_category_op, pred_attr_op, pred_velo_op = sess.run(
                    pred_list, feed_dict=feed_dict)
            else:
                pred_bbox_3d_op, pred_cls_score_op, pred_cls_category_op = sess.run(
                    pred_list, feed_dict=feed_dict)
            pred_cls_category_op += 1  # label from 1 to n

            sample_name, cur_transformation_matrix, sweeps = feeddict_producer.info
            sample_name = sample_name[0]
            cur_transformation_matrix = cur_transformation_matrix[0]
            sweeps = sweeps[0]
            cur_scene_key, cur_sample_token, cur_sample_data_token = sample_name.split(
                '/')

            select_idx = np.where(pred_cls_score_op >= cls_thresh)[0]
            pred_cls_score_op = pred_cls_score_op[select_idx]
            pred_cls_category_op = pred_cls_category_op[select_idx]
            pred_bbox_3d_op = pred_bbox_3d_op[select_idx]
            if pred_attr_velo:
                pred_attr_op = pred_attr_op[select_idx]
                pred_velo_op = pred_velo_op[select_idx]
            else:
                pred_attr_op, pred_velo_op = None, None
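            # the nuScenes detection benchmark accepts at most 500 boxes per sample,
            # so keep only the highest-scoring predictions when there are more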

            if len(pred_bbox_3d_op) > 500:
                arg_sort_idx = np.argsort(pred_cls_score_op)[::-1]
                arg_sort_idx = arg_sort_idx[:500]
                pred_cls_score_op = pred_cls_score_op[arg_sort_idx]
                pred_cls_category_op = pred_cls_category_op[arg_sort_idx]
                pred_bbox_3d_op = pred_bbox_3d_op[arg_sort_idx]
                if pred_attr_velo:
                    pred_attr_op = pred_attr_op[arg_sort_idx]
                    pred_velo_op = pred_velo_op[arg_sort_idx]

            # then transform pred_bbox_op to nuscenes_box
            boxes = cast_kitti_format_to_nusc_box_3d(
                pred_bbox_3d_op,
                pred_cls_score_op,
                pred_cls_category_op,
                cur_attribute=pred_attr_op,
                cur_velocity=pred_velo_op,
                classes=self.idx2cls_dict)
            for box in boxes:
                velocity = box.velocity[:2].tolist()
                if len(sweeps) == 0:
                    velocity = (np.nan, np.nan)
                box.velocity = np.array([*velocity, 0.0])
            # then cast the box from ego to global
            boxes = _lidar_nusc_box_to_global(cur_transformation_matrix,
                                              boxes,
                                              self.idx2cls_dict,
                                              eval_version='cvpr_2019')

            annos = []
            for box in boxes:
                name = self.idx2cls_dict[box.label]
                if box.name == -1:
                    attr = self.DefaultAttribute[name]
                else:
                    attr = self.AttributeIdxLabelMapping[name][box.name]
                velocity = box.velocity[:2].tolist()
                nusc_anno = {
                    "sample_token": cur_sample_token,
                    "translation": box.center.tolist(),
                    "size": box.wlh.tolist(),
                    "rotation": box.orientation.elements.tolist(),
                    "velocity": velocity,
                    "detection_name": name,
                    "detection_score": box.score,
                    "attribute_name": attr,
                }
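                # one entry in the official nuScenes detection submission format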
                annos.append(nusc_anno)
            submissions_results[cur_sample_token] = annos

        submissions['results'] = submissions_results

        res_path = os.path.join(log_dir, "results_nusc_1.json")
        with open(res_path, "w") as f:
            json.dump(submissions, f)
        eval_main_file = os.path.join(cfg.ROOT_DIR, 'lib/core/nusc_eval.py')
        root_path = self.dataset_dir
        cmd = f"python3 {str(eval_main_file)} --root_path=\"{str(root_path)}\""
        cmd += f" --version={'v1.0-trainval'} --eval_version={'cvpr_2019'}"
        cmd += f" --res_path=\"{str(res_path)}\" --eval_set={'val'}"
        cmd += f" --output_dir=\"{LOG_FOUT_DIR}\""
        # use subprocess can release all nusc memory after evaluation
        subprocess.check_output(cmd, shell=True)
        os.system('rm \"%s\"' % res_path)  # remove the temporary result file after evaluation

        with open(os.path.join(log_dir, "metrics_summary.json"), "r") as f:
            metrics = json.load(f)
        return metrics

    def evaluate_recall(self,
                        sess,
                        feeddict_producer,
                        pred_list,
                        val_size,
                        cls_thresh,
                        log_dir,
                        placeholders=None):
        pass

    def logger_and_select_best_map(self, metrics, log_string):
        detail = {}
        result = f"Nusc v1.0-trainval Evaluation\n"
        final_score = []
        for name in self.cls_list:
            detail[name] = {}
            for k, v in metrics["label_aps"][name].items():
                detail[name][f"dist@{k}"] = v
            tp_errs = []
            tp_names = []
            for k, v in metrics["label_tp_errors"][name].items():
                detail[name][k] = v
                tp_errs.append(f"{v:.4f}")
                tp_names.append(k)
            threshs = ', '.join(list(metrics["label_aps"][name].keys()))
            scores = list(metrics["label_aps"][name].values())
            final_score.append(np.mean(scores))
            scores = ', '.join([f"{s * 100:.2f}" for s in scores])
            result += f"{name} Nusc dist AP@{threshs} and TP errors\n"
            result += scores
            result += "\n"
            result += "mAP: %0.2f\n" % (
                np.mean(list(metrics["label_aps"][name].values())) * 100)
            result += ', '.join(tp_names) + ": " + ', '.join(tp_errs)
            result += "\n"
        result += 'NDS score: %0.2f\n' % (metrics['nd_score'] * 100)
        log_string(result)

        cur_result = metrics['nd_score']
        return cur_result

    def logger_and_select_best_recall(self, metrics, log_string):
        pass

    # save prediction results
    def save_predictions(self,
                         sess,
                         feeddict_producer,
                         pred_list,
                         val_size,
                         cls_thresh,
                         log_dir,
                         placeholders=None):
        pass
def test(estimator=None):

    if args.Joint:
        nusc = NuScenes(
            version='v1.0-trainval', verbose=True,
            dataroot=args.nuscenes_root,
        )

    dataset_image_folder_format = os.path.join(
        args.nuscenes_data_root, args.type+'/'+'{}/img1',
    )
    detection_file_name_format = os.path.join(
        args.nuscenes_data_root, args.type+'/'+'{}/gt/gt.txt',
    )

    if not os.path.exists(args.log_folder):
        os.mkdir(args.log_folder)

    save_folder = args.log_folder
    if not os.path.exists(save_folder):
        os.mkdir(save_folder)

    saved_file_name_format = os.path.join(save_folder, 'Video'+'{}.txt')
    save_video_name_format = os.path.join(save_folder, 'Video'+'{}.avi')
    test_dataset_index = os.listdir(
        os.path.join(args.nuscenes_data_root, args.type),
    )

    def f(format_str):
        return [
            format_str.format(index)
            for index in test_dataset_index
        ]
    for image_folder, detection_file_name, saved_file_name, save_video_name in zip(f(dataset_image_folder_format), f(detection_file_name_format), f(saved_file_name_format), f(save_video_name_format)):
        if os.path.exists(image_folder) and os.path.getsize(detection_file_name) > 0:

            tracker = Tracker(estimator)
            reader = DataReader(
                image_folder=image_folder,
                detection_file_name=detection_file_name,
            )
            result = list()
            first_run = True
            for i, item in enumerate(reader):

                if i > len(reader):
                    break

                if item is None:
                    print('item is none')
                    continue

                img = item[0]
                det = item[1]
                if img is None or det is None or len(det) == 0:
                    continue
                if len(det) > config['max_object']:
                    det = det[:config['max_object'], :]

                h, w, _ = img.shape

                if first_run:
                    vw = cv2.VideoWriter(
                        save_video_name, cv2.VideoWriter_fourcc(
                            'M', 'J', 'P', 'G',
                        ), 10, (w, h),
                    )
                    first_run = False

                features = det[:, 6:12].astype(float)
                tokens = det[:, 12:]

                if args.Joint:
                    tokens = tokens[0][1:]
                    frame_token = tokens[0]
                    first_frame_token = tokens[1]

                    current_cam_record = nusc.get('sample_data', frame_token)
                    current_cam_path = nusc.get_sample_data_path(frame_token)
                    current_cam_path, boxes, current_camera_intrinsic = nusc.get_sample_data(
                        current_cam_record['token'],
                    )
                    current_cs_record = nusc.get(
                        'calibrated_sensor', current_cam_record['calibrated_sensor_token'],
                    )
                    current_poserecord = nusc.get(
                        'ego_pose', current_cam_record['ego_pose_token'],
                    )

                    first_cam_record = nusc.get(
                        'sample_data', first_frame_token,
                    )
                    first_cam_path = nusc.get_sample_data_path(
                        first_frame_token,
                    )
                    first_cam_path, boxes, first_camera_intrinsic = nusc.get_sample_data(
                        first_cam_record['token'],
                    )
                    first_cs_record = nusc.get(
                        'calibrated_sensor', first_cam_record['calibrated_sensor_token'],
                    )
                    first_poserecord = nusc.get(
                        'ego_pose', first_cam_record['ego_pose_token'],
                    )

                det[:, [2, 4]] /= float(w)
                det[:, [3, 5]] /= float(h)
                if args.Joint:
                    image_org = tracker.update(
                        args.Joint, img, det[
                            :,
                            2:6
                        ], args.show_image, i, features, tokens, False,
                        current_cs_record, current_poserecord, first_cs_record, first_poserecord, first_camera_intrinsic,
                    )
                else:
                    image_org = tracker.update(
                        args.Joint, img, det[:, 2:6], args.show_image, i,
                    )

                vw.write(image_org)

                # save result
                for t in tracker.tracks:
                    n = t.nodes[-1]
                    if t.age == 1:
                        b = n.get_box(tracker.frame_index-1, tracker.recorder)
#                         print('n.pose ',n.pose.cpu().data.numpy())
                        if args.Joint:
                            result.append(
                                [i] + [t.id] +
                                [b[0]*w, b[1]*h, b[2]*w, b[3]*h] +
                                list(n.pose.cpu().data.numpy()),
                            )
                        else:
                            result.append(
                                [i] + [t.id] + [b[0]*w, b[1]*h, b[2]*w, b[3]*h],
                            )
            # save data
            np.savetxt(saved_file_name, np.array(result), fmt='%10f')
    def create_annotations(self, version, max_sweeps):
        from nuscenes.nuscenes import NuScenes
        nusc = NuScenes(version=version, dataroot=self.cfg.DATA.ROOTDIR, verbose=True)
        from nuscenes.utils import splits
        available_vers = ["v1.0-trainval", "v1.0-test", "v1.0-mini"]
        assert version in available_vers
        if version == "v1.0-trainval":
            train_scenes = splits.train
            val_scenes = splits.val
        elif version == "v1.0-test":
            train_scenes = splits.test
            val_scenes = []
        elif version == "v1.0-mini":
            train_scenes = splits.mini_train
            val_scenes = splits.mini_val
        else:
            raise ValueError("unknown")
        root_path = Path(self.cfg.DATA.ROOTDIR)
        available_scenes = self._get_available_scenes(nusc)
        available_scene_names = [s["name"] for s in available_scenes]
        train_scenes = list(filter(lambda x: x in available_scene_names, train_scenes))
        val_scenes = list(filter(lambda x: x in available_scene_names, val_scenes))
        train_scenes = set([
            available_scenes[available_scene_names.index(s)]["token"]
            for s in train_scenes
        ])
        val_scenes = set([
            available_scenes[available_scene_names.index(s)]["token"]
            for s in val_scenes
        ])
        print(
            f"train scene: {len(train_scenes)}, val scene: {len(val_scenes)}")
        self.train_infos = dict()
        self.val_infos = dict()
        train_index = 0
        val_index = 0
        
        for sample in tqdm(nusc.sample, desc="Generating train infos..."):
            lidar_token = sample["data"]["LIDAR_TOP"]
            sd_rec = nusc.get('sample_data', lidar_token)
            cs_record = nusc.get('calibrated_sensor', sd_rec['calibrated_sensor_token'])
            pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token'])
            lidar_path, boxes, _ = nusc.get_sample_data(lidar_token)
            assert Path(lidar_path).exists(), f"lidar file missing: {lidar_path}"
            item = {
                "lidar_path": lidar_path,
                "token": sample["token"],
                "sweeps": [],
                "calib": cs_record,
                "ego_pose": pose_record,
                "timestamp": sample["timestamp"]
            }
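            # l2e: lidar-to-ego (calibrated_sensor), e2g: ego-to-global (ego_pose);
            # _t is a translation vector, _r a rotation quaternion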
            l2e_t = cs_record['translation']
            l2e_r = cs_record['rotation']
            e2g_t = pose_record['translation']
            e2g_r = pose_record['rotation']
            
            l2e_r_mat = Quaternion(l2e_r).rotation_matrix
            e2g_r_mat = Quaternion(e2g_r).rotation_matrix

            sweeps = []
            while len(sweeps) < max_sweeps:
                if not sd_rec['prev'] == "":
                    sd_rec = nusc.get('sample_data', sd_rec['prev'])
                    cs_record = nusc.get('calibrated_sensor', sd_rec['calibrated_sensor_token'])
                    pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token'])
                    lidar_path = nusc.get_sample_data_path(sd_rec['token'])
                    sweep = {
                        "lidar_path": lidar_path,
                        "sample_data_token": sd_rec['token'],
                        "timestamp": sd_rec["timestamp"]
                    }
                    l2e_r_s = cs_record["rotation"]
                    l2e_t_s = cs_record["translation"]
                    e2g_r_s = pose_record["rotation"]
                    e2g_t_s = pose_record["translation"]
                    ## sweep->ego->global->ego'->lidar
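                    # i.e. map sweep-lidar points through ego(sweep) and global into the
                    # reference lidar frame; for row-vector points this composes to p_ref = p_sweep @ R + T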
                    l2e_r_s_mat = Quaternion(l2e_r_s).rotation_matrix
                    e2g_r_s_mat = Quaternion(e2g_r_s).rotation_matrix

                    R = (l2e_r_s_mat.T @ e2g_r_s_mat.T) @ (
                        np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T
                    )
                    T = (l2e_t_s @ e2g_r_s_mat.T + e2g_t_s) @ (
                        np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T
                    )
                    T -= e2g_t @ (np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(
                        l2e_r_mat).T) + l2e_t @ np.linalg.inv(l2e_r_mat).T 
                    sweep["sweep2lidar_rotation"] = R.T 
                    sweep["sweep2lidar_translation"] = T
                    sweeps.append(sweep)
                else:
                    break
            item["sweeps"] = sweeps

            annotations = [
                nusc.get('sample_annotation', token)
                for token in sample['anns']
            ]
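            # boxes returned by get_sample_data() above are already expressed in the lidar sensor frame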
            locs = np.array([b.center for b in boxes]).reshape(-1, 3)
            dims = np.array([b.wlh for b in boxes]).reshape(-1, 3)
            rots = np.array([b.orientation.yaw_pitch_roll[0] 
                            for b in boxes]).reshape(-1, 1)
            names = [b.name for b in boxes]
            class_idx = []
            for i in range(len(names)):
                if names[i] in NuscenesDataset.NameMapping:
                    names[i] = NuscenesDataset.NameMapping[names[i]]
                    class_idx.append(nuscenes_class_name_to_idx(names[i]))
            names = np.array(names)
            class_idx = np.array(class_idx)

            gt_boxes = np.concatenate([locs, dims, rots], axis=1)

            ## In Nuscenes Dataset, need to filter some rare class
            mask = np.array([NuscenesDataset.Nuscenes_classes.count(s) > 0 for s in names], dtype=np.bool_)
            gt_boxes = gt_boxes[mask]
            names = names[mask]
            
            # assert len(gt_boxes) == len(
            #     annotations), f"{len(gt_boxes)}, {len(annotations)}."
            assert len(gt_boxes) == len(names), f"{len(gt_boxes)}, {len(names)}"
            
            assert len(gt_boxes) == len(class_idx), f"{len(gt_boxes)}, {len(class_idx)}"

            item["boxes"] = gt_boxes
            item["class_idx"] = class_idx
            item["names"] = names
            item["num_lidar_pts"] = np.array(
                [a["num_lidar_pts"] for a in annotations]
            )
            item["num_radar_pts"] = np.array(
                [a["num_radar_pts"] for a in annotations]
            )
            if sample["scene_token"] in train_scenes:
                self.train_infos[train_index] = item
                train_index += 1
            else:
                self.val_infos[val_index] = item
                val_index += 1
def train():

    net.train()
    current_lr = config['learning_rate']
    print('Loading Dataset...')

    dataset = TrainDataset(
        args.nuscenes_data_root,
        SSJAugmentation(
            config['image_size'],
            means,
        ),
    )
    print('length = ', len(dataset))
    epoch_size = len(dataset) // args.batch_size
    step_index = 0

    batch_iterator = None
    batch_size = 1
    data_loader = data.DataLoader(
        dataset,
        batch_size,
        num_workers=args.num_workers,
        shuffle=True,
        collate_fn=collate_fn,
        pin_memory=False,
    )
    if args.Joint:
        nusc = NuScenes(
            version='v1.0-trainval',
            verbose=True,
            dataroot=args.nuscenes_root,
        )
    for iteration in range(0, 380000):
        print(iteration)
        if (not batch_iterator) or (iteration % epoch_size == 0):
            # create batch iterator
            batch_iterator = iter(data_loader)
            all_epoch_loss = []
        print('pass 0')
        if iteration in stepvalues:
            step_index += 1
            current_lr = current_lr * 0.5
            for param_group in optimizer.param_groups:
                param_group['lr'] = current_lr

        # load train data
        img_pre, img_next, boxes_pre, boxes_next, labels, valid_pre, valid_next, current_poses, next_poses, pre_bbox, next_bbox, img_org_pre, img_org_next, current_tokens, next_tokens =\
            next(batch_iterator)

        if args.Joint:
            current_token = current_tokens[0]
            next_token = next_tokens[0]
            first_token = current_tokens[1]

            current_cam_record = nusc.get('sample_data', current_token)
            current_cam_path = nusc.get_sample_data_path(current_token)
            current_cam_path, boxes, current_camera_intrinsic = nusc.get_sample_data(
                current_cam_record['token'], )
            current_cs_record = nusc.get(
                'calibrated_sensor',
                current_cam_record['calibrated_sensor_token'],
            )
            current_poserecord = nusc.get(
                'ego_pose',
                current_cam_record['ego_pose_token'],
            )

            next_cam_record = nusc.get('sample_data', next_token)
            next_cam_path = nusc.get_sample_data_path(next_token)
            next_cam_path, boxes, next_camera_intrinsic = nusc.get_sample_data(
                next_cam_record['token'], )
            next_cs_record = nusc.get(
                'calibrated_sensor',
                next_cam_record['calibrated_sensor_token'],
            )
            next_poserecord = nusc.get(
                'ego_pose',
                next_cam_record['ego_pose_token'],
            )

            first_cam_record = nusc.get('sample_data', first_token)
            first_cam_path = nusc.get_sample_data_path(first_token)
            first_cam_path, boxes, first_camera_intrinsic = nusc.get_sample_data(
                first_cam_record['token'], )
            first_cs_record = nusc.get(
                'calibrated_sensor',
                first_cam_record['calibrated_sensor_token'],
            )
            first_poserecord = nusc.get(
                'ego_pose',
                first_cam_record['ego_pose_token'],
            )

        if args.cuda:
            img_pre = (img_pre.cuda())
            img_next = (img_next.cuda())
            boxes_pre = (boxes_pre.cuda())
            boxes_next = (boxes_next.cuda())
            valid_pre = (valid_pre.cuda())
            valid_next = (valid_next.cuda())
            labels = (labels.cuda())
            current_poses = (current_poses.cuda())
            next_poses = (next_poses.cuda())
            pre_bbox = (pre_bbox.cuda())
            next_bbox = (next_bbox.cuda())
            img_org_pre = (img_org_pre.cuda())
            img_org_next = (img_org_next.cuda())

        # forward
        if args.Joint:
            out, pose_loss = net(
                args.Joint,
                img_pre,
                img_next,
                boxes_pre,
                boxes_next,
                valid_pre,
                valid_next,
                current_poses,
                next_poses,
                pre_bbox,
                next_bbox,
                img_org_pre,
                img_org_next,
                current_cs_record,
                current_poserecord,
                next_cs_record,
                next_poserecord,
                first_cs_record,
                first_poserecord,
                first_camera_intrinsic,
            )

        else:
            out = net(
                img_pre,
                img_next,
                boxes_pre,
                boxes_next,
                valid_pre,
                valid_next,
            )

        optimizer.zero_grad()
        loss_pre, loss_next, loss_similarity, loss, accuracy_pre, accuracy_next, accuracy, predict_indexes = criterion(
            out,
            labels,
            valid_pre,
            valid_next,
        )
        total_loss = loss + 0.1 * pose_loss if args.Joint else loss
        total_loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        all_epoch_loss += [loss.data.cpu()]

        print(
            'iter ' + repr(iteration) + ', ' + repr(epoch_size) +
            ' || epoch: %.4f ' % (iteration / (float)(epoch_size)) +
            ' || Loss: %.4f ||' % (loss.data.item()),
            end=' ',
        )

        if iteration % 1000 == 0:

            result_image = show_batch_circle_image(
                img_pre,
                img_next,
                boxes_pre,
                boxes_next,
                valid_pre,
                valid_next,
                predict_indexes,
                iteration,
            )

        if iteration % 1000 == 0:
            print('Saving state, iter:', iteration)
            torch.save(
                net.state_dict(),
                os.path.join(
                    args.save_folder,
                    'model_' + repr(iteration) + '.pth',
                ),
            )
def main():
    args = parse_args()

    nusc = NuScenes(version=args.version,
                    dataroot=args.dataroot,
                    verbose=args.verbose)

    ann_dir = os.path.join(args.saveroot, "frames_ann")
    os.makedirs(ann_dir, exist_ok=True)

    ego_pose_dir = os.path.join(args.saveroot, "ego_pose")
    os.makedirs(ego_pose_dir, exist_ok=True)

    scene_count = 0

    for scene in nusc.scene:

        instance_ids = dict()

        first_sample_token = scene["first_sample_token"]
        sample = nusc.get("sample", first_sample_token)
        cam_sample = nusc.get("sample_data", sample["data"]["CAM_FRONT"])

        instances_ann = list()
        ego_poses = list()

        scene_dir = os.path.join(args.saveroot, "frames", scene["name"])
        os.makedirs(scene_dir, exist_ok=True)

        has_more_frames = True
        while has_more_frames:

            frame_path = nusc.get_sample_data_path(cam_sample["token"])
            # img = cv2.imread(frame_path)

            ego_pose = nusc.get("ego_pose", cam_sample["ego_pose_token"])
            ego_poses.append([ego_pose["timestamp"]] + ego_pose["rotation"] +
                             ego_pose["translation"])

            if cam_sample["is_key_frame"]:
                frame_time_stamp = int(
                    frame_path.split("__")[-1].split(".")[0])
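                # visibility tokens '3' and '4' keep annotations that are at least 60% visible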
                reproj_bboxes = get_2d_boxes(nusc,
                                             cam_sample["token"],
                                             visibilities=['3', '4'])

                for reproj_bbox in reproj_bboxes:
                    if "human" in reproj_bbox[
                            "category_name"] or "vehicle" in reproj_bbox[
                                "category_name"]:

                        instance_token = reproj_bbox["instance_token"]
                        if instance_token not in instance_ids:
                            instance_id = len(instance_ids.keys()) + 1
                            instance_ids[instance_token] = instance_id

                        x1, y1, x2, y2, depth = reproj_bbox["bbox_corners"]
                        instance_id = instance_ids[instance_token]
                        instances_ann.append([
                            frame_time_stamp, instance_id, x1, y1, x2, y2,
                            depth
                        ])

            # shutil.copy(frame_path, os.path.join(scene_dir, frame_path.split("/")[-1]))
            if not cam_sample["next"] == "":
                cam_sample = nusc.get("sample_data", cam_sample["next"])
            else:
                has_more_frames = False

        df_ins = pd.DataFrame(instances_ann)
        df_ins.to_csv(os.path.join(
            ann_dir, "{}_instances_ann.csv".format(scene["name"])),
                      header=False,
                      index=False)

        df_ego_pose = pd.DataFrame(ego_poses)
        df_ego_pose.to_csv(os.path.join(
            ego_pose_dir, "{}_ego_pose.csv".format(scene["name"])),
                           header=False,
                           index=False)

        scene_count += 1
        print(
            "{} scenes have been successfully processed!".format(scene_count))
def render_image_annotations(sample_data_token: str, with_anns: bool = True, out_path=None,
                             box_vis_level: BoxVisibility = BoxVisibility.ANY,
                             axes_limit: float = 40, ax: Axes = None,
                             image_annotations_token2ind={}, image_annotations=[],
                             visibilities=['', '1', '2', '3', '4'], cam_type='CAM_FRONT'):
    _cam_type = '/' + cam_type + '/'
    nusc = NuScenes(version='v1.0-mini', dataroot='dataset/nuScenes/v1.0-mini', verbose=True)
    # Get sensor modality.
    sd_record = nusc.get('sample_data', sample_data_token)
    sensor_modality = sd_record['sensor_modality']  # 'camera'
    '''
    data_path, boxes, camera_intrinsic = nusc.get_sample_data(sample_data_token,
                                                                   box_vis_level=box_vis_level)
    '''
    cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
    sensor_record = nusc.get('sensor', cs_record['sensor_token'])
    pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
    data_path = nusc.get_sample_data_path(sample_data_token)  # 'dataset/nuScenes/v1.0-mini/samples/CAM_FRONT/n015-2018-07-24-11-22-45+0800__CAM_FRONT__1532402927612460.jpg'

    camera_intrinsic = np.array(cs_record['camera_intrinsic'])
    imsize = (sd_record['width'], sd_record['height'])

    '''
    boxes[0]:
        label: nan, score: nan, 
        xyz: [373.26, 1130.42, 0.80], 
        wlh: [0.62, 0.67, 1.64], 
        rot axis: [0.00, 0.00, -1.00], 
        ang(degrees): 21.09, ang(rad): 0.37, 
        vel: nan, nan, nan, 
        name: human.pedestrian.adult, 
        token: ef63a697930c4b20a6b9791f423351da
    '''
    boxes2d = get_boxes2d(sample_data_token=sample_data_token,
                          image_annotations_token2ind=image_annotations_token2ind,
                          image_annotations=image_annotations)
    '''
    box2d_list = []
    for box2d in boxes2d:
        if use_flat_vehicle_coordinates:
            # Move box to ego vehicle coord system parallel to world z plane.
            yaw = Quaternion(pose_record['rotation']).yaw_pitch_roll[0]
            box.translate(-np.array(pose_record['translation']))
            box.rotate(Quaternion(scalar=np.cos(yaw / 2), vector=[0, 0, np.sin(yaw / 2)]).inverse)
        else:
            # Move box to ego vehicle coord system.
            box.translate(-np.array(pose_record['translation']))
            box.rotate(Quaternion(pose_record['rotation']).inverse)

            #  Move box to sensor coord system.
            box.translate(-np.array(cs_record['translation']))
            box.rotate(Quaternion(cs_record['rotation']).inverse)

        if sensor_record['modality'] == 'camera' and not \
                box_in_image(box, cam_intrinsic, imsize, vis_level=box_vis_level):
            continue
        box2d_list.append(box2d)
    '''



    data = Image.open(data_path)

    # Init axes.
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(9, 16))

    # Show image.
    ax.imshow(data)
    # Show boxes.
    if with_anns:
        for box2d in boxes2d:
            print(box2d.corners, box2d.name, box2d.filename[8:22])
            print(box2d.token)
            c = np.array(get_color(box2d.name)) / 255.0
            #ipdb.set_trace()
            if box2d.visibility in visibilities and (_cam_type in box2d.filename):
                box2d.render(ax, view=camera_intrinsic, normalize=True, colors=(c, c, c))

    # Limit visible range.
    #ax.set_xlim(0, data.size[0])
    #ax.set_ylim(data.size[1], 0)
    #ax.axis('off')
    ax.set_title(sd_record['channel'])
    #ax.set_aspect('equal')

    if out_path is not None:
        plt.savefig(out_path)