Example #1
def gen_scene_splits(dataroot: str):
    """
    Returns all nuScenes scene splits by scene token, as specified in https://github.com/nutonomy/nuscenes-devkit/blob/master/python-sdk/nuscenes/utils/splits.py.
    Arguments:
        dataroot: Directory path of the nuScenes datasets (the version folder name is appended directly, so include a trailing '/'), <str>.
    """
    # Imports
    from nuscenes.nuscenes import NuScenes
    from nuscenes.utils.splits import create_splits_scenes

    # Define
    scene_splits = create_splits_scenes()

    # Load trainval
    version = 'v1.0-trainval'
    nusc = NuScenes(version=version, dataroot=dataroot + version, verbose=False)

    for split, scene_names in scene_splits.items():
        if split == 'test':
            continue
        else:
            scene_splits[split] = [nusc.field2token('scene', 'name', scene_name)[0] for scene_name in scene_names]

    # Load test
    version = 'v1.0-test'
    nusc = NuScenes(version=version, dataroot=dataroot + version, verbose=False)
    scene_splits['test'] = [nusc.field2token('scene', 'name', scene_name)[0] for scene_name in scene_splits['test']]

    return scene_splits
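
# Minimal usage sketch, assuming both v1.0-trainval and v1.0-test are extracted under
# the same parent directory (the path below is hypothetical; note the trailing '/').
if __name__ == '__main__':
    splits = gen_scene_splits('/data/sets/nuscenes/')
    print({split: len(tokens) for split, tokens in splits.items()})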
Example #2
def get_egoposes_on_drivable_ratio(nusc: NuScenes, nusc_map: NuScenesMap,
                                   scene_token: str) -> float:
    """
    Get the ratio of ego poses on the drivable area.
    :param nusc: A NuScenes instance.
    :param nusc_map: The NuScenesMap instance of a particular map location.
    :param scene_token: The token of the current scene.
    :return: The ratio of poses that fall on the driveable area.
    """

    # Go through each sample in the scene.
    sample_tokens = nusc.field2token('sample', 'scene_token', scene_token)
    poses_all = 0
    poses_valid = 0
    for sample_token in sample_tokens:

        # Poses are associated with the sample_data. Here we use the lidar sample_data.
        sample_record = nusc.get('sample', sample_token)
        sample_data_record = nusc.get('sample_data',
                                      sample_record['data']['LIDAR_TOP'])
        pose_record = nusc.get('ego_pose',
                               sample_data_record['ego_pose_token'])

        # Check if the ego pose is on the driveable area.
        ego_pose = pose_record['translation'][:2]
        record = nusc_map.record_on_point(ego_pose[0], ego_pose[1],
                                          'drivable_area')
        if len(record) > 0:
            poses_valid += 1
        poses_all += 1
    ratio_valid = poses_valid / poses_all

    return ratio_valid
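
# Hedged usage sketch: the map location of a scene can be looked up via its log
# record; the dataroot below is hypothetical.
if __name__ == '__main__':
    from nuscenes.nuscenes import NuScenes
    from nuscenes.map_expansion.map_api import NuScenesMap

    nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
    scene = nusc.scene[0]
    log = nusc.get('log', scene['log_token'])
    nusc_map = NuScenesMap(dataroot='/data/sets/nuscenes', map_name=log['location'])
    print(get_egoposes_on_drivable_ratio(nusc, nusc_map, scene['token']))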
Example #3
def process_data(data_path, version, val_split):
    nusc = NuScenes(version=version, dataroot=data_path, verbose=True)
    splits = create_splits_scenes()
    train_scenes, val_scenes = train_test_split(splits['train' if 'mini' not in version else 'mini_train'], test_size=val_split)
    train_scene_names = splits['train' if 'mini' not in version else 'mini_train']
    val_scene_names = splits['val' if 'mini' not in version else 'mini_val']

    ns_scene_names = dict()
    ns_scene_names['train'] = train_scene_names
    ns_scene_names['val'] = val_scene_names
    scenes = []
    for data_class in ['train', 'val']:
        for ns_scene_name in tqdm(ns_scene_names[data_class]):
            ns_scene = nusc.get('scene', nusc.field2token('scene', 'name', ns_scene_name)[0])
            scene_id = int(ns_scene['name'].replace('scene-', ''))
            if scene_id in scene_blacklist:  # Some scenes have bad localization
                continue

            scene = process_scene(ns_scene, nusc)
            if scene is not None:
                scenes.append(scene)
    
    print(f'Processed {len(scenes)} scenes')
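
# Hedged usage sketch (hypothetical paths; scene_blacklist and process_scene are
# expected to be defined elsewhere in the original module):
# process_data('/data/sets/nuscenes', 'v1.0-mini', val_split=0.15)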
Example #4
def render_tracking_result(estimation_file: str, scene_name: str, version: str,
                           dataroot: str):
    '''
    Render tracking result onto CAM_FRONT image
    :param estimation_file: name of the estimation file produced by nuscens_tracking_pmbm.py
    :param scene_name: name of the scene whose tracking result is to be rendered
    :param version: version of the NuScenes dataset (mini, trainval, test) the scene belongs to
    :param dataroot: directory containing the NuScenes dataset
    '''
    # Load tracking data
    with open(estimation_file, 'r') as infile:
        all_tracking_result = json.load(infile)

    num_unique_colors = 200
    all_color_indices = np.linspace(
        0, 1.0, num=num_unique_colors)  # allow up to 200 unique colors

    # load NuScenes stuff
    nusc = NuScenes(version=version, dataroot=dataroot, verbose=False)
    my_scene_token = nusc.field2token('scene', 'name', scene_name)[0]
    my_scene = nusc.get('scene', my_scene_token)

    current_time_step = 0
    current_sample_token = my_scene['first_sample_token']
    while True:
        # get necessary record
        sample_record = nusc.get('sample', current_sample_token)
        camera_token = sample_record['data']['CAM_FRONT']
        sd_record = nusc.get('sample_data', camera_token)
        cs_record = nusc.get('calibrated_sensor',
                             sd_record['calibrated_sensor_token'])
        pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])

        # get camera information
        cam_intrinsic = np.array(cs_record['camera_intrinsic'])
        imsize = (sd_record['width'], sd_record['height'])
        impath = nusc.get_sample_data_path(camera_token)
        im = cv2.imread(impath)

        # get tracking result
        current_tracks = all_tracking_result[str(current_time_step)]
        for target_id, target in current_tracks.items():
            box = Box4Track(center=target['translation'] + [target['height']],
                            orientation=Quaternion(
                                axis=[0, 0, 1], angle=target['orientation']),
                            size=target['size'],
                            name=target['class'],
                            label=int(target_id))

            box.to_camera_frame(pose_record, cs_record)

            # render box on image
            if not box_in_image(box, cam_intrinsic, imsize, BoxVisibility.ANY):
                # print('Box {} not in image'.format(box.name))
                continue
            # get color
            c = np.array(plt.cm.Spectral(box.label % num_unique_colors))
            c = np.round(c * 255)
            box.render_track(im,
                             view=cam_intrinsic,
                             normalize=True,
                             color=(c[0], c[1], c[2]))

        # move on
        current_time_step += 1
        current_sample_token = sample_record['next']
        if current_sample_token == '':
            break

        cv2.imshow('CAM_FRONT', im)
        key = cv2.waitKey(1000)  # wait 1000 ms between frames
        if key == 32:  # if space is pressed, pause.
            key = cv2.waitKey()
        if key == 27:  # if ESC is pressed, exit.
            cv2.destroyAllWindows()
            break
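
# Hedged usage sketch; the estimation file name below is hypothetical and follows
# the producer script mentioned in the docstring.
if __name__ == '__main__':
    render_tracking_result('estimation_scene-0061.json', 'scene-0061',
                           version='v1.0-mini', dataroot='/data/sets/nuscenes')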
Example #5
class NuScenesDataset(VisionDataset):
    def __init__(self, version, root, transform=None, target_transform=None, *, verbose=True,
                 specific_tokens=None, sensor_modality='camera', sensor='CAM_FRONT', lidar='LIDAR_TOP',
                 pretransform_data=True, preload_data=True, only_annotated=False):
        super(NuScenesDataset, self).__init__(root, transform=transform, target_transform=target_transform)
        self.nusc = NuScenes(version=version, dataroot=root, verbose=verbose)
        self.lidar = lidar
        self.only_annotated = only_annotated
        self.sensor = ""
        self.sensor_modality = ""
        if specific_tokens:
            self.tokens = specific_tokens
        elif sensor:
            self.tokens = self.nusc.field2token(table_name="sample_data", field="channel",
                                                query=sensor)
            self.sensor = sensor
        elif sensor_modality:
            self.tokens = self.nusc.field2token(table_name="sample_data", field="sensor_modality",
                                                query=sensor_modality)
            self.sensor_modality = sensor_modality
        else:
            raise ValueError("sensor_modality and sensor cannot both be None.")

        if only_annotated:
            tokens = []
            for t in self.tokens:
                sample_data = self.nusc.get("sample_data", t)
                if sample_data["is_key_frame"]:
                    tokens.append(t)
            self.tokens = tokens
        if verbose:
            print("Number of valid sample data tokens: {}".format(len(self.tokens)))

        self.objects = []
        self.images = []
        self.scene_tokens = []
        self.transform = transform
        self.pretransform_data = pretransform_data
        self.preload_data = preload_data
        if self.preload_data:
            for t in self.tokens:
                img = Image.open(self.get_filepath(t)).convert('RGB')
                if self.transform and self.pretransform_data:
                    img = self.transform(img)
                self.images.append(img)
                self.objects = []
        for t in self.tokens:
            # find scene token
            self.scene_tokens.append(self.get_scene_token(t))

    def __getitem__(self, index):
        if self.preload_data:
            img = self.images[index]
        else:
            img = Image.open(self.get_filepath(self.tokens[index])).convert('RGB')

        if self.transform and not self.pretransform_data:
            img = self.transform(img)

        # TODO:: return object detection groundtruth
        return img, self.tokens[index]

    def __len__(self):
        return len(self.tokens)

    def __repr__(self):
        return self.nusc.__repr__()

    def get_filepath(self, token):
        assert token in self.tokens, "Token {} not in specific tokens set".format(token)
        sample_data = self.nusc.get("sample_data", token)
        return os.path.join(self.nusc.dataroot, sample_data["filename"])

    def get_filepaths(self, scene_token, sensor="", use_specific_tokens=True):
        sensor = sensor if sensor else self.sensor
        assert sensor

        scene = self.nusc.get("scene", scene_token)
        first_sample = self.nusc.get("sample", scene["first_sample_token"])
        first_sample_data_token = first_sample["data"][sensor]

        curr_token = first_sample_data_token
        file_paths = []
        all_tokens = set(self.tokens)  # Checking if curr_token is in the token list would be slower
        while curr_token:
            sample_data = self.nusc.get("sample_data", curr_token)
            curr_token = sample_data["next"]

            if use_specific_tokens and sample_data["token"] not in all_tokens:
                continue

            if not self.only_annotated or sample_data["is_key_frame"]:
                file_paths.append(sample_data["filename"])
        return file_paths

    def get_scene_token(self, sample_data_token):
        sample_data = self.nusc.get("sample_data", sample_data_token)
        sample = self.nusc.get("sample", sample_data["sample_token"])
        return sample["scene_token"]

    def estimate_camera_settings(self, sensor=""):
        # Returns 1 camera settings per scene (dict, scene token)
        sensor = sensor if sensor else self.sensor
        assert sensor

        camera_settings = dict()
        for t in set(self.scene_tokens):
            scene = self.nusc.get("scene", t)
            sample = self.nusc.get("sample", scene["first_sample_token"])

            first_sample_data_token = sample["data"][sensor]

            sample_data = self.nusc.get("sample_data", first_sample_data_token)
            calibrated_sensor = self.nusc.get("calibrated_sensor", sample_data["calibrated_sensor_token"])

            # needs CCD parameters (since EXIF is a myth ;) ), taken from https://www.nuscenes.org/data-collection
            ccd_width = 1600
            ccd_height = 1200
            px_size = 1.98  # in um --> more or less arbitrary... considering CMOS 1/8'' with 1600 px width
            exposure = 20  # in ms

            # calculate "focal" from ccd parameters and intrinsics matrix
            intrinsics = np.array(calibrated_sensor["camera_intrinsic"])
            assert np.any(intrinsics)  # this would fail if, for some reason, we were reading radar sensor intrinsics
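            # fx and fy from the intrinsics are in pixels; multiplying by the assumed
            # pixel pitch (um) and dividing by 1000 gives a focal length estimate in mm.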
            focal = np.mean([intrinsics[0, 0] * px_size / 1000, intrinsics[1, 1] * px_size / 1000])  # focal in mm

            # Hardcoded known fact because laziness
            frequency = 12
            width = 1600
            height = 900

            camera_settings[t] = ({"translation": calibrated_sensor["translation"], "focal": focal, "px_size": px_size,
                                   "ccd_width": ccd_width, "ccd_height": ccd_height, "width": width, "height": height,
                                   "frequency": frequency, "exposure": exposure })

        return camera_settings

    def estimate_camera_motions(self, sensor=""):
        # Returns a list of camera motion per scene (dict, scene token)
        sensor = sensor if sensor else self.sensor
        assert sensor

        camera_motions = dict()
        for t in set(self.scene_tokens):
            scene = self.nusc.get("scene", t)
            first_sample = self.nusc.get("sample", scene["first_sample_token"])
            first_sample_data_token = first_sample["data"][sensor]

            scene_motions = []
            curr_token = first_sample_data_token
            last_camera_position = None
            while curr_token:
                sample_data = self.nusc.get("sample_data", curr_token)
                curr_token = sample_data["next"]
                ego_pose = self.nusc.get("ego_pose", sample_data["ego_pose_token"])

                if sample_data["token"] == first_sample_data_token:
                    last_camera_position = np.array(ego_pose["translation"])
                    continue

                scene_motions.append((np.array(ego_pose["translation"]) - last_camera_position).tolist())
                last_camera_position = ego_pose["translation"]
            scene_motions.append(scene_motions[-1])  # Yep, last frame will keep its momentum
            camera_motions[t] = scene_motions

        return camera_motions

    def estimate_sequences_duration(self, sensor="", epsilon=1e-3):
        # Returns 1 duration per scene (dict, scene token)
        sensor = sensor if sensor else self.sensor
        assert sensor

        # Hardcoded known fact because laziness
        frequency = 12

        scenes_duration = dict()
        for t in set(self.scene_tokens):
            scene = self.nusc.get("scene", t)
            first_sample = self.nusc.get("sample", scene["first_sample_token"])
            first_sample_data_token = first_sample["data"][sensor]

            curr_token = first_sample_data_token
            tokens = []
            while curr_token:
                tokens.append(curr_token)
                sample_data = self.nusc.get("sample_data", curr_token)
                curr_token = sample_data["next"]

            scenes_duration[t] = len(tokens) / frequency + epsilon  # duration in sec

        return scenes_duration

    def get_depth_from_lidar(self, sample_data_token):
        sample_token = self.nusc.get("sample_data", sample_data_token)["sample_token"]
        sample = self.nusc.get("sample", sample_token)
        sample_data_lidar_token = sample["data"][self.lidar]

        pts_cloud, depths = self.map_pointcloud_to_image(sample_data_lidar_token, sample_data_token)
        pts_cloud[2, :] = depths

        return pts_cloud

    def map_pointcloud_to_image(self,
                                pointsensor_token: str,
                                camera_token: str,
                                min_dist: float = 1.0) -> Tuple:
        """
        Given a point sensor (lidar/radar) token and camera sample_data token, load point-cloud and map it to the image
        plane. [Recoded from the NuScenesExplorer class so the image does not have to be loaded.]
        :param pointsensor_token: Lidar/radar sample_data token.
        :param camera_token: Camera sample_data token.
        :param min_dist: Distance from the camera below which points are discarded.
        :return: (pointcloud <np.float: 3, n>, coloring <np.float: n>).
        """

        cam = self.nusc.get('sample_data', camera_token)
        pointsensor = self.nusc.get('sample_data', pointsensor_token)
        pcl_path = os.path.join(self.nusc.dataroot, pointsensor['filename'])
        if pointsensor['sensor_modality'] == 'lidar':
            pc = LidarPointCloud.from_file(pcl_path)
        else:
            pc = RadarPointCloud.from_file(pcl_path)

        # Points live in the point sensor frame. So they need to be transformed via global to the image plane.
        # First step: transform the point-cloud to the ego vehicle frame for the timestamp of the sweep.
        cs_record = self.nusc.get('calibrated_sensor', pointsensor['calibrated_sensor_token'])
        pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix)
        pc.translate(np.array(cs_record['translation']))

        # Second step: transform to the global frame.
        poserecord = self.nusc.get('ego_pose', pointsensor['ego_pose_token'])
        pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix)
        pc.translate(np.array(poserecord['translation']))

        # Third step: transform into the ego vehicle frame for the timestamp of the image.
        poserecord = self.nusc.get('ego_pose', cam['ego_pose_token'])
        pc.translate(-np.array(poserecord['translation']))
        pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix.T)

        # Fourth step: transform into the camera.
        cs_record = self.nusc.get('calibrated_sensor', cam['calibrated_sensor_token'])
        pc.translate(-np.array(cs_record['translation']))
        pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix.T)

        # Fifth step: actually take a "picture" of the point cloud.
        # Grab the depths (camera frame z axis points away from the camera).
        depths = pc.points[2, :]

        # Retrieve the color from the depth.
        coloring = depths

        # Take the actual picture (matrix multiplication with camera-matrix + renormalization).
        points = view_points(pc.points[:3, :], np.array(cs_record['camera_intrinsic']), normalize=True)

        # Remove points that are either outside or behind the camera. Leave a margin of 1 pixel for aesthetic reasons.
        # Also make sure points are at least 1m in front of the camera to avoid seeing the lidar points on the camera
        # casing for non-keyframes which are slightly out of sync.
        mask = np.ones(depths.shape[0], dtype=bool)
        mask = np.logical_and(mask, depths > min_dist)
        mask = np.logical_and(mask, points[0, :] > 1)
        mask = np.logical_and(mask, points[0, :] < 1600 - 1)    # hardcoded width
        mask = np.logical_and(mask, points[1, :] > 1)
        mask = np.logical_and(mask, points[1, :] < 900 - 1)   # hardcoded height
        points = points[:, mask]
        coloring = coloring[mask]

        return points, coloring
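
# Hedged usage sketch for the dataset above; version, paths, and the torchvision
# transform are assumptions.
if __name__ == '__main__':
    from torchvision import transforms as T

    dataset = NuScenesDataset('v1.0-mini', '/data/sets/nuscenes', transform=T.ToTensor(),
                              sensor='CAM_FRONT', only_annotated=True,
                              pretransform_data=False, preload_data=False)
    img, token = dataset[0]
    uvd = dataset.get_depth_from_lidar(token)  # rows: pixel u, pixel v, depth in m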
Example #6
        print(pc.points.shape)
        count = 0
        points_in_horizon = np.transpose(
            view_points(pc.points[:3, :], view, normalize=True))
        for point in points_in_horizon:
            if np.sum(np.square(point[0:1])) > 2500:
                count += 1

        print(count, len(points_in_horizon))
        count = 0
        points = np.transpose(pc.points[:3, :])
        for point in points:
            if point[2] > 10:
                print(point)
                count += 1
            if np.sum(np.square(point[0:1])) < 1 and point[2] < 0.3:
                print(point)
        print(count, len(points))
        print(pc.points[3, :])

        fig, axes = plt.subplots(1, 2, figsize=(18, 9))
        pc.render_height(axes[0], view=view)

print('annotations')
for sa_token in sample_record['anns']:
    sa_record = nusc.get('sample_annotation', sa_token)
    print(sa_record['rotation'])

my_scene_token = nusc.field2token('scene', 'name', 'scene-0061')[0]
nusc.render_scene(my_scene_token)
Example #7
class NuScenesDataset(Dataset):
    """
    NuScenes dataset loader and producer
    """
    def __init__(self,
                 mode,
                 split='training',
                 img_list='trainval',
                 is_training=True,
                 workers_num=1):
        """
        mode: 'loading', 'preprocessing'
        """
        self.mode = mode
        self.dataset_dir = os.path.join(cfg.ROOT_DIR,
                                        cfg.DATASET.KITTI.BASE_DIR_PATH)
        self.max_sweeps = cfg.DATASET.NUSCENES.NSWEEPS
        self.is_training = is_training
        self.img_list = img_list
        self.workers_num = workers_num

        # map raw NuScenes category names to the detection class names used here
        self.useful_cls_dict = {
            'animal': 'ignore',
            'human.pedestrian.personal_mobility': 'ignore',
            'human.pedestrian.stroller': 'ignore',
            'human.pedestrian.wheelchair': 'ignore',
            'movable_object.debris': 'ignore',
            'movable_object.pushable_pullable': 'ignore',
            'static_object.bicycle_rack': 'ignore',
            'vehicle.emergency.ambulance': 'ignore',
            'vehicle.emergency.police': 'ignore',
            'movable_object.barrier': 'barrier',
            'vehicle.bicycle': 'bicycle',
            'vehicle.bus.bendy': 'bus',
            'vehicle.bus.rigid': 'bus',
            'vehicle.car': 'car',
            'vehicle.construction': 'construction_vehicle',
            'vehicle.motorcycle': 'motorcycle',
            'human.pedestrian.adult': 'pedestrian',
            'human.pedestrian.child': 'pedestrian',
            'human.pedestrian.construction_worker': 'pedestrian',
            'human.pedestrian.police_officer': 'pedestrian',
            'movable_object.trafficcone': 'traffic_cone',
            'vehicle.trailer': 'trailer',
            'vehicle.truck': 'truck'
        }
        # cast attribute to index
        self.attribute_idx_list = {
            'vehicle.moving': 0,
            'vehicle.stopped': 1,
            'vehicle.parked': 2,
            'cycle.with_rider': 3,
            'cycle.without_rider': 4,
            'pedestrian.sitting_lying_down': 5,
            'pedestrian.standing': 6,
            'pedestrian.moving': 7,
            'default': -1,
        }
        self.idx_attribute_list = dict([
            (v, k) for k, v in self.attribute_idx_list.items()
        ])
        self.AttributeIdxLabelMapping = {
            "car": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "truck": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "bus": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "trailer": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "construction_vehicle":
            ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "pedestrian": [
                'pedestrian.sitting_lying_down', 'pedestrian.standing',
                'pedestrian.moving'
            ],
            "motorcycle": ['cycle.with_rider', 'cycle.without_rider', ''],
            "bicycle": ['cycle.with_rider', 'cycle.without_rider', ''],
            "traffic_cone": ['', '', ''],
            "barrier": ['', '', ''],
        }

        self.DefaultAttribute = {
            "car": "vehicle.parked",
            "pedestrian": "pedestrian.moving",
            "trailer": "vehicle.parked",
            "truck": "vehicle.parked",
            "bus": "vehicle.parked",
            "motorcycle": "cycle.without_rider",
            "construction_vehicle": "vehicle.parked",
            "bicycle": "cycle.without_rider",
            "barrier": "",
            "traffic_cone": "",
        }

        self.cls_list = cfg.DATASET.KITTI.CLS_LIST
        self.idx2cls_dict = dict([(idx + 1, cls)
                                  for idx, cls in enumerate(self.cls_list)])
        self.cls2idx_dict = dict([(cls, idx + 1)
                                  for idx, cls in enumerate(self.cls_list)])

        self.sv_npy_path = os.path.join(
            cfg.ROOT_DIR, cfg.DATASET.KITTI.SAVE_NUMPY_PATH, 'NuScenes',
            '{}_{}'.format(img_list, self.max_sweeps))
        self.train_list = os.path.join(self.sv_npy_path, 'infos.pkl')

        self.voxel_generator = VoxelGenerator()

        self.test_mode = cfg.TEST.TEST_MODE
        if self.test_mode == 'mAP':
            self.evaluation = self.evaluate_map
            self.logger_and_select_best = self.logger_and_select_best_map
        elif self.test_mode == 'Recall':
            self.evaluation = self.evaluate_recall
            self.logger_and_select_best = self.logger_and_select_best_recall
        else:
            raise Exception('No other evaluation mode.')

        if mode == 'loading':
            # data loader
            with open(self.train_list, 'rb') as f:
                self.train_npy_list = pickle.load(f)
            self.sample_num = len(self.train_npy_list)
            if self.is_training:
                self.data_augmentor = DataAugmentor(
                    'NuScenes', workers_num=self.workers_num)

        elif mode == 'preprocessing':
            # preprocess raw data
            if img_list == 'train':
                self.nusc = NuScenes(dataroot=self.dataset_dir,
                                     version='v1.0-trainval')
                self.scenes = [
                    scene for scene in self.nusc.scene
                    if scene['name'] in train_scene
                ]
            elif img_list == 'val':
                self.nusc = NuScenes(dataroot=self.dataset_dir,
                                     version='v1.0-trainval')
                self.scenes = [
                    scene for scene in self.nusc.scene
                    if scene['name'] in val_scene
                ]
            else:  # test
                self.nusc = NuScenes(dataroot=self.dataset_dir,
                                     version='v1.0-test')
                self.scenes = self.nusc.scene

            self.sample_data_token_list = OrderedDict()
            sample_num = 0
            for scene in self.scenes:
                # count the samples and record every sample_data token
                self.sample_data_token_list[scene['token']] = []
                all_sample = self.nusc.field2token('sample', 'scene_token',
                                                   scene['token'])
                sample_num += len(all_sample)
                for sample in all_sample:  # all sample token
                    sample = self.nusc.get('sample', sample)
                    cur_token = sample['token']
                    cur_data_token = sample['data']['LIDAR_TOP']
                    self.sample_data_token_list[scene['token']].append(
                        cur_data_token)

            self.sample_num = sample_num

            self.extents = cfg.DATASET.POINT_CLOUD_RANGE
            self.extents = np.reshape(self.extents, [3, 2])
            if not os.path.exists(self.sv_npy_path):
                os.makedirs(self.sv_npy_path)

            # also calculate the mean size here
            self.cls_size_dict = dict([(cls,
                                        np.array([0, 0, 0], dtype=np.float32))
                                       for cls in self.cls_list])
            self.cls_num_dict = dict([(cls, 0) for cls in self.cls_list])

            # the save path for MixupDB
            if self.img_list in [
                    'train', 'val', 'trainval'
            ] and cfg.TEST.WITH_GT and cfg.TRAIN.AUGMENTATIONS.MIXUP.OPEN:
                self.mixup_db_cls_path = dict()
                self.mixup_db_trainlist_path = dict()
                self.mixup_db_class = cfg.TRAIN.AUGMENTATIONS.MIXUP.CLASS
                for cls in self.mixup_db_class:
                    mixup_db_cls_path = os.path.join(
                        cfg.ROOT_DIR, cfg.DATASET.KITTI.SAVE_NUMPY_PATH,
                        cfg.TRAIN.AUGMENTATIONS.MIXUP.SAVE_NUMPY_PATH,
                        cfg.TRAIN.AUGMENTATIONS.MIXUP.PC_LIST,
                        '{}'.format(cls))
                    mixup_db_trainlist_path = os.path.join(
                        mixup_db_cls_path, 'train_list.txt')
                    if not os.path.exists(mixup_db_cls_path):
                        os.makedirs(mixup_db_cls_path)
                    self.mixup_db_cls_path[cls] = mixup_db_cls_path
                    self.mixup_db_trainlist_path[cls] = mixup_db_trainlist_path

    def __len__(self):
        return self.sample_num

    def load_samples(self, sample_idx, pipename):
        """ load data per thread """
        pipename = int(pipename)
        biggest_label_num = 0
        sample_dict = self.train_npy_list[sample_idx]

        points_path = sample_dict[maps_dict.KEY_POINT_CLOUD]
        sweeps = sample_dict[maps_dict.KEY_SWEEPS]
        sample_name = sample_dict[maps_dict.KEY_SAMPLE_NAME]
        cur_transformation_matrix = sample_dict[
            maps_dict.KEY_TRANSFORMRATION_MATRIX]
        ts = sample_dict[maps_dict.KEY_TIMESTAMPS] / 1e6

        # first read the points, then stack points from multiple frames
        points = np.fromfile(points_path, dtype=np.float32)
        points = points.reshape((-1, 5))
        points = cast_points_to_kitti(points)
        points[:, 3] /= 255
        points[:, 4] = 0
        sweep_points_list = [points]
        original_cur_sweep_points = points
        cur_sweep_points_num = points.shape[0]
        for sweep in sweeps:
            points_sweep = np.fromfile(sweep['lidar_path'], dtype=np.float32)
            points_sweep = points_sweep.reshape((-1, 5))
            sweep_ts = sweep['timestamp'] / 1e6
            points_sweep[:, 3] /= 255
            points_sweep[:, :3] = points_sweep[:, :3] @ sweep[
                'sweep2lidar_rotation'].T
            points_sweep[:, :3] += sweep['sweep2lidar_translation']
            points_sweep[:, 4] = ts - sweep_ts
            points_sweep = cast_points_to_kitti(points_sweep)
            sweep_points_list.append(points_sweep)
        if cfg.DATASET.NUSCENES.INPUT_FEATURE_CHANNEL == 4:
            points = np.concatenate(sweep_points_list, axis=0)[:, [0, 1, 2, 4]]
        else:
            points = np.concatenate(sweep_points_list, axis=0)

        # then read the groundtruth, if available
        if self.is_training or cfg.TEST.WITH_GT:
            label_boxes_3d = sample_dict[maps_dict.KEY_LABEL_BOXES_3D]
            label_boxes_3d = cast_box_3d_to_kitti_format(label_boxes_3d)

            label_classes_name = sample_dict[maps_dict.KEY_LABEL_CLASSES]
            label_classes = np.array([
                self.cls2idx_dict[label_class]
                for label_class in label_classes_name
            ])

            label_attributes = sample_dict[maps_dict.KEY_LABEL_ATTRIBUTES]
            label_velocity = sample_dict[
                maps_dict.KEY_LABEL_VELOCITY]  # [-1, 2]

            ry_cls_label, residual_angle = encode_angle2class_np(
                label_boxes_3d[:, -1], cfg.MODEL.ANGLE_CLS_NUM)
        else:  # not is_training and no_gt
            label_boxes_3d = np.zeros([1, 7], np.float32)
            label_classes = np.zeros([1], np.int32)
            label_attributes = np.zeros([1], np.int32)
            label_velocity = np.zeros([1, 2], np.float32)
            ry_cls_label = np.zeros([1], np.int32)
            residual_angle = np.zeros([1], np.float32)

        if self.is_training:  # data augmentation
            points, label_boxes_3d, label_classes, label_attributes, label_velocity, cur_sweep_points_num = self.data_augmentor.nuscenes_forward(
                points, label_boxes_3d, label_classes, pipename,
                label_attributes, label_velocity, cur_sweep_points_num)
            ry_cls_label, residual_angle = encode_angle2class_np(
                label_boxes_3d[:, -1], cfg.MODEL.ANGLE_CLS_NUM)
        cur_label_num = len(label_boxes_3d)

        # then randomly choose some points
        cur_sweep_points = points[:cur_sweep_points_num, :]  # [-1, 4]
        other_sweep_points = points[cur_sweep_points_num:, :]  # [-1, 4]
        if len(other_sweep_points) == 0:
            other_sweep_points = cur_sweep_points.copy()
        np.random.shuffle(cur_sweep_points)
        np.random.shuffle(other_sweep_points)

        input_sample_points, num_points_per_voxel = self.voxel_generator.generate_nusc(
            cur_sweep_points, other_sweep_points,
            cfg.DATASET.NUSCENE.MAX_CUR_SAMPLE_POINTS_NUM
        )  # points, [num_voxels, num_points, 5], sem_labels, [num_voxels, num_points]
        cur_sample_points = input_sample_points[:cfg.DATASET.NUSCENE.
                                                MAX_CUR_SAMPLE_POINTS_NUM]
        other_sample_points = input_sample_points[cfg.DATASET.NUSCENE.
                                                  MAX_CUR_SAMPLE_POINTS_NUM:]

        biggest_label_num = max(biggest_label_num, cur_label_num)
        return biggest_label_num, input_sample_points, cur_sample_points, other_sample_points, label_boxes_3d, ry_cls_label, residual_angle, label_classes, label_attributes, label_velocity, sample_name, cur_transformation_matrix, sweeps, original_cur_sweep_points

    def load_batch(self, batch_size):
        perm = np.arange(
            self.sample_num).tolist()  # one index per data sample
        dp = DataFromList(perm,
                          is_train=self.is_training,
                          shuffle=self.is_training)
        dp = MultiProcessMapData(dp, self.load_samples, self.workers_num)

        use_list = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1]
        use_concat = [0, 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0]

        dp = BatchDataNuscenes(dp,
                               batch_size,
                               use_concat=use_concat,
                               use_list=use_list)
        dp.reset_state()
        dp = dp.get_data()
        return dp

    # Preprocess data
    def preprocess_samples(self, cur_scene_key, sample_data_token):
        sample_dicts = []
        biggest_label_num = 0

        cur_sample_data = self.nusc.get('sample_data', sample_data_token)
        cur_sample_token = cur_sample_data['sample_token']
        cur_sample = self.nusc.get('sample', cur_sample_token)

        ego_pose = self.nusc.get('ego_pose', cur_sample_data['ego_pose_token'])
        calibrated_sensor = self.nusc.get(
            'calibrated_sensor', cur_sample_data['calibrated_sensor_token'])

        l2e_r = calibrated_sensor['rotation']
        l2e_t = calibrated_sensor['translation']
        e2g_r = ego_pose['rotation']
        e2g_t = ego_pose['translation']
        l2e_r_mat = Quaternion(l2e_r).rotation_matrix
        e2g_r_mat = Quaternion(e2g_r).rotation_matrix
        cur_timestamp = cur_sample['timestamp']

        cur_transformation_matrix = {
            'lidar2ego_translation': l2e_t,
            'lidar2ego_rotation': l2e_r,
            'ego2global_translation': e2g_t,
            'ego2global_rotation': e2g_r,
        }

        # get the point clouds from the previous 0.5 seconds
        sweeps = []
        while len(sweeps) < self.max_sweeps:
            if not cur_sample_data['prev'] == '':
                # has a previous frame
                cur_sample_data = self.nusc.get('sample_data',
                                                cur_sample_data['prev'])
                cur_ego_pose = self.nusc.get('ego_pose',
                                             cur_sample_data['ego_pose_token'])
                cur_calibrated_sensor = self.nusc.get(
                    'calibrated_sensor',
                    cur_sample_data['calibrated_sensor_token'])
                cur_lidar_path, cur_sweep_boxes, _ = self.nusc.get_sample_data(
                    cur_sample_data['token'])
                sweep = {
                    "lidar_path": cur_lidar_path,
                    "sample_data_token": cur_sample_data['token'],
                    "lidar2ego_translation":
                    cur_calibrated_sensor['translation'],
                    "lidar2ego_rotation": cur_calibrated_sensor['rotation'],
                    "ego2global_translation": cur_ego_pose['translation'],
                    "ego2global_rotation": cur_ego_pose['rotation'],
                    "timestamp": cur_sample_data["timestamp"]
                }
                l2e_r_s = sweep["lidar2ego_rotation"]
                l2e_t_s = sweep["lidar2ego_translation"]
                e2g_r_s = sweep["ego2global_rotation"]
                e2g_t_s = sweep["ego2global_translation"]
                # sweep->ego->global->ego'->lidar
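                # For a row-vector point p in the sweep's lidar frame, each forward hop is
                # p @ rot.T + trans and each inverse hop is (p - trans) @ inv(rot).T.
                # Composing sweep->ego->global (forward) with global->ego'->lidar (inverse)
                # gives p_lidar = p @ R + T with R and T as below; load_samples then applies
                # exactly points @ sweep2lidar_rotation.T + sweep2lidar_translation.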
                l2e_r_s_mat = Quaternion(l2e_r_s).rotation_matrix
                e2g_r_s_mat = Quaternion(e2g_r_s).rotation_matrix

                R = (l2e_r_s_mat.T @ e2g_r_s_mat.T) @ (
                    np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)
                T = (l2e_t_s @ e2g_r_s_mat.T + e2g_t_s) @ (
                    np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)
                T -= e2g_t @ (np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(
                    l2e_r_mat).T) + l2e_t @ np.linalg.inv(l2e_r_mat).T

                sweep["sweep2lidar_rotation"] = R.T  # points @ R.T + T
                sweep["sweep2lidar_translation"] = T
                sweeps.append(sweep)
            else:  # prev is none
                break

        # then load gt_boxes_3d
        if self.img_list in ['train', 'val'] and cfg.TEST.WITH_GT:
            cur_data_path, all_boxes, _ = self.nusc.get_sample_data(
                sample_data_token)

            # then first parse boxes labels
            locs = np.array([box.center for box in all_boxes]).reshape(-1, 3)
            sizes = np.array([box.wlh for box in all_boxes]).reshape(-1, 3)
            rots = np.array([
                box.orientation.yaw_pitch_roll[0] for box in all_boxes
            ]).reshape(-1, 1)
            all_boxes_3d = np.concatenate([locs, sizes, -rots], axis=-1)

            annos_tokens = cur_sample['anns']
            all_velocity = np.array([
                self.nusc.box_velocity(ann_token)[:2]
                for ann_token in annos_tokens
            ])  # [-1, 2]
            for i in range(len(all_boxes)):
                velo = np.array([*all_velocity[i], 0.0])
                velo = velo @ np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(
                    l2e_r_mat).T
                all_velocity[i] = velo[:2]  # [-1, 2]

            attribute_tokens = [
                self.nusc.get('sample_annotation',
                              ann_token)['attribute_tokens']
                for ann_token in annos_tokens
            ]
            all_attribute = []
            for attribute_token in attribute_tokens:
                if len(attribute_token) == 0:
                    all_attribute.append([])
                else:
                    all_attribute.append(
                        self.nusc.get('attribute', attribute_token[0])['name'])
            # then filter these ignore labels
            categories = np.array([box.name for box in all_boxes])
            if self.img_list == 'train':
                useful_idx = [
                    index for index, category in enumerate(categories)
                    if self.useful_cls_dict[category] != 'ignore'
                ]
            else:
                useful_idx = [
                    index for index, category in enumerate(categories)
                ]
            if len(useful_idx) == 0:
                if self.img_list == 'train':
                    return None, biggest_label_num
                else:
                    all_boxes_3d = np.ones([1, 7], dtype=np.float32)
                    all_boxes_classes = np.array(['ignore'])
                    all_attribute = np.array([-1])
                    all_velocity = np.array([[0, 0]], dtype=np.float32)
            else:
                all_boxes_3d = all_boxes_3d[useful_idx]

                categories = categories[useful_idx]
                all_boxes_classes = np.array(
                    [self.useful_cls_dict[cate] for cate in categories])
                # now calculate the mean size of each box
                for tmp_idx, all_boxes_class in enumerate(all_boxes_classes):
                    cur_mean_size = self.cls_size_dict[
                        all_boxes_class] * self.cls_num_dict[all_boxes_class]
                    cur_cls_num = self.cls_num_dict[all_boxes_class] + 1
                    cur_total_size = cur_mean_size + all_boxes_3d[tmp_idx, [
                        4, 5, 3
                    ]]  # [l, w, h]
                    cur_mean_size = cur_total_size / cur_cls_num
                    self.cls_size_dict[all_boxes_class] = cur_mean_size
                    self.cls_num_dict[all_boxes_class] = cur_cls_num

                all_attribute = [
                    all_attribute[tmp_idx] for tmp_idx in useful_idx
                ]
                tmp_attribute = []
                for attr in all_attribute:
                    if attr == []: tmp_attribute.append(-1)
                    else:
                        tmp_attribute.append(self.attribute_idx_list[attr])
                all_attribute = tmp_attribute
                all_attribute = np.array(all_attribute, dtype=np.int32)
                all_velocity = [
                    all_velocity[tmp_idx] for tmp_idx in useful_idx
                ]
                all_velocity = np.array(all_velocity, dtype=np.float32)
        else:
            cur_data_path = self.nusc.get_sample_data_path(sample_data_token)

        # then generate the bev_maps
        if self.img_list in ['train', 'val', 'trainval'] and cfg.TEST.WITH_GT:
            sample_dict = {
                maps_dict.KEY_LABEL_BOXES_3D:
                all_boxes_3d,
                maps_dict.KEY_LABEL_CLASSES:
                all_boxes_classes,
                maps_dict.KEY_LABEL_ATTRIBUTES:
                all_attribute,
                maps_dict.KEY_LABEL_VELOCITY:
                all_velocity,
                maps_dict.KEY_LABEL_NUM:
                len(all_boxes_3d),
                maps_dict.KEY_POINT_CLOUD:
                cur_data_path,
                maps_dict.KEY_TRANSFORMRATION_MATRIX:
                cur_transformation_matrix,
                maps_dict.KEY_SAMPLE_NAME:
                '{}/{}/{}'.format(cur_scene_key, cur_sample_token,
                                  sample_data_token),
                maps_dict.KEY_SWEEPS:
                sweeps,
                maps_dict.KEY_TIMESTAMPS:
                cur_timestamp,
            }
            biggest_label_num = max(len(all_boxes_3d), biggest_label_num)
        else:
            # img_list is test
            sample_dict = {
                maps_dict.KEY_POINT_CLOUD:
                cur_data_path,
                maps_dict.KEY_SAMPLE_NAME:
                '{}/{}/{}'.format(cur_scene_key, cur_sample_token,
                                  sample_data_token),
                maps_dict.KEY_TRANSFORMRATION_MATRIX:
                cur_transformation_matrix,
                maps_dict.KEY_SWEEPS:
                sweeps,
                maps_dict.KEY_TIMESTAMPS:
                cur_timestamp,
            }
        return sample_dict, biggest_label_num

    def preprocess_batch(self):
        # if create_gt_dataset, then also create a boxes_numpy, saving all points
        if cfg.TRAIN.AUGMENTATIONS.MIXUP.OPEN:  # also save mixup database
            mixup_label_dict = dict([(cls, []) for cls in self.mixup_db_class])

        sample_dicts_list = []
        for scene_key, v in tqdm.tqdm(self.sample_data_token_list.items()):
            for sample_data_token in v:
                sample_dict, tmp_biggest_label_num = self.preprocess_samples(
                    scene_key, sample_data_token)
                if sample_dict is None:
                    continue
                # else save the result
                sample_dicts_list.append(sample_dict)

                # create_gt_dataset
                if self.img_list in [
                        'train', 'val', 'trainval'
                ] and cfg.TEST.WITH_GT and cfg.TRAIN.AUGMENTATIONS.MIXUP.OPEN:
                    mixup_sample_dicts = self.generate_mixup_sample(
                        sample_dict)
                    if mixup_sample_dicts is None: continue
                    for mixup_sample_dict in mixup_sample_dicts:
                        cur_cls = mixup_sample_dict[
                            maps_dict.KEY_SAMPLED_GT_CLSES]
                        mixup_label_dict[cur_cls].append(mixup_sample_dict)

        # save preprocessed data
        with open(self.train_list, 'wb') as f:
            pickle.dump(sample_dicts_list, f)
        for k, v in self.cls_num_dict.items():
            print('class name: %s / class num: %d / mean size: (%f, %f, %f)' %
                  (k, v, self.cls_size_dict[k][0], self.cls_size_dict[k][1],
                   self.cls_size_dict[k][2]))  # [l, w, h]

        if self.img_list in [
                'train', 'val', 'trainval'
        ] and cfg.TEST.WITH_GT and cfg.TRAIN.AUGMENTATIONS.MIXUP.OPEN:
            print('**** Generating groundtruth database ****')
            for cur_cls_name, mixup_sample_dict in mixup_label_dict.items():
                cur_mixup_db_cls_path = self.mixup_db_cls_path[cur_cls_name]
                cur_mixup_db_trainlist_path = self.mixup_db_trainlist_path[
                    cur_cls_name]
                print('**** Class %s ****' % cur_cls_name)
                with open(cur_mixup_db_trainlist_path, 'w') as f:
                    for tmp_idx, tmp_cur_mixup_sample_dict in tqdm.tqdm(
                            enumerate(mixup_sample_dict)):
                        f.write('%06d.npy\n' % tmp_idx)
                        np.save(
                            os.path.join(cur_mixup_db_cls_path,
                                         '%06d.npy' % tmp_idx),
                            tmp_cur_mixup_sample_dict)
        print('Finished preprocessing!')

    def generate_mixup_sample(self, sample_dict):
        """ This function is bound for generating mixup dataset """
        all_boxes_3d = sample_dict[maps_dict.KEY_LABEL_BOXES_3D]
        all_boxes_classes = sample_dict[maps_dict.KEY_LABEL_CLASSES]
        point_cloud_path = sample_dict[maps_dict.KEY_POINT_CLOUD]

        # then we first cast all_boxes_3d to kitti format
        all_boxes_3d = cast_box_3d_to_kitti_format(all_boxes_3d)

        # load points
        points = np.fromfile(point_cloud_path, dtype=np.float32).reshape(
            (-1, 5))
        points = cast_points_to_kitti(points)
        points[:, 3] /= 255
        points[:, 4] = 0  # timestamp is zero

        points_mask = check_inside_points(points,
                                          all_boxes_3d)  # [pts_num, gt_num]
        points_masks_num = np.sum(points_mask, axis=0)  # [gt_num]
        valid_box_idx = np.where(
            points_masks_num >= cfg.DATASET.MIN_POINTS_NUM)[0]

        if len(valid_box_idx) == 0:
            return None

        valid_label_boxes_3d = all_boxes_3d[valid_box_idx]
        valid_label_classes = all_boxes_classes[valid_box_idx]

        sample_dicts = []
        for index, i in enumerate(valid_box_idx):
            cur_points_mask = points_mask[:, i]
            cur_points_idx = np.where(cur_points_mask)[0]
            cur_inside_points = points[cur_points_idx, :]
            sample_dict = {
                # 0 timestamp and /255 reflectance
                maps_dict.KEY_SAMPLED_GT_POINTS:
                cur_inside_points,  # kitti format points
                maps_dict.KEY_SAMPLED_GT_LABELS_3D:
                valid_label_boxes_3d[index],
                maps_dict.KEY_SAMPLED_GT_CLSES:
                valid_label_classes[index],
            }
            sample_dicts.append(sample_dict)
        return sample_dicts

    # Evaluation
    def set_evaluation_tensor(self, model):
        # get prediction results, bs = 1
        pred_bbox_3d = tf.squeeze(model.output[maps_dict.PRED_3D_BBOX][-1],
                                  axis=0)
        pred_cls_score = tf.squeeze(model.output[maps_dict.PRED_3D_SCORE][-1],
                                    axis=0)
        pred_cls_category = tf.squeeze(
            model.output[maps_dict.PRED_3D_CLS_CATEGORY][-1], axis=0)
        pred_list = [pred_bbox_3d, pred_cls_score, pred_cls_category]

        if len(model.output[maps_dict.PRED_3D_ATTRIBUTE]) > 0:
            pred_attribute = tf.squeeze(
                model.output[maps_dict.PRED_3D_ATTRIBUTE][-1], axis=0)
            pred_velocity = tf.squeeze(
                model.output[maps_dict.PRED_3D_VELOCITY][-1], axis=0)
            pred_list.extend([pred_attribute, pred_velocity])
        return pred_list

    def evaluate_map(self,
                     sess,
                     feeddict_producer,
                     pred_list,
                     val_size,
                     cls_thresh,
                     log_dir,
                     placeholders=None):
        submissions = {}
        submissions['meta'] = dict()
        submissions['meta']['use_camera'] = False
        submissions['meta']['use_lidar'] = True
        submissions['meta']['use_radar'] = False
        submissions['meta']['use_map'] = False
        submissions['meta']['use_external'] = False

        submissions_results = dict()
        pred_attr_velo = (len(pred_list) == 5)

        for i in tqdm.tqdm(range(val_size)):
            feed_dict = feeddict_producer.create_feed_dict()

            if pred_attr_velo:
                pred_bbox_3d_op, pred_cls_score_op, pred_cls_category_op, pred_attr_op, pred_velo_op = sess.run(
                    pred_list, feed_dict=feed_dict)
            else:
                pred_bbox_3d_op, pred_cls_score_op, pred_cls_category_op = sess.run(
                    pred_list, feed_dict=feed_dict)
            pred_cls_category_op += 1  # label from 1 to n

            sample_name, cur_transformation_matrix, sweeps = feeddict_producer.info
            sample_name = sample_name[0]
            cur_transformation_matrix = cur_transformation_matrix[0]
            sweeps = sweeps[0]
            cur_scene_key, cur_sample_token, cur_sample_data_token = sample_name.split(
                '/')

            select_idx = np.where(pred_cls_score_op >= cls_thresh)[0]
            pred_cls_score_op = pred_cls_score_op[select_idx]
            pred_cls_category_op = pred_cls_category_op[select_idx]
            pred_bbox_3d_op = pred_bbox_3d_op[select_idx]
            if pred_attr_velo:
                pred_attr_op = pred_attr_op[select_idx]
                pred_velo_op = pred_velo_op[select_idx]
            else:
                pred_attr_op, pred_velo_op = None, None

            if len(pred_bbox_3d_op) > 500:
                arg_sort_idx = np.argsort(pred_cls_score_op)[::-1]
                arg_sort_idx = arg_sort_idx[:500]
                pred_cls_score_op = pred_cls_score_op[arg_sort_idx]
                pred_cls_category_op = pred_cls_category_op[arg_sort_idx]
                pred_bbox_3d_op = pred_bbox_3d_op[arg_sort_idx]
                if pred_attr_velo:
                    pred_attr_op = pred_attr_op[arg_sort_idx]
                    pred_velo_op = pred_velo_op[arg_sort_idx]

            # then transform pred_bbox_op to nuscenes_box
            boxes = cast_kitti_format_to_nusc_box_3d(
                pred_bbox_3d_op,
                pred_cls_score_op,
                pred_cls_category_op,
                cur_attribute=pred_attr_op,
                cur_velocity=pred_velo_op,
                classes=self.idx2cls_dict)
            for box in boxes:
                velocity = box.velocity[:2].tolist()
                if len(sweeps) == 0:
                    velocity = (np.nan, np.nan)
                box.velocity = np.array([*velocity, 0.0])
            # then cast the box from ego to global
            boxes = _lidar_nusc_box_to_global(cur_transformation_matrix,
                                              boxes,
                                              self.idx2cls_dict,
                                              eval_version='cvpr_2019')

            annos = []
            for box in boxes:
                name = self.idx2cls_dict[box.label]
                if box.name == -1:
                    attr = self.DefaultAttribute[name]
                else:
                    attr = self.AttributeIdxLabelMapping[name][box.name]
                velocity = box.velocity[:2].tolist()
                nusc_anno = {
                    "sample_token": cur_sample_token,
                    "translation": box.center.tolist(),
                    "size": box.wlh.tolist(),
                    "rotation": box.orientation.elements.tolist(),
                    "velocity": velocity,
                    "detection_name": name,
                    "detection_score": box.score,
                    "attribute_name": attr,
                }
                annos.append(nusc_anno)
            submissions_results[cur_sample_token] = annos

        submissions['results'] = submissions_results

        res_path = os.path.join(log_dir, "results_nusc_1.json")
        with open(res_path, "w") as f:
            json.dump(submissions, f)
        eval_main_file = os.path.join(cfg.ROOT_DIR, 'lib/core/nusc_eval.py')
        root_path = self.dataset_dir
        cmd = f"python3 {str(eval_main_file)} --root_path=\"{str(root_path)}\""
        cmd += f" --version={'v1.0-trainval'} --eval_version={'cvpr_2019'}"
        cmd += f" --res_path=\"{str(res_path)}\" --eval_set={'val'}"
        cmd += f" --output_dir=\"{LOG_FOUT_DIR}\""
        # use subprocess can release all nusc memory after evaluation
        subprocess.check_output(cmd, shell=True)
        os.system('rm \"%s\"' % res_path)  # remove former result file

        with open(os.path.join(log_dir, "metrics_summary.json"), "r") as f:
            metrics = json.load(f)
        return metrics

    def evaluate_recall(self,
                        sess,
                        feeddict_producer,
                        pred_list,
                        val_size,
                        cls_thresh,
                        log_dir,
                        placeholders=None):
        pass

    def logger_and_select_best_map(self, metrics, log_string):
        detail = {}
        result = f"Nusc v1.0-trainval Evaluation\n"
        final_score = []
        for name in self.cls_list:
            detail[name] = {}
            for k, v in metrics["label_aps"][name].items():
                detail[name][f"dist@{k}"] = v
            tp_errs = []
            tp_names = []
            for k, v in metrics["label_tp_errors"][name].items():
                detail[name][k] = v
                tp_errs.append(f"{v:.4f}")
                tp_names.append(k)
            threshs = ', '.join(list(metrics["label_aps"][name].keys()))
            scores = list(metrics["label_aps"][name].values())
            final_score.append(np.mean(scores))
            scores = ', '.join([f"{s * 100:.2f}" for s in scores])
            result += f"{name} Nusc dist AP@{threshs} and TP errors\n"
            result += scores
            result += "\n"
            result += "mAP: %0.2f\n" % (
                np.mean(list(metrics["label_aps"][name].values())) * 100)
            result += ', '.join(tp_names) + ": " + ', '.join(tp_errs)
            result += "\n"
        result += 'NDS score: %0.2f\n' % (metrics['nd_score'] * 100)
        log_string(result)

        cur_result = metrics['nd_score']
        return cur_result

    def logger_and_select_best_recall(self, metrics, log_string):
        pass

    # save prediction results
    def save_predictions(self,
                         sess,
                         feeddict_producer,
                         pred_list,
                         val_size,
                         cls_thresh,
                         log_dir,
                         placeholders=None):
        pass
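
# Hedged usage sketch of the two modes above; cfg, maps_dict and the surrounding
# training framework are assumed to be configured elsewhere.
# dataset = NuScenesDataset(mode='preprocessing', img_list='train')
# dataset.preprocess_batch()   # writes infos.pkl (plus the mixup DB when enabled)
# dataset = NuScenesDataset(mode='loading', img_list='train', workers_num=4)
# batch_iter = dataset.load_batch(batch_size=2)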
Example #8
def process_data(data_path, version, output_path, val_split):
    nusc = NuScenes(version=version, dataroot=data_path, verbose=True)
    splits = create_splits_scenes()
    train_scenes, val_scenes = train_test_split(
        splits['train' if 'mini' not in version else 'mini_train'],
        test_size=val_split)
    train_scene_names = splits['train' if 'mini' not in
                               version else 'mini_train']
    val_scene_names = []  #val_scenes
    test_scene_names = splits['val' if 'mini' not in version else 'mini_val']

    ns_scene_names = dict()
    ns_scene_names['train'] = train_scene_names
    ns_scene_names['val'] = val_scene_names
    ns_scene_names['test'] = test_scene_names

    for data_class in ['train', 'val', 'test']:
        env = Environment(node_type_list=['VEHICLE', 'PEDESTRIAN'],
                          standardization=standardization)
        attention_radius = dict()
        attention_radius[(env.NodeType.PEDESTRIAN,
                          env.NodeType.PEDESTRIAN)] = 10.0
        attention_radius[(env.NodeType.PEDESTRIAN,
                          env.NodeType.VEHICLE)] = 20.0
        attention_radius[(env.NodeType.VEHICLE,
                          env.NodeType.PEDESTRIAN)] = 20.0
        attention_radius[(env.NodeType.VEHICLE, env.NodeType.VEHICLE)] = 30.0

        env.attention_radius = attention_radius
        env.robot_type = env.NodeType.VEHICLE
        scenes = []

        for ns_scene_name in tqdm(ns_scene_names[data_class]):
            ns_scene = nusc.get(
                'scene',
                nusc.field2token('scene', 'name', ns_scene_name)[0])
            scene_id = int(ns_scene['name'].replace('scene-', ''))
            if scene_id in scene_blacklist:  # Some scenes have bad localization
                continue

            scene = process_scene(ns_scene, env, nusc, data_path)
            if scene is not None:
                if data_class == 'train':
                    scene.augmented = list()
                    angles = np.arange(0, 360, 15)
                    for angle in angles:
                        scene.augmented.append(augment_scene(scene, angle))
                scenes.append(scene)

        print(f'Processed {len(scenes)} scenes')

        env.scenes = scenes

        if len(scenes) > 0:
            mini_string = ''
            if 'mini' in version:
                mini_string = '_mini'
            data_dict_path = os.path.join(
                output_path,
                'nuScenes_' + data_class + mini_string + '_full.pkl')
            with open(data_dict_path, 'wb') as f:
                dill.dump(env, f, protocol=dill.HIGHEST_PROTOCOL)
            print('Saved Environment!')

        global total
        global curv_0_2
        global curv_0_1
        print(f"Total Nodes: {total}")
        print(f"Curvature > 0.1 Nodes: {curv_0_1}")
        print(f"Curvature > 0.2 Nodes: {curv_0_2}")
        total = 0
        curv_0_1 = 0
        curv_0_2 = 0
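
# Hedged usage sketch; paths are hypothetical and process_scene, augment_scene,
# scene_blacklist and standardization come from the rest of the original module.
# process_data('/data/sets/nuscenes', 'v1.0-mini', './processed', val_split=0.15)
# with open('./processed/nuScenes_train_mini_full.pkl', 'rb') as f:
#     env = dill.load(f)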
Example #9
nusc.log[0]
print("There are {} maps masks in the loaded dataset".format(len(nusc.map)))
nusc.map[0]

nusc.category[0]
cat_token = nusc.category[0]['token']
cat_token

nusc.get('category', cat_token)
nusc.sample_annotation[0]
nusc.get('visibility', nusc.sample_annotation[0]['visibility_token'])

one_instance = nusc.get('instance',
                        nusc.sample_annotation[0]['instance_token'])
one_instance
ann_tokens = nusc.field2token('sample_annotation', 'instance_token',
                              one_instance['token'])
ann_tokens_field2token = set(ann_tokens)

ann_tokens_field2token

ann_record = nusc.get('sample_annotation',
                      one_instance['first_annotation_token'])
ann_record

ann_tokens_traverse = set()
ann_tokens_traverse.add(ann_record['token'])
while not ann_record['next'] == "":
    ann_record = nusc.get('sample_annotation', ann_record['next'])
    ann_tokens_traverse.add(ann_record['token'])
print(ann_tokens_traverse == ann_tokens_field2token)
nusc.list_categories()
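
# A possible continuation (hedged): render one of the annotations traversed above.
# nusc.render_annotation(ann_record['token'])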