    def __init__(self, set_name="mini_train"):

        # Validate the requested split name
        set_paths = [
            'train', 'val', 'test', 'mini_train', 'mini_val', 'train_detect',
            'train_track'
        ]
        assert set_name in set_paths, "Incorrect set_name"

        #Initialize data and Prediction Helper classes
        self.data_path = DATA_PATH
        self.nusc = NuScenes(version=DATA_VERSION,
                             dataroot=self.data_path,
                             verbose=True)
        self.helper = PredictHelper(self.nusc)

        #get all the scenes
        self.scenes = create_splits_scenes()

        # Get the scene names belonging to the requested split
        self.set_name = set_name
        self.trainset = self.scenes[self.set_name]  # List of scene names in the requested split
        self.prediction_scenes = json.load(
            open(self.data_path + "maps/prediction_scenes.json", "r")
        )  #Dictionary containing list of instance and sample tokens for each scene

        print("Number of samples in train set: %d" % (len(self.trainset)))
Example #2
def get_prediction_challenge_split(split: str, dataroot: str = '/data/sets/nuscenes') -> List[str]:
    """
    Gets a list of {instance_token}_{sample_token} strings for each split.
    :param split: One of 'mini_train', 'mini_val', 'train', 'val'.
    :param dataroot: Path to the nuScenes dataset.
    :return: List of tokens belonging to the split. Format {instance_token}_{sample_token}.
    """
    if split not in {'mini_train', 'mini_val', 'train', 'train_val', 'val'}:
        raise ValueError("split must be one of (mini_train, mini_val, train, train_val, val)")
    
    if split == 'train_val':
        split_name = 'train'
    else:
        split_name = split

    path_to_file = os.path.join(dataroot, "maps", "prediction", "prediction_scenes.json")
    prediction_scenes = json.load(open(path_to_file, "r"))
    scenes = create_splits_scenes()
    scenes_for_split = scenes[split_name]
    
    if split == 'train':
        scenes_for_split = scenes_for_split[NUM_IN_TRAIN_VAL:]
    if split == 'train_val':
        scenes_for_split = scenes_for_split[:NUM_IN_TRAIN_VAL]

    token_list_for_scenes = map(lambda scene: prediction_scenes.get(scene, []), scenes_for_split)

    return list(chain.from_iterable(token_list_for_scenes))
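A minimal usage sketch for get_prediction_challenge_split above, assuming the nuScenes prediction extension (maps/prediction/prediction_scenes.json) is installed under the dataroot; the paths are placeholders.

from nuscenes.nuscenes import NuScenes
from nuscenes.prediction import PredictHelper

# Each returned token has the form "{instance_token}_{sample_token}".
tokens = get_prediction_challenge_split('mini_train', dataroot='/data/sets/nuscenes')

nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
helper = PredictHelper(nusc)
instance_token, sample_token = tokens[0].split('_')
# Two seconds of past motion for this agent, expressed in the agent frame.
past_xy = helper.get_past_for_agent(instance_token, sample_token, seconds=2, in_agent_frame=True)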
def factory(dataset, dir_nuscenes):
    """Define dataset type and split training and validation"""

    assert dataset in ['nuscenes', 'nuscenes_mini', 'nuscenes_teaser']
    if dataset == 'nuscenes_mini':
        version = 'v1.0-mini'
    else:
        version = 'v1.0-trainval'

    nusc = NuScenes(version=version, dataroot=dir_nuscenes, verbose=True)
    scenes = nusc.scene

    if dataset == 'nuscenes_teaser':
        with open("splits/nuscenes_teaser_scenes.txt", "r") as file:
            teaser_scenes = file.read().splitlines()
        scenes = [scene for scene in scenes if scene['token'] in teaser_scenes]
        with open("splits/split_nuscenes_teaser.json", "r") as file:
            dic_split = json.load(file)
        split_train = [
            scene['name'] for scene in scenes
            if scene['token'] in dic_split['train']
        ]
        split_val = [
            scene['name'] for scene in scenes
            if scene['token'] in dic_split['val']
        ]
    else:
        split_scenes = splits.create_splits_scenes()
        split_train, split_val = split_scenes['train'], split_scenes['val']

    return nusc, scenes, split_train, split_val
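A hypothetical call to factory above for the mini release (the dataset directory is a placeholder). Note that for 'nuscenes_mini' the returned split_train/split_val still contain the full trainval scene names, since only 'nuscenes_teaser' uses a custom split file.

nusc, scenes, split_train, split_val = factory('nuscenes_mini', '/data/sets/nuscenes')
print('{} train scenes, {} val scenes'.format(len(split_train), len(split_val)))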
Example #4
def gen_scene_splits(dataroot: str):
    """
    Returns all nuScenes scene splits by scene token, as specified in https://github.com/nutonomy/nuscenes-devkit/blob/master/python-sdk/nuscenes/utils/splits.py.
    Arguments:
        dataroot: Directory path of the nuScenes datasets, <str>.
    """
    # Imports
    from nuscenes.nuscenes import NuScenes
    from nuscenes.utils.splits import create_splits_scenes

    # Define
    scene_splits = create_splits_scenes()

    # Load trainval
    version = 'v1.0-trainval'
    nusc = NuScenes(version=version, dataroot=dataroot + version, verbose=False)

    for split, scene_names in scene_splits.items():
        if split == 'test':
            continue
        else:
            scene_splits[split] = [nusc.field2token('scene', 'name', scene_name)[0] for scene_name in scene_names]

    # Load test
    version = 'v1.0-test'
    nusc = NuScenes(version=version, dataroot=dataroot + version, verbose=False)
    scene_splits['test'] = [nusc.field2token('scene', 'name', scene_name)[0] for scene_name in scene_splits['test']]

    return scene_splits
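A hedged usage sketch for gen_scene_splits. Because the function builds each NuScenes dataroot by string concatenation (dataroot + version), the argument should end with a path separator and both v1.0-trainval and v1.0-test must be laid out accordingly; the path is a placeholder.

scene_splits = gen_scene_splits('/data/sets/nuscenes/')
print(len(scene_splits['train']), len(scene_splits['val']), len(scene_splits['test']))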
Example #5
    def get_scenes(self):
        # filter by scene split
        split = {
            'v1.0-trainval': {
                True: 'train',
                False: 'val'
            },
            'v1.0-mini': {
                True: 'mini_train',
                False: 'mini_val'
            },
        }[self.nusc.version][self.is_train]
        scenes = create_splits_scenes()[split]
        return scenes
def split_scenes(scenes, split):
    """
    Get the list of scenes in a split.

    :param scenes (list): list of all scene records from nuScenes
    :param split (str): split name
    :return scenes_list (list): list of scene tokens in the split
    """
    scene_split_names = splits.create_splits_scenes()[split]
    scenes_list = []
    for scene in scenes:
        #NOTE: mini train and mini val are subsets of train and val
        if scene['name'] in scene_split_names:
            scenes_list.append(scene['token'])
    return scenes_list
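A minimal usage sketch for split_scenes above, shown on the mini release (the dataroot is a placeholder).

from nuscenes.nuscenes import NuScenes

nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
mini_val_tokens = split_scenes(nusc.scene, 'mini_val')
print('{} scene tokens in mini_val'.format(len(mini_val_tokens)))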
Example #7
    def _split_scenes(self) -> List[str]:
        """
        Collect the scene tokens that belong to the configured split (self.split)
        """
        scene_split_names = splits.create_splits_scenes()
        scenes_list = []
        for scene in self.nusc.scene:
            #NOTE: mini train and mini val are subsets of train and val
            if scene['name'] in scene_split_names[self.split]:
                scenes_list.append(scene['token'])

        self.logger.debug('{}: {} scenes'.format(self.nusc_version,
                                                 str(len(scenes_list))))

        return scenes_list
Example #8
    def _get_scenes(self):
        # filter by scene split
        split = {
            'v1.0-trainval': {
                True: 'train',
                False: 'val'
            },
            'v1.0-mini': {
                True: 'mini_train',
                False: 'mini_val'
            },
            'lyft': {
                True: 'lyft_train',
                False: 'lyft_val'
            },
        }[self.nusc.version][self.train]

        return create_splits_scenes()[split]
Example #9
    def __init__(self, root_path, split='train', init_nusc=True):
        super().__init__()
        self.root_path = root_path
        self.split = split
        if (init_nusc):
            self.nusc = NuScenes(version='v1.0-trainval',
                                 dataroot=root_path,
                                 verbose=True)

        splits = create_splits_scenes()
        split_scenes = splits[split]
        all_scene_names = [scene['name'] for scene in self.nusc.scene]
        split_scene_tokens = [
            self.nusc.scene[all_scene_names.index(scene_name)]['token']
            for scene_name in split_scenes
        ]

        self.sample_id_list = self.get_sample_tokens_from_scenes(
            split_scene_tokens)
Example #10
def get_samples_in_eval_set(nusc: NuScenes, eval_set: str) -> List[str]:
    """
    Gets all the sample tokens from the split that are relevant to the eval set.
    :param nusc: A NuScenes object.
    :param eval_set: The dataset split to evaluate on, e.g. train, val or test.
    :return: A list of sample tokens.
    """
    # Create a dict to map from scene name to scene token for quick lookup later on.
    scene_name2tok = dict()
    for rec in nusc.scene:
        scene_name2tok[rec['name']] = rec['token']

    # Get scenes splits from nuScenes.
    scenes_splits = create_splits_scenes(verbose=False)

    # Collect sample tokens for each scene.
    samples = []
    for scene in scenes_splits[eval_set]:
        scene_record = nusc.get('scene', scene_name2tok[scene])
        total_num_samples = scene_record['nbr_samples']
        first_sample_token = scene_record['first_sample_token']
        last_sample_token = scene_record['last_sample_token']

        sample_token = first_sample_token
        i = 0
        while sample_token != '':
            sample_record = nusc.get('sample', sample_token)
            samples.append(sample_record['token'])

            if sample_token == last_sample_token:
                sample_token = ''
            else:
                sample_token = sample_record['next']
            i += 1

        assert total_num_samples == i, 'Error: There were supposed to be {} keyframes, ' \
                                       'but only {} keyframes were processed'.format(total_num_samples, i)

    return samples
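A minimal usage sketch for get_samples_in_eval_set above, shown on the mini release (the dataroot is a placeholder).

from nuscenes.nuscenes import NuScenes

nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
sample_tokens = get_samples_in_eval_set(nusc, 'mini_val')
print('{} samples in mini_val'.format(len(sample_tokens)))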
def process_data(data_path, version, val_split):
    nusc = NuScenes(version=version, dataroot=data_path, verbose=True)
    splits = create_splits_scenes()
    train_scenes, val_scenes = train_test_split(splits['train' if 'mini' not in version else 'mini_train'], test_size=val_split)
    train_scene_names = splits['train' if 'mini' not in version else 'mini_train']
    val_scene_names = splits['val' if 'mini' not in version else 'mini_val']

    ns_scene_names = dict()
    ns_scene_names['train'] = train_scene_names
    ns_scene_names['val'] = val_scene_names
    scenes = []
    for data_class in ['train', 'val']:
        for ns_scene_name in tqdm(ns_scene_names[data_class]):
            ns_scene = nusc.get('scene', nusc.field2token('scene', 'name', ns_scene_name)[0])
            scene_id = int(ns_scene['name'].replace('scene-', ''))
            if scene_id in scene_blacklist:  # Some scenes have bad localization
                continue

            scene = process_scene(ns_scene, nusc)
            if scene is not None:
                scenes.append(scene)
    
    print(f'Processed {len(scenes)} scenes')
Example #12
    def get_scenes(self):

        if self.is_lyft:
            scenes = [row['name'] for row in self.nusc.scene]

            # Split in train/val
            indices = TRAIN_LYFT_INDICES if self.is_train else VAL_LYFT_INDICES
            scenes = [scenes[i] for i in indices]
        else:
            # filter by scene split
            split = {
                'v1.0-trainval': {
                    True: 'train',
                    False: 'val'
                },
                'v1.0-mini': {
                    True: 'mini_train',
                    False: 'mini_val'
                },
            }[self.nusc.version][self.is_train]

            scenes = create_splits_scenes()[split]

        return scenes
Example #13
def from_nuscenes(
    data_path: str,
    version: str,
    split: str,
    nproc: int = NPROC,
    add_nonkey_frames: bool = False,
) -> Dataset:
    """Convert NuScenes dataset to Scalabel format."""
    data, df = load_data(data_path, version)
    scene_names_per_split = create_splits_scenes()

    first_sample_tokens = []
    for token, name in zip(df.first_sample_token.values, df.scene_name.values):
        if name in scene_names_per_split[split]:
            first_sample_tokens.append(token)

    func = partial(parse_sequence, data, add_nonkey_frames)
    if nproc > 1:
        partial_results = pmap(
            func,
            zip(first_sample_tokens, scene_names_per_split[split]),
            nprocs=nproc,
        )
    else:
        partial_results = map(  # type: ignore
            func,
            zip(first_sample_tokens, scene_names_per_split[split]),
        )
    frames, groups = [], []
    for f, g in partial_results:
        frames.extend(f)
        groups.extend(g)

    cfg = Config(categories=[Category(name=n) for n in DETECTION_NAMES])
    dataset = Dataset(frames=frames, groups=groups, config=cfg)
    return dataset
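A hypothetical call to from_nuscenes above; it assumes the surrounding Scalabel conversion helpers (load_data, parse_sequence, pmap) are importable, and the data path is a placeholder.

dataset = from_nuscenes('/data/sets/nuscenes', 'v1.0-mini', 'mini_train', nproc=1)
print('{} frames, {} groups'.format(len(dataset.frames), len(dataset.groups)))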
Example #14
def load_gt(nusc, eval_split: str, verbose: bool = False) -> EvalBoxes:
    """ Loads ground truth boxes from DB. """

    # Init.
    attribute_map = {a['token']: a['name'] for a in nusc.attribute}

    if verbose:
        print('Loading annotations for {} split from nuScenes version: {}'.
              format(eval_split, nusc.version))
    # Read out all sample_tokens in DB.
    sample_tokens_all = [s['token'] for s in nusc.sample]
    assert len(sample_tokens_all) > 0, "Error: Database has no samples!"

    # Only keep samples from this split.
    splits = create_splits_scenes()

    # Check compatibility of split with nusc_version.
    version = nusc.version
    if eval_split in {'train', 'val', 'train_detect', 'train_track'}:
        assert version.endswith('trainval'), \
            'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version)
    elif eval_split in {'mini_train', 'mini_val'}:
        assert version.endswith('mini'), \
            'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version)
    elif eval_split == 'test':
        assert version.endswith('test'), \
            'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version)
    else:
        raise ValueError(
            'Error: Requested split {} which this function cannot map to the correct NuScenes version.'
            .format(eval_split))

    if eval_split == 'test':
        # Check that you aren't trying to cheat :).
        assert len(nusc.sample_annotation) > 0, \
            'Error: You are trying to evaluate on the test set but you do not have the annotations!'

    sample_tokens = []
    for sample_token in sample_tokens_all:
        scene_token = nusc.get('sample', sample_token)['scene_token']
        scene_record = nusc.get('scene', scene_token)
        if scene_record['name'] in splits[eval_split]:
            sample_tokens.append(sample_token)

    all_annotations = EvalBoxes()

    # Load annotations and filter predictions and annotations.
    for sample_token in tqdm.tqdm(sample_tokens):

        sample = nusc.get('sample', sample_token)
        sample_annotation_tokens = sample['anns']

        sample_boxes = []
        for sample_annotation_token in sample_annotation_tokens:

            # Get label name in detection task and filter unused labels.
            sample_annotation = nusc.get('sample_annotation',
                                         sample_annotation_token)
            detection_name = category_to_detection_name(
                sample_annotation['category_name'])
            if detection_name is None:
                continue

            # Get attribute_name.
            attr_tokens = sample_annotation['attribute_tokens']
            attr_count = len(attr_tokens)
            if attr_count == 0:
                attribute_name = ''
            elif attr_count == 1:
                attribute_name = attribute_map[attr_tokens[0]]
            else:
                raise Exception(
                    'Error: GT annotations must not have more than one attribute!'
                )

            sample_boxes.append(
                EvalBox(
                    sample_token=sample_token,
                    translation=sample_annotation['translation'],
                    size=sample_annotation['size'],
                    rotation=sample_annotation['rotation'],
                    velocity=nusc.box_velocity(sample_annotation['token'])[:2],
                    detection_name=detection_name,
                    detection_score=-1.0,  # GT samples do not have a score.
                    attribute_name=attribute_name,
                    num_pts=sample_annotation['num_lidar_pts'] +
                    sample_annotation['num_radar_pts']))
        all_annotations.add_boxes(sample_token, sample_boxes)

    if verbose:
        print("Loaded ground truth annotations for {} samples.".format(
            len(all_annotations.sample_tokens)))

    return all_annotations
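A hedged usage sketch for this load_gt variant, loading ground truth for the val split of the trainval release (the dataroot is a placeholder).

from nuscenes.nuscenes import NuScenes

nusc = NuScenes(version='v1.0-trainval', dataroot='/data/sets/nuscenes', verbose=True)
gt_boxes = load_gt(nusc, 'val', verbose=True)
print('GT loaded for {} samples'.format(len(gt_boxes.sample_tokens)))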
Example #15
    def _mock_submission(nusc, split) -> Dict[str, dict]:
        """
        Creates "reasonable" submission (results and metadata) by looping through the full val-set, and adding 1
        prediction per GT. Predictions will be permuted randomly along all axes.
        """
        def random_class(category_name):
            class_names = [
                'barrier', 'bicycle', 'bus', 'car', 'construction_vehicle',
                'motorcycle', 'pedestrian', 'traffic_cone', 'trailer', 'truck'
            ]
            tmp = category_to_detection_name(category_name)
            if tmp is not None and np.random.rand() < .9:
                return tmp
            else:
                return class_names[np.random.randint(0, 9)]

        def random_attr(name):
            """
            This is the most straightforward way to generate a random attribute.
            Not currently used because we want the test fixture to be backwards compatible.
            """
            # Get relevant attributes.
            rel_attributes = detection_name_to_rel_attributes(name)

            if len(rel_attributes) == 0:
                # Empty string for classes without attributes.
                return ''
            else:
                # Pick a random attribute otherwise.
                return rel_attributes[np.random.randint(
                    0, len(rel_attributes))]

        mock_meta = {
            'use_camera': False,
            'use_lidar': True,
            'use_radar': False,
            'use_map': False,
            'use_external': False,
        }
        mock_results = {}
        splits = create_splits_scenes()
        val_samples = []
        for sample in nusc.sample:
            if nusc.get('scene',
                        sample['scene_token'])['name'] in splits[split]:
                val_samples.append(sample)

        for sample in tqdm(val_samples):
            sample_res = []
            for ann_token in sample['anns']:
                ann = nusc.get('sample_annotation', ann_token)
                detection_name = random_class(ann['category_name'])
                sample_res.append({
                    'sample_token':
                    sample['token'],
                    'translation':
                    list(
                        np.array(ann['translation']) + 5 *
                        (np.random.rand(3) - 0.5)),
                    'size':
                    list(
                        np.array(ann['size']) * 2 * (np.random.rand(3) + 0.5)),
                    'rotation':
                    list(
                        np.array(ann['rotation']) +
                        ((np.random.rand(4) - 0.5) * .1)),
                    'velocity':
                    list(
                        nusc.box_velocity(ann_token)[:2] *
                        (np.random.rand(3)[:2] + 0.5)),
                    'detection_name':
                    detection_name,
                    'detection_score':
                    random.random(),
                    'attribute_name':
                    random_attr(detection_name)
                })
            mock_results[sample['token']] = sample_res
        mock_submission = {'meta': mock_meta, 'results': mock_results}
        return mock_submission
Example #16
def load_gt(nusc: NuScenes,
            eval_split: str,
            box_cls,
            verbose: bool = False) -> EvalBoxes:
    """
    Loads ground truth boxes from DB.
    :param nusc: A NuScenes instance.
    :param eval_split: The evaluation split for which we load GT boxes.
    :param box_cls: Type of box to load, e.g. DetectionBox or TrackingBox.
    :param verbose: Whether to print messages to stdout.
    :return: The GT boxes.
    """
    # Init.
    if box_cls == DetectionBox:
        attribute_map = {a['token']: a['name'] for a in nusc.attribute}

    if verbose:
        print('Loading annotations for {} split from nuScenes version: {}'.
              format(eval_split, nusc.version))
    # Read out all sample_tokens in DB.
    sample_tokens_all = [s['token'] for s in nusc.sample]
    assert len(sample_tokens_all) > 0, "Error: Database has no samples!"

    # Only keep samples from this split.
    splits = create_splits_scenes()

    # Check compatibility of split with nusc_version.
    version = nusc.version
    if eval_split in {'train', 'val', 'train_detect', 'train_track'}:
        assert version.endswith('trainval'), \
            'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version)
    elif eval_split in {'mini_train', 'mini_val'}:
        assert version.endswith('mini'), \
            'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version)
    elif eval_split == 'test':
        assert version.endswith('test'), \
            'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version)
    else:
        raise ValueError(
            'Error: Requested split {} which this function cannot map to the correct NuScenes version.'
            .format(eval_split))

    if eval_split == 'test':
        # Check that you aren't trying to cheat :).
        assert len(nusc.sample_annotation) > 0, \
            'Error: You are trying to evaluate on the test set but you do not have the annotations!'

    sample_tokens = []
    for sample_token in sample_tokens_all:
        scene_token = nusc.get('sample', sample_token)['scene_token']
        scene_record = nusc.get('scene', scene_token)
        if scene_record['name'] in splits[eval_split]:
            sample_tokens.append(sample_token)

    all_annotations = EvalBoxes()

    # Load annotations and filter predictions and annotations.
    tracking_id_set = set()
    for sample_token in tqdm.tqdm(sample_tokens, leave=verbose):

        sample = nusc.get('sample', sample_token)
        sample_annotation_tokens = sample['anns']

        sample_boxes = []
        for sample_annotation_token in sample_annotation_tokens:

            sample_annotation = nusc.get('sample_annotation',
                                         sample_annotation_token)
            if box_cls == DetectionBox:
                # Get label name in detection task and filter unused labels.
                detection_name = category_to_detection_name(
                    sample_annotation['category_name'])
                if detection_name is None:
                    continue

                # Get attribute_name.
                attr_tokens = sample_annotation['attribute_tokens']
                attr_count = len(attr_tokens)
                if attr_count == 0:
                    attribute_name = ''
                elif attr_count == 1:
                    attribute_name = attribute_map[attr_tokens[0]]
                else:
                    raise Exception(
                        'Error: GT annotations must not have more than one attribute!'
                    )

                sample_boxes.append(
                    box_cls(
                        sample_token=sample_token,
                        translation=sample_annotation['translation'],
                        size=sample_annotation['size'],
                        rotation=sample_annotation['rotation'],
                        velocity=nusc.box_velocity(
                            sample_annotation['token'])[:2],
                        num_pts=sample_annotation['num_lidar_pts'] +
                        sample_annotation['num_radar_pts'],
                        detection_name=detection_name,
                        detection_score=-1.0,  # GT samples do not have a score.
                        attribute_name=attribute_name))
            elif box_cls == TrackingBox:
                # Use nuScenes token as tracking id.
                tracking_id = sample_annotation['instance_token']
                tracking_id_set.add(tracking_id)

                # Get label name in detection task and filter unused labels.
                tracking_name = category_to_tracking_name(
                    sample_annotation['category_name'])
                if tracking_name is None:
                    continue

                sample_boxes.append(
                    box_cls(
                        sample_token=sample_token,
                        translation=sample_annotation['translation'],
                        size=sample_annotation['size'],
                        rotation=sample_annotation['rotation'],
                        velocity=nusc.box_velocity(
                            sample_annotation['token'])[:2],
                        num_pts=sample_annotation['num_lidar_pts'] +
                        sample_annotation['num_radar_pts'],
                        tracking_id=tracking_id,
                        tracking_name=tracking_name,
                        tracking_score=-1.0  # GT samples do not have a score.
                    ))
            else:
                raise NotImplementedError('Error: Invalid box_cls %s!' %
                                          box_cls)

        all_annotations.add_boxes(sample_token, sample_boxes)

    if verbose:
        print("Loaded ground truth annotations for {} samples.".format(
            len(all_annotations.sample_tokens)))

    return all_annotations
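A hedged usage sketch for the box_cls variant of load_gt, mirroring how the devkit's detection and tracking evaluations call it; the dataroot is a placeholder and the mini split is used for brevity.

from nuscenes.nuscenes import NuScenes
from nuscenes.eval.detection.data_classes import DetectionBox
from nuscenes.eval.tracking.data_classes import TrackingBox

nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
gt_detection = load_gt(nusc, 'mini_val', DetectionBox, verbose=True)
gt_tracking = load_gt(nusc, 'mini_val', TrackingBox, verbose=True)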
Example #17
def evaluate(split):
    params = read_params(FLAGS.param)
    nusc = NuScenes(version='v1.0-trainval', dataroot=FLAGS.nuscenes, verbose=True)
    sensor = 'LIDAR_TOP'
    kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2)
    meta = {
        'use_camera': False,
        'use_lidar': True,
        'use_radar': False,
        'use_map': False,
        'use_external': False,
    }
    results = {}
    results_0_3 = {}

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(FLAGS.graph, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            for node in od_graph_def.node:
                if 'BatchMultiClassNonMaxSuppression' in node.name:
                    node.device = '/device:CPU:0'
            tf.import_graph_def(od_graph_def, name='')
    with detection_graph.as_default():
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = False
        with tf.Session(graph=detection_graph, config=config) as sess:
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            detection_boxes_inclined = detection_graph.get_tensor_by_name('detection_boxes_3d:0')
            detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')
            scene_splits = create_splits_scenes()

            # os.system('touch {}'.format())

            inf_time_list = []

            for scene in nusc.scene:
                if scene['name'] not in scene_splits[split]:
                    continue
                current_sample_token = scene['first_sample_token']
                last_sample_token = scene['last_sample_token']
                sample_in_scene = True
                while sample_in_scene:
                    if current_sample_token == last_sample_token:
                        sample_in_scene = False
                    sample = nusc.get('sample', current_sample_token)
                    lidar_top_data = nusc.get('sample_data', sample['data'][sensor])
                    # Get global pose and calibration data
                    ego_pose = nusc.get('ego_pose', lidar_top_data['ego_pose_token'])
                    calib_sensor = nusc.get('calibrated_sensor', lidar_top_data['calibrated_sensor_token'])
                    ego_to_global = transform_matrix(ego_pose['translation'], Quaternion(ego_pose['rotation']))
                    lidar_to_ego = transform_matrix(calib_sensor['translation'], Quaternion(calib_sensor['rotation']))

                    # Read input data
                    filename_prefix = os.path.splitext(os.path.splitext(lidar_top_data['filename'])[0])[0]
                    image_stacked, det_mask, image_ground, image_zmax = read_images(FLAGS.data, filename_prefix)
                    # Inference
                    start_time = time.time()
                    (boxes_aligned, boxes_inclined, scores, classes, num) = sess.run(
                        [detection_boxes, detection_boxes_inclined, detection_scores, detection_classes,
                         num_detections],
                        feed_dict={image_tensor: image_stacked})
                    inf_time = time.time() - start_time
                    print('Inference time:', inf_time)
                    inf_time_list.append(inf_time)



                    # Evaluate object detection
                    label_map = label_map_util.load_labelmap(FLAGS.label_map)
                    categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=10,
                                                                                use_display_name=True)
                    category_index = label_map_util.create_category_index(categories)
                    boxes = []
                    boxes_0_3 = []
                    scores = np.squeeze(scores)
                    for i in range(scores.shape[0]):
                        if scores[i] > .230:
                            object_class = category_index[int(np.squeeze(classes)[i])]['name']
                            box = calculate_object_box(tuple(np.squeeze(boxes_aligned)[i]),
                                                       tuple(np.squeeze(boxes_inclined)[i]), image_ground, image_zmax,
                                                       object_class, scores[i], params)
                            # Transformation box coordinate system to nuscenes lidar coordinate system
                            box.rotate(kitti_to_nu_lidar)
                            # Transformation nuscenes lidar coordinate system to ego vehicle frame
                            box.rotate(Quaternion(matrix=lidar_to_ego[:3, :3]))
                            box.translate(lidar_to_ego[:3, 3])
                            # Transformation ego vehicle frame to global frame
                            box.rotate(Quaternion(matrix=ego_to_global[:3, :3]))
                            box.translate(ego_to_global[:3, 3])
                            boxes.append(box)
                    for i in range(scores.shape[0]):
                        if scores[i] > .225:
                            object_class = category_index[int(np.squeeze(classes)[i])]['name']
                            box = calculate_object_box(tuple(np.squeeze(boxes_aligned)[i]),
                                                       tuple(np.squeeze(boxes_inclined)[i]), image_ground, image_zmax,
                                                       object_class, scores[i], params)
                            # Transformation box coordinate system to nuscenes lidar coordinate system
                            box.rotate(kitti_to_nu_lidar)
                            # Transformation nuscenes lidar coordinate system to ego vehicle frame
                            box.rotate(Quaternion(matrix=lidar_to_ego[:3, :3]))
                            box.translate(lidar_to_ego[:3, 3])
                            # Transformation ego vehicle frame to global frame
                            box.rotate(Quaternion(matrix=ego_to_global[:3, :3]))
                            box.translate(ego_to_global[:3, 3])
                            boxes_0_3.append(box)
                    # Convert boxes to nuScenes detection challenge result format.
                    sample_results = [box_to_sample_result(current_sample_token, box) for box in boxes]
                    results[current_sample_token] = sample_results

                    sample_results_0_3 = [box_to_sample_result(current_sample_token, box) for box in boxes_0_3]
                    results_0_3[current_sample_token] = sample_results_0_3

                    current_sample_token = sample['next']
    average_inf_time = sum(inf_time_list) / float(len(inf_time_list))
    with open(os.path.join(FLAGS.output, 'inference_time.txt'), 'w+') as f_info:
        f_info.write('average time:{}\n'.format(average_inf_time))
        f_info.write(str(inf_time_list))
    submission = {
        'meta': meta,
        'results': results
    }
    submission_path = os.path.join(FLAGS.output, 'submission_0_3nn30.json')
    with open(submission_path, 'w') as f:
        json.dump(submission, f, indent=2)

    submission_0_3 = {
        'meta': meta,
        'results': results_0_3
    }
    submission_path_0_3 = os.path.join(FLAGS.output, 'submission_0_3nn25.json')
    with open(submission_path_0_3, 'w') as f:
        json.dump(submission_0_3, f, indent=2)
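The submission written by evaluate() above could then be scored with the devkit's detection evaluator. This is only a hedged sketch: it assumes nusc, split, submission_path and FLAGS.output from evaluate() are still in scope, and the config name shown is the standard 'detection_cvpr_2019'.

from nuscenes.eval.detection.config import config_factory
from nuscenes.eval.detection.evaluate import DetectionEval

cfg = config_factory('detection_cvpr_2019')
nusc_eval = DetectionEval(nusc, config=cfg, result_path=submission_path,
                          eval_set=split, output_dir=FLAGS.output, verbose=True)
nusc_eval.main(render_curves=False)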
    def __init__(self, mode='test', limiter=0):
        name = 'nuscenes'
        db.__init__(self, name)
        self._train_scenes = []
        self._val_scenes = []
        self._test_scenes = []
        self._train_index = []
        self._val_index = []
        self._test_index = []
        self._devkit_path = self._get_default_path()
        self._mode = mode
        self._nusc = None
        self._scene_sel = True
        #For now one large cache file is OK, but ideally just take subset of actually needed data and cache that. No need to load nusc every time.

        self._classes = (
            'dontcare',  # always index 0
            'vehicle.car',
            'human.pedestrian',
            'vehicle.bicycle')

        self.config = {'cleanup': True, 'matlab_eval': False, 'rpn_file': None}
        self._class_to_ind = dict(
            list(zip(self.classes, list(range(self.num_classes)))))
        self._val_scenes = create_splits_scenes()['val']
        self._train_scenes = create_splits_scenes()['train']
        self._test_scenes = create_splits_scenes()['test']
        #TODO: create custom scene list
        #print(self._train_scenes)
        for rec in self.nusc.sample_data:
            if (rec['channel'] == 'CAM_FRONT' and rec['is_key_frame'] is True):
                rec_tmp = deepcopy(rec)
                #Reverse lookup, getting the overall sample from the picture sample token, to get the scene information.
                scene_name = self.nusc.get(
                    'scene',
                    self.nusc.get('sample',
                                  rec['sample_token'])['scene_token'])['name']
                desc = self.nusc.get(
                    'scene',
                    self.nusc.get('sample', rec['sample_token'])
                    ['scene_token'])['description'].lower()
                if (self._scene_sel and 'night' not in desc
                        and 'rain' not in desc and 'cones' not in desc):
                    sample = self.nusc.get('sample', rec['sample_token'])
                    rec_tmp['anns'] = sample['anns']
                    rec_tmp['lidar_token'] = sample['data']['LIDAR_TOP']
                    if (scene_name in self._train_scenes):
                        self._train_index.append(rec_tmp)
                    elif (scene_name in self._val_scenes):
                        self._val_index.append(rec_tmp)
                    elif (scene_name in self._test_scenes):
                        self._test_index.append(rec_tmp)
        rand = SystemRandom()
        #Get global image info
        if (mode == 'train'):
            img_index = self._train_index
            rand.shuffle(self._val_index)
        elif (mode == 'val'):
            img_index = self._val_index
        elif (mode == 'test'):
            img_index = self._test_index
        self._imwidth = img_index[0]['width']
        self._imheight = img_index[0]['height']
        self._imtype = img_index[0]['fileformat']
        rand = SystemRandom()
        rand.shuffle(img_index)
        if (limiter != 0):
            img_index = img_index[:limiter]
        if (mode == 'train'):
            self._train_index = img_index
        elif (mode == 'val'):
            self._val_index = img_index
        elif (mode == 'test'):
            self._test_index = img_index
        assert os.path.exists(
            self._devkit_path
        ), 'nuscenes dataset path does not exist: {}'.format(self._devkit_path)
    def _mock_submission(nusc: NuScenes,
                         split: str,
                         add_errors: bool = False) -> Dict[str, dict]:
        """
        Creates "reasonable" submission (results and metadata) by looping through the mini-val set, adding 1 GT
        prediction per sample. Predictions will be permuted randomly along all axes.
        :param nusc: NuScenes instance.
        :param split: Dataset split to use.
        :param add_errors: Whether to use GT or add errors to it.
        """

        def random_class(category_name: str, _add_errors: bool = False) -> Optional[str]:
            # Alter 10% of the valid labels.
            class_names = sorted(TRACKING_NAMES)
            tmp = category_to_tracking_name(category_name)

            if tmp is None:
                return None
            else:
                if not _add_errors or np.random.rand() < .9:
                    return tmp
                else:
                    return class_names[np.random.randint(0, len(class_names) - 1)]

        def random_id(instance_token: str, _add_errors: bool = False) -> str:
            # Alter 10% of the valid ids to be a random string, which hopefully corresponds to a new track.
            if not _add_errors or np.random.rand() < .9:
                _tracking_id = instance_token + '_pred'
            else:
                _tracking_id = str(np.random.randint(0, sys.maxsize))

            return _tracking_id

        mock_meta = {
            'use_camera': False,
            'use_lidar': True,
            'use_radar': False,
            'use_map': False,
            'use_external': False,
        }
        mock_results = {}

        # Get all samples in the current evaluation split.
        splits = create_splits_scenes()
        val_samples = []
        for sample in nusc.sample:
            if nusc.get('scene', sample['scene_token'])['name'] in splits[split]:
                val_samples.append(sample)

        # Prepare results.
        instance_to_score = dict()
        for sample in tqdm(val_samples, leave=False):
            sample_res = []
            for ann_token in sample['anns']:
                ann = nusc.get('sample_annotation', ann_token)
                translation = np.array(ann['translation'])
                size = np.array(ann['size'])
                rotation = np.array(ann['rotation'])
                velocity = nusc.box_velocity(ann_token)[:2]
                tracking_id = random_id(ann['instance_token'], _add_errors=add_errors)
                tracking_name = random_class(ann['category_name'], _add_errors=add_errors)

                # Skip annotations for classes not part of the detection challenge.
                if tracking_name is None:
                    continue

                # Skip annotations with 0 lidar/radar points.
                num_pts = ann['num_lidar_pts'] + ann['num_radar_pts']
                if num_pts == 0:
                    continue

                # If we randomly assign a score in [0, 1] to each box and later average over the boxes in the track,
                # the average score will be around 0.5 and we will have 0 predictions above that.
                # Therefore we assign the same scores to each box in a track.
                if ann['instance_token'] not in instance_to_score:
                    instance_to_score[ann['instance_token']] = random.random()
                tracking_score = instance_to_score[ann['instance_token']]
                tracking_score = np.clip(tracking_score + random.random() * 0.3, 0, 1)

                if add_errors:
                    translation += 4 * (np.random.rand(3) - 0.5)
                    size *= (np.random.rand(3) + 0.5)
                    rotation += (np.random.rand(4) - 0.5) * .1
                    velocity *= np.random.rand(3)[:2] + 0.5

                sample_res.append({
                        'sample_token': sample['token'],
                        'translation': list(translation),
                        'size': list(size),
                        'rotation': list(rotation),
                        'velocity': list(velocity),
                        'tracking_id': tracking_id,
                        'tracking_name': tracking_name,
                        'tracking_score': tracking_score
                    })
            mock_results[sample['token']] = sample_res
        mock_submission = {
            'meta': mock_meta,
            'results': mock_results
        }
        return mock_submission
Example #20
def visualize(split):
    nusc = NuScenes(version='v1.0-trainval', dataroot=FLAGS.nuscenes, verbose=True)
    sensor = 'LIDAR_TOP'

    # pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
    # with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f:
    #     text_format.Merge(f.read(), pipeline_config)
    # if not pipeline_config.model.HasField('ssd_augmentation'):
    #     raise ValueError('Model with ssd_augmentation estimation is required.')



    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(FLAGS.graph, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            for node in od_graph_def.node:
                if 'BatchMultiClassNonMaxSuppression' in node.name:
                    node.device = '/device:CPU:0'
            tf.import_graph_def(od_graph_def, name='')
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            detection_boxes_inclined = detection_graph.get_tensor_by_name('detection_boxes_3d:0')
            detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')
            scene_splits = create_splits_scenes()
            for scene in nusc.scene:
                if scene['name'] not in vis_set:
                    continue
                scene_dir = os.path.join(FLAGS.output, scene['name'])
                os.system('mkdir {}'.format(scene_dir))
                folder_inverse = os.path.join(scene_dir, 'inverse')

                os.system('mkdir {}'.format(folder_inverse))





                current_sample_token = scene['first_sample_token']
                last_sample_token = scene['last_sample_token']
                # first_sample = nusc.get('sample', scene['first_sample_token'])
                # current_token = first_sample['data'][sensor]
                sample_in_scene = True
                first_inference = True
                while sample_in_scene:
                    # while current_token:
                    if current_sample_token == last_sample_token:
                        sample_in_scene = False
                    sample = nusc.get('sample', current_sample_token)
                    lidar_top_data = nusc.get('sample_data', sample['data'][sensor])
                    if first_inference:
                        # current_token = lidar_top_data['next']
                        # if use_10hz_capture_frequency:
                        #    if current_token:
                        #        lidar_top_data_next = nusc.get('sample_data', current_token)
                        #        current_token = lidar_top_data_next['next']
                        current_sample_token = sample['next']
                        first_inference = False
                        continue

                    # Read input data
                    filename_prefix = os.path.splitext(os.path.splitext(lidar_top_data['filename'])[0])[0]
                    image_stacked, det_mask, observation_mask, z_mask = read_images(FLAGS.data, FLAGS.data_beliefs,
                                                                                    filename_prefix)
                    # Inference
                    start_time = time.time()

                    (boxes_aligned, boxes_inclined, scores, classes, num) = sess.run(
                        [detection_boxes, detection_boxes_inclined, detection_scores, detection_classes,
                         num_detections],
                        feed_dict={image_tensor: image_stacked})
                    print('Inference time:', time.time() - start_time)

                    # Visualize object detection and scene flow
                    label_map = label_map_util.load_labelmap(FLAGS.label_map)
                    categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=10,
                                                                                use_display_name=True)
                    category_index = label_map_util.create_category_index(categories)

                    # Create grid map to visualize
                    image_vis = np.zeros((image_stacked.shape[1], image_stacked.shape[2], 3),
                                        dtype=np.uint8)
                    image_vis_inv  = np.zeros((image_stacked.shape[1], image_stacked.shape[2], 3),
                                        dtype=np.uint8)
                    image_vis_color = np.zeros((image_stacked.shape[1], image_stacked.shape[2], 3),
                                        dtype=np.uint8) * 255  # todo
                    image_vis_color_inv = np.zeros((image_stacked.shape[1], image_stacked.shape[2], 3),
                                        dtype=np.uint8) * 255  # todo
                    # for (v, u), val in np.ndenumerate(observation_mask):
                    #     if val:
                    #         image_vis[v, u, :] = 50
                    # image_vis_inv = cv2.bitwise_not(image_vis)
                    # for (v, u), val in np.ndenumerate(det_mask):
                    #     if val:
                    #         image_vis[v, u] = 255
                    #         image_vis_inv[v, u] = 0

                    # print("z_mask")
                    # print(z_mask)
                    # print("observation_mask")
                    # print(observation_mask)
                    for v, u in itertools.product(range(image_stacked.shape[1]), range(image_stacked.shape[2])):
                        image_vis_color[v, u, 0] = observation_mask[v, u] * 5
                        image_vis_color[v, u, 1] = det_mask[v, u] * 10
                        image_vis_color[v, u, 2] = z_mask[v, u]
                    image_vis_color_inv = cv2.bitwise_not(image_vis_color)


                    for (v, u), val in np.ndenumerate(det_mask):
                        if val:
                            image_vis[v, u] = 255
                            image_vis_inv[v, u] = 0

                    image_vis = np.zeros((image_stacked.shape[1], image_stacked.shape[2], 3), dtype=np.uint8)
                    for (v, u), val in np.ndenumerate(observation_mask):
                        if val:
                            image_vis[v, u, :] = 50
                    image_vis_inv = cv2.bitwise_not(image_vis)
                    for (v, u), val in np.ndenumerate(det_mask):
                        if val:
                            image_vis[v, u] = 255
                            image_vis_inv[v, u] = 0

                    # Draw inclined detection box
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image_vis_color,
                        np.squeeze(boxes_aligned),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        category_index,
                        boxes_3d=np.squeeze(boxes_inclined),
                        min_score_thresh=0.3,
                        use_normalized_coordinates=True,
                        line_thickness=3)
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image_vis_color_inv,
                        np.squeeze(boxes_aligned),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        category_index,
                        boxes_3d=np.squeeze(boxes_inclined),
                        min_score_thresh=0.3,
                        use_normalized_coordinates=True,
                        line_thickness=3)
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image_vis,
                        np.squeeze(boxes_aligned),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        category_index,
                        boxes_3d=np.squeeze(boxes_inclined),
                        min_score_thresh=0.3,
                        use_normalized_coordinates=True,
                        line_thickness=3)
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image_vis_inv,
                        np.squeeze(boxes_aligned),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        category_index,
                        boxes_3d=np.squeeze(boxes_inclined),
                        min_score_thresh=0.3,
                        use_normalized_coordinates=True,
                        line_thickness=3)

                    # Save image
                    print(filename_prefix.split('/')[-1])
                    output_path = os.path.join(scene_dir, filename_prefix.split('/')[-1] + '.png')
                    cv2.imwrite(output_path, image_vis)


                    output_path_inv = os.path.join(folder_inverse, filename_prefix.split('/')[-1] + '.png')
                    # output_color_path = os.path.join(folder_color,filename_prefix.split('/')[-1] + '.png')
                    # output_color_path_inv = os.path.join(folder_color_inverse, filename_prefix.split('/')[-1] + '.png')

                    cv2.imwrite(output_path_inv, image_vis_inv)
                    # cv2.imwrite(output_color_path, image_vis_color)
                    # cv2.imwrite(output_color_path_inv, image_vis_color_inv)

                    current_sample_token = sample['next']
Example #21
    def __init__(self, nusc, nusc_split, kwargs, seed=0):
        super(nuScenesDataset, self).__init__()

        # set seed for split
        np.random.seed(seed)

        self.nusc = nusc
        self.nusc_root = self.nusc.dataroot
        self.nusc_can = NuScenesCanBus(dataroot=self.nusc_root)
        self.nusc_split = nusc_split

        # number of input samples
        self.n_input = kwargs["n_input"]

        # number of sampled trajectories
        self.n_samples = kwargs["n_samples"]

        # number of output samples
        self.n_output = kwargs["n_output"]
        assert(self.n_output == 7)

        #
        self.train_on_all_sweeps = kwargs["train_on_all_sweeps"]

        # scene-0419 does not have vehicle monitor data
        blacklist = [419] + self.nusc_can.can_blacklist

        # NOTE: use the official split (minus the ones in the blacklist)
        if "scene_token" in kwargs and kwargs["scene_token"] != "":
            scene = self.nusc.get("scene", kwargs["scene_token"])
            scenes = [scene]
        else:
            scene_splits = create_splits_scenes(verbose=False)
            scene_names = scene_splits[self.nusc_split]
            scenes = []
            for scene in self.nusc.scene:
                scene_name = scene["name"]
                scene_no = int(scene_name[-4:])
                if (scene_name in scene_names) and (scene_no not in blacklist):
                    scenes.append(scene)

        # list all sample data
        self.valid_index = []
        self.flip_flags = []
        self.scene_tokens = []
        self.sample_data_tokens = []
        for scene in scenes:
            scene_token = scene["token"]
            # location
            log = self.nusc.get("log", scene["log_token"])
            # flip x axis if in left-hand traffic (singapore)
            flip_flag = True if log["location"].startswith("singapore") else False
            # record the token of every key frame
            start_index = len(self.sample_data_tokens)
            first_sample = self.nusc.get("sample", scene["first_sample_token"])
            sample_data_token = first_sample["data"]["LIDAR_TOP"]
            while sample_data_token != "":
                sample_data = self.nusc.get("sample_data", sample_data_token)
                if (self.nusc_split == "train" and self.train_on_all_sweeps) or (sample_data["is_key_frame"]):
                    self.flip_flags.append(flip_flag)
                    self.scene_tokens.append(scene_token)
                    self.sample_data_tokens.append(sample_data_token)
                sample_data_token = sample_data["next"]
            end_index = len(self.sample_data_tokens)
            # NOTE: make sure we have enough number of sweeps for input and output
            if self.nusc_split == "train" and self.train_on_all_sweeps:
                valid_start_index = start_index + self.n_input - 1
                valid_end_index = end_index - (self.n_output - 1) * self.N_SWEEPS_PER_SAMPLE
            else:
                # NEW: acknowledge the fact and skip the first sample
                n_input_samples = self.n_input // self.N_SWEEPS_PER_SAMPLE
                valid_start_index = start_index + n_input_samples
                valid_end_index = end_index - self.n_output + 1
            self.valid_index += list(range(valid_start_index, valid_end_index))
        self._n_examples = len(self.valid_index)
        print(f"{self.nusc_split}: {self._n_examples} valid samples over {len(scenes)} scenes")
Example #22
    def nuscenes_gt_to_kitti(self) -> None:
        """
        Converts nuScenes GT annotations to KITTI format.
        """
        kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2)
        kitti_to_nu_lidar_inv = kitti_to_nu_lidar.inverse
        imsize = (1600, 900)

        token_idx = 0  # Start tokens from 0.

        # Get assignment of scenes to splits.
        split_logs = create_splits_logs(self.split, self.nusc)

        scene_splits = create_splits_scenes(verbose=False)
        scene_to_log = {
            scene['name']: self.nusc.get('log', scene['log_token'])['logfile']
            for scene in self.nusc.scene
        }
        logs = set()
        scenes = scene_splits[self.split]
        for scene in scenes:
            logs.add(scene_to_log[scene])
        # print(len(scenes), len(logs))

        split_mapping = {"train": "training", "val": "testing"}

        # Create output folders.
        label_folder = os.path.join(self.nusc_kitti_dir,
                                    split_mapping[self.split], 'label_2')
        calib_folder = os.path.join(self.nusc_kitti_dir,
                                    split_mapping[self.split], 'calib')
        image_folder = os.path.join(self.nusc_kitti_dir,
                                    split_mapping[self.split], 'image_2')
        lidar_folder = os.path.join(self.nusc_kitti_dir,
                                    split_mapping[self.split], 'velodyne')
        for folder in [label_folder, calib_folder, image_folder, lidar_folder]:
            if not os.path.isdir(folder):
                os.makedirs(folder)

        # Use only the samples from the current split.
        sample_tokens = self._split_to_samples(split_logs)
        # sample_tokens = sample_tokens[:self.image_count]

        # print(len(sample_tokens))
        tokens = []
        if self.split == "train":
            split_file = [
                os.path.join(self.nusc_kitti_dir, "train.txt"),
                os.path.join(self.nusc_kitti_dir, "val.txt")
            ]
        elif self.split == 'val':
            split_file = os.path.join(self.nusc_kitti_dir, "test.txt")
        # if os.path.isfile(split_file):
        #     os.remove(split_file)
        if self.split == "train":
            cnt = 0
            with open(split_file[0], "w") as f:
                for seq in list(self.sequence_mapping.keys())[:-150]:
                    for tk in self.sequence_mapping[seq]:
                        f.write("%06d" % tk + "\n")
                        cnt += 1
            # print(cnt)

            cnt = 0
            with open(split_file[1], "w") as f:
                for seq in list(self.sequence_mapping.keys())[-150:]:
                    for tk in self.sequence_mapping[seq]:
                        f.write("%06d" % tk + "\n")
                        cnt += 1
            # print(cnt)
        elif self.split == "val":
            with open(split_file, "w") as f:
                for seq in self.sequence_mapping.keys():
                    for tk in self.sequence_mapping[seq]:
                        f.write("%06d" % tk + "\n")

        for idx, sample_token in enumerate(sample_tokens):

            # Get sample data.
            sample = self.nusc.get('sample', sample_token)
            sample_annotation_tokens = sample['anns']
            cam_front_token = sample['data'][self.cam_name]
            lidar_token = sample['data'][self.lidar_name]
            sample_name = "%06d" % idx

            # Retrieve sensor records.
            sd_record_cam = self.nusc.get('sample_data', cam_front_token)
            sd_record_lid = self.nusc.get('sample_data', lidar_token)
            cs_record_cam = self.nusc.get(
                'calibrated_sensor', sd_record_cam['calibrated_sensor_token'])
            cs_record_lid = self.nusc.get(
                'calibrated_sensor', sd_record_lid['calibrated_sensor_token'])

            # Combine transformations and convert to KITTI format.
            # Note: cam uses same conventions in KITTI and nuScenes.
            lid_to_ego = transform_matrix(cs_record_lid['translation'],
                                          Quaternion(
                                              cs_record_lid['rotation']),
                                          inverse=False)
            ego_to_cam = transform_matrix(cs_record_cam['translation'],
                                          Quaternion(
                                              cs_record_cam['rotation']),
                                          inverse=True)
            velo_to_cam = np.dot(ego_to_cam, lid_to_ego)

            # First map from the KITTI lidar frame to the nuScenes lidar frame, then apply velo_to_cam.
            velo_to_cam_kitti = np.dot(velo_to_cam,
                                       kitti_to_nu_lidar.transformation_matrix)

            # Currently not used.
            imu_to_velo_kitti = np.zeros((3, 4))  # Dummy values.
            r0_rect = Quaternion(axis=[1, 0, 0], angle=0)  # Dummy values.

            # Projection matrix.
            p_left_kitti = np.zeros((3, 4))
            p_left_kitti[:3, :3] = cs_record_cam[
                'camera_intrinsic']  # Cameras are always rectified.

            # Create KITTI style transforms.
            velo_to_cam_rot = velo_to_cam_kitti[:3, :3]
            velo_to_cam_trans = velo_to_cam_kitti[:3, 3]

            # Check that the rotation has the same format as in KITTI.
            assert (velo_to_cam_rot.round(0) == np.array([[0, -1,
                                                           0], [0, 0, -1],
                                                          [1, 0, 0]])).all()
            assert (velo_to_cam_trans[1:3] < 0).all()

            # Retrieve the token from the lidar.
            # Note that this may be confusing as the filename of the camera will include the timestamp of the lidar,
            # not the camera.
            filename_cam_full = sd_record_cam['filename']
            filename_lid_full = sd_record_lid['filename']
            # token = '%06d' % token_idx # Alternative to use KITTI names.
            token_idx += 1

            # Convert image (jpg to png).
            src_im_path = os.path.join(self.nusc.dataroot, filename_cam_full)
            dst_im_path = os.path.join(image_folder, sample_name + '.png')
            if not os.path.exists(dst_im_path):
                im = Image.open(src_im_path)
                im.save(dst_im_path, "PNG")

            # Convert lidar.
            # Note that we are only using a single sweep, instead of the commonly used n sweeps.
            src_lid_path = os.path.join(self.nusc.dataroot, filename_lid_full)
            dst_lid_path = os.path.join(lidar_folder, sample_name + '.bin')
            assert not dst_lid_path.endswith('.pcd.bin')
            pcl = LidarPointCloud.from_file(src_lid_path)
            # pcl, _ = LidarPointCloud.from_file_multisweep_future(self.nusc, sample, self.lidar_name, self.lidar_name, nsweeps=5)
            pcl.rotate(
                kitti_to_nu_lidar_inv.rotation_matrix)  # In KITTI lidar frame.
            with open(dst_lid_path, "w") as lid_file:
                pcl.points.T.tofile(lid_file)

            # Add to tokens.
            tokens.append(sample_token)

            # Create calibration file.
            kitti_transforms = dict()
            kitti_transforms['P0'] = np.zeros((3, 4))  # Dummy values.
            kitti_transforms['P1'] = np.zeros((3, 4))  # Dummy values.
            kitti_transforms['P2'] = p_left_kitti  # Left camera transform.
            kitti_transforms['P3'] = np.zeros((3, 4))  # Dummy values.
            kitti_transforms[
                'R0_rect'] = r0_rect.rotation_matrix  # Cameras are already rectified.
            kitti_transforms['Tr_velo_to_cam'] = np.hstack(
                (velo_to_cam_rot, velo_to_cam_trans.reshape(3, 1)))
            kitti_transforms['Tr_imu_to_velo'] = imu_to_velo_kitti
            calib_path = os.path.join(calib_folder, sample_name + '.txt')
            with open(calib_path, "w") as calib_file:
                for (key, val) in kitti_transforms.items():
                    val = val.flatten()
                    val_str = '%.12e' % val[0]
                    for v in val[1:]:
                        val_str += ' %.12e' % v
                    calib_file.write('%s: %s\n' % (key, val_str))

            # Write label file.
            label_path = os.path.join(label_folder, sample_name + '.txt')
            if os.path.exists(label_path):
                # print('Skipping existing file: %s' % label_path)
                continue
            # else:
            #     print('Writing file: %s' % label_path)

            objects = []
            for sample_annotation_token in sample_annotation_tokens:
                sample_annotation = self.nusc.get('sample_annotation',
                                                  sample_annotation_token)

                # Get box in LIDAR frame.
                _, box_lidar_nusc, _ = self.nusc.get_sample_data(
                    lidar_token,
                    box_vis_level=BoxVisibility.NONE,
                    selected_anntokens=[sample_annotation_token])
                box_lidar_nusc = box_lidar_nusc[0]

                # Truncated: Set all objects to 0 which means untruncated.
                truncated = 0.0

                # Occluded: set all objects to fully visible as this information is not available in nuScenes.
                occluded = 0

                obj = dict()
                obj["occluded"] = occluded  # Required below when writing the label line via box_to_string.

                # Convert nuScenes category to nuScenes detection challenge category.
                obj["detection_name"] = category_to_detection_name(
                    sample_annotation['category_name'])

                # Skip categories that are not part of the nuScenes detection challenge.
                if obj["detection_name"] is None or obj[
                        "detection_name"] not in CLASS_MAP.keys():
                    continue

                obj["detection_name"] = CLASS_MAP[obj["detection_name"]]

                # Convert from nuScenes to KITTI box format.
                obj["box_cam_kitti"] = KittiDB.box_nuscenes_to_kitti(
                    box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot),
                    velo_to_cam_trans, r0_rect)

                # Project 3d box to 2d box in image, ignore box if it does not fall inside.
                bbox_2d = project_to_2d(obj["box_cam_kitti"], p_left_kitti,
                                        imsize[1], imsize[0])
                if bbox_2d is None:
                    continue
                obj["bbox_2d"] = bbox_2d["bbox"]
                obj["truncated"] = bbox_2d["truncated"]

                # Set dummy score so we can use this file as result.
                obj["box_cam_kitti"].score = 0

                v = np.dot(obj["box_cam_kitti"].rotation_matrix,
                           np.array([1, 0, 0]))
                rot_y = -np.arctan2(v[2], v[0])
                obj["alpha"] = -np.arctan2(
                    obj["box_cam_kitti"].center[0],
                    obj["box_cam_kitti"].center[2]) + rot_y
                obj["depth"] = np.linalg.norm(
                    np.array(obj["box_cam_kitti"].center[:3]))
                objects.append(obj)

            objects = postprocessing(objects, imsize[1], imsize[0])

            with open(label_path, "w") as label_file:
                for obj in objects:
                    # Convert box to output string format.
                    output = box_to_string(name=obj["detection_name"],
                                           box=obj["box_cam_kitti"],
                                           bbox_2d=obj["bbox_2d"],
                                           truncation=obj["truncated"],
                                           occlusion=obj["occluded"],
                                           alpha=obj["alpha"])
                    label_file.write(output + '\n')
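
As a side note to the transform checks above, a small, hedged sketch of why the expected velo_to_cam rotation is the fixed axis permutation asserted in the converter (plain numpy; the function name is illustrative):

import numpy as np

def kitti_velo_to_cam_rotation():
    # cam = R @ lidar:
    #   lidar x (forward) -> cam  z (forward)
    #   lidar y (left)    -> cam -x (x points right in the camera frame)
    #   lidar z (up)      -> cam -y (y points down in the camera frame)
    return np.array([[0, -1,  0],
                     [0,  0, -1],
                     [1,  0,  0]])

point_lidar = np.array([10.0, 2.0, 1.0])  # 10 m ahead, 2 m to the left, 1 m up
point_cam = kitti_velo_to_cam_rotation() @ point_lidar
# point_cam == [-2., -1., 10.]: left of the camera, above it, 10 m in front.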
Beispiel #23
def process_data(data_path, version, output_path, val_split):
    nusc = NuScenes(version=version, dataroot=data_path, verbose=True)
    splits = create_splits_scenes()
    # NOTE: the random split below is computed but currently unused; the full train
    # split is used for training and the validation list is left empty
    # (assign `val_scenes` to `val_scene_names` to restore a held-out validation set).
    train_scenes, val_scenes = train_test_split(
        splits['train' if 'mini' not in version else 'mini_train'],
        test_size=val_split)
    train_scene_names = splits['train' if 'mini' not in version else 'mini_train']
    val_scene_names = []  # val_scenes
    test_scene_names = splits['val' if 'mini' not in version else 'mini_val']

    ns_scene_names = dict()
    ns_scene_names['train'] = train_scene_names
    ns_scene_names['val'] = val_scene_names
    ns_scene_names['test'] = test_scene_names

    for data_class in ['train', 'val', 'test']:
        env = Environment(node_type_list=['VEHICLE', 'PEDESTRIAN'],
                          standardization=standardization)
        attention_radius = dict()
        attention_radius[(env.NodeType.PEDESTRIAN,
                          env.NodeType.PEDESTRIAN)] = 10.0
        attention_radius[(env.NodeType.PEDESTRIAN,
                          env.NodeType.VEHICLE)] = 20.0
        attention_radius[(env.NodeType.VEHICLE,
                          env.NodeType.PEDESTRIAN)] = 20.0
        attention_radius[(env.NodeType.VEHICLE, env.NodeType.VEHICLE)] = 30.0

        env.attention_radius = attention_radius
        env.robot_type = env.NodeType.VEHICLE
        scenes = []

        for ns_scene_name in tqdm(ns_scene_names[data_class]):
            ns_scene = nusc.get(
                'scene',
                nusc.field2token('scene', 'name', ns_scene_name)[0])
            scene_id = int(ns_scene['name'].replace('scene-', ''))
            if scene_id in scene_blacklist:  # Some scenes have bad localization
                continue

            scene = process_scene(ns_scene, env, nusc, data_path)
            if scene is not None:
                if data_class == 'train':
                    scene.augmented = list()
                    angles = np.arange(0, 360, 15)
                    for angle in angles:
                        scene.augmented.append(augment_scene(scene, angle))
                scenes.append(scene)

        print(f'Processed {len(scenes)} scenes')

        env.scenes = scenes

        if len(scenes) > 0:
            mini_string = ''
            if 'mini' in version:
                mini_string = '_mini'
            data_dict_path = os.path.join(
                output_path,
                'nuScenes_' + data_class + mini_string + '_full.pkl')
            with open(data_dict_path, 'wb') as f:
                dill.dump(env, f, protocol=dill.HIGHEST_PROTOCOL)
            print('Saved Environment!')

        global total
        global curv_0_2
        global curv_0_1
        print(f"Total Nodes: {total}")
        print(f"Curvature > 0.1 Nodes: {curv_0_1}")
        print(f"Curvature > 0.2 Nodes: {curv_0_2}")
        total = 0
        curv_0_1 = 0
        curv_0_2 = 0
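
A minimal sketch, assuming the pickle layout written by the function above, of how a dumped Environment could be loaded back for inspection; the helper name and printed attributes are illustrative:

import os
import dill

def load_environment(output_path, data_class='train', mini=False):
    """Load a processed nuScenes Environment pickle written by process_data."""
    suffix = '_mini' if mini else ''
    path = os.path.join(output_path, f'nuScenes_{data_class}{suffix}_full.pkl')
    with open(path, 'rb') as f:
        env = dill.load(f)
    print(f'{data_class}: {len(env.scenes)} scenes, robot type: {env.robot_type}')
    return env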
Beispiel #24
def main():
  input_root, output_root, do_trajectories, do_maps = get_command_line_args()
  print("Preprocessing Script for nuScenes Dataset.")
  print("Trajectories: {}, Maps: {}".format("Y" if do_trajectories else "N", "Y" if do_maps else "N"))

  if do_trajectories:
    nusc = NuScenes(version='v1.0-trainval', dataroot=input_root)
    
    name2ind = {} # Maps "scene-name" to nusc.scene(list) index.
    for ind, member in enumerate(nusc.scene):
      name2ind[member['name']] = ind

    token2attr = {} # Maps attribute_token to attribute string.
    for attribute in nusc.attribute:
      token2attr[attribute['token']] = attribute['name']
    
    splits = create_splits_scenes()

  if do_maps:
    from nuscenes.map_expansion.map_api import NuScenesMap

    city_list = ['singapore-onenorth', 'singapore-hollandvillage', 'singapore-queenstown', 'boston-seaport']
    for city_name in city_list:
      scale_h = scale_w = 2
      print("Generating maps for {:s}.".format(city_name))

      mask_path = P(output_root).joinpath('raw_map', '{:s}_mask.pkl'.format(city_name))
      dt_path = P(output_root).joinpath('raw_map', '{:s}_dt.pkl'.format(city_name))
      mask_vis_path = P(output_root).joinpath('raw_map_visualization', '{:s}_mask_vis.png'.format(city_name))
      dt_vis_path = P(output_root).joinpath('raw_map_visualization', '{:s}_dt_vis.png'.format(city_name))
      mask_vis_path.parent.mkdir(parents=True, exist_ok=True)
      mask_path.parent.mkdir(parents=True, exist_ok=True)

      nusc_map = NuScenesMap(input_root, city_name)

      print("Calculating a map mask with scale_h: {:d}, scale_w: {:d}... ".format(scale_h, scale_w), end="", flush=True)
      map_mask = get_drivable_area_mask(nusc_map, scale_h=2, scale_w=2)
      print("Done.")

      print("Calculating Signed Distance Transform... ", end="", flush=True)
      image = map_mask.astype(np.int32)
      invert_image = 1-image
      dt = np.where(invert_image, -distance_transform_edt(invert_image), distance_transform_edt(image))
      print("Done.")

      print("Saving Results... ", end="", flush=True)
      dump({'map': map_mask, 'scale_h': 2, 'scale_w': 2}, mask_path)
      dump({'map': dt, 'scale_h': 2, 'scale_w': 2}, dt_path)

      mask_vis = (map_mask*255).astype(np.uint8)

      dt_max = dt.max()
      dt_min = dt.min()
      dt_vis = ((dt - dt_min)/(dt_max - dt_min)*255).astype(np.uint8)

      cv2.imwrite(str(mask_vis_path), mask_vis)
      cv2.imwrite(str(dt_vis_path), dt_vis)
      print("Done. Saved {:s}, {:s}, {:s}, and {:s}.".format(str(mask_path), str(mask_vis_path), str(dt_path), str(dt_vis_path)))

  if do_trajectories:
    for partition in ['train', 'train_val', 'val']:
      print("Generating Trajectories for {:s} set.".format(partition))
      
      if 'train' in partition:
        scene_list = splits['train']
        if partition == "train":
          scene_list = scene_list[NUM_IN_TRAIN_VAL:]
          
        if partition == "train_val":
          scene_list = scene_list[:NUM_IN_TRAIN_VAL]

      else:
        scene_list = splits['val']

      pool = Pool(cpu_count())
      callback = Counting_Callback(task_name="Trajectory Imputation & Smoothing", num_data=len(scene_list))
      for name in scene_list:  
        """
        Generate a raw DataFrame object for each scene_name.
        Filter object categories other than "human" and "vehicle".
        Perform Kalman Smoothing and/or rule-based Imputation.
        """
        
        ind = name2ind[name]

        scene = nusc.scene[ind]

        log = nusc.get('log', scene['log_token'])
        location = log['location']

        data_dict = annotation_dict()

        sample_token = scene['first_sample_token']
        last_sample_token = scene['last_sample_token']
        frame = 0
        passed_last = False
        while not passed_last:
          sample_data = nusc.get("sample", sample_token)
          timestamp = sample_data["timestamp"]

          # Gather pose token from LiDAR since it has timestamp synced with sample_data.
          lidar_data = nusc.get("sample_data", sample_data["data"]["LIDAR_TOP"])
          ego_pose_token = lidar_data['ego_pose_token']

          ego_pose_data = nusc.get("ego_pose", ego_pose_token)
          
          ego_translation = ego_pose_data["translation"]
          ego_rotation = ego_pose_data["rotation"]
          
          # Append Ego Motion Data
          data_dict.append(location,
                            sample_token,
                            '00000000000000000000000000000000',
                            'vehicle.ego',
                            'ego',
                            timestamp,
                            frame,
                            ego_translation,
                            ego_rotation)

          for anns_token in sample_data['anns']:
            anns_data = nusc.get("sample_annotation", anns_token)

            instance_token = anns_data['instance_token']
            instance_category = anns_data['category_name']
            
            instance_attributes = [token2attr[token] for token in anns_data['attribute_tokens']]
            instance_attributes = ", ".join(instance_attributes)

            instance_translation = anns_data["translation"]
            instance_rotation = anns_data["rotation"]

            # Append Instance Motion Data
            data_dict.append(location,
                              sample_token,
                              instance_token,
                              instance_category,
                              instance_attributes,
                              timestamp,
                              frame,
                              instance_translation,
                              instance_rotation)
          # goto next sample
          if sample_token == last_sample_token or len(sample_data['next']) == 0:
            passed_last = True
          
          else:
            sample_token = sample_data['next']
            frame += 1

        df = pd.DataFrame.from_dict(data_dict) # Generate a DataFrame
        pool.apply_async(kalman_smoother,
                        (df.copy(), name),
                        callback=callback) # Perform Kalman Smoothing
        
      pool.close()
      pool.join()

      # Get the Kalman-smoothed results and sort them by scene_name.
      smoothed_trajectories = callback.get_results()
      smoothed_trajectories.sort(key=lambda x: x[1])

      pool = Pool(cpu_count())
      callback = Counting_Callback(task_name="Trajectory Chopping & Sample Generation", num_data=float('inf'))
      num_data = 0
      for df, scene_name in smoothed_trajectories:
        """
        Chop a smoothed DataFrame into multiple samples (~33 samples per scene)
        such that each sample spans 5 seconds, with the reference frame set at the 2-second mark.

        Then, split each sample into obsv (0~2 seconds) and pred (2~5 seconds) files.
        """
        scn_code = int(scene_name.split('-')[-1])
        
        frames = df.FRAME.to_list()
        initial_frame = frames[0]
        last_frame = frames[-1]

        for ref_frame in range(initial_frame+3, last_frame-5):
          obsv_path = P(output_root).joinpath(partition, 'observation', '{:04d}-{:03d}.pkl'.format(scn_code, ref_frame))
          obsv_path.parent.mkdir(parents=True, exist_ok=True)

          pred_path = P(output_root).joinpath(partition, 'prediction', '{:04d}-{:03d}.pkl'.format(scn_code, ref_frame))
          pred_path.parent.mkdir(parents=True, exist_ok=True)
          
          pool.apply_async(generate_trajectories, (df.copy(), ref_frame, obsv_path, pred_path), callback=callback)
          # generate_trajectories(df.copy(), ref_frame, obsv_path, pred_path)
          num_data += 1
      
      callback.set_num_data(num_data)
      pool.close()
      pool.join()

      print("Saved {:d} {:s} samples at {:s}.".format(num_data, partition, str(P(output_root).joinpath(partition))))
    
    # Create train_all set using symbolic link.
    print("Making symlinks to form train_all split... ", end="", flush=True)
    trainall_dirname = 'train_all'
    trainall_obsv_path = P(output_root).joinpath('{:s}/observation'.format(trainall_dirname))
    trainall_obsv_path.mkdir(parents=True, exist_ok=True)
    trainall_pred_path = P(output_root).joinpath('{:s}/prediction'.format(trainall_dirname))
    trainall_pred_path.mkdir(parents=True, exist_ok=True)

    train_path = P(output_root).joinpath('train')
    train_obsv_pkl = list(train_path.glob('observation/*.pkl'))
    train_pred_pkl = list(train_path.glob('prediction/*.pkl'))

    trainval_path = P(output_root).joinpath('train_val')
    trainval_obsv_pkl = list(trainval_path.glob('observation/*.pkl'))
    trainval_pred_pkl = list(trainval_path.glob('prediction/*.pkl'))

    obsv_pkl_list = train_obsv_pkl + trainval_obsv_pkl
    pred_pkl_list = train_pred_pkl + trainval_pred_pkl
    for obsv_pkl, pred_pkl in zip(obsv_pkl_list, pred_pkl_list):
      obsv_filename, obsv_split = obsv_pkl.name, obsv_pkl.parent.parent.stem
      pred_filename, pred_split = pred_pkl.name, pred_pkl.parent.parent.stem
      
      obsv_relpath = P('../../{:s}/observation/'.format(obsv_split)).joinpath(obsv_filename)
      obsv_link = trainall_obsv_path.joinpath(obsv_filename)
      obsv_link.symlink_to(obsv_relpath)
      
      pred_relpath = P('../../{:s}/prediction/'.format(pred_split)).joinpath(pred_filename)
      pred_link = trainall_pred_path.joinpath(pred_filename)
      pred_link.symlink_to(pred_relpath)
    print(" Done.")
Beispiel #25
def create_tracks(all_boxes: EvalBoxes, nusc: NuScenes, eval_split: str, gt: bool) \
        -> Dict[str, Dict[int, List[TrackingBox]]]:
    """
    Returns all tracks for all scenes. Samples within a track are sorted in chronological order.
    This can be applied either to GT or predictions.
    :param all_boxes: Holds all GT or predicted boxes.
    :param nusc: The NuScenes instance to load the sample information from.
    :param eval_split: The evaluation split for which we create tracks.
    :param gt: Whether we are creating tracks for GT or predictions
    :return: The tracks.
    """
    # Only keep samples from this split.
    splits = create_splits_scenes()
    scene_tokens = set()
    for sample_token in all_boxes.sample_tokens:
        scene_token = nusc.get('sample', sample_token)['scene_token']
        scene = nusc.get('scene', scene_token)
        if scene['name'] in splits[eval_split]:
            scene_tokens.add(scene_token)

    # Tracks are stored as dict {scene_token: {timestamp: List[TrackingBox]}}.
    tracks = defaultdict(lambda: defaultdict(list))

    # Init all scenes and timestamps to guarantee completeness.
    for scene_token in scene_tokens:
        # Init all timestamps in this scene.
        scene = nusc.get('scene', scene_token)
        cur_sample_token = scene['first_sample_token']
        while True:
            # Initialize array for current timestamp.
            cur_sample = nusc.get('sample', cur_sample_token)
            tracks[scene_token][cur_sample['timestamp']] = []

            # Abort after the last sample.
            if cur_sample_token == scene['last_sample_token']:
                break

            # Move to next sample.
            cur_sample_token = cur_sample['next']

    # Group annotations wrt scene and timestamp.
    for sample_token in all_boxes.sample_tokens:
        sample_record = nusc.get('sample', sample_token)
        scene_token = sample_record['scene_token']
        tracks[scene_token][sample_record['timestamp']] = all_boxes.boxes[sample_token]

    # Replace box scores with track score (average box score). This only affects the compute_thresholds method and
    # should be done before interpolation to avoid diluting the original scores with interpolated boxes.
    if not gt:
        for scene_id, scene_tracks in tracks.items():
            # For each track_id, collect the scores.
            track_id_scores = defaultdict(list)
            for timestamp, boxes in scene_tracks.items():
                for box in boxes:
                    track_id_scores[box.tracking_id].append(box.tracking_score)

            # Compute average scores for each track.
            track_id_avg_scores = {}
            for tracking_id, scores in track_id_scores.items():
                track_id_avg_scores[tracking_id] = np.mean(scores)

            # Apply average score to each box.
            for timestamp, boxes in scene_tracks.items():
                for box in boxes:
                    box.tracking_score = track_id_avg_scores[box.tracking_id]

    # Interpolate GT and predicted tracks.
    for scene_token in tracks.keys():
        tracks[scene_token] = interpolate_tracks(tracks[scene_token])

        if not gt:
            # Make sure predictions are sorted in time (always true for GT).
            tracks[scene_token] = defaultdict(list, sorted(tracks[scene_token].items(), key=lambda kv: kv[0]))

    return tracks
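
A stripped-down sketch of the track-score averaging step above, assuming box objects with tracking_id and tracking_score attributes (not the devkit's TrackingBox itself):

from collections import defaultdict

import numpy as np

def average_track_scores(scene_tracks):
    """scene_tracks: {timestamp: [box, ...]}; each box gets the mean score of its track."""
    scores = defaultdict(list)
    for boxes in scene_tracks.values():
        for box in boxes:
            scores[box.tracking_id].append(box.tracking_score)
    means = {tid: float(np.mean(s)) for tid, s in scores.items()}
    for boxes in scene_tracks.values():
        for box in boxes:
            box.tracking_score = means[box.tracking_id]
    return scene_tracks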
    imgs = []  # Stores image paths
    img_detections = []  # Stores detections for each image index
    file1 = open('videos_tokens.txt')
    tokens_list = file1.readlines()
    file1.close()

    valid_channels = [
        'CAM_FRONT',
        'CAM_FRONT_LEFT',
        'CAM_FRONT_RIGHT',
        'CAM_BACK_LEFT',
        'CAM_BACK',
        'CAM_BACK_RIGHT',
    ]
    lll = []
    splits = create_splits_scenes()
    for camera_channel in valid_channels:
        for token in tokens_list:
            first_time = True
            Dict = {}
            current_recs = {}
            num_objects = 0
            scene_token = token.strip()  # Drop the trailing newline from the tokens file.
            scene_record = nusc.get('scene', scene_token)
            scene_rec = scene = scene_record  # Aliases for the same record; avoids repeated lookups.
            scene_name = scene_record['name']

            scene_id = int(scene_name.replace('scene-', ''))
            if scene_name in splits['val']:
                spl = '/test/'
def create_nuscenes_dataset(version, output_path, mode='train'):
    """
    For each video, we store a sequence of data with the following information:
        - image (H, W, 3) jpg
        - instance segmentation (H, W) np.array<np.uint8>
          with values in [0, MAX_INSTANCES-1]. Note that instance ids are offset from position/velocity,
          i.e. id 1 in the instance segmentation corresponds to element 0 in position/velocity.
          When including more classes, we could store a 4D tensor (N_CLASSES, H, W) instead.
        - position (MAX_INSTANCES, 3) np.array
        - velocity (MAX_INSTANCES, 3) np.array

    """
    ## Yaw angle differs per camera, see: https://github.com/nutonomy/nuscenes-devkit/issues/21
    # Load Mask R-CNN
    # update the config options with the config file
    cfg.merge_from_file(MASK_RCNN_CONFIG_FILE)
    # manual override some options
    # cfg.merge_from_list(['MODEL.DEVICE', 'cpu'])

    mask_rcnn = COCODemo(
        cfg,
        confidence_threshold=0.8,
    )

    # Load NuScenes
    nusc = NuScenes(version=version, dataroot=NUSCENES_ROOT, verbose=True)
    scene_splits = create_splits_scenes()

    print('Begin iterating over Nuscenes')
    print('-' * 30)
    # Loop over dataset
    for scene in nusc.scene:
        # Ensure the scene belongs to the split
        if scene['name'] not in scene_splits[mode]:
            continue

        scene_path = os.path.join(output_path, mode, scene['name'])
        print('scene_path: {}'.format(scene_path))
        os.makedirs(scene_path, exist_ok=True)

        t = 0
        sample_token = scene['first_sample_token']
        while sample_token:
            print('Image {}'.format(t))
            sample = nusc.get('sample', sample_token)
            data = match_instance_seg_and_bbox(nusc, mask_rcnn, sample)

            if data is not None:
                data['image'].save(os.path.join(scene_path, '{:04d}_image_tmp.jpg'.format(t)))
                np.save(os.path.join(scene_path, '{:04d}_instance_seg_tmp.npy'.format(t)), data['instance_seg'])
                np.save(os.path.join(scene_path, '{:04d}_position_tmp.npy'.format(t)), data['position'])
                np.save(os.path.join(scene_path, '{:04d}_velocity_tmp.npy'.format(t)), data['velocity'])
                np.save(os.path.join(scene_path, '{:04d}_orientation_tmp.npy'.format(t)), data['orientation'])
                np.save(os.path.join(scene_path, '{:04d}_size_tmp.npy'.format(t)), data['size'])
                np.save(os.path.join(scene_path, '{:04d}_token_tmp.npy'.format(t)), data['token'])
                np.save(os.path.join(scene_path, '{:04d}_intrinsics_tmp.npy'.format(t)), data['intrinsics'])
                np.save(os.path.join(scene_path, '{:04d}_sample_token_tmp.npy'.format(t)), np.array([sample_token]))


            sample_token = sample['next']
            t += 1

        link_instance_ids(nusc, scene_path)
        print('------------------\n')

    print('Computing depth maps')
    print('-' * 30)
    # Compute depth map here.
    generate_depth(output_path, mode)
    print('Dataset saved.')
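
A hypothetical entry point showing how the generation above might be driven for both splits; the version string and output path are assumptions to adapt to the local setup:

if __name__ == '__main__':
    for mode in ['train', 'val']:
        create_nuscenes_dataset(version='v1.0-trainval',
                                output_path='/data/nuscenes_processed',
                                mode=mode)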