def test_accumulation_in_scene(self):
    # Load a scene without accumulation
    scenes_dataset_json = os.path.join(self.DGP_TEST_DATASET_DIR, "test_scene", "scene_dataset_v1.0.json")
    dataset = SynchronizedSceneDataset(
        scenes_dataset_json,
        split='train',
        datum_names=['lidar'],
    )

    # Load the same scene with the maximum available accumulation context.
    # This dataset has two scenes, each with 3 samples.
    dataset_acc = SynchronizedSceneDataset(
        scenes_dataset_json,
        split='train',
        datum_names=['lidar'],
        requested_annotations=['bounding_box_3d'],
        accumulation_context={'lidar': (2, 0)},
        transform_accumulated_box_points=True
    )

    # We should only have two samples (one per scene)
    assert len(dataset_acc) == 2

    # Verify that we have not lost any points by accumulating
    num_points = 0
    for i in range(3):
        context = dataset[i]
        num_points += len(context[0][-1]['point_cloud'])

    context_acc = dataset_acc[0]
    num_points_acc = len(context_acc[0][-1]['point_cloud'])
    assert num_points == num_points_acc
def main():
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--scene-dataset-json', type=str, required=True, help='Path to local SceneDataset JSON.')
    parser.add_argument(
        '--split', type=str, default='train', required=False,
        help='Split [train, val, test].', choices=['train', 'val', 'test']
    )
    parser.add_argument('--verbose', action='store_true')
    args = parser.parse_args()

    # Verbose prints.
    if args.verbose:
        logging.getLogger().setLevel(level=logging.INFO)

    # Load the dataset and build an index into the annotations requested.
    # If previously loaded/initialized, load the pre-built dataset.
    st = time.time()
    dataset = SynchronizedSceneDataset(
        scene_dataset_json=args.scene_dataset_json,
        split=args.split,
        datum_names=('lidar_02', ),
        requested_annotations=('bounding_box_3d', ),
        only_annotated_datums=True
    )
    print('Loading dataset took {:.2f} s'.format(time.time() - st))

    # Iterate through the dataset.
    for _ in tqdm(dataset, desc='Loading samples from the dataset'):
        pass
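A hypothetical command-line invocation of the script above; the script filename and the JSON path are assumptions for illustration, not part of the source.

# Hypothetical invocation (script name and path are assumptions):
#
#   python load_scene_dataset.py \
#       --scene-dataset-json /data/my_dataset/scene_dataset_v1.0.json \
#       --split val \
#       --verbose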
def get_ontology_kl(scene_dataset_json, annotation_type):
    dataset = SynchronizedSceneDataset(
        scene_dataset_json,
        split='train',
        datum_names=['locator'],
        backward_context=0,
        requested_annotations=("key_line_2d", )
    )
    return dataset.dataset_metadata.ontology_table.get(annotation_type, None)
def test_autolabels_custom_root(self):
    """Test that we can load autolabels using autolabel_root"""
    scenes_dataset_json = os.path.join(self.DGP_TEST_DATASET_DIR, "test_scene", "scene_dataset_v1.0.json")
    autolabel_model = 'test-model'
    autolabel_annotation = 'bounding_box_3d'
    requested_autolabels = (f'{autolabel_model}/{autolabel_annotation}', )
    dataset_root = os.path.dirname(scenes_dataset_json)
    autolabel_root = os.path.join(self.DGP_TEST_DATASET_DIR, 'autolabel_root')

    clone_scene_as_autolabel(dataset_root, autolabel_root, autolabel_model, autolabel_annotation)

    dataset = SynchronizedSceneDataset(
        scenes_dataset_json,
        split='train',
        datum_names=['LIDAR'],
        forward_context=1,
        backward_context=1,
        requested_annotations=('bounding_box_3d', ),
        requested_autolabels=requested_autolabels,
        autolabel_root=autolabel_root,
        use_diskcache=False,
    )

    assert len(dataset) == 2
    for context in dataset:
        for sample in context:
            lidar = sample[0]
            assert lidar['bounding_box_3d'] == lidar[requested_autolabels[0]]
def _load_dataset(path, split):
    # TODO: add an interactive checkbox to enable users to select datums.
    return SynchronizedSceneDataset(
        scene_dataset_json=path,
        split=split,
        requested_annotations=("bounding_box_3d", ),
        only_annotated_datums=True
    )
def get_ontology(scene_dataset_json, annotation_type):
    dataset = SynchronizedSceneDataset(
        scene_dataset_json,
        split='train',
        datum_names=['camera_01', 'lidar'],
        backward_context=0,
        requested_annotations=("bounding_box_2d", "bounding_box_3d")
    )
    return dataset.dataset_metadata.ontology_table.get(annotation_type, None)
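A minimal usage sketch of the helper above; the JSON path is an assumption, and 'bounding_box_3d' is one of the annotation types requested inside the function.

# Minimal usage sketch (the JSON path is an assumption).
bbox_3d_ontology = get_ontology('/data/my_dataset/scene_dataset_v1.0.json', 'bounding_box_3d')
if bbox_3d_ontology is not None:
    # Ontology object looked up from dataset.dataset_metadata.ontology_table
    print(type(bbox_3d_ontology))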
def setUp(self):
    # Initialize synchronized dataset
    scenes_dataset_json = os.path.join(self.DGP_TEST_DATASET_DIR, "test_scene", "scene_dataset_v1.0.json")
    self.dataset = SynchronizedSceneDataset(
        scenes_dataset_json,
        split='train',
        datum_names=['camera_01', 'lidar'],
        backward_context=0,
        requested_annotations=("bounding_box_2d", "bounding_box_3d")
    )
def test_autolabels_missing_files(self):
    """Test that skip_missing_data can be used to skip missing autolabel scene dirs"""
    scenes_dataset_json = os.path.join(self.DGP_TEST_DATASET_DIR, "test_scene", "scene_dataset_v1.0.json")
    autolabel_model = 'test-model'
    autolabel_annotation = 'bounding_box_3d'
    requested_autolabels = (f'{autolabel_model}/{autolabel_annotation}', )
    dataset_root = os.path.dirname(scenes_dataset_json)
    autolabel_root = os.path.join(self.DGP_TEST_DATASET_DIR, 'autolabel_root')

    autolabel_dirs = clone_scene_as_autolabel(dataset_root, autolabel_root, autolabel_model, autolabel_annotation)

    # Remove a scene dir and check that we can still load the data
    rmtree(autolabel_dirs[0])

    # Test that skip_missing_data allows us to load the dataset
    dataset = SynchronizedSceneDataset(
        scenes_dataset_json,
        split='train',
        datum_names=['LIDAR'],
        forward_context=1,
        backward_context=1,
        requested_annotations=('bounding_box_3d', ),
        requested_autolabels=requested_autolabels,
        autolabel_root=autolabel_root,
        skip_missing_data=True,
        use_diskcache=False,
    )

    assert len(dataset) == 2
    for context in dataset:
        for sample in context:
            lidar = sample[0]
            autolab = lidar[requested_autolabels[0]]
            assert autolab is None or lidar['bounding_box_3d'] == autolab
def test_only_annotated_datums(self):
    """Test that only_annotated_datums also applies to autolabels"""
    scenes_dataset_json = os.path.join(self.DGP_TEST_DATASET_DIR, "test_scene", "scene_dataset_v1.0.json")
    autolabel_model = 'test-model'
    autolabel_annotation = 'bounding_box_3d'
    requested_autolabels = (f'{autolabel_model}/{autolabel_annotation}', )
    dataset_root = os.path.dirname(scenes_dataset_json)
    autolabel_root = os.path.join(self.DGP_TEST_DATASET_DIR, 'autolabel_root')

    autolabel_dirs = clone_scene_as_autolabel(dataset_root, autolabel_root, autolabel_model, autolabel_annotation)

    # Remove a scene dir and check that we can still load the data
    rmtree(autolabel_dirs[0])

    # Test that only_annotated_datums works
    dataset = SynchronizedSceneDataset(
        scenes_dataset_json,
        split='train',
        datum_names=['LIDAR'],
        forward_context=1,
        backward_context=1,
        requested_autolabels=requested_autolabels,
        autolabel_root=autolabel_root,
        only_annotated_datums=True,
        skip_missing_data=True,
        use_diskcache=False,
    )

    assert len(dataset) == 1
    for context in dataset:
        for sample in context:
            lidar = sample[0]
            assert lidar[requested_autolabels[0]] is not None
def test_accumulation(self):
    """Test accumulation"""
    # Generate some samples
    scenes_dataset_json = os.path.join(self.DGP_TEST_DATASET_DIR, "test_scene", "scene_dataset_v1.0.json")
    dataset = SynchronizedSceneDataset(
        scenes_dataset_json,
        split='train',
        datum_names=['lidar'],
    )
    assert len(dataset) >= 2

    point_datums = []
    for sample in dataset:
        point_datums.append(sample[0][0])

    p1, p2 = point_datums[0], point_datums[-1]

    p1_and_p2_in_p1 = accumulate_points([p1, p2], p1)
    assert len(p1_and_p2_in_p1['point_cloud']) == len(p1['point_cloud']) + len(p2['point_cloud'])

    p1_and_p2_in_p2 = accumulate_points([p1, p2], p2)
    assert len(p1_and_p2_in_p2['point_cloud']) == len(p1['point_cloud']) + len(p2['point_cloud'])

    # If we move the accumulated p1-frame points to p2, we should recover the accumulated p2 points
    p1_and_p2_in_p2_v2 = accumulate_points([p1_and_p2_in_p1], p2)
    assert np.allclose(p1_and_p2_in_p2_v2['point_cloud'], p1_and_p2_in_p2['point_cloud'])

    # If we accumulate a single point cloud, nothing should happen
    p1_v2 = accumulate_points([p1], p1)
    assert np.allclose(p1_v2['point_cloud'], p1['point_cloud'])
def test_labeled_synchronized_scene_dataset(self):
    """Test synchronized scene dataset"""
    expected_camera_fields = set([
        'rgb', 'timestamp', 'datum_name', 'pose', 'intrinsics', 'extrinsics', 'bounding_box_2d',
        'bounding_box_3d', 'depth', 'datum_type',
    ])
    expected_lidar_fields = set([
        'point_cloud', 'timestamp', 'datum_name', 'pose', 'extrinsics', 'bounding_box_2d',
        'bounding_box_3d', 'extra_channels', 'datum_type',
    ])
    expected_metadata_fields = set([
        'scene_index', 'sample_index_in_scene', 'log_id', 'timestamp', 'scene_name', 'scene_description'
    ])

    # Initialize synchronized dataset with 2 datums
    scenes_dataset_json = os.path.join(self.DGP_TEST_DATASET_DIR, "test_scene", "scene_dataset_v1.0.json")
    dataset = SynchronizedSceneDataset(
        scenes_dataset_json,
        split='train',
        datum_names=['LIDAR', 'CAMERA_01'],
        forward_context=1,
        backward_context=1,
        generate_depth_from_datum='LIDAR',
        requested_annotations=("bounding_box_2d", "bounding_box_3d")
    )

    # There are only 3 samples per scene in the train split. With a forward and
    # backward context of 1 each, each scene contributes a single item with the
    # desired context frames, so the two-scene dataset has 2 items.
    assert len(dataset) == 2

    # Iterate through labeled dataset and check expected fields
    assert dataset.calibration_table is not None
    for idx, item in enumerate(dataset):
        # Context size is 3 (forward + backward + reference)
        assert_true(len(item) == 3)

        # Check both datum and time dimensions for expected fields
        im_size = None
        for t_item in item:
            # Two selected datums
            assert_true(len(t_item) == 2)
            for datum in t_item:
                if datum['datum_name'] == 'LIDAR':
                    # LIDAR should have point_cloud set
                    assert_true(set(datum.keys()) == expected_lidar_fields)
                    assert_true(isinstance(datum, OrderedDict))
                elif datum['datum_name'].startswith('CAMERA_'):
                    # CAMERA_01 should have intrinsics/extrinsics set
                    assert_true(isinstance(datum, OrderedDict))
                    assert_true(datum['intrinsics'].shape == (3, 3))
                    assert_true(isinstance(datum['extrinsics'], Pose))
                    assert_true(isinstance(datum['pose'], Pose))
                    # Check image sizes for context frames
                    assert_true(set(datum.keys()) == expected_camera_fields)
                    if im_size is None:
                        im_size = datum['rgb'].size
                    assert_true(datum['rgb'].size == im_size)
                else:
                    raise RuntimeError('Unexpected datum_name {}'.format(datum['datum_name']))

        # Retrieve metadata about dataset item at index=idx
        metadata = dataset.get_scene_metadata(idx)
        assert_true(metadata.keys() == expected_metadata_fields)
class DGPDataset:
    """
    DGP dataset class

    Parameters
    ----------
    path : str
        Path to the dataset
    split : str {'train', 'val', 'test'}
        Which dataset split to use
    cameras : list of str
        Which cameras to get information from
    depth_type : str
        Which lidar will be used to generate ground-truth information
    with_pose : bool
        If enabled, pose estimates are also returned
    with_semantic : bool
        If enabled, semantic estimates are also returned
    back_context : int
        Size of the backward context
    forward_context : int
        Size of the forward context
    data_transform : Function
        Transformations applied to the sample
    """
    def __init__(self, path, split,
                 cameras=None,
                 depth_type=None,
                 with_pose=False,
                 with_semantic=False,
                 back_context=0,
                 forward_context=0,
                 data_transform=None,
                 ):
        self.path = path
        self.split = split
        self.dataset_idx = 0

        self.bwd = back_context
        self.fwd = forward_context
        self.has_context = back_context + forward_context > 0

        self.num_cameras = len(cameras)
        self.data_transform = data_transform

        self.depth_type = depth_type
        self.with_depth = depth_type is not None
        self.with_pose = with_pose
        self.with_semantic = with_semantic

        self.dataset = SynchronizedSceneDataset(
            path,
            split=split,
            datum_names=cameras,
            backward_context=back_context,
            forward_context=forward_context,
            requested_annotations=None,
            only_annotated_datums=False,
        )

    def generate_depth_map(self, sample_idx, datum_idx, filename):
        """
        Generates the depth map for a camera by projecting LiDAR information.
        It also caches the depth map following the DGP folder structure, so it is not recalculated.

        Parameters
        ----------
        sample_idx : int
            Sample index
        datum_idx : int
            Datum index
        filename : str
            Filename used for loading / saving

        Returns
        -------
        depth : np.array [H, W]
            Depth map for that datum in that sample
        """
        # Generate depth filename
        filename = '{}/{}.npz'.format(
            os.path.dirname(self.path), filename.format('depth/{}'.format(self.depth_type)))
        # Load and return if it exists
        if os.path.exists(filename):
            return np.load(filename)['depth']
        # Otherwise, create, save and return
        else:
            # Get pointcloud
            scene_idx, sample_idx_in_scene, _ = self.dataset.dataset_item_index[sample_idx]
            pc_datum_idx_in_sample = self.dataset.get_datum_index_for_datum_name(
                scene_idx, sample_idx_in_scene, self.depth_type)
            pc_datum_data = self.dataset.get_point_cloud_from_datum(
                scene_idx, sample_idx_in_scene, pc_datum_idx_in_sample)
            # Create camera
            camera_rgb = self.get_current('rgb', datum_idx)
            camera_pose = self.get_current('pose', datum_idx)
            camera_intrinsics = self.get_current('intrinsics', datum_idx)
            camera = Camera(K=camera_intrinsics, p_cw=camera_pose.inverse())
            # Generate depth map
            world_points = pc_datum_data['pose'] * pc_datum_data['point_cloud']
            depth = generate_depth_map(camera, world_points, camera_rgb.size[::-1])
            # Save depth map
            os.makedirs(os.path.dirname(filename), exist_ok=True)
            np.savez_compressed(filename, depth=depth)
            # Return depth map
            return depth

    def get_current(self, key, sensor_idx):
        """Return the current timestep of a key from a sensor"""
        return self.sample_dgp[self.bwd][sensor_idx][key]

    def get_backward(self, key, sensor_idx):
        """Return the backward timesteps of a key from a sensor"""
        return [] if self.bwd == 0 else \
            [self.sample_dgp[i][sensor_idx][key] for i in range(0, self.bwd)]

    def get_forward(self, key, sensor_idx):
        """Return the forward timesteps of a key from a sensor"""
        return [] if self.fwd == 0 else \
            [self.sample_dgp[i][sensor_idx][key] for i in range(self.bwd + 1, self.bwd + self.fwd + 1)]

    def get_context(self, key, sensor_idx):
        """Get both backward and forward contexts"""
        return self.get_backward(key, sensor_idx) + self.get_forward(key, sensor_idx)

    def get_filename(self, sample_idx, datum_idx):
        """
        Returns the filename for an index, following the DGP structure

        Parameters
        ----------
        sample_idx : int
            Sample index
        datum_idx : int
            Datum index

        Returns
        -------
        filename : str
            Filename for the datum in that sample
        """
        scene_idx, sample_idx_in_scene, datum_indices = self.dataset.dataset_item_index[sample_idx]
        scene_dir = self.dataset.get_scene_directory(scene_idx)
        filename = self.dataset.get_datum(
            scene_idx, sample_idx_in_scene, datum_indices[datum_idx]).datum.image.filename
        return os.path.splitext(
            os.path.join(os.path.basename(scene_dir), filename.replace('rgb', '{}')))[0]

    def __len__(self):
        """Length of dataset"""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Get a dataset sample"""
        # Get DGP sample (if single sensor, make it a list)
        self.sample_dgp = self.dataset[idx]
        self.sample_dgp = [make_list(sample) for sample in self.sample_dgp]

        # Loop over all cameras
        sample = []
        for i in range(self.num_cameras):
            data = {
                'idx': idx,
                'dataset_idx': self.dataset_idx,
                'sensor_name': self.get_current('datum_name', i),
                #
                'filename': self.get_filename(idx, i),
                'splitname': '%s_%010d' % (self.split, idx),
                #
                'rgb': self.get_current('rgb', i),
                'intrinsics': self.get_current('intrinsics', i),
            }

            # If depth is returned
            if self.with_depth:
                data.update({
                    'depth': self.generate_depth_map(idx, i, data['filename'])
                })

            # If pose is returned
            if self.with_pose:
                data.update({
                    'extrinsics': self.get_current('extrinsics', i).matrix,
                    'pose': self.get_current('pose', i).matrix,
                })

            # If context is returned
            if self.has_context:
                data.update({
                    'rgb_context': self.get_context('rgb', i),
                })
                # If context pose is returned
                if self.with_pose:
                    # Get original values to calculate relative motion
                    orig_extrinsics = Pose.from_matrix(data['extrinsics'])
                    orig_pose = Pose.from_matrix(data['pose'])
                    data.update({
                        'extrinsics_context':
                            [(orig_extrinsics.inverse() * extrinsics).matrix
                             for extrinsics in self.get_context('extrinsics', i)],
                        'pose_context':
                            [(orig_pose.inverse() * pose).matrix
                             for pose in self.get_context('pose', i)],
                    })

            sample.append(data)

        # Apply the same data transformations for all sensors
        if self.data_transform:
            sample = [self.data_transform(smp) for smp in sample]

        # Return sample (stacked if necessary)
        return stack_sample(sample)
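A minimal usage sketch of the wrapper above. The dataset path and camera name are assumptions, and the helpers it relies on (make_list, stack_sample, Pose, Camera) are expected to come from the surrounding project; setting depth_type to a lidar datum name would additionally enable the cached depth maps.

# Minimal usage sketch (path and camera name are assumptions).
dgp_dataset = DGPDataset(
    path='/data/my_dataset/scene_dataset_v1.0.json',
    split='train',
    cameras=['camera_01'],
    depth_type=None,      # set to a lidar datum name (e.g. 'lidar') to generate cached depth maps
    with_pose=True,
    back_context=1,
    forward_context=1,
    data_transform=None,
)
print(len(dgp_dataset))
sample_0 = dgp_dataset[0]  # dict (or stacked dict) with keys such as 'rgb', 'intrinsics', 'pose', 'rgb_context'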
class DGPvaleoDataset:
    """
    DGP dataset class

    Parameters
    ----------
    path : str
        Path to the dataset
    split : str {'train', 'val', 'test'}
        Which dataset split to use
    cameras : list of str
        Which cameras to get information from
    depth_type : str
        Which lidar will be used to generate ground-truth information
    with_pose : bool
        If enabled, pose estimates are also returned
    with_semantic : bool
        If enabled, semantic estimates are also returned
    back_context : int
        Size of the backward context
    forward_context : int
        Size of the forward context
    data_transform : Function
        Transformations applied to the sample
    """
    def __init__(
        self,
        path,
        split,
        cameras=None,
        depth_type=None,
        with_pose=False,
        with_semantic=False,
        back_context=0,
        forward_context=0,
        data_transform=None,
        with_geometric_context=False,
    ):
        self.path = path
        self.split = split
        self.dataset_idx = 0

        self.bwd = back_context
        self.fwd = forward_context
        self.has_context = back_context + forward_context > 0
        self.with_geometric_context = with_geometric_context

        self.num_cameras = len(cameras)
        self.data_transform = data_transform

        self.depth_type = depth_type
        self.with_depth = depth_type is not None
        self.with_pose = with_pose
        self.with_semantic = with_semantic

        # Arrange cameras alphabetically
        cameras = sorted(cameras)
        cameras_left = list(cameras)
        cameras_right = list(cameras)
        for i_cam in range(self.num_cameras):
            replaced = False
            for k in cam_left_dict:
                if not replaced and k in cameras_left[i_cam]:
                    cameras_left[i_cam] = cameras_left[i_cam].replace(k, cam_left_dict[k])
                    replaced = True
            replaced = False
            for k in cam_right_dict:
                if not replaced and k in cameras_right[i_cam]:
                    cameras_right[i_cam] = cameras_right[i_cam].replace(k, cam_right_dict[k])
                    replaced = True

        print(cameras)
        print(cameras_left)
        print(cameras_right)

        # Arrange cameras left and right and extract sorting indices
        self.cameras_left_sort_idxs = list(np.argsort(cameras_left))
        self.cameras_right_sort_idxs = list(np.argsort(cameras_right))

        cameras_left_sorted = sorted(cameras_left)
        cameras_right_sorted = sorted(cameras_right)

        self.dataset = SynchronizedSceneDataset(
            path,
            split=split,
            datum_names=cameras,
            backward_context=back_context,
            forward_context=forward_context,
            requested_annotations=None,
            only_annotated_datums=False,
        )

        if self.with_geometric_context:
            self.dataset_left = SynchronizedSceneDataset(
                path,
                split=split,
                datum_names=cameras_left_sorted,
                backward_context=back_context,
                forward_context=forward_context,
                requested_annotations=None,
                only_annotated_datums=False,
            )
            self.dataset_right = SynchronizedSceneDataset(
                path,
                split=split,
                datum_names=cameras_right_sorted,
                backward_context=back_context,
                forward_context=forward_context,
                requested_annotations=None,
                only_annotated_datums=False,
            )

    @staticmethod
    def _get_base_folder(image_file):
        """The base folder"""
        return '/'.join(image_file.split('/')[:-4])

    @staticmethod
    def _get_sequence_name(image_file):
        """Returns a sequence name like '20180227_185324'."""
        return image_file.split('/')[-4]

    @staticmethod
    def _get_camera_name(image_file):
        """Returns 'cam_i', i between 0 and 4"""
        return image_file.split('/')[-2]

    def _get_path_to_ego_mask(self, image_file):
        """Get the path to the ego mask corresponding to image_file."""
        return os.path.join(self._get_base_folder(image_file),
                            self._get_sequence_name(image_file),
                            'semantic_masks',
                            self._get_camera_name(image_file) + '.npy')

    def generate_depth_map(self, sample_idx, datum_idx, filename):
        """
        Generates the depth map for a camera by projecting LiDAR information.
        It also caches the depth map following the DGP folder structure, so it is not recalculated.

        Parameters
        ----------
        sample_idx : int
            Sample index
        datum_idx : int
            Datum index
        filename : str
            Filename used for loading / saving

        Returns
        -------
        depth : np.array [H, W]
            Depth map for that datum in that sample
        """
        # Generate depth filename
        filename = '{}/{}.npz'.format(
            os.path.dirname(self.path), filename.format('depth/{}'.format(self.depth_type)))
        # Load and return if it exists
        if os.path.exists(filename):
            return np.load(filename, allow_pickle=True)['depth']
        # Otherwise, create, save and return
        else:
            # Get pointcloud
            scene_idx, sample_idx_in_scene, _ = self.dataset.dataset_item_index[sample_idx]
            pc_datum_idx_in_sample = self.dataset.get_datum_index_for_datum_name(
                scene_idx, sample_idx_in_scene, self.depth_type)
            pc_datum_data = self.dataset.get_point_cloud_from_datum(
                scene_idx, sample_idx_in_scene, pc_datum_idx_in_sample)
            # Create camera
            camera_rgb = self.get_current('rgb', datum_idx)
            camera_pose = self.get_current('pose', datum_idx)
            camera_intrinsics = self.get_current('intrinsics', datum_idx)
            camera = Camera(K=camera_intrinsics, p_cw=camera_pose.inverse())
            # Generate depth map
            world_points = pc_datum_data['pose'] * pc_datum_data['point_cloud']
            depth = generate_depth_map(camera, world_points, camera_rgb.size[::-1])
            # Save depth map
            os.makedirs(os.path.dirname(filename), exist_ok=True)
            np.savez_compressed(filename, depth=depth)
            # Return depth map
            return depth

    def get_current(self, key, sensor_idx):
        """Return the current timestep of a key from a sensor"""
        return self.sample_dgp[self.bwd][sensor_idx][key]

    def get_current_left(self, key, sensor_idx):
        """Return the current timestep of a key from the left-neighbor sensor"""
        return self.sample_dgp_left[self.bwd][sensor_idx][key]

    def get_current_right(self, key, sensor_idx):
        """Return the current timestep of a key from the right-neighbor sensor"""
        return self.sample_dgp_right[self.bwd][sensor_idx][key]

    def get_backward(self, key, sensor_idx):
        """Return the backward timesteps of a key from a sensor"""
        return [] if self.bwd == 0 else \
            [self.sample_dgp[i][sensor_idx][key] for i in range(0, self.bwd)]

    def get_backward_left(self, key, sensor_idx):
        """Return the backward timesteps of a key from the left-neighbor sensor"""
        return [] if self.bwd == 0 else \
            [self.sample_dgp_left[i][sensor_idx][key] for i in range(0, self.bwd)]

    def get_backward_right(self, key, sensor_idx):
        """Return the backward timesteps of a key from the right-neighbor sensor"""
        return [] if self.bwd == 0 else \
            [self.sample_dgp_right[i][sensor_idx][key] for i in range(0, self.bwd)]

    def get_forward(self, key, sensor_idx):
        """Return the forward timesteps of a key from a sensor"""
        return [] if self.fwd == 0 else \
            [self.sample_dgp[i][sensor_idx][key] for i in range(self.bwd + 1, self.bwd + self.fwd + 1)]

    def get_forward_left(self, key, sensor_idx):
        """Return the forward timesteps of a key from the left-neighbor sensor"""
        return [] if self.fwd == 0 else \
            [self.sample_dgp_left[i][sensor_idx][key] for i in range(self.bwd + 1, self.bwd + self.fwd + 1)]

    def get_forward_right(self, key, sensor_idx):
        """Return the forward timesteps of a key from the right-neighbor sensor"""
        return [] if self.fwd == 0 else \
            [self.sample_dgp_right[i][sensor_idx][key] for i in range(self.bwd + 1, self.bwd + self.fwd + 1)]

    def get_context(self, key, sensor_idx):
        """Get both backward and forward contexts"""
        return self.get_backward(key, sensor_idx) + self.get_forward(key, sensor_idx)

    def get_context_left(self, key, sensor_idx):
        """Get both backward and forward contexts from the left-neighbor sensor"""
        return self.get_backward_left(key, sensor_idx) + self.get_forward_left(key, sensor_idx)

    def get_context_right(self, key, sensor_idx):
        """Get both backward and forward contexts from the right-neighbor sensor"""
        return self.get_backward_right(key, sensor_idx) + self.get_forward_right(key, sensor_idx)

    def get_filename(self, sample_idx, datum_idx):
        """
        Returns the filename for an index, following the DGP structure

        Parameters
        ----------
        sample_idx : int
            Sample index
        datum_idx : int
            Datum index

        Returns
        -------
        filename : str
            Filename for the datum in that sample
        """
        scene_idx, sample_idx_in_scene, datum_indices = self.dataset.dataset_item_index[sample_idx]
        scene_dir = self.dataset.get_scene_directory(scene_idx)
        filename = self.dataset.get_datum(
            scene_idx, sample_idx_in_scene, datum_indices[datum_idx]).datum.image.filename
        return os.path.splitext(
            os.path.join(os.path.basename(scene_dir), filename.replace('rgb', '{}')))[0]

    def get_filename_left(self, sample_idx, datum_idx):
        """Returns the filename for an index in the left-neighbor dataset, following the DGP structure"""
        scene_idx, sample_idx_in_scene, datum_indices = self.dataset_left.dataset_item_index[sample_idx]
        scene_dir = self.dataset_left.get_scene_directory(scene_idx)
        filename = self.dataset_left.get_datum(
            scene_idx, sample_idx_in_scene, datum_indices[datum_idx]).datum.image.filename
        return os.path.splitext(
            os.path.join(os.path.basename(scene_dir), filename.replace('rgb', '{}')))[0]

    def get_filename_right(self, sample_idx, datum_idx):
        """Returns the filename for an index in the right-neighbor dataset, following the DGP structure"""
        scene_idx, sample_idx_in_scene, datum_indices = self.dataset_right.dataset_item_index[sample_idx]
        scene_dir = self.dataset_right.get_scene_directory(scene_idx)
        filename = self.dataset_right.get_datum(
            scene_idx, sample_idx_in_scene, datum_indices[datum_idx]).datum.image.filename
        return os.path.splitext(
            os.path.join(os.path.basename(scene_dir), filename.replace('rgb', '{}')))[0]

    def get_camera_idx_left(self, camera_idx):
        return self.cameras_left_sort_idxs[camera_idx]

    def get_camera_idx_right(self, camera_idx):
        return self.cameras_right_sort_idxs[camera_idx]

    def __len__(self):
        """Length of dataset"""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Get a dataset sample"""
        # Get DGP sample (if single sensor, make it a list)
        self.sample_dgp = self.dataset[idx]
        self.sample_dgp = [make_list(sample) for sample in self.sample_dgp]
        if self.with_geometric_context:
            self.sample_dgp_left = self.dataset_left[idx]
            self.sample_dgp_left = [make_list(sample) for sample in self.sample_dgp_left]
            self.sample_dgp_right = self.dataset_right[idx]
            self.sample_dgp_right = [make_list(sample) for sample in self.sample_dgp_right]

        # Loop over all cameras
        sample = []
        for i in range(self.num_cameras):
            i_left = self.get_camera_idx_left(i)
            i_right = self.get_camera_idx_right(i)

            data = {
                'idx': idx,
                'dataset_idx': self.dataset_idx,
                'sensor_name': self.get_current('datum_name', i),
                #
                'filename': self.get_filename(idx, i),
                'splitname': '%s_%010d' % (self.split, idx),
                #
                'rgb': self.get_current('rgb', i),
                'intrinsics': self.get_current('intrinsics', i),
                'extrinsics': self.get_current('extrinsics', i).matrix,
                'path_to_ego_mask': os.path.join(
                    os.path.dirname(self.path),
                    self._get_path_to_ego_mask(self.get_filename(idx, i))),
            }

            # If depth is returned
            if self.with_depth:
                data.update({
                    'depth': self.generate_depth_map(idx, i, data['filename'])
                })

            # If pose is returned
            if self.with_pose:
                data.update({
                    'pose': self.get_current('pose', i).matrix,
                })

            if self.has_context:
                orig_extrinsics = Pose.from_matrix(data['extrinsics'])
                data.update({
                    'rgb_context': self.get_context('rgb', i),
                    'intrinsics_context': self.get_context('intrinsics', i),
                    'extrinsics_context':
                        [(extrinsics.inverse() * orig_extrinsics).matrix
                         for extrinsics in self.get_context('extrinsics', i)],
                })
                data.update({
                    'path_to_ego_mask_context': [
                        os.path.join(os.path.dirname(self.path),
                                     self._get_path_to_ego_mask(self.get_filename(idx, i)))
                        for _ in range(len(data['rgb_context']))
                    ],
                })
                data.update({
                    'context_type': [],
                })
                for _ in range(self.bwd):
                    data['context_type'].append('backward')
                for _ in range(self.fwd):
                    data['context_type'].append('forward')

                # If context pose is returned
                if self.with_pose:
                    # Get original values to calculate relative motion
                    orig_pose = Pose.from_matrix(data['pose'])
                    data.update({
                        'pose_context':
                            [(orig_pose.inverse() * pose).matrix
                             for pose in self.get_context('pose', i)],
                    })

            if self.with_geometric_context:
                orig_extrinsics = data['extrinsics']
                #orig_extrinsics[:3,3] = -np.dot(orig_extrinsics[:3,:3].transpose(), orig_extrinsics[:3,3])
                orig_extrinsics_left = self.get_current_left('extrinsics', i_left).matrix
                orig_extrinsics_right = self.get_current_right('extrinsics', i_right).matrix
                #orig_extrinsics_left[:3,3] = -np.dot(orig_extrinsics_left[:3,:3].transpose(), orig_extrinsics_left[:3,3])
                #orig_extrinsics_right[:3,3] = -np.dot(orig_extrinsics_right[:3,:3].transpose(), orig_extrinsics_right[:3,3])
                orig_extrinsics = Pose.from_matrix(orig_extrinsics)
                orig_extrinsics_left = Pose.from_matrix(orig_extrinsics_left)
                orig_extrinsics_right = Pose.from_matrix(orig_extrinsics_right)

                data['rgb_context'].append(self.get_current_left('rgb', i_left))
                data['rgb_context'].append(self.get_current_right('rgb', i_right))
                data['intrinsics_context'].append(self.get_current_left('intrinsics', i_left))
                data['intrinsics_context'].append(self.get_current_right('intrinsics', i_right))
                data['extrinsics_context'].append((orig_extrinsics_left.inverse() * orig_extrinsics).matrix)
                data['extrinsics_context'].append((orig_extrinsics_right.inverse() * orig_extrinsics).matrix)
                #data['extrinsics_context'].append((orig_extrinsics.inverse() * orig_extrinsics_left).matrix)
                #data['extrinsics_context'].append((orig_extrinsics.inverse() * orig_extrinsics_right).matrix)
                data['path_to_ego_mask_context'].append(
                    os.path.join(os.path.dirname(self.path),
                                 self._get_path_to_ego_mask(self.get_filename_left(idx, i_left))))
                data['path_to_ego_mask_context'].append(
                    os.path.join(os.path.dirname(self.path),
                                 self._get_path_to_ego_mask(self.get_filename_right(idx, i_right))))
                data['context_type'].append('left')
                data['context_type'].append('right')

                data.update({
                    'sensor_name_left': self.get_current_left('datum_name', i_left),
                    'sensor_name_right': self.get_current_right('datum_name', i_right),
                    #
                    'filename_left': self.get_filename_left(idx, i_left),
                    'filename_right': self.get_filename_right(idx, i_right),
                    #
                    #'rgb_left': self.get_current_left('rgb', i),
                    #'rgb_right': self.get_current_right('rgb', i),
                    #'intrinsics_left': self.get_current_left('intrinsics', i),
                    #'intrinsics_right': self.get_current_right('intrinsics', i),
                    #'extrinsics_left': self.get_current_left('extrinsics', i).matrix,
                    #'extrinsics_right': self.get_current_right('extrinsics', i).matrix,
                    #'path_to_ego_mask_left': self._get_path_to_ego_mask(self.get_filename_left(idx, i)),
                    #'path_to_ego_mask_right': self._get_path_to_ego_mask(self.get_filename_right(idx, i)),
                })
                # data.update({
                #     'extrinsics_context_left':
                #         [(orig_extrinsics_left.inverse() * extrinsics_left).matrix
                #          for extrinsics_left in self.get_context_left('extrinsics', i)],
                #     'extrinsics_context_right':
                #         [(orig_extrinsics_right.inverse() * extrinsics_right).matrix
                #          for extrinsics_right in self.get_context_right('extrinsics', i)],
                #     'intrinsics_context_left': self.get_context_left('intrinsics', i),
                #     'intrinsics_context_right': self.get_context_right('intrinsics', i),
                # })

            sample.append(data)

        # Apply the same data transformations for all sensors
        if self.data_transform:
            sample = [self.data_transform(smp) for smp in sample]

        # Return sample (stacked if necessary)
        return stack_sample(sample)
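A minimal usage sketch of the class above. The dataset path and camera names are assumptions, and cam_left_dict / cam_right_dict are expected (per the constructor) to map each camera name to its left and right neighbours so the geometric-context datasets resolve to real datum names.

# Minimal usage sketch (path and camera names are assumptions; cam_left_dict / cam_right_dict
# must cover the chosen camera names for with_geometric_context=True to work).
valeo_dataset = DGPvaleoDataset(
    path='/data/my_valeo_dataset/scene_dataset_v1.0.json',
    split='train',
    cameras=['camera_01', 'camera_02'],
    depth_type=None,
    with_pose=True,
    back_context=1,
    forward_context=1,
    with_geometric_context=True,
)
sample = valeo_dataset[0]
# With both temporal and geometric context enabled, each per-camera dict carries a
# 'context_type' list such as ['backward', 'forward', 'left', 'right'].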
def test_labeled_synchronized_scene_dataset(self):
    """Test synchronized scene dataset"""
    expected_camera_fields = set([
        'rgb', 'timestamp', 'datum_name', 'pose', 'intrinsics', 'extrinsics', 'bounding_box_2d',
        'bounding_box_3d', 'class_ids', 'instance_ids', 'depth'
    ])
    expected_lidar_fields = set([
        'point_cloud', 'timestamp', 'datum_name', 'pose', 'extrinsics', 'bounding_box_3d',
        'class_ids', 'instance_ids', 'extra_channels'
    ])
    expected_metadata_fields = set([
        'scene_index', 'sample_index_in_scene', 'log_id', 'timestamp', 'scene_name', 'scene_description'
    ])

    # Initialize synchronized dataset with 2 datums
    scenes_dataset_json = os.path.join(self.DGP_TEST_DATASET_DIR, "test_scene", "scene_dataset_v1.0.json")
    dataset = SynchronizedSceneDataset(
        scenes_dataset_json,
        split='train',
        forward_context=1,
        backward_context=1,
        generate_depth_from_datum='LIDAR',
        requested_annotations=("bounding_box_2d", "bounding_box_3d")
    )
    dataset.select_datums(['LIDAR', 'CAMERA_01'])
    dataset.prefetch()

    # There are only 3 samples per scene in the train split. With a forward and
    # backward context of 1 each, each scene contributes a single item with the
    # desired context frames, so the two-scene dataset has 2 items.
    assert len(dataset) == 2

    # Iterate through labeled dataset and check expected fields
    assert dataset.calibration_table is not None
    for idx, item in enumerate(dataset):
        # Context size is 3 (forward + backward + reference)
        assert_true(len(item) == 3)

        # Two selected datums
        for t_item in item:
            assert_true(len(t_item) == 2)

        # LIDAR should have point_cloud set
        for t_item in item:
            assert_true(set(t_item[0].keys()) == expected_lidar_fields)
            assert_true(isinstance(t_item[0], OrderedDict))

        # CAMERA_01 should have intrinsics/extrinsics set
        im_size = None
        for t_item in item:
            assert_true(isinstance(t_item[1], OrderedDict))
            assert_true(t_item[1]['intrinsics'].shape == (3, 3))
            assert_true(isinstance(t_item[1]['extrinsics'], Pose))
            assert_true(isinstance(t_item[1]['pose'], Pose))
            # Check image sizes for context frames
            assert_true(set(t_item[1].keys()) == expected_camera_fields)
            if im_size is None:
                im_size = t_item[1]['rgb'].size
            assert_true(t_item[1]['rgb'].size == im_size)

        # Retrieve metadata about dataset item at index=idx
        metadata = dataset.get_scene_metadata(idx)
        assert_true(metadata.keys() == expected_metadata_fields)

    # Make sure you cannot select unavailable datums
    with assert_raises(AssertionError) as _:
        dataset.select_datums(['FAKE_LIDAR_NAME'])
# Synchronized dataset with all available datums within a sample
dataset_args = dict(
    backward_context=0,
    forward_context=0,
    requested_annotations=("bounding_box_3d", "bounding_box_2d")
)
if args.dataset_json:
    logging.info('dataset-json mode: Using split {}'.format(args.split))
    dataset = SynchronizedDataset(args.dataset_json, split=args.split, **dataset_args)
elif args.scene_dataset_json:
    logging.info('scene-dataset-json mode: Using split {}'.format(args.split))
    dataset = SynchronizedSceneDataset(args.scene_dataset_json, split=args.split, **dataset_args)
elif args.scene_json:
    logging.info('scene-json mode: Split value ignored')
    # Fetch scene from S3 to cache if a remote scene JSON is provided
    if args.scene_json.startswith('s3://'):
        args.scene_json = fetch_remote_scene(args.scene_json)
    dataset = SynchronizedScene(args.scene_json, **dataset_args)
else:
    raise ValueError('Provide either --dataset-json or --scene-json')

if args.point_cloud_only:
    dataset.select_datums(datum_names=['LIDAR'])

logging.info('Dataset: {}'.format(len(dataset)))

# 2D visualization