def test_followup_scenes(self) -> None:
    num_scenes = 10
    scenes = np.zeros(num_scenes, dtype=SCENE_DTYPE)
    for i in range(num_scenes):
        scenes[i]["host"] = "some-host"
        scenes[i]["start_time"] = i * 1000
        scenes[i]["end_time"] = (i + 1) * 1000
        scenes[i]["frame_index_interval"] = [i * 10, (i + 1) * 10]

    combined_scenes = get_combined_scenes(scenes)
    self.assertEqual(len(combined_scenes), 1)

    combo_scene = combined_scenes[0]
    self.assertEqual(combo_scene["host"], "some-host")
    self.assertEqual(combo_scene["start_time"], 0)
    self.assertEqual(combo_scene["end_time"], 10000)
    np.testing.assert_array_equal(combo_scene["frame_index_interval"], np.array([0, 100]))

    # To be combined, consecutive scenes must share the same host
    scenes[1]["host"] = "some-other-host"
    combined_scenes = get_combined_scenes(scenes)
    self.assertEqual(len(combined_scenes), 3)

    # ... and their timestamps must follow on exactly
    scenes[5]["start_time"] += 1
    combined_scenes = get_combined_scenes(scenes)
    self.assertEqual(len(combined_scenes), 4)
def test_trivial_input(self) -> None:
    # One scene
    scenes = np.zeros(1, dtype=SCENE_DTYPE)
    scenes[0]["host"] = "some-host"
    scenes[0]["start_time"] = 0
    scenes[0]["end_time"] = 1000
    scenes[0]["frame_index_interval"] = [0, 10]

    combined_scenes = get_combined_scenes(scenes)
    self.assertEqual(len(combined_scenes), 1)
    np.testing.assert_array_equal(scenes, combined_scenes)
def test_empty_input(self) -> None:
    # Empty
    scenes = np.array([], dtype=SCENE_DTYPE)
    combined_scenes = get_combined_scenes(scenes)
    self.assertEqual(len(combined_scenes), 0)
def select_agents(
    input_folder: str,
    th_agent_prob: float,
    th_history_num_frames: int,
    th_future_num_frames: int,
    th_yaw_degree: float,
    th_extent_ratio: float,
    th_movement: float,
    th_distance_av: float,
    num_workers: int,
) -> None:
    """
    Filter agents from the zarr INPUT_FOLDER according to multiple thresholds and store a boolean array
    of the same shape.
    """
    assert th_future_num_frames > 0

    # ===== LOAD
    dm = LocalDataManager()
    input_folder = dm.require(input_folder)
    zarr_dataset = ChunkedStateDataset(path=input_folder)
    zarr_dataset.open()
    zarr_dataset.scenes = get_combined_scenes(zarr_dataset.scenes)

    output_group = f"{th_history_num_frames}_{th_future_num_frames}_{th_agent_prob}"
    if "agents_mask" in zarr_dataset.root and f"agents_mask/{output_group}" in zarr_dataset.root:
        raise FileExistsError(f"{output_group} exists already! only one is supported for now!")

    frame_index_intervals = zarr_dataset.scenes["frame_index_interval"]

    # build a partial with all args except the first one (the frame_index_interval is passed by the worker processes)
    get_valid_agents_partial = partial(
        get_valid_agents,
        dataset=zarr_dataset,
        th_frames_past=th_history_num_frames,
        th_frames_future=th_future_num_frames,
        th_agent_filter_probability_threshold=th_agent_prob,
        th_yaw_degree=th_yaw_degree,
        th_extent_ratio=th_extent_ratio,
        th_movement=th_movement,
        th_distance_av=th_distance_av,
    )

    try:
        root = zarr.open(zarr_dataset.path, mode="a")
        root.create_group("agents_mask")
    except ValueError:
        pass  # group is already there

    agents_mask = zarr.open_array(
        str(Path(zarr_dataset.path) / "agents_mask" / output_group),
        mode="w",
        shape=(len(zarr_dataset.agents),),
        chunks=(10000,),
        dtype=bool,
        synchronizer=zarr.ProcessSynchronizer(f"/tmp/ag_mask_{str(uuid4())}.sync"),
    )

    report: Counter = Counter()
    print("starting pool...")
    with Pool(num_workers) as pool:
        tasks = tqdm(enumerate(pool.imap_unordered(get_valid_agents_partial, frame_index_intervals)))
        for idx, (mask, count, agents_range) in tasks:
            report += count
            agents_mask[agents_range[0]: agents_range[1]] = mask
        print("collecting results..")

    assert (
        report["total_agent_frames"] == report["selected_agent_frames"] + report["total_reject"]
    ), "something went REALLY wrong"

    agents_cfg = {
        "th_history_num_frames": th_history_num_frames,
        "th_future_num_frames": th_future_num_frames,
        "th_agent_filter_probability_threshold": th_agent_prob,
        "th_yaw_degree": th_yaw_degree,
        "th_extent_ratio": th_extent_ratio,
        "th_movement": th_movement,
        "th_distance_av": th_distance_av,
    }

    # print report
    pp = pprint.PrettyPrinter(indent=4)
    print(f"start report for {input_folder}")
    pp.pprint({**agents_cfg, **report})
    print(f"end report for {input_folder}")
    print("==============================")
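# A minimal usage sketch for select_agents (not part of the library): the zarr key and every
# threshold value below are illustrative assumptions chosen for the example, not recommended
# or shipped defaults.
if __name__ == "__main__":
    select_agents(
        input_folder="scenes/sample.zarr",  # hypothetical key, resolved by LocalDataManager
        th_agent_prob=0.5,
        th_history_num_frames=0,
        th_future_num_frames=12,
        th_yaw_degree=30.0,
        th_extent_ratio=1.1,
        th_movement=3.0,
        th_distance_av=50.0,
        num_workers=8,
    )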
def build_dataloader(
    cfg: Dict,
    split: str,
    data_manager: DataManager,
    dataset_class: Callable,
    rasterizer: Rasterizer,
    perturbation: Optional[Perturbation] = None,
) -> DataLoader:
    """
    Util function to build a dataloader from a dataset of dataset_class.
    Note we have to pass rasterizer and perturbation because the factory functions for those are likely
    to change between repos.

    Args:
        cfg (dict): configuration dict
        split (str): this will be used to index the cfg to get the correct datasets (train or val currently)
        data_manager (DataManager): manager for resolving paths
        dataset_class (Callable): a class object (EgoDataset or AgentDataset currently) to build the dataset
        rasterizer (Rasterizer): the rasterizer for the dataset
        perturbation (Optional[Perturbation]): an optional perturbation object

    Returns:
        DataLoader: pytorch Dataloader object built with Concat and Sub datasets
    """
    data_loader_cfg = cfg[f"{split}_data_loader"]

    datasets = []
    for dataset_param in data_loader_cfg["datasets"]:
        zarr_dataset_path = data_manager.require(key=dataset_param["key"])
        zarr_dataset = ChunkedStateDataset(path=zarr_dataset_path)
        zarr_dataset.open()
        zarr_dataset.scenes = get_combined_scenes(zarr_dataset.scenes)

        # Let's load the zarr dataset with our dataset.
        dataset = dataset_class(cfg, zarr_dataset, rasterizer, perturbation=perturbation)

        scene_indices = dataset_param["scene_indices"]
        scene_subsets = []

        if scene_indices[0] == -1:  # TODO replace with empty
            scene_subset = Subset(dataset, np.arange(0, len(dataset)))
            scene_subsets.append(scene_subset)
        else:
            for scene_idx in scene_indices:
                valid_indices = dataset.get_scene_indices(scene_idx)
                scene_subset = Subset(dataset, valid_indices)
                scene_subsets.append(scene_subset)

        datasets.extend(scene_subsets)

    # Let's concatenate the training scenes into one dataset for the data loader to load from.
    concat_dataset: ConcatDataset = ConcatDataset(datasets)

    # Initialize the data loader that our training loop will iterate on.
    batch_size = data_loader_cfg["batch_size"]
    shuffle = data_loader_cfg["shuffle"]
    num_workers = data_loader_cfg["num_workers"]
    dataloader = DataLoader(
        dataset=concat_dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers
    )

    return dataloader
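# A minimal usage sketch for build_dataloader. The helper below is hypothetical and only
# illustrates the call: it assumes cfg contains a "train_data_loader" section and that the
# caller already has a DataManager, a dataset class (e.g. EgoDataset or AgentDataset) and a
# Rasterizer instance at hand.
def _example_build_train_loader(
    cfg: Dict, dm: DataManager, dataset_class: Callable, rasterizer: Rasterizer
) -> DataLoader:
    # "train" selects cfg["train_data_loader"], which lists the zarr datasets to concatenate
    return build_dataloader(cfg, "train", dm, dataset_class, rasterizer)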
def test_empty_input() -> None:
    # Empty
    scenes = np.array([], dtype=SCENE_DTYPE)
    combined_scenes = get_combined_scenes(scenes)
    assert len(combined_scenes) == 0