def test_compute_mse_error(tmp_path: Path) -> None:
    """Check compute_mse_error_csv on identical, perturbed and truncated CSVs.

    Three cases are exercised:
      * two exports of the same dataset give an error of exactly 0.0;
      * an export with randomly shifted agent centroids gives an error > 0.0;
      * a CSV missing its last 10 lines makes the function raise ValueError.

    Args:
        tmp_path (Path): temporary directory where the CSV files are written
    """
    gt1_path = str(tmp_path / "gt1.csv")
    gt2_path = str(tmp_path / "gt2.csv")
    gt3_path = str(tmp_path / "gt3.csv")
    gt4_path = str(tmp_path / "gt4.csv")

    data = ChunkedStateDataset(path="./l5kit/tests/data/single_scene.zarr")
    data.open()

    # same dataset exported twice -> no error at all
    export_zarr_to_ground_truth_csv(data, gt1_path, 0, 12, 0.5)
    export_zarr_to_ground_truth_csv(data, gt2_path, 0, 12, 0.5)
    err = compute_mse_error_csv(gt1_path, gt2_path)
    assert err == 0.0

    # in-memory copy of the dataset, so the perturbation below does not touch `data`
    data_fake = ChunkedStateDataset("")
    data_fake.scenes = np.asarray(data.scenes).copy()
    data_fake.frames = np.asarray(data.frames).copy()
    data_fake.agents = np.asarray(data.agents).copy()
    data_fake.root = data.root
    data_fake.agents["centroid"] += np.random.rand(*data_fake.agents["centroid"].shape)

    export_zarr_to_ground_truth_csv(data_fake, gt3_path, 0, 12, 0.5)
    err = compute_mse_error_csv(gt1_path, gt3_path)
    assert err > 0.0

    # test invalid conf by removing lines in gt1
    # (read through a context manager so the handle is closed deterministically)
    with open(gt1_path) as src:
        lines = src.readlines()
    with open(gt4_path, "w") as fp:
        fp.writelines(lines[:-10])

    with pytest.raises(ValueError):
        compute_mse_error_csv(gt1_path, gt4_path)
def dataset() -> ChunkedStateDataset:
    """Build a synthetic in-memory dataset made of a single scene.

    The scene spans SCENE_LENGTH frames; frame ``i`` has timestamp ``i`` and
    references exactly one agent (agent ``i``). Every agent record shares the
    same extent, yaw and track id, and has its label probability at index 3
    set to 1.0.

    Returns:
        ChunkedStateDataset: the populated dataset (never opened from disk)
    """
    synthetic = ChunkedStateDataset("")
    synthetic.scenes = np.zeros(1, dtype=synthetic.scenes.dtype)
    synthetic.frames = np.zeros(SCENE_LENGTH, dtype=synthetic.frames.dtype)
    synthetic.agents = np.zeros(SCENE_LENGTH, dtype=synthetic.agents.dtype)

    synthetic.scenes[0]["frame_index_interval"] = (0, SCENE_LENGTH)

    # frame i -> timestamp i, agents slice [i, i + 1)
    frame_ids = np.arange(len(synthetic.frames))
    synthetic.frames["agent_index_interval"] = np.stack((frame_ids, frame_ids + 1), axis=-1)
    synthetic.frames["timestamp"] = frame_ids

    # we don't check moving anymore, so the agent can stay still
    synthetic.agents["extent"] = (5, 5, 5)
    synthetic.agents["yaw"] = 0
    synthetic.agents["track_id"] = 1
    synthetic.agents["label_probabilities"][:, 3] = 1.0

    return synthetic
def select_agents(
    input_folder: str,
    th_agent_prob: float,
    th_history_num_frames: int,
    th_future_num_frames: int,
    th_yaw_degree: float,
    th_extent_ratio: float,
    th_movement: float,
    th_distance_av: float,
    num_workers: int,
) -> None:
    """
    Filter agents from zarr INPUT_FOLDER according to multiple thresholds and store a boolean array of the same shape.

    The mask is written under ``agents_mask/<history>_<future>_<prob>`` inside the
    dataset folder, one boolean per agent. Scenes are processed in parallel by a
    pool of worker processes and a summary report is printed at the end.

    Args:
        input_folder (str): dataset key, resolved to a path through LocalDataManager
        th_agent_prob (float): forwarded to get_valid_agents as th_agent_filter_probability_threshold
        th_history_num_frames (int): forwarded to get_valid_agents as th_frames_past
        th_future_num_frames (int): forwarded to get_valid_agents as th_frames_future, must be > 0
        th_yaw_degree (float): forwarded to get_valid_agents
        th_extent_ratio (float): forwarded to get_valid_agents
        th_movement (float): forwarded to get_valid_agents
        th_distance_av (float): forwarded to get_valid_agents
        num_workers (int): number of processes in the pool

    Raises:
        AssertionError: if th_future_num_frames is not positive, or if the per-scene
            counters do not add up after processing
        FileExistsError: if a mask for the same history/future/prob triple already exists
    """
    assert th_future_num_frames > 0

    # ===== LOAD
    dm = LocalDataManager()
    input_folder = dm.require(input_folder)

    zarr_dataset = ChunkedStateDataset(path=input_folder)
    zarr_dataset.open()
    zarr_dataset.scenes = get_combined_scenes(zarr_dataset.scenes)

    output_group = f"{th_history_num_frames}_{th_future_num_frames}_{th_agent_prob}"
    if "agents_mask" in zarr_dataset.root and f"agents_mask/{output_group}" in zarr_dataset.root:
        raise FileExistsError(f"{output_group} exists already! only one is supported for now!")

    frame_index_intervals = zarr_dataset.scenes["frame_index_interval"]

    # build a partial with all args except the first one (will be passed by the pool workers)
    get_valid_agents_partial = partial(
        get_valid_agents,
        dataset=zarr_dataset,
        th_frames_past=th_history_num_frames,
        th_frames_future=th_future_num_frames,
        th_agent_filter_probability_threshold=th_agent_prob,
        th_yaw_degree=th_yaw_degree,
        th_extent_ratio=th_extent_ratio,
        th_movement=th_movement,
        th_distance_av=th_distance_av,
    )

    # make sure the parent group exists; require_group is a no-op when it is already
    # there, so errors raised while opening the store are no longer swallowed
    root = zarr.open(zarr_dataset.path, mode="a")
    root.require_group("agents_mask")

    agents_mask = zarr.open_array(
        str(Path(zarr_dataset.path) / "agents_mask" / output_group),
        mode="w",
        shape=(len(zarr_dataset.agents),),
        chunks=(10000,),
        dtype=bool,  # the np.bool alias was removed in NumPy 1.24
        synchronizer=zarr.ProcessSynchronizer(f"/tmp/ag_mask_{str(uuid4())}.sync"),
    )

    report: Counter = Counter()

    print("starting pool...")
    with Pool(num_workers) as pool:
        tasks = tqdm(pool.imap_unordered(get_valid_agents_partial, frame_index_intervals))
        for mask, count, agents_range in tasks:
            report += count
            # results arrive in arbitrary order, each one carries its own agents slice
            agents_mask[agents_range[0]:agents_range[1]] = mask
        print("collecting results..")

    # sanity check: every agent frame is either selected or rejected
    assert (
        report["total_agent_frames"] == report["selected_agent_frames"] + report["total_reject"]
    ), "something went REALLY wrong"

    agents_cfg = {
        "th_history_num_frames": th_history_num_frames,
        "th_future_num_frames": th_future_num_frames,
        "th_agent_filter_probability_threshold": th_agent_prob,
        "th_yaw_degree": th_yaw_degree,
        "th_extent_ratio": th_extent_ratio,
        "th_movement": th_movement,
        "th_distance_av": th_distance_av,
    }

    # print report
    pp = pprint.PrettyPrinter(indent=4)
    print(f"start report for {input_folder}")
    pp.pprint({**agents_cfg, **report})
    print(f"end report for {input_folder}")
    print("==============================")
def build_dataloader(
    cfg: Dict,
    split: str,
    data_manager: DataManager,
    dataset_class: Callable,
    rasterizer: Rasterizer,
    perturbation: Optional[Perturbation] = None,
) -> DataLoader:
    """
    Assemble a pytorch DataLoader over every dataset listed for the given split.

    The rasterizer and the perturbation are received as arguments because their
    factory functions are likely to differ between repos.

    Each entry of ``cfg["<split>_data_loader"]["datasets"]`` is opened as a zarr
    dataset and wrapped with dataset_class. If the first element of its
    ``scene_indices`` is -1 the whole dataset is used; otherwise one Subset per
    listed scene is created. All subsets are then concatenated.

    Args:
        cfg (dict): configuration dict
        split (str): selects the ``<split>_data_loader`` section of cfg (train or val currently)
        data_manager (DataManager): manager for resolving paths
        dataset_class (Callable): a class object (EgoDataset or AgentDataset currently) to build the dataset
        rasterizer (Rasterizer): the rasterizer for the dataset
        perturbation (Optional[Perturbation]): an optional perturbation object

    Returns:
        DataLoader: pytorch Dataloader object built with Concat and Sub datasets
    """
    loader_cfg = cfg[f"{split}_data_loader"]

    subsets = []
    for entry in loader_cfg["datasets"]:
        resolved_path = data_manager.require(key=entry["key"])

        zarr_data = ChunkedStateDataset(path=resolved_path)
        zarr_data.open()
        zarr_data.scenes = get_combined_scenes(zarr_data.scenes)

        # Wrap the raw zarr data with the requested dataset type.
        wrapped = dataset_class(cfg, zarr_data, rasterizer, perturbation=perturbation)

        requested_scenes = entry["scene_indices"]
        if requested_scenes[0] != -1:
            # One subset per requested scene.
            subsets.extend(
                Subset(wrapped, wrapped.get_scene_indices(scene_idx)) for scene_idx in requested_scenes
            )
        else:
            # TODO replace with empty
            subsets.append(Subset(wrapped, np.arange(0, len(wrapped))))

    # A single concatenated dataset is what the training loop iterates on.
    return DataLoader(
        dataset=ConcatDataset(subsets),
        batch_size=loader_cfg["batch_size"],
        shuffle=loader_cfg["shuffle"],
        num_workers=loader_cfg["num_workers"],
    )