# Imports for this test; paths assume the l5kit release that still shipped ChunkedStateDataset.
from pathlib import Path

import numpy as np
import pytest

from l5kit.data import ChunkedStateDataset
from l5kit.evaluation import compute_mse_error_csv, export_zarr_to_ground_truth_csv


def test_compute_mse_error(tmp_path: Path) -> None:
    data = ChunkedStateDataset(path="./l5kit/tests/data/single_scene.zarr")
    data.open()
    export_zarr_to_ground_truth_csv(data, str(tmp_path / "gt1.csv"), 0, 12,
                                    0.5)
    export_zarr_to_ground_truth_csv(data, str(tmp_path / "gt2.csv"), 0, 12,
                                    0.5)
    err = compute_mse_error_csv(str(tmp_path / "gt1.csv"),
                                str(tmp_path / "gt2.csv"))
    assert err == 0.0

    # in-memory copy of the dataset whose agent centroids we perturb,
    # so the exported trajectories differ from gt1
    data_fake = ChunkedStateDataset("")
    data_fake.scenes = np.asarray(data.scenes).copy()
    data_fake.frames = np.asarray(data.frames).copy()
    data_fake.agents = np.asarray(data.agents).copy()
    data_fake.root = data.root
    data_fake.agents["centroid"] += np.random.rand(
        *data_fake.agents["centroid"].shape)

    export_zarr_to_ground_truth_csv(data_fake, str(tmp_path / "gt3.csv"), 0,
                                    12, 0.5)
    err = compute_mse_error_csv(str(tmp_path / "gt1.csv"),
                                str(tmp_path / "gt3.csv"))
    assert err > 0.0

    # test an invalid CSV by dropping the last rows of gt1
    with open(str(tmp_path / "gt1.csv")) as fp:
        lines = fp.readlines()
    with open(str(tmp_path / "gt4.csv"), "w") as fp:
        fp.writelines(lines[:-10])

    with pytest.raises(ValueError):
        compute_mse_error_csv(str(tmp_path / "gt1.csv"),
                              str(tmp_path / "gt4.csv"))
Example 2
import numpy as np
import pytest

from l5kit.data import ChunkedStateDataset

SCENE_LENGTH = 50  # module-level constant in the original tests; the value here is illustrative


@pytest.fixture  # decorator assumed: the snippet is consumed as a pytest fixture
def dataset() -> ChunkedStateDataset:
    # fully synthetic single-scene dataset: one agent per frame
    dataset = ChunkedStateDataset("")
    dataset.scenes = np.zeros(1, dtype=dataset.scenes.dtype)
    dataset.frames = np.zeros(SCENE_LENGTH, dtype=dataset.frames.dtype)
    dataset.agents = np.zeros(SCENE_LENGTH, dtype=dataset.agents.dtype)

    dataset.scenes[0]["frame_index_interval"] = (0, SCENE_LENGTH)
    for idx in range(len(dataset.frames)):
        # each frame owns exactly one agent and a monotonically increasing timestamp
        dataset.frames[idx]["agent_index_interval"] = (idx, idx + 1)
        dataset.frames[idx]["timestamp"] = idx

    for idx in range(len(dataset.agents)):
        # we don't check moving anymore, so the agent can stay still
        dataset.agents[idx]["extent"] = (5, 5, 5)
        dataset.agents[idx]["yaw"] = 0
        dataset.agents[idx]["track_id"] = 1
        dataset.agents[idx]["label_probabilities"][3] = 1.0

    return dataset
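
# A minimal pytest-style consumer of the fixture above; the test itself is
# illustrative, not part of the original suite. It only checks the interval
# bookkeeping the fixture sets up, so it runs without any zarr on disk.
def test_synthetic_intervals(dataset: ChunkedStateDataset) -> None:
    # the single scene spans all frames
    assert tuple(dataset.scenes[0]["frame_index_interval"]) == (0, SCENE_LENGTH)
    for idx, frame in enumerate(dataset.frames):
        # each frame owns exactly one agent
        assert tuple(frame["agent_index_interval"]) == (idx, idx + 1)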
Example 3
# Imports for this script; l5kit paths are best-effort for the version that
# shipped ChunkedStateDataset. get_valid_agents is defined earlier in the
# same module and is not shown here.
import pprint
from collections import Counter
from functools import partial
from multiprocessing import Pool
from pathlib import Path
from uuid import uuid4

import numpy as np
import zarr
from tqdm import tqdm

from l5kit.data import ChunkedStateDataset, LocalDataManager, get_combined_scenes


def select_agents(
    input_folder: str,
    th_agent_prob: float,
    th_history_num_frames: int,
    th_future_num_frames: int,
    th_yaw_degree: float,
    th_extent_ratio: float,
    th_movement: float,
    th_distance_av: float,
    num_workers: int,
) -> None:
    """
    Filter agents from zarr INPUT_FOLDER according to multiple thresholds and store a boolean array of the same shape.
    """
    assert th_future_num_frames > 0

    # ===== LOAD
    dm = LocalDataManager()
    input_folder = dm.require(input_folder)

    zarr_dataset = ChunkedStateDataset(path=input_folder)
    zarr_dataset.open()
    zarr_dataset.scenes = get_combined_scenes(zarr_dataset.scenes)

    output_group = f"{th_history_num_frames}_{th_future_num_frames}_{th_agent_prob}"
    if "agents_mask" in zarr_dataset.root and f"agents_mask/{output_group}" in zarr_dataset.root:
        raise FileExistsError(
            f"{output_group} exists already! only one is supported for now!")

    frame_index_intervals = zarr_dataset.scenes["frame_index_interval"]

    # bind every argument except the first one (the frame_index_interval,
    # which each pool worker receives)
    get_valid_agents_partial = partial(
        get_valid_agents,
        dataset=zarr_dataset,
        th_frames_past=th_history_num_frames,
        th_frames_future=th_future_num_frames,
        th_agent_filter_probability_threshold=th_agent_prob,
        th_yaw_degree=th_yaw_degree,
        th_extent_ratio=th_extent_ratio,
        th_movement=th_movement,
        th_distance_av=th_distance_av,
    )

    try:
        root = zarr.open(zarr_dataset.path, mode="a")
        root.create_group("agents_mask")
    except ValueError:
        pass  # group is already there

    # the ProcessSynchronizer guards concurrent chunk writes with file locks under /tmp
    agents_mask = zarr.open_array(
        str(Path(zarr_dataset.path) / "agents_mask" / output_group),
        mode="w",
        shape=(len(zarr_dataset.agents),),
        chunks=(10000,),
        dtype=bool,  # np.bool is deprecated in recent NumPy; plain bool is equivalent
        synchronizer=zarr.ProcessSynchronizer(f"/tmp/ag_mask_{uuid4()}.sync"),
    )

    report: Counter = Counter()
    print("starting pool...")
    with Pool(num_workers) as pool:
        tasks = tqdm(
            enumerate(
                pool.imap_unordered(get_valid_agents_partial,
                                    frame_index_intervals)))
        for idx, (mask, count, agents_range) in tasks:
            report += count
            agents_mask[agents_range[0]:agents_range[1]] = mask
        print("collecting results..")

    assert (report["total_agent_frames"] == report["selected_agent_frames"] +
            report["total_reject"]), "something went REALLY wrong"

    agents_cfg = {
        "th_history_num_frames": th_history_num_frames,
        "th_future_num_frames": th_future_num_frames,
        "th_agent_filter_probability_threshold": th_agent_prob,
        "th_yaw_degree": th_yaw_degree,
        "th_extent_ratio": th_extent_ratio,
        "th_movement": th_movement,
        "th_distance_av": th_distance_av,
    }
    # print report
    pp = pprint.PrettyPrinter(indent=4)
    print(f"start report for {input_folder}")
    pp.pprint({**agents_cfg, **report})
    print(f"end report for {input_folder}")
    print("==============================")
Example 4
# Imports for this utility; l5kit paths are best-effort for the version that
# shipped ChunkedStateDataset.
from typing import Callable, Dict, Optional

import numpy as np
from torch.utils.data import ConcatDataset, DataLoader, Subset

from l5kit.data import ChunkedStateDataset, DataManager, get_combined_scenes
from l5kit.kinematic import Perturbation
from l5kit.rasterization import Rasterizer


def build_dataloader(
    cfg: Dict,
    split: str,
    data_manager: DataManager,
    dataset_class: Callable,
    rasterizer: Rasterizer,
    perturbation: Optional[Perturbation] = None,
) -> DataLoader:
    """
    Util function to build a dataloader from a dataset of dataset_class. Note that the rasterizer and
    perturbation are passed in explicitly, since their factory functions are likely to change between repos.

    Args:
        cfg (dict): configuration dict
        split (str): this will be used to index the cfg to get the correct datasets (train or val currently)
        data_manager (DataManager): manager for resolving paths
        dataset_class (Callable): a class object (EgoDataset or AgentDataset currently) to build the dataset
        rasterizer (Rasterizer): the rasterizer for the dataset
        perturbation (Optional[Perturbation]): an optional perturbation object

    Returns:
        DataLoader: pytorch Dataloader object built with Concat and Sub datasets
    """

    data_loader_cfg = cfg[f"{split}_data_loader"]
    datasets = []
    for dataset_param in data_loader_cfg["datasets"]:
        zarr_dataset_path = data_manager.require(key=dataset_param["key"])
        zarr_dataset = ChunkedStateDataset(path=zarr_dataset_path)
        zarr_dataset.open()
        zarr_dataset.scenes = get_combined_scenes(zarr_dataset.scenes)

        #  Let's load the zarr dataset with our dataset.
        dataset = dataset_class(cfg,
                                zarr_dataset,
                                rasterizer,
                                perturbation=perturbation)

        scene_indices = dataset_param["scene_indices"]
        scene_subsets = []

        if scene_indices[0] == -1:  # -1 means "use every scene"; TODO replace with empty list
            scene_subset = Subset(dataset, np.arange(0, len(dataset)))
            scene_subsets.append(scene_subset)
        else:
            for scene_idx in scene_indices:
                valid_indices = dataset.get_scene_indices(scene_idx)
                scene_subset = Subset(dataset, valid_indices)
                scene_subsets.append(scene_subset)

        datasets.extend(scene_subsets)

    #  Let's concatenate the training scenes into one dataset for the data loader to load from.
    concat_dataset: ConcatDataset = ConcatDataset(datasets)

    #  Initialize the data loader that our training loop will iterate on.
    batch_size = data_loader_cfg["batch_size"]
    shuffle = data_loader_cfg["shuffle"]
    num_workers = data_loader_cfg["num_workers"]
    dataloader = DataLoader(dataset=concat_dataset,
                            batch_size=batch_size,
                            shuffle=shuffle,
                            num_workers=num_workers)

    return dataloader
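
# A hedged usage sketch: the cfg layout mirrors exactly the keys build_dataloader
# reads (f"{split}_data_loader" with datasets / batch_size / shuffle / num_workers),
# but the concrete values, the dataset key, and the choice of EgoDataset and
# build_rasterizer are illustrative assumptions.
from l5kit.data import LocalDataManager
from l5kit.dataset import EgoDataset
from l5kit.rasterization import build_rasterizer

cfg = {
    "train_data_loader": {
        "datasets": [{"key": "scenes/train.zarr", "scene_indices": [-1]}],
        "batch_size": 12,
        "shuffle": True,
        "num_workers": 4,
    },
    # ...plus whatever raster/model params dataset_class and the rasterizer expect
}
dm = LocalDataManager()
rasterizer = build_rasterizer(cfg, dm)
train_loader = build_dataloader(cfg, "train", dm, EgoDataset, rasterizer)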