Example 1
    def _get_simple_dataset(self) -> ChunkedDataset:
        # build a simple dataset with 3 frames
        # frame 0:
        #   agent 0
        #   agent 1
        #   agent 2
        # frame 1:
        #   agent 0
        #   agent 1
        # frame 2:
        #   agent 0

        dataset = ChunkedDataset("")
        dataset.scenes = np.zeros(1, dtype=SCENE_DTYPE)
        dataset.frames = np.zeros(3, dtype=FRAME_DTYPE)
        dataset.agents = np.zeros(6, dtype=AGENT_DTYPE)

        dataset.scenes[0]["frame_index_interval"] = (0, 3)
        dataset.frames["agent_index_interval"] = [(0, 3), (3, 5), (5, 6)]

        dataset.agents["track_id"] = [0, 1, 2, 0, 1, 0]
        # set properties to something different than 0
        dataset.agents["centroid"] = np.random.rand(*dataset.agents["centroid"].shape)
        dataset.agents["yaw"] = np.random.rand(*dataset.agents["yaw"].shape)
        dataset.agents["extent"] = np.random.rand(*dataset.agents["extent"].shape)
        return dataset
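Assuming dataset is the object returned by _get_simple_dataset above, a minimal sketch (not part of the original test) of how agent_index_interval ties each frame to its rows in the agents array:

# frame 1 covers agent rows 3..5, i.e. the two agents with track ids 0 and 1
start, end = dataset.frames[1]["agent_index_interval"]
frame_1_agents = dataset.agents[start:end]
assert list(frame_1_agents["track_id"]) == [0, 1]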
Example 2
def test_compute_mse_error(tmp_path: Path) -> None:
    data = ChunkedDataset(path="./l5kit/tests/artefacts/single_scene.zarr")
    data.open()
    export_zarr_to_ground_truth_csv(data, str(tmp_path / "gt1.csv"), 0, 50, 0.5)
    data.open()  # avoid double select_agents
    export_zarr_to_ground_truth_csv(data, str(tmp_path / "gt2.csv"), 0, 50, 0.5)
    err = compute_mse_error_csv(str(tmp_path / "gt1.csv"), str(tmp_path / "gt2.csv"))
    assert np.all(err == 0.0)

    data_fake = ChunkedDataset(str(tmp_path))
    data_fake.scenes = np.asarray(data.scenes).copy()
    data_fake.frames = np.asarray(data.frames).copy()
    data_fake.agents = np.asarray(data.agents).copy()
    data_fake.root = data.root
    data_fake.agents["centroid"] += np.random.rand(*data_fake.agents["centroid"].shape)

    export_zarr_to_ground_truth_csv(data_fake, str(tmp_path / "gt3.csv"), 0, 50, 0.5)
    err = compute_mse_error_csv(str(tmp_path / "gt1.csv"), str(tmp_path / "gt3.csv"))
    assert np.any(err > 0.0)

    # test invalid conf by removing lines in gt1
    with open(str(tmp_path / "gt1.csv")) as fp_in:
        lines = fp_in.readlines()
    with open(str(tmp_path / "gt4.csv"), "w") as fp_out:
        fp_out.writelines(lines[:-10])

    with pytest.raises(ValueError):
        compute_mse_error_csv(str(tmp_path / "gt1.csv"), str(tmp_path / "gt4.csv"))
Example 3
def test_compute_mse_error(tmp_path: Path,
                           zarr_dataset: ChunkedDataset) -> None:
    export_zarr_to_ground_truth_csv(zarr_dataset, str(tmp_path / "gt1.csv"),
                                    10, 50, 0.5)
    export_zarr_to_ground_truth_csv(zarr_dataset, str(tmp_path / "gt2.csv"),
                                    10, 50, 0.5)
    err = compute_mse_error_csv(str(tmp_path / "gt1.csv"),
                                str(tmp_path / "gt2.csv"))
    assert np.all(err == 0.0)

    data_fake = ChunkedDataset(str(tmp_path))
    data_fake.scenes = np.asarray(zarr_dataset.scenes).copy()
    data_fake.frames = np.asarray(zarr_dataset.frames).copy()
    data_fake.agents = np.asarray(zarr_dataset.agents).copy()
    data_fake.agents["centroid"] += np.random.rand(
        *data_fake.agents["centroid"].shape) * 1e-2

    export_zarr_to_ground_truth_csv(data_fake, str(tmp_path / "gt3.csv"), 10,
                                    50, 0.5)
    err = compute_mse_error_csv(str(tmp_path / "gt1.csv"),
                                str(tmp_path / "gt3.csv"))
    assert np.any(err > 0.0)

    # test invalid conf by removing lines in gt1
    with open(str(tmp_path / "gt1.csv")) as fp_in:
        lines = fp_in.readlines()
    with open(str(tmp_path / "gt4.csv"), "w") as fp_out:
        fp_out.writelines(lines[:-10])

    with pytest.raises(ValueError):
        compute_mse_error_csv(str(tmp_path / "gt1.csv"),
                              str(tmp_path / "gt4.csv"))
Example 4
def get_frames_subset(dataset: ChunkedDataset, frame_start_idx: int,
                      frame_end_idx: int) -> ChunkedDataset:
    """Get a new dataset with frames between start (included) and end (excluded).
    Assumptions:
    - the dataset has only 1 scene
    - the dataset is in numpy format and not zarr anymore

    :param dataset: the single-scene dataset.
    :param frame_start_idx: first frame to keep.
    :param frame_end_idx: where to stop taking frames (excluded).
    :return: a new single-scene ChunkedDataset containing only the selected frames.
    """
    if not len(dataset.scenes) == 1:
        raise ValueError(
            f"dataset should have a single scene, got {len(dataset.scenes)}")
    if not isinstance(dataset.agents, np.ndarray):
        raise ValueError("dataset agents should be an editable np array")
    if not isinstance(dataset.tl_faces, np.ndarray):
        raise ValueError("dataset tls should be an editable np array")
    if not isinstance(dataset.frames, np.ndarray):
        raise ValueError("dataset frames should be an editable np array")
    if frame_start_idx >= len(dataset.frames):
        raise ValueError(
            f"frame start {frame_start_idx} is beyond the length of the dataset")
    if frame_end_idx > len(dataset.frames):
        raise ValueError(
            f"frame end {frame_end_idx} is beyond the length of the dataset")
    if frame_start_idx >= frame_end_idx:
        raise ValueError(
            f"end frame {frame_end_idx} should be greater than start frame {frame_start_idx}"
        )
    if frame_start_idx < 0:
        raise ValueError(f"start frame {frame_start_idx} should be non-negative")

    new_dataset = ChunkedDataset("")
    new_dataset.scenes = dataset.scenes.copy()
    new_dataset.scenes[0]["start_time"] = dataset.frames[frame_start_idx][
        "timestamp"]
    new_dataset.scenes[0]["end_time"] = dataset.frames[frame_end_idx -
                                                       1]["timestamp"]

    new_dataset.frames = dataset.frames[frame_start_idx:frame_end_idx].copy()
    new_dataset.scenes[0]["frame_index_interval"] = (0,
                                                     len(new_dataset.frames))

    agent_slice = get_agents_slice_from_frames(
        *dataset.frames[[frame_start_idx, frame_end_idx - 1]])
    tls_slice = get_tl_faces_slice_from_frames(
        *dataset.frames[[frame_start_idx, frame_end_idx - 1]])
    new_dataset.frames["agent_index_interval"] -= new_dataset.frames[
        "agent_index_interval"][0, 0]
    new_dataset.frames[
        "traffic_light_faces_index_interval"] -= new_dataset.frames[
            "traffic_light_faces_index_interval"][0, 0]
    new_dataset.agents = dataset.agents[agent_slice].copy()
    new_dataset.tl_faces = dataset.tl_faces[tls_slice].copy()
    return new_dataset
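A short usage sketch for get_frames_subset (assuming zarr_dataset is a single-scene ChunkedDataset already loaded into numpy arrays, with at least 20 frames): besides slicing the frames, the function rebases the agent and traffic-light index intervals so they start at 0.

# keep frames [10, 20) of an in-memory, single-scene dataset
subset = get_frames_subset(zarr_dataset, 10, 20)
assert len(subset.frames) == 10
# index intervals are rebased to the new agents/tl_faces arrays
assert subset.frames["agent_index_interval"][0, 0] == 0
assert subset.frames["traffic_light_faces_index_interval"][0, 0] == 0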
Example 5
def dataset(tmp_path: Path) -> ChunkedDataset:
    dataset = ChunkedDataset(str(tmp_path))
    dataset.scenes = np.zeros(1, dtype=dataset.scenes.dtype)
    dataset.frames = np.zeros(SCENE_LENGTH, dtype=dataset.frames.dtype)
    dataset.agents = np.zeros(SCENE_LENGTH, dtype=dataset.agents.dtype)

    dataset.scenes[0]["frame_index_interval"] = (0, SCENE_LENGTH)
    for idx in range(len(dataset.frames)):
        dataset.frames[idx]["agent_index_interval"] = (idx, idx + 1)
        dataset.frames[idx]["timestamp"] = idx

    for idx in range(len(dataset.agents)):
        # we don't check moving anymore, so the agent can stay still
        dataset.agents[idx]["extent"] = (5, 5, 5)
        dataset.agents[idx]["yaw"] = 0
        dataset.agents[idx]["track_id"] = 1
        dataset.agents[idx]["label_probabilities"][3] = 1.0

    return dataset
Example 6
def _mock_dataset() -> ChunkedDataset:
    zarr_dt = ChunkedDataset("")
    zarr_dt.scenes = np.zeros(1, dtype=SCENE_DTYPE)
    zarr_dt.scenes["frame_index_interval"][0] = (0, 4)

    zarr_dt.frames = np.zeros(4, dtype=FRAME_DTYPE)
    zarr_dt.frames["agent_index_interval"][0] = (0, 3)
    zarr_dt.frames["agent_index_interval"][1] = (3, 5)
    zarr_dt.frames["agent_index_interval"][2] = (5, 6)
    zarr_dt.frames["agent_index_interval"][3] = (6, 6)

    zarr_dt.agents = np.zeros(6, dtype=AGENT_DTYPE)
    # all agents except the first one are valid
    zarr_dt.agents["label_probabilities"][1:, 3] = 1
    # FRAME 0
    # second agent is close to ego and has id 1
    zarr_dt.agents["track_id"][1] = 1
    zarr_dt.agents["centroid"][1] = (1, 1)
    # third agent is too far and has id 2
    zarr_dt.agents["track_id"][2] = 2
    zarr_dt.agents["centroid"][2] = (100, 100)

    # FRAME 1
    # track 1 agent is still close to ego
    zarr_dt.agents["track_id"][3] = 1
    zarr_dt.agents["centroid"][3] = (1, 2)
    # track 2 is now close enough
    zarr_dt.agents["track_id"][4] = 2
    zarr_dt.agents["centroid"][4] = (1, 1)

    # FRAME 2
    # track 1 agent is far
    zarr_dt.agents["track_id"][5] = 1
    zarr_dt.agents["centroid"][5] = (100, 100)

    # FRAME 3 is empty

    zarr_dt.tl_faces = np.zeros(0, dtype=TL_FACE_DTYPE)

    return zarr_dt
Example 7
    def _get_simple_dataset(self) -> ChunkedDataset:
        # build a simple dataset with 3 frames
        # frame 0:
        #   agent 0
        #   agent 1
        #   agent 2
        # frame 1:
        #   agent 0
        #   agent 1
        # frame 2:
        #   agent 0

        dataset = ChunkedDataset("")
        dataset.scenes = np.zeros(1, dtype=SCENE_DTYPE)
        dataset.frames = np.zeros(3, dtype=FRAME_DTYPE)
        dataset.agents = np.zeros(6, dtype=AGENT_DTYPE)

        dataset.scenes[0]["frame_index_interval"] = (0, 3)
        dataset.frames["agent_index_interval"] = [(0, 3), (3, 5), (5, 6)]

        dataset.agents["track_id"] = [0, 1, 2, 0, 1, 0]
        return dataset
Example 8
def test_mock_dataset_frames_subset() -> None:
    zarr_dataset = ChunkedDataset("")
    zarr_dataset.scenes = np.zeros(1, dtype=SCENE_DTYPE)
    zarr_dataset.scenes[0]["frame_index_interval"] = (0, 4)
    zarr_dataset.frames = np.zeros(4, dtype=FRAME_DTYPE)
    zarr_dataset.frames["agent_index_interval"] = [(0, 1), (1, 2), (2, 3), (3, 4)]
    zarr_dataset.agents = np.zeros(4, dtype=AGENT_DTYPE)
    zarr_dataset.agents["track_id"] = np.arange(4)
    zarr_dataset.tl_faces = np.zeros(0, dtype=TL_FACE_DTYPE)

    frame_start = 1
    frame_end = 3
    zarr_cut = get_frames_subset(zarr_dataset, frame_start, frame_end)
    assert np.all(zarr_cut.agents["track_id"] == [1, 2])

    frame_start = 0
    frame_end = 3
    zarr_cut = get_frames_subset(zarr_dataset, frame_start, frame_end)
    assert np.all(zarr_cut.agents["track_id"] == [0, 1, 2])

    frame_start = 2
    frame_end = 4
    zarr_cut = get_frames_subset(zarr_dataset, frame_start, frame_end)
    assert np.all(zarr_cut.agents["track_id"] == [2, 3])
Example 9
def build_dataloader(
    cfg: Dict,
    split: str,
    data_manager: DataManager,
    dataset_class: Callable,
    rasterizer: Rasterizer,
    perturbation: Optional[Perturbation] = None,
    combine_scenes: bool = False,
) -> DataLoader:
    """
    Function to build a dataloader from a dataset of dataset_class. Note that we pass in the rasterizer and
    perturbation directly, as their factory functions are likely to change between repos.

    Args:
        cfg (dict): configuration dict
        split (str): this will be used to index the cfg to get the correct datasets (train or val currently)
        data_manager (DataManager): manager for resolving paths
        dataset_class (Callable): a class object (EgoDataset or AgentDataset currently) to build the dataset
        rasterizer (Rasterizer): the rasterizer for the dataset
        perturbation (Optional[Perturbation]): an optional perturbation object
        combine_scenes (bool): whether to combine scenes that follow each other contiguously

    Returns:
        DataLoader: pytorch Dataloader object built with Concat and Sub datasets
    """

    data_loader_cfg = cfg[f"{split}_data_loader"]
    datasets = []
    for dataset_param in data_loader_cfg["datasets"]:
        zarr_dataset_path = data_manager.require(key=dataset_param["key"])
        zarr_dataset = ChunkedDataset(path=zarr_dataset_path)
        zarr_dataset.open()
        if combine_scenes:  # possible future deprecation
            zarr_dataset.scenes = get_combined_scenes(zarr_dataset.scenes)

        #  Let's wrap the zarr dataset with our dataset class.
        dataset = dataset_class(cfg,
                                zarr_dataset,
                                rasterizer,
                                perturbation=perturbation)

        scene_indices = dataset_param["scene_indices"]
        scene_subsets = []

        if scene_indices[0] == -1:  # TODO replace with empty
            scene_subset = Subset(dataset, np.arange(0, len(dataset)))
            scene_subsets.append(scene_subset)
        else:
            for scene_idx in scene_indices:
                valid_indices = dataset.get_scene_indices(scene_idx)
                scene_subset = Subset(dataset, valid_indices)
                scene_subsets.append(scene_subset)

        datasets.extend(scene_subsets)

    #  Let's concatenate the training scenes into one dataset for the data loader to load from.
    concat_dataset: ConcatDataset = ConcatDataset(datasets)

    #  Initialize the data loader that our training loop will iterate on.
    batch_size = data_loader_cfg["batch_size"]
    shuffle = data_loader_cfg["shuffle"]
    num_workers = data_loader_cfg["num_workers"]
    dataloader = DataLoader(dataset=concat_dataset,
                            batch_size=batch_size,
                            shuffle=shuffle,
                            num_workers=num_workers)

    return dataloader
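A minimal sketch of the configuration build_dataloader reads, inferred from the keys accessed above (the zarr key and the values are placeholders, not taken from any real config):

cfg = {
    "train_data_loader": {
        "datasets": [
            # each entry names a zarr (resolved by the DataManager) and the scenes to use;
            # a leading -1 in scene_indices means "use all scenes"
            {"key": "scenes/train.zarr", "scene_indices": [-1]},
        ],
        "batch_size": 12,
        "shuffle": True,
        "num_workers": 4,
    }
}
# train_loader = build_dataloader(cfg, "train", data_manager, EgoDataset, rasterizer)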