Example #1
    def test_followup_scenes(self) -> None:
        num_scenes = 10
        scenes = np.zeros(num_scenes, dtype=SCENE_DTYPE)
        for i in range(num_scenes):
            scenes[i]["host"] = "some-host"
            scenes[i]["start_time"] = i * 1000
            scenes[i]["end_time"] = (i + 1) * 1000
            scenes[i]["frame_index_interval"] = [i * 10, (i + 1) * 10]

        combined_scenes = get_combined_scenes(scenes)
        self.assertEqual(len(combined_scenes), 1)
        combo_scene = combined_scenes[0]
        self.assertEqual(combo_scene["host"], "some-host")
        self.assertEqual(combo_scene["start_time"], 0)
        self.assertEqual(combo_scene["end_time"], 10000)
        np.testing.assert_array_equal(combo_scene["frame_index_interval"],
                                      np.array([0, 100]))

        # Scenes only combine when they belong to the same host
        scenes[1]["host"] = "some-other-host"
        combined_scenes = get_combined_scenes(scenes)
        self.assertEqual(len(combined_scenes), 3)

        # ... and their timestamps must be exactly contiguous
        scenes[5]["start_time"] += 1
        combined_scenes = get_combined_scenes(scenes)
        self.assertEqual(len(combined_scenes), 4)
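These tests build records with a structured NumPy dtype named SCENE_DTYPE. Its real definition ships with the library, but a minimal sketch consistent with the four fields used above (field order and string width are assumptions) would be:

import numpy as np

# Assumed reconstruction of SCENE_DTYPE from the fields the tests touch;
# the library's actual definition may differ.
SCENE_DTYPE = np.dtype(
    [
        ("frame_index_interval", np.int64, (2,)),  # [first_frame, one_past_last_frame]
        ("host", "<U16"),                          # identifier of the recording host
        ("start_time", np.int64),                  # scene start timestamp
        ("end_time", np.int64),                    # scene end timestamp
    ]
)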
Example #2
    def test_trivial_input(self) -> None:
        # One scene
        scenes = np.zeros(1, dtype=SCENE_DTYPE)
        scenes[0]["host"] = "some-host"
        scenes[0]["start_time"] = 0
        scenes[0]["end_time"] = 1000
        scenes[0]["frame_index_interval"] = [0, 10]

        combined_scenes = get_combined_scenes(scenes)
        self.assertEqual(len(combined_scenes), 1)
        np.testing.assert_array_equal(scenes, combined_scenes)
Example #3
    def test_empty_input(self) -> None:
        # Empty
        scenes = np.array([], dtype=SCENE_DTYPE)
        combined_scenes = get_combined_scenes(scenes)
        self.assertEqual(len(combined_scenes), 0)
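Taken together, the three tests pin down the contract of get_combined_scenes: consecutive scenes merge when they share a host and their timestamps are exactly contiguous, and an empty input yields an empty output. A minimal sketch that satisfies these tests (an illustration, not the library's actual implementation) could look like:

import numpy as np

def get_combined_scenes_sketch(scenes: np.ndarray) -> np.ndarray:
    # Merge runs of consecutive scenes with the same host and exactly
    # contiguous timestamps, as the tests above require.
    combined = np.empty(len(scenes), dtype=scenes.dtype)
    n = 0  # number of combined scenes written so far
    for scene in scenes:
        prev = combined[n - 1] if n > 0 else None
        if (
            prev is not None
            and prev["host"] == scene["host"]
            and prev["end_time"] == scene["start_time"]
        ):
            # Extend the previous combined scene in place.
            prev["end_time"] = scene["end_time"]
            prev["frame_index_interval"][1] = scene["frame_index_interval"][1]
        else:
            combined[n] = scene
            n += 1
    return combined[:n]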
Example #4
def select_agents(
    input_folder: str,
    th_agent_prob: float,
    th_history_num_frames: int,
    th_future_num_frames: int,
    th_yaw_degree: float,
    th_extent_ratio: float,
    th_movement: float,
    th_distance_av: float,
    num_workers: int,
) -> None:
    """
    Filter agents from zarr INPUT_FOLDER according to multiple thresholds and store a boolean array of the same shape.
    """
    assert th_future_num_frames > 0

    # ===== LOAD
    dm = LocalDataManager()
    input_folder = dm.require(input_folder)

    zarr_dataset = ChunkedStateDataset(path=input_folder)
    zarr_dataset.open()
    zarr_dataset.scenes = get_combined_scenes(zarr_dataset.scenes)

    output_group = f"{th_history_num_frames}_{th_future_num_frames}_{th_agent_prob}"
    if "agents_mask" in zarr_dataset.root and f"agents_mask/{output_group}" in zarr_dataset.root:
        raise FileExistsError(
            f"{output_group} exists already! only one is supported for now!")

    frame_index_intervals = zarr_dataset.scenes["frame_index_interval"]

    # build a partial with all args except the first one (the frame interval, supplied by the worker processes)
    get_valid_agents_partial = partial(
        get_valid_agents,
        dataset=zarr_dataset,
        th_frames_past=th_history_num_frames,
        th_frames_future=th_future_num_frames,
        th_agent_filter_probability_threshold=th_agent_prob,
        th_yaw_degree=th_yaw_degree,
        th_extent_ratio=th_extent_ratio,
        th_movement=th_movement,
        th_distance_av=th_distance_av,
    )

    try:
        root = zarr.open(zarr_dataset.path, mode="a")
        root.create_group("agents_mask")
    except ValueError:
        pass  # group is already there

    agents_mask = zarr.open_array(
        str(Path(zarr_dataset.path) / "agents_mask" / output_group),
        mode="w",
        shape=(len(zarr_dataset.agents), ),
        chunks=(10000, ),
        dtype=bool,  # np.bool was removed in NumPy 1.24; the builtin bool works across versions
        synchronizer=zarr.ProcessSynchronizer(
            f"/tmp/ag_mask_{str(uuid4())}.sync"),
    )

    report: Counter = Counter()
    print("starting pool...")
    with Pool(num_workers) as pool:
        tasks = tqdm(
            enumerate(
                pool.imap_unordered(get_valid_agents_partial,
                                    frame_index_intervals)))
        for idx, (mask, count, agents_range) in tasks:
            report += count
            agents_mask[agents_range[0]:agents_range[1]] = mask
        print("collecting results..")

    assert (report["total_agent_frames"] == report["selected_agent_frames"] +
            report["total_reject"]), "something went REALLY wrong"

    agents_cfg = {
        "th_history_num_frames": th_history_num_frames,
        "th_future_num_frames": th_future_num_frames,
        "th_agent_filter_probability_threshold": th_agent_prob,
        "th_yaw_degree": th_yaw_degree,
        "th_extent_ratio": th_extent_ratio,
        "th_movement": th_movement,
        "th_distance_av": th_distance_av,
    }
    # print report
    pp = pprint.PrettyPrinter(indent=4)
    print(f"start report for {input_folder}")
    pp.pprint({**agents_cfg, **report})
    print(f"end report for {input_folder}")
    print("==============================")
Example #5
def build_dataloader(
    cfg: Dict,
    split: str,
    data_manager: DataManager,
    dataset_class: Callable,
    rasterizer: Rasterizer,
    perturbation: Optional[Perturbation] = None,
) -> DataLoader:
    """
    Util function to build a dataloader from a dataset of dataset_class. Note that rasterizer and perturbation
    are passed in explicitly because their factory functions are likely to change between repos.

    Args:
        cfg (dict): configuration dict
        split (str): this will be used to index the cfg to get the correct datasets (train or val currently)
        data_manager (DataManager): manager for resolving paths
        dataset_class (Callable): a class object (EgoDataset or AgentDataset currently) to build the dataset
        rasterizer (Rasterizer): the rasterizer for the dataset
        perturbation (Optional[Perturbation]): an optional perturbation object

    Returns:
        DataLoader: pytorch Dataloader object built with Concat and Sub datasets
    """

    data_loader_cfg = cfg[f"{split}_data_loader"]
    datasets = []
    for dataset_param in data_loader_cfg["datasets"]:
        zarr_dataset_path = data_manager.require(key=dataset_param["key"])
        zarr_dataset = ChunkedStateDataset(path=zarr_dataset_path)
        zarr_dataset.open()
        zarr_dataset.scenes = get_combined_scenes(zarr_dataset.scenes)

        #  Let's load the zarr dataset with our dataset.
        dataset = dataset_class(cfg,
                                zarr_dataset,
                                rasterizer,
                                perturbation=perturbation)

        scene_indices = dataset_param["scene_indices"]
        scene_subsets = []

        if scene_indices[0] == -1:  # TODO replace with empty
            scene_subset = Subset(dataset, np.arange(0, len(dataset)))
            scene_subsets.append(scene_subset)
        else:
            for scene_idx in scene_indices:
                valid_indices = dataset.get_scene_indices(scene_idx)
                scene_subset = Subset(dataset, valid_indices)
                scene_subsets.append(scene_subset)

        datasets.extend(scene_subsets)

    #  Let's concatenate the training scenes into one dataset for the data loader to load from.
    concat_dataset: ConcatDataset = ConcatDataset(datasets)

    #  Initialize the data loader that our training loop will iterate on.
    batch_size = data_loader_cfg["batch_size"]
    shuffle = data_loader_cfg["shuffle"]
    num_workers = data_loader_cfg["num_workers"]
    dataloader = DataLoader(dataset=concat_dataset,
                            batch_size=batch_size,
                            shuffle=shuffle,
                            num_workers=num_workers)

    return dataloader
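A minimal call might look like this; the cfg below contains only the keys build_dataloader actually reads, and the dataset key, scene indices, and the pre-built data_manager and rasterizer objects are placeholders:

# Hypothetical configuration; real configs carry many more keys.
cfg = {
    "train_data_loader": {
        "datasets": [{"key": "scenes/train.zarr", "scene_indices": [-1]}],
        "batch_size": 32,
        "shuffle": True,
        "num_workers": 4,
    }
}
train_loader = build_dataloader(cfg, "train", data_manager, EgoDataset, rasterizer)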