def load_agents_mask(self) -> np.ndarray:
    """
    Fetch the agents mask stored in the zarr for the threshold configured in cfg.

    The threshold is read from ``cfg["raster_params"]["filter_agents_threshold"]`` and the mask is
    looked up under ``<dataset.path>/agents_mask/<threshold>``. If that location is missing, a
    ``RuntimeWarning`` is issued and ``select_agents`` is called to generate the mask before loading.

    Returns: the agents mask loaded through ``convenience.load`` for the configured threshold
    """
    threshold = self.cfg["raster_params"]["filter_agents_threshold"]
    mask_location = Path(self.dataset.path) / f"agents_mask/{threshold}"

    # the check is done on the filesystem path, not on the zarr root
    mask_is_missing = not mask_location.exists()
    if mask_is_missing:
        message = (
            f"cannot find the right config in {self.dataset.path},\n"
            f"your cfg has loaded filter_agents_threshold={threshold};\n"
            "but that value doesn't have a match among the agents_mask in the zarr\n"
            "Mask will now be generated for that parameter."
        )
        # stacklevel=2 attributes the warning to the caller of this method
        warnings.warn(message, RuntimeWarning, stacklevel=2)
        select_agents(
            self.dataset,
            threshold,
            th_yaw_degree=TH_YAW_DEGREE,
            th_extent_ratio=TH_EXTENT_RATIO,
            th_distance_av=TH_DISTANCE_AV,
        )

    # note (lberg): this doesn't update root
    return convenience.load(str(mask_location))
def get_agent_indices_set(
        dataset: ChunkedDataset,
        filter_agents_threshold: float,
        min_frame_histories: List,
        min_frame_future: int,
) -> List[set]:
    """
    Build one set of agent indices per requested history threshold.

    The agents mask stored under ``<dataset.path>/agents_mask/<filter_agents_threshold>`` is loaded
    (and first generated through ``select_agents`` if that path does not exist). Thresholds are then
    processed in ascending order over a single cumulative boolean mask:

    - for the smallest threshold the entry is the set of all agent indices whose mask column 1 is
      ``>= min_frame_future`` and whose mask column 0 is ``>=`` that threshold;
    - for every other threshold the entry only holds the indices of that first (base) set which are
      rejected at the larger threshold, i.e. it is a set of removals relative to the base set.

    Args:
        dataset (ChunkedDataset): dataset whose ``path`` contains the ``agents_mask`` folder
        filter_agents_threshold (float): threshold identifying which stored agents mask to use
        min_frame_histories (List): history thresholds, in any order
        min_frame_future (int): minimum value required in mask column 1

    Returns:
        List[set]: one set per element of ``min_frame_histories``, in the same order as the input
        (an empty list when ``min_frame_histories`` is empty)
    """
    agents_mask_path = Path(dataset.path) / f"agents_mask/{filter_agents_threshold}"
    if not agents_mask_path.exists():  # don't check in root but check for the path
        print(
            f"cannot find the right config in {dataset.path},\n"
            f"your cfg has loaded filter_agents_threshold={filter_agents_threshold};\n"
            "but that value doesn't have a match among the agents_mask in the zarr\n"
            "Mask will now be generated for that parameter.")
        select_agents(
            dataset,
            filter_agents_threshold,
            th_yaw_degree=TH_YAW_DEGREE,
            th_extent_ratio=TH_EXTENT_RATIO,
            th_distance_av=TH_DISTANCE_AV,
        )

    agents_mask = convenience.load(str(agents_mask_path))  # note (lberg): this doesn't update root

    # positions of min_frame_histories visited from the smallest to the largest threshold
    ascending_positions = sorted(range(len(min_frame_histories)), key=lambda i: min_frame_histories[i])

    result_mask = agents_mask[:, 1] >= min_frame_future
    past_counts = agents_mask[:, 0]
    del agents_mask
    gc.collect()

    # Each result is written straight into the slot of the threshold it belongs to, so the output
    # stays aligned with min_frame_histories for every input ordering (indexing the sorted results
    # with the argsort again is only correct when that permutation is its own inverse).
    results: List = [None] * len(min_frame_histories)
    base_indices = None
    for orig_position in ascending_positions:
        # thresholds only grow, so the mask can be tightened in place
        result_mask[past_counts < min_frame_histories[orig_position]] = False
        if base_indices is None:
            agents_indices = np.nonzero(result_mask)[0]
            base_indices = set(agents_indices)
            del agents_indices
            gc.collect()
            results[orig_position] = base_indices
        else:
            # only record what this threshold removes from the base set
            results[orig_position] = {idx for idx in base_indices if not result_mask[idx]}

    return results
def test_lazy_loader():
    """Save two arrays into a group store and check the object returned by ``load`` exposes them."""
    store = 'data/group.zarr'
    ascending = np.arange(100)
    descending = np.arange(100, 0, -1)
    save(store, foo=ascending, bar=descending)

    loader = load(store)

    # membership: only the saved names are present
    for saved_name in ('foo', 'bar'):
        assert saved_name in loader
    assert 'baz' not in loader

    # size and iteration
    assert len(loader) == 2
    assert sorted(loader) == ['bar', 'foo']

    # content round-trips unchanged
    for expected, saved_name in ((ascending, 'foo'), (descending, 'bar')):
        assert_array_equal(expected, loader[saved_name])
def create_chopped_mask(zarr_path: str, th_agent_prob: float, num_frames_to_copy: int, min_frame_future: int) -> str:
    """Create mask to emulate chopped dataset with gt data.

    For every scene, the agents of the frame at offset ``num_frames_to_copy - 1`` from the scene start
    are flagged according to whether column 1 of the stored agents mask is ``>= min_frame_future``.
    All other agents stay ``False``.

    Args:
        zarr_path (str): input zarr path to be chopped
        th_agent_prob (float): threshold over agents probabilities used in select_agents function
        num_frames_to_copy (int): number of frames to copy from the beginning of each scene, others will be discarded
        min_frame_future (int): minimum number of frames that must be available in the future for an agent

    Returns:
        str: the path handed to ``np.savez`` for the mask
            (NOTE: ``np.savez`` appends ``.npz`` when the name does not already end with it)
    """
    zarr_path = Path(zarr_path)
    mask_chopped_path = get_mask_chopped_path(zarr_path, th_agent_prob, num_frames_to_copy, min_frame_future)

    # Create standard mask for the dataset so we can use it to filter out unreliable agents
    zarr_dt = ChunkedDataset(str(zarr_path))
    zarr_dt.open()

    agents_mask_path = zarr_path / f"agents_mask/{th_agent_prob}"
    if not agents_mask_path.exists():  # don't check in root but check for the path
        select_agents(
            zarr_dt,
            th_agent_prob=th_agent_prob,
            th_yaw_degree=TH_YAW_DEGREE,
            th_extent_ratio=TH_EXTENT_RATIO,
            th_distance_av=TH_DISTANCE_AV,
        )
    agents_mask_origin = np.asarray(convenience.load(str(agents_mask_path)))

    # compute the original boolean mask limited to the frames of interest
    # (builtin bool is used because the np.bool alias is not available in every NumPy release)
    agents_mask_orig_bool = np.zeros(len(zarr_dt.agents), dtype=bool)

    for idx in range(len(zarr_dt.scenes)):
        scene = zarr_dt.scenes[idx]

        # last frame that would survive the chop for this scene
        frame_original = zarr_dt.frames[scene["frame_index_interval"][0] + num_frames_to_copy - 1]
        slice_agents_original = get_agents_slice_from_frames(frame_original)

        mask = agents_mask_origin[slice_agents_original][:, 1] >= min_frame_future
        agents_mask_orig_bool[slice_agents_original] = mask

    # store the mask
    np.savez(str(mask_chopped_path), agents_mask_orig_bool)
    return str(mask_chopped_path)
def test_lazy_loader(zarr_version):
    """Save two arrays for the given zarr version and check the loader returned by ``load``."""
    if zarr_version == 2:
        store = 'data/group.zarr'
    else:
        store = 'data/group.zr3'
    kwargs = _init_creation_kwargs(zarr_version)

    ascending = np.arange(100)
    descending = np.arange(100, 0, -1)
    save(store, foo=ascending, bar=descending, **kwargs)

    loader = load(store, **kwargs)

    # membership: only the saved names are present
    for saved_name in ('foo', 'bar'):
        assert saved_name in loader
    assert 'baz' not in loader

    # size and iteration
    assert len(loader) == 2
    assert sorted(loader) == ['bar', 'foo']

    # content round-trips unchanged
    for expected, saved_name in ((ascending, 'foo'), (descending, 'bar')):
        assert_array_equal(expected, loader[saved_name])

    # the repr identifies the loader type
    assert 'LazyLoader: ' in repr(loader)
def test_load_array(zarr_version):
    """Check that ``load`` with an explicit ``path`` returns each saved array as a numpy array."""
    if zarr_version == 2:
        store = 'data/group.zarr'
    else:
        store = 'data/group.zr3'
    kwargs = _init_creation_kwargs(zarr_version)

    expected_arrays = {'foo': np.arange(100), 'bar': np.arange(100, 0, -1)}
    save(store, **expected_arrays, **kwargs)

    # can also load arrays directly into a numpy array
    for array_name, expected in expected_arrays.items():
        if zarr_version == 3:
            array_path = 'dataset/' + array_name
        else:
            array_path = array_name

        array = load(store, path=array_path, zarr_version=zarr_version)

        assert isinstance(array, np.ndarray)
        assert_array_equal(expected, array)
def create_chopped_dataset_lite(
        zarr_path: str, th_agent_prob: float, num_frames_to_copy: int, num_frames_gt: int, min_frame_future: int,
        history_num_frames: int
) -> str:
    """
    Create a chopped version of the zarr that can be used as a test set. This version only includes frames from
    num_frames_to_copy - history_num_frames : num_frames_to_copy.
    This function was used to generate the test set for the competition so that the future GT is not in the data.

    Store:
     - a dataset where each scene has been chopped at `num_frames_to_copy` frames;
     - a mask for agents for those final frames based on the original mask and a threshold on the future_frames;
     - the GT csv for those agents

     For the competition, only the first two (dataset and mask) will be available in the notebooks

    If the GT csv already exists in the destination folder nothing is recomputed: a completion message is printed
    and the destination folder is returned.

    Args:
        zarr_path (str): input zarr path to be chopped
        th_agent_prob (float): threshold over agents probabilities used in select_agents function
        num_frames_to_copy (int): number of frames to copy from the beginning of each scene, others will be discarded
        num_frames_gt (int): number of future predictions to store in the GT file
        min_frame_future (int): minimum number of frames that must be available in the future for an agent
        history_num_frames (int): number of historic frames to include

    Returns:
        str: the parent folder of the new data
    """
    zarr_path = Path(zarr_path)
    dest_path = zarr_path.parent / f"{zarr_path.stem}_chopped_{num_frames_to_copy}_lite_{history_num_frames}_{num_frames_gt}"
    chopped_path = dest_path / zarr_path.name
    gt_path = dest_path / "gt.csv"
    mask_chopped_path = dest_path / "mask"

    # the GT csv is written last, so its presence marks a finished run
    if os.path.exists(gt_path):
        print(' : '.join((str(gt_path), 'COMPLETED')))
        return str(dest_path)

    # Create standard mask for the dataset so we can use it to filter out unreliable agents
    zarr_dt = ChunkedDataset(str(zarr_path))
    zarr_dt.open()

    agents_mask_path = zarr_path / f"agents_mask/{th_agent_prob}"
    if not agents_mask_path.exists():  # don't check in root but check for the path
        select_agents(
            zarr_dt,
            th_agent_prob=th_agent_prob,
            th_yaw_degree=TH_YAW_DEGREE,
            th_extent_ratio=TH_EXTENT_RATIO,
            th_distance_av=TH_DISTANCE_AV,
        )
    agents_mask_origin = np.asarray(convenience.load(str(agents_mask_path)))

    # create chopped dataset
    chopped_info_filename = os.path.join(os.path.split(chopped_path)[0], 'chopped_info.pkl')
    chopped_indices = check_load(chopped_info_filename, zarr_scenes_chop_lite, str(chopped_path), save_to_file=True,
                                 args_in=(str(zarr_path), str(chopped_path), num_frames_to_copy, history_num_frames),
                                 verbose=True)

    zarr_chopped = ChunkedDataset(str(chopped_path))
    zarr_chopped.open()

    # Map each original scene index to its position in the chopped dataset once, instead of scanning
    # chopped_indices twice per scene. setdefault keeps the first occurrence, like list.index does.
    chopped_position = {}
    for position, scene_idx in enumerate(chopped_indices):
        chopped_position.setdefault(scene_idx, position)

    # compute original and chopped boolean mask limited to frames of interest for GT csv
    # (builtin bool is used because the np.bool alias is not available in every NumPy release)
    agents_mask_orig_bool = np.zeros(len(zarr_dt.agents), dtype=bool)
    agents_mask_chop_bool = np.zeros(len(zarr_chopped.agents), dtype=bool)

    for idx in tqdm(range(len(zarr_dt.scenes)), desc='Extracting masks'):
        scene = zarr_dt.scenes[idx]

        frame_original = zarr_dt.frames[scene["frame_index_interval"][0] + num_frames_to_copy - 1]
        slice_agents_original = get_agents_slice_from_frames(frame_original)

        mask = agents_mask_origin[slice_agents_original][:, 1] >= min_frame_future
        agents_mask_orig_bool[slice_agents_original] = mask

        # scenes missing from chopped_indices have no counterpart in the chopped dataset
        position = chopped_position.get(idx)
        if position is not None:
            chopped_scene = zarr_chopped.scenes[position]

            # last frame of the chopped scene
            frame_chopped = zarr_chopped.frames[chopped_scene["frame_index_interval"][-1] - 1]
            slice_agents_chopped = get_agents_slice_from_frames(frame_chopped)

            agents_mask_chop_bool[slice_agents_chopped] = mask

    # Store the mask
    np.savez(str(mask_chopped_path), agents_mask_chop_bool)
    # Store the GT
    export_zarr_to_csv(zarr_dt, str(gt_path), num_frames_gt, th_agent_prob, agents_mask=agents_mask_orig_bool)

    return str(dest_path)
def create_chopped_dataset(zarr_path: str, th_agent_prob: float, num_frames_to_copy: int, num_frames_gt: int,
                           min_frame_future: int) -> str:
    """
    Create a chopped version of the zarr that can be used as a test set.
    This function was used to generate the test set for the competition so that the future GT is not in the data.

    Store:
     - a dataset where each scene has been chopped at `num_frames_to_copy` frames;
     - a mask for agents for those final frames based on the original mask and a threshold on the future_frames;
     - the GT csv for those agents

     For the competition, only the first two (dataset and mask) will be available in the notebooks

    Args:
        zarr_path (str): input zarr path to be chopped
        th_agent_prob (float): threshold over agents probabilities used in select_agents function
        num_frames_to_copy (int): number of frames to copy from the beginning of each scene, others will be discarded
        num_frames_gt (int): number of future predictions to store in the GT file
        min_frame_future (int): minimum number of frames that must be available in the future for an agent

    Returns:
        str: the parent folder of the new data
    """
    zarr_path = Path(zarr_path)
    dest_path = zarr_path.parent / f"{zarr_path.stem}_chopped_{num_frames_to_copy}"
    chopped_path = dest_path / zarr_path.name
    gt_path = dest_path / "gt.csv"
    mask_chopped_path = dest_path / "mask"

    # Create standard mask for the dataset so we can use it to filter out unreliable agents
    zarr_dt = ChunkedDataset(str(zarr_path))
    zarr_dt.open()

    agents_mask_path = zarr_path / f"agents_mask/{th_agent_prob}"
    if not agents_mask_path.exists():  # don't check in root but check for the path
        select_agents(
            zarr_dt,
            th_agent_prob=th_agent_prob,
            th_yaw_degree=TH_YAW_DEGREE,
            th_extent_ratio=TH_EXTENT_RATIO,
            th_distance_av=TH_DISTANCE_AV,
        )
    agents_mask_origin = np.asarray(convenience.load(str(agents_mask_path)))

    # create chopped dataset
    zarr_scenes_chop(str(zarr_path), str(chopped_path), num_frames_to_copy=num_frames_to_copy)
    zarr_chopped = ChunkedDataset(str(chopped_path))
    zarr_chopped.open()

    # compute the chopped boolean mask, but also the original one limited to frames of interest for GT csv
    # (builtin bool is used because the np.bool alias is not available in every NumPy release)
    agents_mask_chop_bool = np.zeros(len(zarr_chopped.agents), dtype=bool)
    agents_mask_orig_bool = np.zeros(len(zarr_dt.agents), dtype=bool)

    for idx in range(len(zarr_dt.scenes)):
        scene = zarr_dt.scenes[idx]

        frame_original = zarr_dt.frames[scene["frame_index_interval"][0] + num_frames_to_copy - 1]
        slice_agents_original = get_agents_slice_from_frames(frame_original)

        # last frame of the scene with the same index in the chopped dataset
        frame_chopped = zarr_chopped.frames[zarr_chopped.scenes[idx]["frame_index_interval"][-1] - 1]
        slice_agents_chopped = get_agents_slice_from_frames(frame_chopped)

        # the same per-agent decision is written into both masks
        mask = agents_mask_origin[slice_agents_original][:, 1] >= min_frame_future
        agents_mask_orig_bool[slice_agents_original] = mask
        agents_mask_chop_bool[slice_agents_chopped] = mask

    # store the mask and the GT csv of frames on interest
    np.savez(str(mask_chopped_path), agents_mask_chop_bool)
    export_zarr_to_csv(zarr_dt, str(gt_path), num_frames_gt, th_agent_prob, agents_mask=agents_mask_orig_bool)
    return str(dest_path)