def test_compute_mse_error(tmp_path: Path, zarr_dataset: ChunkedDataset) -> None:
    export_zarr_to_ground_truth_csv(zarr_dataset, str(tmp_path / "gt1.csv"), 10, 50, 0.5)
    export_zarr_to_ground_truth_csv(zarr_dataset, str(tmp_path / "gt2.csv"), 10, 50, 0.5)
    err = compute_mse_error_csv(str(tmp_path / "gt1.csv"), str(tmp_path / "gt2.csv"))
    assert np.all(err == 0.0)

    data_fake = ChunkedDataset(str(tmp_path))
    data_fake.scenes = np.asarray(zarr_dataset.scenes).copy()
    data_fake.frames = np.asarray(zarr_dataset.frames).copy()
    data_fake.agents = np.asarray(zarr_dataset.agents).copy()
    data_fake.agents["centroid"] += np.random.rand(*data_fake.agents["centroid"].shape) * 1e-2

    export_zarr_to_ground_truth_csv(data_fake, str(tmp_path / "gt3.csv"), 10, 50, 0.5)
    err = compute_mse_error_csv(str(tmp_path / "gt1.csv"), str(tmp_path / "gt3.csv"))
    assert np.any(err > 0.0)

    # test invalid conf by removing lines in gt1
    with open(str(tmp_path / "gt1.csv")) as fp:
        lines = fp.readlines()
    with open(str(tmp_path / "gt4.csv"), "w") as fp:
        fp.writelines(lines[:-10])

    with pytest.raises(ValueError):
        compute_mse_error_csv(str(tmp_path / "gt1.csv"), str(tmp_path / "gt4.csv"))
def test_graph_rasterizer_no_error():
    # frame 150 contains a traffic light
    index = 150
    cfg = load_config_data(config_file)
    cfg["raster_params"]["map_type"] = "semantic_graph"

    data_loader_conf = cfg.get("val_data_loader")
    dm = LocalDataManager()
    dataset_path = dm.require(data_loader_conf.get("key"))
    zarr_dataset = ChunkedDataset(dataset_path)
    zarr_dataset.open()

    rasterizer = build_rasterizer(cfg=cfg, data_manager=dm)
    dataset = AgentGraphDataset(cfg=cfg, zarr_dataset=zarr_dataset, rasterizer=rasterizer)
    data_point = dataset[index]

    assert "graph" in data_point
    assert "lanes" in data_point["graph"]
    assert isinstance(data_point["graph"]["lanes"], list)

    print()
    print(data_point.keys())
    element_types = SemGraphRasterizer.keys
    for e in element_types:
        print(f"---- {e} ----")
        if len(data_point["graph"][e]) > 0:
            print(data_point["graph"][e][0])
def uncompress_zar(fn_src, fn_dst):
    print(fn_src)
    print(fn_dst)
    print(zarr.storage.default_compressor)
    # disable compression for everything written to the destination dataset
    zarr.storage.default_compressor = None

    ds = ChunkedDataset(fn_src).open(cached=False)
    dst_dataset = ChunkedDataset(fn_dst)
    dst_dataset.initialize()

    with utils.timeit_context("copy scenes"):
        dst_dataset.scenes.append(ds.scenes[:])
    with utils.timeit_context("copy frames"):
        dst_dataset.frames.append(ds.frames[:])
    with utils.timeit_context("copy agents"):
        # agents are by far the largest array: copy them in 1M-element chunks
        for i in tqdm(range(0, len(ds.agents), 1024 * 1024)):
            dst_dataset.agents.append(ds.agents[i:i + 1024 * 1024])
    with utils.timeit_context("copy tl_faces"):
        dst_dataset.tl_faces.append(ds.tl_faces[:])
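# Usage sketch for uncompress_zar above: copy a compressed zarr into an
# uncompressed one. The paths below are hypothetical placeholders.
if __name__ == "__main__":
    uncompress_zar("data/scenes/train.zarr", "data/scenes/train_uncompressed.zarr")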
def setup(self, stage=None):
    train_zarr = ChunkedDataset(self.dm.require(self.train_cfg["key"])).open()
    self.train_dataset = AgentDataset(self.cfg, train_zarr, self.rasterizer)

    val_zarr = ChunkedDataset(self.dm.require(self.val_cfg["key"])).open()
    self.val_dataset = AgentDataset(self.cfg, val_zarr, self.rasterizer)
def hist_data() -> tuple:
    zarr_dataset = ChunkedDataset(path="./l5kit/tests/artefacts/single_scene.zarr")
    zarr_dataset.open()
    hist_frames = zarr_dataset.frames[100:111][::-1]  # reverse to get them as history
    hist_agents = filter_agents_by_frames(hist_frames, zarr_dataset.agents)
    return hist_frames, hist_agents
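# Minimal usage sketch for the hist_data helper above: frames come back
# newest-first, and filter_agents_by_frames returns one agents array per frame.
hist_frames, hist_agents = hist_data()
assert len(hist_frames) == len(hist_agents) == 11
print(hist_frames[0]["timestamp"])  # most recent frame in the slice
print(len(hist_agents[0]))          # agents visible in that frame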
def zarr_cat_dataset(dmg: LocalDataManager, tmp_path: Path) -> ChunkedDataset:
    concat_count = 4
    zarr_input_path = dmg.require("single_scene.zarr")
    zarr_output_path = str(tmp_path / f"{uuid4()}.zarr")
    zarr_concat([zarr_input_path] * concat_count, zarr_output_path)
    zarr_cat_dataset = ChunkedDataset(zarr_output_path)
    zarr_cat_dataset.open()
    return zarr_cat_dataset
def get_frames_subset(dataset: ChunkedDataset, frame_start_idx: int,
                      frame_end_idx: int) -> ChunkedDataset:
    """Get a new dataset with frames between start (included) and end (excluded).

    Assumptions:
    - the dataset has only 1 scene
    - the dataset is in numpy format and not zarr anymore

    :param dataset: the single-scene dataset.
    :param frame_start_idx: first frame to keep.
    :param frame_end_idx: where to stop taking frames (excluded).
    """
    if not len(dataset.scenes) == 1:
        raise ValueError(f"dataset should have a single scene, got {len(dataset.scenes)}")
    if not isinstance(dataset.agents, np.ndarray):
        raise ValueError("dataset agents should be an editable np array")
    if not isinstance(dataset.tl_faces, np.ndarray):
        raise ValueError("dataset tls should be an editable np array")
    if not isinstance(dataset.frames, np.ndarray):
        raise ValueError("dataset frames should be an editable np array")
    if frame_start_idx >= len(dataset.frames):
        raise ValueError(f"frame start {frame_start_idx} is over the length of the dataset")
    if frame_end_idx > len(dataset.frames):
        raise ValueError(f"frame end {frame_end_idx} is over the length of the dataset")
    if frame_start_idx >= frame_end_idx:
        raise ValueError(f"end frame {frame_end_idx} should be higher than start {frame_start_idx}")
    if frame_start_idx < 0:
        raise ValueError(f"start frame {frame_start_idx} should be positive")

    new_dataset = ChunkedDataset("")
    new_dataset.scenes = dataset.scenes.copy()
    new_dataset.scenes[0]["start_time"] = dataset.frames[frame_start_idx]["timestamp"]
    new_dataset.scenes[0]["end_time"] = dataset.frames[frame_end_idx - 1]["timestamp"]

    new_dataset.frames = dataset.frames[frame_start_idx:frame_end_idx].copy()
    new_dataset.scenes[0]["frame_index_interval"] = (0, len(new_dataset.frames))

    agent_slice = get_agents_slice_from_frames(*dataset.frames[[frame_start_idx, frame_end_idx - 1]])
    tls_slice = get_tl_faces_slice_from_frames(*dataset.frames[[frame_start_idx, frame_end_idx - 1]])

    # rebase index intervals so they start at 0 in the new dataset
    new_dataset.frames["agent_index_interval"] -= new_dataset.frames["agent_index_interval"][0, 0]
    new_dataset.frames["traffic_light_faces_index_interval"] -= new_dataset.frames[
        "traffic_light_faces_index_interval"][0, 0]
    new_dataset.agents = dataset.agents[agent_slice].copy()
    new_dataset.tl_faces = dataset.tl_faces[tls_slice].copy()
    return new_dataset
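# Usage sketch for get_frames_subset above. It assumes `cfg`, `zarr_dataset`
# and `rasterizer` exist as in the other snippets here; get_scene_dataset
# returns a numpy-backed single-scene copy, which is what the checks require.
from l5kit.dataset import EgoDataset

ego_dataset = EgoDataset(cfg, zarr_dataset, rasterizer)
scene_ds = ego_dataset.get_scene_dataset(scene_index=0).dataset
sub = get_frames_subset(scene_ds, frame_start_idx=10, frame_end_idx=60)
assert len(sub.frames) == 50
# index intervals are rebased, so the first kept frame starts at agent index 0
assert sub.frames["agent_index_interval"][0, 0] == 0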
def create_chopped_mask(zarr_path: str, th_agent_prob: float, num_frames_to_copy: int,
                        min_frame_future: int) -> str:
    """Create mask to emulate chopped dataset with gt data.

    Args:
        zarr_path (str): input zarr path to be chopped
        th_agent_prob (float): threshold over agents probabilities used in select_agents function
        num_frames_to_copy (int): number of frames to copy from the beginning of each scene,
            others will be discarded
        min_frame_future (int): minimum number of frames that must be available in the future
            for an agent

    Returns:
        str: path to saved mask
    """
    zarr_path = Path(zarr_path)
    mask_chopped_path = get_mask_chopped_path(zarr_path, th_agent_prob, num_frames_to_copy,
                                              min_frame_future)

    # create standard mask for the dataset so we can use it to filter out unreliable agents
    zarr_dt = ChunkedDataset(str(zarr_path))
    zarr_dt.open()

    agents_mask_path = Path(zarr_path) / f"agents_mask/{th_agent_prob}"
    if not agents_mask_path.exists():  # don't check in root but check for the path
        select_agents(
            zarr_dt,
            th_agent_prob=th_agent_prob,
            th_yaw_degree=TH_YAW_DEGREE,
            th_extent_ratio=TH_EXTENT_RATIO,
            th_distance_av=TH_DISTANCE_AV,
        )
    agents_mask_origin = np.asarray(convenience.load(str(agents_mask_path)))

    # compute the chopped boolean mask, limited to the frames of interest for the GT csv
    agents_mask_orig_bool = np.zeros(len(zarr_dt.agents), dtype=bool)

    for idx in range(len(zarr_dt.scenes)):
        scene = zarr_dt.scenes[idx]

        frame_original = zarr_dt.frames[scene["frame_index_interval"][0] + num_frames_to_copy - 1]
        slice_agents_original = get_agents_slice_from_frames(frame_original)

        mask = agents_mask_origin[slice_agents_original][:, 1] >= min_frame_future
        agents_mask_orig_bool[slice_agents_original] = mask.copy()

    # store the mask of the frames of interest
    np.savez(str(mask_chopped_path), agents_mask_orig_bool)
    return str(mask_chopped_path)
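# Usage sketch for create_chopped_mask above (paths and thresholds are
# examples). np.savez stores a single unnamed array under "arr_0", which is the
# key used to load it back, assuming get_mask_chopped_path returns an .npz path.
mask_path = create_chopped_mask("data/scenes/validate.zarr", th_agent_prob=0.5,
                                num_frames_to_copy=100, min_frame_future=10)
chopped_mask = np.load(mask_path)["arr_0"]
print(f"{chopped_mask.sum()} agents kept out of {len(chopped_mask)}")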
def get_loaders(train_batch_size=32, valid_batch_size=64):
    """Prepare loaders.

    Args:
        train_batch_size (int, optional): batch size for training dataset.
            Default is `32`.
        valid_batch_size (int, optional): batch size for validation dataset.
            Default is `64`.

    Returns:
        train and validation data loaders
    """
    rasterizer = build_rasterizer(cfg, dm)
    DATASET_CLASS = AgentDataset

    train_zarr = ChunkedDataset(dm.require("scenes/train_chopped_100/train.zarr")).open()
    train_dataset = DATASET_CLASS(cfg, train_zarr, rasterizer)
    train_loader = DataLoader(
        train_dataset,
        batch_size=train_batch_size,
        num_workers=NUM_WORKERS,
        shuffle=True,
        worker_init_fn=seed_all,
        drop_last=True,
    )
    # train_loader = BatchPrefetchLoaderWrapper(train_loader, num_prefetches=6)
    print(f" * Number of elements in train dataset - {len(train_dataset)}")
    print(f" * Number of elements in train loader - {len(train_loader)}")

    valid_zarr_path = dm.require("scenes/validate_chopped_100/validate.zarr")
    mask_path = dm.require("scenes/validate_chopped_100/mask.npz")
    valid_mask = np.load(mask_path)["arr_0"]
    valid_gt_path = dm.require("scenes/validate_chopped_100/gt.csv")

    valid_zarr = ChunkedDataset(valid_zarr_path).open()
    valid_dataset = DATASET_CLASS(cfg, valid_zarr, rasterizer, agents_mask=valid_mask)
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=valid_batch_size,
        shuffle=False,
        num_workers=NUM_WORKERS,
    )
    print(f" * Number of elements in valid dataset - {len(valid_dataset)}")
    print(f" * Number of elements in valid loader - {len(valid_loader)}")

    return train_loader, (valid_loader, valid_gt_path)
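# Usage sketch: the second return value bundles the validation loader with the
# GT csv path so a metric (e.g. l5kit's compute_metrics_csv) can be computed
# after inference.
train_loader, (valid_loader, valid_gt_path) = get_loaders(train_batch_size=32, valid_batch_size=64)
batch = next(iter(train_loader))
print(batch["image"].shape)  # rasterized model input, (batch, channels, height, width)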
def test_zarr_scenes_chunk(dmg: LocalDataManager, tmp_path: Path,
                           zarr_dataset: ChunkedDataset, num_frames_to_copy: int) -> None:
    # first let's concat so we have multiple scenes
    concat_count = 10
    zarr_input_path = dmg.require("single_scene.zarr")
    zarr_concatenated_path = str(tmp_path / f"{uuid4()}.zarr")
    zarr_concat([zarr_input_path] * concat_count, zarr_concatenated_path)

    # now let's chunk it
    zarr_chopped_path = str(tmp_path / f"{uuid4()}.zarr")
    zarr_scenes_chop(zarr_concatenated_path, zarr_chopped_path,
                     num_frames_to_copy=num_frames_to_copy)

    # open both and compare
    zarr_concatenated = ChunkedDataset(zarr_concatenated_path)
    zarr_concatenated.open()
    zarr_chopped = ChunkedDataset(zarr_chopped_path)
    zarr_chopped.open()

    assert len(zarr_concatenated.scenes) == len(zarr_chopped.scenes)
    assert len(zarr_chopped.frames) == num_frames_to_copy * len(zarr_chopped.scenes)

    for idx in range(len(zarr_concatenated.scenes)):
        scene_cat = zarr_concatenated.scenes[idx]
        scene_chopped = zarr_chopped.scenes[idx]

        frames_cat = zarr_concatenated.frames[
            scene_cat["frame_index_interval"][0]:
            scene_cat["frame_index_interval"][0] + num_frames_to_copy]
        frames_chopped = zarr_chopped.frames[get_frames_slice_from_scenes(scene_chopped)]

        agents_cat = zarr_concatenated.agents[get_agents_slice_from_frames(*frames_cat[[0, -1]])]
        tl_faces_cat = zarr_concatenated.tl_faces[
            get_tl_faces_slice_from_frames(*frames_cat[[0, -1]])]

        agents_chopped = zarr_chopped.agents[get_agents_slice_from_frames(*frames_chopped[[0, -1]])]
        tl_faces_chopped = zarr_chopped.tl_faces[
            get_tl_faces_slice_from_frames(*frames_chopped[[0, -1]])]

        assert scene_chopped["host"] == scene_cat["host"]
        assert scene_chopped["start_time"] == scene_cat["start_time"]
        assert scene_chopped["end_time"] == scene_cat["end_time"]

        assert len(frames_chopped) == num_frames_to_copy
        assert np.all(frames_chopped["ego_translation"]
                      == frames_cat["ego_translation"][:num_frames_to_copy])
        assert np.all(frames_chopped["ego_rotation"]
                      == frames_cat["ego_rotation"][:num_frames_to_copy])

        assert np.all(agents_chopped == agents_cat)
        assert np.all(tl_faces_chopped == tl_faces_cat)
def setup(self, stage=None):
    if self.data_manager is None:
        self.data_manager = LocalDataManager(self.data_root)
    if self.rasterizer is None:
        self.rasterizer = build_rasterizer(self.config, self.data_manager)

    if stage == 'fit' or stage is None:
        train_zarr = ChunkedDataset(self.data_manager.require(self.train_split)).open(
            cache_size_bytes=int(self.cache_size))
        train_data = AgentDataset(self.config, train_zarr, self.rasterizer)
        if self.train_idxs is not None:
            train_data = Subset(train_data, self.train_idxs)

        if self.val_split is None or self.val_split == self.train_split:
            tl = len(train_data)
            vl = int(tl * self.val_proportion)
            self.train_data, self.val_data = random_split(train_data, [tl - vl, vl])
        else:
            self.train_data = train_data
            val_zarr = ChunkedDataset(self.data_manager.require(self.val_split)).open(
                cache_size_bytes=int(self.cache_size))
            self.val_data = AgentDataset(self.config, val_zarr, self.rasterizer)
            if self.val_idxs is not None:
                self.val_data = Subset(self.val_data, self.val_idxs)

        if self.raster_cache_size:
            self.train_data = CachedDataset(self.train_data, self.raster_cache_size)
            self.val_data = CachedDataset(self.val_data, self.raster_cache_size)

    if stage == 'test' or stage is None:
        test_zarr = ChunkedDataset(self.data_manager.require(self.test_split)).open(
            cache_size_bytes=int(self.cache_size))
        if self.test_mask is not None:
            test_data = AgentDataset(self.config, test_zarr, self.rasterizer,
                                     agents_mask=self.test_mask)
        else:
            test_data = AgentDataset(self.config, test_zarr, self.rasterizer)
        if self.test_idxs is not None:
            test_data = Subset(test_data, self.test_idxs)
        else:
            self.test_idxs = np.arange(start=1, stop=len(test_data) + 1)
        self.test_data = IndexedDataset(test_data, self.test_idxs)
def __init__(self):
    print("Visualization Class initialized.")

    # get config
    self.cfg = load_config_data(
        "/mnt/extra/kaggle/competitions/2020lyft/ProjectLyft/Modules/visualisation_config.yaml")
    print(self.cfg)

    dm = LocalDataManager()
    self.dataset_path = dm.require(self.cfg["val_data_loader"]["key"])
    self.zarr_dataset = ChunkedDataset(self.dataset_path)
    self.zarr_dataset.open()

    # Dataset package
    self.rast = build_rasterizer(self.cfg, dm)
    self.dataset = EgoDataset(self.cfg, self.zarr_dataset, self.rast)
def test_zarr_split(dmg: LocalDataManager, tmp_path: Path, zarr_dataset: ChunkedDataset) -> None:
    concat_count = 10
    zarr_input_path = dmg.require("single_scene.zarr")
    zarr_concatenated_path = str(tmp_path / f"{uuid4()}.zarr")
    zarr_concat([zarr_input_path] * concat_count, zarr_concatenated_path)

    split_infos = [
        {"name": f"{uuid4()}.zarr", "split_size_GB": 0.002},  # cut around 2MB
        {"name": f"{uuid4()}.zarr", "split_size_GB": 0.001},  # cut around 0.5MB
        {"name": f"{uuid4()}.zarr", "split_size_GB": -1},  # everything else
    ]
    scene_splits = zarr_split(zarr_concatenated_path, str(tmp_path), split_infos)

    # load the zarrs and check elements
    zarr_concatenated = ChunkedDataset(zarr_concatenated_path)
    zarr_concatenated.open()

    for scene_split, split_info in zip(scene_splits, split_infos):
        zarr_out = ChunkedDataset(str(tmp_path / str(split_info["name"])))
        zarr_out.open()

        # compare elements at the start and end of each scene in both zarrs
        for idx_scene in range(len(zarr_out.scenes)):
            # compare elements in the scene
            input_scene = zarr_concatenated.scenes[scene_split[0] + idx_scene]
            input_frames = zarr_concatenated.frames[get_frames_slice_from_scenes(input_scene)]
            input_agents = zarr_concatenated.agents[
                get_agents_slice_from_frames(*input_frames[[0, -1]])]
            input_tl_faces = zarr_concatenated.tl_faces[
                get_tl_faces_slice_from_frames(*input_frames[[0, -1]])]

            output_scene = zarr_out.scenes[idx_scene]
            output_frames = zarr_out.frames[get_frames_slice_from_scenes(output_scene)]
            output_agents = zarr_out.agents[get_agents_slice_from_frames(*output_frames[[0, -1]])]
            output_tl_faces = zarr_out.tl_faces[
                get_tl_faces_slice_from_frames(*output_frames[[0, -1]])]

            assert np.all(input_frames["ego_translation"] == output_frames["ego_translation"])
            assert np.all(input_frames["ego_rotation"] == output_frames["ego_rotation"])
            assert np.all(input_agents == output_agents)
            assert np.all(input_tl_faces == output_tl_faces)
def get_loaders(train_batch_size=32, valid_batch_size=64):
    """Prepare loaders.

    Args:
        train_batch_size (int, optional): batch size for training dataset.
            Default is `32`.
        valid_batch_size (int, optional): batch size for validation dataset.
            Default is `64`.

    Returns:
        train and validation data loaders
    """
    rasterizer = build_rasterizer(cfg, dm)

    train_zarr = ChunkedDataset(dm.require("scenes/train.zarr")).open()
    train_dataset = AgentDataset(cfg, train_zarr, rasterizer)
    n_samples = len(train_dataset) // 5
    # n_samples = 100
    train_dataset = Subset(train_dataset, list(range(n_samples)))
    train_loader = DataLoader(
        train_dataset,
        batch_size=train_batch_size,
        num_workers=NUM_WORKERS,
        shuffle=True,
        worker_init_fn=seed_all,
        drop_last=True,
    )
    print(f" * Number of elements in train dataset - {len(train_dataset)}")
    print(f" * Number of elements in train loader - {len(train_loader)}")

    eval_zarr_path = dm.require("scenes/validate_chopped_100/validate.zarr")
    eval_gt_path = "scenes/validate_chopped_100/gt.csv"
    eval_mask_path = "./data/scenes/validate_chopped_100/mask.npz"
    eval_mask = np.load(eval_mask_path)["arr_0"]

    valid_zarr = ChunkedDataset(eval_zarr_path).open()
    valid_dataset = AgentDataset(cfg, valid_zarr, rasterizer)
    # valid_dataset = Subset(valid_dataset, list(range(200_000)))
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=valid_batch_size,
        shuffle=False,
        num_workers=NUM_WORKERS,
    )
    print(f" * Number of elements in valid dataset - {len(valid_dataset)}")
    print(f" * Number of elements in valid loader - {len(valid_loader)}")

    return train_loader, valid_loader
def val_dataloader(self):
    # create the chopped dataset
    rasterizer = build_rasterizer(cfg, dm)
    eval_cfg = cfg["valid_data_loader"]
    num_frames_to_chop = 100
    eval_base_path = create_chopped_dataset(
        dm.require(eval_cfg["key"]),
        cfg["raster_params"]["filter_agents_threshold"],
        num_frames_to_chop,
        cfg["model_params"]["future_num_frames"],
        MIN_FUTURE_STEPS)

    eval_zarr_path = str(Path(eval_base_path) / Path(dm.require(eval_cfg["key"])).name)
    eval_mask_path = str(Path(eval_base_path) / "mask.npz")
    eval_gt_path = str(Path(eval_base_path) / "gt.csv")
    self.eval_gt_path = eval_gt_path

    eval_zarr = ChunkedDataset(eval_zarr_path).open(cache_size_bytes=int(10e9))
    eval_mask = np.load(eval_mask_path)["arr_0"]
    eval_dataset = AgentDataset(cfg, eval_zarr, rasterizer, agents_mask=eval_mask)
    eval_dataloader = DataLoader(eval_dataset,
                                 shuffle=False,
                                 batch_size=eval_cfg["batch_size"],
                                 num_workers=8)
    return eval_dataloader
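# Sketch of the evaluation step this dataloader feeds, using l5kit's csv metric
# helper. "pred.csv" is a placeholder for the predictions written by the model,
# and `model` is a hypothetical instance of the module defining val_dataloader,
# whose eval_gt_path attribute was stored above.
from l5kit.evaluation import compute_metrics_csv
from l5kit.evaluation.metrics import neg_multi_log_likelihood

metrics = compute_metrics_csv(model.eval_gt_path, "pred.csv", [neg_multi_log_likelihood])
print(metrics)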
def disable_agents(dataset: ChunkedDataset, allowlist: np.ndarray) -> None:
    """Disable all agents in dataset except for the ones in allowlist.

    Assumptions:
    - the dataset has only 1 scene
    - the dataset is in numpy format and not zarr anymore

    :param dataset: the single-scene dataset
    :param allowlist: 1D np array of track_ids to keep
    """
    if not len(dataset.scenes) == 1:
        raise ValueError(f"dataset should have a single scene, got {len(dataset.scenes)}")
    if not isinstance(dataset.agents, np.ndarray):
        raise ValueError("dataset agents should be an editable np array")
    if not len(allowlist.shape) == 1:
        raise ValueError("allowlist should be 1D")

    agent_track_ids = dataset.agents["track_id"]
    mask_disable = ~np.in1d(agent_track_ids, allowlist)

    # this will set those agents as invisible
    # we also zero their pose and extent
    dataset.agents["centroid"][mask_disable] *= 0
    dataset.agents["yaw"][mask_disable] *= 0
    dataset.agents["extent"][mask_disable] *= 0
    dataset.agents["label_probabilities"][mask_disable] = -1
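# Usage sketch for disable_agents above: keep only track_id 1 in a numpy-backed
# single-scene dataset (`scene_ds` as produced e.g. by get_frames_subset; the
# track id is illustrative).
keep_ids = np.asarray([1])
disable_agents(scene_ds, allowlist=keep_ids)
hidden = ~np.in1d(scene_ds.agents["track_id"], keep_ids)
assert np.all(scene_ds.agents["label_probabilities"][hidden] == -1)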
def test_simulation_dataset_build(zarr_cat_dataset: ChunkedDataset, dmg: LocalDataManager,
                                  cfg: dict, tmp_path: Path) -> None:
    # modify one frame to ensure everything works also when scenes are different
    zarr_cat_dataset.frames = np.asarray(zarr_cat_dataset.frames)
    for scene_idx in range(len(zarr_cat_dataset.scenes)):
        frame_slice = get_frames_slice_from_scenes(zarr_cat_dataset.scenes[scene_idx])
        zarr_cat_dataset.frames[frame_slice.start]["ego_translation"] += np.random.randn(3)

    rasterizer = build_rasterizer(cfg, dmg)
    ego_dataset = EgoDataset(cfg, zarr_cat_dataset, rasterizer)
    sim_cfg = SimulationConfig(use_ego_gt=True, use_agents_gt=True, disable_new_agents=False,
                               distance_th_far=30, distance_th_close=10)

    # we should be able to create the same object by using both constructor and factory
    scene_indices = list(range(len(zarr_cat_dataset.scenes)))

    scene_dataset_batch: Dict[int, EgoDataset] = {}
    for scene_idx in scene_indices:
        scene_dataset = ego_dataset.get_scene_dataset(scene_idx)
        scene_dataset_batch[scene_idx] = scene_dataset
    sim_1 = SimulationDataset(scene_dataset_batch, sim_cfg)

    sim_2 = SimulationDataset.from_dataset_indices(ego_dataset, scene_indices, sim_cfg)

    for (k_1, v_1), (k_2, v_2) in zip(sim_1.scene_dataset_batch.items(),
                                      sim_2.scene_dataset_batch.items()):
        assert k_1 == k_2
        assert np.allclose(v_1.dataset.frames["ego_translation"],
                           v_2.dataset.frames["ego_translation"])
def get_loaders(train_batch_size=32, valid_batch_size=64):
    """Prepare loaders.

    Args:
        train_batch_size (int, optional): batch size for training dataset.
            Default is `32`.
        valid_batch_size (int, optional): batch size for validation dataset.
            Default is `64`. Unused: no validation loader is built here.

    Returns:
        train data loader and None in place of a validation loader
    """
    rasterizer = build_rasterizer(cfg, dm)

    train_zarr = ChunkedDataset(dm.require("scenes/train.zarr")).open()
    train_dataset = CubicAgentDataset(cfg, train_zarr, rasterizer)
    n_samples = len(train_dataset) // 5
    # n_samples = 100
    train_dataset = Subset(train_dataset, list(range(n_samples)))
    train_loader = DataLoader(
        train_dataset,
        batch_size=train_batch_size,
        num_workers=NUM_WORKERS,
        shuffle=True,
        worker_init_fn=seed_all,
        drop_last=True,
    )
    print(f" * Number of elements in train dataset - {len(train_dataset)}")
    print(f" * Number of elements in train loader - {len(train_loader)}")

    return train_loader, None
def evaluate(self, data_path, file_name="submission.csv"):
    # set env variable for data
    os.environ["L5KIT_DATA_FOLDER"] = data_path
    dm = LocalDataManager(None)
    cfg = self.cfg

    # ===== INIT DATASET
    test_cfg = cfg["test_data_loader"]

    # rasterizer
    rasterizer = build_rasterizer(cfg, dm)

    # test dataset/dataloader
    test_zarr = ChunkedDataset(dm.require(test_cfg["key"])).open()
    test_mask = np.load(f"{data_path}/scenes/mask.npz")["arr_0"]
    test_dataset = AgentDataset(cfg, test_zarr, rasterizer, agents_mask=test_mask)
    test_dataloader = DataLoader(test_dataset,
                                 shuffle=test_cfg["shuffle"],
                                 batch_size=test_cfg["batch_size"],
                                 num_workers=test_cfg["num_workers"])
    print(test_dataloader)

    # ==== EVAL LOOP
    self.model.eval()
    torch.set_grad_enabled(False)
    criterion = nn.MSELoss(reduction="none")

    # store information for evaluation
    timestamps = []
    pred_coords = []
    confidences_list = []
    agent_ids = []

    progress_bar = tqdm(test_dataloader)
    for data in progress_bar:
        _, pred, confidences = self.forward(data, criterion)

        timestamps.append(data["timestamp"].numpy().copy())
        agent_ids.append(data["track_id"].numpy().copy())
        pred_coords.append(pred.cpu().numpy().copy())
        confidences_list.append(confidences.cpu().numpy().copy())

    # ==== Save Results
    pred_path = f"{os.getcwd()}/{file_name}"
    write_pred_csv(pred_path,
                   timestamps=np.concatenate(timestamps),
                   track_ids=np.concatenate(agent_ids),
                   coords=np.concatenate(pred_coords),
                   confs=np.concatenate(confidences_list))
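# Usage sketch for evaluate above (`predictor` is a hypothetical instance of
# the class defining this method; the data folder is a placeholder). The call
# writes file_name into the current working directory.
predictor.evaluate("/path/to/l5kit_data", file_name="submission.csv")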
def load_tune_data():
    dm = get_dm()
    eval_cfg = cfg["val_data_loader"]

    eval_base_path = '/home/axot/lyft/data/scenes/validate_chopped_31'
    eval_zarr_path = str(Path(eval_base_path) / Path(dm.require(eval_cfg["key"])).name)
    eval_mask_path = str(Path(eval_base_path) / "mask.npz")
    eval_gt_path = str(Path(eval_base_path) / "gt.csv")

    rasterizer = build_rasterizer(cfg, dm)
    eval_zarr = ChunkedDataset(eval_zarr_path).open()
    eval_mask = np.load(eval_mask_path)["arr_0"]

    # ===== INIT DATASET AND LOAD MASK
    eval_dataset = AgentDataset(cfg, eval_zarr, rasterizer, agents_mask=eval_mask)

    gt_dict = OrderedDict()
    for el in read_gt_csv(eval_gt_path):
        gt_dict[el["track_id"] + el["timestamp"]] = el

    eval_dataloader = DataLoader(eval_dataset,
                                 shuffle=eval_cfg["shuffle"],
                                 batch_size=eval_cfg["batch_size"],
                                 num_workers=eval_cfg["num_workers"])
    return eval_dataloader, gt_dict
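# Usage sketch: gt_dict is keyed by the concatenation of the track_id and
# timestamp strings read from the csv, so a batch element can be matched back
# to its ground truth (assuming the csv stores both as plain integers).
eval_dataloader, gt_dict = load_tune_data()
batch = next(iter(eval_dataloader))
key = str(int(batch["track_id"][0])) + str(int(batch["timestamp"][0]))
print(key in gt_dict)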
def __init__(self, data_root: str, config_path: str, split: str, show_progress=True,
             turn_thresh=3., speed_thresh=0.5, static_thresh=1.,
             output_folder='preprocess', autosave=True, cache_size=1e9):
    self.autosave = autosave
    self.show_progress = show_progress
    self.turn_thresh = turn_thresh
    self.speed_thresh = speed_thresh
    self.static_thresh = static_thresh
    self.split = split
    self.config = load_config_data(config_path)
    self.output_folder = output_folder

    self.data_manager = LocalDataManager(data_root)
    self.rasterizer = build_rasterizer(self.config, self.data_manager)
    self.data_zarr = ChunkedDataset(self.data_manager.require(split)).open(
        cache_size_bytes=int(cache_size))
    self.dataset = AgentDataset(self.config, self.data_zarr, self.rasterizer)

    self.data = defaultdict(list)
    self.junk = defaultdict(list)
    self.progress = None
def load_zarr_dataset(
    self, loader_name: str = "train_data_loader"
) -> Tuple[str, ChunkedDataset, AgentDataset]:
    zarr_path = self.dm.require(self.cfg[loader_name]["key"])
    print("load zarr data:", zarr_path)
    zarr_dataset = ChunkedDataset(zarr_path).open()

    if loader_name == "test_data_loader":
        mask_path = os.path.join(os.path.dirname(zarr_path), "mask.npz")
        agents_mask = np.load(mask_path)["arr_0"]
        agent_dataset = AgentDataset(self.cfg, zarr_dataset, self.rasterizer,
                                     agents_mask=agents_mask)
    else:
        agent_dataset = AgentDataset(
            self.cfg,
            zarr_dataset,
            self.rasterizer,
            min_frame_history=MIN_FRAME_HISTORY,
            min_frame_future=MIN_FRAME_FUTURE,
        )
    print(zarr_dataset)
    return zarr_path, zarr_dataset, agent_dataset
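# Usage sketch (`loader` is a hypothetical instance of the surrounding class):
# the loader_name argument must match a *_data_loader key in the config.
zarr_path, zarr_dataset, agent_dataset = loader.load_zarr_dataset("train_data_loader")
print(len(agent_dataset), "agent samples from", zarr_path)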
def setup(self):
    self.dm = LocalDataManager(None)
    self.rasterizer = self.fn_rasterizer(self.cfg, self.dm)
    self.data_zarr = ChunkedDataset(self.dm.require(self.cfg[self.loader_key]["key"])).open()
    self.ds = AgentDataset(self.cfg, self.data_zarr, self.rasterizer)
def train_dataloader(self):
    train_cfg = cfg["train_data_loader"]
    try:
        dataset_meta = _load_metadata(train_cfg["dataset_meta_key"], dm)
        world_to_ecef = np.array(dataset_meta["world_to_ecef"], dtype=np.float64)
    except (KeyError, FileNotFoundError):
        world_to_ecef = get_hardcoded_world_to_ecef()

    semantic_map_filepath = dm.require(train_cfg["semantic_map_key"])

    rasterizer = OpenGLSemanticRasterizer(
        raster_size=train_cfg["raster_size"],
        pixel_size=train_cfg["pixel_size"],
        ego_center=train_cfg["ego_center"],
        filter_agents_threshold=0.5,
        history_num_frames=train_cfg['history_num_frames'],
        semantic_map_path=semantic_map_filepath,
        world_to_ecef=world_to_ecef,
    )

    train_zarr = ChunkedDataset(dm.require(train_cfg["key"])).open()
    train_dataset = AgentDataset(cfg, train_zarr, rasterizer)
    train_dataloader = DataLoader(
        train_dataset,
        sampler=RandomSampler(
            train_dataset,
            num_samples=cfg["train_params"]["max_num_steps"],
            replacement=True,
        ),
        batch_size=train_cfg["batch_size"],
        num_workers=train_cfg["num_workers"])
    return train_dataloader
def load_val_data():
    dm = get_dm()
    eval_cfg = cfg["val_data_loader"]

    # MIN_FUTURE_STEPS = 10
    # num_frames_to_chop = cfg['model_params']['history_num_frames'] + 1
    # eval_base_path = create_chopped_dataset(dm.require(eval_cfg["key"]),
    #                                         cfg["raster_params"]["filter_agents_threshold"],
    #                                         num_frames_to_chop,
    #                                         cfg["model_params"]["future_num_frames"],
    #                                         MIN_FUTURE_STEPS)
    eval_base_path = '/home/axot/lyft/data/scenes/validate_chopped_31'

    eval_zarr_path = str(Path(eval_base_path) / Path(dm.require(eval_cfg["key"])).name)
    eval_mask_path = str(Path(eval_base_path) / "mask.npz")
    eval_gt_path = str(Path(eval_base_path) / "gt.csv")

    rasterizer = build_rasterizer(cfg, dm)
    eval_zarr = ChunkedDataset(eval_zarr_path).open()
    eval_mask = np.load(eval_mask_path)["arr_0"]

    # ===== INIT DATASET AND LOAD MASK
    eval_dataset = AgentDataset(cfg, eval_zarr, rasterizer, agents_mask=eval_mask)
    eval_dataloader = DataLoader(eval_dataset,
                                 shuffle=eval_cfg["shuffle"],
                                 batch_size=eval_cfg["batch_size"],
                                 num_workers=eval_cfg["num_workers"])
    return eval_dataloader
def test_get_valid_agents_multi_annot_hole(dataset: ChunkedDataset) -> None:
    frames_range = np.asarray([0, len(dataset.frames)])
    # put an annotation hole at 10 and 25
    dataset.agents[10]["track_id"] = 2
    dataset.agents[25]["track_id"] = 2
    agents_mask, *_ = get_valid_agents_p(frames_range, dataset)
    agents_mask = agents_mask.astype(int)

    assert np.all(np.diff(agents_mask[:10, 0]) == 1)
    assert np.all(np.diff(agents_mask[:10, 1]) == -1)
    assert agents_mask[10, 0] == agents_mask[10, 1] == 0

    assert np.all(np.diff(agents_mask[11:25, 0]) == 1)
    assert np.all(np.diff(agents_mask[11:25, 1]) == -1)
    assert agents_mask[25, 0] == agents_mask[25, 1] == 0
def test_get_valid_agents_yaw_change(dataset: ChunkedDataset) -> None:
    frames_range = np.asarray([0, len(dataset.frames)])
    # change yaw
    dataset.agents[10]["yaw"] = np.radians(50)
    dataset.agents[20]["yaw"] = np.radians(29)  # under yaw threshold
    agents_mask, *_ = get_valid_agents_p(frames_range, dataset)
    agents_mask = agents_mask.astype(int)

    assert np.all(np.diff(agents_mask[:10, 0]) == 1)
    assert np.all(np.diff(agents_mask[:10, 1]) == -1)
    assert agents_mask[10, 0] == agents_mask[10, 1] == 0

    assert np.all(np.diff(agents_mask[11:, 0]) == 1)
    assert np.all(np.diff(agents_mask[11:, 1]) == -1)
def get_loaders(train_batch_size=32, valid_batch_size=64):
    """Prepare loaders.

    Args:
        train_batch_size (int, optional): batch size for training dataset.
            Default is `32`.
        valid_batch_size (int, optional): batch size for validation dataset.
            Default is `64`.

    Returns:
        train and validation data loaders
    """
    rasterizer = build_rasterizer(cfg, dm)

    train_zarr = ChunkedDataset(dm.require("scenes/train.zarr")).open()
    train_dataset = AgentDataset(cfg, train_zarr, rasterizer)
    train_sampler = RandomSampler(train_dataset, replacement=True, num_samples=100_000)
    train_loader = DataLoader(
        train_dataset,
        batch_size=train_batch_size,
        num_workers=NUM_WORKERS,
        shuffle=False,
        sampler=train_sampler,
        worker_init_fn=seed_all,
    )
    print(f" * Number of elements in train dataset - {len(train_dataset)}")
    print(f" * Number of elements in train loader - {len(train_loader)}")

    valid_zarr = ChunkedDataset(dm.require("scenes/validate.zarr")).open()
    valid_dataset = AgentDataset(cfg, valid_zarr, rasterizer)
    valid_sampler = RandomSampler(valid_dataset, replacement=True, num_samples=10_000)
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=valid_batch_size,
        shuffle=False,
        sampler=valid_sampler,
        num_workers=NUM_WORKERS,
    )
    print(f" * Number of elements in valid dataset - {len(valid_dataset)}")
    print(f" * Number of elements in valid loader - {len(valid_loader)}")

    return train_loader, valid_loader
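# Design note (sketch): with replacement=True and a fixed num_samples, each
# epoch is a fixed-size random draw, so the loader length depends only on
# num_samples and batch_size, not on the dataset size:
train_loader, valid_loader = get_loaders(train_batch_size=32, valid_batch_size=64)
assert len(train_loader) == 100_000 // 32  # 3125 batches per epoch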
def prepare_train_data(self):
    train_cfg = cfg["train_data_loader"]
    rasterizer = build_rasterizer(cfg, dm)
    train_zarr = ChunkedDataset(dm.require(train_cfg["key"])).open()
    train_dataset = AgentDataset(cfg, train_zarr, rasterizer)
    train_dataloader = DataLoader(train_dataset,
                                  shuffle=train_cfg["shuffle"],
                                  batch_size=train_cfg["batch_size"],
                                  num_workers=train_cfg["num_workers"])
    return train_dataloader
def test_perturbation_is_applied(perturb_prob: float) -> None:
    cfg = load_config_data("./l5kit/tests/artefacts/config.yaml")

    zarr_dataset = ChunkedDataset(path="./l5kit/tests/artefacts/single_scene.zarr")
    zarr_dataset.open()

    dm = LocalDataManager("./l5kit/tests/artefacts/")
    rasterizer = build_rasterizer(cfg, dm)

    dataset = EgoDataset(cfg, zarr_dataset, rasterizer, None)  # no perturb
    data_no_perturb = dataset[0]

    # note we cannot change the object we already have as a partial is built at init time
    perturb = AckermanPerturbation(ReplayRandomGenerator(np.asarray([[4.0, 0.33]])),
                                   perturb_prob=perturb_prob)
    dataset = EgoDataset(cfg, zarr_dataset, rasterizer, perturb)  # perturb
    data_perturb = dataset[0]

    assert np.linalg.norm(data_no_perturb["target_positions"]
                          - data_perturb["target_positions"]) > 0
    assert np.linalg.norm(data_no_perturb["target_yaws"] - data_perturb["target_yaws"]) > 0