def load_zarr_dataset(
    self, loader_name: str = "train_data_loader"
) -> Tuple[str, ChunkedDataset, AgentDataset]:
    zarr_path = self.dm.require(self.cfg[loader_name]["key"])
    print("load zarr data:", zarr_path)
    zarr_dataset = ChunkedDataset(zarr_path).open()
    if loader_name == "test_data_loader":
        # the test split ships with an agents mask next to the zarr
        mask_path = os.path.join(os.path.dirname(zarr_path), "mask.npz")
        agents_mask = np.load(mask_path)["arr_0"]
        agent_dataset = AgentDataset(
            self.cfg, zarr_dataset, self.rasterizer, agents_mask=agents_mask
        )
    else:
        agent_dataset = AgentDataset(
            self.cfg,
            zarr_dataset,
            self.rasterizer,
            min_frame_history=MIN_FRAME_HISTORY,
            min_frame_future=MIN_FRAME_FUTURE,
        )
    print(zarr_dataset)
    return zarr_path, zarr_dataset, agent_dataset
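# MIN_FRAME_HISTORY and MIN_FRAME_FUTURE are assumed to be module-level
# constants here; a minimal sketch using l5kit's own AgentDataset defaults:
MIN_FRAME_HISTORY = 10  # frames of history an agent needs to be sampled
MIN_FRAME_FUTURE = 1    # frames of future an agent needs to be sampled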
def setup(self, stage=None):
    train_zarr = ChunkedDataset(self.dm.require(self.train_cfg["key"])).open()
    self.train_dataset = AgentDataset(self.cfg, train_zarr, self.rasterizer)
    val_zarr = ChunkedDataset(self.dm.require(self.val_cfg["key"])).open()
    self.val_dataset = AgentDataset(self.cfg, val_zarr, self.rasterizer)
def agent_dataset(cfg: dict, zarr_dataset, rasterizer, perturbation=None,
                  agents_mask=None, min_frame_history=10, min_frame_future=1,
                  svg=False):
    data = AgentDataset(cfg, zarr_dataset, rasterizer, perturbation,
                        agents_mask, min_frame_history, min_frame_future)
    if svg:
        # patch the instance so the SVG-specific frame extraction is used
        data.get_frame = types.MethodType(get_frame, data)
    return data
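# A minimal usage sketch for the factory above, assuming `cfg`, `zarr_dataset`,
# and `rasterizer` were built the usual l5kit way and that `get_frame` is the
# custom replacement defined elsewhere in this module:
ds = agent_dataset(cfg, zarr_dataset, rasterizer, svg=True)
sample = ds[0]  # this lookup is served through the patched get_frame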
def test_get_scene_indices_agent(scene_idx: int, zarr_dataset: ChunkedDataset,
                                 dmg: LocalDataManager, cfg: dict) -> None:
    cfg["raster_params"]["map_type"] = "box_debug"
    rasterizer = build_rasterizer(cfg, dmg)
    dataset = AgentDataset(cfg, zarr_dataset, rasterizer)

    # test only the first 10 elements
    scene_indices = dataset.get_scene_indices(scene_idx)[:10]
    agents = np.asarray(dataset.dataset.agents)[dataset.agents_mask][:10]
    for agent, idx in zip(agents, scene_indices):
        id_agent = dataset[idx]["track_id"]
        assert id_agent == agent["track_id"]
def setup(self, stage=None):
    if self.data_manager is None:
        self.data_manager = LocalDataManager(self.data_root)
    if self.rasterizer is None:
        self.rasterizer = build_rasterizer(self.config, self.data_manager)
    if stage == 'fit' or stage is None:
        train_zarr = ChunkedDataset(
            self.data_manager.require(self.train_split)).open(
                cache_size_bytes=int(self.cache_size))
        train_data = AgentDataset(self.config, train_zarr, self.rasterizer)
        if self.train_idxs is not None:
            train_data = Subset(train_data, self.train_idxs)
        if self.val_split is None or self.val_split == self.train_split:
            tl = len(train_data)
            vl = int(tl * self.val_proportion)
            self.train_data, self.val_data = random_split(
                train_data, [tl - vl, vl])
        else:
            self.train_data = train_data
            val_zarr = ChunkedDataset(
                self.data_manager.require(self.val_split)).open(
                    cache_size_bytes=int(self.cache_size))
            self.val_data = AgentDataset(self.config, val_zarr, self.rasterizer)
            if self.val_idxs is not None:
                self.val_data = Subset(self.val_data, self.val_idxs)
        if self.raster_cache_size:
            self.train_data = CachedDataset(self.train_data,
                                            self.raster_cache_size)
            self.val_data = CachedDataset(self.val_data,
                                          self.raster_cache_size)
    if stage == 'test' or stage is None:
        test_zarr = ChunkedDataset(
            self.data_manager.require(self.test_split)).open(
                cache_size_bytes=int(self.cache_size))
        if self.test_mask is not None:
            test_data = AgentDataset(self.config, test_zarr, self.rasterizer,
                                     agents_mask=self.test_mask)
        else:
            test_data = AgentDataset(self.config, test_zarr, self.rasterizer)
        if self.test_idxs is not None:
            test_data = Subset(test_data, self.test_idxs)
        else:
            self.test_idxs = np.arange(start=1, stop=len(test_data) + 1)
        self.test_data = IndexedDataset(test_data, self.test_idxs)
def get_dataloader(self, zarr_dataset_path, shuffle, agent_mask=None):
    zarr_dataset = self.chunked_dataset(zarr_dataset_path)
    if agent_mask is None:
        agent_dataset = AgentDataset(self.cfg, zarr_dataset, self.rast)
    else:
        agent_dataset = AgentDataset(self.cfg, zarr_dataset, self.rast,
                                     agents_mask=agent_mask)
    return DataLoader(
        agent_dataset,
        shuffle=shuffle,
        batch_size=self.args.batch_size,
        num_workers=self.args.num_workers,
    )
def get_loaders(train_batch_size=32, valid_batch_size=64):
    """Prepare loaders.

    Args:
        train_batch_size (int, optional): batch size for training dataset.
            Default is `32`.
        valid_batch_size (int, optional): batch size for validation dataset.
            Default is `64`.

    Returns:
        train and validation data loaders
    """
    rasterizer = build_rasterizer(cfg, dm)

    train_zarr = ChunkedDataset(dm.require("scenes/train.zarr")).open()
    train_dataset = AgentDataset(cfg, train_zarr, rasterizer)
    n_samples = len(train_dataset) // 5
    # n_samples = 100
    train_dataset = Subset(train_dataset, list(range(n_samples)))
    train_loader = DataLoader(
        train_dataset,
        batch_size=train_batch_size,
        num_workers=NUM_WORKERS,
        shuffle=True,
        worker_init_fn=seed_all,
        drop_last=True,
    )
    print(f" * Number of elements in train dataset - {len(train_dataset)}")
    print(f" * Number of elements in train loader - {len(train_loader)}")

    eval_zarr_path = dm.require("scenes/validate_chopped_100/validate.zarr")
    eval_gt_path = "scenes/validate_chopped_100/gt.csv"
    eval_mask_path = "./data/scenes/validate_chopped_100/mask.npz"
    eval_mask = np.load(eval_mask_path)["arr_0"]

    valid_zarr = ChunkedDataset(eval_zarr_path).open()
    # the chopped validation set needs its agents mask; without it the loader
    # would iterate agents that were removed when the dataset was chopped
    valid_dataset = AgentDataset(cfg, valid_zarr, rasterizer,
                                 agents_mask=eval_mask)
    # valid_dataset = Subset(valid_dataset, list(range(200_000)))
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=valid_batch_size,
        shuffle=False,
        num_workers=NUM_WORKERS,
    )
    print(f" * Number of elements in valid dataset - {len(valid_dataset)}")
    print(f" * Number of elements in valid loader - {len(valid_loader)}")

    return train_loader, valid_loader
def load_tune_data():
    dm = get_dm()
    eval_cfg = cfg["val_data_loader"]

    eval_base_path = '/home/axot/lyft/data/scenes/validate_chopped_31'
    eval_zarr_path = str(
        Path(eval_base_path) / Path(dm.require(eval_cfg["key"])).name)
    eval_mask_path = str(Path(eval_base_path) / "mask.npz")
    eval_gt_path = str(Path(eval_base_path) / "gt.csv")

    rasterizer = build_rasterizer(cfg, dm)
    eval_zarr = ChunkedDataset(eval_zarr_path).open()
    eval_mask = np.load(eval_mask_path)["arr_0"]

    # ===== INIT DATASET AND LOAD MASK
    eval_dataset = AgentDataset(
        cfg, eval_zarr, rasterizer, agents_mask=eval_mask)

    gt_dict = OrderedDict()
    for el in read_gt_csv(eval_gt_path):
        gt_dict[el["track_id"] + el["timestamp"]] = el

    eval_dataloader = DataLoader(eval_dataset,
                                 shuffle=eval_cfg["shuffle"],
                                 batch_size=eval_cfg["batch_size"],
                                 num_workers=eval_cfg["num_workers"])
    return eval_dataloader, gt_dict
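# A minimal sketch of looking up ground truth for a batch using the dict
# built above; read_gt_csv yields string fields, so the tensor values from
# the dataloader are converted back to strings here (an assumption about how
# the rest of this codebase performs the lookup):
eval_dataloader, gt_dict = load_tune_data()
for batch in eval_dataloader:
    for track_id, timestamp in zip(batch["track_id"], batch["timestamp"]):
        el = gt_dict[str(int(track_id)) + str(int(timestamp))]
        # el["coord"] and el["avail"] hold the ground-truth trajectory fields
    break  # one batch is enough for the sketch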
def setup(self):
    self.dm = LocalDataManager(None)
    self.rasterizer = self.fn_rasterizer(self.cfg, self.dm)
    self.data_zarr = ChunkedDataset(
        self.dm.require(self.cfg[self.loader_key]["key"])).open()
    self.ds = AgentDataset(self.cfg, self.data_zarr, self.rasterizer)
def test_get_frame_indices_agent(frame_idx: int, zarr_dataset: ChunkedDataset,
                                 dmg: LocalDataManager, cfg: dict) -> None:
    cfg["raster_params"]["map_type"] = "box_debug"
    rasterizer = build_rasterizer(cfg, dmg)
    dataset = AgentDataset(cfg, zarr_dataset, rasterizer)
    frame_indices = dataset.get_frame_indices(frame_idx)

    # get valid agents from that frame only
    agent_slice = get_agents_slice_from_frames(
        dataset.dataset.frames[frame_idx])
    agents = dataset.dataset.agents[agent_slice]
    agents = agents[dataset.agents_mask[agent_slice]]
    for agent, idx in zip(agents, frame_indices):
        id_agent = dataset[idx]["track_id"]
        assert id_agent == agent["track_id"]
def train_dataloader(self):
    train_cfg = cfg["train_data_loader"]
    try:
        dataset_meta = _load_metadata(train_cfg["dataset_meta_key"], dm)
        world_to_ecef = np.array(dataset_meta["world_to_ecef"],
                                 dtype=np.float64)
    except (KeyError, FileNotFoundError):
        world_to_ecef = get_hardcoded_world_to_ecef()
    semantic_map_filepath = dm.require(train_cfg["semantic_map_key"])
    rasterizer = OpenGLSemanticRasterizer(
        raster_size=train_cfg["raster_size"],
        pixel_size=train_cfg["pixel_size"],
        ego_center=train_cfg["ego_center"],
        filter_agents_threshold=0.5,
        history_num_frames=train_cfg['history_num_frames'],
        semantic_map_path=semantic_map_filepath,
        world_to_ecef=world_to_ecef,
    )
    train_zarr = ChunkedDataset(dm.require(train_cfg["key"])).open()
    train_dataset = AgentDataset(cfg, train_zarr, rasterizer)
    train_dataloader = DataLoader(
        train_dataset,
        sampler=RandomSampler(
            train_dataset,
            num_samples=cfg["train_params"]["max_num_steps"],
            replacement=True,
        ),
        batch_size=train_cfg["batch_size"],
        num_workers=train_cfg["num_workers"])
    return train_dataloader
def get_loaders(train_batch_size=32, valid_batch_size=64):
    """Prepare loaders.

    Args:
        train_batch_size (int, optional): batch size for training dataset.
            Default is `32`.
        valid_batch_size (int, optional): unused; kept for interface
            compatibility. Default is `64`.

    Returns:
        train data loader and ``None`` (no validation loader is built)
    """
    rasterizer = build_rasterizer(cfg, dm)

    train_zarr = ChunkedDataset(dm.require("scenes/train.zarr")).open()
    train_dataset = AgentDataset(cfg, train_zarr, rasterizer)
    # n_samples = train_batch_size * 100
    # train_dataset = Subset(train_dataset, list(range(n_samples)))
    train_loader = DataLoader(
        train_dataset,
        batch_size=train_batch_size,
        num_workers=NUM_WORKERS,
        shuffle=True,
        worker_init_fn=seed_all,
        drop_last=True,
    )
    print(f" * Number of elements in train dataset - {len(train_dataset)}")
    print(f" * Number of elements in train loader - {len(train_loader)}")

    return train_loader, None
def __init__(self, data_root: str, config_path: str, split: str,
             show_progress=True, turn_thresh=3., speed_thresh=0.5,
             static_thresh=1., output_folder='preprocess', autosave=True,
             cache_size=1e9):
    self.autosave = autosave
    self.show_progress = show_progress
    self.turn_thresh = turn_thresh
    self.speed_thresh = speed_thresh
    self.static_thresh = static_thresh
    self.split = split
    self.config = load_config_data(config_path)
    self.output_folder = output_folder
    self.data_manager = LocalDataManager(data_root)
    self.rasterizer = build_rasterizer(self.config, self.data_manager)
    self.data_zarr = ChunkedDataset(self.data_manager.require(split)).open(
        cache_size_bytes=int(cache_size))
    self.dataset = AgentDataset(self.config, self.data_zarr, self.rasterizer)
    self.data = defaultdict(list)
    self.junk = defaultdict(list)
    self.progress = None
def evaluate(self, data_path, file_name="submission.csv"):
    # set env variable for data
    os.environ["L5KIT_DATA_FOLDER"] = data_path
    dm = LocalDataManager(None)
    cfg = self.cfg

    # ===== INIT DATASET
    test_cfg = cfg["test_data_loader"]

    # Rasterizer
    rasterizer = build_rasterizer(cfg, dm)

    # Test dataset/dataloader
    test_zarr = ChunkedDataset(dm.require(test_cfg["key"])).open()
    test_mask = np.load(f"{data_path}/scenes/mask.npz")["arr_0"]
    test_dataset = AgentDataset(cfg, test_zarr, rasterizer,
                                agents_mask=test_mask)
    test_dataloader = DataLoader(test_dataset,
                                 shuffle=test_cfg["shuffle"],
                                 batch_size=test_cfg["batch_size"],
                                 num_workers=test_cfg["num_workers"])
    print(test_dataloader)

    # ==== EVAL LOOP
    self.model.eval()
    torch.set_grad_enabled(False)
    criterion = nn.MSELoss(reduction="none")

    # store information for evaluation
    timestamps = []
    pred_coords = []
    confidences_list = []
    agent_ids = []
    progress_bar = tqdm(test_dataloader)
    for data in progress_bar:
        _, pred, confidences = self.forward(data, criterion)
        timestamps.append(data["timestamp"].numpy().copy())
        agent_ids.append(data["track_id"].numpy().copy())
        pred_coords.append(pred.cpu().numpy().copy())
        confidences_list.append(confidences.cpu().numpy().copy())

    # ==== Save Results
    pred_path = f"{os.getcwd()}/{file_name}"
    write_pred_csv(pred_path,
                   timestamps=np.concatenate(timestamps),
                   track_ids=np.concatenate(agent_ids),
                   coords=np.concatenate(pred_coords),
                   confs=np.concatenate(confidences_list))
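# If ground truth is available (e.g., when running the same loop over a
# chopped validation set instead of the test set), the CSV written above can
# be scored with l5kit's metric helpers. A minimal sketch, assuming `gt_path`
# points at the matching gt.csv and `pred_path` is the file `evaluate` wrote:
from l5kit.evaluation import compute_metrics_csv
from l5kit.evaluation.metrics import neg_multi_log_likelihood

metrics = compute_metrics_csv(gt_path, pred_path, [neg_multi_log_likelihood])
for metric_name, metric_value in metrics.items():
    print(metric_name, metric_value)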
def load_val_data():
    dm = get_dm()
    eval_cfg = cfg["val_data_loader"]

    # MIN_FUTURE_STEPS = 10
    # num_frames_to_chop = cfg['model_params']['history_num_frames'] + 1
    # eval_base_path = create_chopped_dataset(
    #     dm.require(eval_cfg["key"]),
    #     cfg["raster_params"]["filter_agents_threshold"],
    #     num_frames_to_chop,
    #     cfg["model_params"]["future_num_frames"],
    #     MIN_FUTURE_STEPS)
    eval_base_path = '/home/axot/lyft/data/scenes/validate_chopped_31'

    eval_zarr_path = str(
        Path(eval_base_path) / Path(dm.require(eval_cfg["key"])).name)
    eval_mask_path = str(Path(eval_base_path) / "mask.npz")
    eval_gt_path = str(Path(eval_base_path) / "gt.csv")

    rasterizer = build_rasterizer(cfg, dm)
    eval_zarr = ChunkedDataset(eval_zarr_path).open()
    eval_mask = np.load(eval_mask_path)["arr_0"]

    # ===== INIT DATASET AND LOAD MASK
    eval_dataset = AgentDataset(cfg, eval_zarr, rasterizer,
                                agents_mask=eval_mask)
    eval_dataloader = DataLoader(eval_dataset,
                                 shuffle=eval_cfg["shuffle"],
                                 batch_size=eval_cfg["batch_size"],
                                 num_workers=eval_cfg["num_workers"])
    return eval_dataloader
def val_dataloader(self):
    # create the chopped dataset
    rasterizer = build_rasterizer(cfg, dm)
    eval_cfg = cfg["valid_data_loader"]
    num_frames_to_chop = 100
    eval_base_path = create_chopped_dataset(
        dm.require(eval_cfg["key"]),
        cfg["raster_params"]["filter_agents_threshold"],
        num_frames_to_chop,
        cfg["model_params"]["future_num_frames"],
        MIN_FUTURE_STEPS)
    eval_zarr_path = str(
        Path(eval_base_path) / Path(dm.require(eval_cfg["key"])).name)
    eval_mask_path = str(Path(eval_base_path) / "mask.npz")
    eval_gt_path = str(Path(eval_base_path) / "gt.csv")
    self.eval_gt_path = eval_gt_path
    eval_zarr = ChunkedDataset(eval_zarr_path).open(cache_size_bytes=10e9)
    eval_mask = np.load(eval_mask_path)["arr_0"]
    eval_dataset = AgentDataset(cfg, eval_zarr, rasterizer,
                                agents_mask=eval_mask)
    eval_dataloader = DataLoader(eval_dataset,
                                 shuffle=False,
                                 batch_size=eval_cfg["batch_size"],
                                 num_workers=8)
    return eval_dataloader
def get_loaders(train_batch_size=32, valid_batch_size=64):
    """Prepare loaders.

    Args:
        train_batch_size (int, optional): batch size for training dataset.
            Default is `32`.
        valid_batch_size (int, optional): batch size for validation dataset.
            Default is `64`.

    Returns:
        train and validation data loaders
    """
    rasterizer = build_rasterizer(cfg, dm)

    train_zarr = ChunkedDataset(dm.require("scenes/train.zarr")).open()
    train_dataset = AgentDataset(cfg, train_zarr, rasterizer)
    train_sampler = RandomSampler(train_dataset, replacement=True,
                                  num_samples=100_000)
    train_loader = DataLoader(
        train_dataset,
        batch_size=train_batch_size,
        num_workers=NUM_WORKERS,
        shuffle=False,
        sampler=train_sampler,
        worker_init_fn=seed_all,
    )
    print(f" * Number of elements in train dataset - {len(train_dataset)}")
    print(f" * Number of elements in train loader - {len(train_loader)}")

    valid_zarr = ChunkedDataset(dm.require("scenes/validate.zarr")).open()
    valid_dataset = AgentDataset(cfg, valid_zarr, rasterizer)
    valid_sampler = RandomSampler(valid_dataset, replacement=True,
                                  num_samples=10_000)
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=valid_batch_size,
        shuffle=False,
        sampler=valid_sampler,
        num_workers=NUM_WORKERS,
    )
    print(f" * Number of elements in valid dataset - {len(valid_dataset)}")
    print(f" * Number of elements in valid loader - {len(valid_loader)}")

    return train_loader, valid_loader
def prepare_train_data(self):
    train_cfg = cfg["train_data_loader"]
    rasterizer = build_rasterizer(cfg, dm)
    train_zarr = ChunkedDataset(dm.require(train_cfg["key"])).open()
    train_dataset = AgentDataset(cfg, train_zarr, rasterizer)
    train_dataloader = DataLoader(train_dataset,
                                  shuffle=train_cfg["shuffle"],
                                  batch_size=train_cfg["batch_size"],
                                  num_workers=train_cfg["num_workers"])
    return train_dataloader
def check_performance_default(num_samples=64 * 20):
    """Default dataset from l5kit w/o any optimizations"""
    scene_name = "train"
    cfg_data = get_dataset_cfg(scene_name=scene_name, map_type="py_semantic")
    dm = LocalDataManager(None)
    rasterizer = build_rasterizer(cfg_data, dm)
    zarr_dataset = ChunkedDataset(dm.require(f"scenes/{scene_name}.zarr")).open()
    dataset = AgentDataset(cfg_data, zarr_dataset, rasterizer)
    check_performance(dataset, "default")
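# `check_performance` is defined elsewhere in this benchmark; a minimal
# sketch of what such a timing helper could look like (the loop and report
# format here are assumptions, not the original implementation):
import time

def check_performance(dataset, label, num_samples=64 * 20):
    start = time.perf_counter()
    for i in range(num_samples):
        _ = dataset[i]  # rasterization dominates the per-sample cost
    elapsed = time.perf_counter() - start
    print(f"[{label}] {num_samples / elapsed:.1f} samples/s "
          f"({elapsed:.1f}s for {num_samples} samples)")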
def visualizeAgent(self):
    self.dataset = AgentDataset(self.cfg, self.zarr_dataset, self.rast)
    data = self.dataset[0]

    im = data["image"].transpose(1, 2, 0)
    im = self.dataset.rasterizer.to_rgb(im)
    target_positions_pixels = transform_points(
        data["target_positions"] + data["centroid"][:2],
        data["world_to_image"])
    draw_trajectory(im, target_positions_pixels, data["target_yaws"],
                    TARGET_POINTS_COLOR)

    plt.imshow(im[::-1])
    plt.show()
def get_train_dl(cfg, dm):
    # training cfg
    train_cfg = cfg["train_data_loader"]
    # rasterizer
    rasterizer = build_rasterizer(cfg, dm)
    # dataloader
    train_zarr = ChunkedDataset(dm.require(train_cfg["key"])).open()
    train_dataset = AgentDataset(cfg, train_zarr, rasterizer)
    train_dataloader = DataLoader(train_dataset,
                                  shuffle=train_cfg["shuffle"],
                                  batch_size=train_cfg["batch_size"],
                                  num_workers=train_cfg["num_workers"])
    return train_dataset, train_dataloader
def test_compute_mse_error(tmp_path: Path, zarr_dataset: ChunkedDataset,
                           cfg: dict) -> None:
    render_context = RenderContext(
        np.asarray((10, 10)),
        np.asarray((0.25, 0.25)),
        np.asarray((0.5, 0.5)),
        set_origin_to_bottom=cfg["raster_params"]["set_origin_to_bottom"],
    )
    rast = StubRasterizer(render_context)
    dataset = AgentDataset(cfg, zarr_dataset, rast)

    gt_coords = []
    gt_avails = []
    timestamps = []
    track_ids = []
    for idx, el in enumerate(dataset):  # type: ignore
        gt_coords.append(el["target_positions"])
        gt_avails.append(el["target_availabilities"])
        timestamps.append(el["timestamp"])
        track_ids.append(el["track_id"])
        if idx == 100:
            break  # speed up test

    gt_coords = np.asarray(gt_coords)
    gt_avails = np.asarray(gt_avails)
    timestamps = np.asarray(timestamps)
    track_ids = np.asarray(track_ids)

    # test same values error
    write_gt_csv(str(tmp_path / "gt1.csv"), timestamps, track_ids,
                 gt_coords, gt_avails)
    write_pred_csv(str(tmp_path / "pred1.csv"), timestamps, track_ids,
                   gt_coords, confs=None)
    metrics = compute_metrics_csv(str(tmp_path / "gt1.csv"),
                                  str(tmp_path / "pred1.csv"),
                                  [neg_multi_log_likelihood])
    for metric_value in metrics.values():
        assert np.all(metric_value == 0.0)

    # test different values error
    pred_coords = gt_coords.copy()
    pred_coords += np.random.randn(*pred_coords.shape)
    write_pred_csv(str(tmp_path / "pred3.csv"), timestamps, track_ids,
                   pred_coords, confs=None)
    metrics = compute_metrics_csv(str(tmp_path / "gt1.csv"),
                                  str(tmp_path / "pred3.csv"),
                                  [neg_multi_log_likelihood])
    for metric_value in metrics.values():
        assert np.any(metric_value > 0.0)

    # test invalid conf by removing lines in gt1
    with open(str(tmp_path / "pred1.csv")) as src:
        lines = src.readlines()
    with open(str(tmp_path / "pred4.csv"), "w") as fp:
        fp.writelines(lines[:-10])

    with pytest.raises(ValueError):
        compute_metrics_csv(str(tmp_path / "gt1.csv"),
                            str(tmp_path / "pred4.csv"),
                            [neg_multi_log_likelihood])
def plt_show_agent_map(self, idx):
    zarr_dataset = self.chunked_dataset("scenes/train.zarr")
    agent_dataset = AgentDataset(self.cfg, zarr_dataset, self.rast)
    data = agent_dataset[idx]

    im = data["image"].transpose(1, 2, 0)
    im = self.rast.to_rgb(im)
    target_positions_pixels = transform_points(
        data["target_positions"] + data["centroid"][:2],
        data["world_to_image"])
    draw_trajectory(im, target_positions_pixels, TARGET_POINTS_COLOR, 1,
                    data["target_yaws"])

    plt.imshow(im[::-1])
    plt.savefig("filename.png")
def train():
    os.environ["L5KIT_DATA_FOLDER"] = "../"
    dm = LocalDataManager(None)
    cfg = load_config_data("../agent_motion_config.yaml")

    train_cfg = cfg['train_data_loader']
    rasterizer = build_rasterizer(cfg, dm)
    train_zarr = ChunkedDataset(dm.require(train_cfg['key'])).open()
    train_dataset = AgentDataset(cfg, train_zarr, rasterizer)
    train_dataloader = DataLoader(train_dataset,
                                  shuffle=train_cfg['shuffle'],
                                  batch_size=train_cfg['batch_size'],
                                  num_workers=train_cfg['num_workers'])

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # move the model to the same device as the inputs
    model = SusNetv2(cfg['history_num_frames'],
                     cfg['future_num_frames']).to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.MSELoss(reduction='none')

    train_it = iter(train_dataloader)
    losses = []
    progress_bar = tqdm(range(cfg['train_params']['max_num_steps']))
    for step in progress_bar:
        try:
            data = next(train_it)
        except StopIteration:
            train_it = iter(train_dataloader)
            data = next(train_it)

        model.train()
        torch.set_grad_enabled(True)

        inputs = data['image'].to(device)
        target_availabilities = data['target_availabilities'].unsqueeze(-1).to(device)
        targets = data['target_positions'].to(device)

        outputs = model(inputs).reshape(targets.shape)
        loss = criterion(outputs, targets)
        # mask out unavailable future frames before averaging
        loss = loss * target_availabilities
        loss = loss.mean()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        losses.append(loss.item())
        progress_bar.set_description(
            f'Loss: {loss.item()}, loss(avg): {np.mean(losses)}')

    plt.plot(np.arange(len(losses)), losses, label='Train loss')
    plt.legend()
    plt.show()
    torch.save(model.state_dict(), 'lyft_model.pth')
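# A minimal sketch of restoring the weights saved above for inference; the
# SusNetv2 constructor arguments simply mirror the training code above.
model = SusNetv2(cfg['history_num_frames'], cfg['future_num_frames'])
model.load_state_dict(torch.load('lyft_model.pth', map_location='cpu'))
model.eval()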
def __init__(self, W_PATH=None):
    os.environ["L5KIT_DATA_FOLDER"] = f"{Global.DIR_INPUT}/l5kit/dataset"
    self.cfg = Global.getConfig()
    self.train_cfg = self.cfg["train_data_loader"]
    dm = LocalDataManager(None)
    rasterizer = build_rasterizer(self.cfg, dm)
    train_zarr = ChunkedDataset(dm.require(self.train_cfg["key"])).open()
    self.train_dataset = AgentDataset(self.cfg, train_zarr, rasterizer)
    self.straight_train_dataloader = DataLoader(
        self.train_dataset,
        shuffle=True,
        batch_size=32,
        num_workers=self.train_cfg["num_workers"])
    self.W_PATH = W_PATH
def get_train_dataloaders(cfg, dm):
    """Modified from L5Kit"""
    train_cfg = cfg["train_data_loader"]
    rasterizer = build_rasterizer(cfg, dm)
    train_zarr = ChunkedDataset(dm.require(train_cfg["key"])).open()
    train_dataset = AgentDataset(cfg, train_zarr, rasterizer)
    train_dataloader = DataLoader(train_dataset,
                                  shuffle=train_cfg["shuffle"],
                                  batch_size=train_cfg["batch_size"],
                                  num_workers=train_cfg["num_workers"])
    train_dataset_ego = EgoDataset(cfg, train_zarr, rasterizer)
    train_dataloader_ego = DataLoader(train_dataset_ego,
                                      shuffle=train_cfg["shuffle"],
                                      batch_size=train_cfg["batch_size"],
                                      num_workers=train_cfg["num_workers"])
    return train_dataset, train_dataset_ego, train_dataloader, train_dataloader_ego
def load_train_data():
    # load training data
    dm = get_dm()
    train_cfg = cfg["train_data_loader"]
    rasterizer = build_rasterizer(cfg, dm)
    train_zarr = ChunkedDataset(dm.require(train_cfg["key"])).open()
    train_dataset = AgentDataset(cfg, train_zarr, rasterizer)
    train_dataloader = DataLoader(train_dataset,
                                  shuffle=train_cfg["shuffle"],
                                  batch_size=train_cfg["batch_size"],
                                  num_workers=train_cfg["num_workers"])
    print('len(train_dataloader):', len(train_dataloader))
    return train_dataloader
def load_test_data():
    dm = get_dm()
    test_cfg = cfg["test_data_loader"]
    rasterizer = build_rasterizer(cfg, dm)
    test_zarr = ChunkedDataset(dm.require(test_cfg["key"])).open()
    test_mask = np.load(f'{cfg["data_path"]}/scenes/mask.npz')["arr_0"]
    test_dataset = AgentDataset(
        cfg, test_zarr, rasterizer, agents_mask=test_mask)
    test_dataloader = DataLoader(test_dataset,
                                 shuffle=test_cfg["shuffle"],
                                 batch_size=test_cfg["batch_size"],
                                 num_workers=test_cfg["num_workers"])
    return test_dataloader
def get_loaders(train_batch_size=32, valid_batch_size=64):
    """Prepare loaders.

    Args:
        train_batch_size (int, optional): batch size for training dataset.
            Default is `32`.
        valid_batch_size (int, optional): unused; kept for interface
            compatibility. Default is `64`.

    Returns:
        train data loader and ``None`` (no validation loader is built)
    """
    rasterizer = build_rasterizer(cfg, dm)

    train_zarr = ChunkedDataset(dm.require("scenes/train.zarr")).open()
    train_dataset = AgentDataset(cfg, train_zarr, rasterizer)

    sizes = ps.read_csv(os.environ["TRAIN_TRAJ_SIZES"])["size"].values
    is_small = sizes < 6
    n_points = is_small.sum()
    to_sample = n_points // 4
    print(" * points - {} (points to sample - {})".format(n_points, to_sample))
    print(" * paths -", sizes.shape[0] - n_points)
    indices = np.concatenate([
        np.random.choice(
            np.where(is_small)[0],
            size=to_sample,
            replace=False,
        ),
        np.where(~is_small)[0],
    ])
    # TODO: shuffle subset (see the sketch after this function)
    train_dataset = Subset(train_dataset, indices)

    train_loader = DataLoader(
        train_dataset,
        batch_size=train_batch_size,
        num_workers=NUM_WORKERS,
        shuffle=True,
        worker_init_fn=seed_all,
        drop_last=True,
    )
    print(f" * Number of elements in train dataset - {len(train_dataset)}")
    print(f" * Number of elements in train loader - {len(train_loader)}")

    return train_loader, None
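# A minimal sketch for the "shuffle subset" TODO above: shuffle the index
# array in place right before `train_dataset = Subset(train_dataset, indices)`.
# Note the DataLoader already uses shuffle=True, so iteration order is
# randomized either way; this only changes the subset's internal ordering.
rng = np.random.default_rng(42)  # seed is an arbitrary choice for this sketch
rng.shuffle(indices)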
def train_dataloader(self):
    train_cfg = omegaconf.OmegaConf.to_container(self.cfg.train_data_loader)
    train_zarr = ChunkedDataset(self.dm.require(train_cfg["key"])).open()
    train_dataset = AgentDataset(self.cfg, train_zarr, self.rasterizer)
    subset = torch.utils.data.Subset(
        train_dataset,
        range(
            0,
            int(self.cfg["train_data_loader"]["training_percentage"] *
                len(train_dataset))))
    train_dataloader = DataLoader(subset,
                                  shuffle=train_cfg["shuffle"],
                                  batch_size=train_cfg["batch_size"],
                                  num_workers=train_cfg["num_workers"],
                                  drop_last=True)
    return train_dataloader