def get_loaders(train_batch_size=32, valid_batch_size=64):
    """Prepare the training data loader (validation is currently disabled).

    Args:
        train_batch_size (int, optional): batch size for the training
            dataset. Default is ``32``.
        valid_batch_size (int, optional): batch size for the validation
            dataset. Default is ``64``. Currently unused because the
            validation loader is disabled.

    Returns:
        tuple: ``(train_loader, None)`` — a shuffled, subsetted training
        :class:`DataLoader` and ``None`` in place of a validation loader.
    """
    rasterizer = build_rasterizer(cfg, dm)

    train_zarr = ChunkedDataset(dm.require("scenes/train.zarr")).open()
    train_dataset = AgentDataset(cfg, train_zarr, rasterizer)

    # Keep only the first 20% of samples to shorten an epoch; the split is a
    # deterministic prefix, not a random sample.
    n_samples = len(train_dataset) // 5
    train_dataset = Subset(train_dataset, list(range(n_samples)))

    train_loader = DataLoader(
        train_dataset,
        batch_size=train_batch_size,
        num_workers=NUM_WORKERS,
        shuffle=True,
        worker_init_fn=seed_all,  # reseed each worker for reproducibility
        drop_last=True,
    )
    print(f" * Number of elements in train dataset - {len(train_dataset)}")
    print(f" * Number of elements in train loader - {len(train_loader)}")

    # Validation over the chopped split is disabled here; the companion
    # variant of this function builds the full validation loader.
    valid_loader = None

    return train_loader, valid_loader
def get_loaders(train_batch_size=32, valid_batch_size=64):
    """Prepare train and validation loaders for the chopped validation split.

    NOTE(review): this redefines ``get_loaders`` from earlier in the file;
    whichever definition appears later shadows the other — confirm only one
    is meant to be active.

    Args:
        train_batch_size (int, optional): batch size for the training
            dataset. Default is ``32``.
        valid_batch_size (int, optional): batch size for the validation
            dataset. Default is ``64``.

    Returns:
        tuple: ``(train_loader, (valid_loader, valid_gt_path))`` where
        ``valid_gt_path`` points to the ground-truth CSV that evaluation
        code needs alongside the validation loader.
    """
    rasterizer = build_rasterizer(cfg, dm)
    # Both splits use the same (accelerated) dataset implementation.
    dataset_class = AccelAgentDataset

    train_zarr = ChunkedDataset(dm.require("scenes/train.zarr")).open()
    train_dataset = dataset_class(cfg, train_zarr, rasterizer)
    train_loader = DataLoader(
        train_dataset,
        batch_size=train_batch_size,
        num_workers=NUM_WORKERS,
        shuffle=True,
        worker_init_fn=seed_all,  # reseed each worker for reproducibility
        drop_last=True,
    )
    print(f" * Number of elements in train dataset - {len(train_dataset)}")
    print(f" * Number of elements in train loader - {len(train_loader)}")

    # The chopped validation split ships a mask selecting which agents are
    # evaluated, plus a ground-truth CSV consumed by the evaluation step.
    valid_zarr_path = dm.require("scenes/validate_chopped_100/validate.zarr")
    mask_path = dm.require("scenes/validate_chopped_100/mask.npz")
    valid_mask = np.load(mask_path)["arr_0"]
    valid_gt_path = dm.require("scenes/validate_chopped_100/gt.csv")

    valid_zarr = ChunkedDataset(valid_zarr_path).open()
    valid_dataset = dataset_class(cfg, valid_zarr, rasterizer, agents_mask=valid_mask)
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=valid_batch_size,
        shuffle=False,  # keep deterministic order so predictions align with gt.csv
        num_workers=NUM_WORKERS,
    )
    print(f" * Number of elements in valid dataset - {len(valid_dataset)}")
    print(f" * Number of elements in valid loader - {len(valid_loader)}")

    return train_loader, (valid_loader, valid_gt_path)
    num_trajectories=n_trajectories,
)  # NOTE(review): closes a model-constructor call that begins outside this view
# Restore trained weights and switch to inference mode on the first GPU.
load_checkpoint(checkpoint_path, model)
model = model.eval()
device = torch.device("cuda:0")
model = model.to(device)
# Agents mask for the chopped validation split — presumably consumed by
# evaluation code later in the file; not used in the lines visible here.
valid_mask = np.load(
    f"{DATA_DIR}/scenes/validate_chopped_100/mask.npz")["arr_0"]
dm = LocalDataManager(DATA_DIR)
# ====== INIT TEST DATASET=============================================================
rasterizer = build_rasterizer(cfg, dm)
test_zarr = ChunkedDataset(dm.require("scenes/test.zarr")).open()
# The test split has its own agents mask at the dataset root.
test_mask = np.load(f"{DATA_DIR}/scenes/mask.npz")["arr_0"]
test_dataset = AccelAgentDataset(cfg, test_zarr, rasterizer, agents_mask=test_mask)
# Deterministic order (shuffle=False) so predictions can be matched to agents.
test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=32, num_workers=30)
model.eval()
# Inference only: disable autograd globally to save memory and time.
torch.set_grad_enabled(False)
# store information for evaluation