def __init__(
    self,
    root_dir: str,
    task: str,
    section: str,
    transform: Union[Sequence[Callable], Callable] = (),
    download: bool = False,
    seed: int = 0,
    val_frac: float = 0.2,
    cache_num: int = sys.maxsize,
    cache_rate: float = 1.0,
    num_workers: int = 0,
) -> None:
    """Validate the inputs, optionally download/extract the task archive, load the
    dataset properties file, and hand the resulting data list to ``CacheDataset``.

    Args:
        root_dir: existing local directory that holds (or will receive) the task folder.
        task: key into ``self.resource`` / ``self.md5`` selecting which dataset to use.
        section: stored as-is on the instance; presumably consumed by
            ``_generate_data_list`` to pick the split — confirm against that helper.
        transform: transform chain; an empty tuple selects the default
            ``LoadImaged(["image", "label"])``.
        download: when True, fetch and extract ``<root_dir>/<task>.tar`` before use.
        seed: forwarded to ``set_random_state`` (drives the train/val split elsewhere).
        val_frac: validation fraction, stored for use by helpers outside this view.
        cache_num, cache_rate, num_workers: forwarded to ``CacheDataset.__init__``.

    Raises:
        ValueError: if ``root_dir`` is not a directory or ``task`` is unknown.
        RuntimeError: if the dataset directory is absent and ``download`` is False.
    """
    if not os.path.isdir(root_dir):
        raise ValueError("Root directory root_dir must be a directory.")
    self.section = section
    self.val_frac = val_frac
    self.set_random_state(seed=seed)
    if task not in self.resource:
        raise ValueError(f"Unsupported task: {task}, available options are: {list(self.resource.keys())}.")
    task_dir = os.path.join(root_dir, task)
    archive_name = f"{task_dir}.tar"
    if download:
        download_and_extract(self.resource[task], archive_name, root_dir, self.md5[task])
    if not os.path.exists(task_dir):
        raise RuntimeError(f"Cannot find dataset directory: {task_dir}, please use download=True to download it.")
    self.indices: np.ndarray = np.array([])
    datalist = self._generate_data_list(task_dir)
    # as `release` key has typo in Task04 config file, ignore it.
    keys_to_load = [
        "name",
        "description",
        "reference",
        "licence",
        "tensorImageSize",
        "modality",
        "labels",
        "numTraining",
        "numTest",
    ]
    self._properties = load_decathlon_properties(os.path.join(task_dir, "dataset.json"), keys_to_load)
    if transform == ():
        transform = LoadImaged(["image", "label"])
    CacheDataset.__init__(
        self,
        datalist,
        transform,
        cache_num=cache_num,
        cache_rate=cache_rate,
        num_workers=num_workers,
    )
def __init__(
    self,
    root_dir: PathLike,
    section: str,
    transform: Union[Sequence[Callable], Callable] = (),
    download: bool = False,
    seed: int = 0,
    val_frac: float = 0.1,
    test_frac: float = 0.1,
    cache_num: int = sys.maxsize,
    cache_rate: float = 1.0,
    num_workers: Optional[int] = 1,
    progress: bool = True,
    copy_cache: bool = True,
    as_contiguous: bool = True,
) -> None:
    """Validate the root directory, optionally download/extract the dataset
    archive, build the data list, and initialize ``CacheDataset`` with it.

    Args:
        root_dir: existing local directory that holds (or will receive)
            ``self.dataset_folder_name``.
        section: stored as-is on the instance; presumably consumed by
            ``_generate_data_list`` to select the split — confirm against that helper.
        transform: transform chain; an empty tuple selects the default
            ``LoadImaged("image")``.
        download: when True, fetch ``self.resource`` and extract it under ``root_dir``
            (MD5-verified against ``self.md5``).
        seed: forwarded to ``set_random_state`` (drives the splits elsewhere).
        val_frac: validation fraction, stored for helpers outside this view.
        test_frac: test fraction, stored for helpers outside this view.
        cache_num, cache_rate, num_workers, progress, copy_cache, as_contiguous:
            forwarded to ``CacheDataset.__init__`` (``progress`` also controls the
            download progress bar).

    Raises:
        ValueError: if ``root_dir`` is not a directory.
        RuntimeError: if the dataset directory is absent and ``download`` is False.
    """
    root_dir = Path(root_dir)
    if not root_dir.is_dir():
        raise ValueError("Root directory root_dir must be a directory.")
    self.section = section
    self.val_frac = val_frac
    self.test_frac = test_frac
    self.set_random_state(seed=seed)
    # self.num_class is recomputed later (outside this view); start from zero.
    self.num_class = 0
    archive_path = root_dir / self.compressed_file_name
    extracted_dir = root_dir / self.dataset_folder_name
    if download:
        download_and_extract(
            url=self.resource,
            filepath=archive_path,
            output_dir=root_dir,
            hash_val=self.md5,
            hash_type="md5",
            progress=progress,
        )
    if not extracted_dir.is_dir():
        raise RuntimeError(f"Cannot find dataset directory: {extracted_dir}, please use download=True to download it.")
    datalist = self._generate_data_list(extracted_dir)
    if transform == ():
        transform = LoadImaged("image")
    CacheDataset.__init__(
        self,
        data=datalist,
        transform=transform,
        cache_num=cache_num,
        cache_rate=cache_rate,
        num_workers=num_workers,
        progress=progress,
        copy_cache=copy_cache,
        as_contiguous=as_contiguous,
    )
def __init__(
    self,
    root_dir: str,
    section: str,
    transform: Union[Sequence[Callable], Callable] = (),
    download: bool = False,
    seed: int = 0,
    val_frac: float = 0.1,
    test_frac: float = 0.1,
    cache_num: int = sys.maxsize,
    cache_rate: float = 1.0,
    num_workers: int = 0,
) -> None:
    """Validate the root directory, optionally download/extract the dataset
    archive, build the data list, and initialize ``CacheDataset`` with it.

    Args:
        root_dir: existing local directory that holds (or will receive)
            ``self.dataset_folder_name``.
        section: stored as-is on the instance; presumably consumed by
            ``_generate_data_list`` to select the split — confirm against that helper.
        transform: transform chain; an empty tuple selects the default
            ``LoadImaged("image")``.
        download: when True, fetch ``self.resource`` and extract it under ``root_dir``
            (verified against ``self.md5``).
        seed: forwarded to ``set_random_state`` (drives the splits elsewhere).
        val_frac: validation fraction, stored for helpers outside this view.
        test_frac: test fraction, stored for helpers outside this view.
        cache_num, cache_rate, num_workers: forwarded to ``CacheDataset.__init__``.

    Raises:
        ValueError: if ``root_dir`` is not a directory.
        RuntimeError: if the dataset directory is absent and ``download`` is False.
    """
    if not os.path.isdir(root_dir):
        raise ValueError("Root directory root_dir must be a directory.")
    self.section = section
    self.val_frac = val_frac
    self.test_frac = test_frac
    self.set_random_state(seed=seed)
    # self.num_class is recomputed later (outside this view); start from zero.
    self.num_class = 0
    archive_path = os.path.join(root_dir, self.compressed_file_name)
    extracted_dir = os.path.join(root_dir, self.dataset_folder_name)
    if download:
        download_and_extract(self.resource, archive_path, root_dir, self.md5)
    if not os.path.exists(extracted_dir):
        raise RuntimeError(f"Cannot find dataset directory: {extracted_dir}, please use download=True to download it.")
    datalist = self._generate_data_list(extracted_dir)
    if transform == ():
        transform = LoadImaged("image")
    CacheDataset.__init__(
        self,
        datalist,
        transform,
        cache_num=cache_num,
        cache_rate=cache_rate,
        num_workers=num_workers,
    )