def _make_dataset(cls, root: str, mode: str, subfolders: List[str], verbose=True) -> Dict[str, List[str]]:
    """Index the images found under ``root/mode/<subfolder>`` for each subfolder.

    Returns a dict mapping each subfolder name to the sorted list of full
    image paths it contains.  Every subfolder must exist and must hold the
    same number of images (after filtering by ``cls.allow_extension``).
    """
    assert mode in cls.dataset_modes, mode
    for subfolder in subfolders:
        assert (Path(root, mode, subfolder).exists()
                and Path(root, mode, subfolder).is_dir()), os.path.join(root, mode, subfolder)
    # One raw directory listing per subfolder, in the same order as `subfolders`.
    # (fixed typo: loop variable was `subfoloder`)
    items = [os.listdir(Path(root, mode, subfolder)) for subfolder in subfolders]
    # Keep only files with an allowed extension.
    # FIX: the original wrapped this in an outer `sorted(...)`, which sorts the
    # *list of listings* lexicographically and can reorder it, misaligning the
    # listings with `subfolders` in the zip below.  Each listing is sorted
    # per-subfolder further down, so the outer sort is dropped.
    items = [[x for x in item if allow_extension(x, cls.allow_extension)] for item in items]
    # All subfolders must contain the same number of kept files.
    assert len(set(map_(len, items))) == 1, map_(len, items)
    imgs = {}
    for subfolder, item in zip(subfolders, items):
        imgs[subfolder] = sorted(os.path.join(root, mode, subfolder, x_path) for x_path in item)
    assert len(set(map_(len, imgs.values()))) == 1, "imgs list have component with different length."
    if verbose:
        for subfolder in subfolders:
            print(f"found {len(imgs[subfolder])} images in {subfolder}\t")
    return imgs
def __getitem__(self, index) -> Tuple[List[Tensor], str]:
    """Return the transformed image list at *index* and the common filename stem."""
    images, filename_list = self._getitem_index(index)
    # Exactly one image is expected per subfolder.
    assert len(images) == len(self.subfolders)
    # Every returned path must refer to the same underlying image (same stem).
    stems = {Path(p).stem for p in filename_list}
    assert len(stems) == 1, f"Check the filename list, given {filename_list}."
    filename = Path(filename_list[0]).stem
    images = self._transform(*images)
    return images, filename
def __init__(
    self,
    dataset: MedicalImageSegmentationDataset,
    grp_regex: str,
    shuffle=False,
    verbose=True,
    infinite_sampler: bool = False,
) -> None:
    """Group the dataset's filenames by patient using ``grp_regex``.

    Builds ``self.idx_map``: patient id -> list of slice indices into the
    dataset's filename list, so sampling can be done patient-wise.

    :param dataset: dataset exposing ``get_filenames()``.
    :param grp_regex: regex whose match (group 0) identifies the patient.
    :param shuffle: if True, slice lists are shuffled when sampled.
    :param infinite_sampler: flag stored for the sampling loop.
    """
    filenames: List[str] = dataset.get_filenames()
    self.grp_regex = grp_regex
    self.shuffle: bool = shuffle
    # Either a random permutation or the identity, chosen once here.
    self.shuffle_fn: Callable = (
        (lambda x: random.sample(x, len(x))) if self.shuffle else id_
    )
    self._infinite_sampler = infinite_sampler
    if verbose:
        print(f"Grouping using {self.grp_regex} regex")
    grouping_regex: Pattern = re.compile(self.grp_regex)
    # Match against stems so the file extension can never leak into the id.
    stems: List[str] = [Path(filename).stem for filename in filenames]
    # FIX: the original tried `match` on every stem and, on *any* failure
    # (an AttributeError from `None.group(0)` swallowed by a broad except),
    # redid everything with `search`; if `search` also failed it crashed with
    # an opaque AttributeError.  An anchored `match`, when it succeeds, agrees
    # with `search` (leftmost match), so a per-stem fallback is equivalent and
    # more robust.
    matches: List[Match] = [
        grouping_regex.match(stem) or grouping_regex.search(stem) for stem in stems
    ]
    assert all(matches), [s for s, m in zip(stems, matches) if m is None]
    patients: List[str] = [match.group(0) for match in matches]
    unique_patients: List[str] = sorted(set(patients))
    # Sanity check: grouping must actually merge some slices.
    assert len(unique_patients) < len(filenames)
    if verbose:
        print(
            f"Found {len(unique_patients)} unique patients out of {len(filenames)} images"
        )
    # FIX: seed with empty lists directly instead of the original
    # None-seeded dict (`dict(zip(..., repeat(None)))`) plus a truthiness
    # check that re-created the list on first use.
    self.idx_map: Dict[str, List[int]] = {patient: [] for patient in unique_patients}
    for i, patient in enumerate(patients):
        self.idx_map[patient].append(i)
    # Every slice must be accounted for exactly once.
    assert sum(len(self.idx_map[k]) for k in unique_patients) == len(filenames)
    if verbose:
        print("Patient to slices mapping done")
def __init__(
    self,
    img_folder: str,
    mask_folder_list: List[str],
    group_pattern=r"patient\d+_\d+",
    img_extension: str = "png",
    crop: int = 0,
    mapping=None,
    resize=None,
) -> None:
    """Index images and their masks, group them by subject, and preload the
    first subject.

    :param img_folder: directory with the images.
    :param mask_folder_list: zero or more directories with ground-truth masks.
    :param group_pattern: regex grouping slices into subjects.
    :param img_extension: extension used when globbing images.
    :param mapping: optional list of label-remapping dicts — one shared dict,
        or one dict per mask folder.  ``None``/empty means identity mapping.
    """
    super().__init__()
    self.img_folder: Path = Path(img_folder)
    self.crop = crop
    self.resize = resize
    assert self.img_folder.exists() and self.img_folder.is_dir(), self.img_folder
    self.mask_folder_list = [Path(m) for m in mask_folder_list]
    self.num_mask = len(self.mask_folder_list)
    if self.num_mask > 0:
        for m in self.mask_folder_list:
            assert m.exists() and m.is_dir(), m
    self.group_pattern: str = group_pattern
    self.img_extension = img_extension
    self.img_paths, self.mask_paths_dict = self._load_images(
        self.img_folder, self.mask_folder_list, self.img_extension)
    self.img_paths_group, self.mask_paths_group_dict = self._group_images(
        self.img_paths, self.mask_paths_dict, self.group_pattern)
    # Images and masks must cover exactly the same subjects and stems.
    assert self.img_paths_group.keys() == self.mask_paths_group_dict.keys()
    for subject, paths in self.img_paths_group.items():
        for m, m_paths in self.mask_paths_group_dict[subject].items():
            assert map_(lambda x: x.stem, paths) == map_(lambda x: x.stem, m_paths)
    print(
        f"Found {len(self.img_paths_group.keys())} subjects with totally {len(self.img_paths)} images."
    )
    self.identifies: List[str] = list(self.img_paths_group.keys())
    print(f"identifies: {self.identifies[:5]}...")
    self.current_identify = 0
    self.img_source: np.ndarray
    self.mask_dicts: Dict[str, np.ndarray]
    # FIX: was `if len(mapping) > 0`, which raised TypeError whenever the
    # default `mapping=None` was used; truthiness covers both None and [].
    if mapping:
        # case 1: there are no gt folders — mapping makes no sense.
        if len(mask_folder_list) == 0:
            raise RuntimeError(
                f"No mask is given, you do not need to have mapping, given {mapping}"
            )
        # case 2: a single mapping shared by every gt folder.
        # FIX: was `len(mask_folder_list) > 1`, which wrongly sent the valid
        # one-mapping / one-mask-folder case to NotImplementedError.
        elif len(mapping) == 1 and len(mask_folder_list) >= 1:
            print(
                f"Found an unique mapping: {mapping}, applying to all gt_masks"
            )
            # NOTE(review): this branch uses np.vectorize over dict.get while
            # the per-folder branch uses the project's `Vectorize` — presumably
            # equivalent; confirm before unifying.
            self.mapping_modules: Dict[str, Callable[
                [np.ndarray], np.ndarray]] = {
                    f"{str(m)}": np.vectorize(lambda x: mapping[0].get(x))
                    for m in self.mask_folder_list
                }
        # case 3: one mapping per gt folder.
        elif len(mapping) == len(mask_folder_list) and len(mask_folder_list) >= 2:
            print(
                f"Found {len(mapping)} mapping: {mapping}, applying to {len(mask_folder_list)} gt_masks"
            )
            self.mapping_modules: Dict[str, Callable[
                [np.ndarray], np.ndarray]] = {
                    f"{str(m)}": Vectorize(mapping_)
                    for m, mapping_ in zip(self.mask_folder_list, mapping)
                }
        else:
            raise NotImplementedError("mapping logic is wrong.")
    else:
        # No mapping requested: identity for the image and every mask folder.
        self.mapping_modules: Dict[str, Callable[[np.ndarray], np.ndarray]] = {
            "original image": identical
        }
        self.mapping_modules.update(
            {f"{m}": identical for m in self.mask_folder_list})
    # Preload the first subject's image stack and mask stacks.
    self.img_source, self.mask_dicts = self._preload_subjects(
        self.img_paths_group[self.identifies[self.current_identify]],
        self.mask_paths_group_dict[self.identifies[self.current_identify]],
        self.mapping_modules,
    )