    @classmethod
    def _make_dataset(cls,
                      root: str,
                      mode: str,
                      subfolders: List[str],
                      verbose=True) -> Dict[str, List[str]]:
        assert mode in cls.dataset_modes, mode
        for subfolder in subfolders:
            assert (Path(root, mode, subfolder).exists()
                    and Path(root, mode, subfolder).is_dir()), os.path.join(
                        root, mode, subfolder)

        items = [
            os.listdir(Path(root, mode, subfolder))
            for subfolder in subfolders
        ]
        # keep only the files whose extension is allowed
        items = sorted(
            [[x for x in item if allow_extension(x, cls.allow_extension)]
             for item in items])
        assert len(set(map_(len, items))) == 1, map_(len, items)

        imgs = {}
        for subfolder, item in zip(subfolders, items):
            imgs[subfolder] = sorted([
                os.path.join(root, mode, subfolder, x_path) for x_path in item
            ])
        assert (len(set(map_(len, imgs.values()))) == 1
                ), "image lists for different subfolders have different lengths."

        if verbose:
            for subfolder in subfolders:
                print(f"found {len(imgs[subfolder])} images in {subfolder}")
        return imgs
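# The two helpers used above, `map_` and `allow_extension`, are not defined in this
# snippet. A minimal sketch of what they plausibly do, inferred only from how they are
# called here (names, signatures, and behaviour are assumptions):
from pathlib import Path
from typing import Callable, Iterable, List, Union


def map_(fn: Callable, *iterables) -> List:
    # eager version of `map` that returns a list, so results can be compared and reused
    return list(map(fn, *iterables))


def allow_extension(path: str, extensions: Union[str, Iterable[str]]) -> bool:
    # True if the file's suffix is among the allowed extensions (with or without a dot)
    if isinstance(extensions, str):
        extensions = [extensions]
    allowed = {e.lstrip(".").lower() for e in extensions}
    return Path(path).suffix.lstrip(".").lower() in allowed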
    def __getitem__(self, index) -> Tuple[List[Tensor], str]:
        img_list, filename_list = self._getitem_index(index)
        assert len(img_list) == len(self.subfolders)
        # make sure every entry for this index refers to the same image (identical stem)
        assert (len(set(map_(lambda x: Path(x).stem, filename_list))) == 1
                ), f"Check the filename list, given {filename_list}."
        filename = Path(filename_list[0]).stem
        img_list = self._transform(*img_list)
        return img_list, filename
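# A standalone illustration of the consistency check performed in __getitem__ above:
# every path returned for one index must share the same stem (file name without
# extension), whatever subfolder it comes from. The paths below are made up.
from pathlib import Path

filename_list = ["data/train/img/patient004_01_15.png",
                 "data/train/gt/patient004_01_15.png"]
assert len({Path(p).stem for p in filename_list}) == 1
print(Path(filename_list[0]).stem)  # -> patient004_01_15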
    def __init__(
        self,
        dataset: MedicalImageSegmentationDataset,
        grp_regex: str,
        shuffle=False,
        verbose=True,
        infinite_sampler: bool = False,
    ) -> None:
        filenames: List[str] = dataset.get_filenames()
        self.grp_regex = grp_regex
        self.shuffle: bool = shuffle
        self.shuffle_fn: Callable = (
            lambda x: random.sample(x, len(x))) if self.shuffle else id_
        self._infinite_sampler = infinite_sampler
        if verbose:
            print(f"Grouping using {self.grp_regex} regex")
        grouping_regex: Pattern = re.compile(self.grp_regex)

        stems: List[str] = [Path(filename).stem for filename in filenames
                            ]  # avoid matching the extension
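        # match() must succeed at the start of the stem; if it returns None the
        # comprehension below raises, and we fall back to search() so the pattern
        # may occur anywhere in the stem.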
        try:
            matches: List[Match] = map_(grouping_regex.match, stems)
            patients: List[str] = [match.group(0) for match in matches]
        except Exception:
            matches: List[Match] = map_(grouping_regex.search, stems)
            patients: List[str] = [match.group(0) for match in matches]

        unique_patients: List[str] = sorted(list(set(patients)))
        assert len(unique_patients) < len(filenames)
        if verbose:
            print(
                f"Found {len(unique_patients)} unique patients out of {len(filenames)} images"
            )
        self.idx_map: Dict[str,
                           List[int]] = dict(zip(unique_patients,
                                                 repeat(None)))
        for i, patient in enumerate(patients):
            if not self.idx_map[patient]:
                self.idx_map[patient] = []
            self.idx_map[patient] += [i]
        assert sum(len(self.idx_map[k])
                   for k in unique_patients) == len(filenames)
        if verbose:
            print("Patient to slices mapping done")
Example #4
    def __init__(
        self,
        img_folder: str,
        mask_folder_list: List[str],
        group_pattern=r"patient\d+_\d+",
        img_extension: str = "png",
        crop: int = 0,
        mapping=None,
        resize=None,
    ) -> None:
        super().__init__()
        self.img_folder: Path = Path(img_folder)
        self.crop = crop
        self.resize = resize
        assert self.img_folder.exists() and self.img_folder.is_dir(), self.img_folder
        self.mask_folder_list = [Path(m) for m in mask_folder_list]
        self.num_mask = len(self.mask_folder_list)
        if self.num_mask > 0:
            for m in self.mask_folder_list:
                assert m.exists() and m.is_dir(), m
        self.group_pattern: str = group_pattern
        self.img_extension = img_extension
        self.img_paths, self.mask_paths_dict = self._load_images(
            self.img_folder, self.mask_folder_list, self.img_extension)
        self.img_paths_group, self.mask_paths_group_dict = self._group_images(
            self.img_paths, self.mask_paths_dict, self.group_pattern)
        assert self.img_paths_group.keys() == self.mask_paths_group_dict.keys()
        for subject, paths in self.img_paths_group.items():
            for m, m_paths in self.mask_paths_group_dict[subject].items():
                assert map_(lambda x: x.stem,
                            paths) == map_(lambda x: x.stem, m_paths)
        print(
            f"Found {len(self.img_paths_group.keys())} subjects with a total of {len(self.img_paths)} images."
        )
        self.identifies: List[str] = list(self.img_paths_group.keys())
        print(f"identifies: {self.identifies[:5]}...")
        self.current_identify = 0
        self.img_source: np.ndarray
        self.mask_dicts: Dict[str, np.ndarray]
        if mapping:  # guard against the default `mapping=None`, which means "no mapping"
            # case 1: there is no gt_folders:
            if len(mask_folder_list) == 0:
                raise RuntimeError(
                    f"No mask folder is given, so no mapping is needed; got {mapping}"
                )
            # case 2: all gt_folder has the same mapping
            elif len(mapping) == 1 and len(mask_folder_list) > 1:
                print(
                    f"Found a unique mapping: {mapping}, applying it to all gt_masks"
                )
                self.mapping_modules: Dict[str, Callable[
                    [np.ndarray], np.ndarray]] = {
                        f"{str(m)}": np.vectorize(lambda x: mapping[0].get(x))
                        for m in self.mask_folder_list
                    }
            elif len(mapping) == len(
                    mask_folder_list) and len(mask_folder_list) >= 2:
                print(
                    f"Found {len(mapping)} mappings: {mapping}, applying them to {len(mask_folder_list)} gt_masks"
                )
                self.mapping_modules: Dict[str, Callable[
                    [np.ndarray], np.ndarray]] = {
                        f"{str(m)}": Vectorize(mapping_)
                        for m, mapping_ in zip(self.mask_folder_list, mapping)
                    }
            else:
                raise NotImplementedError(
                    f"Cannot align {len(mapping)} mappings with {len(mask_folder_list)} mask folders."
                )
        else:
            # no mapping given: fall back to the identity mapping
            self.mapping_modules: Dict[str,
                                       Callable[[np.ndarray], np.ndarray]] = {
                                           "original image": identical
                                       }
            self.mapping_modules.update(
                {f"{m}": identical
                 for m in self.mask_folder_list})

        self.img_source, self.mask_dicts = self._preload_subjects(
            self.img_paths_group[self.identifies[self.current_identify]],
            self.mask_paths_group_dict[self.identifies[self.current_identify]],
            self.mapping_modules,
        )
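# A standalone illustration of the single-mapping branch above: np.vectorize turns a
# plain dict lookup into an element-wise remapping over a label mask. The label values
# below are made up for the example.
import numpy as np

mapping = [{0: 0, 127: 1, 255: 2}]
remap = np.vectorize(lambda x: mapping[0].get(x))
mask = np.array([[0, 127], [255, 0]])
print(remap(mask))
# [[0 1]
#  [2 0]]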