# Example #1
    def __init__(
        self,
        images_dir,
        transform=None,
        image_size=256,
        subset="train",
        random_sampling=True,
        validation_cases=10,
        seed=42,
    ):
        """Read every patient volume under ``images_dir`` and preprocess it.

        Walks ``images_dir`` recursively; each directory containing .tif
        files is treated as one patient.  Files with "mask" in their name
        become mask slices, the rest image slices.  Volumes are cropped,
        padded to square, resized to ``image_size`` and channel-normalized,
        and a per-slice sampling weight is derived from the mask area.

        Args:
            images_dir: root directory of per-patient .tif slice folders.
            transform: optional callable; stored on the instance only.
            image_size: target square edge length for resizing.
            subset: one of "all", "train", "validation".
            random_sampling: flag stored for later use — presumably enables
                weighted slice sampling in __getitem__ (not used here).
            validation_cases: number of patients held out for validation.
            seed: RNG seed so the train/validation split is reproducible.
        """
        assert subset in ["all", "train", "validation"]

        # read images: one dict entry per patient directory
        volumes = {}
        masks = {}
        print("reading {} images...".format(subset))
        for (dirpath, dirnames, filenames) in os.walk(images_dir):
            image_slices = []
            mask_slices = []
            # Sort slices numerically by the 5th "_"-separated token of the
            # filename stem — assumes names like prefix_a_b_c_<sliceidx>.tif.
            # TODO(review): confirm this matches the dataset's naming scheme.
            for filename in sorted(
                    filter(lambda f: ".tif" in f, filenames),
                    key=lambda x: int(x.split(".")[-2].split("_")[4]),
            ):
                filepath = os.path.join(dirpath, filename)
                if "mask" in filename:
                    mask_slices.append(imread(filepath, as_gray=True))
                else:
                    image_slices.append(imread(filepath))
            if len(image_slices) > 0:
                # NOTE(review): splitting on "/" assumes POSIX-style paths;
                # os.path.basename(dirpath) would be portable.
                patient_id = dirpath.split("/")[-1]
                # The first and last slice of every volume are dropped.
                volumes[patient_id] = np.array(image_slices[1:-1])
                masks[patient_id] = np.array(mask_slices[1:-1])

        self.patients = sorted(volumes)

        # select cases to subset: seeded sample keeps the split reproducible
        if not subset == "all":
            random.seed(seed)
            validation_patients = random.sample(self.patients,
                                                k=validation_cases)
            if subset == "validation":
                self.patients = validation_patients
            else:
                self.patients = sorted(
                    list(set(self.patients).difference(validation_patients)))

        print("preprocessing {} volumes...".format(subset))
        # create list of tuples (volume, mask), ordered like self.patients
        self.volumes = [(volumes[k], masks[k]) for k in self.patients]

        print("cropping {} volumes...".format(subset))
        # crop to smallest enclosing volume
        self.volumes = [crop_sample(v) for v in self.volumes]

        print("padding {} volumes...".format(subset))
        # pad to square
        self.volumes = [pad_sample(v) for v in self.volumes]

        print("resizing {} volumes...".format(subset))
        # resize
        self.volumes = [
            resize_sample(v, size=image_size) for v in self.volumes
        ]

        print("normalizing {} volumes...".format(subset))
        # normalize channel-wise (masks are left untouched)
        self.volumes = [(normalize_volume(v), m) for v, m in self.volumes]

        # probabilities for sampling slices based on masks: per-slice mask
        # area, smoothed by 10% of the mean so empty slices keep a nonzero
        # probability, then renormalized so each volume's weights sum to 1
        self.slice_weights = [
            m.sum(axis=-1).sum(axis=-1) for v, m in self.volumes
        ]
        self.slice_weights = [(s + (s.sum() * 0.1 / len(s))) / (s.sum() * 1.1)
                              for s in self.slice_weights]

        # add channel dimension to masks
        self.volumes = [(v, m[..., np.newaxis]) for (v, m) in self.volumes]

        print("done creating {} dataset".format(subset))

        # create global index for patient and slice (idx -> (p_idx, s_idx))
        num_slices = [v.shape[0] for v, m in self.volumes]
        self.patient_slice_index = list(
            zip(
                sum([[i] * num_slices[i] for i in range(len(num_slices))], []),
                sum([list(range(x)) for x in num_slices], []),
            ))

        self.random_sampling = random_sampling

        self.transform = transform
# Example #2
    def __init__(self,
                 images_dir,
                 image_size=256,
                 transform=None,
                 random_sampling=True,
                 subset="train",
                 is_resize=True,
                 image_short_side=512,
                 is_padding=False):
        """Eagerly load (image, mask) pairs from ``images_dir``.

        Expects two sibling sub-directories, ``imgs`` and ``masks``, whose
        files share names.  Roughly 10% of the files are held out as the
        validation split (NOTE(review): the RNG is not seeded here, so the
        split differs between runs — confirm this is intended).

        Args:
            images_dir: root directory containing ``imgs`` and ``masks``.
            image_size: stored target size; presumably used by resize_img.
            transform: optional callable applied to each (img, mask) pair.
            random_sampling: flag stored for later use (not used here).
            subset: one of "all", "train", "validation".
            is_resize: stored flag; presumably consulted by resize_img.
            image_short_side: stored short-side size for resizing.
            is_padding: when True, pad each pair via pad_sample.
        """
        # Stash configuration on the instance.
        self.images_dir = images_dir
        self.transform = transform
        self.image_size = image_size
        self.random_sampling = random_sampling
        self.is_resize = is_resize
        self.image_short_side = image_short_side
        self.is_padding = is_padding

        assert subset in ["all", "train", "validation"]

        self.volumes = {}
        self.masks = {}

        print("begining")

        # Every mask filename defines one sample.
        self.patients = sorted(os.listdir(os.path.join(self.images_dir,
                                                       "masks")))

        # Hold out ~10% of the samples for validation.
        validation_cases = int(0.1 * len(self.patients))

        if subset != "all":
            held_out = random.sample(self.patients, k=validation_cases)
            if subset == "validation":
                self.patients = held_out
            else:
                self.patients = sorted(
                    list(set(self.patients).difference(held_out)))

        for name in tqdm(self.patients):
            mask = Image.open(os.path.join(self.images_dir, "masks", name))
            # Read via OpenCV, then hand over to PIL as an RGB image.
            raw = cv2.imread(os.path.join(self.images_dir, "imgs", name))
            img = Image.fromarray(raw).convert("RGB")

            if self.is_padding:
                img, mask = pad_sample(img, mask)

            assert img.size == mask.size
            img, mask = self.resize_img(img, mask)

            # Masks become (1, H, W) uint8 tensors.
            mask = torch.as_tensor(np.array(mask)[np.newaxis, ...],
                                   dtype=torch.uint8)

            if self.transform is not None:
                img, mask = self.transform(img, mask)

            self.volumes[name] = img
            self.masks[name] = mask

        print('load image is end .....')