def __init__(
    self,
    images_dir,
    transform=None,
    image_size=256,
    subset="train",
    random_sampling=True,
    validation_cases=10,
    seed=42,
):
    """Read, split and preprocess per-patient image/mask volumes.

    Every directory under ``images_dir`` that contains ``.tif`` files is
    treated as one patient, keyed by the directory name. Files whose name
    contains ``"mask"`` are read as grayscale mask slices, all others as
    image slices. Slices are ordered by the integer in the fifth
    ``_``-separated field of the file name, and the first and last slice
    of every patient are dropped.

    Args:
        images_dir: Root directory that is walked recursively.
        transform: Stored on ``self.transform``; not applied here.
        image_size: Passed to ``resize_sample`` as ``size``.
        subset: One of ``"all"``, ``"train"`` or ``"validation"``.
        random_sampling: Stored on ``self.random_sampling``; not used here.
        validation_cases: Number of patients held out for validation.
        seed: Seed for the global ``random`` generator, so that the
            train/validation split is reproducible across instances.

    Raises:
        AssertionError: If ``subset`` is not one of the accepted values.
        ValueError: From ``random.sample`` when ``validation_cases``
            exceeds the number of patients found.
    """
    assert subset in ["all", "train", "validation"]

    # read images
    volumes = {}
    masks = {}
    print("reading {} images...".format(subset))
    for dirpath, _dirnames, filenames in os.walk(images_dir):
        image_slices = []
        mask_slices = []
        for filename in sorted(
            filter(lambda f: ".tif" in f, filenames),
            key=lambda x: int(x.split(".")[-2].split("_")[4]),
        ):
            filepath = os.path.join(dirpath, filename)
            if "mask" in filename:
                mask_slices.append(imread(filepath, as_gray=True))
            else:
                image_slices.append(imread(filepath))
        if len(image_slices) > 0:
            # basename() honours the platform's path separator, unlike
            # splitting on a hard-coded "/".
            patient_id = os.path.basename(dirpath)
            volumes[patient_id] = np.array(image_slices[1:-1])
            masks[patient_id] = np.array(mask_slices[1:-1])

    self.patients = sorted(volumes)

    # select cases to subset
    if not subset == "all":
        # Seeding the global generator keeps the split identical between
        # the "train" and "validation" instances built with the same seed.
        random.seed(seed)
        validation_patients = random.sample(self.patients, k=validation_cases)
        if subset == "validation":
            self.patients = validation_patients
        else:
            self.patients = sorted(
                set(self.patients).difference(validation_patients)
            )

    print("preprocessing {} volumes...".format(subset))
    # create list of tuples (volume, mask)
    self.volumes = [(volumes[k], masks[k]) for k in self.patients]

    print("cropping {} volumes...".format(subset))
    # crop to smallest enclosing volume
    self.volumes = [crop_sample(v) for v in self.volumes]

    print("padding {} volumes...".format(subset))
    # pad to square
    self.volumes = [pad_sample(v) for v in self.volumes]

    print("resizing {} volumes...".format(subset))
    # resize
    self.volumes = [resize_sample(v, size=image_size) for v in self.volumes]

    print("normalizing {} volumes...".format(subset))
    # normalize channel-wise
    self.volumes = [(normalize_volume(v), m) for v, m in self.volumes]

    # probabilities for sampling slices based on masks
    # (sums over the last two mask axes; assumes masks are still
    # (slices, H, W) at this point -- TODO confirm against the helpers)
    per_slice_mask_sums = [m.sum(axis=-1).sum(axis=-1) for _v, m in self.volumes]
    self.slice_weights = []
    for s in per_slice_mask_sums:
        total = s.sum()
        if total != 0:
            # Mask-proportional weights blended with a 10% uniform floor;
            # the 1.1 divisor renormalises the result to sum to one.
            weights = (s + (total * 0.1 / len(s))) / (total * 1.1)
        else:
            # An entirely empty mask would give 0/0 = NaN weights; fall
            # back to a uniform distribution over the slices instead.
            weights = np.full(len(s), 1.0 / max(len(s), 1))
        self.slice_weights.append(weights)

    # add channel dimension to masks
    self.volumes = [(v, m[..., np.newaxis]) for (v, m) in self.volumes]

    print("done creating {} dataset".format(subset))

    # create global index for patient and slice (idx -> (p_idx, s_idx))
    num_slices = [v.shape[0] for v, _m in self.volumes]
    self.patient_slice_index = [
        (p_idx, s_idx)
        for p_idx, count in enumerate(num_slices)
        for s_idx in range(count)
    ]

    self.random_sampling = random_sampling

    self.transform = transform
def __init__(self, images_dir, image_size=256, transform=None,
             random_sampling=True, subset="train", is_resize=True,
             image_short_side=512, is_padding=False, seed=42):
    """Load every image/mask pair of the chosen subset into memory.

    Masks are listed from ``<images_dir>/masks``; the image for each mask
    is read from ``<images_dir>/imgs`` under the same file name. Ten
    percent of the (sorted) file names are held out as the validation
    subset.

    Args:
        images_dir: Dataset root containing ``imgs`` and ``masks``.
        image_size: Stored on ``self.image_size``.
        transform: Optional callable ``(img, mask) -> (img, mask)`` applied
            to every pair after resizing.
        random_sampling: Stored on ``self.random_sampling``.
        subset: One of ``"all"``, ``"train"`` or ``"validation"``.
        is_resize: Stored on ``self.is_resize``.
        image_short_side: Stored on ``self.image_short_side``.
        is_padding: When true, each pair is passed through ``pad_sample``.
        seed: Seed for the train/validation split. A fixed seed makes the
            "train" and "validation" instances agree on the held-out files,
            so the two subsets never overlap. ``None`` seeds from system
            entropy and yields a different split per construction.

    Raises:
        AssertionError: If ``subset`` is invalid or an image and its mask
            differ in size.
        IOError: If an image file cannot be read.
    """
    self.images_dir = images_dir
    self.transform = transform
    self.image_size = image_size
    self.random_sampling = random_sampling
    self.is_resize = is_resize
    self.image_short_side = image_short_side
    self.is_padding = is_padding

    assert subset in ["all", "train", "validation"]

    self.volumes = {}
    self.masks = {}
    print("begining")

    img_list = os.listdir(os.path.join(self.images_dir, "masks"))
    self.patients = sorted(img_list)

    validation_cases = int(0.1 * len(self.patients))
    if not subset == "all":
        # A private, seeded generator gives a reproducible split without
        # touching the state of the global ``random`` module.
        validation_patients = random.Random(seed).sample(
            self.patients, k=validation_cases
        )
        if subset == "validation":
            self.patients = validation_patients
        else:
            self.patients = sorted(
                set(self.patients).difference(validation_patients)
            )

    for img_name in tqdm(self.patients):
        mask = Image.open(os.path.join(self.images_dir, "masks", img_name))

        img_path = os.path.join(self.images_dir, "imgs", img_name)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports failure by returning None rather than
            # raising; fail here with the offending path instead of deep
            # inside Image.fromarray.
            raise IOError("could not read image: {}".format(img_path))
        # NOTE(review): cv2.imread yields BGR channel order, and
        # Image.fromarray does not reorder channels, so the stored image
        # has red and blue swapped. Left as-is because already-trained
        # weights may depend on it -- confirm before changing.
        img = Image.fromarray(img).convert("RGB")

        if self.is_padding:
            img, mask = pad_sample(img, mask)
        # Image and mask must describe the same pixel grid.
        assert img.size == mask.size

        img, mask = self.resize_img(img, mask)

        # Prepend an axis to the mask array before converting it to a
        # uint8 tensor.
        mask = np.array(mask)
        mask = mask[np.newaxis, ...]
        mask = torch.as_tensor(mask, dtype=torch.uint8)

        if self.transform is not None:
            img, mask = self.transform(img, mask)

        self.volumes[img_name] = img
        self.masks[img_name] = mask

    print('load image is end .....')