def stain_norm(std_img, f_p, dst):
    """Stain-normalize every image file in a folder against one reference.

    The reference image is brightness-standardized and used to fit a
    Vahadane stain normalizer. Each regular file in ``f_p`` is then
    brightness-standardized, stain-normalized and written to ``dst`` under
    its original file name.

    Args:
        std_img: path of the reference image the normalizer is fitted to.
        f_p: directory holding the images to normalize.
        dst: output directory; created when it does not exist yet.
    """
    standardizer = staintools.BrightnessStandardizer()
    stain_normalizer = staintools.StainNormalizer(method='vahadane')

    # Fit once on the brightness-standardized reference image.
    reference = standardizer.transform(staintools.read_image(std_img))
    stain_normalizer.fit(reference)

    os.makedirs(dst, exist_ok=True)
    for name in os.listdir(f_p):
        src_path = os.path.join(f_p, name)
        if not os.path.isfile(src_path):
            # os.listdir also yields sub-directories, which cannot be read
            # as images; skip them instead of aborting the whole batch.
            continue

        img = staintools.read_image(src_path)
        normalized = stain_normalizer.transform(standardizer.transform(img))

        # staintools.read_image hands back RGB data, whereas cv2.imwrite
        # interprets the array as BGR. Convert first so the saved file keeps
        # the correct colors instead of swapped red/blue channels.
        cv2.imwrite(os.path.join(dst, name),
                    cv2.cvtColor(normalized, cv2.COLOR_RGB2BGR))
def transform(image, target_im):
    """Stain-normalize one image against a reference and return the result.

    Note the parameter roles, which are easy to misread from their names:
    ``image`` is the reference the normalizer is fitted to, and
    ``target_im`` is the image that actually gets normalized.

    Args:
        image: path of the reference image used to fit the normalizer.
        target_im: path of the image to be stain-normalized.

    Returns:
        The output of the fitted Vahadane normalizer applied to the
        brightness-standardized ``target_im`` image.
    """
    # Load both images from disk.
    reference_img = staintools.read_image(image)
    source_img = staintools.read_image(target_im)

    # Brightness standardization is optional, but can improve the tissue
    # mask calculation.
    brightness = staintools.BrightnessStandardizer()
    reference_img = brightness.transform(reference_img)
    source_img = brightness.transform(source_img)

    # Fit on the reference, then map the source image onto its stains.
    vahadane = staintools.StainNormalizer(method='vahadane')
    vahadane.fit(reference_img)
    return vahadane.transform(source_img)
return (cropped_img, cropped_mask, index) crop_size2 = [224, 224] i = 0 while i < len(tumor_paths): tumor_path = tumor_paths[i] mask_path = osp.join( mask_paths, osp.basename(tumor_paths[i].replace('.png', '_mask.png'))) #image = plt.imread(tumor_path) imgmask = io.imread(mask_path) stain_normalizer = staintools.StainNormalizer(method='vahadane') imagest = staintools.read_image("/home/wli/Downloads/test/tumor_st.png") img = staintools.read_image(tumor_path) standardizer = staintools.BrightnessStandardizer() imagest_standard = standardizer.transform(imagest) img_standard = standardizer.transform(img) stain_normalizer.fit(imagest_standard) img_norm = stain_normalizer.transform(img_standard) imageroted1 = i1_flip = np.fliplr(img_norm) maskroted1 = i1_flip = np.fliplr(imgmask) imageroted2 = np.rot90(img_norm, 1) imageroted3 = np.rot90(img_norm, 2) imageroted4 = np.rot90(img_norm, 3) maskroted2 = np.rot90(imgmask, 1) maskroted3 = np.rot90(imgmask, 2) maskroted4 = np.rot90(imgmask, 3)
def __init__(self, data_path, transform_args, metadata_csv, split,
             num_classes=2,
             resize_shape=(DEFAULT_PATCH_SIZE, DEFAULT_PATCH_SIZE),
             max_patches=None, tasks_to='tcga', is_training=False,
             filtered=True, toy=False, normalize=False, transform=None):
    """Initialize TCGADataset.

    data directory to be organized as follows:
        data_path
            slide_list.pkl
            train.hdf5
            val.hdf5
            test.hdf5
            metadata.csv

    The patch database that gets opened is ``<data_path>/<split>.hdf5``.

    Args:
        data_path (str): path to data directory
        transform_args (args): arguments to transform data; forwarded
            to the base-class initializer
        metadata_csv (str): csv containing metadata information of the
            dataset, given relative to ``self.data_dir``
        split (str): name of the split; selects which ``.hdf5`` file is
            read. The value is not validated here.
        num_classes (int): number of unique labels
        resize_shape (tuple): shape to resize the inputs to
        max_patches (int): max number of patches to obtain for each slide
        tasks_to (str): corresponds to a task sequence
        is_training (bool): whether the model is in training mode or not
        filtered (bool): whether to filter the images
        toy (bool): stored as ``self.toy``; how it is consumed is decided
            by other methods of the class
        normalize (bool): stored as ``self.normalize``
        transform: stored as ``self.transform``
    """
    super().__init__(data_path, transform_args, split, is_training,
                     'tcga', tasks_to)

    self.data_path = data_path
    self.hdf5_path = os.path.join(self.data_path, "{}.hdf5".format(split))
    self.split = split
    self.is_training = is_training

    # NOTE(review): self.data_dir is not assigned in this method; it is
    # presumably provided by the base-class initializer -- confirm.
    self.metadata_path = os.path.join(self.data_dir, metadata_csv)
    print("metadata_path: {}".format(self.metadata_path))
    self.metadata = pd.read_csv(self.metadata_path)
    print("hdf5 path: {}".format(self.hdf5_path))

    # Honor the caller's choice; this used to be hard-coded to True, which
    # silently ignored the `toy` argument.
    self.toy = toy
    self.filtered = filtered

    # Only the top-level keys of the HDF5 file are needed here, so the
    # file is closed again right away.
    with h5py.File(self.hdf5_path, "r") as db:
        self.valid_slides = list(db)
    self.slide_list = self.metadata[COL_TCGA_SLIDE_ID]
    print("Num valid slides {}".format(len(self.valid_slides)))

    self.num_classes = num_classes
    self.resize_shape = resize_shape
    self.max_patches_per_slide = max_patches

    # The helpers below read the attributes assigned above, so the order
    # of these statements matters.
    self.patch_list = self._get_patch_list()
    print("Patch list shape: {}".format(self.patch_list.shape))
    self.label_dict = self._get_label_dict(tasks_to)
    self.labels = self._get_labels()
    self._set_class_weights(self.labels)

    self.transform = transform
    self.normalize = normalize

    # tools for patch normalization
    self.standardizer = staintools.BrightnessStandardizer()
    self.color_normalizer = staintools.ReinhardColorNormalizer()
    self.normalizer_with_constants = transforms.Compose(
        [transforms.Normalize(mean=TCGA_MEAN, std=TCGA_STD)])
    self.ToTensor = transforms.Compose([transforms.ToTensor()])

    # tools for image augmentation
    self.stain_augmentor = staintools.StainAugmentor(method='vahadane',
                                                     sigma1=0.2,
                                                     sigma2=0.2)