Exemple #1
0
 def __call__(self, img):
     """Randomly apply a stain augmentation to ``img``.

     One random draw is compared against ``self.prob``. When the draw is
     not below ``self.prob`` the input is returned untouched. Otherwise the
     image is converted to a uint8 array, passed through
     ``staintools.LuminosityStandardizer.standardize`` and used to fit a
     fresh Vahadane ``StainAugmentor`` (sigma1 = sigma2 = 0.2); one
     augmented sample is popped and returned as a PIL image.

     Side effects: on the augmentation path the standardized array and the
     fitted augmentor are stored on the instance as ``self.to_augment`` and
     ``self.augmentor``.

     Args:
         img: image accepted by ``np.array`` (presumably a PIL image --
             TODO confirm against the caller).

     Returns:
         ``img`` itself when no augmentation is applied, otherwise a new
         ``PIL.Image`` built from the augmented uint8 array.
     """
     # Guard clause: leave the image alone unless the draw falls below prob.
     if not random.random() < self.prob:
         return img

     pixels = np.array(img).astype('uint8')
     self.to_augment = staintools.LuminosityStandardizer.standardize(pixels)
     self.augmentor = staintools.StainAugmentor(
         method='vahadane', sigma1=0.2, sigma2=0.2)
     self.augmentor.fit(self.to_augment)

     stained = self.augmentor.pop()
     return Image.fromarray(stained.astype('uint8'))
Exemple #2
0
def save_segmentation(i,
                      max_row,
                      max_col,
                      loc,
                      seg,
                      home_dir,
                      count,
                      fileNames,
                      augment=0,
                      tile_size=224,
                      blank_ratio=0.5):
    """Save the usable tiles of grid row ``i`` as JPEG files.

    A tile ``seg[i, j]`` is written only when it is exactly
    ``tile_size`` x ``tile_size`` and the fraction of its pixels whose
    first channel exceeds 220 is below ``blank_ratio``. Files go to
    ``<home_dir>/<count>/`` and are named
    ``<stem>_<count>_<loc values joined by "_">.jpg``, where ``<stem>`` is
    ``fileNames`` without its extension. When ``augment`` is positive, a
    Vahadane ``StainAugmentor`` is fitted on the tile and ``augment``
    samples are written next to it with an ``aug_<index>_`` prefix.

    Args:
        i (int): row index into ``seg`` / ``loc``.
        max_row (int): unused; kept for interface compatibility.
        max_col (int): number of columns of row ``i`` to visit.
        loc: 2-D indexable of per-tile coordinate sequences; the values of
            ``loc[i, j]`` are embedded in the file name.
        seg: 2-D indexable of image arrays (rows, cols, channels).
        home_dir (str): base output directory.
        count: identifier used for the sub-directory and in file names.
        fileNames (str): source image file name.
        augment (int): number of augmented copies to write per saved tile.
        tile_size (int): required tile height and width in pixels.
        blank_ratio (float): a tile is skipped when its blank fraction is
            not below this value.

    Returns:
        None
    """
    stem = os.path.splitext(fileNames)[0]
    output_dir = os.path.join(home_dir, str(count))

    for j in range(max_col):
        tile = seg[i, j]
        shape = np.shape(tile)

        # The original test `a == t & b == t` parsed as the chained
        # comparison `a == (t & b) == t`, which accepted tiles of the wrong
        # width (e.g. 224 x 225). Compare each dimension explicitly, and
        # skip cells that do not hold a 2-D+ array at all.
        if len(shape) < 2 or shape[0] != tile_size or shape[1] != tile_size:
            continue

        blank_fraction = np.sum(tile[:, :, 0] > 220) / (shape[0] * shape[1])
        if not blank_fraction < blank_ratio:
            continue

        os.makedirs(output_dir, exist_ok=True)
        tile_name = "{}_{}_{}.jpg".format(stem, count,
                                          '_'.join(map(str, loc[i, j])))
        cv2.imwrite(os.path.join(output_dir, tile_name), tile)

        if augment > 0:
            augmentor = staintools.StainAugmentor(method='vahadane',
                                                  sigma1=0.2,
                                                  sigma2=0.2)
            augmentor.fit(tile)
            for index in range(augment):
                augmented_image = augmentor.pop()
                cv2.imwrite(
                    os.path.join(output_dir,
                                 "aug_{}_{}".format(index, tile_name)),
                    augmented_image)
Exemple #3
0
    def __init__(self,
                 data_path,
                 transform_args,
                 metadata_csv,
                 split,
                 num_classes=2,
                 resize_shape=(DEFAULT_PATCH_SIZE, DEFAULT_PATCH_SIZE),
                 max_patches=None,
                 tasks_to='tcga',
                 is_training=False,
                 filtered=True,
                 toy=False,
                 normalize=False,
                 transform=None):
        """Initialize TCGADataset.

        The patches are read from ``<data_path>/<split>.hdf5`` and the
        metadata from ``metadata_csv`` joined onto ``self.data_dir``.

        Args:
            data_path (str): path to data directory
            transform_args (args): arguments to transform data
            metadata_csv (str): path to csv containing metadata information
                of the dataset, relative to ``self.data_dir``
            split (str): split name; selects the ``<split>.hdf5`` file.
                The value is not validated here.
            num_classes (int): number of unique labels
            resize_shape (tuple): shape to resize the inputs to
            max_patches (int): max number of patches to obtain for each slide
            tasks_to (str): corresponds to a task sequence
            is_training (bool): whether the model in in training mode or not
            filtered (bool): whether to filter the images
            toy (bool): stored as ``self.toy``; presumably selects a reduced
                dataset in helpers not shown here -- TODO confirm
            normalize (bool): stored as ``self.normalize``; presumably
                toggles patch normalization -- TODO confirm
            transform: stored as ``self.transform``
        """
        super().__init__(data_path, transform_args, split, is_training, 'tcga',
                         tasks_to)
        self.data_path = data_path
        self.hdf5_path = os.path.join(self.data_path, "{}.hdf5".format(split))

        self.split = split
        self.is_training = is_training
        # self.data_dir is not assigned in this method; presumably it is
        # provided by the base-class initializer -- TODO confirm.
        self.metadata_path = os.path.join(self.data_dir, metadata_csv)
        print("metadata_path: {}".format(self.metadata_path))
        self.metadata = pd.read_csv(self.metadata_path)
        print("hdf5 path: {}".format(self.hdf5_path))

        # Honour the caller's choice; this was previously hard-coded to True,
        # which silently ignored the ``toy`` argument.
        self.toy = toy

        self.filtered = filtered
        # The top-level keys of the HDF5 file are the slides that actually
        # have patch data; the file is closed again right away.
        with h5py.File(self.hdf5_path, "r") as db:
            self.valid_slides = list(db)

        self.slide_list = self.metadata[COL_TCGA_SLIDE_ID]

        print("Num valid slides {}".format(len(self.valid_slides)))

        self.num_classes = num_classes

        self.resize_shape = resize_shape
        self.max_patches_per_slide = max_patches

        # The helpers below read attributes assigned above, so the order of
        # these statements matters.
        self.patch_list = self._get_patch_list()
        print("Patch list shape: {}".format(self.patch_list.shape))

        self.label_dict = self._get_label_dict(tasks_to)

        self.labels = self._get_labels()
        self._set_class_weights(self.labels)
        self.transform = transform
        self.normalize = normalize
        # tools for patch normalization
        self.standardizer = staintools.BrightnessStandardizer()
        self.color_normalizer = staintools.ReinhardColorNormalizer()
        self.normalizer_with_constants = transforms.Compose(
            [transforms.Normalize(mean=TCGA_MEAN, std=TCGA_STD)])
        self.ToTensor = transforms.Compose([transforms.ToTensor()])
        # tools for image augmentation
        self.stain_augmentor = staintools.StainAugmentor(method='vahadane',
                                                         sigma1=0.2,
                                                         sigma2=0.2)
Exemple #4
0
# Fit the stain normalizer on the first image, then map the other four
# images onto its stain (transformed in order i2, i3, i4, i5).
normalizer = staintools.StainNormalizer(method=METHOD)
normalizer.fit(i1)
i2_normalized, i3_normalized, i4_normalized, i5_normalized = [
    normalizer.transform(source) for source in (i2, i3, i4, i5)
]

# Save a figure showing the target followed by the normalized images.
images = [i1, i2_normalized, i3_normalized, i4_normalized, i5_normalized]
titles = ["Target"] + ["Stain normalized"] * 4
staintools.plot_image_list(
    images,
    width=5,
    title_list=titles,
    save_name=RESULTS_DIR + 'stain-normalized-images.png',
    show=0,
)

# ==================
# Stain augmentation
# ==================

# Fit the augmentor on the first image and draw ten augmented samples.
augmentor = staintools.StainAugmentor(method=METHOD, sigma1=0.4, sigma2=0.4)
augmentor.fit(i1)
augmented_images = [augmentor.pop() for _ in range(10)]

# Save a figure showing the augmented samples.
titles = ["Augmented"] * len(augmented_images)
staintools.plot_image_list(
    augmented_images,
    width=5,
    title_list=titles,
    save_name=RESULTS_DIR + 'stain-augmented-images.png',
    show=0,
)
Exemple #5
0
def split_into_tiles(home_dir, fileNames, img_mat, count, normalizer,
                     blank_ratio=0.5, tile_size=224, overlapping=0.25,
                     augment=0, thread=1):
    """Normalize an image, cut it into square tiles and save the usable ones.

    The image is passed through ``normalizer.transform`` and scanned with a
    sliding window whose stride is ``(tile_size + 1) * (1 - overlapping)``.
    Windows that run past the bottom or right edge are clipped to the image
    and therefore end up smaller than ``tile_size``; such tiles are
    discarded. A full-size tile is saved only when the fraction of its
    pixels whose first channel exceeds 220 is below ``blank_ratio``.

    Files are written to ``<home_dir>/<count>/`` as
    ``<stem>_<count>_<top>_<bottom>_<left>_<right>.jpg`` where ``<stem>`` is
    ``fileNames`` without its extension. When ``augment`` is positive a
    Vahadane ``StainAugmentor`` is fitted on each saved tile and ``augment``
    samples are written with an ``aug_<index>_`` prefix.

    Args:
        home_dir (str): base output directory.
        fileNames (str): name of the source image file.
        img_mat: image array of shape (rows, cols, channels).
        count: identifier used for the sub-directory and in file names.
        normalizer: object exposing ``transform(image)``.
        blank_ratio (float): a tile is skipped when its blank fraction is
            not below this value.
        tile_size (int): height and width of each tile in pixels.
        overlapping (float): portion of a window shared by two consecutive
            sliding windows; must be below 1 (a value of 1 gives a zero
            stride and raises ZeroDivisionError).
        augment (int): number of augmented copies to write per saved tile.
        thread (int): when equal to 1 a progress bar is printed after each
            tile row; any other value prints a single start message.

    Returns:
        None
    """
    img = normalizer.transform(img_mat)
    h, w = img.shape[:2]

    # Every slice starts at offset + 1, so a window of tile_size + 1 yields
    # exactly tile_size pixels.
    # NOTE(review): because of that + 1, pixel row 0 and column 0 are never
    # part of any tile; kept as-is so the coordinates in file names stay
    # stable.
    window = tile_size + 1
    stride = window * (1 - overlapping)

    # Number of window positions per axis, rounded up so the clipped edge
    # windows are visited too; never negative for images smaller than a tile.
    max_row = max(0, int(np.ceil((h - window) / stride + 1)))
    max_col = max(0, int(np.ceil((w - window) / stride + 1)))

    stem = os.path.splitext(fileNames)[0]
    output_dir = os.path.join(home_dir, str(count))

    if thread == 1:
        print("Segmentation Progress:")
    else:
        print("Begin Segmentation")

    for i in range(max_row):
        sys.stdout.write('\r')
        top = int(i * stride + 1)
        # Clip to the image: rows are bounded by h and columns by w (the
        # original row-overflow branch compared them the wrong way round).
        bottom = min(int(window + i * stride), h)

        for j in range(max_col):
            left = int(j * stride + 1)
            right = min(int(window + j * stride), w)
            tile = img[top:bottom, left:right, :]

            # `a == t & b == t` parsed as `a == (t & b) == t`; compare each
            # dimension explicitly so only true tile_size squares pass.
            if tile.shape[0] != tile_size or tile.shape[1] != tile_size:
                continue

            blank_fraction = np.sum(tile[:, :, 0] > 220) / (tile_size * tile_size)
            if not blank_fraction < blank_ratio:
                continue

            os.makedirs(output_dir, exist_ok=True)
            tile_name = "{}_{}_{}.jpg".format(
                stem, count, '_'.join(map(str, (top, bottom, left, right))))
            cv2.imwrite(os.path.join(output_dir, tile_name), tile)

            if augment > 0:
                augmentor = staintools.StainAugmentor(method='vahadane',
                                                      sigma1=0.2, sigma2=0.2)
                augmentor.fit(tile)
                for index in range(augment):
                    augmented_image = augmentor.pop()
                    cv2.imwrite(
                        os.path.join(output_dir,
                                     "aug_{}_{}".format(index, tile_name)),
                        augmented_image)

        if thread == 1:
            # True division so the bar advances with every row instead of
            # jumping from 0% to 100% on the last one.
            done = (i + 1) / max_row
            sys.stdout.write("[%-20s] %d%%\n" % ('=' * int(20 * done), 100 * done))
            sys.stdout.flush()