def test_read_patches(self, file_path, patch_info, expected_img):
    """Read a patch with the configured backend and compare it to ``expected_img``.

    For the ``"tifffile"`` backend the patch read is expected to raise
    ``ValueError`` instead of returning data.
    """
    wsi_reader = WSIReader(self.backend)
    with wsi_reader.read(file_path) as wsi:
        if self.backend != "tifffile":
            patch = wsi_reader.get_data(wsi, **patch_info)[0]
            self.assertTupleEqual(patch.shape, expected_img.shape)
            self.assertIsNone(assert_array_equal(patch, expected_img))
        else:
            with self.assertRaises(ValueError):
                wsi_reader.get_data(wsi, **patch_info)[0]
def test_read_malformats(self, img_expected):
    """Write ``img_expected`` as a TIFF and check that reading it back raises an error."""
    lacks_multi_channel = len(img_expected.shape) < 3 or img_expected.shape[2] == 1
    if self.backend == "cucim" and lacks_multi_channel:
        # Until cuCIM addresses https://github.com/rapidsai/cucim/issues/230
        return

    wsi_reader = WSIReader(self.backend)
    data_dir = os.path.join(os.path.dirname(__file__), "testing_data")
    file_path = os.path.join(data_dir, "temp_tiff_image_gray.tiff")
    imwrite(file_path, img_expected, shape=img_expected.shape)

    # only reference openslide's exception type when the library is available
    osl_error = openslide.OpenSlideError if has_osl else ValueError
    expected_errors = (RuntimeError, ValueError, osl_error)
    with self.assertRaises(expected_errors):
        with wsi_reader.read(file_path) as wsi:
            wsi_reader.get_data(wsi)
def test_read_malformats(self, img_expected):
    """Save ``img_expected`` via ``save_gray_tiff`` and check that reading it back raises an error."""
    wsi_reader = WSIReader(self.backend)

    target_path = os.path.join(os.path.dirname(__file__), "testing_data", "temp_tiff_image_gray.tiff")
    file_path = save_gray_tiff(img_expected, target_path)

    # only reference openslide's exception type when the library is available
    osl_error = openslide.OpenSlideError if has_osl else ValueError
    expected_errors = (RuntimeError, ValueError, osl_error)
    with self.assertRaises(expected_errors):
        with wsi_reader.read(file_path) as wsi:
            wsi_reader.get_data(wsi)
def test_read_region(self, file_path, patch_info, expected_img):
    """Read a region twice with the configured backend and compare it to ``expected_img``.

    For the ``"tifffile"`` backend, extra ``name``/``offset`` keyword arguments are
    passed to the reader and the region read is expected to raise ``ValueError``.
    """
    is_tifffile = self.backend == "tifffile"
    kwargs = {"name": None, "offset": None} if is_tifffile else {}

    wsi_reader = WSIReader(self.backend, **kwargs)
    with wsi_reader.read(file_path, **kwargs) as wsi:
        if not is_tifffile:
            # Read twice to check multiple calls
            first = wsi_reader.get_data(wsi, **patch_info)[0]
            second = wsi_reader.get_data(wsi, **patch_info)[0]
            self.assertTupleEqual(first.shape, second.shape)
            self.assertIsNone(assert_array_equal(first, second))
            self.assertTupleEqual(first.shape, expected_img.shape)
            self.assertIsNone(assert_array_equal(first, expected_img))
        else:
            with self.assertRaises(ValueError):
                wsi_reader.get_data(wsi, **patch_info)[0]
def test_read_patches(self, file_url, patch_info, expected_img):
    """Download the slide at ``file_url``, read a patch with OpenSlide, and compare it to ``expected_img``."""
    local_file = self.camelyon_data_download(file_url)

    wsi_reader = WSIReader("OpenSlide")
    wsi = wsi_reader.read(local_file)
    result = wsi_reader.get_data(wsi, **patch_info)
    patch = result[0]

    self.assertTupleEqual(patch.shape, expected_img.shape)
    self.assertIsNone(assert_array_equal(patch, expected_img))
def test_read_rgba(self, img_expected):
    """Create RGB and RGBA images from ``img_expected`` and check cuCIM reads both back as ``img_expected``."""
    wsi_reader = WSIReader("cuCIM")
    loaded = {}
    for mode in ("RGB", "RGBA"):
        file_path = self.create_rgba_image(img_expected, "temp_cu_tiff_image", mode=mode)
        wsi = wsi_reader.read(file_path)
        loaded[mode], _ = wsi_reader.get_data(wsi)

    self.assertIsNone(assert_array_equal(loaded["RGB"], img_expected))
    self.assertIsNone(assert_array_equal(loaded["RGBA"], img_expected))
def test_read_rgba(self, img_expected):
    """Save RGB and RGBA TIFFs from ``img_expected`` and check both are read back as ``img_expected``."""
    # skip for OpenSlide since not working with images without tiles
    if self.backend == "openslide":
        return

    wsi_reader = WSIReader(self.backend)
    data_dir = os.path.join(os.path.dirname(__file__), "testing_data")
    loaded = {}
    for mode in ("RGB", "RGBA"):
        target_path = os.path.join(data_dir, f"temp_tiff_image_{mode}.tiff")
        file_path = save_rgba_tiff(img_expected, target_path, mode=mode)
        with wsi_reader.read(file_path) as wsi:
            loaded[mode], _ = wsi_reader.get_data(wsi)

    self.assertIsNone(assert_array_equal(loaded["RGB"], img_expected))
    self.assertIsNone(assert_array_equal(loaded["RGBA"], img_expected))
def create_masks(args):
    """Build a boolean tissue mask from a whole slide image and save it with ``np.save``.

    Reads ``args.wsi_path`` at ``args.level`` with the cuCIM reader, then keeps the
    pixels that pass all of: the Otsu threshold on the second HSV channel, not being
    above the Otsu threshold in all three RGB channels at once, and being above
    ``args.RGB_min`` in every RGB channel. The mask is written to ``args.npy_path``.
    """
    logging.basicConfig(level=logging.INFO)

    wsi_reader = WSIReader(reader_lib="cuCIM")
    slide = wsi_reader.read(args.wsi_path)
    rgb, _ = wsi_reader.get_data(slide, level=args.level)
    # rgb is indexed channel-first below; move the channel axis last for rgb2hsv
    hsv = rgb2hsv(rgb.transpose(1, 2, 0))

    channels = (rgb[0], rgb[1], rgb[2])

    # a pixel is background only if it exceeds the Otsu threshold in all three channels
    above_otsu = [channel > threshold_otsu(channel) for channel in channels]
    tissue_rgb = np.logical_not(above_otsu[0] & above_otsu[1] & above_otsu[2])

    second_hsv_channel = hsv[..., 1]
    tissue_s = second_hsv_channel > threshold_otsu(second_hsv_channel)

    above_min = [channel > args.RGB_min for channel in channels]

    tissue_mask = tissue_s & tissue_rgb & above_min[0] & above_min[1] & above_min[2]
    np.save(args.npy_path, tissue_mask)
class PatchWSIDataset(Dataset):
    """
    This dataset reads whole slide images, extracts regions, and creates patches.
    It also reads labels for each patch and provides each patch with its associated class labels.

    Args:
        data: the list of input samples including image, location, and label (see the note below for more details).
        region_size: the size of regions to be extracted from the whole slide image.
        grid_shape: the grid shape on which the patches should be extracted.
        patch_size: the size of patches extracted from the region on the grid.
        transform: transforms to be executed on input data.
        image_reader_name: the name of library to be used for loading whole slide imaging, either CuCIM or OpenSlide.
            The OpenSlide check that disables prefetching is case-insensitive. Defaults to CuCIM.

    Note:
        The input data has the following form as an example:
        `[{"image": "path/to/image1.tiff", "location": [200, 500], "label": [0,0,0,1]}]`.

        This means from "image1.tiff" extract a region centered at the given location `location`
        with the size of `region_size`, and then extract patches with the size of `patch_size`
        from a grid with the shape of `grid_shape`.
        Be aware that the `grid_shape` should construct a grid with the same number of element as `labels`,
        so for this example the `grid_shape` should be (2, 2).

    """

    def __init__(
        self,
        data: List,
        region_size: Union[int, Tuple[int, int]],
        grid_shape: Union[int, Tuple[int, int]],
        patch_size: Union[int, Tuple[int, int]],
        transform: Optional[Callable] = None,
        image_reader_name: str = "cuCIM",
    ):
        super().__init__(data, transform)

        self.region_size = ensure_tuple_rep(region_size, 2)
        self.grid_shape = ensure_tuple_rep(grid_shape, 2)
        self.patch_size = ensure_tuple_rep(patch_size, 2)

        # unique image paths across all samples
        self.image_path_list = list({x["image"] for x in self.data})

        self.image_reader_name = image_reader_name
        self.image_reader = WSIReader(image_reader_name)
        self.wsi_object_dict = None
        if not self._uses_openslide():
            # OpenSlide causes memory issue if we prefetch image objects
            self._fetch_wsi_objects()

    def _uses_openslide(self) -> bool:
        """Return True when the configured reader name is OpenSlide, ignoring letter case."""
        return self.image_reader_name.lower() == "openslide"

    def _fetch_wsi_objects(self):
        """Load all the image objects and reuse them when asked for an item."""
        self.wsi_object_dict = {
            image_path: self.image_reader.read(image_path) for image_path in self.image_path_list
        }

    def __getitem__(self, index):
        """Return the list of ``{"image", "label"}`` patch dictionaries for sample ``index``."""
        sample = self.data[index]
        if self._uses_openslide():
            # image objects are not prefetched for OpenSlide, so open the slide on demand
            img_obj = self.image_reader.read(sample["image"])
        else:
            img_obj = self.wsi_object_dict[sample["image"]]

        # shift the given location by half the region size along each dimension
        location = [sample["location"][i] - self.region_size[i] // 2 for i in range(len(self.region_size))]
        images, _ = self.image_reader.get_data(
            img=img_obj,
            location=location,
            size=self.region_size,
            grid_shape=self.grid_shape,
            patch_size=self.patch_size,
        )
        labels = np.array(sample["label"], dtype=np.float32)
        # expand dimensions to have 4 dimension as batch, class, height, and width.
        for _ in range(4 - labels.ndim):
            labels = np.expand_dims(labels, 1)
        patches = [{"image": images[i], "label": labels[i]} for i in range(len(sample["label"]))]
        if self.transform:
            patches = self.transform(patches)
        return patches
class MaskedInferenceWSIDataset(Dataset):
    """
    This dataset load the provided foreground masks at an arbitrary resolution level,
    and extract patches based on that mask from the associated whole slide image.

    Args:
        data: a list of sample including the path to the whole slide image and the path to the mask.
            Like this: `[{"image": "path/to/image1.tiff", "mask": "path/to/mask1.npy"}, ...]`.
        patch_size: the size of patches to be extracted from the whole slide image for inference.
        transform: transforms to be executed on extracted patches.
        image_reader_name: the name of library to be used for loading whole slide imaging, either CuCIM or OpenSlide.
            Defaults to CuCIM.

    Note:
        The resulting output (probability maps) after performing inference using this dataset is
            supposed to be the same size as the foreground mask and not the original wsi image size.
    """

    def __init__(
        self,
        data: List[Dict[str, str]],
        patch_size: Union[int, Tuple[int, int]],
        transform: Optional[Callable] = None,
        image_reader_name: str = "cuCIM",
    ) -> None:
        super().__init__(data, transform)

        self.patch_size = ensure_tuple_rep(patch_size, 2)

        # set up whole slide image reader
        self.image_reader_name = image_reader_name
        self.image_reader = WSIReader(image_reader_name)

        # process data and create a list of dictionaries containing all required data and metadata
        self.data = self._prepare_data(data)

        # calculate cumulative number of patches for all the samples
        self.num_patches_per_sample = [len(d["image_locations"]) for d in self.data]
        self.num_patches = sum(self.num_patches_per_sample)
        # cum_num_patches[k] is the global index of the first patch of sample k
        self.cum_num_patches = np.cumsum([0] + self.num_patches_per_sample[:-1])

    def _prepare_data(self, input_data: List[Dict[str, str]]) -> List[Dict]:
        """Prepare every input sample with `_prepare_a_sample` and return the results as a list."""
        return [self._prepare_a_sample(sample) for sample in input_data]

    def _prepare_a_sample(self, sample: Dict[str, str]) -> Dict:
        """
        Preprocess input data to load WSIReader object and the foreground mask,
        and define the locations where patches need to be extracted.

        Args:
            sample: one sample, a dictionary containing path to the whole slide image and the foreground mask.
                For example: `{"image": "path/to/image1.tiff", "mask": "path/to/mask1.npy"}`

        Return:
            A dictionary containing:
                "name": the base name of the whole slide image,
                "image": the WSIReader image object,
                "mask_shape": the size of the foreground mask,
                "mask_locations": the list of non-zero pixel locations (x, y) on the foreground mask,
                "image_locations": the list of pixel locations (x, y) on the whole slide image where patches are
                    extracted, and
                "level": the resolution level of the mask with respect to the whole slide image.

        Raises:
            ValueError: re-raised from `_calculate_mask_level` with the mask path prepended to its arguments.
        """
        image = self.image_reader.read(sample["image"])
        mask = np.load(sample["mask"])
        try:
            level, ratio = self._calculate_mask_level(image, mask)
        except ValueError as err:
            # prepend the mask path so the failing sample can be identified
            err.args = (sample["mask"],) + err.args
            raise

        # get all indices for non-zero pixels of the foreground mask
        mask_locations = np.vstack(mask.nonzero()).T

        # convert mask locations to image locations to extract patches
        image_locations = (mask_locations + 0.5) * ratio - np.array(self.patch_size) // 2

        return {
            "name": os.path.splitext(os.path.basename(sample["image"]))[0],
            "image": image,
            "mask_shape": mask.shape,
            "mask_locations": mask_locations.astype(int).tolist(),
            "image_locations": image_locations.astype(int).tolist(),
            "level": level,
        }

    def _calculate_mask_level(self, image: np.ndarray, mask: np.ndarray) -> Tuple[int, float]:
        """
        Calculate level of the mask and its ratio with respect to the whole slide image

        Args:
            image: the original whole slide image
            mask: a mask, that can be down-sampled at an arbitrary level.
                Note that down-sampling ratio should be 2^N and equal in all dimension.

        Return:
            tuple: (level, ratio) where ratio is 2^level

        Raises:
            ValueError: if the ratio differs between the first two dimensions,
                or if the ratio is not a power of 2.
        """
        image_shape = image.shape
        mask_shape = mask.shape
        ratios = [image_shape[i] / mask_shape[i] for i in range(2)]

        if ratios[0] != ratios[1]:
            raise ValueError(
                "Image/Mask ratio across dimensions does not match! "
                f"ratio 0: {ratios[0]} ({image_shape[0]} / {mask_shape[0]}), "
                f"ratio 1: {ratios[1]} ({image_shape[1]} / {mask_shape[1]})"
            )

        level = np.log2(ratios[0])
        if not level.is_integer():
            raise ValueError(f"Mask is not at a regular level (ratio not power of 2), image / mask ratio: {ratios[0]}")

        return int(level), ratios[0]

    def _load_a_patch(self, index):
        """
        Load sample given the index

        Since index is sequential and the patches are coming in an stream from different images,
        this method, first, finds the whole slide image and the patch that should be extracted,
        then it loads the patch and provide it with its image name and the corresponding mask location.
        """
        # argmax finds the first sample whose starting index is beyond `index`; the one before it
        # owns the patch. When no start is beyond `index` (the patch is in the last sample), argmax
        # returns 0, so sample_num is -1, which indexes the last sample.
        sample_num = np.argmax(self.cum_num_patches > index) - 1
        sample = self.data[sample_num]
        patch_num = index - self.cum_num_patches[sample_num]
        location_on_image = sample["image_locations"][patch_num]
        location_on_mask = sample["mask_locations"][patch_num]

        image, _ = self.image_reader.get_data(
            img=sample["image"],
            location=location_on_image,
            size=self.patch_size,
        )
        processed_sample = {"image": image, "name": sample["name"], "mask_location": location_on_mask}
        return processed_sample

    def __len__(self):
        """Return the total number of patches over all samples."""
        return self.num_patches

    def __getitem__(self, index):
        """Return a single-element list holding the patch at ``index``, after the optional transform."""
        patch = [self._load_a_patch(index)]
        if self.transform:
            patch = self.transform(patch)
        return patch
class LesionFROC:
    """
    Evaluate with Free Response Operating Characteristic (FROC) score.

    Args:
        data: either the list of dictionaries containing probability maps (inference result) and
            tumor mask (ground truth), as below, or the path to a json file containing such list.
            `{
            "prob_map": "path/to/prob_map_1.npy",
            "tumor_mask": "path/to/ground_truth_1.tiff",
            "level": 6,
            "pixel_spacing": 0.243
            }`
        grow_distance: Euclidean distance (in micrometer) by which to grow the label the ground truth's tumors.
            Defaults to 75, which is the equivalent size of 5 tumor cells.
        itc_diameter: the maximum diameter of a region (in micrometer) to be considered as an isolated tumor cell.
            Defaults to 200.
        eval_thresholds: the false positive rates for calculating the average sensitivity.
            Defaults to (0.25, 0.5, 1, 2, 4, 8) which is the same as the CAMELYON 16 Challenge.
        nms_sigma: the standard deviation for gaussian filter of non-maximal suppression. Defaults to 0.0.
        nms_prob_threshold: the probability threshold of non-maximal suppression. Defaults to 0.5.
        nms_box_size: the box size (in pixel) to be removed around the pixel for non-maximal suppression.
        image_reader_name: the name of library to be used for loading whole slide imaging, either CuCIM or OpenSlide.
            Defaults to CuCIM.

    Note:
        For more info on `nms_*` parameters look at `monai.utils.prob_nms.ProbNMS`.

    """

    def __init__(
        self,
        data: List[Dict],
        grow_distance: int = 75,
        itc_diameter: int = 200,
        eval_thresholds: Tuple = (0.25, 0.5, 1, 2, 4, 8),
        nms_sigma: float = 0.0,
        nms_prob_threshold: float = 0.5,
        nms_box_size: int = 48,
        image_reader_name: str = "cuCIM",
    ) -> None:
        # evaluation inputs and settings
        self.data = data
        self.grow_distance = grow_distance
        self.itc_diameter = itc_diameter
        self.eval_thresholds = eval_thresholds

        # helpers for reading the ground truth and for non-maximal suppression
        self.image_reader = WSIReader(image_reader_name)
        self.nms = PathologyProbNMS(
            sigma=nms_sigma,
            prob_threshold=nms_prob_threshold,
            box_size=nms_box_size,
        )

    def prepare_inference_result(self, sample: Dict):
        """
        Prepare the probability map for detection evaluation.

        Loads ``sample["prob_map"]`` with ``np.load``, applies non-maximal suppression, and
        returns three arrays: the first, second, and third field of every NMS output, in that order.
        All three arrays are empty when NMS yields nothing.
        """
        # load the probability map (the result of model inference) and apply non-maximal suppression
        detections = self.nms(probs_map=np.load(sample["prob_map"]), resolution_level=sample["level"])

        if not detections:
            return np.array([]), np.array([]), np.array([])

        # NOTE(review): these fields were locally named (x, y) here, while compute_fp_tp
        # unpacks the same positions as (y, x) — confirm the axis order of the NMS output.
        probs, first_coord, second_coord = zip(*detections)
        return np.array(probs), np.array(first_coord), np.array(second_coord)

    def prepare_ground_truth(self, sample):
        """
        Prepare the ground truth for evaluation based on the binary tumor mask

        Returns the multi-instance tumor mask and the labels identified as isolated tumor cells.
        """
        level = sample["level"]

        # load binary tumor masks
        mask_obj = self.image_reader.read(sample["tumor_mask"])
        mask_data = self.image_reader.get_data(mask_obj, level=level)
        binary_mask = mask_data[0][0]

        # calculate pixel spacing at the mask level
        mask_pixel_spacing = sample["pixel_spacing"] * pow(2, level)

        # compute multi-instance mask from a binary mask
        grow_pixel_threshold = self.grow_distance / (mask_pixel_spacing * 2)
        instance_mask = compute_multi_instance_mask(mask=binary_mask, threshold=grow_pixel_threshold)

        # identify isolated tumor cells
        itc_threshold = (self.itc_diameter + self.grow_distance) / mask_pixel_spacing
        itc_labels = compute_isolated_tumor_cells(tumor_mask=instance_mask, threshold=itc_threshold)

        return instance_mask, itc_labels

    def compute_fp_tp(self):
        """
        Compute false positive and true positive probabilities for tumor detection,
        by comparing the model outputs with the prepared ground truths for all samples

        Returns a tuple of (false positive probabilities, true positive probabilities,
        total number of targets, number of images).
        """
        num_images = len(self.data)
        all_fp_probs = []
        all_tp_probs = []
        target_count = 0

        for sample in tqdm(self.data):
            probs, y_coord, x_coord = self.prepare_inference_result(sample)
            ground_truth, itc_labels = self.prepare_ground_truth(sample)

            # compute FP and TP probabilities for a pair of an image and an ground truth mask
            fp_probs, tp_probs, num_targets = compute_fp_tp_probs(
                probs=probs,
                y_coord=y_coord,
                x_coord=x_coord,
                evaluation_mask=ground_truth,
                labels_to_exclude=itc_labels,
                resolution_level=sample["level"],
            )
            all_fp_probs.extend(fp_probs)
            all_tp_probs.extend(tp_probs)
            target_count += num_targets

        return (np.array(all_fp_probs), np.array(all_tp_probs), target_count, num_images)

    def evaluate(self):
        """
        Evaluate the detection performance of a model based on the model probability map output,
        the ground truth tumor mask, and their associated metadata (e.g., pixel_spacing, level)
        """
        # compute false positive (FP) and true positive (TP) probabilities for all images
        fp_probs, tp_probs, num_targets, num_images = self.compute_fp_tp()

        # compute FROC curve given the evaluation of all images
        fps_per_image, total_sensitivity = compute_froc_curve_data(
            fp_probs=fp_probs,
            tp_probs=tp_probs,
            num_targets=num_targets,
            num_images=num_images,
        )

        # compute FROC score give specific evaluation threshold
        return compute_froc_score(
            fps_per_image=fps_per_image,
            total_sensitivity=total_sensitivity,
            eval_thresholds=self.eval_thresholds,
        )
def test_read_region(self, file_path, patch_info, expected_img):
    """Read a region with the OpenSlide reader and compare it to ``expected_img``."""
    wsi_reader = WSIReader("OpenSlide")
    wsi = wsi_reader.read(file_path)
    result = wsi_reader.get_data(wsi, **patch_info)
    region = result[0]

    self.assertTupleEqual(region.shape, expected_img.shape)
    self.assertIsNone(assert_array_equal(region, expected_img))
def test_read_whole_image(self, file_path, expected_shape):
    """Read the whole image with the OpenSlide reader and check its shape."""
    wsi_reader = WSIReader("OpenSlide")
    wsi = wsi_reader.read(file_path)
    result = wsi_reader.get_data(wsi)
    whole_image = result[0]

    self.assertTupleEqual(whole_image.shape, expected_shape)
def test_read_whole_image(self, file_url, expected_shape):
    """Download the slide at ``file_url``, read it whole with OpenSlide, and check its shape."""
    local_file = self.camelyon_data_download(file_url)

    wsi_reader = WSIReader("OpenSlide")
    wsi = wsi_reader.read(local_file)
    result = wsi_reader.get_data(wsi)
    whole_image = result[0]

    self.assertTupleEqual(whole_image.shape, expected_shape)
def test_read_whole_image(self, file_path, level, expected_shape):
    """Read the whole image with the configured backend, passing ``level`` to the reader, and check its shape."""
    wsi_reader = WSIReader(self.backend, level=level)
    with wsi_reader.read(file_path) as wsi:
        result = wsi_reader.get_data(wsi)
        whole_image = result[0]
    self.assertTupleEqual(whole_image.shape, expected_shape)