def test_prep(girderClient):  # noqa
    """Fill the shared ``cfg`` object with what the later tests need.

    Stores the girder client, an RGB region of the sample slide fetched at
    low magnification, and a boolean ``mask_out`` of the same height/width
    as that region.  ``mask_out`` is True where ``get_tissue_mask`` assigned
    label 0 (presumably non-tissue -- confirm against get_tissue_mask).
    """
    cfg.gc = girderClient

    matches = cfg.gc.get(
        '/item', parameters={'text': "TCGA-A2-A0YE-01Z-00-DX1"})
    slide = matches[0]

    # Fetch a fixed RGB region of the slide at a small magnification.
    magnification = 1.5
    left, right, top, bottom = 46890, 50000, 40350, 43000
    region_url = (
        "/item/%s/tiles/region?left=%d&right=%d&top=%d&bottom=%d" % (
            slide['_id'], left, right, top, bottom))
    region_url += "&magnification=%.2f" % magnification
    response = cfg.gc.get(region_url, jsonResp=False)
    cfg.tissue_rgb = get_image_from_htk_response(response)

    # Build the mask of pixels to ignore.
    cfg.mask_out, _ = get_tissue_mask(
        cfg.tissue_rgb,
        deconvolve_first=False,
        n_thresholding_steps=1,
        sigma=1.5,
        min_size=30,
    )
    unlabeled = cfg.mask_out == 0
    # order=0 resize keeps the mask binary; "== 1" turns it back into bool.
    cfg.mask_out = resize(
        unlabeled,
        output_shape=cfg.tissue_rgb.shape[:2],
        order=0,
        preserve_range=True,
    ) == 1
def set_slide_info_and_get_tissue_mask(self):
    """Fetch slide metadata into ``self.slide_info`` and return the tissue mask.

    The labeled mask is computed from the slide thumbnail (optionally
    Reinhard-normalized first) and is *returned*, not stored on ``self``.
    When ``self.visualize_tissue_boundary`` is truthy the tissue boundaries
    are also posted to the slide as annotation documents.

    Returns
    -------
    labeled
        First output of ``get_tissue_mask``; each unique value is one
        tissue piece.

    Raises
    ------
    ValueError
        If the labeled mask holds fewer than two distinct values.
    """
    # Persistent dict with information about the slide.
    self.slide_info = self.gc.get('item/%s/tiles' % self.slide_id)

    thumbnail = get_slide_thumbnail(self.gc, self.slide_id)

    # Optional color normalization of the thumbnail.
    if 'thumbnail' in self.cnorm_params.keys():
        thumbnail = np.uint8(reinhard(
            im_src=thumbnail,
            target_mu=self.cnorm_params['thumbnail']['mu'],
            target_sigma=self.cnorm_params['thumbnail']['sigma'],
        ))

    labeled, _ = get_tissue_mask(thumbnail, **self.get_tissue_mask_kwargs)

    n_labels = len(np.unique(labeled))
    if n_labels < 2:
        raise ValueError("No tissue detected!")

    if self.visualize_tissue_boundary:
        boundary_docs = get_tissue_boundary_annotation_documents(
            self.gc, slide_id=self.slide_id, labeled=labeled)
        for boundary_doc in boundary_docs:
            self.gc.post(
                "/annotation?itemId=" + self.slide_id, json=boundary_doc)

    # Ratio of the full-slide width to the mask width.
    slide_width = self.slide_info['sizeX']
    self.slide_info['F_tissue'] = slide_width / labeled.shape[1]

    return labeled
def test_get_tissue_mask(self):
    """Check the shape and label count of get_tissue_mask() on the sample slide.

    Also writes the binarized mask to ``savepath`` for use by the next test.
    """
    mask_kwargs = {
        'deconvolve_first': True,
        'n_thresholding_steps': 1,
        'sigma': 1.5,
        'min_size': 30,
    }
    thumbnail_rgb = get_slide_thumbnail(gc, SAMPLE_SLIDE_ID)
    labeled, mask = get_tissue_mask(thumbnail_rgb, **mask_kwargs)

    # # visualize result
    # vals = np.random.rand(256,3)
    # vals[0, ...] = [0.9, 0.9, 0.9]
    # cMap = ListedColormap(1 - vals)
    #
    # f, ax = plt.subplots(1, 3, figsize=(20, 20))
    # ax[0].imshow(thumbnail_rgb)
    # ax[1].imshow(labeled, cmap=cMap)
    # ax[2].imshow(mask, cmap=cMap)
    # plt.show()

    self.assertTupleEqual(labeled.shape, (156, 256))
    n_labels = len(np.unique(labeled))
    self.assertEqual(n_labels, 10)

    # Persist a 0/1 uint8 version of the mask for the next test.
    binary_mask = np.uint8(0 + (labeled > 0))
    out_path = os.path.join(savepath, 'tissue_binmask.png')
    imwrite(out_path, binary_mask)
def test_get_tissue_mask(self):
    """Check get_tissue_mask() on the thumbnail of the configured slide.

    The labeled mask is kept on ``cfg.labeled`` so that other tests can
    reuse it.
    """
    mask_kwargs = {
        'deconvolve_first': True,
        'n_thresholding_steps': 1,
        'sigma': 1.5,
        'min_size': 30,
    }
    slide_id = cfg.iteminfo['_id']
    thumbnail_rgb = get_slide_thumbnail(cfg.gc, slide_id)
    cfg.labeled, mask = get_tissue_mask(thumbnail_rgb, **mask_kwargs)

    assert cfg.labeled.shape == (156, 256)
    n_labels = len(np.unique(cfg.labeled))
    assert n_labels == 11
def get_tissue_contours(gc, item_id, magnification=1.25,
                        contour_area_threshold=15000):
    """Get the contours of the tissue in a WSI using a lower magnification
    version of the image. A threshold can be provided to remove small
    contours.

    Parameters
    ----------
    gc : girder_client.GirderClient
        authenticated client for private images
    item_id : str
        DSA item id of image
    magnification : float (optional)
        magnification to pull image at
    contour_area_threshold : int (optional)
        contours whose area is not strictly greater than this value are
        excluded

    Returns
    -------
    tissue_contours : list
        opencv formatted contours of the tissue
    im : np.ndarray
        RGB low magnification image
    contour_im : np.ndarray
        RGB low magnification image with contours drawn in red

    """
    # get whole image at specific magnification (drop any alpha channel)
    im = get_region_im(gc, item_id, {'magnification': magnification})[:, :, :3]

    # get tissue mask using histomicsTK method
    labeled = get_tissue_mask(im)[0]

    # Binarize BEFORE narrowing to uint8.  The first output of
    # get_tissue_mask is an integer label image (one value per tissue
    # piece); casting it straight to uint8 wraps labels modulo 256, so a
    # piece labeled 256, 512, ... would become 0 and vanish from the mask.
    mask = np.where(labeled > 0, 255, 0).astype(np.uint8)

    # extract contours
    contours, _ = cv.findContours(mask, cv.RETR_EXTERNAL,
                                  cv.CHAIN_APPROX_TC89_KCOS)

    # remove any small contours
    tissue_contours = [
        contour for contour in contours
        if cv.contourArea(contour) > contour_area_threshold
    ]

    # draw the contours on an image copy
    contour_im = cv.drawContours(im.copy(), tissue_contours, -1,
                                 [255, 0, 0], 2)

    return tissue_contours, im, contour_im
def reinhard_color_stats(gc, item_id, magnification=1.25):
    """Compute per-channel mean and standard deviation of a DSA image item
    in the color space returned by ``rgb_to_lab`` (Reinhard color stats).

    Only pixels given a non-zero label by ``get_tissue_mask`` (called with
    its default parameters) contribute to the statistics.

    Parameters
    ----------
    gc : girder_client.GirderClient
        authenticated girder client for private images
    item_id : str
        image item id
    magnification : float (optional)
        magnification of the thumbnail used to calculate the color stats

    Returns
    -------
    mu : list
        mean of each of the 3 channels
    sigma : list
        standard deviation of each of the 3 channels

    """
    tiles_info = gc.get('item/{}/tiles'.format(item_id))

    # Thumbnail width that corresponds to the requested magnification.
    thumb_width = int(
        tiles_info['sizeX'] * magnification / tiles_info['magnification'])
    thumbnail = get_item_image(
        gc, item_id, 'thumbnail', return_type='Array', width=thumb_width)

    # True where no tissue was labeled; these pixels are excluded below.
    background = get_tissue_mask(thumbnail)[0] == 0

    # Repeat the 2-D background mask over the 3 channels and attach it as
    # the mask of a masked array (True = ignored).
    excluded = np.tile(background[..., None], (1, 1, 3))
    lab = np.ma.masked_array(rgb_to_lab(thumbnail), mask=excluded)

    channels = [lab[..., ch] for ch in range(3)]
    mu = [channel.mean() for channel in channels]
    sigma = [channel.std() for channel in channels]

    return mu, sigma
def set_slide_info_and_get_tissue_mask(self):
    """Fetch slide metadata into ``self.slide_info`` and return the tissue mask.

    The labeled mask is computed from the slide thumbnail and is
    *returned*, not stored on ``self``.

    Returns
    -------
    labeled
        First output of ``get_tissue_mask``; each unique value is one
        tissue piece.

    Raises
    ------
    ValueError
        If the labeled mask holds fewer than two distinct values.
    """
    # Persistent dict with information about the slide.
    self.slide_info = self.gc.get('item/%s/tiles' % self.slide_id)

    thumbnail = get_slide_thumbnail(self.gc, self.slide_id)
    labeled, _ = get_tissue_mask(thumbnail, **self.get_tissue_mask_kwargs)

    n_labels = len(np.unique(labeled))
    if n_labels < 2:
        raise ValueError("No tissue detected!")

    # Ratio of the full-slide width to the mask width.
    slide_width = self.slide_info['sizeX']
    self.slide_info['F_tissue'] = slide_width / labeled.shape[1]

    return labeled
def test_get_tissue_mask(self):
    """Check the shape and label count of get_tissue_mask() on the sample slide."""
    mask_kwargs = {
        'deconvolve_first': True,
        'n_thresholding_steps': 2,
        'sigma': 0.,
        'min_size': 30,
    }
    thumbnail_rgb = get_slide_thumbnail(gc, SAMPLE_SLIDE_ID)
    labeled, mask = get_tissue_mask(thumbnail_rgb, **mask_kwargs)

    # # visualize result
    # vals = np.random.rand(256,3)
    # vals[0, ...] = [0.9, 0.9, 0.9]
    # cMap = ListedColormap(1 - vals)
    #
    # f, ax = plt.subplots(1, 3, figsize=(20, 20))
    # ax[0].imshow(thumbnail_rgb)
    # ax[1].imshow(labeled, cmap=cMap)
    # ax[2].imshow(mask, cmap=cMap)
    # plt.show()

    self.assertTupleEqual(labeled.shape, (152, 256))
    n_labels = len(np.unique(labeled))
    self.assertEqual(n_labels, 10)
def find_potentially_cellular_regions(self):
    """Mark 'not_specified' pixels that look cellular as 'maybe_cellular'.

    Side effects: sets ``self.tissue_htx`` (inverted first channel of the
    color deconvolution), sets ``self.maybe_cellular`` (connected-component
    labels of the candidate regions) and overwrites the matching pixels of
    ``self.labeled`` with the 'maybe_cellular' GT code.
    """
    gt_codes = self.cdt.GTcodes

    # True wherever a pixel already carries a label other than
    # "not_specified"; those pixels are left untouched.
    mask_out = self.labeled != gt_codes.loc["not_specified", "GT_code"]

    # Deconvolve to get the hematoxylin channel (cellular areas).  Per the
    # original author's note the returned channel shows minima, hence the
    # inversion.
    stains, _, _ = color_deconvolution_routine(
        self.tissue_rgb,
        mask_out=mask_out,
        **self.cdt.stain_unmixing_routine_params)
    self.tissue_htx = 255 - stains[..., 0]

    # Step 1: threshold the hematoxylin channel.
    thresholded, _ = get_tissue_mask(
        self.tissue_htx.copy(),
        deconvolve_first=False,
        n_thresholding_steps=1,
        sigma=self.cdt.cellular_step1_sigma,
        min_size=self.cdt.cellular_step1_min_size)

    # Step 2: low-pass filter the binarized result to dilate and smooth it.
    binary = 0 + (thresholded > 0)
    smoothed = gaussian(
        binary,
        sigma=self.cdt.cellular_step2_sigma,
        output=None,
        mode='nearest',
        preserve_range=True)

    # Connected components, restricted to not-otherwise-specified pixels.
    components, _ = ndimage.label(smoothed)
    components[mask_out] = 0
    self.maybe_cellular = components

    # Write the result into the label mask.
    self.labeled[components > 0] = gt_codes.loc['maybe_cellular', 'GT_code']
def test_reinhard(self):
    """Test reinhard() with and without a mask of pixels to ignore."""
    tolerance = [0.1, 0.1, 0.1]

    def assert_matches_standard(mean_lab, std_lab):
        # Every channel must be within the tolerance of the preset standard.
        self.assertTrue(all(np.abs(mean_lab - cnorm['mu']) < tolerance))
        self.assertTrue(all(np.abs(std_lab - cnorm['sigma']) < tolerance))

    # get RGB image of the whole slide at a small magnification
    slide_info = gc.get('item/%s/tiles' % SAMPLE_SLIDE_ID)
    region_url = (
        "/item/%s/tiles/region?left=%d&right=%d&top=%d&bottom=%d" % (
            SAMPLE_SLIDE_ID, 0, slide_info['sizeX'], 0, slide_info['sizeY']))
    region_url += "&magnification=%.2f" % MAG
    response = gc.get(region_url, jsonResp=False)
    tissue_rgb = get_image_from_htk_response(response)

    # # SANITY CHECK! normalize to LAB mean and std from SAME slide
    # mean_lab, std_lab = lab_mean_std(tissue_rgb)
    # tissue_rgb_normalized = reinhard(
    #     tissue_rgb, target_mu=mean_lab, target_sigma=std_lab)
    #
    # # we expect the images to be (almost) exactly the same
    # assert np.mean(tissue_rgb - tissue_rgb_normalized) < 1

    # Normalize to pre-set color standard and check that it matches
    tissue_rgb_normalized = reinhard(
        tissue_rgb, target_mu=cnorm['mu'], target_sigma=cnorm['sigma'])
    assert_matches_standard(*lab_mean_std(tissue_rgb_normalized))

    # get tissue mask
    thumbnail_rgb = get_slide_thumbnail(gc, SAMPLE_SLIDE_ID)
    labeled, mask = get_tissue_mask(
        thumbnail_rgb,
        deconvolve_first=True,
        n_thresholding_steps=1,
        sigma=1.5,
        min_size=30)

    # # visualize result
    # vals = np.random.rand(256, 3)
    # vals[0, ...] = [0.9, 0.9, 0.9]
    # cMap = ListedColormap(1 - vals)
    #
    # f, ax = plt.subplots(1, 3, figsize=(20, 20))
    # ax[0].imshow(thumbnail_rgb)
    # ax[1].imshow(labeled, cmap=cMap)
    # ax[2].imshow(mask, cmap=cMap)
    # plt.show()

    # Do MASKED normalization to preset standard: pixels labeled 0 on the
    # thumbnail are upsampled to the image size and passed as mask_out.
    unlabeled = labeled == 0
    mask_out = resize(
        unlabeled,
        output_shape=tissue_rgb.shape[:2],
        order=0,
        preserve_range=True) == 1
    tissue_rgb_normalized = reinhard(
        tissue_rgb,
        target_mu=cnorm['mu'],
        target_sigma=cnorm['sigma'],
        mask_out=mask_out)

    # check that it matches
    assert_matches_standard(
        *lab_mean_std(tissue_rgb_normalized, mask_out=mask_out))
# %%===========================================================================

print("Getting images to be normalized ...")

# Fetch the whole slide as an RGB image at a small magnification.
slide_info = gc.get('item/%s/tiles' % SAMPLE_SLIDE_ID)
getStr = "/item/%s/tiles/region?left=%d&right=%d&top=%d&bottom=%d" % (
    SAMPLE_SLIDE_ID, 0, slide_info['sizeX'], 0, slide_info['sizeY'])
getStr += "&magnification=%.2f" % MAG
tissue_rgb = get_image_from_htk_response(gc.get(getStr, jsonResp=False))

# Build the mask of things to ignore: label the thumbnail, keep the pixels
# labeled 0, and upsample that to the size of the (uncropped) RGB image.
thumbnail_rgb = get_slide_thumbnail(gc, SAMPLE_SLIDE_ID)
mask_out, _ = get_tissue_mask(
    thumbnail_rgb,
    deconvolve_first=True,
    n_thresholding_steps=1,
    sigma=1.5,
    min_size=30,
)
mask_out = resize(
    mask_out == 0,
    output_shape=tissue_rgb.shape[:2],
    order=0,
    preserve_range=True,
) == 1

# Since this is a unit test, just work on a small image.  The crop must
# come after the resize above, which uses the full image shape.
tissue_rgb = tissue_rgb[1000:1500, 2500:3000, :]
mask_out = mask_out[1000:1500, 2500:3000]

# for reproducibility
np.random.seed(0)

# %%===========================================================================
def dsa_predict(model, gc, item_id, group_name='Positive',
                ann_doc_name='Default', preprocess_input=None,
                tile_size=(224, 224), save_mag=10, mask_mag=1.25,
                tissue_threshold=0.3, batch_size=8, pred_threshold=0.5,
                color='rgb(255,153,0)'):
    """Predict on a DSA image item with a trained binary model, using a
    grid tiling approach, and push the positive tiles as annotations.

    Parameters
    ----------
    model : tensorflow.keras.models.Model
        a trained keras model for binary classification
    gc : girder_client.GirderClient
        authenticated client, used to get the images
    item_id : str
        image item id
    group_name : str (optional)
        name of the positive class, will be used as the group name in
        annotation elements
    ann_doc_name : str (optional)
        prefix of the annotation document names. Existing annotation
        documents on the item whose name starts with this prefix are
        deleted before the new ones are posted.
    preprocess_input : function (optional)
        a function that is applied to each batch of tiles before scaling
        and prediction, works on a tensor-style image. If None, the tiles
        are only scaled by 1 / 255.
    tile_size : tuple (optional)
        size (width, height) to predict images at
    save_mag : float (optional)
        magnification to extract tiles at, None uses the native
        magnification of the image
    mask_mag : float (optional)
        tissue mask is used to decide which tiles to predict on, this is
        the magnification of the tissue mask
    tissue_threshold : float (optional)
        fraction of tile that must contain tissue to be predicted on
    batch_size : int (optional)
        predictions are done on images in batches
    pred_threshold : float (optional)
        model predicts a probability from 0 to 1, predictions above
        pred_threshold are considered the positive class that will be
        pushed as annotations
    color : str (optional)
        rgb(###,###,###) color of element box in annotation element

    Returns
    -------
    annotation_data : dict
        annotation data for the positive tiles, keyed by COL_NAMES. It is
        pushed to the item only when at least one tile is positive.

    """
    # info about the source image
    im_info = gc.get('item/{}/tiles'.format(item_id))
    fr_mag = im_info['magnification']
    fr_width = im_info['sizeX']
    fr_height = im_info['sizeY']

    if save_mag is None:
        # save magnification will be native magnification
        save_mag = fr_mag

    fr_to_lr_factor = save_mag / fr_mag
    # tile size at full resolution is determined by the save magnification
    fr_tile_size = (int(tile_size[0] / fr_to_lr_factor),
                    int(tile_size[1] / fr_to_lr_factor))  # (width, height)

    # get tissue mask
    lr_im = get_region_im(gc, item_id, {'magnification': mask_mag})[:, :, :3]
    tissue_mask = get_tissue_mask(lr_im)[0]

    # collect the top-left corner of every tile that will be predicted on
    coords = []
    for x in range(0, fr_width, fr_tile_size[0]):
        for y in range(0, fr_height, fr_tile_size[1]):
            # skip tiles that would go over the edge of the image
            if (x + fr_tile_size[0] > fr_width
                    or y + fr_tile_size[1] > fr_height):
                continue

            # the same tile, in tissue mask coordinates
            tissue_tile = tissue_mask[
                int(y * mask_mag / fr_mag):
                int((y + fr_tile_size[1]) * mask_mag / fr_mag),
                int(x * mask_mag / fr_mag):
                int((x + fr_tile_size[0]) * mask_mag / fr_mag)
            ]

            # skip if tile does not contain enough tissue
            tissue_fraction = np.count_nonzero(tissue_tile) / tissue_tile.size
            if tissue_fraction < tissue_threshold:
                continue

            coords.append((x, y))

    # break the coords in batch size chunks
    coord_batches = [coords[i:i + batch_size]
                     for i in range(0, len(coords), batch_size)]

    annotation_data = {col_name: [] for col_name in COL_NAMES}

    print('predicting in batches')
    print('*********************')
    for batch_num, coord_batch in enumerate(coord_batches):
        print('{} of {}'.format(batch_num + 1, len(coord_batches)))

        # get all the images in this batch, in tensor shape
        batch_ims = np.array([
            get_region_im(gc, item_id, {
                'left': left, 'top': top,
                'width': fr_tile_size[0], 'height': fr_tile_size[1],
                'magnification': save_mag,
            })[:, :, :3]
            for left, top in coord_batch
        ])

        # preprocess_input defaults to None, so it must only be called
        # when the caller actually supplied one
        if preprocess_input is not None:
            batch_ims = preprocess_input(batch_ims)
        batch_ims = batch_ims / 255.

        # predict on the batch
        predictions = model.predict(batch_ims)

        # record every tile predicted as the positive class
        for (xmin, ymin), pred in zip(coord_batch, predictions):
            if pred[0] > pred_threshold:
                xmax = xmin + fr_tile_size[0]
                ymax = ymin + fr_tile_size[1]

                annotation_data['group'].append(group_name)
                annotation_data['color'].append(color)
                annotation_data['has_holes'].append(0.0)
                annotation_data['touches_edge-top'].append(0.0)
                annotation_data['touches_edge-left'].append(0.0)
                annotation_data['touches_edge-bottom'].append(0.0)
                annotation_data['touches_edge-right'].append(0.0)
                annotation_data['xmin'].append(xmin)
                annotation_data['ymin'].append(ymin)
                annotation_data['xmax'].append(xmax)
                annotation_data['ymax'].append(ymax)
                annotation_data['coords_x'].append(
                    '{},{},{},{}'.format(xmin, xmax, xmax, xmin))
                annotation_data['coords_y'].append(
                    '{},{},{},{}'.format(ymin, ymin, ymax, ymax))

    # only push if annotation data is not empty
    n = len(annotation_data['group'])
    if n:
        print('number of tiles to push: {}'.format(n))
        contours_df = DataFrame(annotation_data)
        annotation_docs = get_annotation_documents_from_contours(
            contours_df.copy(), separate_docs_by_group=False,
            annots_per_doc=100, docnamePrefix=ann_doc_name,
            annprops=ANNPROPS, verbose=False, monitorPrefix=''
        )

        # get current annotation documents from item
        existing_annotations = gc.get('/annotation/item/' + item_id)

        # delete annotation documents starting with the same prefix as
        # the ones about to be pushed
        for ann in existing_annotations:
            if 'name' in ann['annotation']:
                doc_name = ann['annotation']['name']
                if doc_name.startswith(ann_doc_name):
                    gc.delete('/annotation/%s' % ann['_id'])

        # post the annotation documents you created
        for annotation_doc in annotation_docs:
            gc.post("/annotation?itemId=" + item_id, json=annotation_doc)
    else:
        print('no positive tiles to push..')

    return annotation_data
def _save_tile_and_color_variants(tile_im, out_dirs, stem, reinhard_stats,
                                  src_mu, src_sigma):
    """Write a tile, plus one Reinhard-normalized copy per target, to out_dirs.

    The plain tile is saved as ``<stem>.png``; the copy normalized to the
    j-th entry of ``reinhard_stats`` is saved as ``<stem>_norm_<j>.png``.
    The normalized copies are computed once and reused for every directory.
    """
    named_images = [('{}.png'.format(stem), tile_im)]
    if reinhard_stats is not None:
        for j, target in enumerate(reinhard_stats.values()):
            im_norm = reinhard(tile_im.copy(), target['mu'], target['sigma'],
                               src_mu=src_mu, src_sigma=src_sigma)
            named_images.append(('{}_norm_{}.png'.format(stem, j), im_norm))

    for out_dir in out_dirs:
        for name, image in named_images:
            imwrite(join(out_dir, name), image)


def grid_tiling(gc, item_id, group_names, save_dir, save_mag=None,
                mask_mag=1.25, tile_size=(224, 224), tissue_threshold=0.3,
                annotation_threshold=0.15, random_seed=64, is_test=False,
                oversample_background=2.0, reinhard_stats=None):
    """Split a DSA image item (WSI) into smaller images and save locally
    grouped by annotations.

    This approach grids the image into equal sized small images, or tiles
    (i.e. a grid is placed over the WSI starting at the top left corner). At
    the bottom and right edge of the WSI the tiles are ignored if not of
    correct size (the case where the WSI dimensions are not a multiple
    factor of the tile size). A list of annotation group names are needed to
    group the tiles into classes of images saved in their own directories.
    Tiles with not enough tissue detected are ignored and tiles not
    containing annotations (but have tissue) are by default saved into the
    background class. A background annotation group will cause issues so
    avoid having this annotation group name.

    Tiles can be saved at a lower magnification than source image if needed
    (param: save_mag). Note that the tile size specified should be the tile
    size at the save magnification not the source magnification. Image saved
    will be of the tile size specified in parameters, regardless of the
    save_mag used.

    Parameters
    ----------
    gc : girder_client.GirderClient
        authenticated client
    item_id : str
        DSA image item id
    group_names : list
        list of annotation group names
    save_dir : str
        directory to create group directories with images -
        save_dir / group_name_1, save_dir / background, etc.
    save_mag : float (optional)
        magnification to use when saving the images, if None then source
        magnification will be used.
    mask_mag : float (optional)
        magnification to create binary mask of tissue and annotations. Note
        that if your annotations are very small it will benefit to use a
        larger value than default here, but binary masks will fail to create
        at very high magnifications.
    tile_size : tuple (optional)
        size (width, height) to save tiles at, note that this is the size it
        will be saved at regardless of the magnification used to save the
        images at (i.e. if save_mag is 4 times less than the source
        magnification then the actual tile_size will represent 4 times the
        pixels at full resolution).
    tissue_threshold : float (optional)
        from 0 to 1, fraction of tile that must contain tissue to be
        included
    annotation_threshold : float (optional)
        from 0 to 1, fraction of tile that must contain annotation (per
        group) to be labeled as annotation. Note that a single tile may be
        saved twice, representing multiple classes.
    random_seed : int (optional)
        random seed to use when shuffling the background regions
    is_test : bool (optional)
        if True then all the background regions will be saved, otherwise
        oversample_background will be used to determine how many background
        regions to save
    oversample_background : float (optional)
        factor to oversample background class images, compared to the number
        of images of the annotation class with the most images saved
    reinhard_stats : dict (optional)
        if not None then the images saved will be color augmented by color
        normalizing the tiles using the Reinhard color norm method. Each
        value of this dict must itself be a dict with 'mu' and 'sigma' keys
        holding the stats of one target image; one extra normalized copy of
        every saved tile is written per entry. The keys of reinhard_stats
        are not used. The source stats of this image are computed here with
        reinhard_color_stats.

    Returns
    -------
    None
        the tiles are written to disk under save_dir.

    """
    im_info = gc.get('item/{}/tiles'.format(item_id))
    if save_mag is None:
        save_mag = im_info['magnification']

    # source color stats are only needed (and computed) for augmentation
    mu = sigma = None
    if reinhard_stats is not None:
        mu, sigma = reinhard_color_stats(gc, item_id)

    # ----- prep work ----- #
    filename = splitext(gc.getItem(item_id)['name'])[0]

    # create dirs for each image class to save
    group_dirs = [join(save_dir, group_name) for group_name in group_names]
    for group_dir in group_dirs:
        makedirs(group_dir, exist_ok=True)
    background_dir = join(save_dir, 'background')
    makedirs(background_dir, exist_ok=True)

    # get image annotations
    annotations = gc.get('/annotation/item/' + item_id)

    # create a dataframe to use with annotation to mask handler functions
    # (gt codes); group i gets GT_code i + 1
    gt_data = [
        [group_name, 1, i + 1, 0, 0, 'rgb(0, 0, {})'.format(i), '']
        for i, group_name in enumerate(group_names)
    ]
    gt_codes = pd.DataFrame(
        columns=['group', 'overlay_order', 'GT_code', 'is_roi',
                 'is_background_class', 'color', 'comments'],
        data=gt_data, index=range(len(group_names)))
    gt_codes.index = gt_codes.loc[:, 'group']

    # get binary masks - tissue mask and annotation(s) mask
    mask_mag_factor, _ = get_scale_factor_and_appendStr(
        gc=gc, slide_id=item_id, MAG=mask_mag)
    # - scaling the annotations to lower magnification (on a copy, the
    # - originals are scaled to the save magnification further down)
    mask_annotations = scale_slide_annotations(
        deepcopy(annotations), sf=mask_mag_factor)

    # - binary masks are for the whole image at low resolution, function
    # - returns also the RGB image which we use for getting the tissue mask
    mask_element_info = get_bboxes_from_slide_annotations(mask_annotations)
    get_kwargs = deepcopy(GET_KWARGS)  # avoid mutating the global variable
    get_kwargs['gc'] = gc
    get_kwargs['slide_id'] = item_id
    get_kwargs['GTCodes_dict'] = gt_codes.T.to_dict()
    get_kwargs['bounds'] = None
    get_kwargs['MAG'] = mask_mag
    ann_mask_and_image = get_image_and_mask_from_slide(
        mode='wsi', slide_annotations=mask_annotations,
        element_infos=mask_element_info, **get_kwargs)
    tissue_mask = get_tissue_mask(ann_mask_and_image['rgb'])[0]

    # convert the annotations to lower magnification
    fr_to_lr_factor, _ = get_scale_factor_and_appendStr(
        gc=gc, slide_id=item_id, MAG=save_mag)
    annotations = scale_slide_annotations(annotations, sf=fr_to_lr_factor)
    lr_element_info = get_bboxes_from_slide_annotations(annotations)

    # get full resolution information for image
    fr_mag = im_info['magnification']
    fr_width = im_info['sizeX']
    fr_height = im_info['sizeY']
    fr_tile_size = (int(tile_size[0] / fr_to_lr_factor),
                    int(tile_size[1] / fr_to_lr_factor))  # (width, height)

    # change the get_kwargs to save magnification
    get_kwargs['MAG'] = save_mag

    # ----- loop through image at full res ----- #
    group_annotation_counts = [0] * len(group_names)
    background_regions = []
    for x in range(0, fr_width, fr_tile_size[0]):
        for y in range(0, fr_height, fr_tile_size[1]):
            # check that the tile won't go over the edge of image, if so skip
            if (x + fr_tile_size[0] > fr_width
                    or y + fr_tile_size[1] > fr_height):
                continue

            # the tile, in low resolution mask coordinates
            mask_rows = slice(int(y * mask_mag / fr_mag),
                              int((y + fr_tile_size[1]) * mask_mag / fr_mag))
            mask_cols = slice(int(x * mask_mag / fr_mag),
                              int((x + fr_tile_size[0]) * mask_mag / fr_mag))

            # skip if tile does not contain enough tissue
            tissue_tile = tissue_mask[mask_rows, mask_cols]
            tissue_fraction = np.count_nonzero(tissue_tile) / tissue_tile.size
            if tissue_fraction < tissue_threshold:
                continue

            # find every annotation group that covers enough of the tile
            annotation_tile = ann_mask_and_image['ROI'][mask_rows, mask_cols]
            matched_dirs = []
            for i, group_dir in enumerate(group_dirs):
                group_pixels = np.count_nonzero(annotation_tile == i + 1)
                if group_pixels / annotation_tile.size < annotation_threshold:
                    continue
                group_annotation_counts[i] += 1
                matched_dirs.append(group_dir)

            if not matched_dirs:
                # tile is background if no annotation group is present;
                # only its coordinates are kept, it is fetched later if
                # it gets selected
                background_regions.append({
                    'magnification': save_mag,
                    'left': x,
                    'top': y,
                    'width': fr_tile_size[0],
                    'height': fr_tile_size[1]
                })
                continue

            # the image depends only on the tile position, so fetch it once
            # and save it for every group it belongs to
            get_kwargs['bounds'] = {
                'XMIN': x,
                'XMAX': x + fr_tile_size[0],
                'YMIN': y,
                'YMAX': y + fr_tile_size[1]
            }
            annotation_im = get_image_and_mask_from_slide(
                mode='manual_bounds', slide_annotations=annotations,
                element_infos=lr_element_info, **get_kwargs)['rgb']
            _save_tile_and_color_variants(
                annotation_im, matched_dirs,
                '{}_x_{}_y_{}'.format(filename, x, y),
                reinhard_stats, mu, sigma)

    # randomly select background class coordinates
    # - oversample the background class by a factor of the most represented
    # - annotation class
    Random(random_seed).shuffle(background_regions)
    if not is_test:
        n_background = int(
            oversample_background * max(group_annotation_counts))
        background_regions = background_regions[:n_background]

    for region in background_regions:
        tile_im = get_region_im(gc, item_id, region)[:, :, :3]
        _save_tile_and_color_variants(
            tile_im, [background_dir],
            '{}_x_{}_y_{}'.format(filename, region['left'], region['top']),
            reinhard_stats, mu, sigma)