def test_get_roi_mask(self):
    """Test get_roi_mask()."""
    # fetch slide annotations and derive per-element bounding-box info
    annots = gc.get('/annotation/item/' + SAMPLE_SLIDE_ID)
    elinfos = get_bboxes_from_slide_annotations(annots)

    # ground-truth codes table, indexed by annotation group name
    gtcodes = read_csv(GTCODE_PATH)
    gtcodes.index = gtcodes.loc[:, 'group']

    # locate all "special" ROI annotations in the slide
    roi_idxs = _get_idxs_for_all_rois(
        GTCodes=gtcodes, element_infos=elinfos)

    # build the labeled mask for the first ROI only
    ROI, roiinfo = get_roi_mask(
        slide_annotations=annots, element_infos=elinfos,
        GTCodes_df=gtcodes.copy(),
        idx_for_roi=roi_idxs[0],  # <- let's focus on first ROI,
        iou_thresh=0.0, roiinfo=None, crop_to_roi=True,
        use_shapely=True, verbose=False,
        monitorPrefix="roi 1")

    # known-good values for this sample slide
    self.assertTupleEqual(ROI.shape, (4594, 4542))
    observed = tuple(roiinfo[key] for key in (
        'BBOX_HEIGHT', 'BBOX_WIDTH', 'XMIN', 'XMAX', 'YMIN', 'YMAX'))
    self.assertTupleEqual(
        observed, (4820, 7006, 59206, 66212, 33505, 38325))
def annotation_info(gc, item_id, magnification=None):
    """Get item HistomicsUI annotations and element information.

    Parameters
    ----------
    gc : girder_client.GirderClient
        authenticated client
    item_id : str
        item id
    magnification : float (optional)
        if given, annotations will be scaled to match this magnification

    Returns
    -------
    annotations : list
        annotation documents information
    element_info : DataFrame
        annotation element information

    """
    # fetch raw annotation documents for the item
    annotations = gc.get('/annotation/item/' + item_id)

    if magnification is not None:
        # scale factor that maps annotations to the requested magnification
        mask_mag_factor, _ = get_scale_factor_and_appendStr(
            gc=gc, slide_id=item_id, MAG=magnification)
        annotations = scale_slide_annotations(
            annotations, sf=mask_mag_factor)

    # bounding boxes / centers for every annotation element
    element_info = get_bboxes_from_slide_annotations(annotations)

    return annotations, element_info
def test_prep(girderClient):  # noqa
    """Populate the shared module-level ``cfg`` object used by later tests.

    NOTE(review): this fixture mutates ``cfg`` as a side effect and assumes
    it runs before the tests that read from it — confirm test ordering.
    """
    cfg.gc = girderClient

    # first item matching this TCGA slide name on the test server
    cfg.iteminfo = cfg.gc.get('/item', parameters={
        'text': "TCGA-A2-A0YE-01Z-00-DX1"})[0]

    # read GTCodes dataframe, indexed by annotation group name
    gtcodePath = getTestFilePath('sample_GTcodes.csv')
    cfg.GTcodes = read_csv(gtcodePath)
    cfg.GTcodes.index = cfg.GTcodes.loc[:, 'group']

    # other params
    cfg.get_roi_mask_kwargs = {
        'iou_thresh': 0.0, 'crop_to_roi': True, 'use_shapely': True,
        'verbose': False
    }
    cfg.get_contours_kwargs = {
        'groups_to_get': None,
        'roi_group': 'roi',
        'get_roi_contour': True,
        'discard_nonenclosed_background': True,
        'background_group': 'mostly_stroma',
        'MIN_SIZE': 10, 'MAX_SIZE': None,
        'verbose': False, 'monitorPrefix': ""
    }

    # Microns-per-pixel / Magnification (either or)
    cfg.MPP = 5.0
    cfg.MAG = None

    # get annotations for slide
    cfg.slide_annotations = cfg.gc.get(
        '/annotation/item/' + cfg.iteminfo['_id'])

    # scale up/down annotations by a factor
    sf, _ = get_scale_factor_and_appendStr(
        gc=cfg.gc, slide_id=cfg.iteminfo['_id'],
        MPP=cfg.MPP, MAG=cfg.MAG)
    cfg.slide_annotations = scale_slide_annotations(
        cfg.slide_annotations, sf=sf)

    # get bounding box information for all annotations
    cfg.element_infos = get_bboxes_from_slide_annotations(
        cfg.slide_annotations)

    # params for get_image_and_mask_from_slide()
    cfg.get_kwargs = {
        'gc': cfg.gc,
        'slide_id': cfg.iteminfo['_id'],
        'GTCodes_dict': cfg.GTcodes.T.to_dict(),
        'bounds': {
            'XMIN': 58000, 'XMAX': 63000,
            'YMIN': 35000, 'YMAX': 39000},
        'MPP': cfg.MPP,
        'MAG': cfg.MAG,
        'get_roi_mask_kwargs': cfg.get_roi_mask_kwargs,
        'get_contours_kwargs': cfg.get_contours_kwargs,
        'get_rgb': True,
        'get_contours': True,
        'get_visualization': True,
    }
def test_get_bboxes_from_slide_annotations(self):
    """Test get_bboxes_from_slide_annotations."""
    annots = gc.get('/annotation/item/' + SAMPLE_SLIDE_ID)
    elinfos = get_bboxes_from_slide_annotations(annots)

    # one row per annotation element, nine descriptive columns
    self.assertTupleEqual(elinfos.shape, (76, 9))
    expected_cols = (
        'annidx', 'elementidx', 'type', 'group',
        'xmin', 'xmax', 'ymin', 'ymax', 'bbox_area')
    self.assertTupleEqual(tuple(elinfos.columns), expected_cols)
def test_get_bboxes_from_slide_annotations(self):
    """Test get_bboxes_from_slide_annotations."""
    # deepcopy so the shared cfg.annotations fixture is not mutated
    elinfos = get_bboxes_from_slide_annotations(
        copy.deepcopy(cfg.annotations))

    assert elinfos.shape == (76, 9)
    expected_cols = {
        'annidx', 'elementidx', 'type', 'group',
        'xmin', 'xmax', 'ymin', 'ymax', 'bbox_area'}
    assert set(elinfos.columns) == expected_cols
def get_all_rois_from_slide(
        gc, slide_id, GTCodes_dict, save_directories,
        get_image_and_mask_from_slide_kwargs=None,
        slide_name=None, verbose=True, monitorPrefix="",
):
    """Parse annotations and save ground truth masks for ALL ROIs.

    Get all ROIs in a single slide. This mainly uses
    get_image_and_mask_from_slide(), which should be referred to
    for implementation details.

    Parameters
    -----------
    gc : object
        girder client object to make requests, for example:
        gc = girder_client.GirderClient(apiUrl = APIURL)
        gc.authenticate(interactive=True)

    slide_id : str
        girder id for item (slide)

    GTCodes_dict : dict
        the ground truth codes and information dict.
        This is a dict that is indexed by the annotation group name and
        each entry is in turn a dict with the following keys:

        - group: group name of annotation (string), eg. mostly_tumor
        - overlay_order: int, how early to place the annotation in the
          mask. Larger values means this annotation group is overlaid
          last and overwrites whatever overlaps it.
        - GT_code: int, desired ground truth code (in the mask).
          Pixels of this value belong to corresponding group (class)
        - is_roi: Flag for whether this group encodes an ROI
        - is_background_class: Flag, whether this group is the default
          fill value inside the ROI. For example, you may decide that
          any pixel inside the ROI is considered stroma.

    save_directories : dict
        paths to directories to save data. Each entry is a string, and
        the following keys are allowed

        - ROI: path to save masks (labeled images)
        - rgb: path to save rgb images
        - contours: path to save annotation contours
        - visualization: path to save rgb visualization overlays

    get_image_and_mask_from_slide_kwargs : dict
        kwargs to pass to get_image_and_mask_from_slide().
        default values are assigned if specific parameters are not given.

    slide_name : str or None
        If not given, it's inferred using a server request using the
        girder client.

    verbose : bool
        Print progress to screen?

    monitorPrefix : str
        text to prepend to printed statements

    Returns
    --------
    list of dicts
        each entry contains the following keys

        - ROI: path to saved mask (labeled image)
        - rgb: path to saved rgb image
        - contours: path to saved annotation contours
        - visualization: path to saved rgb visualization overlay

    """
    # assign defaults if nothing given
    default_keyvalues = {
        'MPP': 5.0, 'MAG': None,
        'get_roi_mask_kwargs': {
            'iou_thresh': 0.0, 'crop_to_roi': True,
            'use_shapely': True, 'verbose': False
        },
        'get_contours_kwargs': {
            'groups_to_get': None,
            'roi_group': 'roi',
            'get_roi_contour': True,
            'discard_nonenclosed_background': True,
            'background_group': 'mostly_stroma',
            'MIN_SIZE': 10, 'MAX_SIZE': None,
            'verbose': False, 'monitorPrefix': ""
        },
        'get_rgb': True,
        'get_contours': True,
        'get_visualization': True,
    }
    kvp = get_image_and_mask_from_slide_kwargs or {}  # for easy referencing
    for k, v in default_keyvalues.items():
        if k not in kvp.keys():
            kvp[k] = v

    # convert to df and sanity check: GT codes must be positive so that
    # zero remains available as the "outside mask" value
    GTCodes_df = DataFrame.from_dict(GTCodes_dict, orient='index')
    if any(GTCodes_df.loc[:, 'GT_code'] <= 0):
        raise Exception("All GT_code must be > 0")

    # if not given, assign name of first file associated with girder item
    if slide_name is None:
        resp = gc.get('/item/%s/files' % slide_id)
        slide_name = resp[0]['name']
        slide_name = slide_name[:slide_name.rfind('.')]

    # get annotations for slide
    slide_annotations = gc.get('/annotation/item/' + slide_id)

    # scale up/down annotations by a factor
    sf, _ = get_scale_factor_and_appendStr(
        gc=gc, slide_id=slide_id, MPP=kvp['MPP'], MAG=kvp['MAG'])
    slide_annotations = scale_slide_annotations(slide_annotations, sf=sf)

    # get bounding box information for all annotations
    element_infos = get_bboxes_from_slide_annotations(slide_annotations)

    # get idx of all 'special' roi annotations
    idxs_for_all_rois = _get_idxs_for_all_rois(
        GTCodes=GTCodes_df, element_infos=element_infos)

    savenames = []

    for roino, idx_for_roi in enumerate(idxs_for_all_rois):

        roicountStr = "%s: roi %d of %d" % (
            monitorPrefix, roino + 1, len(idxs_for_all_rois))

        # get specified area
        roi_out = get_image_and_mask_from_slide(
            gc=gc, slide_id=slide_id, GTCodes_dict=GTCodes_dict,
            mode='polygonal_bounds', idx_for_roi=idx_for_roi,
            slide_annotations=slide_annotations,
            element_infos=element_infos, **kvp)

        # now save roi (mask, rgb, contours, vis)
        this_roi_savenames = dict()
        ROINAMESTR = "%s_left-%d_top-%d_bottom-%d_right-%d" % (
            slide_name,
            roi_out['bounds']['XMIN'], roi_out['bounds']['YMIN'],
            roi_out['bounds']['YMAX'], roi_out['bounds']['XMAX'])

        for imtype in ['ROI', 'rgb', 'visualization']:
            if imtype in roi_out.keys():
                savename = os.path.join(
                    save_directories[imtype], ROINAMESTR + ".png")
                if verbose:
                    print("%s: Saving %s\n" % (roicountStr, savename))
                imwrite(im=roi_out[imtype], uri=savename)
                this_roi_savenames[imtype] = savename

        if 'contours' in roi_out.keys():
            savename = os.path.join(
                save_directories['contours'], ROINAMESTR + ".csv")
            if verbose:
                print("%s: Saving %s\n" % (roicountStr, savename))
            contours_df = DataFrame(roi_out['contours'])
            contours_df.to_csv(savename)
            this_roi_savenames['contours'] = savename

        savenames.append(this_roi_savenames)

    return savenames
def get_image_and_mask_from_slide(gc, slide_id, GTCodes_dict,
                                  MPP=5.0, MAG=None,
                                  mode='min_bounding_box',
                                  bounds=None, idx_for_roi=None,
                                  slide_annotations=None,
                                  element_infos=None,
                                  get_roi_mask_kwargs=None,
                                  get_contours_kwargs=None, linewidth=0.2,
                                  get_rgb=True, get_contours=True,
                                  get_visualization=True):
    """Parse region from the slide and get its corresponding labeled mask.

    This is a wrapper around get_roi_mask() which should be referred to
    for implementation details.

    Parameters
    -----------
    gc : object
        girder client object to make requests, for example:
        gc = girder_client.GirderClient(apiUrl = APIURL)
        gc.authenticate(interactive=True)

    slide_id : str
        girder id for item (slide)

    GTCodes_dict : dict
        the ground truth codes and information dict.
        This is a dict that is indexed by the annotation group name and
        each entry is in turn a dict with the following keys:

        - group: group name of annotation (string), eg. mostly_tumor
        - overlay_order: int, how early to place the annotation in the
          mask. Larger values means this annotation group is overlaid
          last and overwrites whatever overlaps it.
        - GT_code: int, desired ground truth code (in the mask).
          Pixels of this value belong to corresponding group (class)
        - is_roi: Flag for whether this group encodes an ROI
        - is_background_class: Flag, whether this group is the default
          fill value inside the ROI. For example, you may decide that
          any pixel inside the ROI is considered stroma.

    MPP : float or None
        Microns-per-pixel -- best use this as it's more well-defined than
        magnification, which is more scanner/manufacturer specific.
        MPP of 0.25 often roughly translates to 40x

    MAG : float or None
        If you prefer to use whatever magnification is reported in slide.
        If neither MPP nor MAG is provided, everything is retrieved without
        scaling at base (scan) magnification.

    mode : str
        This specifies which part of the slide to get the mask from.
        Allowed modes include the following

        - wsi: get scaled up/down version of mask of whole slide
        - min_bounding_box: get minimum box for all annotations in slide
        - manual_bounds: use given ROI bounds provided by the 'bounds' param
        - polygonal_bounds: use the idx_for_roi param to get coordinates

    bounds : dict or None
        if not None, has keys 'XMIN', 'XMAX', 'YMIN', 'YMAX' for slide
        region coordinates (AT BASE MAGNIFICATION) to get labeled image
        (mask) for. Use this with the 'manual_bounds' run mode.

    idx_for_roi : int
        index of ROI within the element_infos dataframe.
        Use this with the 'polygonal_bounds' run mode.

    slide_annotations : list or None
        Give this parameter to avoid re-getting slide annotations. If you do
        provide the annotations, though, make sure you have used
        scale_slide_annotations() to scale them up/down by sf BEFOREHAND.

    element_infos : pandas DataFrame.
        The columns annidx and elementidx encode the dict index of
        annotation document and element, respectively, in the original
        slide_annotations list of dictionaries. This can be obtained by
        the get_bboxes_from_slide_annotations() method.
        Make sure you have used scale_slide_annotations().

    get_roi_mask_kwargs : dict
        extra kwargs for get_roi_mask()

    get_contours_kwargs : dict
        extra kwargs for get_contours_from_mask()

    linewidth : float
        visualization line width (relative to final mask)

    get_rgb: bool
        get rgb image?

    get_contours : bool
        get annotation contours?

    get_visualization : bool
        get overlaid annotation bounds over RGB for visualization

    Returns
    --------
    dict
        Results dict containing one or more of the following keys

        - bounds: dict of bounds at scan magnification
        - ROI: (mxn) labeled image (mask)
        - rgb: (mxnx3 np array) corresponding rgb image
        - contours: list, each entry is a dict version of a row from the
          output of masks_to_annotations_handler.get_contours_from_mask()
        - visualization: (mxnx3 np array) visualization overlay

    """
    get_roi_mask_kwargs = get_roi_mask_kwargs or {}
    get_contours_kwargs = get_contours_kwargs or {}

    # important sanity checks
    # NOTE(review): the visualization branch below reads ``rgb`` and
    # ``contours_list``, which are only bound when get_rgb/get_contours are
    # truthy -- presumably _sanity_checks() enforces a consistent flag
    # combination; confirm against its implementation.
    (MPP, MAG, mode, bounds, idx_for_roi, get_roi_mask_kwargs,
     get_rgb, get_contours, get_visualization) = _sanity_checks(
        MPP, MAG, mode, bounds, idx_for_roi, get_roi_mask_kwargs,
        get_rgb, get_contours, get_visualization)

    # calculate the scale factor
    sf, appendStr = get_scale_factor_and_appendStr(
        gc=gc, slide_id=slide_id, MPP=MPP, MAG=MAG)

    if slide_annotations is not None:
        assert element_infos is not None, "must also provide element_infos"
    else:
        # get annotations for slide
        slide_annotations = gc.get('/annotation/item/' + slide_id)

        # scale up/down annotations by a factor
        slide_annotations = scale_slide_annotations(slide_annotations, sf=sf)

        # get bounding box information for all annotations -> scaled by sf
        element_infos = get_bboxes_from_slide_annotations(slide_annotations)

    # Determine get region based on run mode, keeping in mind that it
    # must be at BASE MAGNIFICATION coordinates before it is passed
    # on to get_mask_from_slide()
    bounds = _get_roi_bounds_by_run_mode(
        gc=gc, slide_id=slide_id, mode=mode, bounds=bounds,
        element_infos=element_infos, idx_for_roi=idx_for_roi, sf=sf)

    result = {
        'bounds': bounds,
    }

    # get mask for specified area
    if mode == 'polygonal_bounds':
        # get roi mask and info
        ROI, _ = get_roi_mask(
            slide_annotations=slide_annotations,
            element_infos=element_infos,
            GTCodes_df=DataFrame.from_dict(GTCodes_dict, orient='index'),
            idx_for_roi=idx_for_roi, **get_roi_mask_kwargs)
    else:
        ROI, _ = get_mask_from_slide(
            GTCodes_dict=GTCodes_dict, roiinfo=copy.deepcopy(bounds),
            slide_annotations=slide_annotations,
            element_infos=element_infos, sf=sf,
            get_roi_mask_kwargs=get_roi_mask_kwargs)

    # get RGB
    if get_rgb:
        rgb, ROI = _get_rgb_and_pad_roi(
            gc=gc, slide_id=slide_id, bounds=bounds,
            appendStr=appendStr, ROI=ROI)
        result['rgb'] = rgb

    # pack result (we have to do it here in case of padding)
    result['ROI'] = ROI

    # get contours
    if get_contours:
        contours_list = get_contours_from_mask(
            MASK=ROI,
            GTCodes_df=DataFrame.from_dict(GTCodes_dict, orient='index'),
            **get_contours_kwargs)
        contours_list = contours_list.to_dict(orient='records')
        result['contours'] = contours_list

    # get visualization of annotations on RGB
    if get_visualization:
        result['visualization'] = _visualize_annotations_on_rgb(
            rgb=rgb, contours_list=contours_list, linewidth=linewidth)

    return result
def get_all_rois_from_slide_v2(gc, slide_id, GTCodes_dict, save_directories,
                               annotations_to_contours_kwargs=None,
                               mode='object', get_mask=True,
                               slide_name=None, verbose=True,
                               monitorprefix="", callback=None,
                               callback_kwargs=None):
    """Get all ROIs for a slide without an intermediate mask form.

    This mainly relies on contours_to_labeled_object_mask(), which should be
    referred to for extra documentation. It can be run in either the "object"
    mode, whereby the saved masks are a three-channel png where the first
    channel encodes the class label (i.e. same as semantic segmentation) and
    the product of the values in the second and third channels encodes the
    object ID; or the "semantic" mode, where the mask is a single channel
    (semantic segmentation with no object differentiation).

    Unlike version 1 (get_all_rois_from_slide()), this gets the contours
    FIRST (including cropping to ROI boundaries and other processing using
    shapely) and THEN parses them into masks, which enables differentiating
    individual objects for localization/classification/segmentation tasks.
    If you only need semantic segmentation masks, version 1 may handle some
    shapely edge cases more robustly.

    Parameters
    ----------
    gc : object
        girder client object to make requests, for example:
        gc = girder_client.GirderClient(apiUrl = APIURL)
        gc.authenticate(interactive=True)

    slide_id : str
        girder id for item (slide)

    GTCodes_dict : dict
        the ground truth codes and information dict, indexed by annotation
        group name; each entry is a dict with keys: group, overlay_order,
        GT_code (must be > 0), is_roi, is_background_class.

    save_directories : dict
        paths to directories to save data. Allowed keys: ROI, rgb,
        contours, visualization (each value is a directory path string).

    annotations_to_contours_kwargs : dict
        kwargs to pass to annotations_to_contours_no_mask().
        default values are assigned if specific parameters are not given.

    mode : str
        run mode for getting masks. Must be in

        - object: 3-channel mask; first channel encodes label, product of
          second and third channels encodes object ID.
        - semantic: 1-channel mask (first channel of the object mode).

    get_mask : bool
        whether to also produce the segmentation mask (set False to only
        get RGB and contours). Default is True.

    slide_name : str or None
        If not given, it's inferred using a server request via the girder
        client.

    verbose : bool
        Print progress to screen?

    monitorprefix : str
        text to prepend to printed statements

    callback : function
        a callback function to run on the roi dictionary output. This is
        internal; the following keys are injected and must NOT be assigned
        by you: gc, slide_id, slide_name, MPP, MAG, verbose, monitorprefix.
        If the callback modifies the roi dictionary in place, the modified
        version is the one saved to disk.

    callback_kwargs : dict or None
        kwargs to pass to the callback, not including the mandatory kwargs
        injected internally (mentioned above).

    Returns
    -------
    list of dicts
        each entry contains the following keys

        - mask: path to saved mask
        - rgb: path to saved rgb image
        - contours: path to saved annotation contours
        - visualization: path to saved rgb visualization overlay

    """
    # BUG FIX: callback_kwargs defaults to None but the callback branch
    # below calls .update() on it; normalize to a dict up front so passing
    # a callback without explicit kwargs no longer raises AttributeError.
    callback_kwargs = callback_kwargs or {}

    default_keyvalues = {
        'MPP': None, 'MAG': None,
        'linewidth': 0.2,
        'get_rgb': True, 'get_visualization': True,
    }

    # assign defaults if nothing given
    kvp = annotations_to_contours_kwargs or {}  # for easy referencing
    for k, v in default_keyvalues.items():
        if k not in kvp.keys():
            kvp[k] = v

    # convert to df and sanity check: zero is reserved for "outside mask"
    gtcodes_df = DataFrame.from_dict(GTCodes_dict, orient='index')
    if any(gtcodes_df.loc[:, 'GT_code'] <= 0):
        raise Exception("All GT_code must be > 0")

    # if not given, assign name of first file associated with girder item
    if slide_name is None:
        resp = gc.get('/item/%s/files' % slide_id)
        slide_name = resp[0]['name']
        slide_name = slide_name[:slide_name.rfind('.')]

    # get annotations for slide
    slide_annotations = gc.get('/annotation/item/' + slide_id)

    # scale up/down annotations by a factor
    sf, _ = get_scale_factor_and_appendStr(
        gc=gc, slide_id=slide_id, MPP=kvp['MPP'], MAG=kvp['MAG'])
    slide_annotations = scale_slide_annotations(slide_annotations, sf=sf)

    # get bounding box information for all annotations
    element_infos = get_bboxes_from_slide_annotations(slide_annotations)

    # get idx of all 'special' roi annotations
    idxs_for_all_rois = _get_idxs_for_all_rois(
        GTCodes=gtcodes_df, element_infos=element_infos)

    savenames = []

    for roino, idx_for_roi in enumerate(idxs_for_all_rois):

        roicountStr = "%s: roi %d of %d" % (
            monitorprefix, roino + 1, len(idxs_for_all_rois))

        # get specified area
        roi_out = annotations_to_contours_no_mask(
            gc=gc, slide_id=slide_id, mode='polygonal_bounds',
            idx_for_roi=idx_for_roi, slide_annotations=slide_annotations,
            element_infos=element_infos, **kvp)

        # get corresponding mask (semantic or object)
        if get_mask:
            roi_out['mask'] = contours_to_labeled_object_mask(
                contours=DataFrame(roi_out['contours']),
                gtcodes=gtcodes_df, mode=mode,
                verbose=verbose, monitorprefix=roicountStr)

        # now run callback on roi_out
        if callback is not None:
            # these are 'compulsory' kwargs for the callback
            # since it will not have access to these otherwise
            callback_kwargs.update({
                'gc': gc,
                'slide_id': slide_id,
                'slide_name': slide_name,
                'MPP': kvp['MPP'],
                'MAG': kvp['MAG'],
                'verbose': verbose,
                'monitorprefix': roicountStr,
            })
            callback(roi_out, **callback_kwargs)

        # now save roi (rgb, vis, mask)
        this_roi_savenames = dict()
        ROINAMESTR = "%s_left-%d_top-%d_bottom-%d_right-%d" % (
            slide_name,
            roi_out['bounds']['XMIN'], roi_out['bounds']['YMIN'],
            roi_out['bounds']['YMAX'], roi_out['bounds']['XMAX'])

        for imtype in ['mask', 'rgb', 'visualization']:
            if imtype in roi_out.keys():
                savename = os.path.join(
                    save_directories[imtype], ROINAMESTR + ".png")
                if verbose:
                    print("%s: Saving %s" % (roicountStr, savename))
                imwrite(im=roi_out[imtype], uri=savename)
                this_roi_savenames[imtype] = savename

        # save contours
        savename = os.path.join(
            save_directories['contours'], ROINAMESTR + ".csv")
        if verbose:
            print("%s: Saving %s\n" % (roicountStr, savename))
        contours_df = DataFrame(roi_out['contours'])
        contours_df.to_csv(savename)
        this_roi_savenames['contours'] = savename

        savenames.append(this_roi_savenames)

    return savenames
def annotations_to_contours_no_mask(gc, slide_id, MPP=5.0, MAG=None,
                                    mode='min_bounding_box', bounds=None,
                                    idx_for_roi=None, slide_annotations=None,
                                    element_infos=None, linewidth=0.2,
                                    get_rgb=True, get_visualization=True,
                                    text=True):
    """Process annotations to get RGB and contours without intermediate masks.

    Parameters
    ----------
    gc : object
        girder client object to make requests, for example:
        gc = girder_client.GirderClient(apiUrl = APIURL)
        gc.authenticate(interactive=True)

    slide_id : str
        girder id for item (slide)

    MPP : float or None
        Microns-per-pixel -- best use this as it's more well-defined than
        magnification, which is more scanner or manufacturer specific.
        MPP of 0.25 often roughly translates to 40x

    MAG : float or None
        If you prefer to use whatever magnification is reported in slide.
        If neither MPP nor MAG is provided, everything is retrieved without
        scaling at base (scan) magnification.

    mode : str
        This specifies which part of the slide to get the mask from.
        Allowed modes include the following

        - wsi: get scaled up or down version of mask of whole slide
        - min_bounding_box: get minimum box for all annotations in slide
        - manual_bounds: use given ROI bounds provided by the 'bounds' param
        - polygonal_bounds: use the idx_for_roi param to get coordinates

    bounds : dict or None
        if not None, has keys 'XMIN', 'XMAX', 'YMIN', 'YMAX' for slide
        region coordinates (AT BASE MAGNIFICATION) to get labeled image
        (mask) for. Use this with the 'manual_bounds' run mode.

    idx_for_roi : int
        index of ROI within the element_infos dataframe.
        Use this with the 'polygonal_bounds' run mode.

    slide_annotations : list or None
        Give this parameter to avoid re-getting slide annotations. If you do
        provide the annotations, though, make sure you have used
        scale_slide_annotations() to scale them up or down by sf BEFOREHAND.

    element_infos : pandas DataFrame.
        The columns annidx and elementidx encode the dict index of
        annotation document and element, respectively, in the original
        slide_annotations list of dictionaries. This can be obtained by
        the get_bboxes_from_slide_annotations() method.
        Make sure you have used scale_slide_annotations().

    linewidth : float
        visualization line width

    get_rgb: bool
        get rgb image?

    get_visualization : bool
        get overlaid annotation bounds over RGB for visualization

    text : bool
        add text labels to visualization?

    Returns
    -------
    dict
        Results dict containing one or more of the following keys

        - bounds: dict of bounds at scan magnification
        - rgb: (mxnx3 np array) corresponding rgb image
        - contours: dict
        - visualization: (mxnx3 np array) visualization overlay

    """
    # NOTE(review): the visualization branch below reads ``rgb``, which is
    # only bound when get_rgb is truthy -- presumably _sanity_checks()
    # enforces a consistent flag combination; confirm.
    MPP, MAG, mode, bounds, idx_for_roi, get_rgb, get_visualization = \
        _sanity_checks(
            MPP, MAG, mode, bounds, idx_for_roi,
            get_rgb, get_visualization)

    # calculate the scale factor
    sf, appendStr = get_scale_factor_and_appendStr(
        gc=gc, slide_id=slide_id, MPP=MPP, MAG=MAG)

    if slide_annotations is not None:
        assert element_infos is not None, "must also provide element_infos"
    else:
        # get annotations for slide
        slide_annotations = gc.get('/annotation/item/' + slide_id)

        # scale up/down annotations by a factor
        slide_annotations = scale_slide_annotations(slide_annotations, sf=sf)

        # get bounding box information for all annotations -> scaled by sf
        element_infos = get_bboxes_from_slide_annotations(slide_annotations)

    # Determine get region based on run mode, keeping in mind that it
    # must be at BASE MAGNIFICATION coordinates before it is passed
    # on to get_mask_from_slide()
    bounds = _get_roi_bounds_by_run_mode(
        gc=gc, slide_id=slide_id, mode=mode, bounds=bounds,
        element_infos=element_infos, idx_for_roi=idx_for_roi, sf=sf)

    # only keep relevant elements and get uncropped bounds
    # NOTE(review): uncropped_bounds is not used below in this function.
    elinfos_roi, uncropped_bounds = _keep_relevant_elements_for_roi(
        element_infos, sf=sf, mode=mode, idx_for_roi=idx_for_roi,
        roiinfo=copy.deepcopy(bounds))

    # find relevant portion from slide annotations to use
    # (with overflowing beyond edge)
    annotations_slice = _trim_slide_annotations_to_roi(
        copy.deepcopy(slide_annotations), elinfos_roi=elinfos_roi)

    # get roi polygon vertices
    rescaled_bounds = {k: int(v * sf) for k, v in bounds.items()}
    if mode == 'polygonal_bounds':
        roi_coords = _get_coords_from_element(copy.deepcopy(
            slide_annotations[
                int(element_infos.loc[idx_for_roi, 'annidx'])
            ]['annotation']['elements'][
                int(element_infos.loc[idx_for_roi, 'elementidx'])
            ]))
        cropping_bounds = None
    else:
        roi_coords = None
        cropping_bounds = rescaled_bounds

    # tabularize to use contours
    _, contours_df = parse_slide_annotations_into_tables(
        annotations_slice, cropping_bounds=cropping_bounds,
        cropping_polygon_vertices=roi_coords,
        use_shapely=mode in ('manual_bounds', 'polygonal_bounds'),
    )
    contours_list = contours_df.to_dict(orient='records')

    # Final bounds (relative to slide at base magnification)
    bounds = {k: int(v / sf) for k, v in rescaled_bounds.items()}
    result = dict()

    # get RGB
    if get_rgb:
        getStr = \
            "/item/%s/tiles/region?left=%d&right=%d&top=%d&bottom=%d&encoding=PNG" \
            % (slide_id,
               bounds['XMIN'], bounds['XMAX'],
               bounds['YMIN'], bounds['YMAX'])
        getStr += appendStr
        resp = gc.get(getStr, jsonResp=False)
        rgb = get_image_from_htk_response(resp)
        result['rgb'] = rgb

    # Assign to results
    result.update({
        'contours': contours_list,
        'bounds': bounds,
    })

    # get visualization of annotations on RGB
    if get_visualization:
        result['visualization'] = _visualize_annotations_on_rgb(
            rgb=rgb, contours_list=contours_list, linewidth=linewidth,
            text=text)

    return result
def get_all_roi_masks_for_slide(
        input_img, input_ann, GTCODE_PATH, MASK_SAVEPATH, slide_name=None,
        verbose=True, monitorPrefix="", get_roi_mask_kwargs=None):
    """Parse annotations and save ground truth masks for ALL ROIs.

    Get all ROIs in a single slide. This is a wrapper around get_roi_mask()
    which should be referred to for implementation details.

    Parameters
    -----------
    input_img : object
        input large image object
    input_ann : object
        input annotation object
    GTCODE_PATH : str
        path to the ground truth codes and information csv file.
        Refer to the docstring of get_roi_mask() for more info.
    MASK_SAVEPATH : str
        path to directory to save ROI masks
    slide_name (optional) : str
        If not given, it's inferred using a server request using
        girder client.
    verbose (optional) : bool
        Print progress to screen?
    monitorPrefix (optional) : str
        text to prepend to printed statements
    get_roi_mask_kwargs : dict or None
        extra kwargs for get_roi_mask()

    Returns
    --------
    list of strs
        save paths for ROIs

    """
    # FIX: avoid a mutable dict as a default argument
    if get_roi_mask_kwargs is None:
        get_roi_mask_kwargs = {}

    # read ground truth codes and information; zero is reserved for
    # "outside mask", so all codes must be positive
    GTCodes = read_csv(GTCODE_PATH)
    GTCodes.index = GTCodes.loc[:, 'group']
    if any(GTCodes.loc[:, 'GT_code'] <= 0):
        raise Exception("All GT_code must be > 0")

    # get annotations for slide
    slide_annotations = input_ann

    # get bounding box information for all annotations
    element_infos = get_bboxes_from_slide_annotations(slide_annotations)

    # get indices of rois
    idxs_for_all_rois = _get_idxs_for_all_rois(
        GTCodes=GTCodes, element_infos=element_infos)

    # FIX: create output folders ONCE up front instead of retrying a bare
    # os.mkdir wrapped in ``except: pass`` on every loop iteration;
    # makedirs(..., exist_ok=True) no longer swallows real OS errors.
    MASK_SAVEPATH_MASK = MASK_SAVEPATH + '/mask'
    MASK_SAVEPATH_REG = MASK_SAVEPATH + '/region'
    for folder in (MASK_SAVEPATH_MASK, MASK_SAVEPATH_REG):
        os.makedirs(folder, exist_ok=True)

    savenames = []

    for roino, idx_for_roi in enumerate(idxs_for_all_rois):

        roicountStr = "%s: roi %d of %d" % (
            monitorPrefix, roino + 1, len(idxs_for_all_rois))

        # get roi mask and info
        ROI, roiinfo = get_roi_mask(
            slide_annotations=slide_annotations,
            element_infos=element_infos,
            GTCodes_df=GTCodes.copy(), idx_for_roi=idx_for_roi,
            monitorPrefix=roicountStr, **get_roi_mask_kwargs)

        # shared stem for this ROI's mask and region files
        ROINAMESTR = "%s_left-%d_top-%d" % (
            slide_name, roiinfo['XMIN'], roiinfo['YMIN'])

        # save roi mask (labeled image)
        savename = os.path.join(MASK_SAVEPATH_MASK, ROINAMESTR + ".png")
        if verbose:
            print("%s: Saving %s\n" % (roicountStr, savename))
        imwrite(im=ROI, uri=savename)

        # save corresponding rgb region from the slide
        region = [roiinfo['XMIN'], roiinfo['YMIN'],
                  roiinfo['BBOX_WIDTH'], roiinfo['BBOX_HEIGHT']]
        maxRegionSize = 5000
        im_input = input_img.getRegion(
            format=large_image.tilesource.TILE_FORMAT_NUMPY,
            **utils.get_region_dict(region, maxRegionSize, input_img))[0]
        savename1 = os.path.join(MASK_SAVEPATH_REG, ROINAMESTR + ".png")
        skimage.io.imsave(savename1, im_input)
        if verbose:
            print("%s: Saving %s\n" % (roicountStr, savename1))

        # only mask paths are returned (matches the original behavior)
        savenames.append(savename)

    return savenames
def test_prep(girderClient):  # noqa
    """Populate the shared cfg object with everything the tests need."""
    cfg.gc = girderClient
    item_record = cfg.gc.get(
        '/item', parameters={'text': "TCGA-A2-A0YE-01Z-00-DX1"})[0]

    # load the ground-truth codes table and convert to a dict keyed by group
    gt_csv_path = getTestFilePath('sample_GTcodes.csv')
    gt_table = read_csv(gt_csv_path)
    gt_table.index = gt_table.loc[:, 'group']
    GTCodes_dict = gt_table.to_dict(orient='index')

    # temporary directory tree where the outputs get written
    cfg.BASE_SAVEPATH = tempfile.mkdtemp()
    subdir_names = {
        'contours': 'contours',
        'rgb': 'rgbs',
        'visualization': 'vis',
        'mask': 'masks',
    }
    cfg.SAVEPATHS = {
        key: os.path.join(cfg.BASE_SAVEPATH, dirname)
        for key, dirname in subdir_names.items()}
    for savepath in cfg.SAVEPATHS.values():
        if not os.path.exists(savepath):
            os.mkdir(savepath)

    # Microns-per-pixel / Magnification (either or)
    cfg.MPP = 5.0
    cfg.MAG = None

    # fetch annotations for the slide ...
    cfg.slide_annotations = cfg.gc.get(
        '/annotation/item/' + item_record['_id'])

    # ... then scale them up/down by the appropriate factor
    scale_factor, _ = get_scale_factor_and_appendStr(
        gc=cfg.gc, slide_id=item_record['_id'], MPP=cfg.MPP, MAG=cfg.MAG)
    cfg.slide_annotations = scale_slide_annotations(
        cfg.slide_annotations, sf=scale_factor)

    # bounding box information for all annotations
    cfg.element_infos = get_bboxes_from_slide_annotations(
        cfg.slide_annotations)

    # common params for annotations_to_contours_no_mask()
    annotations_to_contours_kwargs = {
        'MPP': cfg.MPP,
        'MAG': cfg.MAG,
        'linewidth': 0.2,
        'get_rgb': True,
        'get_visualization': True,
    }

    # params for TESTING annotations_to_contours_no_mask()
    cfg.test_annots_to_contours_kwargs = copy.deepcopy(
        annotations_to_contours_kwargs)
    cfg.test_annots_to_contours_kwargs['gc'] = cfg.gc
    cfg.test_annots_to_contours_kwargs['slide_id'] = item_record['_id']
    cfg.test_annots_to_contours_kwargs['bounds'] = {
        'XMIN': 58000, 'XMAX': 63000,
        'YMIN': 35000, 'YMAX': 39000}

    # params for getting all rois for slide
    cfg.get_all_rois_kwargs = {
        'gc': cfg.gc,
        'slide_id': item_record['_id'],
        'GTCodes_dict': GTCodes_dict,
        'save_directories': cfg.SAVEPATHS,
        'annotations_to_contours_kwargs': annotations_to_contours_kwargs,
        'slide_name': 'TCGA-A2-A0YE',
        'verbose': False,
        'monitorprefix': 'test',
    }
def get_all_roi_masks_for_slide(
        gc, slide_id, GTCODE_PATH, MASK_SAVEPATH, slide_name=None,
        verbose=True, monitorPrefix="", get_roi_mask_kwargs=None):
    """Parse annotations and save ground truth masks for ALL ROIs.

    Gets all ROIs in a single slide. This is a wrapper around
    get_roi_mask(), which should be referred to for implementation details.

    Parameters
    ----------
    gc : object
        girder client object to make requests, for example:
        gc = girder_client.GirderClient(apiUrl = APIURL)
        gc.authenticate(interactive=True)
    slide_id : str
        girder id for item (slide)
    GTCODE_PATH : str
        path to the ground truth codes and information csv file.
        Refer to the docstring of get_roi_mask() for more info.
    MASK_SAVEPATH : str
        path to directory to save ROI masks
    slide_name : str, optional
        If not given, it's inferred using a server request using the
        girder client.
    verbose : bool, optional
        print progress to screen?
    monitorPrefix : str, optional
        text to prepend to printed statements
    get_roi_mask_kwargs : dict, optional
        extra kwargs for get_roi_mask()

    Returns
    -------
    list of str
        save paths for ROIs

    Raises
    ------
    ValueError
        if any GT_code in the csv file is not a positive integer
    """
    # avoid the mutable-default-argument pitfall (was `=dict()`)
    if get_roi_mask_kwargs is None:
        get_roi_mask_kwargs = {}

    # if not given, assign name of first file associated with item
    if slide_name is None:
        resp = gc.get('/item/%s/files' % slide_id)
        slide_name = resp[0]['name']
        # splitext is safe when the filename has no extension, whereas the
        # old `slide_name[:slide_name.rfind('.')]` dropped the last char
        slide_name = os.path.splitext(slide_name)[0]

    # read ground truth codes and information
    GTCodes = read_csv(GTCODE_PATH)
    GTCodes.index = GTCodes.loc[:, 'group']
    # validate with a real exception: the previous `assert` is stripped
    # when python runs with -O, silently disabling this check
    if not all(GTCodes.loc[:, 'GT_code'] > 0):
        raise ValueError("All GT_code must be > 0")

    # get annotations for slide
    slide_annotations = gc.get('/annotation/item/' + slide_id)

    # get bounding box information for all annotations
    element_infos = get_bboxes_from_slide_annotations(slide_annotations)

    # get indices of rois
    idxs_for_all_rois = _get_idxs_for_all_rois(
        GTCodes=GTCodes, element_infos=element_infos)

    savenames = []

    for roino, idx_for_roi in enumerate(idxs_for_all_rois):

        roicountStr = "%s: roi %d of %d" % (
            monitorPrefix, roino + 1, len(idxs_for_all_rois))

        # get roi mask and info
        ROI, roiinfo = get_roi_mask(
            slide_annotations=slide_annotations,
            element_infos=element_infos,
            GTCodes_df=GTCodes.copy(),
            idx_for_roi=idx_for_roi,
            monitorPrefix=roicountStr, **get_roi_mask_kwargs)

        # now save roi
        ROINAMESTR = "%s_left-%d_top-%d_mag-BASE" % (
            slide_name, roiinfo['XMIN'], roiinfo['YMIN'])
        savename = os.path.join(MASK_SAVEPATH, ROINAMESTR + ".png")
        if verbose:
            print("%s: Saving %s\n" % (roicountStr, savename))
        imwrite(im=ROI, uri=savename)

        savenames.append(savename)

    return savenames
def grid_tiling(gc, item_id, group_names, save_dir, save_mag=None,
                mask_mag=1.25, tile_size=(224, 224), tissue_threshold=0.3,
                annotation_threshold=0.15, random_seed=64, is_test=False,
                oversample_background=2.0, reinhard_stats=None):
    """Split a DSA image item (WSI) into smaller images and save locally
    grouped by annotations.

    This approach grids the image into equal sized small images, or tiles
    (i.e. a grid is placed over the WSI starting at the top left corner).
    At the bottom and right edge of the WSI the tiles are ignored if not of
    correct size (the case where the WSI dimensions are not a multiple
    factor of the tile size). A list of annotation group names are needed
    to group the tiles into classes of images saved in their own
    directories. Tiles with no tissue detected are ignored and tiles not
    containing annotations (but have tissue) are by default saved into a
    background class. A background annotation group will cause issues so
    avoid having this annotation group name. Tiles can be saved at a lower
    magnification than source image if needed (param: save_mag). Note that
    the tile size specified should be the tile size at the save
    magnification, not the source magnification. Images saved will be of
    the tile size specified in parameters, regardless of the save_mag used.

    Parameters
    ----------
    gc : girder_client.GirderClient
        authenticated client
    item_id : str
        DSA image item id
    group_names : list
        list of annotation group names
    save_dir : str
        directory to create group directories with images -
        save_dir / group_name_1, save_dir / background, etc.
    save_mag : float (optional)
        magnification to use when saving the images, if None then source
        magnification will be used.
    mask_mag : float (optional)
        magnification to create binary mask of tissue and annotations.
        Note that if your annotations are very small it will benefit to
        use a larger value than default here, but binary masks will fail
        to create at very high magnifications.
    tile_size : tuple (optional)
        size (width, height) to save tiles at; note that this is the size
        they will be saved at regardless of the magnification used to save
        the images (i.e. if save_mag is 4 times less than the source
        magnification then the actual tile_size will represent 4 times the
        pixels at full resolution).
    tissue_threshold : float (optional)
        from 0 to 1, percentage of tile that must contain tissue to be
        included
    annotation_threshold : float (optional)
        from 0 to 1, percentage of tile that must contain annotation (per
        group) to be labeled as annotation. Note that a single tile may be
        saved twice, representing multiple classes.
    random_seed : int (optional)
        random seed to use when shuffling the background regions
    is_test : bool (optional)
        if True then all the background regions will be saved, otherwise
        oversample_background will be used to determine how many
        background regions to save
    oversample_background : float (optional)
        factor to oversample background class images, compared to the
        number of images of the annotation class with the most images
        saved
    reinhard_stats : dict (optional)
        if not None then the images saved will be color augmented by color
        normalizing the tiles using the Reinhard color norm method. This
        dict should contain src_mu and scr_sigma keys with the stats for
        this image and target_mu and targe_sigma keys which are lists
        containing 1 or more target images to normalize to.

    """
    im_info = gc.get('item/{}/tiles'.format(item_id))
    # default to saving at the source magnification
    if save_mag is None:
        save_mag = im_info['magnification']

    if reinhard_stats is not None:
        # get color stats for image (source stats for Reinhard normalization)
        mu, sigma = reinhard_color_stats(gc, item_id)

    # ----- prep work ----- #
    # image file name without extension, used as prefix for saved tiles
    filename = splitext(gc.getItem(item_id)['name'])[0]

    # create dirs for each image class to save
    group_dirs = [join(save_dir, group_name) for group_name in group_names]
    for group_dir in group_dirs:
        makedirs(group_dir, exist_ok=True)

    background_dir = join(save_dir, 'background')
    makedirs(background_dir, exist_ok=True)

    # get image annotations
    annotations = gc.get('/annotation/item/' + item_id)

    # create a dataframe to use with annotation to mask handler functions
    # (gt codes); each group gets GT_code i+1 so 0 stays "no annotation"
    gt_data = [[group_name, 1, i + 1, 0, 0, 'rgb(0, 0, {})'.format(i), '']
               for i, group_name in enumerate(group_names)]
    gt_codes = pd.DataFrame(
        columns=[
            'group', 'overlay_order', 'GT_code', 'is_roi',
            'is_background_class', 'color', 'comments'
        ],
        data=gt_data, index=range(len(group_names)))
    gt_codes.index = gt_codes.loc[:, 'group']

    # get binary masks - tissue mask and annotation(s) mask
    mask_mag_factor, _ = get_scale_factor_and_appendStr(
        gc=gc, slide_id=item_id, MAG=mask_mag)
    # - scaling the annotations to lower magnification (deepcopy so the
    #   original annotations can be rescaled independently below)
    mask_annotations = scale_slide_annotations(
        deepcopy(annotations), sf=mask_mag_factor)
    # - binary masks are for the whole image at low resolution, function
    #   returns also the RGB image which we use for getting the tissue mask
    mask_element_info = get_bboxes_from_slide_annotations(mask_annotations)
    get_kwargs = deepcopy(
        GET_KWARGS)  # avoid referencing on the global variable
    get_kwargs['gc'] = gc
    get_kwargs['slide_id'] = item_id
    get_kwargs['GTCodes_dict'] = gt_codes.T.to_dict()
    get_kwargs['bounds'] = None
    get_kwargs['MAG'] = mask_mag
    ann_mask_and_image = get_image_and_mask_from_slide(
        mode='wsi', slide_annotations=mask_annotations,
        element_infos=mask_element_info, **get_kwargs)
    tissue_mask = get_tissue_mask(ann_mask_and_image['rgb'])[0]

    # convert the annotations to lower magnification
    fr_to_lr_factor, _ = get_scale_factor_and_appendStr(
        gc=gc, slide_id=item_id, MAG=save_mag)
    annotations = scale_slide_annotations(annotations, sf=fr_to_lr_factor)
    lr_element_info = get_bboxes_from_slide_annotations(annotations)

    # get full resolution information for image
    fr_mag = im_info['magnification']
    fr_width = im_info['sizeX']
    fr_height = im_info['sizeY']
    # tile size at full resolution, so the grid below steps in
    # full-resolution coordinates
    fr_tile_size = int(tile_size[0] / fr_to_lr_factor), int(
        tile_size[1] / fr_to_lr_factor)  # (width, height)

    # change the get_kwargs to save magnification
    get_kwargs['MAG'] = save_mag

    # ----- loop through image at full res ----- #
    group_annotation_counts = [0] * len(group_names)
    background_regions = []
    for x in range(0, fr_width, fr_tile_size[0]):
        for y in range(0, fr_height, fr_tile_size[1]):
            # check that the tile won't go over the edge of image,
            # if so skip
            if x + fr_tile_size[0] > fr_width or y + fr_tile_size[
                    1] > fr_height:
                continue

            # check tile for tissue, using the binary mask for tissue
            # (coordinates scaled from full-res down to mask magnification)
            tissue_tile = tissue_mask[
                int(y * mask_mag / fr_mag):int((y + fr_tile_size[1]) *
                                               mask_mag / fr_mag),
                int(x * mask_mag / fr_mag):int((x + fr_tile_size[0]) *
                                               mask_mag / fr_mag)]

            # skip if tile does not contain enough tissue
            if np.count_nonzero(
                    tissue_tile) / tissue_tile.size < tissue_threshold:
                continue

            # check tile for annotations, using the binary mask for
            # annotations
            annotation_tile = ann_mask_and_image['ROI'][
                int(y * mask_mag / fr_mag):int((y + fr_tile_size[1]) *
                                               mask_mag / fr_mag),
                int(x * mask_mag / fr_mag):int((x + fr_tile_size[0]) *
                                               mask_mag / fr_mag)]

            # tile is background if no annotation is present (of any group)
            background_flag = True

            # - check for each annotation group
            for i, group_name in enumerate(group_names):
                # GT_code for group i is i + 1 (see gt_data above)
                group_annotation_tile = annotation_tile == i + 1

                # tile is ignored if not enough contain annotation
                if np.count_nonzero(
                        group_annotation_tile
                ) / group_annotation_tile.size < annotation_threshold:
                    continue

                background_flag = False
                group_annotation_counts[i] += 1

                # get annotation image and save it
                get_kwargs['bounds'] = {
                    'XMIN': x, 'XMAX': x + fr_tile_size[0],
                    'YMIN': y, 'YMAX': y + fr_tile_size[1]
                }
                annotation_im = get_image_and_mask_from_slide(
                    mode='manual_bounds', slide_annotations=annotations,
                    element_infos=lr_element_info, **get_kwargs)['rgb']

                # save the image to correct directory
                imwrite(
                    join(group_dirs[i],
                         '{}_x_{}_y_{}.png'.format(filename, x, y)),
                    annotation_im)

                if reinhard_stats is not None:
                    # add color augmentation with Reinhard method, one
                    # extra image per target in reinhard_stats
                    for j, (_, v) in enumerate(reinhard_stats.items()):
                        im_norm = reinhard(annotation_im.copy(), v['mu'],
                                           v['sigma'], src_mu=mu,
                                           src_sigma=sigma)
                        imwrite(
                            join(
                                group_dirs[i],
                                '{}_x_{}_y_{}_norm_{}.png'.format(
                                    filename, x, y, j)), im_norm)

            if background_flag:
                # save coordinates for non-glomeruli images candidates;
                # actual pixels are fetched later, after subsampling
                background_regions.append({
                    'magnification': save_mag, 'left': x, 'top': y,
                    'width': fr_tile_size[0], 'height': fr_tile_size[1]
                })

    # randomly select background class coordinates
    # - oversample the background class by a factor of the most
    #   represented annotation class
    Random(random_seed).shuffle(background_regions)
    if not is_test:
        background_regions = background_regions[:int(
            oversample_background * max(group_annotation_counts))]

    for region in background_regions:
        # drop any alpha channel, keep RGB only
        tile_im = get_region_im(gc, item_id, region)[:, :, :3]

        # save background image
        imwrite(
            join(
                background_dir,
                '{}_x_{}_y_{}.png'.format(filename, region['left'],
                                          region['top'])), tile_im)

        if reinhard_stats is not None:
            # add color augmentation with Reinhard method
            for j, (_, v) in enumerate(reinhard_stats.items()):
                im_norm = reinhard(tile_im.copy(), v['mu'], v['sigma'],
                                   src_mu=mu, src_sigma=sigma)
                imwrite(
                    join(
                        background_dir,
                        '{}_x_{}_y_{}_norm_{}.png'.format(
                            filename, region['left'], region['top'], j)),
                    im_norm)