def test_polygon_merger_rtree(self):
    """Test Polygon_merger_v2.run()."""
    # Fetch the sample annotation JSON and parse it into contour tables.
    annotationPath = utilities.externaldata(
        'data/TCGA-A2-A0YE-01Z-00-DX1_GET_MergePolygons.svs_annotations.json.sha512')  # noqa
    # FIX: close the file handle -- the original passed an anonymous
    # open() straight to json.load and leaked the descriptor.
    with open(annotationPath) as f:
        slide_annotations = json.load(f)
    _, contours_df = parse_slide_annotations_into_tables(slide_annotations)

    # init & run polygon merger; "roi" boundaries are excluded from merging
    pm = Polygon_merger_v2(contours_df, verbose=0)
    pm.unique_groups.remove("roi")
    pm.run()

    # make sure it is what we expect
    assert pm.new_contours.shape == (16, 16)
    assert set(pm.new_contours.loc[:, 'group']) == {
        'mostly_tumor', 'mostly_stroma', 'mostly_lymphocytic_infiltrate'}

    # add colors (aesthetic) -- carry each group's color from the source table
    for group in pm.unique_groups:
        cs = contours_df.loc[contours_df.loc[:, "group"] == group, "color"]
        pm.new_contours.loc[
            pm.new_contours.loc[:, "group"] == group, "color"] = cs.iloc[0]

    # get rid of nonenclosed stroma (aesthetic)
    pm.new_contours = _discard_nonenclosed_background_group(
        pm.new_contours, background_group="mostly_stroma")

    # get list of annotation documents -- one document per group
    annotation_docs = get_annotation_documents_from_contours(
        pm.new_contours.copy(), separate_docs_by_group=True,
        docnamePrefix='test', verbose=False,
        monitorPrefix="annotation docs")
    assert len(annotation_docs) == 3
def test_polygon_merger(self):
    """Test Polygon_merger.run()."""
    # Merge annotations from the tiled-masks grid into slide-level contours.
    merger = Polygon_merger(
        maskpaths=maskpaths, GTCodes_df=GTCodes_df,
        discard_nonenclosed_background=True, verbose=1)
    merged_contours = merger.run()

    # Sanity-check the merged table: expected shape and annotation groups.
    self.assertTupleEqual(merged_contours.shape, (17, 13))
    expected_groups = {
        'roi', 'mostly_tumor', 'mostly_stroma',
        'mostly_lymphocytic_infiltrate',
    }
    self.assertSetEqual(set(merged_contours.loc[:, 'group']), expected_groups)

    # Start from a clean slate on the target slide.
    delete_annotations_in_slide(gc, SAMPLE_SLIDE_ID)

    # Convert contours to annotation documents, one document per group.
    docs = get_annotation_documents_from_contours(
        merged_contours.copy(), separate_docs_by_group=True,
        docnamePrefix='test', verbose=False,
        monitorPrefix=SAMPLE_SLIDE_ID + ": annotation docs")

    # Each post should succeed and echo back an annotation payload.
    for doc in docs:
        response = gc.post(
            "/annotation?itemId=" + SAMPLE_SLIDE_ID, json=doc)
        self.assertTrue('annotation' in response.keys())
def visualize_results(self):
    """Visualize results in DSA."""
    cdt = self.cdt
    verbose = cdt.verbose == 3

    # Trace contours (including the ROI boundary) from the labeled mask.
    contours = get_contours_from_mask(
        MASK=self.labeled, GTCodes_df=cdt.GTcodes.copy(),
        get_roi_contour=True, roi_group='roi',
        background_group='not_specified',
        discard_nonenclosed_background=True,
        MIN_SIZE=15, MAX_SIZE=None, verbose=verbose,
        monitorPrefix=self.monitorPrefix + ": -- contours")

    # Annotation-element properties: scale factor and offsets used when
    # building the documents (see get_annotation_documents_from_contours).
    annprops = {
        'F': cdt.slide_info['magnification'] / cdt.MAG,
        'X_OFFSET': self.xmin,
        'Y_OFFSET': self.ymin,
        'opacity': cdt.opacity,
        'lineWidth': cdt.lineWidth,
    }

    docs = get_annotation_documents_from_contours(
        contours.copy(), separate_docs_by_group=True, docnamePrefix='cdt',
        annprops=annprops, verbose=verbose,
        monitorPrefix=self.monitorPrefix + ": -- annotation docs")

    # Push every annotation document to the slide.
    for doc in docs:
        _ = cdt.gc.post("/annotation?itemId=" + cdt.slide_id, json=doc)
def get_tissue_boundary_annotation_documents( gc, slide_id, labeled, color='rgb(0,0,0)', group='tissue', annprops=None): """Get annotation documents of tissue boundaries to visualize on DSA. Parameters ----------- gc : object girder client to use slide_id : str girder ID of slide labeled : np array mask of tissue regions using slide thumbnail. This could either be a binary mask or a mask where each unique value corresponds to one tissue region. It will be binalized anyways. This can be obtained using get_tissue_mask(). color : str color to assign to boundaries. format like rgb(0,0,0) group : str label for annotations annpops : dict properties of annotation elements. Contains the following keys F, X_OFFSET, Y_OFFSET, opacity, lineWidth. Refer to get_single_annotation_document_from_contours() for details. Returns -------- list of dicts each dict is an annotation document that you can post to DSA """ # Get annotations properties if annprops is None: slide_info = gc.get('item/%s/tiles' % slide_id) annprops = { 'F': slide_info['sizeX'] / labeled.shape[1], # relative to base 'X_OFFSET': 0, 'Y_OFFSET': 0, 'opacity': 0, 'lineWidth': 4.0, } # Define GTCodes dataframe GTCodes_df = DataFrame(columns=['group', 'GT_code', 'color']) GTCodes_df.loc['tissue', 'group'] = group GTCodes_df.loc['tissue', 'GT_code'] = 1 GTCodes_df.loc['tissue', 'color'] = color # get annotation docs contours_tissue = get_contours_from_mask( MASK=0 + (labeled > 0), GTCodes_df=GTCodes_df, get_roi_contour=False, MIN_SIZE=0, MAX_SIZE=None, verbose=False, monitorPrefix="tissue: getting contours") annotation_docs = get_annotation_documents_from_contours( contours_tissue.copy(), docnamePrefix='test', annprops=annprops, verbose=False, monitorPrefix="tissue : annotation docs") return annotation_docs
def test_polygon_merger_tiled_masks(self, girderClient):  # noqa
    """Test Polygon_merger.run()."""
    # Load the ground-truth codes table, indexed by group name.
    gtcodes = read_csv(getTestFilePath('sample_GTcodes.csv'))
    gtcodes.index = gtcodes.loc[:, 'group']

    # Collect the mask files saved for the adjacent ROIs.
    masks_dir = getTestFilePath(os.path.join(
        'annotations_and_masks', 'polygon_merger_roi_masks'))
    mask_files = [
        os.path.join(masks_dir, fname)
        for fname in os.listdir(masks_dir) if fname.endswith('.png')]

    # Merge polygons across the tiled masks grid.
    merger = Polygon_merger(
        maskpaths=mask_files, GTCodes_df=gtcodes,
        discard_nonenclosed_background=True, verbose=1)
    merged = merger.run()

    # Verify the merged table shape and the set of groups produced.
    assert merged.shape == (13, 13)
    assert set(merged.loc[:, 'group']) == {
        'roi', 'mostly_tumor', 'mostly_stroma',
        'mostly_lymphocytic_infiltrate'}

    # Resolve the sample slide and clear any annotations already on it.
    slide_item = girderClient.resourceLookup(
        '/user/admin/Public/TCGA-A2-A0YE-01Z-00-DX1.8A2E3094-5755-42BC-969D-7F0A2ECA0F39.svs')  # noqa
    slide_id = str(slide_item['_id'])
    delete_annotations_in_slide(girderClient, slide_id)

    # Build annotation documents (one per group) and post each one;
    # every post must succeed and return an annotation payload.
    docs = get_annotation_documents_from_contours(
        merged.copy(), separate_docs_by_group=True, docnamePrefix='test',
        verbose=False, monitorPrefix=slide_id + ": annotation docs")
    for doc in docs:
        resp = girderClient.post("/annotation?itemId=" + slide_id, json=doc)
        assert 'annotation' in resp.keys()
def visualize_contiguous_superpixels(self):
    """Visualize contiguous spixels, color-coded by cellularity."""
    cd = self.cd
    verbose = cd.verbose == 3

    # Paint each superpixel region with its cellularity-cluster membership.
    cluster_mask = np.zeros(self.spixel_mask.shape)
    for spval, sp in self.fdata.iterrows():
        cluster_mask[self.spixel_mask == spval] = sp['cluster']

    # Build a GTCodes table with one row per cellularity cluster.
    gtcodes = DataFrame(columns=['group', 'GT_code', 'color'])
    for spval, cp in self.cluster_props.items():
        label = 'cellularity-%d' % (cp['cellularity'])
        gtcodes.loc[label, 'group'] = label
        gtcodes.loc[label, 'GT_code'] = spval
        gtcodes.loc[label, 'color'] = cp['color']

    # Trace contours of the cluster-membership mask.
    contours = get_contours_from_mask(
        MASK=cluster_mask, GTCodes_df=gtcodes, get_roi_contour=False,
        MIN_SIZE=0, MAX_SIZE=None, verbose=verbose,
        monitorPrefix=self.monitorPrefix)

    # Scale/offset properties used when building the annotation documents.
    annprops = {
        'F': (self.ymax - self.ymin) / self.tissue_rgb.shape[0],
        'X_OFFSET': self.xmin,
        'Y_OFFSET': self.ymin,
        'opacity': cd.opacity_contig,
        'lineWidth': cd.lineWidth,
    }
    docs = get_annotation_documents_from_contours(
        contours.copy(), docnamePrefix='contig', annprops=annprops,
        annots_per_doc=1000, separate_docs_by_group=True,
        verbose=verbose, monitorPrefix=self.monitorPrefix)

    # Post each annotation document, logging progress as we go.
    n_docs = len(docs)
    for didx, doc in enumerate(docs):
        cd._print2("%s: Posting doc %d of %d" % (
            self.monitorPrefix, didx + 1, n_docs))
        _ = cd.gc.post("/annotation?itemId=" + cd.slide_id, json=doc)
def test_get_annotation_documents_from_contours(self, girderClient):  # noqa
    """Test get_contours_from_bin_mask()."""
    self._setup()
    slide_item = girderClient.resourceLookup(
        '/user/admin/Public/TCGA-A2-A0YE-01Z-00-DX1.8A2E3094-5755-42BC-969D-7F0A2ECA0F39.svs')  # noqa
    slide_id = str(slide_item['_id'])

    # Annotation-element properties: coordinate offsets and styling.
    props = {
        'X_OFFSET': self.X_OFFSET,
        'Y_OFFSET': self.Y_OFFSET,
        'opacity': 0.2,
        'lineWidth': 4.0,
    }
    docs = get_annotation_documents_from_contours(
        self.CONTOURS_DF.copy(), separate_docs_by_group=True,
        annots_per_doc=10, docnamePrefix='test', annprops=props,
        verbose=False, monitorPrefix=self.MASKNAME[:12] + ": annotation docs")

    # Expect 8 documents, separated by group; the tumor group spills into
    # two documents (annots_per_doc=10 caps annotations per document).
    assert len(docs) == 8
    doc_names = {doc['name'] for doc in docs}
    assert doc_names == {
        'test_blood_vessel-0', 'test_exclude-0',
        'test_mostly_lymphocytic_infiltrate-0', 'test_mostly_stroma-0',
        'test_mostly_tumor-0', 'test_mostly_tumor-1',
        'test_normal_acinus_or_duct-0', 'test_roi-0'}

    # Clear pre-existing annotations, then post one document -- the
    # server response must contain the created annotation.
    delete_annotations_in_slide(girderClient, slide_id)
    resp = girderClient.post("/annotation?itemId=" + slide_id, json=docs[0])
    assert 'annotation' in resp.keys()
def test_polygon_merger_v2(self):
    """Test Polygon_merger_v2.run()."""
    # init & run polygon merger; "roi" is excluded from merging
    merger = Polygon_merger_v2(contours_df, verbose=1)
    merger.unique_groups.remove("roi")
    merger.run()

    # Verify merged output shape and the set of annotation groups.
    self.assertTupleEqual(merger.new_contours.shape, (16, 13))
    self.assertSetEqual(
        set(merger.new_contours.loc[:, 'group']),
        {'mostly_tumor', 'mostly_stroma', 'mostly_lymphocytic_infiltrate'})

    # Carry each group's color over from the source table (aesthetic).
    for grp in merger.unique_groups:
        group_colors = contours_df.loc[
            contours_df.loc[:, "group"] == grp, "color"]
        merger.new_contours.loc[
            merger.new_contours.loc[:, "group"] == grp,
            "color"] = group_colors.iloc[0]

    # Drop non-enclosed stroma polygons (aesthetic).
    merger.new_contours = _discard_nonenclosed_background_group(
        merger.new_contours, background_group="mostly_stroma")

    # Remove any annotations already on the target slide.
    for ann in gc.get('/annotation/item/' + POST_SLIDE_ID):
        gc.delete('/annotation/%s' % ann['_id'])

    # Build annotation documents (one per group) and post them;
    # each post must succeed and echo back an annotation payload.
    docs = get_annotation_documents_from_contours(
        merger.new_contours.copy(), separate_docs_by_group=True,
        docnamePrefix='test', verbose=False,
        monitorPrefix=POST_SLIDE_ID + ": annotation docs")
    for doc in docs:
        resp = gc.post("/annotation?itemId=" + POST_SLIDE_ID, json=doc)
        self.assertTrue('annotation' in resp.keys())
def test_get_annotation_documents_from_contours(self):
    """Test get_contours_from_bin_mask()."""
    # Annotation-element properties: coordinate offsets and styling.
    props = {
        'X_OFFSET': X_OFFSET,
        'Y_OFFSET': Y_OFFSET,
        'opacity': 0.2,
        'lineWidth': 4.0,
    }
    docs = get_annotation_documents_from_contours(
        CONTOURS_DF.copy(), separate_docs_by_group=True, annots_per_doc=10,
        docnamePrefix='test', annprops=props, verbose=False,
        monitorPrefix=MASKNAME[:12] + ": annotation docs")

    # Expect 8 documents, separated by group; the tumor group spills into
    # two documents (annots_per_doc=10 caps annotations per document).
    self.assertTrue(len(docs) == 8)
    self.assertSetEqual(
        {doc['name'] for doc in docs},
        {'test_blood_vessel-0', 'test_exclude-0',
         'test_mostly_lymphocytic_infiltrate-0', 'test_mostly_stroma-0',
         'test_mostly_tumor-0', 'test_mostly_tumor-1',
         'test_normal_acinus_or_duct-0', 'test_roi-0'})

    # Remove any annotations already on the target slide.
    for ann in gc.get('/annotation/item/' + SAMPLE_SLIDE_ID):
        gc.delete('/annotation/%s' % ann['_id'])

    # Posting the first document must succeed and return the annotation.
    resp = gc.post("/annotation?itemId=" + SAMPLE_SLIDE_ID, json=docs[0])
    self.assertTrue('annotation' in resp.keys())
def create_review_galleries(
        tilepath_base, upload_results=True, gc=None,
        gallery_savepath=None, gallery_folderid=None,
        padding=25, tiles_per_row=2, tiles_per_column=5,
        annprops=None, url=None, nameprefix=''):
    """Create and or post review galleries for rapid review.

    Parameters
    ----------
    tilepath_base : str
        directory where the combined visualization tiles are saved.
    upload_results : bool
        upload results to DSA?
    gc : girder_client.Girder_Client
        authenticated girder client. Only needed if upload_results.
    gallery_savepath : str
        directory to save gallery. Only if upload_results.
    gallery_folderid : str
        girder ID of folder to post galleries. Only if upload_results.
    padding : int
        padding in pixels between tiles in same gallery.
    tiles_per_row : int
        how many visualization tiles per row in gallery.
    tiles_per_column : int
        how many visualization tiles per column in gallery.
    annprops : dict
        properties of the annotations to be posted to DSA. Passed
        directly as annprops to get_annotation_documents_from_contours()
    url : str
        url of the Digital Slide Archive Instance. For example:
        http://candygram.neurology.emory.edu:8080/
    nameprefix : str
        prefix to prepend to gallery name

    Returns
    -------
    list
        each entry is a dict representing the response of the server
        post request to upload the gallery to DSA.

    """
    if upload_results:
        # these parameters are mandatory when pushing results to DSA;
        # locals() lets us report the offending parameter by name
        for par in ('gc', 'gallery_folderid', 'url'):
            if locals()[par] is None:
                raise Exception(
                    "%s cannot be None if upload_results!" % par)

    if gallery_savepath is None:
        gallery_savepath = tempfile.mkdtemp(prefix='gallery-')

    savepaths = []
    resps = []

    # sorted .png tiles are mosaicked in deterministic (filename) order
    tile_paths = [
        os.path.join(tilepath_base, j) for j in
        os.listdir(tilepath_base) if j.endswith('.png')]
    tile_paths.sort()

    def _parse_tilepath(tpath):
        # recover slide name and FOV bounds that were encoded into the
        # tile filename as "<slide>_id-..._left-..._top-..." etc.
        basename = os.path.basename(tpath)
        basename = basename[:basename.rfind('.')]
        tileinfo = {'slide_name': basename.split('_')[0]}
        for attrib in ['id', 'left', 'top', 'bottom', 'right']:
            tileinfo[attrib] = basename.split(
                attrib + '-')[1].split('_')[0]

        # add URL in histomicsTK -- deep link back to the original FOV
        tileinfo['URL'] = url + \
            "histomicstk#?image=%s&bounds=%s%%2C%s%%2C%s%%2C%s%%2C0" % (
                tileinfo['id'], tileinfo['left'], tileinfo['top'],
                tileinfo['right'], tileinfo['bottom'])
        return tileinfo

    n_tiles = len(tile_paths)
    n_galleries = int(np.ceil(n_tiles / (tiles_per_row * tiles_per_column)))

    tileidx = 0

    for galno in range(n_galleries):
        # this makes a 8-bit, mono image (initializes as 1x1x3 matrix)
        im = pyvips.Image.black(1, 1, bands=3)

        # this will store the roi contours
        contours = []

        for row in range(tiles_per_column):
            # vertical position of this row in the final gallery
            rowpos = im.height + padding

            # initialize "row" strip image
            row_im = pyvips.Image.black(1, 1, bands=3)

            for col in range(tiles_per_row):
                if tileidx == n_tiles:
                    break
                tilepath = tile_paths[tileidx]
                print("Inserting tile %d of %d: %s" % (
                    tileidx, n_tiles, tilepath))
                tileidx += 1

                # # get tile from file
                tile = pyvips.Image.new_from_file(
                    tilepath, access="sequential")

                # insert tile into mosaic row; colpos is read before the
                # insert so it marks this tile's left edge in the strip
                colpos = row_im.width + padding
                row_im = row_im.insert(
                    tile[:3], colpos, 0, expand=True, background=255)

                if upload_results:
                    tileinfo = _parse_tilepath(tilepath)
                    # rectangle outlining this tile within the gallery
                    xmin = colpos
                    ymin = rowpos
                    xmax = xmin + tile.width
                    ymax = ymin + tile.height
                    xmin, xmax, ymin, ymax = [
                        str(j) for j in (xmin, xmax, ymin, ymax)]
                    contours.append({
                        'group': tileinfo['slide_name'],
                        'label': tileinfo['URL'],
                        'color': 'rgb(0,0,0)',
                        'coords_x': ",".join([xmin, xmax, xmax, xmin, xmin]),
                        'coords_y': ",".join([ymin, ymin, ymax, ymax, ymin]),
                    })

                    # Add a small contour so that when the pathologist
                    # changes the label to approve or disapprove of the
                    # FOV, the URL in THIS contour (a link to the original
                    # FOV) can be used. We place it in the top right corner.
                    boxsize = 25
                    xmin = str(int(xmax) - boxsize)
                    ymax = str(int(ymin) + boxsize)
                    contours.append({
                        'group': tileinfo['slide_name'],
                        'label': tileinfo['URL'],
                        'color': 'rgb(0,0,0)',
                        'coords_x': ",".join([xmin, xmax, xmax, xmin, xmin]),
                        'coords_y': ",".join([ymin, ymin, ymax, ymax, ymin]),
                    })

            # insert row into main gallery
            im = im.insert(row_im, 0, rowpos, expand=True, background=255)

        filename = '%s_gallery-%d' % (nameprefix, galno + 1)
        savepath = os.path.join(gallery_savepath, filename + '.tiff')
        print("Saving gallery %d of %d to %s" % (
            galno + 1, n_galleries, savepath))

        # save temporarily to disk to be uploaded
        im.tiffsave(
            savepath, tile=True, tile_width=256, tile_height=256,
            pyramid=True)

        if upload_results:
            # upload the gallery to DSA
            resps.append(gc.uploadFileToFolder(
                folderId=gallery_folderid, filepath=savepath,
                filename=filename))
            os.remove(savepath)

            # get and post FOV location annotations
            annotation_docs = get_annotation_documents_from_contours(
                DataFrame(contours), separate_docs_by_group=True,
                annprops=annprops)
            for doc in annotation_docs:
                _ = gc.post(
                    "/annotation?itemId=" + resps[-1]['itemId'], json=doc)
        else:
            savepaths.append(savepath)

    return resps if upload_results else savepaths
int(m['y']) + im_size[1], int(m['y']) + im_size[1])) prediction_data[item_id]['has_holes'].append(0.0) prediction_data[item_id]['touches_edge-top'].append(0.0) prediction_data[item_id]['touches_edge-left'].append(0.0) prediction_data[item_id]['touches_edge-bottom'].append(0.0) prediction_data[item_id]['touches_edge-right'].append(0.0) # loop through each item id and push to annotations for item_id, contour_rows in prediction_data.items(): contours_df = DataFrame(contour_rows) annotation_docs = get_annotation_documents_from_contours( contours_df.copy(), separate_docs_by_group=False, annots_per_doc=100, docnamePrefix=DOCNAME_PREFIX, annprops=annprops, verbose=False, monitorPrefix='') # get current annotations documents from item existing_annotations = gc.get('/annotation/item/' + item_id) # delete annotation documents starting with the same prefix as about to be pushed for ann in existing_annotations: if 'name' in ann['annotation']: doc_name = ann['annotation']['name'] if doc_name.startswith(DOCNAME_PREFIX): gc.delete('/annotation/%s' % ann['_id']) # post the annotation documents you created
def push_annotations_as_doc(gc, item_id, contours, doc_name='Default', group_name='default', color='rgb(255,0,0)', opacity=0.2): """Given a list of opencv contours, push them as annotations to DSA HistomicsUI viewer. Parameters ---------- gc : girder_client.GirderClient authenticated client if private item item_id : str image item id contours : list list of contours in opencv format to push as annotations doc_name : str (optional) prefix of the document to push annoations as, default-# will be appended at the end. Mulitple documents may be pushed if enough contours are present. group_name : str (optional) group name to assign to annotations color : str (optional) rgb color to use for annoations opacity : float (optional) set the opacity to use for the annotation Return ------ contours_df : pandas.DataFrame dataframe used to create the annotation document """ # seed the dict to create dataframe data = {'group': [], 'color': [], 'ymin': [], 'ymax': [], 'xmin': [], 'xmax': [], 'has_holes': [], 'touches_edge-top': [], 'touches_edge-left': [], 'touches_edge-bottom': [], 'touches_edge-right': [], 'coords_x': [], 'coords_y': []} # for each contour convert the x, y coordinates to string if 'x1,x2,...,xn' and 'y1,y2,...,yn' for contour in contours: # append the values that don't change between contour data['color'].append(color) data['group'].append(group_name) data['has_holes'].append(0.0) data['touches_edge-top'].append(0.0) data['touches_edge-left'].append(0.0) data['touches_edge-bottom'].append(0.0) data['touches_edge-right'].append(0.0) # based on the contours, get the min and max of each axis xmin, ymin = np.min(contour, axis=0)[0] xmax, ymax = np.max(contour, axis=0)[0] data['xmin'].append(xmin) data['ymin'].append(ymin) data['xmax'].append(xmax) data['ymax'].append(ymax) # do the conversion to string format of x and y coordinates xs, ys = [], [] for xy in contour: xs.append(str(xy[0, 0])) ys.append(str(xy[0, 1])) data['coords_x'].append(','.join(xs)) 
data['coords_y'].append(','.join(ys)) # build dataframe form dict contours_df = DataFrame(data) # convert dataframe to an annotation doc annprops = {'X_OFFSET': 0, 'Y_OFFSET': 0, 'opacity': opacity, 'lineWidth': 4.0} annotation_docs = get_annotation_documents_from_contours( contours_df.copy(), separate_docs_by_group=False, annots_per_doc=100, docnamePrefix=doc_name, annprops=annprops, verbose=False, monitorPrefix='' ) # get current annotations documents from item existing_annotations = gc.get('/annotation/item/' + item_id) # delete annotation documents starting with the same prefix as about to be pushed for ann in existing_annotations: # some annotations may not have a name, handle this case annotation = ann['annotation'] if 'name' in annotation: current_doc_name = annotation['name'] if current_doc_name.startswith(doc_name): gc.delete('/annotation/%s' % ann['_id']) # post the annotation documents you created for annotation_doc in annotation_docs: resp = gc.post( "/annotation?itemId=" + item_id, json=annotation_doc) return contours_df
def dsa_predict(model, gc, item_id, group_name='Positive',
                ann_doc_name='Default', preprocess_input=None,
                tile_size=(224, 224), save_mag=10, mask_mag=1.25,
                tissue_threshold=0.3, batch_size=8, pred_threshold=0.5,
                color='rgb(255,153,0)'):
    """Predict on DSA image item, using a grid tiling approach given a
    binary trained model.

    Parameters
    ----------
    model : tensorflow.keras.models.Model
        a trained keras model for binary classification
    gc : girder_client.GirderClient
        authenticated client, used to get the images
    item_id : str
        image item id
    group_name : str (optional)
        name of the positive class, will be used as the group name in
        annotation elements
    ann_doc_name : str (optional)
        prepend name of the annotation documents
    preprocess_input : function (optional)
        a function that is applied to the images to process them, works
        on a tensor-style image. If None, no preprocessing is applied
        (images are still scaled to [0, 1]).
    tile_size : tuple (optional)
        size to predict images at
    save_mag : float (optional)
        magnification to extract tiles at
    mask_mag : float (optional)
        tissue mask is used to decide which tiles to predict on, this is
        the magnification of the tissue mask
    tissue_threshold : float (optional)
        fraction of tile that must contain tissue to be predicted on
    batch_size : int (optional)
        predictions are done on images in batches
    pred_threshold : float (optional)
        model predicts a probability from 0 to 1, predictions above
        pred_threshold are considered the positive class that will be
        pushed as annotations
    color : str (optional)
        rgb(###,###,###) color of element box in annotation element

    Return
    ------
    annotation_data : dict
        annotation data that was pushed as annotation

    """
    # info about the source image
    im_info = gc.get('item/{}/tiles'.format(item_id))
    fr_mag = im_info['magnification']
    fr_width = im_info['sizeX']
    fr_height = im_info['sizeY']

    if save_mag is None:
        # save magnification will be native magnification
        save_mag = fr_mag

    # tile size is determined by the save res
    fr_to_lr_factor = save_mag / fr_mag
    fr_tile_size = (int(tile_size[0] / fr_to_lr_factor),
                    int(tile_size[1] / fr_to_lr_factor))  # (width, height)

    # get tissue mask from a low-res version of the image
    lr_im = get_region_im(gc, item_id, {'magnification': mask_mag})[:, :, :3]
    tissue_mask = get_tissue_mask(lr_im)[0]

    # we will loop through image in batches, get the coordinates for batches
    coords = []
    for x in range(0, fr_width, fr_tile_size[0]):
        for y in range(0, fr_height, fr_tile_size[1]):
            # check that the tile won't go over the edge of image, if so skip
            if x + fr_tile_size[0] > fr_width or \
                    y + fr_tile_size[1] > fr_height:
                continue

            # check tile for tissue, using the binary mask for tissue
            tissue_tile = tissue_mask[
                int(y * mask_mag / fr_mag):
                int((y + fr_tile_size[1]) * mask_mag / fr_mag),
                int(x * mask_mag / fr_mag):
                int((x + fr_tile_size[0]) * mask_mag / fr_mag)
            ]

            # skip if tile does not contain enough tissue
            if np.count_nonzero(tissue_tile) / tissue_tile.size < \
                    tissue_threshold:
                continue

            coords.append((x, y))

    # break the coords in batch size chunks
    coord_batches = [coords[i:i + batch_size]
                     for i in range(0, len(coords), batch_size)]

    annotation_data = {col_name: [] for col_name in COL_NAMES}

    print('predicting in batches')
    print('*********************')
    for batch_num, coord_batch in enumerate(coord_batches):
        print('{} of {}'.format(batch_num + 1, len(coord_batches)))

        # get all the images in this batch
        batch_ims = []
        for coord in coord_batch:
            region = {'left': coord[0], 'top': coord[1],
                      'width': fr_tile_size[0], 'height': fr_tile_size[1],
                      'magnification': save_mag}
            batch_ims.append(get_region_im(gc, item_id, region)[:, :, :3])

        # convert to tensor shape
        batch_ims = np.array(batch_ims)

        # FIX: preprocess_input defaults to None but was previously called
        # unconditionally, raising TypeError when omitted. Apply it only
        # when provided; the [0, 1] scaling is kept in both cases.
        if preprocess_input is not None:
            batch_ims = preprocess_input(batch_ims)
        batch_ims = batch_ims / 255.

        # predict on the batch
        predictions = model.predict(batch_ims)

        # identify predictions that are positive
        for i, pred in enumerate(predictions):
            if pred[0] > pred_threshold:
                # add the data to annotation data
                annotation_data['group'].append(group_name)
                annotation_data['color'].append(color)
                annotation_data['has_holes'].append(0.0)
                annotation_data['touches_edge-top'].append(0.0)
                annotation_data['touches_edge-left'].append(0.0)
                annotation_data['touches_edge-bottom'].append(0.0)
                annotation_data['touches_edge-right'].append(0.0)
                xmin, ymin = coord_batch[i][0], coord_batch[i][1]
                annotation_data['xmin'].append(xmin)
                annotation_data['ymin'].append(ymin)
                xmax = xmin + fr_tile_size[0]
                ymax = ymin + fr_tile_size[1]
                annotation_data['xmax'].append(xmax)
                annotation_data['ymax'].append(ymax)
                annotation_data['coords_x'].append(
                    '{},{},{},{}'.format(xmin, xmax, xmax, xmin))
                annotation_data['coords_y'].append(
                    '{},{},{},{}'.format(ymin, ymin, ymax, ymax))

    # only push if annotation data is not empty
    n = len(annotation_data['group'])
    if n:
        print('number of tiles to push: {}'.format(n))
        contours_df = DataFrame(annotation_data)
        annotation_docs = get_annotation_documents_from_contours(
            contours_df.copy(), separate_docs_by_group=False,
            annots_per_doc=100, docnamePrefix=ann_doc_name,
            annprops=ANNPROPS, verbose=False, monitorPrefix=''
        )

        # get current annotations documents from item
        existing_annotations = gc.get('/annotation/item/' + item_id)

        # delete annotation documents starting with the same prefix as
        # about to be pushed
        for ann in existing_annotations:
            if 'name' in ann['annotation']:
                doc_name = ann['annotation']['name']
                if doc_name.startswith(ann_doc_name):
                    gc.delete('/annotation/%s' % ann['_id'])

        # post the annotation documents you created
        for annotation_doc in annotation_docs:
            _ = gc.post(
                "/annotation?itemId=" + item_id, json=annotation_doc)
    else:
        print('no positive tiles to push..')

    return annotation_data