def segment_wsi_foreground_at_low_res(ts, lres_size=2048):
    """Compute a foreground mask of a tile source at reduced magnification.

    The magnification reported by the tile source is divided by a
    power-of-two factor derived from the ratio of the longest image side to
    ``lres_size``; the image is read at that magnification and its first
    three channels are passed to ``htk_utils.simple_mask``.

    Args:
        ts: Tile source exposing ``getMetadata()`` and ``getRegion()``. The
            metadata keys ``'sizeX'``, ``'sizeY'`` and ``'magnification'``
            are read.
        lres_size: Reference side length used to pick the downsample factor.

    Returns:
        A ``(mask, scale)`` tuple: the value returned by
        ``htk_utils.simple_mask`` and the ``{'magnification': ...}`` dict
        that was used to read the low-res image.
    """
    meta = ts.getMetadata()

    # Longest side, never smaller than lres_size so the ratio below is >= 1
    # and the resulting factor never enlarges the image.
    longest_side = float(max(meta['sizeX'], meta['sizeY'], lres_size))

    # Largest power of two not exceeding longest_side / lres_size.
    halvings = np.floor(np.log2(longest_side / lres_size))
    shrink = 2.0 ** halvings

    fgnd_seg_scale = {'magnification': meta['magnification'] / shrink}

    pixels, _ = ts.getRegion(
        scale=fgnd_seg_scale,
        format=large_image.tilesource.TILE_FORMAT_NUMPY,
    )

    # Keep only the first three channels before masking.
    im_fgnd_mask_lres = htk_utils.simple_mask(pixels[:, :, :3])

    return im_fgnd_mask_lres, fgnd_seg_scale
def grabTilesFromImage(imageData, outputDir, lowResMag=1.25, outputRes=20,
                       tilesToOutput=200, debug=False):
    """Save randomly chosen foreground tiles of one Girder item as PNG files.

    A rendering of the item at ``lowResMag`` is fetched through the
    module-level Girder client ``gc`` and passed to
    ``htk_utils.simple_mask``. The non-zero mask pixels are shuffled; each
    visited pixel is scaled by ``outputRes / lowResMag`` and used as the
    top/left corner of a 256x256 region request at ``outputRes``. Accepted
    tiles are written to ``outputDir``.

    Args:
        imageData: Girder item mapping; the ``'_id'`` and ``'name'`` keys
            are read.
        outputDir: Directory the tiles are written to. It is not created
            here.
        lowResMag: Magnification requested for the image that is masked.
        outputRes: Magnification requested for the output tiles.
        tilesToOutput: Maximum number of tiles to save.
        debug: When true, print progress information.

    Returns:
        None. If the low-res image cannot be fetched or decoded, a message
        is printed and the function returns without writing anything.
    """
    if debug:
        print("Analyzing %s; pulling base image at %s and outputing tiles at %s"
              % (imageData['name'], lowResMag, outputRes))

    # Pull the low-res image from Girder and decode the raw bytes with PIL.
    # Best-effort: a failing item is reported and skipped, but interpreter
    # exits / Ctrl-C are no longer swallowed.
    try:
        lowResImg = gc.get(
            '/item/%s/tiles/region?magnification=%s'
            % (imageData['_id'], lowResMag),
            jsonResp=False)
        lowResPILimage = Image.open(io.BytesIO(lowResImg.content))
    except Exception:
        print("Could not open item %s" % imageData['name'])
        return

    # simple_mask works on arrays, so convert the PIL image first.
    im_fgnd_mask_lres = htk_utils.simple_mask(np.asarray(lowResPILimage))

    # np.nonzero returns (row indices, column indices) of the mask pixels.
    (YmaskPts, XmaskPts) = np.nonzero(im_fgnd_mask_lres)
    maskCoords = list(zip(YmaskPts, XmaskPts))
    maxx = len(maskCoords)

    # Visit candidate points in random order.
    random.shuffle(maskCoords)

    # Low-res coordinates are multiplied by this to get request coordinates.
    # float() avoids truncating division when both arguments are ints.
    scaleFactor = float(outputRes) / lowResMag

    slideBaseName = imageData['name'].split(".")[0]
    regionWidth = regionHeight = 256
    tilecount = 0

    for idx, (yLowRes, xLowRes) in enumerate(maskCoords):
        # Checked before fetching so exactly tilesToOutput tiles are saved
        # (and none when tilesToOutput is 0).
        if tilecount >= tilesToOutput:
            break

        top = yLowRes * scaleFactor
        left = xLowRes * scaleFactor

        # The query is assembled piecewise so every parameter, including
        # regionWidth / regionHeight, is separated by a real '&'.
        query = '&'.join([
            'magnification=%s' % outputRes,
            'top=%d' % top,
            'left=%d' % left,
            'regionWidth=%d' % regionWidth,
            'regionHeight=%d' % regionHeight,
        ])
        curTile = gc.get(
            '/item/%s/tiles/region?%s' % (imageData['_id'], query),
            jsonResp=False)
        img = Image.open(io.BytesIO(curTile.content))

        # NOTE: the mean-intensity computation is disabled; avg is pinned to
        # 180, so the filter below currently accepts every tile.
        avg = 180

        if debug:
            LinePrinter("Image %s of %s, imgavg:%s" % (idx, maxx, avg))

        # TODO: the 150 / 210 bounds are undocumented; confirm their meaning.
        if 150 < avg < 210:
            tilename = slideBaseName + '_%dx_%d_%d_%dx%d.png' % (
                outputRes, top, left, regionWidth, regionHeight)
            img.save(opj(outputDir, tilename))
            tilecount += 1
def grabTilesFromImage(imageData, outputDir, lowResMag=0.625, outputRes=20,
                       tilesToOutput=200, debug=False):
    """Save foreground tiles from a grid over the masked area of a Girder item.

    A rendering of the item at ``lowResMag`` is fetched through the
    module-level Girder client ``gc`` and passed to
    ``htk_utils.simple_mask``. The bounding box of the non-zero mask pixels
    is scaled by ``outputRes / lowResMag`` and covered with a 256-step grid
    of corners. The corners are shuffled; for each corner whose low-res mask
    pixel is set, a 256x256 region is requested at ``outputRes`` and saved
    through matplotlib as a ``.jpg`` file in ``outputDir``.

    Args:
        imageData: Girder item mapping; the ``'_id'`` and ``'name'`` keys
            are read.
        outputDir: Directory the tiles are written to. It is not created
            here.
        lowResMag: Magnification requested for the image that is masked.
        outputRes: Magnification requested for the output tiles.
        tilesToOutput: Maximum number of tiles to save.
        debug: When true, print a progress line before processing.

    Returns:
        None. Nothing is written when the mask is empty.
    """
    if debug:
        print("Analyzing %s; pulling base image at %s and outputing tiles at %s"
              % (imageData['name'], lowResMag, outputRes))

    # Pull the low-res image from Girder and decode the raw bytes with PIL.
    lowResImg = gc.get(
        '/item/%s/tiles/region?magnification=%s'
        % (imageData['_id'], lowResMag),
        jsonResp=False)
    lowResPILimage = Image.open(io.BytesIO(lowResImg.content))

    im_fgnd_mask_lres = htk_utils.simple_mask(np.asarray(lowResPILimage))

    # np.nonzero returns (row indices, column indices) of the mask pixels.
    (YmaskPts, XmaskPts) = np.nonzero(im_fgnd_mask_lres)
    if len(XmaskPts) == 0:
        # Empty mask: there is no bounding box to tile.
        return

    # Derived from the arguments instead of a hard-coded 32 so the grid stays
    # aligned with the mask for any lowResMag / outputRes combination
    # (32.0 with the defaults).
    scaleFactor = float(outputRes) / lowResMag

    # Bounding box of the mask, scaled to request coordinates.
    left = int(XmaskPts.min() * scaleFactor)
    top = int(YmaskPts.min() * scaleFactor)
    right = int(XmaskPts.max() * scaleFactor)
    bottom = int(YmaskPts.max() * scaleFactor)

    regionWidth = regionHeight = 256

    # Tile corners on a 256-step grid inside the bounding box, random order.
    corners = [[x, y]
               for x in range(left, right, 256)
               for y in range(top, bottom, 256)]
    np.random.shuffle(corners)

    slideBaseName = imageData['name'].split(".")[0]
    count = 0

    for cornerX, cornerY in corners:
        # Checked before fetching so exactly tilesToOutput tiles are saved.
        if count >= tilesToOutput:
            break

        # Map the corner back onto the low-res mask; int() keeps the indices
        # integral on both Python 2 and Python 3.
        xLow = int(cornerX / scaleFactor)
        yLow = int(cornerY / scaleFactor)

        # Skip background corners before issuing the HTTP request.
        if not im_fgnd_mask_lres[yLow, xLow]:
            continue

        # The query is assembled piecewise so every parameter, including
        # regionWidth / regionHeight, is separated by a real '&'.
        query = '&'.join([
            'magnification=%s' % outputRes,
            'top=%d' % cornerY,
            'left=%d' % cornerX,
            'regionWidth=%d' % regionWidth,
            'regionHeight=%d' % regionHeight,
        ])
        curTile = gc.get(
            '/item/%s/tiles/region?%s' % (imageData['_id'], query),
            jsonResp=False)

        # Decode the returned bytes into a numpy array for plotting.
        img = np.asarray(Image.open(io.BytesIO(curTile.content)))

        tilename = outputDir + "/" + slideBaseName + '_%dx_%d_%d_%dx%d.jpg' % (
            outputRes, cornerY, cornerX, regionWidth, regionHeight)
        plt.imshow(img)
        plt.savefig(tilename)
        # Clear the figure so earlier tiles are not kept and re-rendered on
        # every later savefig call.
        plt.clf()

        count += 1
def main(args):
    """Detect nuclei tile-by-tile with Dask and write them to an annotation file.

    Steps: validate the CLI arguments, create a Dask client, open the input
    image with ``large_image``, compute a low-res foreground mask when the
    image reports a magnification, schedule ``detect_tile_nuclei`` for every
    tile with enough foreground, and dump the collected results as JSON.

    Args:
        args: Parsed CLI namespace. Attributes read here:
            ``inputImageFile``, ``reference_mu_lab``, ``reference_std_lab``,
            ``analysis_roi``, ``scheduler_address``, ``analysis_tile_size``,
            ``analysis_mag``, ``min_fgnd_frac`` and
            ``outputNucleiAnnotationFile``. The whole namespace is also
            forwarded to ``detect_tile_nuclei``.

    Raises:
        IOError: If ``args.inputImageFile`` is not an existing file.
        ValueError: If ``reference_mu_lab`` or ``reference_std_lab`` does
            not have 3 elements, or ``analysis_roi`` does not have 4.
    """
    print('\n>> CLI Parameters ...\n')

    print(args)

    if not os.path.isfile(args.inputImageFile):
        raise IOError('Input image file does not exist.')

    if len(args.reference_mu_lab) != 3:
        raise ValueError('Reference Mean LAB should be a 3 element vector.')

    if len(args.reference_std_lab) != 3:
        raise ValueError('Reference Stddev LAB should be a 3 element vector.')

    if len(args.analysis_roi) != 4:
        raise ValueError('Analysis ROI must be a vector of 4 elements.')

    #
    # Initiate Dask client
    #
    print('\n>> Creating Dask client ...\n')

    scheduler_address = json.loads(args.scheduler_address)

    if scheduler_address is None:
        # No scheduler given: start a local cluster. Leave one core free,
        # but never ask for zero workers on a single-core machine.
        scheduler_address = LocalCluster(
            n_workers=max(1, multiprocessing.cpu_count() - 1),
            scheduler_port=0,
            silence_logs=False)

    c = Client(scheduler_address)
    print(c)

    #
    # Read Input Image
    #
    print('\n>> Reading input image ... \n')

    ts = large_image.getTileSource(args.inputImageFile)

    ts_metadata = ts.getMetadata()

    print(json.dumps(ts_metadata, indent=2))

    # Images without a magnification are not treated as whole-slide images
    # and skip the foreground filtering below.
    is_wsi = ts_metadata['magnification'] is not None

    #
    # Compute tissue/foreground mask at low-res for whole slide images
    #
    if is_wsi:

        print('\n>> Computing tissue/foreground mask at low-res ...\n')

        # Shared helper (default lres_size of 2048, the value previously
        # inlined here); it also clamps the size for images smaller than
        # that, which the inline version did not.
        im_fgnd_mask_lres, fgnd_seg_scale = \
            segment_wsi_foreground_at_low_res(ts)

    #
    # Detect nuclei in parallel using Dask
    #
    print('\n>> Detecting nuclei in parallel using Dask ...\n')

    it_kwargs = {
        'format': large_image.tilesource.TILE_FORMAT_NUMPY,
        'tile_size': {'width': args.analysis_tile_size},
        'scale': {'magnification': args.analysis_mag},
    }

    # An ROI of all -1 means "process the whole image".
    process_whole_image = bool(np.all(np.array(args.analysis_roi) == -1))

    if not process_whole_image:
        # analysis_roi is ordered (left, top, width, height).
        it_kwargs['region'] = {
            'left': args.analysis_roi[0],
            'top': args.analysis_roi[1],
            'width': args.analysis_roi[2],
            'height': args.analysis_roi[3],
            'units': 'base_pixels'
        }

    tile_nuclei_list = []

    for tile in ts.tileIterator(**it_kwargs):

        if is_wsi:

            # get current region in base_pixels
            rgn_hres = {
                'left': tile['gx'],
                'top': tile['gy'],
                'right': tile['gx'] + tile['gwidth'],
                'bottom': tile['gy'] + tile['gheight'],
                'units': 'base_pixels'
            }

            # get foreground mask for current tile at low resolution
            rgn_lres = ts.convertRegionScale(rgn_hres,
                                             targetScale=fgnd_seg_scale,
                                             targetUnits='mag_pixels')

            # Builtin int() replaces the np.int alias removed from NumPy.
            top = int(rgn_lres['top'])
            bottom = int(rgn_lres['bottom'])
            left = int(rgn_lres['left'])
            right = int(rgn_lres['right'])

            im_tile_fgnd_mask_lres = im_fgnd_mask_lres[top:bottom, left:right]

            # skip tile if there is not enough foreground in the slide;
            # the NaN check covers an empty mask slice.
            cur_fgnd_frac = im_tile_fgnd_mask_lres.mean()

            if np.isnan(cur_fgnd_frac) or cur_fgnd_frac <= args.min_fgnd_frac:
                continue

        # detect nuclei (lazily; nothing runs until compute() below)
        cur_nuclei_list = dask.delayed(detect_tile_nuclei)(
            args.inputImageFile,
            tile['tile_position']['position'],
            args, **it_kwargs)

        # append result to list
        tile_nuclei_list.append(cur_nuclei_list)

    def collect(x):
        # Identity task: gives the per-tile results one parent to compute.
        return x

    tile_nuclei_list = dask.delayed(collect)(tile_nuclei_list).compute()

    # Flatten the per-tile lists into a single list.
    nuclei_list = list(itertools.chain.from_iterable(tile_nuclei_list))

    # Two spaces after '=' reproduce the former `print a, b` output.
    print('Number of nuclei =  %d' % len(nuclei_list))

    #
    # Write annotation file
    #
    print('\n>> Writing annotation file ...\n')

    # Annotation name is the output file name without directory or extension.
    annot_fname = os.path.splitext(
        os.path.basename(args.outputNucleiAnnotationFile))[0]

    annotation = {"name": annot_fname, "elements": nuclei_list}

    with open(args.outputNucleiAnnotationFile, 'w') as annotation_file:
        json.dump(annotation, annotation_file, indent=2, sort_keys=False)