Example #1
0
def segment_wsi_foreground_at_low_res(ts, lres_size=2048):
    """Compute a foreground mask of a slide from a low-resolution read.

    The slide is read at its base magnification divided by a power-of-two
    downsample factor, and the result is passed to ``htk_utils.simple_mask``.

    Parameters
    ----------
    ts
        Tile source providing ``getMetadata()`` (with ``sizeX``, ``sizeY``
        and ``magnification`` entries) and ``getRegion()``.
    lres_size
        Reference size, in pixels, used to derive the downsample factor
        from the slide's longest side.

    Returns
    -------
    tuple
        ``(im_fgnd_mask_lres, fgnd_seg_scale)``: the mask returned by
        ``htk_utils.simple_mask`` and the ``{'magnification': ...}`` scale
        dict the low-resolution image was read at.
    """
    metadata = ts.getMetadata()

    # Clamp the longest side to at least lres_size so that the downsample
    # factor below can never drop under 1 (i.e. we never upsample).
    longest_side = float(
        max(metadata['sizeX'], metadata['sizeY'], lres_size))

    # Largest power of two not exceeding longest_side / lres_size.
    downsample_factor = 2.0 ** np.floor(np.log2(longest_side / lres_size))

    fgnd_seg_scale = {
        'magnification': metadata['magnification'] / downsample_factor,
    }

    # Read the whole slide at the reduced magnification as a numpy array.
    im_lres, _ = ts.getRegion(
        format=large_image.tilesource.TILE_FORMAT_NUMPY,
        scale=fgnd_seg_scale,
    )

    # Keep only the first three channels before masking.
    im_fgnd_mask_lres = htk_utils.simple_mask(im_lres[:, :, :3])

    return im_fgnd_mask_lres, fgnd_seg_scale
def grabTilesFromImage(imageData,
                       outputDir,
                       lowResMag=1.25,
                       outputRes=20,
                       tilesToOutput=200,
                       debug=False):
    """Save randomly chosen foreground tiles of one Girder image as PNGs.

    A low-magnification rendering of the item is fetched through the
    module-level Girder client ``gc`` and masked with
    ``htk_utils.simple_mask``. The non-zero mask pixels are shuffled and,
    for each one in turn, a 256x256 region whose top-left corner is that
    pixel scaled to ``outputRes`` is fetched and written to ``outputDir``.

    Parameters
    ----------
    imageData
        Mapping with at least the ``'_id'`` and ``'name'`` keys of the item.
    outputDir
        Directory the tiles are written into; it must already exist.
    lowResMag
        Magnification used for the low-resolution mask image.
    outputRes
        Magnification of the saved tiles.
    tilesToOutput
        Maximum number of tiles to save.
    debug
        When true, print progress information.

    Returns
    -------
    None
        Tiles are written to disk as a side effect. If the low-resolution
        image cannot be fetched or decoded, a message is printed and the
        function returns without saving anything.
    """
    if debug:
        print(
            "Analyzing %s; pulling base image at %s and outputing tiles at %s"
            % (imageData['name'], lowResMag, outputRes))

    # Pull the low-res image from Girder and decode the raw bytes with PIL.
    try:
        lowResImg = gc.get('/item/%s/tiles/region?magnification=%s' %
                           (imageData['_id'], lowResMag),
                           jsonResp=False)
        lowResPILimage = Image.open(io.BytesIO(lowResImg.content))
    except Exception:
        # Best effort: skip items that cannot be read, but do not swallow
        # KeyboardInterrupt / SystemExit the way a bare ``except`` would.
        print("Could not open item %s" % imageData['name'])
        return

    if tilesToOutput <= 0:
        return

    # simple_mask needs an ndarray, so convert the PIL image first.
    im_fgnd_mask_lres = htk_utils.simple_mask(np.asarray(lowResPILimage))

    # (row, col) indices of every non-zero mask pixel, in random order.
    (YmaskPts, XmaskPts) = np.nonzero(im_fgnd_mask_lres)
    maskCoords = list(zip(YmaskPts, XmaskPts))
    maxx = len(maskCoords)
    random.shuffle(maskCoords)

    # Multiply low-res coordinates by this to get output-res coordinates.
    # float() keeps this a true division under Python 2 with int arguments.
    scaleFactor = float(outputRes) / lowResMag

    slideBaseName = imageData['name'].split(".")[0]
    regionWidth = regionHeight = 256

    tilecount = 0
    for idx, c in enumerate(maskCoords):
        top = c[0] * scaleFactor
        left = c[1] * scaleFactor

        curTile = gc.get(
            '/item/%s/tiles/region?magnification=%s&top=%d&left=%d&regionWidth=%d&regionHeight=%d'
            % (imageData['_id'], outputRes, top, left, regionWidth,
               regionHeight),
            jsonResp=False)
        img = Image.open(io.BytesIO(curTile.content))

        # NOTE(review): the tile average is a hard-coded placeholder, so the
        # brightness filter below currently accepts every tile. The
        # 150/210 thresholds still need to be justified before the real
        # average is enabled.
        avg = 180
        status = "Image %s of %s, imgavg:%s" % (idx, maxx, avg)
        if debug:
            LinePrinter(status)

        if avg > 150 and avg < 210:
            tilename = slideBaseName + '_%dx_%d_%d_%dx%d.png' % (
                outputRes, top, left, regionWidth, regionHeight)
            img.save(opj(outputDir, tilename))
            tilecount += 1

        # Stop once exactly tilesToOutput tiles have been saved.
        if tilecount >= tilesToOutput:
            break
Example #3
0
def grabTilesFromImage(imageData,
                       outputDir,
                       lowResMag=0.625,
                       outputRes=20,
                       tilesToOutput=200,
                       debug=False):
    """Save foreground tiles taken from a grid over the tissue bounding box.

    A low-magnification rendering of the item is fetched through the
    module-level Girder client ``gc`` and masked with
    ``htk_utils.simple_mask``. The bounding box of the non-zero mask pixels
    is scaled to ``outputRes`` and covered with a grid of 256x256 tile
    corners. Corners are visited in random order; a tile is fetched and
    saved only when the mask is set at the tile's top-left corner.

    Parameters
    ----------
    imageData
        Mapping with at least the ``'_id'`` and ``'name'`` keys of the item.
    outputDir
        Directory the tiles are written into; it must already exist.
    lowResMag
        Magnification used for the low-resolution mask image.
    outputRes
        Magnification of the saved tiles.
    tilesToOutput
        Maximum number of tiles to save.
    debug
        When true, print progress information.

    Returns
    -------
    None
        Tiles are written to ``outputDir`` as ``.jpg`` figures through
        matplotlib as a side effect.
    """
    if debug:
        print(
            "Analyzing %s; pulling base image at %s and outputing tiles at %s"
            % (imageData['name'], lowResMag, outputRes))
    lowResImg = gc.get('/item/%s/tiles/region?magnification=%s' %
                       (imageData['_id'], lowResMag),
                       jsonResp=False)
    lowResPILimage = Image.open(io.BytesIO(lowResImg.content))

    # Use HistomicsTK's simple_mask to build the low-resolution mask.
    im_fgnd_mask_lres = htk_utils.simple_mask(np.asarray(lowResPILimage))

    (YmaskPts, XmaskPts) = np.nonzero(im_fgnd_mask_lres)

    # Nothing to do for an empty mask or a non-positive tile budget;
    # min()/max() below would raise on empty arrays.
    if len(XmaskPts) == 0 or tilesToOutput <= 0:
        return

    # Ratio between output and mask magnification (32 for the defaults of
    # 20x and 0.625x). Derived from the arguments instead of hard-coded so
    # the parameters are actually honoured.
    scaleFactor = float(outputRes) / lowResMag

    # Bounding box of the mask, in output-resolution pixels.
    left = int(XmaskPts.min() * scaleFactor)
    top = int(YmaskPts.min() * scaleFactor)
    right = int(XmaskPts.max() * scaleFactor)
    bottom = int(YmaskPts.max() * scaleFactor)

    regionWidth = regionHeight = 256

    # Top-left corners of a regular tile grid covering the bounding box.
    corners = [[x, y]
               for x in range(left, right, regionWidth)
               for y in range(top, bottom, regionHeight)]
    np.random.shuffle(corners)

    slideBaseName = imageData['name'].split(".")[0]

    count = 0
    for tile_left, tile_top in corners:
        # Map the corner back to (integer) mask coordinates.
        x_low = int(tile_left / scaleFactor)
        y_low = int(tile_top / scaleFactor)

        # Check the mask before fetching so background tiles cost no
        # network round trip.
        if not im_fgnd_mask_lres[y_low, x_low]:
            continue

        curTile = gc.get(
            '/item/%s/tiles/region?magnification=%s&top=%d&left=%d&regionWidth=%d&regionHeight=%d'
            % (imageData['_id'], outputRes, tile_top, tile_left, regionWidth,
               regionHeight),
            jsonResp=False)

        # Decode the returned bytes into a numpy array for plotting.
        img = np.asarray(Image.open(io.BytesIO(curTile.content)))

        plt.imshow(img)
        tilename = outputDir + "/" + slideBaseName + '_%dx_%d_%d_%dx%d.jpg' % (
            outputRes, tile_top, tile_left, regionWidth, regionHeight)
        plt.savefig(tilename)
        # Clear the figure so images do not pile up on the same axes.
        plt.clf()

        count += 1
        # Stop once exactly tilesToOutput tiles have been saved.
        if count >= tilesToOutput:
            break
Example #4
0
def main(args):
    """Detect nuclei in an image tile-by-tile with Dask and write them out.

    Reads these attributes of ``args``: ``inputImageFile``,
    ``reference_mu_lab``, ``reference_std_lab``, ``analysis_roi``,
    ``scheduler_address``, ``analysis_tile_size``, ``analysis_mag``,
    ``min_fgnd_frac`` and ``outputNucleiAnnotationFile``. ``args`` is also
    passed unchanged to ``detect_tile_nuclei``.

    Steps:
      1. Validate the arguments.
      2. Connect to the Dask scheduler given by ``scheduler_address`` (a
         JSON-encoded value); when it decodes to ``None`` a local cluster
         is started instead.
      3. When the image metadata reports a magnification, compute a
         low-resolution foreground mask and skip tiles whose foreground
         fraction is not above ``min_fgnd_frac``.
      4. Schedule ``detect_tile_nuclei`` for every remaining tile and
         gather the results.
      5. Dump the nuclei as a JSON annotation to
         ``outputNucleiAnnotationFile``.

    Raises
    ------
    IOError
        If the input image file does not exist.
    ValueError
        If ``reference_mu_lab`` or ``reference_std_lab`` does not have 3
        elements, or ``analysis_roi`` does not have 4.
    """
    print('\n>> CLI Parameters ...\n')

    print(args)

    if not os.path.isfile(args.inputImageFile):
        raise IOError('Input image file does not exist.')

    if len(args.reference_mu_lab) != 3:
        raise ValueError('Reference Mean LAB should be a 3 element vector.')

    if len(args.reference_std_lab) != 3:
        raise ValueError('Reference Stddev LAB should be a 3 element vector.')

    if len(args.analysis_roi) != 4:
        raise ValueError('Analysis ROI must be a vector of 4 elements.')

    #
    # Initiate Dask client
    #
    print('\n>> Creating Dask client ...\n')

    scheduler_address = json.loads(args.scheduler_address)

    if scheduler_address is None:

        # Leave one core free, but always start at least one worker so a
        # single-core machine does not end up with an empty cluster.
        scheduler_address = LocalCluster(
            n_workers=max(1, multiprocessing.cpu_count() - 1),
            scheduler_port=0,
            silence_logs=False)

    c = Client(scheduler_address)
    print(c)

    #
    # Read Input Image
    #
    print('\n>> Reading input image ... \n')

    ts = large_image.getTileSource(args.inputImageFile)

    ts_metadata = ts.getMetadata()

    print(json.dumps(ts_metadata, indent=2))

    # Images without a magnification are not treated as whole slides.
    is_wsi = ts_metadata['magnification'] is not None

    #
    # Compute tissue/foreground mask at low-res for whole slide images
    #
    if is_wsi:

        print('\n>> Computing tissue/foreground mask at low-res ...\n')

        # Clamp the longest side to at least 2048 and use float arithmetic
        # so the downsample factor is a power of two >= 1 (never 0 or a
        # fraction, which would break or inflate the magnification below).
        maxSize = float(
            max(ts_metadata['sizeX'], ts_metadata['sizeY'], 2048))

        downsample_factor = 2.0 ** np.floor(np.log2(maxSize / 2048))

        fgnd_seg_mag = ts_metadata['magnification'] / downsample_factor

        fgnd_seg_scale = {'magnification': fgnd_seg_mag}

        # get image at low-res
        im_lres, _ = ts.getRegion(
            scale=fgnd_seg_scale,
            format=large_image.tilesource.TILE_FORMAT_NUMPY)

        # keep only the first three channels
        im_lres = im_lres[:, :, :3]

        # compute foreground mask at low-res
        im_fgnd_mask_lres = htk_utils.simple_mask(im_lres)

    #
    # Detect nuclei in parallel using Dask
    #
    print('\n>> Detecting nuclei in parallel using Dask ...\n')

    it_kwargs = {
        'format': large_image.tilesource.TILE_FORMAT_NUMPY,
        'tile_size': {
            'width': args.analysis_tile_size
        },
        'scale': {
            'magnification': args.analysis_mag
        },
    }

    # An ROI of all -1 means "process the whole image".
    process_whole_image = bool(np.all(np.array(args.analysis_roi) == -1))

    if not process_whole_image:

        it_kwargs['region'] = {
            'left': args.analysis_roi[0],
            'top': args.analysis_roi[1],
            'width': args.analysis_roi[2],
            'height': args.analysis_roi[3],
            'units': 'base_pixels'
        }

    tile_nuclei_list = []

    for tile in ts.tileIterator(**it_kwargs):

        if is_wsi:

            # get current region in base_pixels
            rgn_hres = {
                'left': tile['gx'],
                'top': tile['gy'],
                'right': tile['gx'] + tile['gwidth'],
                'bottom': tile['gy'] + tile['gheight'],
                'units': 'base_pixels'
            }

            # get foreground mask for current tile at low resolution
            rgn_lres = ts.convertRegionScale(rgn_hres,
                                             targetScale=fgnd_seg_scale,
                                             targetUnits='mag_pixels')

            # Built-in int() replaces the removed np.int alias.
            top = int(rgn_lres['top'])
            bottom = int(rgn_lres['bottom'])
            left = int(rgn_lres['left'])
            right = int(rgn_lres['right'])

            im_tile_fgnd_mask_lres = im_fgnd_mask_lres[top:bottom, left:right]

            # skip tile if there is not enough foreground in the slide;
            # an empty slice yields NaN, which is skipped as well
            cur_fgnd_frac = im_tile_fgnd_mask_lres.mean()

            if np.isnan(cur_fgnd_frac) or cur_fgnd_frac <= args.min_fgnd_frac:
                continue

        # detect nuclei (lazily; nothing runs until compute() below)
        cur_nuclei_list = dask.delayed(detect_tile_nuclei)(
            args.inputImageFile, tile['tile_position']['position'], args,
            **it_kwargs)

        # append result to list
        tile_nuclei_list.append(cur_nuclei_list)

    def collect(x):
        # Identity task: gives Dask a single node depending on every tile.
        return x

    tile_nuclei_list = dask.delayed(collect)(tile_nuclei_list).compute()

    # Flatten the per-tile lists into one list of nuclei.
    nuclei_list = list(itertools.chain.from_iterable(tile_nuclei_list))

    print('Number of nuclei =  %d' % len(nuclei_list))

    #
    # Write annotation file
    #
    print('\n>> Writing annotation file ...\n')

    # The annotation is named after the output file, minus its extension.
    annot_fname = os.path.splitext(
        os.path.basename(args.outputNucleiAnnotationFile))[0]

    annotation = {"name": annot_fname, "elements": nuclei_list}

    with open(args.outputNucleiAnnotationFile, 'w') as annotation_file:
        json.dump(annotation, annotation_file, indent=2, sort_keys=False)