def __getitem__(self, idx):

        batchx = self.train_files[idx * self.batch_size:(idx + 1) *
                                  self.batch_size]
        batchy = self.train_outputs[idx * self.batch_size:(idx + 1) *
                                    self.batch_size]

        X_RGB = np.zeros((self.batch_size, 256, 256, 3))
        X_Per = np.zeros((self.batch_size, 32, 32, 1))
        Y = np.zeros((self.batch_size, 2))

        for i in range(self.batch_size):
            image_id = int(float(re.findall(r"\d+\.\d+", batchx[i])[0]))

            fnameRGB = batchx[i] + '.jpg'
            fnamePer = batchx[i] + '.pkl'

            if batchy[i] == self.label['malignant']:
                pathRGB = os.path.join(self.path_mal_train, fnameRGB)
                pathPer = os.path.join(self.path_mal_train_per, fnamePer)

            elif batchy[i] == self.label['benign']:
                pathRGB = os.path.join(self.path_ben_train, fnameRGB)
                pathPer = os.path.join(self.path_ben_train_per, fnamePer)

            img = skimage.io.imread(pathRGB)
            if img.shape == (1024, 1024, 3):
                img = img[::4, ::4, :]

            image_id = int(float(re.findall(r"\d+\.\d+", pathRGB)[0]))

            if image_id in self.stats:
                [src_mu, src_sigma] = self.stats[image_id]
                img_nmzd = htk_cnorm.reinhard(
                    img,
                    self.ref_mu_lab,
                    self.ref_std_lab,
                    src_mu=src_mu,
                    src_sigma=src_sigma).astype('float')
            else:
                print('#### stats for %d not present' % image_id)
                img_nmzd = htk_cnorm.reinhard(img, self.ref_mu_lab,
                                              self.ref_std_lab).astype('float')

            imgRGB = preprocess_resnet(img_nmzd)

            with open(pathPer, 'rb') as f:
                img = pickle.load(f)

            imgPer = self.preprocess_persistence(img)

            X_RGB[i] = imgRGB
            X_Per[i] = imgPer
            Y[i] = to_categorical(batchy[i], num_classes=2)

        return ([X_RGB, X_Per], Y)
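# Hedged usage sketch -- the class and constructor names below are assumptions, not
# taken from the listing above. A keras.utils.Sequence subclass exposing this
# __getitem__ is normally paired with a __len__ and handed straight to model.fit:
#
#     class CombinedTrainGenerator(keras.utils.Sequence):   # hypothetical name
#         ...
#         def __len__(self):
#             return len(self.train_files) // self.batch_size
#
#     train_gen = CombinedTrainGenerator(config)
#     model.fit(train_gen, epochs=config.trainer.num_epochs)  # num_epochs assumed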
def detect_nuclei(im_tile, tile_info=None, args=None,
                  src_mu_lab=None, src_sigma_lab=None):
    args = args or default_args

    # perform color normalization
    im_nmzd = htk_cnorm.reinhard(im_tile,
                                 args.reference_mu_lab,
                                 args.reference_std_lab,
                                 src_mu=src_mu_lab,
                                 src_sigma=src_sigma_lab)

    # perform color deconvolution
    w = cli_utils.get_stain_matrix(args)

    im_stains = htk_cdeconv.color_deconvolution(im_nmzd, w).Stains

    im_nuclei_stain = im_stains[:, :, 0].astype(np.float)

    # segment nuclei
    im_nuclei_seg_mask = cli_utils.detect_nuclei_kofahi(im_nuclei_stain, args)

    # generate nuclei annotations
    nuclei_annot_list = cli_utils.create_tile_nuclei_annotations(
        im_nuclei_seg_mask, tile_info=tile_info,
        format=args.nuclei_annotation_format)

    return nuclei_annot_list
Example #3
def detect_nuclei_kofahi(im_input, args):

    # perform color normalization
    im_nmzd = htk_cnorm.reinhard(im_input, args.reference_mu_lab,
                                 args.reference_std_lab)

    # perform color deconvolution
    w = np.array([
        stain_color_map[args.stain_1], stain_color_map[args.stain_2],
        stain_color_map[args.stain_3]
    ]).T

    im_stains = htk_cdeconv.color_deconvolution(im_nmzd, w).Stains

    im_nuclei_stain = im_stains[:, :, 0].astype(np.float)

    # segment foreground (assumes nuclei are darker on a bright background)
    im_nuclei_fgnd_mask = sp.ndimage.morphology.binary_fill_holes(
        im_nuclei_stain < args.foreground_threshold)

    # run adaptive multi-scale LoG filter
    im_log = htk_shape_filters.clog(im_nuclei_stain,
                                    im_nuclei_fgnd_mask,
                                    sigma_min=args.min_radius * np.sqrt(2),
                                    sigma_max=args.max_radius * np.sqrt(2))

    # apply local maximum clustering
    im_nuclei_seg_mask, seeds, maxima = htk_seg.nuclear.max_clustering(
        im_log, im_nuclei_fgnd_mask, args.local_max_search_radius)

    # filter out small objects
    im_nuclei_seg_mask = htk_seg.label.area_open(
        im_nuclei_seg_mask, args.min_nucleus_area).astype(np.int)

    return im_nuclei_seg_mask
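# Hedged usage sketch for detect_nuclei_kofahi above; the namedtuple construction and
# parameter values mirror the test parameters used elsewhere in this listing and are
# illustrative only, not the source's own driver code.
#
#     import collections
#     import skimage.io
#
#     params = {
#         'reference_mu_lab': [8.63234435, -0.11501964, 0.03868433],
#         'reference_std_lab': [0.57506023, 0.10403329, 0.01364062],
#         'stain_1': 'hematoxylin', 'stain_2': 'eosin', 'stain_3': 'null',
#         'foreground_threshold': 60, 'min_radius': 12, 'max_radius': 30,
#         'min_nucleus_area': 80, 'local_max_search_radius': 10,
#     }
#     args = collections.namedtuple('Parameters', params.keys())(**params)
#
#     im = skimage.io.imread('Easy1.png')[:, :, :3]
#     im_nuclei_seg_mask = detect_nuclei_kofahi(im, args)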
Example #4
def detect_tile_nuclei(slide_path, tile_position, args, **it_kwargs):

    # get slide tile source
    ts = large_image.getTileSource(slide_path)

    # get requested tile
    tile_info = ts.getSingleTile(
        tile_position=tile_position,
        format=large_image.tilesource.TILE_FORMAT_NUMPY,
        **it_kwargs)

    # get tile image
    im_tile = tile_info['tile'][:, :, :3]

    # perform color normalization
    im_nmzd = htk_cnorm.reinhard(im_tile, args.reference_mu_lab,
                                 args.reference_std_lab)

    # perform color deconvolution
    w = cli_utils.get_stain_matrix(args)

    im_stains = htk_cdeconv.color_deconvolution(im_nmzd, w).Stains

    im_nuclei_stain = im_stains[:, :, 0].astype(np.float)

    # segment nuclei
    im_nuclei_seg_mask = cli_utils.detect_nuclei_kofahi(im_nuclei_stain, args)

    # generate nuclei annotations
    nuclei_annot_list = cli_utils.create_tile_nuclei_annotations(
        im_nuclei_seg_mask, tile_info, args.nuclei_annotation_format)

    return nuclei_annot_list
    def setUp(self):

        # define parameters
        args = {
            'reference_mu_lab': [8.63234435, -0.11501964, 0.03868433],
            'reference_std_lab': [0.57506023, 0.10403329, 0.01364062],
            'min_radius': 12,
            'max_radius': 30,
            'foreground_threshold': 60,
            'min_nucleus_area': 80,
            'local_max_search_radius': 10,
        }

        args = collections.namedtuple('Parameters', args.keys())(**args)

        # read input image
        input_image_file = os.path.join(TEST_DATA_DIR, 'Easy1.png')

        im_input = skimage.io.imread(input_image_file)[:, :, :3]

        # perform color normalization
        im_input_nmzd = htk_cnorm.reinhard(im_input, args.reference_mu_lab,
                                           args.reference_std_lab)

        # perform color deconvolution
        w = htk_cdeconv.rgb_separate_stains_macenko_pca(
            im_input_nmzd, im_input_nmzd.max())

        im_stains = htk_cdeconv.color_deconvolution(im_input_nmzd, w).Stains

        nuclei_channel = htk_cdeconv.find_stain_index(
            htk_cdeconv.stain_color_map['hematoxylin'], w)

        im_nuclei_stain = im_stains[:, :, nuclei_channel].astype(np.float)

        cytoplasm_channel = htk_cdeconv.find_stain_index(
            htk_cdeconv.stain_color_map['eosin'], w)

        im_cytoplasm_stain = im_stains[:, :,
                                       cytoplasm_channel].astype(np.float)

        # segment nuclei
        im_nuclei_seg_mask = cli_utils.detect_nuclei_kofahi(
            im_nuclei_stain, args)

        # perform connected component analysis
        nuclei_rprops = skimage.measure.regionprops(im_nuclei_seg_mask)

        # compute nuclei features
        fdata_nuclei = htk_features.compute_nuclei_features(
            im_nuclei_seg_mask,
            im_nuclei_stain,
            im_cytoplasm=im_cytoplasm_stain)

        self.im_input = im_input
        self.im_input_nmzd = im_input_nmzd
        self.im_nuclei_stain = im_nuclei_stain
        self.im_nuclei_seg_mask = im_nuclei_seg_mask
        self.nuclei_rprops = nuclei_rprops
        self.fdata_nuclei = fdata_nuclei
Example #6
    def set_slide_info_and_get_tissue_mask(self):
        """Set self.slide_info dict and self.labeled tissue mask."""
        # This is a persistent dict to store information about the slide
        self.slide_info = self.gc.get('item/%s/tiles' % self.slide_id)

        # get tissue mask
        thumbnail_rgb = get_slide_thumbnail(self.gc, self.slide_id)

        # color normalization if desired
        if 'thumbnail' in self.cnorm_params.keys():
            thumbnail_rgb = np.uint8(
                reinhard(im_src=thumbnail_rgb,
                         target_mu=self.cnorm_params['thumbnail']['mu'],
                         target_sigma=self.cnorm_params['thumbnail']['sigma']))

        # get labeled tissue mask -- each unique value is one tissue piece
        labeled, _ = get_tissue_mask(thumbnail_rgb,
                                     **self.get_tissue_mask_kwargs)

        if len(np.unique(labeled)) < 2:
            raise ValueError("No tissue detected!")

        if self.visualize_tissue_boundary:
            annotation_docs = get_tissue_boundary_annotation_documents(
                self.gc, slide_id=self.slide_id, labeled=labeled)
            for doc in annotation_docs:
                _ = self.gc.post("/annotation?itemId=" + self.slide_id,
                                 json=doc)

        # Find size relative to WSI
        self.slide_info['F_tissue'] = (
            self.slide_info['sizeX'] / labeled.shape[1])

        return labeled
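# Hedged illustration of the cnorm_params layout assumed by the 'thumbnail' check
# above; the nested-dict shape is an assumption, and the numbers are the
# TCGA-A2-A3XS-DX1 color standard quoted elsewhere in this listing:
#
#     cnorm_params = {
#         'thumbnail': {
#             'mu': np.array([8.74108109, -0.12440419, 0.0444982]),
#             'sigma': np.array([0.6135447, 0.10989545, 0.0286032]),
#         },
#         'main': {...},  # same keys, applied at analysis magnification
#     }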
Example #7
def compute_tile_nuclei_features(slide_path, tile_position, args, it_kwargs,
                                 src_mu_lab=None, src_sigma_lab=None):

    # get slide tile source
    ts = large_image.getTileSource(slide_path)

    # get requested tile
    tile_info = ts.getSingleTile(
        tile_position=tile_position,
        format=large_image.tilesource.TILE_FORMAT_NUMPY,
        **it_kwargs)

    # get tile image
    im_tile = tile_info['tile'][:, :, :3]

    # perform color normalization
    im_nmzd = htk_cnorm.reinhard(im_tile,
                                 args.reference_mu_lab,
                                 args.reference_std_lab,
                                 src_mu=src_mu_lab,
                                 src_sigma=src_sigma_lab)

    # perform color deconvolution
    w = cli_utils.get_stain_matrix(args)

    im_stains = htk_cdeconv.color_deconvolution(im_nmzd, w).Stains

    im_nuclei_stain = im_stains[:, :, 0].astype(np.float)

    # segment nuclei
    im_nuclei_seg_mask = cli_utils.detect_nuclei_kofahi(im_nuclei_stain, args)

    # generate nuclei annotations
    nuclei_annot_list = cli_utils.create_tile_nuclei_annotations(
        im_nuclei_seg_mask, tile_info, args.nuclei_annotation_format)

    # compute nuclei features
    if args.cytoplasm_features:
        im_cytoplasm_stain = im_stains[:, :, 1].astype(np.float)
    else:
        im_cytoplasm_stain = None

    fdata = htk_features.compute_nuclei_features(
        im_nuclei_seg_mask, im_nuclei_stain, im_cytoplasm_stain,
        fsd_bnd_pts=args.fsd_bnd_pts,
        fsd_freq_bins=args.fsd_freq_bins,
        cyto_width=args.cyto_width,
        num_glcm_levels=args.num_glcm_levels,
        morphometry_features_flag=args.morphometry_features,
        fsd_features_flag=args.fsd_features,
        intensity_features_flag=args.intensity_features,
        gradient_features_flag=args.gradient_features,
    )

    fdata.columns = ['Feature.' + col for col in fdata.columns]

    return nuclei_annot_list, fdata
Example #8
def detect_tile_nuclei(slide_path, tile_position, args, it_kwargs,
                       src_mu_lab=None, src_sigma_lab=None):

    # get slide tile source
    ts = large_image.getTileSource(slide_path)

    # get requested tile
    tile_info = ts.getSingleTile(
        tile_position=tile_position,
        format=large_image.tilesource.TILE_FORMAT_NUMPY,
        **it_kwargs)

    # get tile image
    im_tile = tile_info['tile'][:, :, :3]

    # perform color normalization
    im_nmzd = htk_cnorm.reinhard(im_tile,
                                 args.reference_mu_lab,
                                 args.reference_std_lab,
                                 src_mu=src_mu_lab,
                                 src_sigma=src_sigma_lab)

    # perform color deconvolution
    w = cli_utils.get_stain_matrix(args)

    im_stains = htk_cdeconv.color_deconvolution(im_nmzd, w).Stains

    im_nuclei_stain = im_stains[:, :, 0].astype(np.float)

    # segment nuclear foreground
    im_nuclei_fgnd_mask = im_nuclei_stain < args.foreground_threshold

    # segment nuclei
    im_nuclei_seg_mask = htk_nuclear.detect_nuclei_kofahi(
        im_nuclei_stain,
        im_nuclei_fgnd_mask,
        args.min_radius,
        args.max_radius,
        args.min_nucleus_area,
        args.local_max_search_radius
    )

    # Delete border nuclei
    if args.ignore_border_nuclei is True:

        im_nuclei_seg_mask = htk_seg_label.delete_border(im_nuclei_seg_mask)

    # generate nuclei annotations
    nuclei_annot_list = []

    flag_nuclei_found = np.any(im_nuclei_seg_mask)

    if flag_nuclei_found:
        nuclei_annot_list = cli_utils.create_tile_nuclei_annotations(
            im_nuclei_seg_mask, tile_info, args.nuclei_annotation_format)

    return nuclei_annot_list
Example #9
def compute_superpixel_data(img_path, tile_position, wsi_mean, wsi_stddev):

    # get slide tile source
    ts = large_image.getTileSource(img_path)

    # get requested tile information
    tile_info = ts.getSingleTile(
        tile_position=tile_position,
        resample=True,
        format=large_image.tilesource.TILE_FORMAT_NUMPY)

    im_tile = tile_info['tile'][:, :, :3]

    reference_mu_lab = [8.63234435, -0.11501964, 0.03868433]
    reference_std_lab = [0.57506023, 0.10403329, 0.01364062]

    # perform color normalization
    im_nmzd = htk_cnorm.reinhard(im_tile, reference_mu_lab, reference_std_lab,
                                 wsi_mean, wsi_stddev)
    patchSize = 32
    # compute the number of super-pixels
    im_width, im_height = im_nmzd.shape[:2]
    n_superpixels = (im_width // patchSize) * (im_height // patchSize)

    #
    # Generate labels using a superpixel algorithm (SLIC)
    # In SLIC, compactness controls image space proximity.
    # Higher compactness will make the shape of superpixels more square.
    #

    compactness = 50
    im_label = slic(im_nmzd, n_segments=n_superpixels,
                    compactness=compactness) + 1

    region_props = regionprops(im_label)

    # set superpixel data list
    s_data = []

    for i in range(len(region_props)):
        # get x, y centroids for superpixel
        cen_x, cen_y = region_props[i].centroid

        # get bounds of superpixel region
        min_row, max_row, min_col, max_col = \
            get_patch_bounds(cen_x, cen_y, patchSize, im_width, im_height)

        rgb_data = im_nmzd[min_row:max_row, min_col:max_col]

        s_data.append(rgb_data)

    return s_data
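# get_patch_bounds is called above but is not defined anywhere in this listing. A
# minimal sketch of such a helper, assuming it simply clamps a patch_size window
# centered on the superpixel centroid to the image bounds (an assumption, not the
# source implementation):

def get_patch_bounds(cen_x, cen_y, patch_size, im_width, im_height):

    half = patch_size // 2

    # clamp the window so it never leaves the image
    min_row = int(max(cen_x - half, 0))
    max_row = int(min(cen_x + half, im_width))
    min_col = int(max(cen_y - half, 0))
    max_col = int(min(cen_y + half, im_height))

    return min_row, max_row, min_col, max_col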
Example #10
    def preprocess(self, img_path):

        img = skimage.io.imread(img_path)
        if img.shape == (1024, 1024, 3):
            img = img[::4, ::4, :]

        image_id = int(float(re.findall(r"\d+\.\d+", img_path)[0]))

        if image_id in self.stats.keys():
            [src_mu, src_sigma] = self.stats[image_id]
            img_nmzd = htk_cnorm.reinhard(img,
                                          self.ref_mu_lab,
                                          self.ref_std_lab,
                                          src_mu=src_mu,
                                          src_sigma=src_sigma).astype('float')
        else:
            print('#### stats for %d not present' % image_id)
            img_nmzd = htk_cnorm.reinhard(img, self.ref_mu_lab,
                                          self.ref_std_lab).astype('float')

        img = preprocess_resnet(img_nmzd)

        return img
Example #11
    def test_reinhard(self):
        """Test reinhard."""
        # # SANITY CHECK! normalize to LAB mean and std from SAME slide
        # mean_lab, std_lab = lab_mean_std(tissue_rgb)
        # tissue_rgb_normalized = reinhard(
        #     tissue_rgb, target_mu=mean_lab, target_sigma=std_lab)
        # # we expect the images to be (almost) exactly the same
        # assert np.mean(tissue_rgb - tissue_rgb_normalized) < 1

        # color norm. standard (from TCGA-A2-A3XS-DX1, Amgad et al, 2019)
        cnorm = {
            'mu': np.array([8.74108109, -0.12440419, 0.0444982]),
            'sigma': np.array([0.6135447, 0.10989545, 0.0286032]),
        }

        # Normalize to pre-set color standard (unmasked)
        tissue_rgb_normalized = reinhard(cfg.tissue_rgb,
                                         target_mu=cnorm['mu'],
                                         target_sigma=cnorm['sigma'])

        # check that it matches
        mean_lab, std_lab = lab_mean_std(tissue_rgb_normalized)
        assert all(np.abs(mean_lab - cnorm['mu']) < [0.1, 0.1, 0.1])
        assert all(np.abs(std_lab - cnorm['sigma']) < [0.1, 0.1, 0.1])

        # Do MASKED normalization to preset standard
        tissue_rgb_normalized = reinhard(cfg.tissue_rgb,
                                         target_mu=cnorm['mu'],
                                         target_sigma=cnorm['sigma'],
                                         mask_out=cfg.mask_out)

        # check that it matches
        mean_lab, std_lab = lab_mean_std(tissue_rgb_normalized,
                                         mask_out=cfg.mask_out)
        assert all(np.abs(mean_lab - cnorm['mu']) < [0.1, 0.1, 0.1])
        assert all(np.abs(std_lab - cnorm['sigma']) < [0.1, 0.1, 0.1])
Example #12
    def set_tissue_rgb(self):
        """Load RGB from server for single tissue piece."""
        # load RGB for this tissue piece at saliency magnification
        getStr = "/item/%s/tiles/region?left=%d&right=%d&top=%d&bottom=%d&encoding=PNG" % (
            self.cd.slide_id, self.xmin, self.xmax, self.ymin,
            self.ymax) + "&magnification=%d" % self.cd.MAG
        resp = self.cd.gc.get(getStr, jsonResp=False)
        self.tissue_rgb = get_image_from_htk_response(resp)

        # color normalization if desired
        if 'main' in self.cd.cnorm_params.keys():
            self.tissue_rgb = np.uint8(
                reinhard(im_src=self.tissue_rgb,
                         target_mu=self.cd.cnorm_params['main']['mu'],
                         target_sigma=self.cd.cnorm_params['main']['sigma']))
    def test_normalization(self):

        input_image_file = os.path.join(TEST_DATA_DIR, 'L1.png')

        ref_image_file = os.path.join(TEST_DATA_DIR, 'Easy1.png')

        # read input image
        im_input = skimage.io.imread(input_image_file)[:, :, :3]

        # read reference image
        im_reference = skimage.io.imread(ref_image_file)[:, :, :3]

        # get mean and stddev of reference image in lab space
        mean_ref, std_ref = htk_cvt.lab_mean_std(im_reference)

        # perform color normalization
        im_nmzd = htk_cn.reinhard(im_input, mean_ref, std_ref)

        # transform normalized image to LAB color space
        mean_nmzd, std_nmzd = htk_cvt.lab_mean_std(im_nmzd)

        # check if mean and stddev of normalized and reference images are equal
        np.testing.assert_allclose(mean_nmzd, mean_ref, atol=1e-1)
        np.testing.assert_allclose(std_nmzd, std_ref, atol=1e-1)
Example #14
    def test_normalization(self):

        input_image_file = os.path.join(TEST_DATA_DIR, 'L1.png')

        ref_image_file = os.path.join(TEST_DATA_DIR, 'Easy1.png')

        # read input image
        im_input = skimage.io.imread(input_image_file)[:, :, :3]

        # read reference image
        im_reference = skimage.io.imread(ref_image_file)[:, :, :3]

        # get mean and stddev of reference image in lab space
        mean_ref, std_ref = htk_cvt.lab_mean_std(im_reference)

        # perform color normalization
        im_nmzd = htk_cn.reinhard(im_input, mean_ref, std_ref)

        # transform normalized image to LAB color space
        mean_nmzd, std_nmzd = htk_cvt.lab_mean_std(im_nmzd)

        # check if mean and stddev of normalized and reference images are equal
        np.testing.assert_allclose(mean_nmzd, mean_ref, atol=1e-1)
        np.testing.assert_allclose(std_nmzd, std_ref, atol=1e-1)
    def color_normalize_unspecified_components(self):
        """Color normalize "true" tissue components."""
        if self.cdt.color_normalization_method == 'reinhard':
            self.cdt._print2("%s: -- reinhard normalization ..." %
                             self.monitorPrefix)
            self.tissue_rgb = reinhard(
                self.tissue_rgb,
                target_mu=self.cdt.target_stats_reinhard['mu'],
                target_sigma=self.cdt.target_stats_reinhard['sigma'],
                mask_out=self.labeled != self.cdt.GTcodes.loc["not_specified",
                                                              "GT_code"])

        elif self.cdt.color_normalization_method == 'macenko_pca':
            self.cdt._print2("%s: -- macenko normalization ..." %
                             self.monitorPrefix)
            self.tissue_rgb = deconvolution_based_normalization(
                self.tissue_rgb,
                W_target=self.cdt.target_W_macenko,
                mask_out=self.labeled != self.cdt.GTcodes.loc["not_specified",
                                                              "GT_code"],
                stain_unmixing_routine_params=(
                    self.cdt.stain_unmixing_routine_params))
        else:
            self.cdt._print2("%s: -- No normalization!" % self.monitorPrefix)
def compute_superpixel_data(img_path, tile_position, wsi_mean, wsi_stddev,
                            args, **it_kwargs):

    # get slide tile source
    ts = large_image.getTileSource(img_path)

    # get requested tile information
    tile_info = ts.getSingleTile(
        tile_position=tile_position,
        resample=True,
        format=large_image.tilesource.TILE_FORMAT_NUMPY,
        **it_kwargs)

    im_tile = tile_info['tile'][:, :, :3]

    # get global x and y positions
    left = tile_info['gx']
    top = tile_info['gy']

    # get scale
    scale = tile_info['gwidth'] / tile_info['width']

    # perform color normalization
    im_nmzd = htk_cnorm.reinhard(im_tile, args.reference_mu_lab,
                                 args.reference_std_lab, wsi_mean, wsi_stddev)

    # compute the number of super-pixels
    im_width, im_height = im_nmzd.shape[:2]
    n_superpixels = ((im_width // args.patchSize)
                     * (im_height // args.patchSize))

    #
    # Generate labels using a superpixel algorithm (SLIC)
    # In SLIC, compactness controls image space proximity.
    # Higher compactness will make the shape of superpixels more square.
    #
    im_label = slic(
        im_nmzd, n_segments=n_superpixels, compactness=args.compactness) + 1

    region_props = regionprops(im_label)

    # set superpixel data list
    s_data = []
    x_cent = []
    y_cent = []
    x_brs = []
    y_brs = []

    for i in range(len(region_props)):
        # get x, y centroids for superpixel
        cen_x, cen_y = region_props[i].centroid

        # get bounds of superpixel region
        min_row, max_row, min_col, max_col = \
            get_patch_bounds(cen_x, cen_y, args.patchSize, im_width, im_height)

        # grab superpixel label mask
        lmask = (im_label[:, :] == region_props[i].label).astype(np.bool)

        # embed with center pixel in middle of padded window
        emask = np.zeros((lmask.shape[0] + 2, lmask.shape[1] + 2),
                         dtype=np.bool)
        emask[1:-1, 1:-1] = lmask

        # find boundaries
        bx, by = htk_seg.label.trace_object_boundaries(emask)

        with np.errstate(invalid='ignore'):
            # remove redundant points
            mby, mbx = htk_utils.merge_colinear(by[0].astype(float),
                                                bx[0].astype(float))

        scaled_x = (mbx - 1) * scale
        scaled_y = (mby - 1) * scale

        # get superpixel boundary at highest-res
        x_brs.append(scaled_x + top)
        y_brs.append(scaled_y + left)

        rgb_data = im_nmzd[min_row:max_row, min_col:max_col]

        s_data.append(rgb_data)

        # get superpixel centers at highest-res
        x_cent.append(round((cen_x * scale + top), 1))
        y_cent.append(round((cen_y * scale + left), 1))

    return s_data, x_cent, y_cent, x_brs, y_brs
Example #17
    def test_create_tile_nuclei_annotations(self):

        wsi_path = os.path.join(
            utilities.externaldata(
                'data/TCGA-06-0129-01Z-00-DX3.bae772ea-dd36-47ec-8185-761989be3cc8.svs.sha512'  # noqa
            ))

        # define parameters
        args = {
            'reference_mu_lab': [8.63234435, -0.11501964, 0.03868433],
            'reference_std_lab': [0.57506023, 0.10403329, 0.01364062],
            'stain_1': 'hematoxylin',
            'stain_2': 'eosin',
            'stain_3': 'null',
            'stain_1_vector': [-1, -1, -1],
            'stain_2_vector': [-1, -1, -1],
            'stain_3_vector': [-1, -1, -1],
            'min_fgnd_frac': 0.50,
            'analysis_mag': 20,
            'analysis_tile_size': 1200,
            'foreground_threshold': 60,
            'min_radius': 6,
            'max_radius': 12,
            'min_nucleus_area': 25,
            'local_max_search_radius': 8,

            # In Python 3 unittesting, the scheduler fails if it uses processes
            'scheduler': 'multithreading',  # None,
            'num_workers': -1,
            'num_threads_per_worker': 1,
        }

        args = collections.namedtuple('Parameters', args.keys())(**args)

        # read WSI
        ts = large_image.getTileSource(wsi_path)

        ts_metadata = ts.getMetadata()

        analysis_tile_size = {
            'width':
            int(ts_metadata['tileWidth'] * np.floor(
                1.0 * args.analysis_tile_size / ts_metadata['tileWidth'])),
            'height':
            int(ts_metadata['tileHeight'] * np.floor(
                1.0 * args.analysis_tile_size / ts_metadata['tileHeight']))
        }

        # define ROI
        roi = {
            'left': ts_metadata['sizeX'] / 2,
            'top': ts_metadata['sizeY'] * 3 / 4,
            'width': analysis_tile_size['width'],
            'height': analysis_tile_size['height'],
            'units': 'base_pixels'
        }

        # define tile iterator parameters
        it_kwargs = {
            'tile_size': {
                'width': args.analysis_tile_size
            },
            'scale': {
                'magnification': args.analysis_mag
            },
            'region': roi
        }

        # create dask client
        cli_utils.create_dask_client(args)

        # get tile foreground at low res
        im_fgnd_mask_lres, fgnd_seg_scale = \
            cli_utils.segment_wsi_foreground_at_low_res(ts)

        # compute tile foreground fraction
        tile_fgnd_frac_list = htk_utils.compute_tile_foreground_fraction(
            wsi_path, im_fgnd_mask_lres, fgnd_seg_scale, it_kwargs)

        num_fgnd_tiles = np.count_nonzero(
            tile_fgnd_frac_list >= args.min_fgnd_frac)

        np.testing.assert_equal(num_fgnd_tiles, 2)

        # create nuclei annotations
        nuclei_bbox_annot_list = []
        nuclei_bndry_annot_list = []

        for tile_info in ts.tileIterator(
                format=large_image.tilesource.TILE_FORMAT_NUMPY, **it_kwargs):

            im_tile = tile_info['tile'][:, :, :3]

            # perform color normalization
            im_nmzd = htk_cnorm.reinhard(im_tile, args.reference_mu_lab,
                                         args.reference_std_lab)

            # perform color deconvolution
            w = cli_utils.get_stain_matrix(args)

            im_stains = htk_cdeconv.color_deconvolution(im_nmzd, w).Stains

            im_nuclei_stain = im_stains[:, :, 0].astype(np.float)

            # segment nuclei
            im_nuclei_seg_mask = htk_nuclear.detect_nuclei_kofahi(
                im_nuclei_stain, im_nuclei_stain < args.foreground_threshold,
                args.min_radius, args.max_radius, args.min_nucleus_area,
                args.local_max_search_radius)

            # generate nuclei annotations as bboxes
            cur_bbox_annot_list = cli_utils.create_tile_nuclei_annotations(
                im_nuclei_seg_mask, tile_info, 'bbox')

            nuclei_bbox_annot_list.extend(cur_bbox_annot_list)

            # generate nuclei annotations as boundaries
            cur_bndry_annot_list = cli_utils.create_tile_nuclei_annotations(
                im_nuclei_seg_mask, tile_info, 'boundary')

            nuclei_bndry_annot_list.extend(cur_bndry_annot_list)

        # compare nuclei bbox annotations with gtruth
        nuclei_bbox_annot_gtruth_file = os.path.join(
            utilities.externaldata(
                'data/TCGA-06-0129-01Z-00-DX3_roi_nuclei_bbox.anot.sha512'  # noqa
            ))

        with open(nuclei_bbox_annot_gtruth_file, 'r') as fbbox_annot:
            nuclei_bbox_annot_list_gtruth = json.load(fbbox_annot)['elements']

        # Check that nuclei_bbox_annot_list is nearly equal to
        # nuclei_bbox_annot_list_gtruth
        assert len(nuclei_bbox_annot_list) == len(
            nuclei_bbox_annot_list_gtruth)
        for pos in range(len(nuclei_bbox_annot_list)):
            np.testing.assert_array_almost_equal(
                nuclei_bbox_annot_list[pos]['center'],
                nuclei_bbox_annot_list_gtruth[pos]['center'], 0)
            np.testing.assert_almost_equal(
                nuclei_bbox_annot_list[pos]['width'],
                nuclei_bbox_annot_list_gtruth[pos]['width'], 1)
            np.testing.assert_almost_equal(
                nuclei_bbox_annot_list[pos]['height'],
                nuclei_bbox_annot_list_gtruth[pos]['height'], 1)

        # compare nuclei boundary annotations with gtruth
        nuclei_bndry_annot_gtruth_file = os.path.join(
            utilities.externaldata(
                'data/TCGA-06-0129-01Z-00-DX3_roi_nuclei_boundary.anot.sha512'  # noqa
            ))

        with open(nuclei_bndry_annot_gtruth_file, 'r') as fbndry_annot:
            nuclei_bndry_annot_list_gtruth = json.load(
                fbndry_annot)['elements']

        assert len(nuclei_bndry_annot_list) == len(
            nuclei_bndry_annot_list_gtruth)

        for pos in range(len(nuclei_bndry_annot_list)):

            np.testing.assert_array_almost_equal(
                nuclei_bndry_annot_list[pos]['points'],
                nuclei_bndry_annot_list_gtruth[pos]['points'], 0)
Example #18
def RGBTestData(config):

    print('loading RGB data')
    path_mal_test, _, files_malignant_test = next(
        os.walk(os.path.join(config.test_dir, 'malignant', 'rgb')))
    path_ben_test, _, files_benign_test = next(
        os.walk(os.path.join(config.test_dir, 'benign', 'rgb')))

    mal_paths_test = glob.glob(os.path.join(path_mal_test, '*'))
    ben_paths_test = glob.glob(os.path.join(path_ben_test, '*'))

    mal_outputs_test = [config.label.malignant] * len(mal_paths_test)
    ben_outputs_test = [config.label.benign] * len(ben_paths_test)

    test_paths = mal_paths_test + ben_paths_test
    test_outputs = mal_outputs_test + ben_outputs_test

    z = list(zip(test_paths, test_outputs))
    random.shuffle(z)
    test_paths, test_outputs = zip(*z)

    ref_std_lab = (0.57506023, 0.10403329, 0.01364062)
    ref_mu_lab = (8.63234435, -0.11501964, 0.03868433)

    if os.path.isfile('configs/stats.pkl'):
        with open('configs/stats.pkl', 'rb') as f:
            stats = pickle.load(f)
        print('###################  Stats loaded Test ####################')
        config['stats'] = stats
    else:
        stats = {}
        print('No stats file found (stats provide mu and sigma from the '
              'original whole image).')

    len_test = len(test_outputs)

    X = np.zeros((len_test, 256, 256, 3))
    Y = [-1] * len_test

    for i in range(len_test):

        img = skimage.io.imread(test_paths[i])
        if img.shape == (1024, 1024, 3):
            img = img[::4, ::4, :]

        image_id = int(float(re.findall(r"\d+\.\d+", test_paths[i])[0]))

        if image_id in stats.keys():
            [src_mu, src_sigma] = stats[image_id]
            img_nmzd = htk_cnorm.reinhard(img,
                                          ref_mu_lab,
                                          ref_std_lab,
                                          src_mu=src_mu,
                                          src_sigma=src_sigma).astype('float')
        else:
            print('#### stats for %d not present' % image_id)
            img_nmzd = htk_cnorm.reinhard(img, ref_mu_lab,
                                          ref_std_lab).astype('float')

        img = preprocess_resnet(img_nmzd)

        X[i] = img
        Y[i] = test_outputs[i]

    return (X, Y)
def detect_nuclei(im_input, min_radius=6, max_radius=10, display_result=False):

    # color normalization
    ref_mu_lab = (8.63234435, -0.11501964, 0.03868433)
    ref_std_lab = (0.57506023, 0.10403329, 0.01364062)

    im_nmzd = htk_cnorm.reinhard(im_input, ref_mu_lab, ref_std_lab)

    # color deconvolution
    w_est = htk_cdeconv.rgb_separate_stains_macenko_pca(im_nmzd, 255)
    nuclear_chid = htk_cdeconv.find_stain_index(
        htk_cdeconv.stain_color_map['hematoxylin'], w_est)
    im_nuclei_stain = htk_cdeconv.color_deconvolution(
        im_nmzd, w_est, 255).Stains[:, :, nuclear_chid]

    # segment nuclei foreground
    th = skimage.filters.threshold_li(im_nuclei_stain) * 0.8
    # th = skimage.filters.threshold_otsu(im_nuclei_stain)
    im_fgnd_mask = im_nuclei_stain < th
    im_fgnd_mask = skimage.morphology.opening(im_fgnd_mask,
                                              skimage.morphology.disk(2))
    im_fgnd_mask = skimage.morphology.closing(im_fgnd_mask,
                                              skimage.morphology.disk(1))

    # detect nuclei
    im_dog, im_dog_sigma = htk_shape_filters.cdog(
        im_nuclei_stain,
        im_fgnd_mask,
        sigma_min=min_radius / np.sqrt(2),
        sigma_max=max_radius / np.sqrt(2))

    nuclei_coord = skimage.feature.peak_local_max(im_dog,
                                                  min_distance=min_radius // 2,
                                                  threshold_rel=0.1)

    nuclei_coord = nuclei_coord[im_fgnd_mask[nuclei_coord[:, 0],
                                             nuclei_coord[:, 1]], :]

    nuclei_rad = np.array([
        im_dog_sigma[nuclei_coord[i, 0], nuclei_coord[i, 1]] * np.sqrt(2)
        for i in range(nuclei_coord.shape[0])
    ])

    # display result
    if display_result:

        print('Number of nuclei = ', nuclei_coord.shape[0])

        plt.figure(figsize=(30, 20))
        plt.subplot(2, 2, 1)
        plt.imshow(im_input)
        plt.title('Input', fontsize=labelsize)
        plt.axis('off')

        plt.subplot(2, 2, 2)
        plt.imshow(im_nuclei_stain)
        plt.title('Deconv nuclei stain', fontsize=labelsize)
        plt.axis('off')

        plt.subplot(2, 2, 3)
        plt.imshow(im_fgnd_mask)
        plt.title('Foreground mask', fontsize=labelsize)
        plt.axis('off')

        plt.subplot(2, 2, 4)
        plt.imshow(im_nmzd)
        plt.plot(nuclei_coord[:, 1], nuclei_coord[:, 0], 'k+')

        for i in range(nuclei_coord.shape[0]):

            cx = nuclei_coord[i, 1]
            cy = nuclei_coord[i, 0]
            r = nuclei_rad[i]

            mcircle = mpatches.Circle((cx, cy), r, color='g', fill=False)
            plt.gca().add_patch(mcircle)

        plt.title('Nuclei detection', fontsize=labelsize)
        plt.axis('off')

        plt.tight_layout()

    return nuclei_coord, nuclei_rad
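# Hedged usage sketch for detect_nuclei above (the file name is a placeholder, and
# labelsize is assumed to be a module-level font-size constant used by the plotting
# code):
#
#     im = skimage.io.imread('tile.png')[:, :, :3]
#     coords, radii = detect_nuclei(im, min_radius=6, max_radius=10,
#                                   display_result=True)
#     print('Detected %d nuclei' % coords.shape[0])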
def CombinedTestData(config):

    print('Loading combined data')
    path_mal_test, _, files_malignant_test = next(
        os.walk(os.path.join(config.test_dir, 'malignant', 'rgb')))
    path_ben_test, _, files_benign_test = next(
        os.walk(os.path.join(config.test_dir, 'benign', 'rgb')))

    path_mal_test_per, _, _ = next(
        os.walk(
            os.path.join(config.test_dir, 'malignant', 'persistence_images')))
    path_ben_test_per, _, _ = next(
        os.walk(os.path.join(config.test_dir, 'benign', 'persistence_images')))

    batch_size = config.trainer.batch_size
    label = config.label

    mal_paths_test = glob.glob(os.path.join(path_mal_test_per, '*'))
    ben_paths_test = glob.glob(os.path.join(path_ben_test_per, '*'))

    mal_outputs_test = [label['malignant']] * len(mal_paths_test)
    ben_outputs_test = [label['benign']] * len(ben_paths_test)

    test_paths = mal_paths_test + ben_paths_test
    test_outputs = mal_outputs_test + ben_outputs_test

    test_files = [os.path.basename(elem) for elem in test_paths]
    test_files = [elem.replace('.pkl', '') for elem in test_files]

    ref_std_lab = (0.57506023, 0.10403329, 0.01364062)
    ref_mu_lab = (8.63234435, -0.11501964, 0.03868433)

    if os.path.isfile('configs/stats.pkl'):
        with open('configs/stats.pkl', 'rb') as f:
            stats = pickle.load(f)
        print('Stats loaded')
        config['stats'] = stats
    else:
        stats = {}
        print('No stats file found (stats provide mu and sigma from the '
              'original whole image).')

    len_test = len(test_outputs)

    X_RGB = np.zeros((len_test, 256, 256, 3))
    X_Per = np.zeros((len_test, 32, 32, 1))
    Y = [-1] * len_test

    for i in range(len_test):

        image_id = int(float(re.findall(r"\d+\.\d+", test_files[i])[0]))

        fnameRGB = test_files[i] + '.jpg'
        fnamePer = test_files[i] + '.pkl'

        if test_outputs[i] == config.label['malignant']:
            pathRGB = os.path.join(path_mal_test, fnameRGB)
            pathPer = os.path.join(path_mal_test_per, fnamePer)

        elif test_outputs[i] == config.label['benign']:
            pathRGB = os.path.join(path_ben_test, fnameRGB)
            pathPer = os.path.join(path_ben_test_per, fnamePer)

        img = skimage.io.imread(pathRGB)
        if img.shape == (1024, 1024, 3):
            img = img[::4, ::4, :]

        image_id = int(float(re.findall(r"\d+\.\d+", pathRGB)[0]))

        if image_id in stats.keys():
            [src_mu, src_sigma] = stats[image_id]
            img_nmzd = htk_cnorm.reinhard(img,
                                          ref_mu_lab,
                                          ref_std_lab,
                                          src_mu=src_mu,
                                          src_sigma=src_sigma).astype('float')
        else:
            print('#### stats for %d not present' % image_id)
            img_nmzd = htk_cnorm.reinhard(img, ref_mu_lab,
                                          ref_std_lab).astype('float')

        imgRGB = preprocess_resnet(img_nmzd)

        with open(pathPer, 'rb') as f:
            img = pickle.load(f)
        img = img / config.trainer.percentile_factor
        img = np.array([img])
        imgPer = np.moveaxis(img, 0, 2)

        X_RGB[i] = imgRGB
        X_Per[i] = imgPer
        Y[i] = test_outputs[i]

    print('RGB : ', X_RGB.shape)
    print('Per : ', X_Per.shape)
    print('len(Y) : ', len(Y))

    return [X_RGB, X_Per, Y]
Example #21
def main(args):

    #
    # Read Input Image
    #
    print('>> Reading input image')

    im_input = skimage.io.imread(args.inputImageFile)[:, :, :3]

    #
    # Perform color normalization
    #
    print('>> Performing color normalization')

    # compute mean and stddev of input in LAB color space
    mu, sigma = htk_ccvt.lab_mean_std(im_input)

    # perform reinhard normalization
    im_nmzd = htk_cnorm.reinhard(im_input, mu, sigma)

    #
    # Perform color deconvolution
    #
    print('>> Performing color deconvolution')

    stain_color_1 = stain_color_map[args.stain_1]
    stain_color_2 = stain_color_map[args.stain_2]
    stain_color_3 = stain_color_map[args.stain_3]

    w = np.array([stain_color_1, stain_color_2, stain_color_3]).T

    im_stains = htk_cdeconv.color_deconvolution(im_nmzd, w).Stains

    im_nuclei_stain = im_stains[:, :, 0].astype(np.float)

    #
    # Perform nuclei segmentation
    #
    print('>> Performing nuclei segmentation')

    # segment foreground
    im_fgnd_mask = sp.ndimage.morphology.binary_fill_holes(
        im_nuclei_stain < args.foreground_threshold)

    # run adaptive multi-scale LoG filter
    im_log = htk_shape_filters.clog(im_nuclei_stain,
                                    im_fgnd_mask,
                                    sigma_min=args.min_radius * np.sqrt(2),
                                    sigma_max=args.max_radius * np.sqrt(2))

    im_nuclei_seg_mask, seeds, maxima = htk_seg.nuclear.max_clustering(
        im_log, im_fgnd_mask, args.local_max_search_radius)

    # filter out small objects
    im_nuclei_seg_mask = htk_seg.label.area_open(
        im_nuclei_seg_mask, args.min_nucleus_area).astype(np.int)

    #
    # Perform feature extraction
    #
    print('>> Performing feature extraction')

    im_nuclei = im_stains[:, :, 0]

    if args.cytoplasm_features:
        im_cytoplasm = im_stains[:, :, 1]
    else:
        im_cytoplasm = None

    df = htk_features.ComputeNucleiFeatures(
        im_nuclei_seg_mask,
        im_nuclei,
        im_cytoplasm,
        fsd_bnd_pts=args.fsd_bnd_pts,
        fsd_freq_bins=args.fsd_freq_bins,
        cyto_width=args.cyto_width,
        num_glcm_levels=args.num_glcm_levels,
        morphometry_features_flag=args.morphometry_features,
        fsd_features_flag=args.fsd_features,
        intensity_features_flag=args.intensity_features,
        gradient_features_flag=args.gradient_features,
    )

    #
    # Create HDF5 file
    #
    print('>> Writing HDF5 file')

    hdf = pd.HDFStore(args.outputFile)
    hdf.put('d1', df, format='table', data_columns=True)

    print('--- Object x Features = ', hdf['d1'].shape)
Example #22
    def test_segment_nuclei_kofahi(self):

        input_image_file = datastore.fetch('Easy1.png')

        ref_image_file = datastore.fetch('L1.png')

        # read input image
        im_input = skimage.io.imread(input_image_file)[:, :, :3]

        # read reference image
        im_reference = skimage.io.imread(ref_image_file)[:, :, :3]

        # get mean and stddev of reference image in lab space
        mean_ref, std_ref = htk_cvt.lab_mean_std(im_reference)

        # perform color normalization
        im_nmzd = htk_cnorm.reinhard(im_input, mean_ref, std_ref)

        # perform color deconvolution
        stain_color_map = {
            'hematoxylin': [0.65, 0.70, 0.29],
            'eosin': [0.07, 0.99, 0.11],
            'dab': [0.27, 0.57, 0.78],
            'null': [0.0, 0.0, 0.0]
        }

        w = htk_cdeconv.rgb_separate_stains_macenko_pca(im_nmzd, im_nmzd.max())

        im_stains = htk_cdeconv.color_deconvolution(im_nmzd, w).Stains

        nuclei_channel = htk_cdeconv.find_stain_index(
            stain_color_map['hematoxylin'], w)

        im_nuclei_stain = im_stains[:, :, nuclei_channel].astype(np.float)

        # segment nuclei
        im_nuclei_seg_mask = htk_seg.nuclear.detect_nuclei_kofahi(
            im_nuclei_stain,
            im_nuclei_stain < 60,
            min_radius=20,
            max_radius=30,
            min_nucleus_area=80,
            local_max_search_radius=10)

        num_nuclei = len(np.unique(im_nuclei_seg_mask)) - 1

        # check if segmentation mask matches ground truth
        gtruth_mask_file = os.path.join(
            datastore.fetch('Easy1_nuclei_seg_kofahi.npy'))

        im_gtruth_mask = np.load(gtruth_mask_file)

        num_nuclei_gtruth = len(np.unique(im_gtruth_mask)) - 1

        assert num_nuclei == num_nuclei_gtruth

        np.testing.assert_allclose(im_nuclei_seg_mask, im_gtruth_mask)

        # check no nuclei case
        im_nuclei_seg_mask = htk_seg.nuclear.detect_nuclei_kofahi(
            255 * np.ones_like(im_nuclei_stain),
            np.ones_like(im_nuclei_stain),
            min_radius=20,
            max_radius=30,
            min_nucleus_area=80,
            local_max_search_radius=10)

        num_nuclei = len(np.unique(im_nuclei_seg_mask)) - 1

        assert num_nuclei == 0
def main(args):

    #
    # Read Input Image
    #
    print('>> Reading input image')

    imInput = skimage.io.imread(args.inputImageFile)[:, :, :3]

    #
    # Perform color normalization
    #
    print('>> Performing color normalization')

    # compute mean and stddev of input in LAB color space
    Mu, Sigma = htk_color_conversion.lab_mean_std(imInput)

    # perform reinhard normalization
    imNmzd = htk_color_normalization.reinhard(imInput, Mu, Sigma)

    #
    # Perform color deconvolution
    #
    print('>> Performing color deconvolution')

    stainColor_1 = stainColorMap[args.stain_1]
    stainColor_2 = stainColorMap[args.stain_2]
    stainColor_3 = stainColorMap[args.stain_3]

    W = np.array([stainColor_1, stainColor_2, stainColor_3]).T

    imDeconvolved = htk_color_deconvolution.ColorDeconvolution(imNmzd, W)

    imNucleiStain = imDeconvolved.Stains[::2, ::2, 0].astype(np.float)

    #
    # Perform nuclei segmentation
    #
    print('>> Performing nuclei segmentation')

    # segment foreground
    imFgndMask = sp.ndimage.morphology.binary_fill_holes(
        imNucleiStain < args.foreground_threshold)

    # run adaptive multi-scale LoG filter
    imLog = htk_shape_filters.clog(imNucleiStain, imFgndMask,
                                   sigma_min=args.min_radius * np.sqrt(2),
                                   sigma_max=args.max_radius * np.sqrt(2))

    imNucleiSegMask, Seeds, Max = htk_seg.nuclear.max_clustering(
        imLog, imFgndMask, args.local_max_search_radius)

    # filter out small objects
    imNucleiSegMask = htk_seg.label.area_open(
        imNucleiSegMask, args.min_nucleus_area).astype(np.int)

    #
    # Perform feature extraction
    #
    print('>> Performing feature extraction')

    im_nuclei = imDeconvolved.Stains[::2, ::2, 0]

    if args.cytoplasm_features:
        im_cytoplasm = imDeconvolved.Stains[::2, ::2, 1]
    else:
        im_cytoplasm = None

    df = htk_features.ComputeNucleiFeatures(
        imNucleiSegMask, im_nuclei, im_cytoplasm,
        fsd_bnd_pts=args.fsd_bnd_pts,
        fsd_freq_bins=args.fsd_freq_bins,
        cyto_width=args.cyto_width,
        num_glcm_levels=args.num_glcm_levels,
        morphometry_features_flag=args.morphometry_features,
        fsd_features_flag=args.fsd_features,
        intensity_features_flag=args.intensity_features,
        gradient_features_flag=args.gradient_features,
    )

    #
    # Create HDF5 file
    #
    print('>> Writing HDF5 file')

    hdf = pd.HDFStore(args.outputFile)
    hdf.put('d1', df, format='table', data_columns=True)

    print('--- Object x Features = ', hdf['d1'].shape)
    def test_segment_nuclei_kofahi(self):

        input_image_file = os.path.join(TEST_DATA_DIR, 'Easy1.png')

        ref_image_file = os.path.join(TEST_DATA_DIR, 'L1.png')

        # read input image
        im_input = skimage.io.imread(input_image_file)[:, :, :3]

        # read reference image
        im_reference = skimage.io.imread(ref_image_file)[:, :, :3]

        # get mean and stddev of reference image in lab space
        mean_ref, std_ref = htk_cvt.lab_mean_std(im_reference)

        # perform color normalization
        im_nmzd = htk_cnorm.reinhard(im_input, mean_ref, std_ref)

        # perform color deconvolution
        stain_color_map = {
            'hematoxylin': [0.65, 0.70, 0.29],
            'eosin': [0.07, 0.99, 0.11],
            'dab': [0.27, 0.57, 0.78],
            'null': [0.0, 0.0, 0.0]
        }

        w = htk_cdeconv.rgb_separate_stains_macenko_pca(im_nmzd, im_nmzd.max())

        im_stains = htk_cdeconv.color_deconvolution(im_nmzd, w).Stains

        nuclei_channel = htk_cdeconv.find_stain_index(stain_color_map['hematoxylin'], w)

        im_nuclei_stain = im_stains[:, :, nuclei_channel].astype(np.float)

        # segment foreground (assumes nuclei are darker on a bright background)
        im_nuclei_fgnd_mask = sp.ndimage.morphology.binary_fill_holes(
            im_nuclei_stain < 60)

        # run adaptive multi-scale LoG filter
        im_log, im_sigma_max = htk_shape_filters.clog(
            im_nuclei_stain, im_nuclei_fgnd_mask,
            sigma_min=20 / np.sqrt(2), sigma_max=30 / np.sqrt(2))

        # apply local maximum clustering
        im_nuclei_seg_mask, seeds, maxima = htk_seg.nuclear.max_clustering(
            im_log, im_nuclei_fgnd_mask, 10)

        # filter out small objects
        im_nuclei_seg_mask = htk_seg.label.area_open(
            im_nuclei_seg_mask, 80).astype(np.uint8)

        # perform connected component analysis
        obj_props = skimage.measure.regionprops(im_nuclei_seg_mask)

        num_nuclei = len(obj_props)

        # check if segmentation mask matches ground truth
        gtruth_mask_file = os.path.join(TEST_DATA_DIR,
                                        'Easy1_nuclei_seg_kofahi_adaptive.npy')

        im_gtruth_mask = np.load(gtruth_mask_file)

        obj_props_gtruth = skimage.measure.regionprops(im_gtruth_mask)

        num_nuclei_gtruth = len(obj_props_gtruth)

        assert(num_nuclei == num_nuclei_gtruth)

        np.testing.assert_allclose(im_nuclei_seg_mask, im_gtruth_mask)
Example #25
def compute_superpixel_data(img_path, tile_position, wsi_mean, wsi_stddev,
                            args, **it_kwargs):

    # get slide tile source
    ts = large_image.getTileSource(img_path)

    # get requested tile information
    tile_info = ts.getSingleTile(
        tile_position=tile_position,
        resample=True,
        format=large_image.tilesource.TILE_FORMAT_NUMPY,
        **it_kwargs)

    im_tile = tile_info['tile'][:, :, :3]

    # get global x and y positions
    left = tile_info['gx']
    top = tile_info['gy']

    # get scale
    scale = tile_info['gwidth'] / tile_info['width']

    # perform color normalization
    im_nmzd = htk_cnorm.reinhard(im_tile, args.reference_mu_lab,
                                 args.reference_std_lab, wsi_mean, wsi_stddev)

    # compute the number of super-pixels
    im_width, im_height = im_nmzd.shape[:2]
    n_superpixels = ((im_width // args.patchSize)
                     * (im_height // args.patchSize))

    #
    # Generate labels using a superpixel algorithm (SLIC)
    # In SLIC, compactness controls image space proximity.
    # Higher compactness will make the shape of superpixels more square.
    #
    im_label = slic(
        im_nmzd, n_segments=n_superpixels, compactness=args.compactness) + 1

    region_props = regionprops(im_label)

    # set superpixel data list
    s_data = []
    x_cent = []
    y_cent = []

    for i in range(len(region_props)):
        # get x, y centroids for superpixel
        cen_x, cen_y = region_props[i].centroid

        # get bounds of superpixel region
        min_row, max_row, min_col, max_col = \
            get_patch_bounds(cen_x, cen_y, args.patchSize, im_width, im_height)

        rgb_data = im_nmzd[min_row:max_row, min_col:max_col]

        s_data.append(rgb_data)

        # get superpixel centers at highest-res
        x_cent.append(round((cen_x * scale + top), 1))
        y_cent.append(round((cen_y * scale + left), 1))

    return s_data, x_cent, y_cent
def main(args):

    #
    # Read Input Image
    #
    print('>> Reading input image')

    imInput = skimage.io.imread(args.inputImageFile)[:, :, :3]

    #
    # Perform color normalization
    #
    print('>> Performing color normalization')

    # compute mean and stddev of input in LAB color space
    Mu, Sigma = htk_color_conversion.lab_mean_std(imInput)

    # perform reinhard normalization
    imNmzd = htk_color_normalization.reinhard(imInput, Mu, Sigma)

    #
    # Perform color deconvolution
    #
    print('>> Performing color deconvolution')

    stainColor_1 = stainColorMap[args.stain_1]
    stainColor_2 = stainColorMap[args.stain_2]
    stainColor_3 = stainColorMap[args.stain_3]

    W = np.array([stainColor_1, stainColor_2, stainColor_3]).T

    imDeconvolved = htk_color_deconvolution.ColorDeconvolution(imNmzd, W)

    imNucleiStain = imDeconvolved.Stains[:, :, 0].astype(np.float)

    #
    # Perform nuclei segmentation
    #
    print('>> Performing nuclei segmentation')

    # segment foreground
    imFgndMask = sp.ndimage.morphology.binary_fill_holes(
        imNucleiStain < args.foreground_threshold)

    # run adaptive multi-scale LoG filter
    imLog = htk_shape_filters.clog(imNucleiStain, imFgndMask,
                                   sigma_min=args.min_radius * np.sqrt(2),
                                   sigma_max=args.max_radius * np.sqrt(2))

    imNucleiSegMask, Seeds, Max = htk_seg.nuclear.max_clustering(
        imLog, imFgndMask, args.local_max_search_radius)

    # filter out small objects
    imNucleiSegMask = htk_seg.label.area_open(
        imNucleiSegMask, args.min_nucleus_area).astype(np.int)

    #
    # Generate annotations
    #
    objProps = skimage.measure.regionprops(imNucleiSegMask)

    print('Number of nuclei = ', len(objProps))

    # create basic schema
    annotation = {
        "name":          "Nuclei",
        "description":   "Nuclei bounding boxes from a segmentation algorithm",
        "attributes": {
            "algorithm": {
                "color_normalization": "reinhard",
                "color_deconvolution": "ColorDeconvolution",
                "nuclei_segmentation": ["cLOG",
                                        "MaxClustering",
                                        "FilterLabel"]
            }
        },
        "elements": []
    }

    # add each nucleus as an element into the annotation schema
    for i in range(len(objProps)):

        c = [objProps[i].centroid[1], objProps[i].centroid[0], 0]
        width = objProps[i].bbox[3] - objProps[i].bbox[1] + 1
        height = objProps[i].bbox[2] - objProps[i].bbox[0] + 1

        cur_bbox = {
            "type":        "rectangle",
            "center":      c,
            "width":       width,
            "height":      height,
            "rotation":    0,
            "fillColor":   "rgba(255, 255, 255, 0)",
            "lineWidth":   2,
            "lineColor":   "rgb(34, 139, 34)"
        }

        annotation["elements"].append(cur_bbox)

    #
    # Save output segmentation mask
    #
    print('>> Outputting nuclei segmentation mask')

    skimage.io.imsave(args.outputNucleiMaskFile, imNucleiSegMask)

    #
    # Save output annotation
    #
    print('>> Outputting nuclei annotation')

    with open(args.outputNucleiAnnotationFile, 'w') as annotationFile:
        json.dump(annotation, annotationFile, indent=2, sort_keys=False)
Example #27
    def test_reinhard(self):
        """Test reinhard."""
        # get RGB image at a small magnification
        slide_info = gc.get('item/%s/tiles' % SAMPLE_SLIDE_ID)
        getStr = "/item/%s/tiles/region?left=%d&right=%d&top=%d&bottom=%d" % (
            SAMPLE_SLIDE_ID, 0, slide_info['sizeX'], 0, slide_info['sizeY']
            ) + "&magnification=%.2f" % MAG
        tissue_rgb = get_image_from_htk_response(
            gc.get(getStr, jsonResp=False))

        # # SANITY CHECK! normalize to LAB mean and std from SAME slide
        # mean_lab, std_lab = lab_mean_std(tissue_rgb)
        # tissue_rgb_normalized = reinhard(
        #     tissue_rgb, target_mu=mean_lab, target_sigma=std_lab)
        #
        # # we expect the images to be (almost) exactly the same
        # assert np.mean(tissue_rgb - tissue_rgb_normalized) < 1

        # Normalize to pre-set color standard
        tissue_rgb_normalized = reinhard(
            tissue_rgb, target_mu=cnorm['mu'], target_sigma=cnorm['sigma'])

        # check that it matches
        mean_lab, std_lab = lab_mean_std(tissue_rgb_normalized)
        self.assertTrue(all(
            np.abs(mean_lab - cnorm['mu']) < [0.1, 0.1, 0.1]))
        self.assertTrue(all(
            np.abs(std_lab - cnorm['sigma']) < [0.1, 0.1, 0.1]))

        # get tissue mask
        thumbnail_rgb = get_slide_thumbnail(gc, SAMPLE_SLIDE_ID)
        labeled, mask = get_tissue_mask(
            thumbnail_rgb, deconvolve_first=True,
            n_thresholding_steps=1, sigma=1.5, min_size=30)

        # # visualize result
        # vals = np.random.rand(256, 3)
        # vals[0, ...] = [0.9, 0.9, 0.9]
        # cMap = ListedColormap(1 - vals)
        #
        # f, ax = plt.subplots(1, 3, figsize=(20, 20))
        # ax[0].imshow(thumbnail_rgb)
        # ax[1].imshow(labeled, cmap=cMap)
        # ax[2].imshow(mask, cmap=cMap)
        # plt.show()

        # Do MASKED normalization to preset standard
        mask_out = resize(
            labeled == 0, output_shape=tissue_rgb.shape[:2],
            order=0, preserve_range=True) == 1
        tissue_rgb_normalized = reinhard(
            tissue_rgb, target_mu=cnorm['mu'], target_sigma=cnorm['sigma'],
            mask_out=mask_out)

        # check that it matches
        mean_lab, std_lab = lab_mean_std(
            tissue_rgb_normalized, mask_out=mask_out)
        self.assertTrue(all(
            np.abs(mean_lab - cnorm['mu']) < [0.1, 0.1, 0.1]))
        self.assertTrue(all(
            np.abs(std_lab - cnorm['sigma']) < [0.1, 0.1, 0.1]))
Example #28
def detect_tile_nuclei(slide_path, tile_position, args, it_kwargs,
                       src_mu_lab=None, src_sigma_lab=None, debug=False):

    # =========================================================================
    # ======================= Tile Loading ====================================
    # =========================================================================
    print('\n>> Loading Tile ... \n')

    csv_dict = {}

    csv_dict['PreparationTime'] = []
    csv_dict['ColorDeconvTime'] = []
    csv_dict['TotalTileLoadingTime'] = []

    csv_dict['CKPTLoadingTime'] = []
    csv_dict['ModelInferenceTime'] = []
    csv_dict['DetectionTime'] = []

    csv_dict['ROIShape'] = []
    csv_dict['ObjectsDict'] = []
    csv_dict['NumObjects'] = []

    csv_dict['AnnotationWritingTime'] = []

    csv_dict['AnnotationDict'] = []
    csv_dict['AnalysisDict'] = []

    start_time = time.time()
    total_tileloading_start_time = time.time()

    ts = large_image.getTileSource(slide_path)
    tile_info = ts.getSingleTile(
        tile_position=tile_position,
        format=large_image.tilesource.TILE_FORMAT_NUMPY,
        **it_kwargs)
    im_tile = tile_info['tile'][:, :, :3]
    csv_dict['ROIShape'] = im_tile.shape[:2]

    prep_time = time.time() - start_time
    csv_dict['PreparationTime'] = round(prep_time, 3)

    # =========================================================================
    # =================Img Normalization & Color Deconv========================
    # =========================================================================
    print('\n>> Color Deconvolving ... \n')
    start_time = time.time()

    im_nmzd = htk_cnorm.reinhard(
        im_tile,
        REFERENCE_MU_LAB,
        REFERENCE_STD_LAB,
        src_mu=src_mu_lab,
        src_sigma=src_sigma_lab
    )

    # perform color deconvolution
    if args.deconv_method == 'ruifrok':

        w = cli_utils.get_stain_matrix(args)
        im_stains = htk_cdeconv.color_deconvolution(
            im_nmzd, w).Stains.astype(np.float)[:, :, :2]

    elif args.deconv_method == 'macenko':

        w_est = htk_cdeconv.rgb_separate_stains_macenko_pca(im_tile, 255)
        im_stains = htk_cdeconv.color_deconvolution(
            im_tile, w_est, 255).Stains.astype(np.float)
        ch1 = htk_cdeconv.find_stain_index(
            htk_cdeconv.stain_color_map[args.stain_1], w_est)
        ch2 = htk_cdeconv.find_stain_index(
            htk_cdeconv.stain_color_map[args.stain_2], w_est)
        im_stains = im_stains[:, :, [ch1, ch2]]

    else:

        raise ValueError('Invalid deconvolution method parameter.')

    # =========================================================================
    # ====================== Fuse the stain1 & stain2 pix======================
    # =========================================================================

    # compute nuclear foreground mask
    im_fgnd_mask_stain_1 = im_stains[
        :, :, 0] < threshold_yen(im_stains[:, :, 0])
    im_fgnd_mask_stain_2 = im_stains[
        :, :, 1] < threshold_yen(im_stains[:, :, 1])
    im_fgnd_seg_mask = im_fgnd_mask_stain_1 | im_fgnd_mask_stain_2

    # fuse the two stain channels to form the nuclei detection input
    im_nuc_det_input = np.squeeze(np.min(im_stains[:, :, :2], axis=2))
    print('---> Fusing 2 Stains')
    deconv_time = time.time() - start_time
    csv_dict['ColorDeconvTime'] = round(deconv_time, 3)

    # =========================================================================
    # ================= Nuclei Detection Deep Learning Block ==================
    # =========================================================================

    total_tileloading_time = time.time() - total_tileloading_start_time
    csv_dict['TotalTileLoadingTime'] = round(total_tileloading_time, 3)

    start_time = time.time()

    config = get_config(CONFIG)
    config.model.rcnn.proposals.total_max_detections = args.max_det
    config.model.rcnn.proposals.min_prob_threshold = args.min_prob
    im_nuc_det_input = np.stack((im_nuc_det_input,) * 3, axis=-1)

    # =========================================================================
    tf.reset_default_graph()

    dataset_class = get_dataset('object_detection')
    model_class = get_model('fasterrcnn')
    dataset = dataset_class(config)
    model = model_class(config)

    graph = tf.Graph()
    session = tf.Session(graph=graph)

    with graph.as_default():
        image_placeholder = tf.placeholder(
            tf.float32, (None, None, 3), name='Input_Placeholder'
        )
        pred_dict = model(image_placeholder)

        ckpt_loading_start_time = time.time()

        saver = tf.train.Saver(sharded=True, allow_empty=True)
        saver.restore(session, CKPT_DIR)
        tf.logging.info('Loaded checkpoint.')

        ckpt_loading_time = time.time() - ckpt_loading_start_time
        csv_dict['CKPTLoadingTime'] = round(ckpt_loading_time, 3)

        inference_start_time = time.time()

        cls_prediction = pred_dict['classification_prediction']
        objects_tf = cls_prediction['objects']
        objects_labels_tf = cls_prediction['labels']
        objects_labels_prob_tf = cls_prediction['probs']

        fetches = {
            'objects': objects_tf,
            'labels': objects_labels_tf,
            'probs': objects_labels_prob_tf,
        }

        fetched = session.run(fetches, feed_dict={
            image_placeholder: np.array(im_nuc_det_input)
        })

        inference_time = time.time() - inference_start_time
        csv_dict['ModelInferenceTime'] = round(inference_time, 3)

        objects = fetched['objects']
        labels = fetched['labels'].tolist()
        probs = fetched['probs'].tolist()

        # Cast to int to consistently return the same type in Python 2 and 3
        objects = [
            [int(round(coord)) for coord in obj]
            for obj in objects.tolist()
        ]

        predictions = sorted([
            {
                'bbox': obj,
                'label': label,
                'prob': round(prob, 4),
            } for obj, label, prob in zip(objects, labels, probs)
        ], key=lambda x: x['prob'], reverse=True)

    print('\n>> Finishing Detection ... \n')
    print('***** Number of Detected Cells ****** : ', len(predictions))
    detection_time = time.time() - start_time
    csv_dict['DetectionTime'] = round(detection_time, 3)
    csv_dict['NumObjects'] = len(predictions)
    csv_dict['ObjectsDict'] = predictions

    # =========================================================================
    # ======================= TODO: Implement border deletion =================
    # =========================================================================

    # =========================================================================
    # ======================= Write Annotations ===============================
    # =========================================================================

    start_time = time.time()

    objects_df = pd.DataFrame(objects)
    formatted_annot_list,\
        formatter_analysis_list = cli_utils.convert_preds_to_utilformat(
            objects_df,
            probs,
            args.ignore_border_nuclei,
            im_tile_size=args.analysis_tile_size)

    nuclei_annot_list = cli_utils.create_tile_nuclei_annotations(
        formatted_annot_list, tile_info, args.nuclei_annotation_format)
    csv_dict['AnnotationDict'] = nuclei_annot_list

    csv_dict['AnalysisDict'] = formatter_analysis_list

    num_nuclei = len(nuclei_annot_list)

    annot_time = time.time() - start_time
    csv_dict['AnnotationWritingTime'] = round(annot_time, 3)

    return csv_dict
Example #29
def main(args):

    #
    # Read Input Image
    #
    print('>> Reading input image')

    im_input = skimage.io.imread(args.inputImageFile)[:, :, :3]

    #
    # Perform color normalization
    #
    print('>> Performing color normalization')

    # compute mean and stddev of input in LAB color space
    mu, sigma = htk_ccvt.lab_mean_std(im_input)

    # perform reinhard normalization
    im_nmzd = htk_cnorm.reinhard(im_input, mu, sigma)

    #
    # Perform color deconvolution
    #
    print('>> Performing color deconvolution')

    stain_color_1 = stain_color_map[args.stain_1]
    stain_color_2 = stain_color_map[args.stain_2]
    stain_color_3 = stain_color_map[args.stain_3]

    w = np.array([stain_color_1, stain_color_2, stain_color_3]).T

    im_stains = htk_cdeconv.color_deconvolution(im_nmzd, w).Stains

    im_nuclei_stain = im_stains[:, :, 0].astype(np.float)

    #
    # Perform nuclei segmentation
    #
    print('>> Performing nuclei segmentation')

    # segment foreground
    im_fgnd_mask = sp.ndimage.morphology.binary_fill_holes(
        im_nuclei_stain < args.foreground_threshold)

    # run adaptive multi-scale LoG filter
    im_log = htk_shape_filters.clog(im_nuclei_stain, im_fgnd_mask,
                                    sigma_min=args.min_radius * np.sqrt(2),
                                    sigma_max=args.max_radius * np.sqrt(2))

    im_nuclei_seg_mask, seeds, maxima = htk_seg.nuclear.max_clustering(
        im_log, im_fgnd_mask, args.local_max_search_radius)

    # filter out small objects
    im_nuclei_seg_mask = htk_seg.label.area_open(
        im_nuclei_seg_mask, args.min_nucleus_area).astype(np.int)

    #
    # Generate annotations
    #
    obj_props = skimage.measure.regionprops(im_nuclei_seg_mask)

    print('Number of nuclei = {}'.format(len(obj_props)))

    # create basic schema
    annotation = {
        "name":          "Nuclei",
        "description":   "Nuclei bounding boxes from a segmentation algorithm",
        "attributes": {
            "algorithm": {
                "color_normalization": "reinhard",
                "color_deconvolution": "ColorDeconvolution",
                "nuclei_segmentation": ["cLOG",
                                        "MaxClustering",
                                        "FilterLabel"]
            }
        },
        "elements": []
    }

    # add each nucleus as an element into the annotation schema
    for i in range(len(obj_props)):

        c = [obj_props[i].centroid[1], obj_props[i].centroid[0], 0]
        width = obj_props[i].bbox[3] - obj_props[i].bbox[1] + 1
        height = obj_props[i].bbox[2] - obj_props[i].bbox[0] + 1

        cur_bbox = {
            "type":        "rectangle",
            "center":      c,
            "width":       width,
            "height":      height,
            "rotation":    0,
            "fillColor":   "rgba(255, 255, 255, 0)",
            "lineWidth":   2,
            "lineColor":   "rgb(34, 139, 34)"
        }

        annotation["elements"].append(cur_bbox)

    #
    # Save output segmentation mask
    #
    print('>> Outputting nuclei segmentation mask')

    skimage.io.imsave(args.outputNucleiMaskFile, im_nuclei_seg_mask)

    #
    # Save output annotation
    #
    print('>> Outputting nuclei annotation')

    with open(args.outputNucleiAnnotationFile, 'w') as annotation_file:
        json.dump(annotation, annotation_file, indent=2, sort_keys=False)
Example #30
def grid_tiling(gc,
                item_id,
                group_names,
                save_dir,
                save_mag=None,
                mask_mag=1.25,
                tile_size=(224, 224),
                tissue_threshold=0.3,
                annotation_threshold=0.15,
                random_seed=64,
                is_test=False,
                oversample_background=2.0,
                reinhard_stats=None):
    """Split a DSA image item (WSI) into smaller images and save locally grouped by annotations. This approach grids the
    image into equal sized small images, or tiles (i.e. a grid is placed over the WSI starting at the top left corner).
    At the bottom and right edge of the WSI the tiles are ignored if not of correct size (the case where the WSI
    dimensions are not a multiple factor of the tile size). A list of annotation group names are needed to group the
    tiles into classes of images saved in their own directories. Tiles with no tissue detected are ignored and tiles not
    containing annotations (but have tissue) are by default saved into background class. A background annotation group
    will cause issues so avoid having this annotation group name.

    Tiles can be saved at a lower magnification than source image if needed (param: save_mag). Note that tiles size
    specified should be the tile size at the save magnification not the source magnification. Image saved will be of the
    tile size specified in parameters, regardless of the save_mag used.

    Parameters
    ----------
    gc : girder_client.GirderClient
        authenticated client
    item_id : str
        DSA image item id
    group_names : list
        list of annotation group names
    save_dir : str
        directory to create group directories with images - save_dir / group_name_1, save_dir / background, etc.
    save_mag : float (optional)
        magnification to use when saving the images, if None then source magnification will be used.
    mask_mag : float (optional)
        magnification at which to create the binary masks of tissue and annotations. Note that if your annotations are
        very small it may help to use a larger value than the default here, but binary mask creation will fail at very
        high magnifications.
    tile_size : tuple (optional)
        size (width, height) to save tiles at; note that this is the size they are saved at regardless of the
        magnification used (i.e. if save_mag is 4 times lower than the source magnification, then each tile dimension
        covers 4 times as many pixels at full resolution).
    tissue_threshold : float (optional)
        fraction (from 0 to 1) of the tile that must contain tissue for the tile to be included
    annotation_threshold : float (optional)
        fraction (from 0 to 1) of the tile that must contain an annotation (per group) for the tile to be labeled with
        that group. Note that a single tile may be saved more than once if it represents multiple classes.
    random_seed : int (optional)
        random seed to use when shuffling the background regions
    is_test : bool (optional)
        if True, all the background regions will be saved; otherwise oversample_background is used to determine how
        many background regions to save
    oversample_background : float (optional)
        factor by which to oversample background-class images, relative to the number of images saved for the most
        represented annotation class
    reinhard_stats : dict (optional)
        if not None, the saved images will also be color augmented by color normalizing the tiles with the Reinhard
        color normalization method. This dict should contain src_mu and src_sigma keys with the stats for this image,
        and target_mu and target_sigma keys, which are lists containing the stats of 1 or more target images to
        normalize to.

    """
    im_info = gc.get('item/{}/tiles'.format(item_id))
    if save_mag is None:
        save_mag = im_info['magnification']

    if reinhard_stats is not None:
        # get color stats for image
        mu, sigma = reinhard_color_stats(gc, item_id)

    # ----- prep work ----- #
    filename = splitext(gc.getItem(item_id)['name'])[0]

    # create dirs for each image class to save
    group_dirs = [join(save_dir, group_name) for group_name in group_names]
    for group_dir in group_dirs:
        makedirs(group_dir, exist_ok=True)
    background_dir = join(save_dir, 'background')
    makedirs(background_dir, exist_ok=True)

    # get image annotations
    annotations = gc.get('/annotation/item/' + item_id)

    # create a dataframe to use with annotation to mask handler functions (gt codes)
    gt_data = [[group_name, 1, i + 1, 0, 0, 'rgb(0, 0, {})'.format(i), '']
               for i, group_name in enumerate(group_names)]
    gt_codes = pd.DataFrame(columns=[
        'group', 'overlay_order', 'GT_code', 'is_roi', 'is_background_class',
        'color', 'comments'
    ],
                            data=gt_data,
                            index=range(len(group_names)))
    gt_codes.index = gt_codes.loc[:, 'group']

    # get binary masks - tissue mask and annotation(s) mask
    mask_mag_factor, _ = get_scale_factor_and_appendStr(gc=gc,
                                                        slide_id=item_id,
                                                        MAG=mask_mag)
    # - scaling the annotations to lower magnification
    mask_annotations = scale_slide_annotations(deepcopy(annotations),
                                               sf=mask_mag_factor)

    # - binary masks are for the whole image at low resolution, function returns also the RGB image which we use for
    # - getting the tissue mask
    mask_element_info = get_bboxes_from_slide_annotations(mask_annotations)
    # copy so the module-level GET_KWARGS defaults are not modified
    get_kwargs = deepcopy(GET_KWARGS)
    get_kwargs['gc'] = gc
    get_kwargs['slide_id'] = item_id
    get_kwargs['GTCodes_dict'] = gt_codes.T.to_dict()
    get_kwargs['bounds'] = None
    get_kwargs['MAG'] = mask_mag
    ann_mask_and_image = get_image_and_mask_from_slide(
        mode='wsi',
        slide_annotations=mask_annotations,
        element_infos=mask_element_info,
        **get_kwargs)
    tissue_mask = get_tissue_mask(ann_mask_and_image['rgb'])[0]

    # convert the annotations to lower magnification
    fr_to_lr_factor, _ = get_scale_factor_and_appendStr(gc=gc,
                                                        slide_id=item_id,
                                                        MAG=save_mag)
    annotations = scale_slide_annotations(annotations, sf=fr_to_lr_factor)
    lr_element_info = get_bboxes_from_slide_annotations(annotations)

    # get full resolution information for image
    fr_mag = im_info['magnification']
    fr_width = im_info['sizeX']
    fr_height = im_info['sizeY']
    fr_tile_size = int(tile_size[0] / fr_to_lr_factor), int(
        tile_size[1] / fr_to_lr_factor)  # (width, height)

    # change the get_kwargs to save magnification
    get_kwargs['MAG'] = save_mag

    # ----- loop through image at full res ----- #
    group_annotation_counts = [0] * len(group_names)
    background_regions = []
    for x in range(0, fr_width, fr_tile_size[0]):
        for y in range(0, fr_height, fr_tile_size[1]):
            # check that the tile won't go over the edge of image, if so skip
            if x + fr_tile_size[0] > fr_width or y + fr_tile_size[
                    1] > fr_height:
                continue

            # bounds of this tile in mask-magnification coordinates
            y0 = int(y * mask_mag / fr_mag)
            y1 = int((y + fr_tile_size[1]) * mask_mag / fr_mag)
            x0 = int(x * mask_mag / fr_mag)
            x1 = int((x + fr_tile_size[0]) * mask_mag / fr_mag)

            # check tile for tissue, using the binary mask for tissue
            tissue_tile = tissue_mask[y0:y1, x0:x1]

            # skip if tile does not contain enough tissue
            if (np.count_nonzero(tissue_tile) / tissue_tile.size
                    < tissue_threshold):
                continue

            # check tile for annotations, using the binary mask for annotations
            annotation_tile = ann_mask_and_image['ROI'][y0:y1, x0:x1]

            # tile is background if no annotation is present (of any group)
            background_flag = True
            # - check for each annotation group
            for i, group_name in enumerate(group_names):
                group_annotation_tile = annotation_tile == i + 1

                # skip this group if the tile does not contain enough of its annotation
                if (np.count_nonzero(group_annotation_tile) /
                        group_annotation_tile.size < annotation_threshold):
                    continue

                background_flag = False
                group_annotation_counts[i] += 1

                # get annotation image and save it
                get_kwargs['bounds'] = {
                    'XMIN': x,
                    'XMAX': x + fr_tile_size[0],
                    'YMIN': y,
                    'YMAX': y + fr_tile_size[1]
                }

                annotation_im = get_image_and_mask_from_slide(
                    mode='manual_bounds',
                    slide_annotations=annotations,
                    element_infos=lr_element_info,
                    **get_kwargs)['rgb']

                # save the image to correct directory
                imwrite(
                    join(group_dirs[i],
                         '{}_x_{}_y_{}.png'.format(filename, x, y)),
                    annotation_im)

                if reinhard_stats is not None:
                    # add color augmentation with Reinhard method
                    for j, (_, v) in enumerate(reinhard_stats.items()):
                        im_norm = reinhard(annotation_im.copy(),
                                           v['mu'],
                                           v['sigma'],
                                           src_mu=mu,
                                           src_sigma=sigma)
                        imwrite(
                            join(
                                group_dirs[i],
                                '{}_x_{}_y_{}_norm_{}.png'.format(
                                    filename, x, y, j)), im_norm)

            if background_flag:
                # save coordinates for background (non-glomeruli) image candidates
                background_regions.append({
                    'magnification': save_mag,
                    'left': x,
                    'top': y,
                    'width': fr_tile_size[0],
                    'height': fr_tile_size[1]
                })

    # randomly select background class coordinates
    # - oversample the background class relative to the most represented annotation class
    Random(random_seed).shuffle(background_regions)
    if not is_test:
        background_regions = background_regions[:int(
            oversample_background * max(group_annotation_counts))]
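    # Illustrative note (not from the original source): if the most represented
    # annotation group yielded 50 tiles and oversample_background is 2.0, at most
    # int(2.0 * 50) = 100 shuffled background regions are kept here.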

    for region in background_regions:
        tile_im = get_region_im(gc, item_id, region)[:, :, :3]

        # save background image
        imwrite(
            join(
                background_dir,
                '{}_x_{}_y_{}.png'.format(filename, region['left'],
                                          region['top'])), tile_im)

        if reinhard_stats is not None:
            # add color augmentation with Reinhard method
            for j, (_, v) in enumerate(reinhard_stats.items()):
                im_norm = reinhard(tile_im.copy(),
                                   v['mu'],
                                   v['sigma'],
                                   src_mu=mu,
                                   src_sigma=sigma)
                imwrite(
                    join(
                        background_dir, '{}_x_{}_y_{}_norm_{}.png'.format(
                            filename, region['left'], region['top'], j)),
                    im_norm)
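
# A minimal usage sketch for grid_tiling, not part of the original source. The API URL,
# API key, item id, group names, and output directory below are hypothetical
# placeholders, and an environment where grid_tiling's helper imports resolve is
# assumed.
if __name__ == '__main__':
    import girder_client

    gc = girder_client.GirderClient(apiUrl='https://example.org/api/v1')  # hypothetical server
    gc.authenticate(apiKey='REPLACE_WITH_API_KEY')  # hypothetical credential

    grid_tiling(
        gc=gc,
        item_id='5f0000000000000000000000',  # hypothetical DSA item id
        group_names=['tumor', 'stroma'],      # hypothetical annotation groups
        save_dir='./tiles',
        save_mag=10,
        tile_size=(224, 224),
        tissue_threshold=0.3,
        annotation_threshold=0.15,
    )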