Example #1
0
def test_prep(girderClient):  # noqa
    """Populate ``cfg`` with a girder client, an RGB region and its mask.

    After this runs, ``cfg.gc`` holds the client, ``cfg.tissue_rgb`` the
    fetched region and ``cfg.mask_out`` a boolean array (same height and
    width as ``cfg.tissue_rgb``) that is True where ``get_tissue_mask``
    assigned the label 0.
    """
    cfg.gc = girderClient

    # look the slide up by its name; the first search hit is used
    matches = cfg.gc.get(
        '/item', parameters={'text': "TCGA-A2-A0YE-01Z-00-DX1"})
    iteminfo = matches[0]

    # fetch a fixed region of the slide as RGB at a small magnification
    magnification = 1.5
    region_args = (iteminfo['_id'], 46890, 50000, 40350, 43000)
    region_query = (
        "/item/%s/tiles/region?left=%d&right=%d&top=%d&bottom=%d" % region_args
        + "&magnification=%.2f" % magnification)
    response = cfg.gc.get(region_query, jsonResp=False)
    cfg.tissue_rgb = get_image_from_htk_response(response)

    # label the tissue directly on the fetched region
    labeled, _ = get_tissue_mask(
        cfg.tissue_rgb,
        deconvolve_first=False,
        n_thresholding_steps=1,
        sigma=1.5,
        min_size=30)

    # mask of things to ignore: everything labeled 0, at the RGB resolution
    unlabeled = labeled == 0
    cfg.mask_out = resize(
        unlabeled,
        output_shape=cfg.tissue_rgb.shape[:2],
        order=0,
        preserve_range=True) == 1
Example #2
0
    def set_slide_info_and_get_tissue_mask(self):
        """Fetch slide metadata and return the labeled tissue mask.

        Sets ``self.slide_info`` from the slide's tile metadata and adds an
        ``'F_tissue'`` entry to it. When ``self.visualize_tissue_boundary``
        is truthy, the tissue boundary annotation documents are posted to
        the slide.

        Returns
        -------
        labeled
            first output of ``get_tissue_mask`` for the slide thumbnail

        Raises
        ------
        ValueError
            if the labeled mask has fewer than two unique values

        """
        # persistent dict holding information about the slide
        self.slide_info = self.gc.get('item/%s/tiles' % self.slide_id)

        thumbnail_rgb = get_slide_thumbnail(self.gc, self.slide_id)

        # optionally color normalize the thumbnail first
        if 'thumbnail' in self.cnorm_params.keys():
            target = self.cnorm_params['thumbnail']
            normalized = reinhard(
                im_src=thumbnail_rgb,
                target_mu=target['mu'],
                target_sigma=target['sigma'])
            thumbnail_rgb = np.uint8(normalized)

        # labeled tissue mask -- each unique value is one tissue piece
        labeled, _ = get_tissue_mask(
            thumbnail_rgb, **self.get_tissue_mask_kwargs)

        n_unique = len(np.unique(labeled))
        if n_unique < 2:
            raise ValueError("No tissue detected!")

        if self.visualize_tissue_boundary:
            post_path = "/annotation?itemId=" + self.slide_id
            for doc in get_tissue_boundary_annotation_documents(
                    self.gc, slide_id=self.slide_id, labeled=labeled):
                self.gc.post(post_path, json=doc)

        # size of the mask relative to the WSI
        mask_width = labeled.shape[1]
        self.slide_info['F_tissue'] = self.slide_info['sizeX'] / mask_width

        return labeled
Example #3
0
    def test_get_tissue_mask(self):
        """Check get_tissue_mask() output and save a binary mask to disk."""
        thumbnail_rgb = get_slide_thumbnail(gc, SAMPLE_SLIDE_ID)

        params = {
            'deconvolve_first': True,
            'n_thresholding_steps': 1,
            'sigma': 1.5,
            'min_size': 30,
        }
        labeled, _ = get_tissue_mask(thumbnail_rgb, **params)

        # for visual inspection, plot thumbnail_rgb next to both outputs of
        # get_tissue_mask using a random ListedColormap

        self.assertTupleEqual(labeled.shape, (156, 256))
        n_labels = len(np.unique(labeled))
        self.assertEqual(n_labels, 10)

        # save for use in the next test
        binary_mask = (labeled > 0).astype(np.uint8)
        out_path = os.path.join(savepath, 'tissue_binmask.png')
        imwrite(out_path, binary_mask)
    def test_get_tissue_mask(self):
        """Run get_tissue_mask() on the slide thumbnail and check the labels.

        The labeled mask is kept on ``cfg.labeled``.
        """
        thumbnail_rgb = get_slide_thumbnail(cfg.gc, cfg.iteminfo['_id'])

        params = {
            'deconvolve_first': True,
            'n_thresholding_steps': 1,
            'sigma': 1.5,
            'min_size': 30,
        }
        cfg.labeled, _ = get_tissue_mask(thumbnail_rgb, **params)

        assert cfg.labeled.shape == (156, 256)
        n_labels = len(np.unique(cfg.labeled))
        assert n_labels == 11
def get_tissue_contours(gc,
                        item_id,
                        magnification=1.25,
                        contour_area_threshold=15000):
    """Get the contours of the tissue in a WSI from a low magnification image.

    The whole image is fetched at ``magnification``, a tissue mask is computed
    with ``get_tissue_mask`` (default parameters) and the external contours of
    that mask are extracted. Contours whose area is not strictly greater than
    ``contour_area_threshold`` are dropped.

    Parameters
    ----------
    gc : girder_client.GirderClient
        authenticated client for private images
    item_id : str
        DSA item id of image
    magnification : float (optional)
        magnification to pull image at
    contour_area_threshold : int (optional)
        contours with area smaller than or equal to this value are excluded

    Returns
    -------
    tissue_contours : list
        opencv formatted contours of the tissue
    im : np.ndarray
        RGB low magnification image
    contour_im : np.ndarray
        copy of the low magnification image with the kept contours drawn
        using color [255, 0, 0] and a thickness of 2

    """
    # get whole image at specific magnification, dropping any extra channels
    im = get_region_im(gc, item_id, {'magnification': magnification})[:, :, :3]

    # Binarize BEFORE converting to uint8: casting the labeled mask directly
    # would wrap label 256 (and its multiples) to 0 and drop those pieces.
    labeled = get_tissue_mask(im)[0]
    mask = np.where(labeled > 0, 255, 0).astype(np.uint8)

    # OpenCV 3.x returns (image, contours, hierarchy) while other versions
    # return (contours, hierarchy); the last two entries are always the same.
    contours, _ = cv.findContours(mask, cv.RETR_EXTERNAL,
                                  cv.CHAIN_APPROX_TC89_KCOS)[-2:]

    # remove any small contours
    tissue_contours = [
        contour for contour in contours
        if cv.contourArea(contour) > contour_area_threshold
    ]

    # draw the contours on an image copy so the returned image stays clean
    contour_im = cv.drawContours(im.copy(), tissue_contours, -1, [255, 0, 0],
                                 2)
    return tissue_contours, im, contour_im
def reinhard_color_stats(gc, item_id, magnification=1.25):
    """Calculate the Reinhard color stats of the tissue in a DSA image item.

    The stats are the per-channel mean and standard deviation in LAB color
    space, computed on a thumbnail of the image. Pixels that
    ``get_tissue_mask`` (called with default parameters) labels as 0 are
    masked out before the stats are taken.

    Parameters
    ----------
    gc : girder_client.GirderClient
        authenticated girder client for private images
    item_id : str
        image item id
    magnification : float (optional)
        magnification of thumbnail used to calculate the color stats

    Returns
    -------
    mu : list
        LAB mean for each channel (length of 3)
    sigma : list
        LAB standard dev. for each channel (length of 3)

    """
    im_info = gc.get('item/{}/tiles'.format(item_id))

    # thumbnail width that corresponds to the requested magnification
    thumb_width = int(
        im_info['sizeX'] * magnification / im_info['magnification'])
    thumbnail = get_item_image(
        gc, item_id, 'thumbnail', return_type='Array', width=thumb_width)

    # True where no tissue was labeled, i.e. the pixels to exclude
    background = get_tissue_mask(thumbnail)[0] == 0

    # mask the LAB image with the background mask repeated over the 3 channels
    channel_mask = np.repeat(background[..., None], 3, axis=2)
    im_lab = np.ma.masked_array(rgb_to_lab(thumbnail), mask=channel_mask)

    # per-channel mean and standard deviation over the unmasked pixels
    channels = [im_lab[..., idx] for idx in range(3)]
    mu = [channel.mean() for channel in channels]
    sigma = [channel.std() for channel in channels]
    return mu, sigma
    def set_slide_info_and_get_tissue_mask(self):
        """Fetch slide metadata and return the labeled tissue mask.

        Sets ``self.slide_info`` from the slide's tile metadata and adds an
        ``'F_tissue'`` entry to it.

        Returns
        -------
        labeled
            first output of ``get_tissue_mask`` for the slide thumbnail

        Raises
        ------
        ValueError
            if the labeled mask has fewer than two unique values

        """
        # persistent dict holding information about the slide
        self.slide_info = self.gc.get('item/%s/tiles' % self.slide_id)

        thumbnail_rgb = get_slide_thumbnail(self.gc, self.slide_id)

        # labeled tissue mask -- each unique value is one tissue piece
        labeled, _ = get_tissue_mask(
            thumbnail_rgb, **self.get_tissue_mask_kwargs)

        n_unique = len(np.unique(labeled))
        if n_unique < 2:
            raise ValueError("No tissue detected!")

        # size of the mask relative to the WSI
        mask_width = labeled.shape[1]
        self.slide_info['F_tissue'] = self.slide_info['sizeX'] / mask_width

        return labeled
Example #8
0
    def test_get_tissue_mask(self):
        """Check get_tissue_mask() with two thresholding steps and sigma 0."""
        thumbnail_rgb = get_slide_thumbnail(gc, SAMPLE_SLIDE_ID)

        params = {
            'deconvolve_first': True,
            'n_thresholding_steps': 2,
            'sigma': 0.,
            'min_size': 30,
        }
        labeled, _ = get_tissue_mask(thumbnail_rgb, **params)

        # for visual inspection, plot thumbnail_rgb next to both outputs of
        # get_tissue_mask using a random ListedColormap

        self.assertTupleEqual(labeled.shape, (152, 256))
        n_labels = len(np.unique(labeled))
        self.assertEqual(n_labels, 10)
    def find_potentially_cellular_regions(self):
        """Label regions of the tissue that are potentially cellular.

        Sets ``self.tissue_htx`` and ``self.maybe_cellular`` and writes the
        'maybe_cellular' GT code into ``self.labeled`` in place.
        """
        gt_codes = self.cdt.GTcodes

        # everything that already has a specific label is left alone
        mask_out = self.labeled != gt_codes.loc["not_specified", "GT_code"]

        # deconvolve to get the hematoxylin channel (cellular areas); the
        # returned channel shows MINIMA, so it is inverted
        stains, _, _ = color_deconvolution_routine(
            self.tissue_rgb,
            mask_out=mask_out,
            **self.cdt.stain_unmixing_routine_params)
        self.tissue_htx = 255 - stains[..., 0]

        # first, threshold the hematoxylin channel
        thresholded, _ = get_tissue_mask(
            self.tissue_htx.copy(),
            deconvolve_first=False,
            n_thresholding_steps=1,
            sigma=self.cdt.cellular_step1_sigma,
            min_size=self.cdt.cellular_step1_min_size)

        # second, low-pass filter to dilate and smooth a bit
        smoothed = gaussian(
            0 + (thresholded > 0),
            sigma=self.cdt.cellular_step2_sigma,
            output=None,
            mode='nearest',
            preserve_range=True)

        # connected components, restricted to not-otherwise-specified pixels
        components, _ = ndimage.label(smoothed)
        components[mask_out] = 0
        self.maybe_cellular = components

        # assign to the labeled mask
        maybe_cellular_code = gt_codes.loc['maybe_cellular', 'GT_code']
        self.labeled[components > 0] = maybe_cellular_code
Example #10
0
    def test_reinhard(self):
        """Test reinhard() against the preset standard, unmasked and masked."""
        tolerance = [0.1, 0.1, 0.1]

        def assert_matches_standard(image, **stat_kwargs):
            # LAB mean/std of the image must be within tolerance of cnorm
            mean_lab, std_lab = lab_mean_std(image, **stat_kwargs)
            self.assertTrue(all(
                np.abs(mean_lab - cnorm['mu']) < tolerance))
            self.assertTrue(all(
                np.abs(std_lab - cnorm['sigma']) < tolerance))

        # get the RGB image of the whole slide at a small magnification
        slide_info = gc.get('item/%s/tiles' % SAMPLE_SLIDE_ID)
        region_args = (
            SAMPLE_SLIDE_ID, 0, slide_info['sizeX'], 0, slide_info['sizeY'])
        region_query = (
            "/item/%s/tiles/region?left=%d&right=%d&top=%d&bottom=%d"
            % region_args
            + "&magnification=%.2f" % MAG)
        response = gc.get(region_query, jsonResp=False)
        tissue_rgb = get_image_from_htk_response(response)

        # normalize to the pre-set color standard and check that it matches
        unmasked_result = reinhard(
            tissue_rgb, target_mu=cnorm['mu'], target_sigma=cnorm['sigma'])
        assert_matches_standard(unmasked_result)

        # get tissue mask from the slide thumbnail
        thumbnail_rgb = get_slide_thumbnail(gc, SAMPLE_SLIDE_ID)
        labeled, _ = get_tissue_mask(
            thumbnail_rgb,
            deconvolve_first=True,
            n_thresholding_steps=1,
            sigma=1.5,
            min_size=30)

        # pixels labeled 0 are excluded; bring the mask to the RGB resolution
        mask_out = resize(
            labeled == 0,
            output_shape=tissue_rgb.shape[:2],
            order=0,
            preserve_range=True) == 1

        # do MASKED normalization to the preset standard and check again
        masked_result = reinhard(
            tissue_rgb,
            target_mu=cnorm['mu'],
            target_sigma=cnorm['sigma'],
            mask_out=mask_out)
        assert_matches_standard(masked_result, mask_out=mask_out)
Example #11
0
# %%===========================================================================

# Script cell: fetch the slide image and an exclusion mask, crop both to a
# small window, and seed numpy's random generator. All names assigned here
# are module-level.
print("Getting images to be normalized ...")

# get RGB image of the whole slide (0..sizeX, 0..sizeY) at magnification MAG
slide_info = gc.get('item/%s/tiles' % SAMPLE_SLIDE_ID)
getStr = "/item/%s/tiles/region?left=%d&right=%d&top=%d&bottom=%d" % (
    SAMPLE_SLIDE_ID, 0, slide_info['sizeX'], 0,
    slide_info['sizeY']) + "&magnification=%.2f" % MAG
tissue_rgb = get_image_from_htk_response(gc.get(getStr, jsonResp=False))

# get mask of things to ignore: label the tissue on the slide thumbnail ...
thumbnail_rgb = get_slide_thumbnail(gc, SAMPLE_SLIDE_ID)
mask_out, _ = get_tissue_mask(thumbnail_rgb,
                              deconvolve_first=True,
                              n_thresholding_steps=1,
                              sigma=1.5,
                              min_size=30)
# ... then mark everything labeled 0 and resize that boolean mask to the
# height/width of tissue_rgb (order=0); "== 1" turns the result back into
# a boolean array
mask_out = resize(mask_out == 0,
                  output_shape=tissue_rgb.shape[:2],
                  order=0,
                  preserve_range=True) == 1

# since this is a unit test, just work on a small image; the same window is
# cropped from the image and the mask so they stay aligned
tissue_rgb = tissue_rgb[1000:1500, 2500:3000, :]
mask_out = mask_out[1000:1500, 2500:3000]

# for reproducibility
np.random.seed(0)

# %%===========================================================================
Example #12
0
def dsa_predict(model, gc, item_id, group_name='Positive', ann_doc_name='Default', preprocess_input=None,
                tile_size=(224, 224), save_mag=10, mask_mag=1.25, tissue_threshold=0.3, batch_size=8,
                pred_threshold=0.5, color='rgb(255,153,0)'):
    """Predict on DSA image item, using a grid tiling approach given a binary trained model.

    The image is gridded into tiles starting at the top left corner; tiles that would run past the right or bottom
    edge are skipped. Only tiles with enough tissue (according to a low magnification tissue mask) are predicted on.
    Tiles predicted above pred_threshold are pushed to the item as annotation elements. When there is at least one
    such tile, existing annotation documents on the item whose name starts with ann_doc_name are deleted first.

    Parameters
    ----------
    model : tensorflow.keras.models.Model
        a trained keras model for binary classification
    gc : girder_client.GirderClient
        authenticated client, used to get the images
    item_id : str
        image item id
    group_name : str (optional)
        name of the positive class, will be used as the group name in annotation elements
    ann_doc_name : str (optional)
        prepend name of the annotation documents
    preprocess_input : function (optional)
        a function that is applied to the images to process them, works on a tensor-style image. If None, no
        function is applied. In both cases the batch is divided by 255 before prediction.
    tile_size : tuple (optional)
        size to predict images at
    save_mag : float (optional)
        magnification to extract tiles at, if None then the source magnification is used
    mask_mag : float (optional)
        tissue mask is used to decide which tiles to predict on, this is the magnification of the tissue mask
    tissue_threshold : float (optional)
        fraction of tile that must contain tissue to be predicted on
    batch_size : int (optional)
        predictions are done on images in batches
    pred_threshold : float (optional)
        model predicts a probability from 0 to 1, predictions above pred_threshold are considered the positive class
        that will be pushed as annotations
    color : str (optional)
        rgb(###,###,###) color of element box in annotation element

    Returns
    -------
    annotation_data : dict
        annotation data that was pushed as annotation (lists are empty when nothing was pushed)

    """
    # info about the source image
    im_info = gc.get('item/{}/tiles'.format(item_id))
    fr_mag = im_info['magnification']
    fr_width = im_info['sizeX']
    fr_height = im_info['sizeY']

    if save_mag is None:
        # save magnification will be native magnification
        save_mag = fr_mag

    fr_to_lr_factor = save_mag / fr_mag
    # tile size at full resolution is determined by the save magnification
    tile_w = int(tile_size[0] / fr_to_lr_factor)
    tile_h = int(tile_size[1] / fr_to_lr_factor)

    # get tissue mask
    lr_im = get_region_im(gc, item_id, {'magnification': mask_mag})[:, :, :3]
    tissue_mask = get_tissue_mask(lr_im)[0]

    def to_mask_px(fr_px):
        # convert a full resolution pixel coordinate to tissue mask coordinates
        return int(fr_px * mask_mag / fr_mag)

    # collect the top left corner of every tile to predict on; the range bounds
    # exclude tiles that would go over the right / bottom edge of the image
    coords = []
    for x in range(0, fr_width - tile_w + 1, tile_w):
        for y in range(0, fr_height - tile_h + 1, tile_h):
            tissue_tile = tissue_mask[to_mask_px(y):to_mask_px(y + tile_h),
                                      to_mask_px(x):to_mask_px(x + tile_w)]

            # a tile may map to an empty slice of the mask; there is no tissue
            # evidence for it, so skip it instead of dividing by zero
            if tissue_tile.size == 0:
                continue

            # skip if tile does not contain enough tissue
            if np.count_nonzero(tissue_tile) / tissue_tile.size < tissue_threshold:
                continue
            coords.append((x, y))

    # break the coords in batch size chunks
    coord_batches = [coords[i:i + batch_size] for i in range(0, len(coords), batch_size)]
    annotation_data = {col_name: [] for col_name in COL_NAMES}

    print('predicting in batches')
    print('*********************')
    for batch_num, coord_batch in enumerate(coord_batches):
        print('{} of {}'.format(batch_num + 1, len(coord_batches)))
        # get all the images in this batch
        batch_ims = []
        for left, top in coord_batch:
            region = {'left': left, 'top': top, 'width': tile_w, 'height': tile_h, 'magnification': save_mag}
            batch_ims.append(get_region_im(gc, item_id, region)[:, :, :3])

        # convert to tensor shape
        batch_ims = np.array(batch_ims)

        # preprocess_input is optional (defaults to None), only call it when given
        if preprocess_input is not None:
            batch_ims = preprocess_input(batch_ims)
        batch_ims = batch_ims / 255.

        # predict on the batch
        predictions = model.predict(batch_ims)

        # record the tiles predicted as the positive class
        for (xmin, ymin), pred in zip(coord_batch, predictions):
            if pred[0] <= pred_threshold:
                continue

            xmax = xmin + tile_w
            ymax = ymin + tile_h
            annotation_data['group'].append(group_name)
            annotation_data['color'].append(color)
            annotation_data['has_holes'].append(0.0)
            annotation_data['touches_edge-top'].append(0.0)
            annotation_data['touches_edge-left'].append(0.0)
            annotation_data['touches_edge-bottom'].append(0.0)
            annotation_data['touches_edge-right'].append(0.0)
            annotation_data['xmin'].append(xmin)
            annotation_data['ymin'].append(ymin)
            annotation_data['xmax'].append(xmax)
            annotation_data['ymax'].append(ymax)
            annotation_data['coords_x'].append('{},{},{},{}'.format(xmin, xmax, xmax, xmin))
            annotation_data['coords_y'].append('{},{},{},{}'.format(ymin, ymin, ymax, ymax))

    # only push if annotation data is not empty
    n = len(annotation_data['group'])
    if not n:
        print('no positive tiles to push..')
        return annotation_data

    print('number of tiles to push: {}'.format(n))
    contours_df = DataFrame(annotation_data)
    annotation_docs = get_annotation_documents_from_contours(
        contours_df.copy(), separate_docs_by_group=False, annots_per_doc=100, docnamePrefix=ann_doc_name,
        annprops=ANNPROPS, verbose=False, monitorPrefix=''
    )

    # get current annotations documents from item
    existing_annotations = gc.get('/annotation/item/' + item_id)

    # delete annotation documents starting with the same prefix as about to be pushed
    for ann in existing_annotations:
        if 'name' in ann['annotation']:
            doc_name = ann['annotation']['name']
            if doc_name.startswith(ann_doc_name):
                gc.delete('/annotation/%s' % ann['_id'])

    # post the annotation documents you created
    for annotation_doc in annotation_docs:
        gc.post("/annotation?itemId=" + item_id, json=annotation_doc)

    return annotation_data
Example #13
0
def grid_tiling(gc,
                item_id,
                group_names,
                save_dir,
                save_mag=None,
                mask_mag=1.25,
                tile_size=(224, 224),
                tissue_threshold=0.3,
                annotation_threshold=0.15,
                random_seed=64,
                is_test=False,
                oversample_background=2.0,
                reinhard_stats=None):
    """Split a DSA image item (WSI) into smaller images and save locally grouped by annotations. This approach grids the
    image into equal sized small images, or tiles (i.e. a grid is placed over the WSI starting at the top left corner).
    At the bottom and right edge of the WSI the tiles are ignored if not of correct size (the case where the WSI
    dimensions are not a multiple factor of the tile size). A list of annotation group names are needed to group the
    tiles into classes of images saved in their own directories. Tiles with no tissue detected are ignored and tiles not
    containing annotations (but have tissue) are by default saved into background class. A background annotation group
    will cause issues so avoid having this annotation group name.

    Tiles can be saved at a lower magnification than source image if needed (param: save_mag). Note that tiles size
    specified should be the tile size at the save magnification not the source magnification. Image saved will be of the
    tile size specified in parameters, regardless of the save_mag used.

    Parameters
    ----------
    gc : girder_client.GirderClient
        authenticated client
    item_id : str
        DSA image item id
    group_names : list
        list of annotation group names
    save_dir : str
        directory to create group directories with images - save_dir / group_name_1, save_dir / background, etc.
    save_mag : float (optional)
        magnification to use when saving the images, if None then source magnification will be used.
    mask_mag : float (optional)
        magnification to create binary mask of tissue and annotations. Note that if your annotations are very small
        it will benefit to use a larger value than default here, but binary masks will fail to create at very high
        magnifications.
    tile_size : tuple (optional)
        size (width, height) to save tiles at, note that this is the size it will be saved at regardless of the
        magnification used to save the images at (i.e. if save_mag is 4 times less than the source magnification than
        the actual tile_size will represent 4 times the pixels at full resolution).
    tissue_threshold : float (optional)
        from 0 to 1, fraction of tile that must contain tissue to be included
    annotation_threshold : float (optional)
        from 0 to 1, fraction of tile that must contain annotation (per group) to be labeled as annotation. Note
        that a single tile may be saved twice, representing multiple classes.
    random_seed : int (optional)
        random seed to use when shuffling the background regions
    is_test : bool (optional)
        if True then all the background regions will be saved, otherwise oversample_background will be used to determine
        how many background regions to save
    oversample_background : float (optional)
        factor to oversample background class images, compared to the number of images of the class of annotation
        images with the most images saved
    reinhard_stats : dict (optional)
        if not None then the images saved will be color augmented by color normalizing the tiles using the Reinhard
        color norm method. Each value of this dict must itself be a dict with 'mu' and 'sigma' keys holding the
        target stats to normalize to; one extra image is saved per entry, with '_norm_<entry index>' appended to the
        file name. The source stats of this image are calculated with reinhard_color_stats(gc, item_id).

    Returns
    -------
    None
        images are written to save_dir as a side effect

    """
    im_info = gc.get('item/{}/tiles'.format(item_id))
    if save_mag is None:
        save_mag = im_info['magnification']

    # source color stats are only needed (and only computed) for augmentation
    mu = sigma = None
    if reinhard_stats is not None:
        mu, sigma = reinhard_color_stats(gc, item_id)

    # ----- prep work ----- #
    filename = splitext(gc.getItem(item_id)['name'])[0]

    def save_tile(out_dir, left, top, tile_im):
        # write the tile and, if requested, one Reinhard normalized copy per
        # entry of reinhard_stats
        stem = '{}_x_{}_y_{}'.format(filename, left, top)
        imwrite(join(out_dir, stem + '.png'), tile_im)

        if reinhard_stats is None:
            return
        for j, target in enumerate(reinhard_stats.values()):
            im_norm = reinhard(tile_im.copy(),
                               target['mu'],
                               target['sigma'],
                               src_mu=mu,
                               src_sigma=sigma)
            imwrite(join(out_dir, '{}_norm_{}.png'.format(stem, j)), im_norm)

    # create dirs for each image class to save
    group_dirs = [join(save_dir, group_name) for group_name in group_names]
    for group_dir in group_dirs:
        makedirs(group_dir, exist_ok=True)
    background_dir = join(save_dir, 'background')
    makedirs(background_dir, exist_ok=True)

    # get image annotations
    annotations = gc.get('/annotation/item/' + item_id)

    # create a dataframe to use with annotation to mask handler functions (gt codes); group i gets GT code i + 1
    gt_data = [[group_name, 1, i + 1, 0, 0, 'rgb(0, 0, {})'.format(i), '']
               for i, group_name in enumerate(group_names)]
    gt_codes = pd.DataFrame(columns=[
        'group', 'overlay_order', 'GT_code', 'is_roi', 'is_background_class',
        'color', 'comments'
    ],
                            data=gt_data,
                            index=range(len(group_names)))
    gt_codes.index = gt_codes.loc[:, 'group']

    # get binary masks - tissue mask and annotation(s) mask
    mask_mag_factor, _ = get_scale_factor_and_appendStr(gc=gc,
                                                        slide_id=item_id,
                                                        MAG=mask_mag)
    # - scaling the annotations to lower magnification (on a copy, the originals are scaled separately below)
    mask_annotations = scale_slide_annotations(deepcopy(annotations),
                                               sf=mask_mag_factor)

    # - binary masks are for the whole image at low resolution, function returns also the RGB image which we use for
    # - getting the tissue mask
    mask_element_info = get_bboxes_from_slide_annotations(mask_annotations)
    get_kwargs = deepcopy(
        GET_KWARGS)  # avoid referencing on the global variable
    get_kwargs['gc'] = gc
    get_kwargs['slide_id'] = item_id
    get_kwargs['GTCodes_dict'] = gt_codes.T.to_dict()
    get_kwargs['bounds'] = None
    get_kwargs['MAG'] = mask_mag
    ann_mask_and_image = get_image_and_mask_from_slide(
        mode='wsi',
        slide_annotations=mask_annotations,
        element_infos=mask_element_info,
        **get_kwargs)
    tissue_mask = get_tissue_mask(ann_mask_and_image['rgb'])[0]
    annotation_mask = ann_mask_and_image['ROI']

    # convert the annotations to lower magnification
    fr_to_lr_factor, _ = get_scale_factor_and_appendStr(gc=gc,
                                                        slide_id=item_id,
                                                        MAG=save_mag)
    annotations = scale_slide_annotations(annotations, sf=fr_to_lr_factor)
    lr_element_info = get_bboxes_from_slide_annotations(annotations)

    # get full resolution information for image
    fr_mag = im_info['magnification']
    fr_width = im_info['sizeX']
    fr_height = im_info['sizeY']
    # tile (width, height) at full resolution
    tile_w = int(tile_size[0] / fr_to_lr_factor)
    tile_h = int(tile_size[1] / fr_to_lr_factor)

    # change the get_kwargs to save magnification
    get_kwargs['MAG'] = save_mag

    def to_mask_px(fr_px):
        # convert a full resolution pixel coordinate to mask coordinates
        return int(fr_px * mask_mag / fr_mag)

    # ----- loop through image at full res ----- #
    group_annotation_counts = [0] * len(group_names)
    background_regions = []
    # the range bounds exclude tiles that would go over the right / bottom edge of the image
    for x in range(0, fr_width - tile_w + 1, tile_w):
        for y in range(0, fr_height - tile_h + 1, tile_h):
            rows = slice(to_mask_px(y), to_mask_px(y + tile_h))
            cols = slice(to_mask_px(x), to_mask_px(x + tile_w))

            # check tile for tissue, using the binary mask for tissue
            tissue_tile = tissue_mask[rows, cols]

            # a tile may map to an empty slice of the mask; there is no tissue
            # evidence for it, so skip it instead of dividing by zero
            if tissue_tile.size == 0:
                continue

            # skip if tile does not contain enough tissue
            if np.count_nonzero(
                    tissue_tile) / tissue_tile.size < tissue_threshold:
                continue

            # check tile for annotations, using the binary mask for annotations
            annotation_tile = annotation_mask[rows, cols]

            # the tile image is the same for every group, fetch it at most once
            annotation_im = None

            # tile is background if no annotation is present (of any group)
            background_flag = True
            # - check for each annotation group
            for i in range(len(group_names)):
                group_annotation_tile = annotation_tile == i + 1

                # tile is ignored for this group if it does not contain enough annotation
                if group_annotation_tile.size == 0 or np.count_nonzero(
                        group_annotation_tile
                ) / group_annotation_tile.size < annotation_threshold:
                    continue

                background_flag = False
                group_annotation_counts[i] += 1

                if annotation_im is None:
                    get_kwargs['bounds'] = {
                        'XMIN': x,
                        'XMAX': x + tile_w,
                        'YMIN': y,
                        'YMAX': y + tile_h
                    }
                    annotation_im = get_image_and_mask_from_slide(
                        mode='manual_bounds',
                        slide_annotations=annotations,
                        element_infos=lr_element_info,
                        **get_kwargs)['rgb']

                # save the image (and any augmented copies) to correct directory
                save_tile(group_dirs[i], x, y, annotation_im)

            if background_flag:
                # save coordinates of background image candidates
                background_regions.append({
                    'magnification': save_mag,
                    'left': x,
                    'top': y,
                    'width': tile_w,
                    'height': tile_h
                })

    # randomly select background class coordinates
    # - oversample the background class by a factor of the most represented annotation class
    Random(random_seed).shuffle(background_regions)
    if not is_test:
        # default=0 keeps this well defined when group_names is empty
        n_background = int(oversample_background *
                           max(group_annotation_counts, default=0))
        background_regions = background_regions[:n_background]

    for region in background_regions:
        tile_im = get_region_im(gc, item_id, region)[:, :, :3]

        # save background image (and any augmented copies)
        save_tile(background_dir, region['left'], region['top'], tile_im)