def _get_contours_df(MASK,
                     GTCodes_df,
                     groups_to_get=None,
                     MIN_SIZE=30,
                     MAX_SIZE=None,
                     verbose=False,
                     monitorPrefix=""):
    """Parse ground truth mask and gets countours (Internal)."""
    cpr = Print_and_log(verbose=verbose)
    _print = cpr._print

    # pad with zeros to be able to detect edge contours later
    pad_margin = 50
    MASK = np.pad(MASK, pad_margin, 'constant')

    # Go through unique groups one by one -- each group (i.e. GTCode)
    # is extracted separately by binarizing the multi-class mask
    if groups_to_get is None:
        groups_to_get = list(GTCodes_df.index)
    contours_df = DataFrame()

    for nestgroup in groups_to_get:

        bin_mask = 0 + (MASK == GTCodes_df.loc[nestgroup, 'GT_code'])

        if bin_mask.sum() < MIN_SIZE * MIN_SIZE:
            _print("%s: %s: NO OBJECTS!!" % (monitorPrefix, nestgroup))
            continue

        _print("%s: %s: getting contours" % (monitorPrefix, nestgroup))
        conts = get_contours_from_bin_mask(bin_mask=bin_mask)
        n_tumor_nests = conts['outer_contours'].shape[0]

        # add nest contours
        _print("%s: %s: adding contours" % (monitorPrefix, nestgroup))
        for cidx in range(n_tumor_nests):
            try:
                nestcountStr = "%s: nest %s of %s" % (monitorPrefix, cidx,
                                                      n_tumor_nests)
                if cidx % 25 == 100:
                    _print(nestcountStr)

                contours_df = _add_contour_to_df(
                    contours_df,
                    mask_shape=bin_mask.shape,
                    conts=conts,
                    cidx=cidx,
                    nest_info=dict(GTCodes_df.loc[nestgroup, :]),
                    pad_margin=pad_margin,
                    MIN_SIZE=MIN_SIZE,
                    MAX_SIZE=MAX_SIZE,
                    monitorPrefix=nestcountStr)

            except Exception as e:
                _print(e)
                continue

    return contours_df
def get_single_annotation_document_from_contours(contours_df_slice,
                                                 docname='default',
                                                 F=1.0,
                                                 X_OFFSET=0,
                                                 Y_OFFSET=0,
                                                 opacity=0.3,
                                                 lineWidth=4.0,
                                                 verbose=True,
                                                 monitorPrefix=""):
    """Given dataframe of contours, get annotation document.

    This uses the large_image annotation schema to create an annotation
    document that maybe posted to DSA for viewing using something like:
    resp = gc.post("/annotation?itemId=" + slide_id, json=annotation_doc)
    The annotation schema can be found at:
    github.com/girder/large_image/blob/master/docs/annotations.md .

    Parameters
    -----------
    contours_df_slice : pandas DataFrame
        The following columns are of relevance and must be contained.

        group : str
            annotation group (ground truth label).
        color : str
            annotation color if it were to be posted to DSA.
        coords_x : str
            vertix x coordinates comma-separated values
        coords_y
            vertix y coordinated comma-separated values
    docname : str
        annotation document name
    F : float
        how much smaller is the mask where the contours come from is relative
        to the slide scan magnification. For example, if the mask is at 10x
        whereas the slide scan magnification is 20x, then F would be 2.0.
    X_OFFSET : int
        x offset to add to contours at BASE (SCAN) magnification
    Y_OFFSET : int
        y offset to add to contours at BASE (SCAN) magnification
    opacity : float
        opacity of annotation elements (in the range [0, 1])
    lineWidth : float
        width of boarders of annotation elements
    verbose : bool
        Print progress to screen?
    monitorPrefix : str
        text to prepend to printed statements

    Returns
    --------
    dict
        DSA-style annotation document ready to be post for viewing.

    """
    cpr = Print_and_log(verbose=verbose)
    _print = cpr._print

    def _get_fillColor(lineColor):
        fillColor = lineColor.replace("rgb", "rgba")
        return fillColor[:fillColor.rfind(")")] + ",%.1f)" % opacity

    # Init annotation document in DSA style
    annotation_doc = {'name': docname, 'description': '', 'elements': []}

    # go through nests
    nno = 0
    nnests = contours_df_slice.shape[0]
    for _, nest in contours_df_slice.iterrows():

        nno += 1
        nestStr = "%s: contour %d of %s" % (monitorPrefix, nno, nnests)
        _print(nestStr)

        # Parse coordinates
        try:
            x_coords = F * np.int32(
                [int(j) for j in nest['coords_x'].split(',')]) + X_OFFSET
            y_coords = F * np.int32(
                [int(j) for j in nest['coords_y'].split(',')]) + Y_OFFSET
            zeros = np.zeros(x_coords.shape, dtype=np.int32)
            coords = np.concatenate(
                (x_coords[:, None], y_coords[:, None], zeros[:, None]), axis=1)
            coords = coords.tolist()
            coords.append(coords[0])
        except Exception as e:
            _print("%s: ERROR (below) - moving on!!!" % nestStr)
            _print(e)
            continue

        # assign to annotation style. See:
        # github.com/girder/large_image/blob/master/docs/annotations.md
        annotation_style = {
            "group": nest['group'],
            "type": "polyline",
            "lineColor": nest['color'],
            "lineWidth": lineWidth,
            "closed": True,
            "points": coords,
            "label": {
                'value': nest['label']
            },
        }
        if opacity > 0:
            annotation_style["fillColor"] = _get_fillColor(nest['color'])

        # append to document
        annotation_doc['elements'].append(annotation_style)

    return annotation_doc
def get_contours_from_mask(MASK,
                           GTCodes_df,
                           groups_to_get=None,
                           MIN_SIZE=30,
                           MAX_SIZE=None,
                           get_roi_contour=True,
                           roi_group='roi',
                           discard_nonenclosed_background=False,
                           background_group='mostly_stroma',
                           verbose=False,
                           monitorPrefix=""):
    """Parse ground truth mask and gets countours for annotations.

    Parameters
    -----------
    MASK : nd array
        ground truth mask (m,n) where pixel values encode group membership.
    GTCodes_df : pandas Dataframe
        the ground truth codes and information dataframe.
        This is a dataframe that is indexed by the annotation group name and
        has the following columns.

        group: str
            group name of annotation, eg. mostly_tumor.
        GT_code: int
            desired ground truth code (in the mask). Pixels of this value
            belong to corresponding group (class).
        color: str
            rgb format. eg. rgb(255,0,0).
    groups_to_get : None
        if None (default) then all groups (ground truth labels) will be
        extracted. Otherwise pass a list fo strings like ['mostly_tumor',].
    MIN_SIZE : int
        minimum bounding box size of contour
    MAX_SIZE : None
        if not None, int. Maximum bounding box size of contour. Sometimes
        very large contours cause segmentation faults that originate from
        opencv and are not caught by python, causing the python process
        to unexpectedly hault. If you would like to set a maximum size to
        defend against this, a suggested maximum would be 15000.
    get_roi_contour : bool
        whether to get contour for boundary of region of interest (ROI). This
        is most relevant when dealing with multiple ROIs per slide and with
        rotated rectangular or polygonal ROIs.
    roi_group : str
        name of roi group in the GT_Codes dataframe (eg roi)
    discard_nonenclosed_background : bool
        If a background group contour is NOT fully enclosed, discard it.
        This is a purely aesthetic method, makes sure that the background group
        contours (eg stroma) are discarded by default to avoid cluttering the
        field when posted to DSA for viewing online. The only exception is
        if they are enclosed within something else (eg tumor), in which case
        they are kept since they represent holes. This is related to
        https://github.com/DigitalSlideArchive/HistomicsTK/issues/675
        WARNING - This is a bit slower since the contours will have to be
        converted to shapely polygons. It is not noticeable for hundreds of
        contours, but you will notice the speed difference if you are parsing
        thousands of contours. Default, for this reason, is False.
    background_group : str
        name of background group in the GT_codes dataframe (eg mostly_stroma)
    verbose : bool
        Print progress to screen?
    monitorPrefix : str
        text to prepend to printed statements

    Returns
    --------
    pandas DataFrame
        contours extracted from input mask. The following columns are output.

        group : str
            annotation group (ground truth label).
        color : str
            annotation color if it were to be posted to DSA.
        is_roi : bool
            whether this annotation is a region of interest boundary
        ymin : int
            minimun y coordinate
        ymax : int
            maximum y coordinate
        xmin : int
            minimum x coordinate
        xmax : int
            maximum x coordinate
        has_holes : bool
            whether this contour has holes
        touches_edge-top : bool
            whether this contour touches top mask edge
        touches_edge-bottom : bool
            whether this contour touches bottom mask edge
        touches_edge-left : bool
            whether this contour touches left mask edge
        touches_edge-right : bool
            whether this contour touches right mask edge
        coords_x : str
            vertix x coordinates comma-separated values
        coords_y
            vertix y coordinated comma-separated values

    """
    if MASK.sum() < 3:
        raise Exception("Mask is empty!!")
    cpr = Print_and_log(verbose=verbose)
    _print = cpr._print
    if groups_to_get is not None:
        _print("""WARNING!! Only specify groups_to_get is you do NOT mind
               having NO holes in polygons with holes that are occupied
               by a non-specified group. For example, let's say you
               specified that you only want to extract contours for
               tumor and stroma. If there is a large tumor polygon with two
               holes for stroma and blood vessel, the stroma hole will be
               accounted for, but not the blood vessel hole when you
               post these contours to DSA for viewing then pull them
               to be parse back into mask form. It's a subtle issue related
               to
               https://github.com/DigitalSlideArchive/HistomicsTK/issues/675
               and will eventually be accounted for once HistomicsTK
               has an official format to encode polygons with holes.""")

    cont_kwargs = {
        'GTCodes_df': GTCodes_df,
        'MIN_SIZE': MIN_SIZE,
        'MAX_SIZE': MAX_SIZE,
        'verbose': verbose,
    }

    # get contours df for non-roi contours
    contours_df = _get_contours_df(MASK=MASK,
                                   groups_to_get=groups_to_get,
                                   monitorPrefix="%s: %s" %
                                   (monitorPrefix, "non-roi"),
                                   **cont_kwargs)

    # discard non-enclosed background (eg stroma) if needed
    if discard_nonenclosed_background:
        contours_df = _discard_nonenclosed_background_group(
            contours_df,
            background_group=background_group,
            verbose=verbose,
            monitorPrefix="%s: %s" % (monitorPrefix, "discarding backgrnd"))

    # get contours df for roi boundary and concat
    if get_roi_contour:
        MASK_BIN = np.zeros(MASK.shape, dtype=np.uint8)
        MASK_BIN[MASK > 0] = GTCodes_df.loc[roi_group, 'GT_code']
        contours_df_roi = _get_contours_df(MASK=MASK_BIN,
                                           groups_to_get=[
                                               roi_group,
                                           ],
                                           monitorPrefix="%s: %s" %
                                           (monitorPrefix, roi_group),
                                           **cont_kwargs)
        contours_df = concat((contours_df_roi, contours_df),
                             axis=0,
                             ignore_index=True)

    return contours_df
def _discard_nonenclosed_background_group(contours_df,
                                          background_group='mostly_stroma',
                                          verbose=False,
                                          monitorPrefix=""):
    """If a background group contour is NOT fully enclosed, discard it.

    This is a purely aesthetic method, makes sure that the background group
    contours (eg stroma) are discarded by default to avoid cluttering the
    field when posted to DSA for viewing online. The only exception is
    if they are enclosed within something else (eg tumor), in which case they
    are kept since they represent holes. This is related to
    https://github.com/DigitalSlideArchive/HistomicsTK/issues/675
    (Internal).

    """
    cpr = Print_and_log(verbose=verbose)
    _print = cpr._print

    # isolate background contours and non-background contours with holes
    background = contours_df.loc[contours_df.loc[:, "group"] ==
                                 background_group, :]
    contours_with_holes = contours_df.loc[
        contours_df.loc[:, "group"] != background_group, :]
    contours_with_holes = contours_with_holes.loc[
        contours_with_holes.loc[:, "has_holes"] == 1, :]

    def _append_polygon_if_valid(contDict, cid, polygon_list):
        try:
            polygon = Polygon(_parse_annot_coords(contDict))
            if polygon.is_valid:
                polygon_list.append(polygon)
        except Exception as e:
            _print("%s: contour %d: Shapely Error (below) -- IGNORED!" %
                   (monitorPrefix, cid))
            _print(e)
        return polygon_list

    # to avoid redoing things, keep all non-background with holes in a list
    contour_polygons = []
    for cid, cont in contours_with_holes.iterrows():
        contour_polygons = _append_polygon_if_valid(
            dict(cont), cid=cid, polygon_list=contour_polygons)

    # iterate through stromal polygons and find if enclosed within something
    discard_cids = []
    for cid, cont in background.iterrows():
        bck_list = _append_polygon_if_valid(dict(cont),
                                            cid=cid,
                                            polygon_list=[])
        # only keep if enclosed with another contour
        discard = True
        if len(bck_list) > 0:
            for contour_polygon in contour_polygons:
                if contour_polygon.contains(bck_list[0]):
                    discard = False
        if discard:
            discard_cids.append(cid)

    # now drop unnecessary contours
    _print("%s: discarded %d contours" % (monitorPrefix, len(discard_cids)))
    contours_df.drop(discard_cids, axis=0, inplace=True)

    return contours_df