Code example #1
    def test_get_contours_from_mask_with_zeroes(self):
        """Test get_contours_from_mask()."""
        self._setup()
        groups_to_get = None
        gtcodes = self.GTCodes_df.append(
            {
                'group': 'zeroes',
                'overlay_order': 4,
                'GT_code': 0,
                'is_roi': 0,
                'is_background_class': 0,
                'color': 'rgb(0,128,0)',
                'comments': 'zeroes'
            },
            ignore_index=True)
        gtcodes.index = gtcodes.loc[:, 'group']
        contours_df = get_contours_from_mask(
            MASK=self.MASK,
            GTCodes_df=gtcodes,
            groups_to_get=groups_to_get,
            get_roi_contour=True,
            roi_group='roi',
            discard_nonenclosed_background=True,
            background_group='mostly_stroma',
            MIN_SIZE=30,
            MAX_SIZE=None)

        # make sure it is what we expect
        assert set(contours_df.columns) == set(self.CONTOURS_DF.columns)
        assert (contours_df.iloc[:10, :]
                == self.CONTOURS_DF.iloc[:10, :]).all().all()
        assert len(contours_df) == 49
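
Note: `DataFrame.append` as used above was removed in pandas 2.0. A minimal sketch of the same row addition using `pd.concat` (a modernization suggestion, not the repo's code) might look like:

import pandas as pd

# inside the same test method, equivalent to the GTCodes_df.append(...) call
new_row = pd.DataFrame([{
    'group': 'zeroes', 'overlay_order': 4, 'GT_code': 0,
    'is_roi': 0, 'is_background_class': 0,
    'color': 'rgb(0,128,0)', 'comments': 'zeroes',
}])
gtcodes = pd.concat([self.GTCodes_df, new_row], ignore_index=True)
gtcodes.index = gtcodes.loc[:, 'group']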
Code example #2
    def set_contours_from_all_masks(self, monitorPrefix=""):
        """Get contours_df from all masks.

        This is a wrapper around get_contours_from_mask(), with the added
        functionality of separating out contours at the roi edge from those that
        are not.

        Sets:
        - self.ordinary_contours: dict: indexed by maskname, each entry
        is a contours dataframe
        - self.edge_contours: dict: indexed by maskname, each entry is
        a contours dataframe
        - self.merged_contours: pandas DataFrame: single dataframe to
        save all merged contours

        """
        ordinary_contours = dict()
        edge_contours = dict()

        to_remove = []

        for midx, maskpath in enumerate(self.maskpaths):

            # read mask
            MASK = imread(maskpath)

            # mask is empty!
            if MASK.sum() < 2:
                to_remove.append(maskpath)
                continue

            # extract contours
            contours_df = get_contours_from_mask(
                MASK=MASK,
                monitorPrefix="%s: mask %d of %d" % (
                    monitorPrefix, midx, len(self.maskpaths)),
                **self.contkwargs)

            # no contours!
            if contours_df.shape[0] < 1:
                to_remove.append(maskpath)
                continue

            # separate edge from non-edge contours
            edgeids = []
            for edge in ['top', 'left', 'bottom', 'right']:
                edgeids.extend(list(contours_df.loc[contours_df.loc[
                    :, 'touches_edge-%s' % edge] == 1, :].index))
            edgeids = list(set(edgeids))
            roiname = os.path.split(maskpath)[1]
            edge_contours[roiname] = contours_df.loc[edgeids, :].copy()
            ordinary_contours[roiname] = contours_df.drop(edgeids, axis=0)

        self.maskpaths = [j for j in self.maskpaths if j not in to_remove]

        self.ordinary_contours = ordinary_contours
        self.edge_contours = edge_contours
        # init dataframe to save merged contours
        colnames = edge_contours[next(iter(edge_contours))].columns
        self.merged_contours = DataFrame(columns=colnames)
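
The edge/non-edge split above keys off the `touches_edge-*` flag columns emitted by `get_contours_from_mask()`. A minimal standalone sketch of the same split (the column values here are made up for illustration):

import pandas as pd

# toy contours dataframe with the touches_edge-* flags
contours_df = pd.DataFrame({
    'group': ['mostly_tumor', 'mostly_stroma', 'mostly_tumor'],
    'touches_edge-top': [0, 1, 0],
    'touches_edge-left': [0, 0, 0],
    'touches_edge-bottom': [0, 0, 1],
    'touches_edge-right': [0, 0, 0],
})
edge_cols = [c for c in contours_df.columns if c.startswith('touches_edge-')]
is_edge = contours_df[edge_cols].eq(1).any(axis=1)
edge_contours = contours_df[is_edge]
ordinary_contours = contours_df[~is_edge]
assert len(edge_contours) == 2 and len(ordinary_contours) == 1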
Code example #3
    def visualize_results(self):
        """Visualize results in DSA."""
        # get contours
        contours_df = get_contours_from_mask(
            MASK=self.labeled,
            GTCodes_df=self.cdt.GTcodes.copy(),
            get_roi_contour=True,
            roi_group='roi',
            background_group='not_specified',
            discard_nonenclosed_background=True,
            MIN_SIZE=15,
            MAX_SIZE=None,
            verbose=self.cdt.verbose == 3,
            monitorPrefix=self.monitorPrefix + ": -- contours")

        # get annotation docs
        annprops = {
            'F': self.cdt.slide_info['magnification'] / self.cdt.MAG,
            'X_OFFSET': self.xmin,
            'Y_OFFSET': self.ymin,
            'opacity': self.cdt.opacity,
            'lineWidth': self.cdt.lineWidth,
        }
        annotation_docs = get_annotation_documents_from_contours(
            contours_df.copy(),
            separate_docs_by_group=True,
            docnamePrefix='cdt',
            annprops=annprops,
            verbose=self.cdt.verbose == 3,
            monitorPrefix=self.monitorPrefix + ": -- annotation docs")

        # post annotations to slide
        for doc in annotation_docs:
            _ = self.cdt.gc.post("/annotation?itemId=" + self.cdt.slide_id,
                                 json=doc)
Code example #4
def get_tissue_boundary_annotation_documents(
        gc, slide_id, labeled,
        color='rgb(0,0,0)', group='tissue', annprops=None):
    """Get annotation documents of tissue boundaries to visualize on DSA.

    Parameters
    -----------
    gc : object
        girder client to use
    slide_id : str
        girder ID of slide
    labeled : np array
        mask of tissue regions using slide thumbnail. This could either be
        a binary mask or a mask where each unique value corresponds to one
        tissue region. It will be binarized anyway. This can be obtained
        using get_tissue_mask().
    color : str
        color to assign to boundaries. format like rgb(0,0,0)
    group : str
        label for annotations
    annprops : dict
        properties of annotation elements. Contains the following keys
        F, X_OFFSET, Y_OFFSET, opacity, lineWidth. Refer to
        get_single_annotation_document_from_contours() for details.

    Returns
    --------
    list of dicts
        each dict is an annotation document that you can post to DSA

    """
    # Get annotations properties
    if annprops is None:
        slide_info = gc.get('item/%s/tiles' % slide_id)
        annprops = {
            'F': slide_info['sizeX'] / labeled.shape[1],  # relative to base
            'X_OFFSET': 0,
            'Y_OFFSET': 0,
            'opacity': 0,
            'lineWidth': 4.0,
        }

    # Define GTCodes dataframe
    GTCodes_df = DataFrame(columns=['group', 'GT_code', 'color'])
    GTCodes_df.loc['tissue', 'group'] = group
    GTCodes_df.loc['tissue', 'GT_code'] = 1
    GTCodes_df.loc['tissue', 'color'] = color

    # get annotation docs
    contours_tissue = get_contours_from_mask(
        MASK=0 + (labeled > 0), GTCodes_df=GTCodes_df,
        get_roi_contour=False, MIN_SIZE=0, MAX_SIZE=None, verbose=False,
        monitorPrefix="tissue: getting contours")
    annotation_docs = get_annotation_documents_from_contours(
        contours_tissue.copy(), docnamePrefix='test', annprops=annprops,
        verbose=False, monitorPrefix="tissue : annotation docs")

    return annotation_docs
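
A hedged usage sketch: the docstring points to `get_tissue_mask()` for obtaining `labeled`; the thumbnail endpoint and the exact `get_tissue_mask()` call below are assumptions based on the HistomicsTK tissue-detection workflow, not verified against a live server.

from io import BytesIO

import girder_client
import numpy as np
from imageio import imread
from histomicstk.saliency.tissue_detection import get_tissue_mask

gc = girder_client.GirderClient(apiUrl=APIURL)  # APIURL/slide_id: placeholders
gc.authenticate(interactive=True)

# fetch a slide thumbnail and detect tissue on it (endpoint is an assumption)
resp = gc.get('item/%s/tiles/thumbnail' % slide_id, jsonResp=False)
thumbnail_rgb = np.asarray(imread(BytesIO(resp.content)))
labeled, _ = get_tissue_mask(thumbnail_rgb, deconvolve_first=True)

annotation_docs = get_tissue_boundary_annotation_documents(
    gc, slide_id=slide_id, labeled=labeled)
for doc in annotation_docs:
    gc.post('/annotation?itemId=' + slide_id, json=doc)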
Code example #5
def parse_sparse_mask_for_use(sparse_mask,
                              labels: list = None,
                              rgtcodes: dict = None,
                              min_bbox_side=None,
                              max_bbox_side=None):
    """Parse sparse mask for visualization and pushing to histomicsUI etc.

    Parameters
    ----------
    sparse_mask (np.array): n_objects, m, n
    labels (list): labels corresponding to the channels
    rgtcodes (dict): keys are integer ground truth codes, values are the
        histomicstk style for this label (group, lineColor, ...)

    Returns
    -------
    np.array: dense mask where first channel is label (semantic segmentation),
        while product of second and third channels is nucleus id
    dict: keys are individual nucleus ids, values are labels of nuclei
    DataFrame: each row is a contour. Histomicstk style.

    """
    if labels is None:
        labels = [1] * sparse_mask.shape[0]
        rgtcodes = {1: {'group': 'nucleus', 'color': 'rgb(255,255,0)'}}

    rgtcodes = DefaultAnnotationStyles.rgtcodes_dict if rgtcodes is None else \
        rgtcodes

    # "condense" masks
    dense_mask, labels_map = from_sparse_to_dense_object_mask(
        sparse_mask=sparse_mask,
        labels=labels,
        min_side=min_bbox_side,
        max_side=max_bbox_side)
    nids_mask = np.float32(dense_mask)
    nids_mask = nids_mask[..., 1] * nids_mask[..., 2]

    # extract contours from condensed mask
    contours_df = get_contours_from_mask(
        MASK=nids_mask,
        GTCodes_df=DataFrame.from_records(
            data=[{
                'group': rgtcodes[label]['group'],
                'GT_code': instanceid,
                'color': rgtcodes[label]['color']
            } for instanceid, label in labels_map.items()]),
        MIN_SIZE=1 if min_bbox_side is None else min_bbox_side,
        get_roi_contour=False,
    )

    return dense_mask, labels_map, contours_df
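
To make the return contract concrete, a tiny numpy sketch of decoding the dense mask (this just restates the docstring: channel 0 is the class label, and the product of channels 1 and 2 is the nucleus id):

import numpy as np

dense_mask = np.zeros((3, 4, 3), dtype=np.int64)
dense_mask[1, 1] = [2, 7, 6]  # class label 2, nucleus id 7 * 6 = 42

semantic = dense_mask[..., 0]                       # semantic segmentation
instance = dense_mask[..., 1] * dense_mask[..., 2]  # per-pixel nucleus ids
assert semantic[1, 1] == 2 and instance[1, 1] == 42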
Code example #6
    def visualize_contiguous_superpixels(self):
        """Visualize contiguous spixels, color-coded by cellularity."""
        # get cellularity cluster membership mask
        cellularity_mask = np.zeros(self.spixel_mask.shape)
        for spval, sp in self.fdata.iterrows():
            cellularity_mask[self.spixel_mask == spval] = sp['cluster']

        # Define GTCodes dataframe
        GTCodes_df = DataFrame(columns=['group', 'GT_code', 'color'])
        for spval, cp in self.cluster_props.items():
            spstr = 'cellularity-%d' % (cp['cellularity'])
            GTCodes_df.loc[spstr, 'group'] = spstr
            GTCodes_df.loc[spstr, 'GT_code'] = spval
            GTCodes_df.loc[spstr, 'color'] = cp['color']

        # get contours df
        contours_df = get_contours_from_mask(MASK=cellularity_mask,
                                             GTCodes_df=GTCodes_df,
                                             get_roi_contour=False,
                                             MIN_SIZE=0,
                                             MAX_SIZE=None,
                                             verbose=self.cd.verbose == 3,
                                             monitorPrefix=self.monitorPrefix)

        # get annotation docs
        annprops = {
            'F': (self.ymax - self.ymin) / self.tissue_rgb.shape[0],
            'X_OFFSET': self.xmin,
            'Y_OFFSET': self.ymin,
            'opacity': self.cd.opacity_contig,
            'lineWidth': self.cd.lineWidth,
        }
        annotation_docs = get_annotation_documents_from_contours(
            contours_df.copy(),
            docnamePrefix='contig',
            annprops=annprops,
            annots_per_doc=1000,
            separate_docs_by_group=True,
            verbose=self.cd.verbose == 3,
            monitorPrefix=self.monitorPrefix)
        for didx, doc in enumerate(annotation_docs):
            self.cd._print2(
                "%s: Posting doc %d of %d" %
                (self.monitorPrefix, didx + 1, len(annotation_docs)))
            _ = self.cd.gc.post("/annotation?itemId=" + self.cd.slide_id,
                                json=doc)
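
The `F`, `X_OFFSET` and `Y_OFFSET` entries exist so the contours, which live in mask coordinates, can be mapped back to base-magnification slide coordinates. A toy sketch of that mapping, on the assumption (mirroring the `get_single_annotation_document_from_contours()` docstring) that it is an affine scale-then-shift:

# assumption: x_slide ~= x_mask * F + X_OFFSET (and likewise for y)
def mask_to_slide_coords(xs, ys, annprops):
    F = annprops['F']
    return ([x * F + annprops['X_OFFSET'] for x in xs],
            [y * F + annprops['Y_OFFSET'] for y in ys])

xs, ys = mask_to_slide_coords(
    [0, 10], [0, 20], {'F': 2.0, 'X_OFFSET': 100, 'Y_OFFSET': 50})
assert xs == [100.0, 120.0] and ys == [50.0, 90.0]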
Code example #7
    def test_get_contours_from_mask(self):
        """Test get_contours_from_mask()."""
        self._setup()
        # get contours from mask
        # groups_to_get = [
        #     'mostly_tumor', 'mostly_stroma']
        groups_to_get = None
        contours_df = get_contours_from_mask(
            MASK=self.MASK, GTCodes_df=self.GTCodes_df,
            groups_to_get=groups_to_get,
            get_roi_contour=True, roi_group='roi',
            discard_nonenclosed_background=True,
            background_group='mostly_stroma',
            MIN_SIZE=30, MAX_SIZE=None, verbose=False,
            monitorPrefix=self.MASKNAME[:12] + ": getting contours")

        # make sure it is what we expect
        assert set(contours_df.columns) == set(self.CONTOURS_DF.columns)
        assert (contours_df.iloc[:10, :]
                == self.CONTOURS_DF.iloc[:10, :]).all().all()
Code example #8
    def test_get_contours_from_mask(self):
        """Test get_contours_from_mask()."""
        # get contours from mask
        # groups_to_get = [
        #     'mostly_tumor', 'mostly_stroma']
        groups_to_get = None
        contours_df = get_contours_from_mask(
            MASK=MASK,
            GTCodes_df=GTCodes_df,
            groups_to_get=groups_to_get,
            get_roi_contour=True,
            roi_group='roi',
            discard_nonenclosed_background=True,
            background_group='mostly_stroma',
            MIN_SIZE=30,
            MAX_SIZE=None,
            verbose=False,
            monitorPrefix=MASKNAME[:12] + ": getting contours")

        # make sure it is what we expect
        self.assertTupleEqual(contours_df.shape, CONTOURS_DF.shape)
        self.assertSetEqual(set(contours_df.columns), set(CONTOURS_DF.columns))
        self.assertTrue((contours_df == CONTOURS_DF).all().all())
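
The element-wise equality check above passes or fails wholesale without showing where the frames differ; pandas ships a purpose-built assertion that prints a diff. A minimal alternative sketch (`check_dtype=False` is a hedge in case the reference frame was round-tripped through CSV):

import pandas.testing as pdt

# element-wise comparison with a readable diff on mismatch
pdt.assert_frame_equal(contours_df, CONTOURS_DF, check_dtype=False)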
Code example #9
def get_image_and_mask_from_slide(gc,
                                  slide_id,
                                  GTCodes_dict,
                                  MPP=5.0,
                                  MAG=None,
                                  mode='min_bounding_box',
                                  bounds=None,
                                  idx_for_roi=None,
                                  slide_annotations=None,
                                  element_infos=None,
                                  get_roi_mask_kwargs=None,
                                  get_contours_kwargs=None,
                                  linewidth=0.2,
                                  get_rgb=True,
                                  get_contours=True,
                                  get_visualization=True):
    """Parse region from the slide and get its corresponding labeled mask.

    This is a wrapper around get_roi_mask() which should be referred to for
    implementation details.

    Parameters
    -----------
    gc : object
        girder client object to make requests, for example:
        gc = girder_client.GirderClient(apiUrl = APIURL)
        gc.authenticate(interactive=True)

    slide_id : str
        girder id for item (slide)

    GTCodes_dict : dict
        the ground truth codes and information dict.
        This is a dict that is indexed by the annotation group name and
        each entry is in turn a dict with the following keys:
        - group: group name of annotation (string), e.g. mostly_tumor
        - overlay_order: int, how early to place the annotation in the
        mask. Larger values mean this annotation group is overlaid
        last and overwrites whatever it overlaps.
        - GT_code: int, desired ground truth code (in the mask).
        Pixels of this value belong to the corresponding group (class)
        - is_roi: Flag for whether this group encodes an ROI
        - is_background_class: Flag, whether this group is the default
        fill value inside the ROI. For example, you may decide that
        any pixel inside the ROI is considered stroma.

    MPP : float or None
        Microns-per-pixel -- best use this as it's better defined than
        magnification, which is more scanner/manufacturer specific.
        An MPP of 0.25 often roughly translates to 40x.

    MAG : float or None
        Use this if you prefer whatever magnification is reported in the
        slide. If neither MPP nor MAG is provided, everything is retrieved
        at base (scan) magnification, without scaling.

    mode : str
        This specifies which part of the slide to get the mask from. Allowed
        modes include the following
        - wsi: get scaled up/down version of mask of whole slide
        - min_bounding_box: get minimum box for all annotations in slide
        - manual_bounds: use given ROI bounds provided by the 'bounds' param
        - polygonal_bounds: use the idx_for_roi param to get coordinates

    bounds : dict or None
        if not None, has keys 'XMIN', 'XMAX', 'YMIN', 'YMAX' for slide
        region coordinates (AT BASE MAGNIFICATION) to get labeled image
        (mask) for. Use this with the 'manual_bounds' run mode.

    idx_for_roi : int
        index of ROI within the element_infos dataframe.
        Use this with the 'polygonal_bounds' run mode.

    slide_annotations : list or None
        Give this parameter to avoid re-getting slide annotations. If you do
        provide the annotations, though, make sure you have used
        scale_slide_annotations() to scale them up/down by sf BEFOREHAND.

    element_infos : pandas DataFrame
        The columns annidx and elementidx encode the dict index of the
        annotation document and element, respectively, in the original
        slide_annotations list of dictionaries. This can be obtained with
        the get_bboxes_from_slide_annotations() method.
        Make sure you have used scale_slide_annotations().

    get_roi_mask_kwargs : dict
        extra kwargs for get_roi_mask()

    get_contours_kwargs : dict
        extra kwargs for get_contours_from_mask()

    linewidth : float
        visualization line width

    get_rgb: bool
        get rgb image?

    get_contours : bool
        get annotation contours? (relative to final mask)

    get_visualization : bool
        get overlaid annotation bounds over the RGB image for visualization

    Returns
    --------
    dict
        Results dict containing one or more of the following keys:
        - bounds: dict of bounds at scan magnification
        - ROI: (m, n) labeled image (mask)
        - rgb: (m, n, 3) np array, corresponding rgb image
        - contours: list, each entry is a dict version of a row from the
        output of masks_to_annotations_handler.get_contours_from_mask()
        - visualization: (m, n, 3) np array, visualization overlay

    """
    get_roi_mask_kwargs = get_roi_mask_kwargs or {}
    get_contours_kwargs = get_contours_kwargs or {}
    # important sanity checks
    (MPP, MAG, mode, bounds, idx_for_roi, get_roi_mask_kwargs, get_rgb,
     get_contours,
     get_visualization) = _sanity_checks(MPP, MAG, mode, bounds, idx_for_roi,
                                         get_roi_mask_kwargs, get_rgb,
                                         get_contours, get_visualization)

    # calculate the scale factor
    sf, appendStr = get_scale_factor_and_appendStr(gc=gc,
                                                   slide_id=slide_id,
                                                   MPP=MPP,
                                                   MAG=MAG)

    if slide_annotations is not None:
        assert element_infos is not None, "must also provide element_infos"
    else:
        # get annotations for slide
        slide_annotations = gc.get('/annotation/item/' + slide_id)

        # scale up/down annotations by a factor
        slide_annotations = scale_slide_annotations(slide_annotations, sf=sf)

        # get bounding box information for all annotations -> scaled by sf
        element_infos = get_bboxes_from_slide_annotations(slide_annotations)

    # Determine the region to get based on run mode, keeping in mind that it
    # must be at BASE MAGNIFICATION coordinates before it is passed
    # on to get_mask_from_slide()
    bounds = _get_roi_bounds_by_run_mode(gc=gc,
                                         slide_id=slide_id,
                                         mode=mode,
                                         bounds=bounds,
                                         element_infos=element_infos,
                                         idx_for_roi=idx_for_roi,
                                         sf=sf)
    result = {
        'bounds': bounds,
    }

    # get mask for specified area
    if mode == 'polygonal_bounds':
        # get roi mask and info
        ROI, _ = get_roi_mask(slide_annotations=slide_annotations,
                              element_infos=element_infos,
                              GTCodes_df=DataFrame.from_dict(GTCodes_dict,
                                                             orient='index'),
                              idx_for_roi=idx_for_roi,
                              **get_roi_mask_kwargs)
    else:
        ROI, _ = get_mask_from_slide(GTCodes_dict=GTCodes_dict,
                                     roiinfo=copy.deepcopy(bounds),
                                     slide_annotations=slide_annotations,
                                     element_infos=element_infos,
                                     sf=sf,
                                     get_roi_mask_kwargs=get_roi_mask_kwargs)

    # get RGB
    if get_rgb:
        rgb, ROI = _get_rgb_and_pad_roi(gc=gc,
                                        slide_id=slide_id,
                                        bounds=bounds,
                                        appendStr=appendStr,
                                        ROI=ROI)
        result['rgb'] = rgb

    # pack result (we have to do it here in case of padding)
    result['ROI'] = ROI

    # get contours
    if get_contours:
        contours_list = get_contours_from_mask(MASK=ROI,
                                               GTCodes_df=DataFrame.from_dict(
                                                   GTCodes_dict,
                                                   orient='index'),
                                               **get_contours_kwargs)
        contours_list = contours_list.to_dict(orient='records')
        result['contours'] = contours_list

    # get visualization of annotations on RGB
    if get_visualization:
        result['visualization'] = _visualize_annotations_on_rgb(
            rgb=rgb, contours_list=contours_list, linewidth=linewidth)

    return result
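
A hedged usage sketch of the manual-bounds mode; the client setup follows the docstring, while the group definitions and coordinates are illustrative placeholders:

import girder_client

gc = girder_client.GirderClient(apiUrl=APIURL)  # APIURL/slide_id: placeholders
gc.authenticate(interactive=True)

# illustrative ground-truth codes; the structure follows the docstring above
GTCodes_dict = {
    'mostly_tumor': {
        'group': 'mostly_tumor', 'overlay_order': 1, 'GT_code': 1,
        'is_roi': 0, 'is_background_class': 0, 'color': 'rgb(255,0,0)'},
    'mostly_stroma': {
        'group': 'mostly_stroma', 'overlay_order': 2, 'GT_code': 2,
        'is_roi': 0, 'is_background_class': 1, 'color': 'rgb(255,125,0)'},
}

result = get_image_and_mask_from_slide(
    gc, slide_id, GTCodes_dict, MPP=5.0, mode='manual_bounds',
    bounds={'XMIN': 58000, 'XMAX': 63000, 'YMIN': 35000, 'YMAX': 39000},
    get_rgb=True, get_contours=True, get_visualization=True)
mask, rgb, viz = result['ROI'], result['rgb'], result['visualization']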
Code example #10
def evaluate_maskrcnn_fold_on_inferred_truth(fold: int,
                                             cfg,
                                             model_root: str,
                                             model_name: str,
                                             whoistruth='Ps',
                                             evalset='E',
                                             getmetrics=True,
                                             n_vis=100):

    model_folder = opj(model_root, f'fold_{fold}')
    checkpoint_path = opj(model_folder, f'{model_name}.ckpt')
    savepath = opj(model_folder, f'Eval_{whoistruth}AreTruth_{evalset}')
    maybe_mkdir(savepath)

    # %% --------------------------------------------------------------
    # Init model

    model = MaskRCNN(**cfg.MaskRCNNConfigs.maskrcnn_params)

    # %% --------------------------------------------------------------
    # Prep data loaders

    slides = read_csv(opj(model_folder,
                          f'fold_{fold}_test.csv')).loc[:,
                                                        'slide_name'].tolist()
    dataset = NucleusDatasetMask(
        root=EvalSets.dataset_roots[evalset][whoistruth],
        dbpath=EvalSets.dbpaths[evalset][whoistruth],
        slides=slides,
        **cfg.MaskDatasetConfigs.test_dataset)

    # %% --------------------------------------------------------------
    # Evaluate model

    ckpt = load_ckp(checkpoint_path=checkpoint_path, model=model)
    model = ckpt['model']

    if getmetrics:
        ecfgs = {
            k: v
            for k, v in cfg.MaskRCNNConfigs.training_params.items() if k in [
                'test_maxDets', 'n_testtime_augmentations',
                'crop_inference_to_fov'
            ]
        }
        tsls = evaluateNucleusModel(model=model,
                                    checkpoint_path=checkpoint_path,
                                    dloader=DataLoader(
                                        dataset=dataset,
                                        **cfg.MaskDatasetConfigs.test_loader),
                                    **ecfgs)

        # save results
        for i, tsl in enumerate(tsls):
            with open(opj(savepath, f'testingMetrics_{i}.txt'), 'w') as f:
                f.write(str(tsl)[1:-1].replace(', ', '\n'))

    # %% --------------------------------------------------------------
    # Visualize some predictions

    min_iou = 0.5
    vis_props = {'linewidth': 0.15, 'text': False}

    maybe_mkdir(opj(savepath, 'predictions'))

    # cropper = tvdt.Cropper()

    model.eval()
    model.to('cpu')

    for imno in range(min(n_vis, len(dataset))):

        # pick one image from the dataset
        imgtensor, target = dataset[imno]
        imname = dataset.rfovids[int(target['image_id'])]

        print(f"visualizing image {imno} of {n_vis}: {imname}")

        # get prediction
        with torch.no_grad():
            output = model([imgtensor.to('cpu')])
        cpu_device = torch.device('cpu')
        output = [{k: v.to(cpu_device) for k, v in t.items()} for t in output]
        output = output[0]

        # mTODO?: the cropper does not support sparse masks
        # # crop the prediction to FOV

        # Ignore ambiguous nuclei from matching. Note that the
        #  model already filters out anything predicted as ignore_label
        #  in inference mode, so we only need to do this for gtruth
        keep = target['iscrowd'] == 0
        trg_boxes = np.int32(target['boxes'][keep])

        # get true/false positives/negatives
        output_boxes = np.int32(output['boxes'])
        _, TP, FN, FP = map_bboxes_using_hungarian_algorithm(
            bboxes1=trg_boxes, bboxes2=output_boxes, min_iou=min_iou)

        # concat relevant bounding boxes
        relevant_bboxes = np.concatenate(
            (output_boxes[TP], output_boxes[FP], trg_boxes[FN]), axis=0)
        match_colors = [VisConfigs.MATCHING_COLORS['TP']] * len(TP) \
            + [VisConfigs.MATCHING_COLORS['FP']] * len(FP) \
            + [VisConfigs.MATCHING_COLORS['FN']] * len(FN)

        # just to comply with histomicstk default style
        rgtcodes = {
            k: {
                'group': v,
                'color': 'rgb(' + ','.join(
                    str(c) for c in VisConfigs.CATEG_COLORS[v]) + ')',
            }
            for k, v in dataset.rlabelcodes.items()
        }

        # extract contours +/ condensed masks (truth)
        # noinspection PyTupleAssignmentBalance
        dense_mask, _, contoursdf_truth = parse_sparse_mask_for_use(
            sparse_mask=np.uint8(target['masks']),
            rgtcodes=rgtcodes,
            labels=target['labels'].tolist(),
        )

        # extract contours +/ condensed masks (prediction)
        output_labels = np.int32(output['labels'])
        output_labels = output_labels.tolist()
        if not model.transform.densify_mask:
            # output mask is sparse
            # noinspection PyTupleAssignmentBalance
            _, _, contoursdf_prediction = parse_sparse_mask_for_use(
                sparse_mask=np.uint8(output['masks'][:, 0, :, :] > 0.5),
                rgtcodes=rgtcodes,
                labels=output_labels,
            )
        else:
            # output mask is already dense
            contoursdf_prediction = get_contours_from_mask(
                MASK=output['masks'].numpy(),
                GTCodes_df=DataFrame.from_records(
                    data=[{
                        'group': rgtcodes[label]['group'],
                        'GT_code': idx + 1,
                        'color': rgtcodes[label]['color']
                    } for idx, label in enumerate(output_labels)]),
                MIN_SIZE=1,
                get_roi_contour=False,
            )

        # get rgb
        rgb = np.uint8(imgtensor * 255.).transpose(1, 2, 0)

        # visualize bounding boxes and masks
        nperrow = 4
        nrows = 1
        fig, ax = plt.subplots(nrows,
                               nperrow,
                               figsize=(5 * nperrow, 5.3 * nrows))

        # just the image
        axis = ax[0]
        axis.imshow(rgb)
        axis.set_title('rgb', fontsize=12)

        # relevant predicted (TP, FP) & true (FN) boxes
        axis = ax[1]
        axis = pu.vis_bbox(
            img=rgb,
            bbox=relevant_bboxes,
            ax=axis,
            instance_colors=match_colors,
            linewidth=1.5,
        )
        axis.set_title('Bboxes detection (TP/FP/FN)', fontsize=12)

        # predicted masks
        axis = ax[2]
        prediction_vis = _visualize_annotations_on_rgb(
            rgb=rgb,
            contours_list=contoursdf_prediction.to_dict(orient='records'),
            **vis_props)
        axis.imshow(prediction_vis)
        axis.set_title('Predicted masks + classif.', fontsize=12)

        # true masks
        axis = ax[3]
        truth_vis = _visualize_annotations_on_rgb(
            rgb=rgb,
            contours_list=contoursdf_truth.to_dict(orient='records'),
            **vis_props)
        axis.imshow(truth_vis)
        axis.set_title('True masks/bboxes + classif.', fontsize=12)

        # plt.show()
        plt.savefig(opj(savepath, f'predictions/{imno}_{imname}.png'))
        plt.close()
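
The TP/FN/FP split above hinges on `map_bboxes_using_hungarian_algorithm`. A minimal sketch of that style of IoU-based Hungarian matching (an illustration of the technique; the real helper's internals and return conventions may differ), assuming non-degenerate [xmin, ymin, xmax, ymax] boxes:

import numpy as np
from scipy.optimize import linear_sum_assignment


def _iou(b1, b2):
    # intersection over union of two [xmin, ymin, xmax, ymax] boxes
    xa, ya = max(b1[0], b2[0]), max(b1[1], b2[1])
    xb, yb = min(b1[2], b2[2]), min(b1[3], b2[3])
    inter = max(0, xb - xa) * max(0, yb - ya)
    area1 = (b1[2] - b1[0]) * (b1[3] - b1[1])
    area2 = (b2[2] - b2[0]) * (b2[3] - b2[1])
    return inter / float(area1 + area2 - inter)


def match_boxes(truth, pred, min_iou=0.5):
    # Hungarian assignment on a (1 - IoU) cost; pairs under min_iou dropped
    cost = np.array([[1. - _iou(t, p) for p in pred] for t in truth])
    rows, cols = linear_sum_assignment(cost)
    pairs = [(t, p) for t, p in zip(rows, cols) if cost[t, p] <= 1. - min_iou]
    matched_t = {t for t, _ in pairs}
    matched_p = {p for _, p in pairs}
    TP = sorted(matched_p)                                    # matched preds
    FP = [p for p in range(len(pred)) if p not in matched_p]  # spurious preds
    FN = [t for t in range(len(truth)) if t not in matched_t]  # missed truth
    return pairs, TP, FN, FP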
Code example #11
def run_one_maskrcnn_fold(fold: int,
                          cfg,
                          model_root: str,
                          model_name: str,
                          qcd_training=True,
                          train=True,
                          vis_test=True,
                          n_vis=100,
                          randomvis=True):

    # FIXME: for prototyping
    if fold == 999:
        cfg.MaskRCNNConfigs.training_params.update({
            'effective_batch_size': 4,
            'smoothing_window': 1,
            'test_evaluate_freq': 1,
        })

    model_folder = opj(model_root, f'fold_{fold}')
    maybe_mkdir(model_folder)
    checkpoint_path = opj(model_folder, f'{model_name}.ckpt')

    # %% --------------------------------------------------------------
    # Init model

    model = MaskRCNN(**cfg.MaskRCNNConfigs.maskrcnn_params)

    # %% --------------------------------------------------------------
    # Test that it works in forward mode

    # model.eval()
    # x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
    # predictions = model(x)

    # %% --------------------------------------------------------------
    # Prep data loaders

    train_slides, test_slides = get_cv_fold_slides(
        train_test_splits_path=CoreSetQC.train_test_splits_path, fold=fold)

    # copy train/test slides with model itself just to be safe
    for tr in ('train', 'test'):
        fname = f'fold_{fold}_{tr}.csv'
        copyfile(
            opj(CoreSetQC.train_test_splits_path, fname),
            opj(model_folder, fname),
        )

    # training data optionally QCd
    if qcd_training:
        train_dataset = NucleusDatasetMask(
            root=CoreSetQC.dataset_root,
            dbpath=CoreSetQC.dbpath,
            slides=train_slides,
            **cfg.MaskDatasetConfigs.train_dataset)
    else:
        train_dataset = NucleusDatasetMask(
            root=CoreSetNoQC.dataset_root,
            dbpath=CoreSetNoQC.dbpath,
            slides=train_slides,
            **cfg.MaskDatasetConfigs.train_dataset)

    # test set is always the QC'd data
    test_dataset = NucleusDatasetMask(root=CoreSetQC.dataset_root,
                                      dbpath=CoreSetQC.dbpath,
                                      slides=test_slides,
                                      **cfg.MaskDatasetConfigs.test_dataset)

    # handle class imbalance
    if cfg.MaskRCNNConfigs.handle_class_imbalance:
        del cfg.BaseDatasetConfigs.train_loader['shuffle']
        cfg.BaseDatasetConfigs.train_loader['sampler'] = WeightedRandomSampler(
            weights=train_dataset.fov_weights,
            num_samples=len(train_dataset.fov_weights),
            replacement=cfg.MaskRCNNConfigs.sample_with_replacement,
        )

    # %% --------------------------------------------------------------
    # Train model

    if train:
        trainNucleusModel(
            model=model,
            checkpoint_path=checkpoint_path,
            data_loader=DataLoader(dataset=train_dataset,
                                   **cfg.MaskDatasetConfigs.train_loader),
            data_loader_test=DataLoader(dataset=test_dataset,
                                        **cfg.MaskDatasetConfigs.test_loader),
            **cfg.MaskRCNNConfigs.training_params)

    elif os.path.exists(checkpoint_path):
        ckpt = load_ckp(checkpoint_path=checkpoint_path, model=model)
        model = ckpt['model']

    # %% --------------------------------------------------------------
    # Visualize some predictions

    min_iou = 0.5
    vis_props = {'linewidth': 0.15, 'text': False}

    maybe_mkdir(opj(model_folder, 'predictions'))

    if vis_test:
        dataset = test_dataset
    else:
        dataset = train_dataset

    # cropper = tvdt.Cropper()

    model.eval()
    model.to('cpu')

    if randomvis:
        tovis = list(np.random.choice(len(dataset), size=(n_vis, )))
    else:
        tovis = list(range(n_vis))

    for imidx, imno in enumerate(tovis):

        # pick one image from the dataset
        imgtensor, target = dataset[imno]
        imname = dataset.rfovids[int(target['image_id'])]

        print(f"predicting image {imidx} of {n_vis}: {imname}")

        # get prediction
        with torch.no_grad():
            output = model([imgtensor.to('cpu')])
        cpu_device = torch.device('cpu')
        output = [{k: v.to(cpu_device) for k, v in t.items()} for t in output]
        output = output[0]

        # mTODO?: the cropper does not support sparse masks
        # # crop the prediction to FOV

        # Ignore ambiguous nuclei from matching. Note that the
        #  model already filters out anything predicted as ignore_label
        #  in inference mode, so we only need to do this for gtruth
        keep = target['iscrowd'] == 0
        trg_boxes = np.int32(target['boxes'][keep])

        # get true/false positives/negatives
        output_boxes = np.int32(output['boxes'])
        _, TP, FN, FP = map_bboxes_using_hungarian_algorithm(
            bboxes1=trg_boxes, bboxes2=output_boxes, min_iou=min_iou)

        # concat relevant bounding boxes
        relevant_bboxes = np.concatenate(
            (output_boxes[TP], output_boxes[FP], trg_boxes[FN]), axis=0)
        match_colors = [VisConfigs.MATCHING_COLORS['TP']] * len(TP) \
            + [VisConfigs.MATCHING_COLORS['FP']] * len(FP) \
            + [VisConfigs.MATCHING_COLORS['FN']] * len(FN)

        # just to comply with histomicstk default style
        rgtcodes = {
            k: {
                'group': v,
                'color': 'rgb(' + ','.join(
                    str(c) for c in VisConfigs.CATEG_COLORS[v]) + ')',
            }
            for k, v in dataset.rlabelcodes.items()
        }

        # extract contours +/ condensed masks (truth)
        # noinspection PyTupleAssignmentBalance
        _, _, contoursdf_truth = parse_sparse_mask_for_use(
            sparse_mask=np.uint8(target['masks']),
            rgtcodes=rgtcodes,
            labels=target['labels'].tolist(),
        )

        # extract contours +/ condensed masks (prediction)
        output_labels = np.int32(output['labels'])
        output_labels = output_labels.tolist()
        if not model.transform.densify_mask:
            # output mask is sparse
            # noinspection PyTupleAssignmentBalance
            _, _, contoursdf_prediction = parse_sparse_mask_for_use(
                sparse_mask=np.uint8(output['masks'][:, 0, :, :] > 0.5),
                rgtcodes=rgtcodes,
                labels=output_labels,
            )
        else:
            # output mask is already dense
            contoursdf_prediction = get_contours_from_mask(
                MASK=output['masks'].numpy(),
                GTCodes_df=DataFrame.from_records(
                    data=[{
                        'group': rgtcodes[label]['group'],
                        'GT_code': idx + 1,
                        'color': rgtcodes[label]['color']
                    } for idx, label in enumerate(output_labels)]),
                MIN_SIZE=1,
                get_roi_contour=False,
            )

        # get rgb
        rgb = np.uint8(imgtensor * 255.).transpose(1, 2, 0)

        # visualize bounding boxes and masks
        nperrow = 4
        nrows = 1
        fig, ax = plt.subplots(nrows,
                               nperrow,
                               figsize=(5 * nperrow, 5.3 * nrows))

        # just the image
        axis = ax[0]
        axis.imshow(rgb)
        axis.set_title('rgb', fontsize=12)

        # relevant predicted (TP, FP) & true (FN) boxes
        axis = ax[1]
        axis = pu.vis_bbox(
            img=rgb,
            bbox=relevant_bboxes,
            ax=axis,
            instance_colors=match_colors,
            linewidth=1.5,
        )
        axis.set_title('Bboxes detection (TP/FP/FN)', fontsize=12)

        # predicted masks
        axis = ax[2]
        prediction_vis = _visualize_annotations_on_rgb(
            rgb=rgb,
            contours_list=contoursdf_prediction.to_dict(orient='records'),
            **vis_props)
        axis.imshow(prediction_vis)
        axis.set_title('Predicted masks + classif.', fontsize=12)

        # true masks
        axis = ax[3]
        truth_vis = _visualize_annotations_on_rgb(
            rgb=rgb,
            contours_list=contoursdf_truth.to_dict(orient='records'),
            **vis_props)
        axis.imshow(truth_vis)
        axis.set_title('True masks/bboxes + classif.', fontsize=12)

        # plt.show()
        plt.savefig(opj(model_folder, f'predictions/{imno}_{imname}.png'))
        plt.close()
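
For reference, the class-imbalance branch earlier swaps the training loader's `shuffle` for a `WeightedRandomSampler` built from the dataset's `fov_weights`. A self-contained sketch of the same idea on toy data (the weighting scheme here, inverse class frequency, is one common choice and an assumption about what `fov_weights` encodes):

import torch
from torch.utils.data import DataLoader, TensorDataset, WeightedRandomSampler

# toy dataset with a 6:2 class imbalance
labels = torch.tensor([0, 0, 0, 0, 0, 0, 1, 1])
dataset = TensorDataset(torch.randn(8, 3), labels)

# weight each sample inversely to its class frequency
class_counts = torch.bincount(labels).float()
weights = (1.0 / class_counts)[labels]

# the sampler replaces shuffle=True; with replacement, the rare class is
# drawn about as often as the common one
sampler = WeightedRandomSampler(weights=weights,
                                num_samples=len(weights),
                                replacement=True)
loader = DataLoader(dataset, batch_size=4, sampler=sampler)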