def test_get_contours_from_mask_with_zeroes(self):
    """Test get_contours_from_mask() when one group uses GT code 0.

    An extra 'zeroes' group (GT_code 0) is added to the ground truth codes
    before contours are extracted. The first ten contour rows must still
    match the reference dataframe, and 49 contours are expected in total.
    """
    # Local import: only needed here to build the extra one-row dataframe.
    import pandas as pd

    self._setup()
    groups_to_get = None

    zeroes_row = {
        'group': 'zeroes',
        'overlay_order': 4,
        'GT_code': 0,
        'is_roi': 0,
        'is_background_class': 0,
        'color': 'rgb(0,128,0)',
        'comments': 'zeroes',
    }
    # DataFrame.append() was removed in pandas 2.0; concat() is the
    # equivalent that works on both old and new pandas versions.
    gtcodes = pd.concat(
        [self.GTCodes_df, pd.DataFrame([zeroes_row])], ignore_index=True)
    gtcodes.index = gtcodes.loc[:, 'group']

    contours_df = get_contours_from_mask(
        MASK=self.MASK, GTCodes_df=gtcodes,
        groups_to_get=groups_to_get,
        get_roi_contour=True, roi_group='roi',
        discard_nonenclosed_background=True,
        background_group='mostly_stroma',
        MIN_SIZE=30, MAX_SIZE=None)

    # make sure it is what we expect
    assert set(contours_df.columns) == set(self.CONTOURS_DF.columns)
    # all(<DataFrame>) iterates over the column labels (truthy strings), so
    # it never inspects the cell values. Reduce the element-wise comparison
    # explicitly instead.
    matches = contours_df.iloc[:10, :] == self.CONTOURS_DF.iloc[:10, :]
    assert matches.values.all()
    assert len(contours_df) == 49
def set_contours_from_all_masks(self, monitorPrefix=""):
    """Get contours_df from all masks.

    This is a wrapper around get_contours_from_mask(), with the added
    functionality of separating out contours at the roi edge from those
    that are not. Masks that are empty, or that yield no contours, are
    dropped from self.maskpaths.

    Parameters
    ----------
    monitorPrefix : str
        text prepended to the progress prefix passed on to
        get_contours_from_mask()

    Sets:
    - self.maskpaths: list: the original paths minus the dropped masks
    - self.ordinary_contours: dict: indexed by maskname, each entry
      is a contours dataframe
    - self.edge_contours: dict: indexed by maskname, each entry is
      a contours dataframe
    - self.merged_contours: pandas DataFrame: single dataframe to
      save all merged contours. It has the contour columns when at least
      one mask produced contours, and is a column-less empty dataframe
      otherwise.

    """
    ordinary_contours = {}
    edge_contours = {}
    to_remove = set()
    edge_columns = [
        'touches_edge-%s' % edge
        for edge in ('top', 'left', 'bottom', 'right')]

    for midx, maskpath in enumerate(self.maskpaths):

        # read mask
        MASK = imread(maskpath)

        # mask is empty!
        if MASK.sum() < 2:
            to_remove.add(maskpath)
            continue

        # extract contours
        contours_df = get_contours_from_mask(
            MASK=MASK,
            monitorPrefix="%s: mask %d of %d" % (
                monitorPrefix, midx, len(self.maskpaths)),
            **self.contkwargs)

        # no contours!
        if contours_df.shape[0] < 1:
            to_remove.add(maskpath)
            continue

        # separate edge from non-edge contours; a contour touching several
        # edges must only be listed once, hence the de-duplication
        edgeids = []
        for column in edge_columns:
            edgeids.extend(list(
                contours_df.loc[contours_df.loc[:, column] == 1, :].index))
        edgeids = list(set(edgeids))

        roiname = os.path.basename(maskpath)
        edge_contours[roiname] = contours_df.loc[edgeids, :].copy()
        ordinary_contours[roiname] = contours_df.drop(edgeids, axis=0)

    self.maskpaths = [j for j in self.maskpaths if j not in to_remove]
    self.ordinary_contours = ordinary_contours
    self.edge_contours = edge_contours

    # init dataframe to save merged contours
    if edge_contours:
        colnames = next(iter(edge_contours.values())).columns
        self.merged_contours = DataFrame(columns=colnames)
    else:
        # Every mask was skipped, so there is no contours dataframe to copy
        # the column names from. Indexing the first entry would raise
        # IndexError here.
        self.merged_contours = DataFrame()
def visualize_results(self):
    """Visualize results in DSA.

    Contours are extracted from self.labeled, converted into annotation
    documents (one set per group, name prefix 'cdt') and each document is
    posted to the slide through the girder client held by self.cdt.
    """
    detector = self.cdt
    prefix = self.monitorPrefix

    # contours of the labeled mask
    contours_df = get_contours_from_mask(
        MASK=self.labeled,
        GTCodes_df=detector.GTcodes.copy(),
        get_roi_contour=True,
        roi_group='roi',
        background_group='not_specified',
        discard_nonenclosed_background=True,
        MIN_SIZE=15,
        MAX_SIZE=None,
        verbose=detector.verbose == 3,
        monitorPrefix=prefix + ": -- contours",
    )

    # properties shared by all annotation elements
    annprops = dict(
        F=detector.slide_info['magnification'] / detector.MAG,
        X_OFFSET=self.xmin,
        Y_OFFSET=self.ymin,
        opacity=detector.opacity,
        lineWidth=detector.lineWidth,
    )
    annotation_docs = get_annotation_documents_from_contours(
        contours_df.copy(),
        separate_docs_by_group=True,
        docnamePrefix='cdt',
        annprops=annprops,
        verbose=detector.verbose == 3,
        monitorPrefix=prefix + ": -- annotation docs",
    )

    # push every document to the slide
    for annotation_doc in annotation_docs:
        detector.gc.post(
            "/annotation?itemId=" + detector.slide_id, json=annotation_doc)
def get_tissue_boundary_annotation_documents(
        gc, slide_id, labeled, color='rgb(0,0,0)', group='tissue',
        annprops=None):
    """Get annotation documents of tissue boundaries to visualize on DSA.

    Parameters
    -----------
    gc : object
        girder client to use
    slide_id : str
        girder ID of slide
    labeled : np array
        mask of tissue regions using slide thumbnail. This could either be
        a binary mask or a mask where each unique value corresponds to one
        tissue region. It will be binarized anyway (any value > 0 counts
        as tissue). This can be obtained using get_tissue_mask().
    color : str
        color to assign to boundaries. format like rgb(0,0,0)
    group : str
        label for annotations
    annprops : dict or None
        properties of annotation elements. Contains the following keys
        F, X_OFFSET, Y_OFFSET, opacity, lineWidth. Refer to
        get_single_annotation_document_from_contours() for details.
        When None, the slide tile metadata is fetched through gc and the
        defaults are: F = slide sizeX / mask width, zero offsets,
        opacity 0 and lineWidth 4.0.

    Returns
    --------
    list of dicts
        each dict is an annotation document that you can post to DSA

    """
    if annprops is None:
        # derive the scale factor from the slide's base width
        slide_info = gc.get('item/%s/tiles' % slide_id)
        annprops = dict(
            F=slide_info['sizeX'] / labeled.shape[1],  # relative to base
            X_OFFSET=0,
            Y_OFFSET=0,
            opacity=0,
            lineWidth=4.0,
        )

    # single-row GTCodes dataframe describing the tissue class
    GTCodes_df = DataFrame(columns=['group', 'GT_code', 'color'])
    for column, value in (
            ('group', group), ('GT_code', 1), ('color', color)):
        GTCodes_df.loc['tissue', column] = value

    # contours of the binarized mask
    binary_mask = 0 + (labeled > 0)
    contours_tissue = get_contours_from_mask(
        MASK=binary_mask,
        GTCodes_df=GTCodes_df,
        get_roi_contour=False,
        MIN_SIZE=0,
        MAX_SIZE=None,
        verbose=False,
        monitorPrefix="tissue: getting contours",
    )

    return get_annotation_documents_from_contours(
        contours_tissue.copy(),
        docnamePrefix='test',
        annprops=annprops,
        verbose=False,
        monitorPrefix="tissue : annotation docs",
    )
def parse_sparse_mask_for_use(
        sparse_mask, labels: list = None, rgtcodes: dict = None,
        min_bbox_side=None, max_bbox_side=None):
    """Parse sparse mask for visualization and pushing to histomicsUI etc.

    Parameters
    ----------
    sparse_mask (np.array): n_objects, m, n
    labels (list): labels corresponding to the channels. When None, every
        object gets label 1 and rgtcodes is replaced by a single generic
        'nucleus' style, regardless of what was passed.
    rgtcodes (dict): keys are integer ground truth codes, values are the
        histomicstk style for this label (group, lineColor, ...). Falls
        back to DefaultAnnotationStyles.rgtcodes_dict when None.
    min_bbox_side: passed as min_side to
        from_sparse_to_dense_object_mask(); also used as MIN_SIZE for the
        contour extraction (1 is used when None).
    max_bbox_side: passed as max_side to
        from_sparse_to_dense_object_mask().

    Returns
    -------
    np.array: dense mask where first channel is label (semantic
        segmentation), while product of second and third channels is
        nucleus id
    dict: keys are individual nucleus ids, values are labels of nuclei
    DataFrame: each row is a contour. Histomicstk style.

    """
    if labels is None:
        labels = [1] * sparse_mask.shape[0]
        rgtcodes = {1: {'group': 'nucleus', 'color': 'rgb(255,255,0)'}}
    if rgtcodes is None:
        rgtcodes = DefaultAnnotationStyles.rgtcodes_dict

    # "condense" masks
    dense_mask, labels_map = from_sparse_to_dense_object_mask(
        sparse_mask=sparse_mask, labels=labels,
        min_side=min_bbox_side, max_side=max_bbox_side)

    # instance id = product of the second and third channels
    dense_float = np.float32(dense_mask)
    nids_mask = dense_float[..., 1] * dense_float[..., 2]

    # one GT code per instance, styled after the instance's label
    gtcode_records = []
    for instanceid, label in labels_map.items():
        style = rgtcodes[label]
        gtcode_records.append({
            'group': style['group'],
            'GT_code': instanceid,
            'color': style['color'],
        })

    # extract contours from condensed mask
    min_size = min_bbox_side if min_bbox_side is not None else 1
    contours_df = get_contours_from_mask(
        MASK=nids_mask,
        GTCodes_df=DataFrame.from_records(data=gtcode_records),
        MIN_SIZE=min_size,
        get_roi_contour=False,
    )

    return dense_mask, labels_map, contours_df
def visualize_contiguous_superpixels(self):
    """Visualize contiguous spixels, color-coded by cellularity.

    A mask of cluster memberships is painted from self.spixel_mask and
    self.fdata, its contours are turned into annotation documents (name
    prefix 'contig', at most 1000 annotations each, separated by group)
    and every document is posted to the slide.
    """
    verbose = self.cd.verbose == 3

    # paint each superpixel with the cluster it belongs to
    cellularity_mask = np.zeros(self.spixel_mask.shape)
    for spixel_value, spixel_row in self.fdata.iterrows():
        cellularity_mask[self.spixel_mask == spixel_value] = \
            spixel_row['cluster']

    # one GTCodes row per cellularity cluster
    GTCodes_df = DataFrame(columns=['group', 'GT_code', 'color'])
    for cluster_value, props in self.cluster_props.items():
        group_name = 'cellularity-%d' % (props['cellularity'])
        GTCodes_df.loc[group_name, 'group'] = group_name
        GTCodes_df.loc[group_name, 'GT_code'] = cluster_value
        GTCodes_df.loc[group_name, 'color'] = props['color']

    # contours of the cluster membership mask
    contours_df = get_contours_from_mask(
        MASK=cellularity_mask,
        GTCodes_df=GTCodes_df,
        get_roi_contour=False,
        MIN_SIZE=0,
        MAX_SIZE=None,
        verbose=verbose,
        monitorPrefix=self.monitorPrefix,
    )

    # annotation documents
    annprops = dict(
        F=(self.ymax - self.ymin) / self.tissue_rgb.shape[0],
        X_OFFSET=self.xmin,
        Y_OFFSET=self.ymin,
        opacity=self.cd.opacity_contig,
        lineWidth=self.cd.lineWidth,
    )
    annotation_docs = get_annotation_documents_from_contours(
        contours_df.copy(),
        docnamePrefix='contig',
        annprops=annprops,
        annots_per_doc=1000,
        separate_docs_by_group=True,
        verbose=verbose,
        monitorPrefix=self.monitorPrefix,
    )

    # post the documents one by one, reporting progress
    n_docs = len(annotation_docs)
    for docno, doc in enumerate(annotation_docs, start=1):
        self.cd._print2(
            "%s: Posting doc %d of %d" % (self.monitorPrefix, docno, n_docs))
        self.cd.gc.post("/annotation?itemId=" + self.cd.slide_id, json=doc)
def test_get_contours_from_mask(self):
    """Test get_contours_from_mask().

    Contours are extracted for all groups and compared against the
    reference dataframe: same set of columns, and identical values in the
    first ten rows.
    """
    self._setup()

    # get contours from mask; None means all groups. To restrict, pass a
    # list such as ['mostly_tumor', 'mostly_stroma'].
    groups_to_get = None
    contours_df = get_contours_from_mask(
        MASK=self.MASK, GTCodes_df=self.GTCodes_df,
        groups_to_get=groups_to_get,
        get_roi_contour=True, roi_group='roi',
        discard_nonenclosed_background=True,
        background_group='mostly_stroma',
        MIN_SIZE=30, MAX_SIZE=None, verbose=False,
        monitorPrefix=self.MASKNAME[:12] + ": getting contours")

    # make sure it is what we expect
    assert set(contours_df.columns) == set(self.CONTOURS_DF.columns)
    # all(<DataFrame>) iterates over the column labels (truthy strings), so
    # it never inspects the cell values. Reduce the element-wise comparison
    # explicitly instead.
    matches = contours_df.iloc[:10, :] == self.CONTOURS_DF.iloc[:10, :]
    assert matches.values.all()
def test_get_contours_from_mask(self):
    """Test get_contours_from_mask().

    Contours are extracted for all groups from the module-level MASK and
    compared against the module-level reference CONTOURS_DF: same shape,
    same set of columns and identical cell values.
    """
    # get contours from mask; None means all groups. To restrict, pass a
    # list such as ['mostly_tumor', 'mostly_stroma'].
    groups_to_get = None
    contours_df = get_contours_from_mask(
        MASK=MASK, GTCodes_df=GTCodes_df,
        groups_to_get=groups_to_get,
        get_roi_contour=True, roi_group='roi',
        discard_nonenclosed_background=True,
        background_group='mostly_stroma',
        MIN_SIZE=30, MAX_SIZE=None, verbose=False,
        monitorPrefix=MASKNAME[:12] + ": getting contours")

    # make sure it is what we expect
    self.assertTupleEqual(contours_df.shape, CONTOURS_DF.shape)
    self.assertSetEqual(
        set(contours_df.columns), set(CONTOURS_DF.columns))
    # all(<DataFrame>) iterates over the column labels (truthy strings), so
    # it never inspects the cell values. Reduce the element-wise comparison
    # explicitly instead.
    self.assertTrue((contours_df == CONTOURS_DF).values.all())
def get_image_and_mask_from_slide(
        gc, slide_id, GTCodes_dict,
        MPP=5.0, MAG=None, mode='min_bounding_box',
        bounds=None, idx_for_roi=None,
        slide_annotations=None, element_infos=None,
        get_roi_mask_kwargs=None, get_contours_kwargs=None, linewidth=0.2,
        get_rgb=True, get_contours=True, get_visualization=True):
    """Parse region from the slide and get its corresponding labeled mask.

    This is a wrapper around get_roi_mask() which should be referred to for
    implementation details.

    Parameters
    -----------
    gc : object
        girder client object to make requests, for example:
        gc = girder_client.GirderClient(apiUrl = APIURL)
        gc.authenticate(interactive=True)

    slide_id : str
        girder id for item (slide)

    GTCodes_dict : dict
        the ground truth codes and information dict.
        This is a dict that is indexed by the annotation group name and
        each entry is in turn a dict with the following keys:
        - group: group name of annotation (string), eg. mostly_tumor
        - overlay_order: int, how early to place the annotation in the
        mask. Larger values means this annotation group is overlaid
        last and overwrites whatever overlaps it.
        - GT_code: int, desired ground truth code (in the mask)
        Pixels of this value belong to corresponding group (class)
        - is_roi: Flag for whether this group encodes an ROI
        - is_background_class: Flag, whether this group is the default
        fill value inside the ROI. For example, you may decide that
        any pixel inside the ROI is considered stroma.

    MPP : float or None
        Microns-per-pixel -- best use this as it's more well-defined than
        magnification which is more scanner/manufacturer specific.
        MPP of 0.25 often roughly translates to 40x

    MAG : float or None
        If you prefer to use whatever magnification is reported in slide.
        If neither MPP or MAG is provided, everything is retrieved without
        scaling at base (scan) magnification.

    mode : str
        This specifies which part of the slide to get the mask from.
        Allowed modes include the following
        - wsi: get scaled up/down version of mask of whole slide
        - min_bounding_box: get minimum box for all annotations in slide
        - manual_bounds: use given ROI bounds provided by the 'bounds' param
        - polygonal_bounds: use the idx_for_roi param to get coordinates

    bounds : dict or None
        if not None, has keys 'XMIN', 'XMAX', 'YMIN', 'YMAX' for slide
        region coordinates (AT BASE MAGNIFICATION) to get labeled image
        (mask) for. Use this with the 'manual_bounds' run mode.

    idx_for_roi : int
        index of ROI within the element_infos dataframe.
        Use this with the 'polygonal_bounds' run mode.

    slide_annotations : list or None
        Give this parameter to avoid re-getting slide annotations. If you do
        provide the annotations, though, make sure you have used
        scale_slide_annotations() to scale them up/down by sf BEFOREHAND.
        When given, element_infos must be given too.

    element_infos : pandas DataFrame.
        The columns annidx and elementidx
        encode the dict index of annotation document and element,
        respectively, in the original slide_annotations list of dictionaries.
        This can be obtained by get_bboxes_from_slide_annotations() method.
        Make sure you have used scale_slide_annotations().

    get_roi_mask_kwargs : dict
        extra kwargs for get_roi_mask()

    get_contours_kwargs : dict
        extra kwargs for get_contours_from_mask()

    linewidth : float
        visualization line width

    get_rgb: bool
        get rgb image?

    get_contours : bool
        get annotation contours? (relative to final mask)

    get_visualization : bool
        get overlaid annotation bounds over RGB for visualization

    Returns
    --------
    dict
        Results dict containing one or more of the following keys
        bounds: dict of bounds at scan magnification
        ROI - (mxn) labeled image (mask)
        rgb - (mxnx3 np array) corresponding rgb image
        contours - list, each entry is a dict version of a row from the
        output of masks_to_annotations_handler.get_contours_from_mask()
        visualization - (mxnx3 np array) visualization overlay

    """
    if not get_roi_mask_kwargs:
        get_roi_mask_kwargs = {}
    if not get_contours_kwargs:
        get_contours_kwargs = {}

    # important sanity checks (they may adjust the arguments)
    checked = _sanity_checks(
        MPP, MAG, mode, bounds, idx_for_roi,
        get_roi_mask_kwargs, get_rgb, get_contours, get_visualization)
    (MPP, MAG, mode, bounds, idx_for_roi, get_roi_mask_kwargs,
     get_rgb, get_contours, get_visualization) = checked

    # calculate the scale factor
    sf, appendStr = get_scale_factor_and_appendStr(
        gc=gc, slide_id=slide_id, MPP=MPP, MAG=MAG)

    if slide_annotations is None:
        # get annotations for slide
        slide_annotations = gc.get('/annotation/item/' + slide_id)

        # scale up/down annotations by a factor
        slide_annotations = scale_slide_annotations(slide_annotations, sf=sf)

        # get bounding box information for all annotations -> scaled by sf
        element_infos = get_bboxes_from_slide_annotations(slide_annotations)
    else:
        assert element_infos is not None, "must also provide element_infos"

    # Determine the region to get based on run mode, keeping in mind that
    # it must be at BASE MAGNIFICATION coordinates before it is passed
    # on to get_mask_from_slide()
    bounds = _get_roi_bounds_by_run_mode(
        gc=gc, slide_id=slide_id, mode=mode, bounds=bounds,
        element_infos=element_infos, idx_for_roi=idx_for_roi, sf=sf)

    result = {'bounds': bounds}

    # get mask for specified area
    if mode != 'polygonal_bounds':
        # bounds is deep-copied so the callee cannot alter the returned one
        ROI, _ = get_mask_from_slide(
            GTCodes_dict=GTCodes_dict, roiinfo=copy.deepcopy(bounds),
            slide_annotations=slide_annotations,
            element_infos=element_infos, sf=sf,
            get_roi_mask_kwargs=get_roi_mask_kwargs)
    else:
        # get roi mask and info
        ROI, _ = get_roi_mask(
            slide_annotations=slide_annotations,
            element_infos=element_infos,
            GTCodes_df=DataFrame.from_dict(GTCodes_dict, orient='index'),
            idx_for_roi=idx_for_roi, **get_roi_mask_kwargs)

    # get RGB
    if get_rgb:
        rgb, ROI = _get_rgb_and_pad_roi(
            gc=gc, slide_id=slide_id, bounds=bounds, appendStr=appendStr,
            ROI=ROI)
        result['rgb'] = rgb

    # pack result (we have to do it here in case of padding)
    result['ROI'] = ROI

    # get contours
    if get_contours:
        contours_df = get_contours_from_mask(
            MASK=ROI,
            GTCodes_df=DataFrame.from_dict(GTCodes_dict, orient='index'),
            **get_contours_kwargs)
        contours_list = contours_df.to_dict(orient='records')
        result['contours'] = contours_list

    # get visualization of annotations on RGB
    # NOTE(review): relies on rgb and contours_list having been produced
    # above; presumably _sanity_checks() enforces that -- confirm.
    if get_visualization:
        result['visualization'] = _visualize_annotations_on_rgb(
            rgb=rgb, contours_list=contours_list, linewidth=linewidth)

    return result
def evaluate_maskrcnn_fold_on_inferred_truth(
        fold: int, cfg, model_root: str, model_name: str, whoistruth='Ps',
        evalset='E', getmetrics=True, n_vis=100):
    """Evaluate a trained MaskRCNN fold on an evaluation set and plot results.

    The checkpoint '<model_root>/fold_<fold>/<model_name>.ckpt' is loaded,
    optionally evaluated on the fold's test slides, and a four-panel figure
    (rgb, matched boxes, predicted masks, true masks) is saved for the
    first images of the dataset.

    Parameters
    ----------
    fold : int
        fold number; selects the 'fold_<fold>' folder and the
        'fold_<fold>_test.csv' slide list inside it
    cfg : object
        configuration holder; MaskRCNNConfigs and MaskDatasetConfigs are
        read from it
    model_root : str
        folder containing the per-fold model folders
    model_name : str
        checkpoint file name without the '.ckpt' extension
    whoistruth : str
        key into EvalSets.dataset_roots[evalset] / EvalSets.dbpaths[evalset];
        also part of the output folder name
    evalset : str
        key into EvalSets.dataset_roots / EvalSets.dbpaths; also part of the
        output folder name
    getmetrics : bool
        if True, run evaluateNucleusModel() and write each returned entry to
        'testingMetrics_<i>.txt' in the output folder
    n_vis : int
        maximum number of images to visualize

    """
    model_folder = opj(model_root, f'fold_{fold}')
    checkpoint_path = opj(model_folder, f'{model_name}.ckpt')
    savepath = opj(model_folder, f'Eval_{whoistruth}AreTruth_{evalset}')
    maybe_mkdir(savepath)

    # %% --------------------------------------------------------------
    # Init model

    model = MaskRCNN(**cfg.MaskRCNNConfigs.maskrcnn_params)

    # %% --------------------------------------------------------------
    # Prep data loaders

    fold_slides = read_csv(opj(model_folder, f'fold_{fold}_test.csv'))
    slides = fold_slides.loc[:, 'slide_name'].tolist()
    dataset = NucleusDatasetMask(
        root=EvalSets.dataset_roots[evalset][whoistruth],
        dbpath=EvalSets.dbpaths[evalset][whoistruth],
        slides=slides, **cfg.MaskDatasetConfigs.test_dataset)

    # %% --------------------------------------------------------------
    # Evaluate model

    ckpt = load_ckp(checkpoint_path=checkpoint_path, model=model)
    model = ckpt['model']

    if getmetrics:
        # only these training params are relevant at evaluation time
        eval_keys = [
            'test_maxDets',
            'n_testtime_augmentations',
            'crop_inference_to_fov',
        ]
        ecfgs = {}
        for key, value in cfg.MaskRCNNConfigs.training_params.items():
            if key in eval_keys:
                ecfgs[key] = value

        loader = DataLoader(
            dataset=dataset, **cfg.MaskDatasetConfigs.test_loader)
        tsls = evaluateNucleusModel(
            model=model, checkpoint_path=checkpoint_path, dloader=loader,
            **ecfgs)

        # save results: one "key: value" entry per line
        for i, tsl in enumerate(tsls):
            metrics_text = str(tsl)[1:-1].replace(', ', '\n')
            with open(opj(savepath, f'testingMetrics_{i}.txt'), 'w') as f:
                f.write(metrics_text)

    # %% --------------------------------------------------------------
    # Visualize some predictions

    min_iou = 0.5
    vis_props = {'linewidth': 0.15, 'text': False}
    maybe_mkdir(opj(savepath, 'predictions'))

    model.eval()
    model.to('cpu')

    n_images = min(n_vis, len(dataset))
    for imno in range(n_images):

        # pick one image from the dataset
        imgtensor, target = dataset[imno]
        imname = dataset.rfovids[int(target['image_id'])]

        print(f"visualizing image {imno} of {n_vis}: {imname}")

        # get prediction
        with torch.no_grad():
            output = model([imgtensor.to('cpu')])
        cpu_device = torch.device('cpu')
        output = [
            {k: v.to(cpu_device) for k, v in t.items()} for t in output]
        output = output[0]

        # mTODO?: the cropper does not support sparse masks
        # (so the prediction is not cropped to FOV)

        # Ignore ambiguous nuclei from matching. Note that the
        # model already filters out anything predicted as ignore_label
        # in inference mode, so we only need to do this for gtruth
        keep = target['iscrowd'] == 0
        trg_boxes = np.int32(target['boxes'][keep])

        # get true/false positives/negatives
        output_boxes = np.int32(output['boxes'])
        _, TP, FN, FP = map_bboxes_using_hungarian_algorithm(
            bboxes1=trg_boxes, bboxes2=output_boxes, min_iou=min_iou)

        # concat relevant bounding boxes: predictions for TP/FP, truth for FN
        relevant_bboxes = np.concatenate(
            (output_boxes[TP], output_boxes[FP], trg_boxes[FN]), axis=0)
        match_colors = []
        for outcome, matched in (('TP', TP), ('FP', FP), ('FN', FN)):
            match_colors += \
                [VisConfigs.MATCHING_COLORS[outcome]] * len(matched)

        # just to comply with histomicstk default style
        rgtcodes = {}
        for code, categ in dataset.rlabelcodes.items():
            rgb_csv = ','.join(str(c) for c in VisConfigs.CATEG_COLORS[categ])
            rgtcodes[code] = {'group': categ, 'color': 'rgb(' + rgb_csv + ')'}

        # extract contours +/ condensed masks (truth)
        # noinspection PyTupleAssignmentBalance
        _, _, contoursdf_truth = parse_sparse_mask_for_use(
            sparse_mask=np.uint8(target['masks']),
            rgtcodes=rgtcodes, labels=target['labels'].tolist(),
        )

        # extract contours +/ condensed masks (prediction)
        output_labels = np.int32(output['labels']).tolist()
        if model.transform.densify_mask:
            # output mask is already dense
            gtcode_records = [
                {
                    'group': rgtcodes[label]['group'],
                    'GT_code': idx + 1,
                    'color': rgtcodes[label]['color'],
                }
                for idx, label in enumerate(output_labels)
            ]
            contoursdf_prediction = get_contours_from_mask(
                MASK=output['masks'].numpy(),
                GTCodes_df=DataFrame.from_records(data=gtcode_records),
                MIN_SIZE=1,
                get_roi_contour=False,
            )
        else:
            # output mask is sparse
            # noinspection PyTupleAssignmentBalance
            _, _, contoursdf_prediction = parse_sparse_mask_for_use(
                sparse_mask=np.uint8(output['masks'][:, 0, :, :] > 0.5),
                rgtcodes=rgtcodes, labels=output_labels,
            )

        # get rgb
        rgb = np.uint8(imgtensor * 255.).transpose(1, 2, 0)

        # visualize bounding boxes and masks
        nperrow = 4
        nrows = 1
        _fig, ax = plt.subplots(
            nrows, nperrow, figsize=(5 * nperrow, 5.3 * nrows))

        # just the image
        ax[0].imshow(rgb)
        ax[0].set_title('rgb', fontsize=12)

        # relevant predicted (TP, FP) & true (FN) boxes
        bbox_axis = pu.vis_bbox(
            img=rgb, bbox=relevant_bboxes, ax=ax[1],
            instance_colors=match_colors, linewidth=1.5,
        )
        bbox_axis.set_title('Bboxes detection (TP/FP/FN)', fontsize=12)

        # predicted masks
        prediction_vis = _visualize_annotations_on_rgb(
            rgb=rgb,
            contours_list=contoursdf_prediction.to_dict(orient='records'),
            **vis_props)
        ax[2].imshow(prediction_vis)
        ax[2].set_title('Predicted masks + classif.', fontsize=12)

        # true masks
        truth_vis = _visualize_annotations_on_rgb(
            rgb=rgb,
            contours_list=contoursdf_truth.to_dict(orient='records'),
            **vis_props)
        ax[3].imshow(truth_vis)
        ax[3].set_title('True masks/bboxes + classif.', fontsize=12)

        plt.savefig(opj(savepath, f'predictions/{imno}_{imname}.png'))
        plt.close()
def run_one_maskrcnn_fold(
        fold: int, cfg, model_root: str, model_name: str, qcd_training=True,
        train=True, vis_test=True, n_vis=100, randomvis=True):
    """Train (or load) a MaskRCNN model for one fold and plot predictions.

    Parameters
    ----------
    fold : int
        cross-validation fold; selects the 'fold_<fold>' model folder and
        the train/test slide split. The value 999 switches on small
        prototyping settings in cfg.MaskRCNNConfigs.training_params.
    cfg : object
        configuration holder; MaskRCNNConfigs, MaskDatasetConfigs and
        BaseDatasetConfigs are read from it. Note that it is modified in
        place (prototyping settings, class-imbalance sampler).
    model_root : str
        folder in which the per-fold model folder is created
    model_name : str
        checkpoint file name without the '.ckpt' extension
    qcd_training : bool
        train on CoreSetQC if True, on CoreSetNoQC otherwise. The test set
        always comes from CoreSetQC.
    train : bool
        if True, train the model. Otherwise the checkpoint is loaded when
        it exists; if it does not exist the freshly initialized model is
        used as is.
    vis_test : bool
        visualize images from the test set (True) or training set (False)
    n_vis : int
        number of images to visualize
    randomvis : bool
        if True, draw n_vis image indices at random (with replacement, so
        an image may be picked more than once). Otherwise the first images
        of the dataset are used, at most len(dataset) of them.

    """
    # FIXME: for prototyping
    if fold == 999:
        cfg.MaskRCNNConfigs.training_params.update({
            'effective_batch_size': 4,
            'smoothing_window': 1,
            'test_evaluate_freq': 1,
        })

    model_folder = opj(model_root, f'fold_{fold}')
    maybe_mkdir(model_folder)
    checkpoint_path = opj(model_folder, f'{model_name}.ckpt')

    # %% --------------------------------------------------------------
    # Init model

    model = MaskRCNN(**cfg.MaskRCNNConfigs.maskrcnn_params)

    # %% --------------------------------------------------------------
    # Prep data loaders

    train_slides, test_slides = get_cv_fold_slides(
        train_test_splits_path=CoreSetQC.train_test_splits_path, fold=fold)

    # copy train/test slides with model itself just to be safe
    for tr in ('train', 'test'):
        fname = f'fold_{fold}_{tr}.csv'
        copyfile(
            opj(CoreSetQC.train_test_splits_path, fname),
            opj(model_folder, fname),
        )

    # training data optionally QCd
    train_source = CoreSetQC if qcd_training else CoreSetNoQC
    train_dataset = NucleusDatasetMask(
        root=train_source.dataset_root, dbpath=train_source.dbpath,
        slides=train_slides, **cfg.MaskDatasetConfigs.train_dataset)

    # test set is always the QC'd data
    test_dataset = NucleusDatasetMask(
        root=CoreSetQC.dataset_root, dbpath=CoreSetQC.dbpath,
        slides=test_slides, **cfg.MaskDatasetConfigs.test_dataset)

    # handle class imbalance
    if cfg.MaskRCNNConfigs.handle_class_imbalance:
        # A sampler replaces shuffling. pop() instead of del so that calling
        # this function again with the same cfg (e.g. for the next fold),
        # or with a cfg that has no 'shuffle' entry, does not raise KeyError.
        # NOTE(review): the loader below is built from
        # cfg.MaskDatasetConfigs.train_loader; presumably that is the same
        # dict as BaseDatasetConfigs.train_loader -- confirm.
        cfg.BaseDatasetConfigs.train_loader.pop('shuffle', None)
        cfg.BaseDatasetConfigs.train_loader['sampler'] = WeightedRandomSampler(
            weights=train_dataset.fov_weights,
            num_samples=len(train_dataset.fov_weights),
            replacement=cfg.MaskRCNNConfigs.sample_with_replacement,
        )

    # %% --------------------------------------------------------------
    # Train model

    if train:
        trainNucleusModel(
            model=model, checkpoint_path=checkpoint_path,
            data_loader=DataLoader(
                dataset=train_dataset,
                **cfg.MaskDatasetConfigs.train_loader),
            data_loader_test=DataLoader(
                dataset=test_dataset,
                **cfg.MaskDatasetConfigs.test_loader),
            **cfg.MaskRCNNConfigs.training_params)

    elif os.path.exists(checkpoint_path):
        ckpt = load_ckp(checkpoint_path=checkpoint_path, model=model)
        model = ckpt['model']

    # %% --------------------------------------------------------------
    # Visualize some predictions

    min_iou = 0.5
    vis_props = {'linewidth': 0.15, 'text': False}
    maybe_mkdir(opj(model_folder, 'predictions'))

    dataset = test_dataset if vis_test else train_dataset

    model.eval()
    model.to('cpu')
    cpu_device = torch.device('cpu')

    if randomvis:
        tovis = list(np.random.choice(len(dataset), size=(n_vis, )))
    else:
        # never index past the end of the dataset when it has fewer than
        # n_vis images
        tovis = list(range(min(n_vis, len(dataset))))

    for imidx, imno in enumerate(tovis):

        # pick one image from the dataset
        imgtensor, target = dataset.__getitem__(imno)
        imname = dataset.rfovids[int(target['image_id'])]

        print(f"predicting image {imidx} of {len(tovis)}: {imname}")

        # get prediction
        with torch.no_grad():
            output = model([imgtensor.to('cpu')])
        output = [
            {k: v.to(cpu_device) for k, v in t.items()} for t in output]
        output = output[0]

        # mTODO?: the cropper does not support sparse masks
        # (so the prediction is not cropped to FOV)

        # Ignore ambiguous nuclei from matching. Note that the
        # model already filters out anything predicted as ignore_label
        # in inference mode, so we only need to do this for gtruth
        keep = target['iscrowd'] == 0
        trg_boxes = np.int32(target['boxes'][keep])

        # get true/false positives/negatives
        output_boxes = np.int32(output['boxes'])
        _, TP, FN, FP = map_bboxes_using_hungarian_algorithm(
            bboxes1=trg_boxes, bboxes2=output_boxes, min_iou=min_iou)

        # concat relevant bounding boxes: predictions for TP/FP, truth for FN
        relevant_bboxes = np.concatenate((
            output_boxes[TP], output_boxes[FP], trg_boxes[FN],
        ), axis=0)
        match_colors = [VisConfigs.MATCHING_COLORS['TP']] * len(TP) \
            + [VisConfigs.MATCHING_COLORS['FP']] * len(FP) \
            + [VisConfigs.MATCHING_COLORS['FN']] * len(FN)

        # just to comply with histomicstk default style
        rgtcodes = {
            k: {
                'group': v,
                'color': 'rgb(' + ','.join(
                    str(c) for c in VisConfigs.CATEG_COLORS[v]) + ')',
            }
            for k, v in dataset.rlabelcodes.items()
        }

        # extract contours +/ condensed masks (truth)
        # noinspection PyTupleAssignmentBalance
        _, _, contoursdf_truth = parse_sparse_mask_for_use(
            sparse_mask=np.uint8(target['masks']),
            rgtcodes=rgtcodes, labels=target['labels'].tolist(),
        )

        # extract contours +/ condensed masks (prediction)
        output_labels = np.int32(output['labels']).tolist()
        if not model.transform.densify_mask:
            # output mask is sparse
            # noinspection PyTupleAssignmentBalance
            _, _, contoursdf_prediction = parse_sparse_mask_for_use(
                sparse_mask=np.uint8(output['masks'][:, 0, :, :] > 0.5),
                rgtcodes=rgtcodes, labels=output_labels,
            )
        else:
            # output mask is already dense
            contoursdf_prediction = get_contours_from_mask(
                MASK=output['masks'].numpy(),
                GTCodes_df=DataFrame.from_records(data=[
                    {
                        'group': rgtcodes[label]['group'],
                        'GT_code': idx + 1,
                        'color': rgtcodes[label]['color'],
                    }
                    for idx, label in enumerate(output_labels)
                ]),
                MIN_SIZE=1,
                get_roi_contour=False,
            )

        # get rgb
        rgb = np.uint8(imgtensor * 255.).transpose(1, 2, 0)

        # visualize bounding boxes and masks
        nperrow = 4
        nrows = 1
        _fig, ax = plt.subplots(
            nrows, nperrow, figsize=(5 * nperrow, 5.3 * nrows))

        # just the image
        axis = ax[0]
        axis.imshow(rgb)
        axis.set_title('rgb', fontsize=12)

        # relevant predicted (TP, FP) & true (FN) boxes
        axis = ax[1]
        axis = pu.vis_bbox(
            img=rgb, bbox=relevant_bboxes, ax=axis,
            instance_colors=match_colors, linewidth=1.5,
        )
        axis.set_title('Bboxes detection (TP/FP/FN)', fontsize=12)

        # predicted masks
        axis = ax[2]
        prediction_vis = _visualize_annotations_on_rgb(
            rgb=rgb,
            contours_list=contoursdf_prediction.to_dict(orient='records'),
            **vis_props)
        axis.imshow(prediction_vis)
        axis.set_title('Predicted masks + classif.', fontsize=12)

        # true masks
        axis = ax[3]
        truth_vis = _visualize_annotations_on_rgb(
            rgb=rgb,
            contours_list=contoursdf_truth.to_dict(orient='records'),
            **vis_props)
        axis.imshow(truth_vis)
        axis.set_title('True masks/bboxes + classif.', fontsize=12)

        plt.savefig(opj(model_folder, f'predictions/{imno}_{imname}.png'))
        plt.close()