def verify_stack(filename_base, subimg_path_base, settings, segments_all,
                 channels, overlap_base):
    """Verify detected blobs against a truth database.

    Args:
        filename_base: Base path of the image file.
        subimg_path_base: Base path of the sub-image file.
        settings: ROI profile with the ``verify_tol_factor`` setting.
        segments_all: Detected blobs to verify.
        channels: Sequence of channels to verify.
        overlap_base: Base overlap between sub-ROIs, multiplied by
            ``verify_tol_factor`` to give the verification tolerance.

    Returns:
        Accuracy metrics from :func:`verify_rois` and its feedback message,
        both None if verification was skipped.
    """
    db_path_base = os.path.basename(subimg_path_base)
    stats_detection = None
    fdbk = None
    try:
        # Truth databases are any database stored with manually
        # verified blobs and loaded at command-line with the
        # `--truth_db` flag or loaded here. While all experiments
        # can be stored in a single database, this verification also
        # supports experiments saved to separate databases in the
        # software root directory and named as a sub-image but with
        # the `sqlite.DB_SUFFIX_TRUTH` suffix. Experiments in the
        # database are also assumed to be named based on the full
        # image or the sub-image filename, without any directories.

        # load ROIs from previously loaded truth database or one loaded
        # based on sub-image filename
        exp_name, rois = _get_truth_db_rois(
            subimg_path_base, filename_base,
            db_path_base if config.truth_db is None else None)
        if rois is None:
            # load alternate truth database based on sub-image filename
            print("Loading truth ROIs from experiment:", exp_name)
            exp_name, rois = _get_truth_db_rois(
                subimg_path_base, filename_base, db_path_base)
        if config.truth_db is None:
            raise LookupError(
                "No truth database found for experiment {}, will "
                "skip detection verification".format(exp_name))
        if rois is None:
            raise LookupError(
                "No truth set ROIs found for experiment {}, will "
                "skip detection verification".format(exp_name))

        # verify each ROI and store results in a separate database
        exp_id = sqlite.insert_experiment(
            config.verified_db.conn, config.verified_db.cur, exp_name, None)
        verify_tol = np.multiply(
            overlap_base, settings["verify_tol_factor"])
        stats_detection, fdbk, df_verify = verify_rois(
            rois, segments_all, config.truth_db.blobs_truth, verify_tol,
            config.verified_db, exp_id, exp_name, channels)
        df_io.data_frames_to_csv(df_verify, libmag.combine_paths(
            exp_name, "verify.csv"))
    except FileNotFoundError:
        libmag.warn("Could not load truth DB from {}; "
                    "will not verify ROIs".format(db_path_base))
    except LookupError as e:
        libmag.warn(str(e))
    return stats_detection, fdbk
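
# Hedged usage sketch for verify_stack (not part of the original module):
# the paths, channel list, and overlap are hypothetical, and a truth
# database is assumed to have been loaded (e.g. via the `--truth_db` CLI
# flag) so that `config.truth_db` is populated before verification.
def _example_verify_stack(segments_all):
    settings = config.get_roi_profile(0)  # ROI profile for first channel
    overlap_base = (1, 10, 10)  # hypothetical z,y,x sub-ROI overlap
    return verify_stack(
        "sample_image", "sample_image_subimg", settings, segments_all,
        [0], overlap_base)
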
def meas_plot_zscores(path, metric_cols, extra_cols, composites,
                      size=None, show=True):
    """Measure and plot z-scores for given columns in a data frame.

    Args:
        path (str): Path to data frame.
        metric_cols (List[str]): Sequence of column names for which to
            compute z-scores.
        extra_cols (List[str]): Additional columns to include in the
            output data frame.
        composites (List[Enum]): Sequence of enums specifying the
            combination, typically from :class:`vols.MetricCombos`.
        size (List[int]): Sequence of ``width, height`` to size the figure;
            defaults to None.
        show (bool): True to display the image; defaults to True.
    """
    # generate z-scores
    df = pd.read_csv(path)
    df = df_io.zscore_df(df, "Region", metric_cols, extra_cols, True)

    # generate composite score column
    df_comb = df_io.combine_cols(df, composites)
    df_io.data_frames_to_csv(
        df_comb,
        libmag.insert_before_ext(config.filename, "_zhomogeneity"))

    # shift metrics from each condition to separate columns
    conds = np.unique(df["Condition"])
    df = df_io.cond_to_cols_df(
        df, ["Sample", "Region"], "Condition", "original", metric_cols)
    path = libmag.insert_before_ext(config.filename, "_zscore")
    df_io.data_frames_to_csv(df, path)

    # display as probability plot
    lims = (-3, 3)
    plot_2d.plot_probability(
        path, conds, metric_cols, "Volume",
        xlim=lims, ylim=lims, title="Region Match Z-Scores",
        fig_size=size, show=show, suffix=None, df=df)
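
# Hedged usage sketch for meas_plot_zscores: the CSV path, metric columns,
# and (empty) composites are hypothetical; `config.filename` is assumed to
# be set, since the output file names are derived from it.
def _example_meas_plot_zscores():
    meas_plot_zscores(
        "vols_by_region.csv", metric_cols=["Nuclei", "Volume"],
        extra_cols=["Sample"], composites=[], size=(8, 6), show=False)
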
def select_blob_matches_by_blob_id(self, row_id, blobn, blob_ids):
    """Select blob matches corresponding to the given blob IDs in the
    given blob column.

    Args:
        row_id (int): Row ID.
        blobn (int): 1 or 2 to indicate the first or second blob column,
            respectively.
        blob_ids (List[int]): Blob IDs.

    Returns:
        :class:`magmap.cv.colocalizer.BlobMatch`: Blob match object,
        which is empty if no matches are found.
    """
    matches = []
    if isinstance(blob_ids, np.ndarray):
        blob_ids = blob_ids.tolist()
    max_params = 990  # max params of 999 in sqlite < v3.32.0
    for i in range(len(blob_ids) // max_params + 1):
        # select blob matches by block to avoid exceeding the sqlite
        # parameter limit
        ids = blob_ids[i * max_params:(i + 1) * max_params]
        ids.insert(0, row_id)
        self.cur.execute(
            "SELECT {}, id FROM blob_matches WHERE roi_id = ? "
            "AND blob{} IN ({})".format(
                _COLS_BLOB_MATCHES, blobn,
                ",".join("?" * (len(ids) - 1))), ids)
        df = self._parse_blob_matches(self.cur.fetchall()).df
        if df is not None:
            matches.append(df)
    if len(matches) > 0:
        return colocalizer.BlobMatch(df=df_io.data_frames_to_csv(matches))
    return colocalizer.BlobMatch()
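
# Worked example of the block arithmetic above (hypothetical numbers):
# with 2,500 blob IDs and max_params = 990, the loop runs
# len(blob_ids) // max_params + 1 = 3 blocks of 990, 990, and 520 IDs,
# so each query binds at most 991 parameters (block plus the prepended
# row_id), staying under the default 999-parameter limit of sqlite < v3.32.0.
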
def meas_plot_coefvar(path, id_cols, cond_col, cond_base, metric_cols,
                      composites, size_col=None, size=None, show=True):
    """Measure and plot coefficient of variation (CV) as a scatter plot.

    CV is computed two ways:

    - Based on columns and equation specified in ``composites``, applied
      across all samples regardless of group
    - For each metric in ``metric_cols``, separated by groups

    Args:
        path (str): Path to data frame.
        id_cols (List[str]): Sequence of columns to serve as index/indices.
        cond_col (str): Name of the condition column.
        cond_base (str): Name of the condition to which all other
            conditions will be normalized.
        metric_cols (List[str]): Sequence of column names for which to
            compute CVs.
        composites (List[Enum]): Sequence of enums specifying the
            combination, typically from :class:`vols.MetricCombos`.
        size_col (str): Name of weighting column for coefficient of
            variation measurement; defaults to None.
        size (List[int]): Sequence of ``width, height`` to size the figure;
            defaults to None.
        show (bool): True to display the image; defaults to True.
    """
    # measure coefficient of variation per sample-region regardless of group
    df = pd.read_csv(path)
    df = df_io.combine_cols(df, composites)
    df_io.data_frames_to_csv(
        df, libmag.insert_before_ext(config.filename, "_coefvar"))

    # measure CV within each condition and shift metrics from each
    # condition to separate columns
    df = df_io.coefvar_df(df, [*id_cols, cond_col], metric_cols, size_col)
    conds = np.unique(df[cond_col])
    df = df_io.cond_to_cols_df(df, id_cols, cond_col, cond_base, metric_cols)
    path = libmag.insert_before_ext(config.filename, "_coefvartransp")
    df_io.data_frames_to_csv(df, path)

    # display CV measured by condition as probability plot
    lims = (0, 0.7)
    plot_2d.plot_probability(
        path, conds, metric_cols, "Volume",
        xlim=lims, ylim=lims, title="Coefficient of Variation",
        fig_size=size, show=show, suffix=None, df=df)
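
# Hedged usage sketch for meas_plot_coefvar: the file and column names are
# hypothetical; `config.filename` is assumed to be set for the output paths.
def _example_meas_plot_coefvar():
    meas_plot_coefvar(
        "vols_by_region.csv", id_cols=["Region"], cond_col="Condition",
        cond_base="original", metric_cols=["Nuclei", "Volume"],
        composites=[], size_col=None, show=False)
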
def plot_knns(img_paths, suffix=None, show=False, names=None):
    """Plot k-nearest-neighbor distances for multiple sets of blobs,
    overlaying on a single plot.

    Args:
        img_paths (List[str]): Base paths from which registered labels and
            blobs files will be found and output blobs file save location
            will be constructed.
        suffix (str): Suffix for ``path``; defaults to None.
        show (bool): True to plot the distances; defaults to False.
        names (List[str]): Sequence of names corresponding to ``img_paths``
            for the plot legend.
    """
    cluster_settings = config.atlas_profile[
        profiles.RegKeys.METRICS_CLUSTER]
    knn_n = cluster_settings[profiles.RegKeys.KNN_N]
    if not knn_n:
        knn_n = cluster_settings[profiles.RegKeys.DBSCAN_MINPTS] - 1
    print("Calculating k-nearest-neighbor distances and plotting distances "
          "for neighbor {}".format(knn_n))

    # set up combined data frames for all samples at each zoom level
    df_keys = ("ov", "zoom")
    dfs_comb = {key: [] for key in df_keys}
    names_disp = names if names else []
    for i, img_path in enumerate(img_paths):
        # load blobs associated with image
        mod_path = img_path
        if suffix is not None:
            mod_path = libmag.insert_before_ext(img_path, suffix)
        labels_img_np = sitk_io.load_registered_img(
            mod_path, config.RegNames.IMG_LABELS.value)
        blobs = detector.Blobs().load_blobs(
            np_io.img_to_blobs_path(img_path))
        scaling, res = np_io.find_scaling(img_path, labels_img_np.shape)
        if blobs is None:
            libmag.warn(
                "unable to load nuclei coordinates for {}".format(img_path))
            continue
        # convert to physical units and display k-nearest-neighbors for
        # nuclei
        blobs_phys = np.multiply(blobs.blobs[:, :3], res)
        # TESTING: given the same blobs, simply shift
        #blobs = np.multiply(blobs[i*10000000:, :3], res)
        _, _, dfs = knn_dist(blobs_phys, knn_n, 2, 1000000, False)
        if names is None:
            # default to naming from filename
            names_disp.append(os.path.basename(mod_path))
        for j, df in enumerate(dfs):
            dfs_comb[df_keys[j]].append(df)

    for key in dfs_comb:
        # combine data frames at each zoom level, save, and plot with
        # different colors for each image
        df = df_io.join_dfs(dfs_comb[key], "point")
        dist_cols = [col for col in df.columns if col.startswith("dist")]
        rename_cols = {col: name
                       for col, name in zip(dist_cols, names_disp)}
        df = df.rename(rename_cols, axis=1)
        out_path = "knn_dist_combine_{}".format(key)
        df_io.data_frames_to_csv(df, out_path)
        plot_2d.plot_lines(
            out_path, "point", rename_cols.values(), df=df, show=show,
            title=config.plot_labels[config.PlotLabels.TITLE])
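
# Hedged usage sketch for plot_knns: the image paths and legend names are
# hypothetical and assume registered labels and blobs files exist alongside
# each path, with k-NN settings supplied by `config.atlas_profile`.
def _example_plot_knns():
    plot_knns(
        ["sample1.czi", "sample2.czi"], suffix=None, show=False,
        names=["WT", "KO"])
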
def plot_intensity_nuclei(paths, labels, size=None, show=True, unit=None):
    """Plot nuclei vs. intensity as a scatter plot.

    Args:
        paths (List[str]): Sequence of paths to CSV files.
        labels (List[str]): Sequence of label metrics corresponding to
            ``paths``.
        size (List[int]): Sequence of ``width, height`` to size the figure;
            defaults to None.
        show (bool): True to display the image; defaults to True.
        unit (str): Denominator unit for density plot; defaults to None.

    Returns:
        :obj:`pd.DataFrame`: Data frame from the given ``paths``
        concatenated, with columns matching ``labels``.
    """
    def plot(lbls, suffix=None, unit=None):
        cols_xy = []
        for label in lbls:
            # get columns for the given label to plot on a given axis;
            # assume the same order of labels for each group of columns
            # so that they correspond
            cols_xy.append(
                [c for c in df.columns if c.split(".")[0] == label])

        names_group = None
        if cols_xy:
            # extract legend names assuming label.cond format
            names_group = np.unique([c.split(".")[1] for c in cols_xy[0]])
        units = (["{}/{}".format(l.split("_")[0], unit) for l in lbls]
                 if unit else (None, None))
        lbls = [l.replace("_", " ") for l in lbls]
        title = "{} Vs. {} By Region".format(*lbls)
        plot_2d.plot_scatter(
            "vols_stats_intensVnuc", cols_xy[0], cols_xy[1],
            yunit=units[0], xunit=units[1],
            ylabel=lbls[0], xlabel=lbls[1], title=title,
            # col_annot=config.AtlasMetrics.REGION_ABBR.value,
            names_group=names_group, fig_size=size, show=show,
            suffix=suffix, df=df)

    if len(paths) < 2 or len(labels) < 2:
        return
    dfs = [pd.read_csv(path) for path in paths]

    # merge data frames with all columns ending with .mean, prepending labels
    extra_cols = [
        config.AtlasMetrics.REGION.value,
        config.AtlasMetrics.REGION_ABBR.value,
        vols.LabelMetrics.Volume.name,
    ]
    tag = ".mean"
    df = df_io.append_cols(
        dfs[:2], labels, lambda x: x.lower().endswith(tag), extra_cols)
    dens = "{}_density"
    for col in df.columns:
        # str.startswith requires a str or tuple, not a list
        if col.startswith(tuple(labels)):
            col_split = col.split(".")
            col_split[0] = dens.format(col_split[0])
            df.loc[:, ".".join(col_split)] = (
                df[col] / df[vols.LabelMetrics.Volume.name])
    # strip the tag from column names
    names = {col: col.rsplit(tag)[0] for col in df.columns}
    df = df.rename(columns=names)
    df_io.data_frames_to_csv(df, "vols_stats_intensVnuc.csv")

    # plot labels and density labels
    plot(labels)
    plot([dens.format(l) for l in labels], "_density", unit)
    return df
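
# Hedged usage sketch for plot_intensity_nuclei: the two CSVs are
# hypothetical and assumed to contain "label.condition.mean"-style columns
# plus the region and volume columns gathered in `extra_cols` above.
def _example_plot_intensity_nuclei():
    return plot_intensity_nuclei(
        ["vols_stats_intens.csv", "vols_stats_nuc.csv"],
        ["intensity_mean", "nuclei_mean"], size=(8, 6), show=False,
        unit="mm")
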
def merge_atlas_segmentations(img_paths, show=True, atlas=True, suffix=None):
    """Merge atlas segmentations for a list of files as a multiprocessing
    wrapper for :func:``edge_aware_segmentation``, after which edge image
    post-processing is performed separately since it contains tasks also
    performed in multiprocessing.

    Args:
        img_paths (List[str]): Sequence of image paths to load.
        show (bool): True if the output images should be displayed;
            defaults to True.
        atlas (bool): True if the image is an atlas; defaults to True.
        suffix (str): Modifier to append to end of ``img_path`` basename
            for registered image files that were output to a modified name;
            defaults to None.
    """
    start_time = time()

    # erode all labels images into markers for watershed; not multiprocessed
    # since erosion is itself multiprocessed
    erode = config.atlas_profile["erode_labels"]
    erosion = config.atlas_profile[profiles.RegKeys.EDGE_AWARE_REANNOTATION]
    erosion_frac = config.atlas_profile["erosion_frac"]
    mirrored = atlas and _is_profile_mirrored()
    mirror_mult = _get_mirror_mult()
    dfs_eros = []
    for img_path in img_paths:
        mod_path = img_path
        if suffix is not None:
            mod_path = libmag.insert_before_ext(mod_path, suffix)
        labels_sitk = sitk_io.load_registered_img(
            mod_path, config.RegNames.IMG_LABELS.value, get_sitk=True)
        print("Eroding labels to generate markers for atlas segmentation")
        df = None
        if erode["markers"]:
            # use default minimal post-erosion size (not setting erosion
            # frac)
            markers, df = erode_labels(
                sitk.GetArrayFromImage(labels_sitk), erosion,
                mirrored=mirrored, mirror_mult=mirror_mult)
            labels_sitk_markers = sitk_io.replace_sitk_with_numpy(
                labels_sitk, markers)
            sitk_io.write_reg_images(
                {config.RegNames.IMG_LABELS_MARKERS.value:
                     labels_sitk_markers},
                mod_path)
            df_io.data_frames_to_csv(
                df, "{}_markers.csv".format(os.path.splitext(mod_path)[0]))
        dfs_eros.append(df)

    pool = chunking.get_mp_pool()
    pool_results = []
    for img_path, df in zip(img_paths, dfs_eros):
        print("setting up atlas segmentation merge for", img_path)
        # convert labels image into markers; guard against a missing
        # erosion data frame when markers were not eroded
        exclude = None
        if df is not None:
            exclude = df.loc[
                np.isnan(df[config.SmoothingMetrics.FILTER_SIZE.value]),
                config.AtlasMetrics.REGION.value]
            print("excluding these labels from re-segmentation:\n", exclude)
        pool_results.append(pool.apply_async(
            edge_aware_segmentation,
            args=(img_path, config.atlas_profile, show, atlas, suffix,
                  exclude, mirror_mult)))
    for result in pool_results:
        # edge distance calculation and labels interior image generation
        # are multiprocessed, so run them as post-processing tasks to
        # avoid nested multiprocessing
        path = result.get()
        mod_path = path
        if suffix is not None:
            mod_path = libmag.insert_before_ext(path, suffix)

        # make edge distance images and stats
        labels_sitk = sitk_io.load_registered_img(
            mod_path, config.RegNames.IMG_LABELS.value, get_sitk=True)
        labels_np = sitk.GetArrayFromImage(labels_sitk)
        dist_to_orig, labels_edge = edge_distances(
            labels_np, path=path, spacing=labels_sitk.GetSpacing()[::-1])
        dist_sitk = sitk_io.replace_sitk_with_numpy(
            labels_sitk, dist_to_orig)
        labels_sitk_edge = sitk_io.replace_sitk_with_numpy(
            labels_sitk, labels_edge)

        labels_sitk_interior = None
        if erode["interior"]:
            # make interior images from labels using given targeted
            # post-erosion frac
            interior, _ = erode_labels(
                labels_np, erosion, erosion_frac=erosion_frac,
                mirrored=mirrored, mirror_mult=mirror_mult)
            labels_sitk_interior = sitk_io.replace_sitk_with_numpy(
                labels_sitk, interior)

        # write images to same directory as atlas
        imgs_write = {
            config.RegNames.IMG_LABELS_DIST.value: dist_sitk,
            config.RegNames.IMG_LABELS_EDGE.value: labels_sitk_edge,
            config.RegNames.IMG_LABELS_INTERIOR.value: labels_sitk_interior,
        }
        sitk_io.write_reg_images(imgs_write, mod_path)
        if show:
            for img in imgs_write.values():
                if img:
                    sitk.Show(img)
        print("finished {}".format(path))
    pool.close()
    pool.join()
    print("time elapsed for merging atlas segmentations:",
          time() - start_time)
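
# Hedged usage sketch for merge_atlas_segmentations: the atlas path is
# hypothetical, erosion/marker settings are assumed to come from
# `config.atlas_profile`, and the registered atlas, labels, edge, and
# marker images must already exist alongside the path.
def _example_merge_atlas_segmentations():
    merge_atlas_segmentations(
        ["atlas_dir/atlas.mhd"], show=False, atlas=True, suffix=None)
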
def select_blob_matches_by_blob_id(
        self, row_id: int, blobn: int, blob_ids: Sequence[int],
        max_params: int = 100000) -> "colocalizer.BlobMatch":
    """Select blob matches corresponding to the given blob IDs in the
    given blob column.

    Args:
        row_id: Row ID.
        blobn: 1 or 2 to indicate the first or second blob column,
            respectively.
        blob_ids: Blob IDs.
        max_params: Maximum number of parameters for the `SELECT`
            statements; defaults to 100000. The max is determined by
            `SQLITE_MAX_VARIABLE_NUMBER` set at sqlite3 compile time.
            If this number is exceeded, this function is called
            recursively with half the given `max_params`.

    Returns:
        Blob match object, which is empty if no matches are found.

    Raises:
        :class:`sqlite3.OperationalError`: If the maximum number of
            parameters is < 1.

    Deprecated: 1.6.0
        Use :meth:`select_blob_matches` instead.

    """
    if max_params < 1:
        raise sqlite3.OperationalError(
            "Could not determine number of parameters for selecting blob "
            "matches")

    matches = []
    if isinstance(blob_ids, np.ndarray):
        blob_ids = blob_ids.tolist()
    try:
        # select matches by block to avoid exceeding sqlite parameter limit
        nblocks = len(blob_ids) // max_params + 1
        for i in range(nblocks):
            _logger.info(
                "Selecting blob matches block %s of %s", i, nblocks - 1)
            ids = blob_ids[i*max_params:(i+1)*max_params]
            ids.insert(0, row_id)
            self.cur.execute(
                f"SELECT {_COLS_BLOB_MATCHES}, id FROM blob_matches "
                f"WHERE roi_id = ? AND blob{blobn} "
                f"IN ({','.join('?' * (len(ids) - 1))})", ids)
            df = self._parse_blob_matches(self.cur.fetchall()).df
            if df is not None:
                matches.append(df)
    except sqlite3.OperationalError:
        # call recursively with halved number of parameters
        _logger.debug(
            "Exceeded max sqlite query parameters; trying with smaller "
            "number")
        return self.select_blob_matches_by_blob_id(
            row_id, blobn, blob_ids, max_params // 2)

    if len(matches) > 0:
        return colocalizer.BlobMatch(df=df_io.data_frames_to_csv(matches))
    return colocalizer.BlobMatch()
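
# Hedged usage sketch for select_blob_matches_by_blob_id (hypothetical
# names): `db` is an instance of this class with an open connection. If the
# sqlite3 build rejects 100,000 parameters, the method retries itself with
# half as many per query until the queries succeed or max_params drops
# below 1:
#
#     match = db.select_blob_matches_by_blob_id(
#         roi_id, 1, np.arange(500000, dtype=int))
#     print(match.df)
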
def export_rois(db, image5d, channel, path, padding=None, unit_factor=None,
                truth_mode=None, exp_name=None):
    """Export all ROIs from database.

    If the current processing profile includes isotropic interpolation,
    the ROIs will be resized to make isotropic according to this factor.

    Args:
        db: Database from which to export.
        image5d: The image with the ROIs.
        channel (List[int]): Channels to export; currently only the first
            channel is used.
        path: Path with filename base from which to save the exported files.
        padding (List[int]): Padding in x,y,z to exclude from the ROI;
            defaults to None.
        unit_factor (float): Linear conversion factor for units (e.g.
            1000.0 to convert um to mm).
        truth_mode (:obj:`config.TruthDBModes`): Truth mode enum; defaults
            to None.
        exp_name (str): Name of experiment to export; defaults to None to
            export all experiments in ``db``.

    Returns:
        :obj:`pd.DataFrame`: ROI metrics in a data frame.
    """
    if padding is not None:
        padding = np.array(padding)
    # TODO: consider iterating through all channels
    channel = channel[0] if channel else 0

    # convert volume based on scaling and unit factor
    phys_mult = np.prod(detector.calc_scaling_factor())
    if unit_factor:
        phys_mult /= unit_factor ** 3

    metrics_all = {}
    exps = sqlite.select_experiment(db.cur, None)
    for exp in exps:
        if exp_name and exp["name"] != exp_name:
            # DBs may contain many experiments, which may not correspond to
            # image5d, eg verified DBs from many truth sets
            continue
        rois = sqlite.select_rois(db.cur, exp["id"])
        for roi in rois:
            # get ROI as a small image
            size = sqlite.get_roi_size(roi)
            offset = sqlite.get_roi_offset(roi)
            img3d = plot_3d.prepare_roi(image5d, size, offset)

            # get blobs and change confirmation flag to avoid confirmation
            # color in 2D plots
            roi_id = roi["id"]
            blobs = sqlite.select_blobs(db.cur, roi_id)
            blobs_detected = None
            if truth_mode is config.TruthDBModes.VERIFIED:
                # verified DBs use a truth value of -1 to indicate
                # "detected" (non-truth) blobs, including both correct and
                # incorrect detections, while the rest are "truth" blobs
                truth_vals = detector.get_blob_truth(blobs)
                blobs_detected = blobs[truth_vals == -1]
                blobs = blobs[truth_vals != -1]
            else:
                # default to include only confirmed blobs; truth sets
                # ironically do not use the truth flag but instead
                # assume all confirmed blobs are "truth" blobs
                blobs = blobs[detector.get_blob_confirmed(blobs) == 1]
            blobs[:, 4] = -1

            # adjust ROI size and offset if border set
            if padding is not None:
                size = np.subtract(img3d.shape[::-1], 2 * padding)
                img3d = plot_3d.prepare_roi(img3d, size, padding)
                blobs[:, 0:3] = np.subtract(
                    blobs[:, 0:3], np.add(offset, padding)[::-1])
            print("exporting ROI of shape {}".format(img3d.shape))

            isotropic = config.roi_profile["isotropic"]
            blobs_orig = blobs
            if isotropic is not None:
                # interpolation for isotropy if set in first processing
                # profile
                img3d = cv_nd.make_isotropic(img3d, isotropic)
                isotropic_factor = cv_nd.calc_isotropic_factor(isotropic)
                blobs_orig = np.copy(blobs)
                blobs = detector.multiply_blob_rel_coords(
                    blobs, isotropic_factor)

            # export ROI and 2D plots
            path_base, path_dir_nifti, path_img, path_img_nifti, \
                path_blobs, path_img_annot, path_img_annot_nifti = \
                make_roi_paths(path, roi_id, channel, make_dirs=True)
            np.save(path_img, img3d)
            print("saved 3D image to {}".format(path_img))
            # WORKAROUND: for some reason SimpleITK gives a conversion
            # error when converting from uint16 (>u2) Numpy array
            img3d = img3d.astype(np.float64)
            img3d_sitk = sitk.GetImageFromArray(img3d)
            '''
            print(img3d_sitk)
            print("orig img:\n{}".format(img3d[0]))
            img3d_back = sitk.GetArrayFromImage(img3d_sitk)
            print(img3d.shape, img3d.dtype, img3d_back.shape,
                  img3d_back.dtype)
            print("sitk img:\n{}".format(img3d_back[0]))
            '''
            sitk.WriteImage(img3d_sitk, path_img_nifti, False)
            roi_ed = roi_editor.ROIEditor(img3d)
            roi_ed.plot_roi(
                blobs, channel, show=False,
                title=os.path.splitext(path_img)[0])
            libmag.show_full_arrays()

            # export image and blobs, stripping blob flags and adjusting
            # user-added segments' radii; use original rather than blobs
            # with any interpolation since the ground truth will itself be
            # interpolated
            blobs = blobs_orig
            blobs = blobs[:, 0:4]
            # prior to v.0.5.0, user-added segments had a radius of 0.0
            blobs[np.isclose(blobs[:, 3], 0), 3] = 5.0
            # as of v.0.5.0, user-added segments have neg radii whose abs
            # value corresponds to the displayed radius
            blobs[:, 3] = np.abs(blobs[:, 3])
            # make more rounded since near-integer values appear to give
            # edges of 5 straight pixels
            # https://github.com/scikit-image/scikit-image/issues/2112
            #blobs[:, 3] += 1E-1
            blobs[:, 3] -= 0.5
            libmag.printv("blobs:\n{}".format(blobs))
            np.save(path_blobs, blobs)

            # convert blobs to ground truth
            img3d_truth = plot_3d.build_ground_truth(
                np.zeros(size[::-1], dtype=np.uint8), blobs)
            if isotropic is not None:
                img3d_truth = cv_nd.make_isotropic(img3d_truth, isotropic)
                # remove fancy blending since truth set must be binary
                img3d_truth[img3d_truth >= 0.5] = 1
                img3d_truth[img3d_truth < 0.5] = 0
            print("exporting truth ROI of shape {}".format(
                img3d_truth.shape))
            np.save(path_img_annot, img3d_truth)
            #print(img3d_truth)
            sitk.WriteImage(
                sitk.GetImageFromArray(img3d_truth), path_img_annot_nifti,
                False)
            # avoid smoothing interpolation, using "nearest" instead
            with plt.style.context(config.rc_params_mpl2_img_interp):
                roi_ed.plot_roi(
                    img3d_truth, None, channel, show=False,
                    title=os.path.splitext(path_img_annot)[0])

            # measure ROI metrics and export to data frame; use
            # AtlasMetrics enum vals since will need LabelMetrics names
            # instead
            metrics = {
                config.AtlasMetrics.SAMPLE.value: exp["name"],
                config.AtlasMetrics.CONDITION.value: "truth",
                config.AtlasMetrics.CHANNEL.value: channel,
                config.AtlasMetrics.OFFSET.value: offset,
                config.AtlasMetrics.SIZE.value: size,
            }
            # get basic counts for ROI and update volume for physical units
            vols.MeasureLabel.set_data(
                img3d, np.ones_like(img3d, dtype=np.int8))
            _, metrics_counts = vols.MeasureLabel.measure_counts(1)
            metrics_counts[vols.LabelMetrics.Volume] *= phys_mult
            for key, val in metrics_counts.items():
                # convert LabelMetrics to their name
                metrics[key.name] = val
            metrics[vols.LabelMetrics.Nuclei.name] = len(blobs)
            metrics_dicts = [metrics]
            if blobs_detected is not None:
                # add another row for detected blobs
                metrics_detected = dict(metrics)
                metrics_detected[
                    config.AtlasMetrics.CONDITION.value] = "detected"
                metrics_detected[vols.LabelMetrics.Nuclei.name] = len(
                    blobs_detected)
                metrics_dicts.append(metrics_detected)
            for m in metrics_dicts:
                for key, val in m.items():
                    metrics_all.setdefault(key, []).append(val)

            print("exported {}".format(path_base))

    #_test_loading_rois(db, channel, path)

    # convert to data frame and compute densities for nuclei and intensity
    df = df_io.dict_to_data_frame(metrics_all)
    vol = df[vols.LabelMetrics.Volume.name]
    df.loc[:, vols.LabelMetrics.DensityIntens.name] = (
        df[vols.LabelMetrics.Intensity.name] / vol)
    df.loc[:, vols.LabelMetrics.Density.name] = (
        df[vols.LabelMetrics.Nuclei.name] / vol)
    df = df_io.data_frames_to_csv(df, "{}_rois.csv".format(path))
    return df
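
# Hedged usage sketch for export_rois: the database and image here are
# assumptions (e.g. an already-loaded `config.db` and `config.image5d`),
# and the output path prefix is hypothetical.
def _example_export_rois():
    return export_rois(
        config.db, config.image5d, [0], "exported_rois",
        padding=(2, 2, 2), unit_factor=1000.0,
        truth_mode=config.TruthDBModes.VERIFIED, exp_name=None)
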
def detect_blobs_blocks(filename_base, image5d, offset, size, channels,
                        verify=False, save_dfs=True, full_roi=False,
                        coloc=False):
    """Detect blobs by block processing of a large image.

    All channels are processed in the same blocks.

    Args:
        filename_base: Base path to use for file output.
        image5d: Large image to process as a Numpy array of t,z,y,x,[c]
        offset: Sub-image offset given as coordinates in z,y,x.
        size: Sub-image shape given in z,y,x.
        channels (Sequence[int]): Sequence of channels, where None detects
            in all channels.
        verify: True to verify detections against truth database; defaults
            to False.
        save_dfs: True to save data frames to file; defaults to True.
        full_roi (bool): True to treat ``image5d`` as the full ROI;
            defaults to False.
        coloc (bool): True to perform blob co-localizations; defaults to
            False.

    Returns:
        tuple[int, int, int], str, :class:`magmap.cv.detector.Blobs`:
        Accuracy metrics from :func:`magmap.cv.detector.verify_rois`,
        feedback message from this same function, and detected blobs.
    """
    time_start = time()
    subimg_path_base = filename_base
    if size is None or offset is None:
        # uses the entire stack if no size or offset specified
        size = image5d.shape[1:4]
        offset = (0, 0, 0)
    else:
        # get base path for sub-image
        subimg_path_base = naming.make_subimage_name(
            filename_base, offset, size)
    filename_blobs = libmag.combine_paths(
        subimg_path_base, config.SUFFIX_BLOBS)

    # get ROI for given region, including all channels
    if full_roi:
        # treat the full image as the ROI
        roi = image5d[0]
    else:
        roi = plot_3d.prepare_subimg(image5d, offset, size)
    num_chls_roi = 1 if len(roi.shape) < 4 else roi.shape[3]
    if num_chls_roi < 2:
        coloc = False
        print("Unable to co-localize as image has only 1 channel")

    # prep chunking ROI into sub-ROIs with size based on segment_size,
    # scaling by physical units to make more independent of resolution;
    # use profile from first channel to be processed for block settings
    time_detection_start = time()
    settings = config.get_roi_profile(channels[0])
    print("Profile for block settings:", settings[settings.NAME_KEY])
    sub_roi_slices, sub_rois_offsets, denoise_max_shape, exclude_border, \
        tol, overlap_base, overlap, overlap_padding = setup_blocks(
            settings, roi.shape)

    # TODO: option to distribute groups of sub-ROIs to different servers
    # for blob detection
    seg_rois = StackDetector.detect_blobs_sub_rois(
        roi, sub_roi_slices, sub_rois_offsets, denoise_max_shape,
        exclude_border, coloc, channels)
    detection_time = time() - time_detection_start
    print("blob detection time (s):", detection_time)

    # prune blobs in overlapping portions of sub-ROIs
    time_pruning_start = time()
    segments_all, df_pruning = StackPruner.prune_blobs_mp(
        roi, seg_rois, overlap, tol, sub_roi_slices, sub_rois_offsets,
        channels, overlap_padding)
    pruning_time = time() - time_pruning_start
    print("blob pruning time (s):", pruning_time)
    #print("maxes:", np.amax(segments_all, axis=0))

    # get weighted mean of ratios
    if df_pruning is not None:
        print("\nBlob pruning ratios:")
        path_pruning = "blob_ratios.csv" if save_dfs else None
        df_pruning_all = df_io.data_frames_to_csv(
            df_pruning, path_pruning, show=" ")
        cols = df_pruning_all.columns.tolist()
        blob_pruning_means = {}
        if "blobs" in cols:
            blobs_unpruned = df_pruning_all["blobs"]
            num_blobs_unpruned = np.sum(blobs_unpruned)
            for col in cols[1:]:
                blob_pruning_means["mean_{}".format(col)] = [
                    np.sum(np.multiply(df_pruning_all[col], blobs_unpruned))
                    / num_blobs_unpruned]
            path_pruning_means = (
                "blob_ratios_means.csv" if save_dfs else None)
            df_pruning_means = df_io.dict_to_data_frame(
                blob_pruning_means, path_pruning_means, show=" ")
        else:
            print("no blob ratios found")

    '''# report any remaining duplicates
    np.set_printoptions(linewidth=500, threshold=10000000)
    print("all blobs (len {}):".format(len(segments_all)))
    sort = np.lexsort(
        (segments_all[:, 2], segments_all[:, 1], segments_all[:, 0]))
    blobs = segments_all[sort]
    print(blobs)
    print("checking for duplicates in all:")
    print(detector.remove_duplicate_blobs(blobs, slice(0, 3)))
    '''

    stats_detection = None
    fdbk = None
    colocs = None
    if segments_all is not None:
        # remove the duplicated elements that were used for pruning
        detector.replace_rel_with_abs_blob_coords(segments_all)
        if coloc:
            colocs = segments_all[:, 10:10+num_chls_roi].astype(np.uint8)
        # remove absolute coordinate and any co-localization columns
        segments_all = detector.remove_abs_blob_coords(segments_all)

        # compare detected blobs with truth blobs
        # TODO: assumes ground truth is relative to any ROI offset,
        # but should make customizable
        if verify:
            stats_detection, fdbk = verifier.verify_stack(
                filename_base, subimg_path_base, settings, segments_all,
                channels, overlap_base)

    if config.save_subimg:
        subimg_base_path = libmag.combine_paths(
            subimg_path_base, config.SUFFIX_SUBIMG)
        if (isinstance(config.image5d, np.memmap)
                and config.image5d.filename == os.path.abspath(
                    subimg_base_path)):
            # file at sub-image save path may have been opened as a memmap
            # file, in which case saving would fail
            libmag.warn("{} is currently open, cannot save sub-image"
                        .format(subimg_base_path))
        else:
            # write sub-image, which is in ROI (3D) format
            with open(subimg_base_path, "wb") as f:
                np.save(f, roi)

    # store blobs in Blobs instance
    # TODO: consider separating into blobs and blobs metadata archives
    blobs = detector.Blobs(
        segments_all, colocalizations=colocs, path=filename_blobs)
    blobs.resolutions = config.resolutions
    blobs.basename = os.path.basename(config.filename)
    blobs.roi_offset = offset
    blobs.roi_size = size

    # whole image benchmarking time
    times = (
        [detection_time],
        [pruning_time],
        time() - time_start)
    times_dict = {}
    for key, val in zip(StackTimes, times):
        times_dict[key] = val
    if segments_all is None:
        print("\nNo blobs detected")
    else:
        print("\nTotal blobs found:", len(segments_all))
        detector.show_blobs_per_channel(segments_all)
    print("\nTotal detection processing times (s):")
    path_times = "stack_detection_times.csv" if save_dfs else None
    df_io.dict_to_data_frame(times_dict, path_times, show=" ")

    return stats_detection, fdbk, blobs
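
# Hedged usage sketch for detect_blobs_blocks: the image and base path are
# hypothetical; passing `offset` and `size` as None detects across the
# entire stack, and `coloc=True` only takes effect for multi-channel images.
def _example_detect_blobs_blocks(image5d):
    stats, fdbk, blobs = detect_blobs_blocks(
        "sample_image", image5d, None, None, channels=[0],
        verify=False, save_dfs=True, full_roi=False, coloc=False)
    return blobs
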
def edge_aware_segmentation(
        path_atlas: str, atlas_profile: atlas_prof.AtlasProfile,
        show: bool = True, atlas: bool = True,
        suffix: Optional[str] = None,
        exclude_labels: Optional[pd.DataFrame] = None,
        mirror_mult: int = -1):
    """Segment an atlas using its previously generated edge map.

    Labels may not match their own underlying atlas image well,
    particularly in the orthogonal directions in which the labels were not
    constructed. To improve alignment between the labels and the atlas
    itself, register the labels to an automated, roughly segmented version
    of the atlas. The goal is to improve the labels' alignment so that the
    atlas/labels combination can be used for another form of automated
    segmentation by registering them to experimental brains via
    :func:``register``.

    Edge files are assumed to have been generated by
    :func:``make_edge_images``.

    Args:
        path_atlas: Path to the fixed file, typically the atlas file with
            stained sections. The corresponding edge and labels files will
            be loaded based on this path.
        atlas_profile: Atlas profile.
        show: True if the output images should be displayed; defaults to
            True.
        atlas: True if the primary image is an atlas, which is assumed to
            be symmetrical. False if the image is an experimental/sample
            image, in which case segmentation will be performed on the full
            images, and stats will not be performed.
        suffix: Modifier to append to end of ``path_atlas`` basename for
            registered image files that were output to a modified name;
            defaults to None. If ``atlas`` is True, ``suffix`` will only be
            applied to saved files, with files still loaded based on the
            original path.
        exclude_labels: Sequence of labels to exclude from the
            segmentation; defaults to None.
        mirror_mult: Multiplier for mirrored labels; defaults to -1 to make
            mirrored labels the inverse of their source labels.

    """
    # adjust image path with suffix
    load_path = path_atlas
    mod_path = path_atlas
    if suffix is not None:
        mod_path = libmag.insert_before_ext(mod_path, suffix)
        if atlas:
            load_path = mod_path

    # load corresponding files via SimpleITK
    atlas_sitk = sitk_io.load_registered_img(
        load_path, config.RegNames.IMG_ATLAS.value, get_sitk=True)
    atlas_sitk_edge = sitk_io.load_registered_img(
        load_path, config.RegNames.IMG_ATLAS_EDGE.value, get_sitk=True)
    labels_sitk = sitk_io.load_registered_img(
        load_path, config.RegNames.IMG_LABELS.value, get_sitk=True)
    labels_sitk_markers = sitk_io.load_registered_img(
        load_path, config.RegNames.IMG_LABELS_MARKERS.value, get_sitk=True)

    # get Numpy arrays of images
    atlas_img_np = sitk.GetArrayFromImage(atlas_sitk)
    atlas_edge = sitk.GetArrayFromImage(atlas_sitk_edge)
    labels_img_np = sitk.GetArrayFromImage(labels_sitk)
    markers = sitk.GetArrayFromImage(labels_sitk_markers)

    # segment image from markers
    sym_axis = atlas_refiner.find_symmetric_axis(atlas_img_np)
    mirrored = atlas and sym_axis >= 0
    len_half = None
    seg_args = {"exclude_labels": exclude_labels}
    edge_prof = atlas_profile[profiles.RegKeys.EDGE_AWARE_REANNOTATION]
    if edge_prof:
        edge_filt = edge_prof[profiles.RegKeys.WATERSHED_MASK_FILTER]
        if edge_filt and len(edge_filt) > 1:
            # watershed mask filter settings from atlas profile
            seg_args["mask_filt"] = edge_filt[0]
            seg_args["mask_filt_size"] = edge_filt[1]
    if mirrored:
        # segment only half of image, assuming symmetry
        len_half = atlas_img_np.shape[sym_axis] // 2
        slices = [slice(None)] * labels_img_np.ndim
        slices[sym_axis] = slice(len_half)
        sl = tuple(slices)
        labels_seg = segmenter.segment_from_labels(
            atlas_edge[sl], markers[sl], labels_img_np[sl], **seg_args)
    else:
        # segment the full image, including excluded labels on the
        # opposite side
        if exclude_labels is not None:
            # concatenate rather than list.extend, which returns None,
            # to include the mirrored versions of the excluded labels
            exclude_labels = (
                exclude_labels.tolist()
                + (mirror_mult * exclude_labels).tolist())
            seg_args["exclude_labels"] = exclude_labels
        labels_seg = segmenter.segment_from_labels(
            atlas_edge, markers, labels_img_np, **seg_args)

    smoothing = atlas_profile["smooth"]
    smoothing_mode = atlas_profile["smoothing_mode"]
    cond = ["edge-aware_seg"]
    if smoothing is not None:
        # smoothing by opening operation based on profile setting
        meas_smoothing = atlas_profile["meas_smoothing"]
        cond.append("smoothing")
        df_aggr, df_raw = atlas_refiner.smooth_labels(
            labels_seg, smoothing, smoothing_mode, meas_smoothing,
            labels_sitk.GetSpacing()[::-1])
        df_base_path = os.path.splitext(mod_path)[0]
        if df_raw is not None:
            # write raw smoothing metrics
            df_io.data_frames_to_csv(
                df_raw,
                f"{df_base_path}_{config.PATH_SMOOTHING_RAW_METRICS}")
        if df_aggr is not None:
            # write aggregated smoothing metrics
            df_io.data_frames_to_csv(
                df_aggr, f"{df_base_path}_{config.PATH_SMOOTHING_METRICS}")

    if mirrored:
        # mirror back to other half
        labels_seg = _mirror_imported_labels(
            labels_seg, len_half, mirror_mult, sym_axis)

    # expand background to smoothed background of original labels to
    # roughly match background while still allowing holes to be filled
    crop = atlas_profile["crop_to_orig"]
    atlas_refiner.crop_to_orig(labels_img_np, labels_seg, crop)

    if labels_seg.dtype != labels_img_np.dtype:
        # watershed may give different output type, so cast back if so
        labels_seg = labels_seg.astype(labels_img_np.dtype)
    labels_sitk_seg = sitk_io.replace_sitk_with_numpy(
        labels_sitk, labels_seg)

    # show DSCs for labels
    _logger.info(
        "\nMeasuring overlap of individual original and watershed labels:")
    dsc_lbls_indiv = atlas_refiner.measure_overlap_labels(
        labels_sitk, labels_sitk_seg)
    _logger.info(
        "\nMeasuring overlap of combined original and watershed labels:")
    dsc_lbls_comb = atlas_refiner.measure_overlap_labels(
        atlas_refiner.make_labels_fg(labels_sitk),
        atlas_refiner.make_labels_fg(labels_sitk_seg))
    _logger.info("")

    # measure and save whole atlas metrics
    metrics = {
        config.AtlasMetrics.SAMPLE: [os.path.basename(mod_path)],
        config.AtlasMetrics.REGION: config.REGION_ALL,
        config.AtlasMetrics.CONDITION: "|".join(cond),
        config.AtlasMetrics.DSC_LABELS_ORIG_NEW_COMBINED: dsc_lbls_comb,
        config.AtlasMetrics.DSC_LABELS_ORIG_NEW_INDIV: dsc_lbls_indiv,
    }
    df_metrics_path = libmag.combine_paths(
        mod_path, config.PATH_ATLAS_IMPORT_METRICS)
    atlas_refiner.measure_atlas_refinement(
        metrics, atlas_sitk, labels_sitk_seg, atlas_profile,
        df_metrics_path)

    # show and write image to same directory as atlas with appropriate
    # suffix
    sitk_io.write_reg_images(
        {config.RegNames.IMG_LABELS.value: labels_sitk_seg}, mod_path)
    if show:
        sitk.Show(labels_sitk_seg)
    return path_atlas
def detect_blobs_large_image(filename_base, image5d, offset, size,
                             verify=False, save_dfs=True, full_roi=False):
    """Detect blobs within a large image through parallel processing of
    smaller chunks.

    Args:
        filename_base: Base path to use for file output.
        image5d: Large image to process as a Numpy array of t,z,y,x,[c]
        offset: Sub-image offset given as coordinates in z,y,x.
        size: Sub-image shape given in z,y,x.
        verify: True to verify detections against truth database; defaults
            to False.
        save_dfs: True to save data frames to file; defaults to True.
        full_roi (bool): True to treat ``image5d`` as the full ROI;
            defaults to False.

    Returns:
        Accuracy metrics from :func:`detector.verify_rois`, feedback
        message from this same function, and all detected blobs.
    """
    time_start = time()
    if size is None or offset is None:
        # uses the entire stack if no size or offset specified
        size = image5d.shape[1:4]
        offset = (0, 0, 0)
    else:
        # change base filename for ROI-based partial stack
        filename_base = make_subimage_name(filename_base, offset, size)
    filename_subimg = libmag.combine_paths(
        filename_base, config.SUFFIX_SUBIMG)
    filename_blobs = libmag.combine_paths(
        filename_base, config.SUFFIX_BLOBS)

    # get ROI for given region, including all channels
    if full_roi:
        # treat the full image as the ROI
        roi = image5d[0]
    else:
        roi = plot_3d.prepare_subimg(image5d, size, offset)
    _, channels = plot_3d.setup_channels(roi, config.channel, 3)

    # prep chunking ROI into sub-ROIs with size based on segment_size,
    # scaling by physical units to make more independent of resolution
    time_detection_start = time()
    settings = config.roi_profile  # use default settings
    scaling_factor = detector.calc_scaling_factor()
    print("microscope scaling factor based on resolutions: {}"
          .format(scaling_factor))
    denoise_size = config.roi_profile["denoise_size"]
    denoise_max_shape = None
    if denoise_size:
        # further subdivide each sub-ROI for local preprocessing
        denoise_max_shape = np.ceil(
            np.multiply(scaling_factor, denoise_size)).astype(int)

    # overlap sub-ROIs to minimize edge effects
    overlap_base = chunking.calc_overlap()
    tol = np.multiply(
        overlap_base, settings["prune_tol_factor"]).astype(int)
    overlap_padding = np.copy(tol)
    overlap = np.copy(overlap_base)
    exclude_border = config.roi_profile["exclude_border"]
    if exclude_border is not None:
        # exclude border to avoid blob detector edge effects, where blobs
        # often collect at the faces of the sub-ROI;
        # ensure that overlap is greater than twice the border exclusion
        # per axis so that no plane will be excluded from both overlapping
        # sub-ROIs
        exclude_border_thresh = np.multiply(2, exclude_border)
        overlap_less = np.less(overlap, exclude_border_thresh)
        overlap[overlap_less] = exclude_border_thresh[overlap_less]
        excluded = np.greater(exclude_border, 0)
        overlap[excluded] += 1  # additional padding
        overlap_padding[excluded] = 0  # no need to prune past excluded border
    print("sub-ROI overlap: {}, pruning tolerance: {}, padding beyond "
          "overlap for pruning: {}, exclude borders: {}"
          .format(overlap, tol, overlap_padding, exclude_border))
    max_pixels = np.ceil(np.multiply(
        scaling_factor, config.roi_profile["segment_size"])).astype(int)
    print("preprocessing max shape: {}, detection max pixels: {}"
          .format(denoise_max_shape, max_pixels))
    sub_roi_slices, sub_rois_offsets = chunking.stack_splitter(
        roi.shape, max_pixels, overlap)

    # TODO: option to distribute groups of sub-ROIs to different servers
    # for blob detection
    seg_rois = detect_blobs_sub_rois(
        roi, sub_roi_slices, sub_rois_offsets, denoise_max_shape,
        exclude_border)
    detection_time = time() - time_detection_start
    print("blob detection time (s):", detection_time)

    # prune blobs in overlapping portions of sub-ROIs
    time_pruning_start = time()
    segments_all, df_pruning = _prune_blobs_mp(
        roi, seg_rois, overlap, tol, sub_roi_slices, sub_rois_offsets,
        channels, overlap_padding)
    pruning_time = time() - time_pruning_start
    print("blob pruning time (s):", pruning_time)
    #print("maxes:", np.amax(segments_all, axis=0))

    # get weighted mean of ratios
    if df_pruning is not None:
        print("\nBlob pruning ratios:")
        path_pruning = "blob_ratios.csv" if save_dfs else None
        df_pruning_all = df_io.data_frames_to_csv(
            df_pruning, path_pruning, show=" ")
        cols = df_pruning_all.columns.tolist()
        blob_pruning_means = {}
        if "blobs" in cols:
            blobs_unpruned = df_pruning_all["blobs"]
            num_blobs_unpruned = np.sum(blobs_unpruned)
            for col in cols[1:]:
                blob_pruning_means["mean_{}".format(col)] = [
                    np.sum(np.multiply(df_pruning_all[col], blobs_unpruned))
                    / num_blobs_unpruned]
            path_pruning_means = (
                "blob_ratios_means.csv" if save_dfs else None)
            df_pruning_means = df_io.dict_to_data_frame(
                blob_pruning_means, path_pruning_means, show=" ")
        else:
            print("no blob ratios found")

    '''# report any remaining duplicates
    np.set_printoptions(linewidth=500, threshold=10000000)
    print("all blobs (len {}):".format(len(segments_all)))
    sort = np.lexsort(
        (segments_all[:, 2], segments_all[:, 1], segments_all[:, 0]))
    blobs = segments_all[sort]
    print(blobs)
    print("checking for duplicates in all:")
    print(detector.remove_duplicate_blobs(blobs, slice(0, 3)))
    '''

    stats_detection = None
    fdbk = None
    if segments_all is not None:
        # remove the duplicated elements that were used for pruning
        detector.replace_rel_with_abs_blob_coords(segments_all)
        segments_all = detector.remove_abs_blob_coords(segments_all)

        # compare detected blobs with truth blobs
        # TODO: assumes ground truth is relative to any ROI offset,
        # but should make customizable
        if verify:
            db_path_base = None
            exp_name = os.path.splitext(
                os.path.basename(config.filename))[0]
            try:
                if config.truth_db is None:
                    # find and load truth DB based on filename and subimage
                    db_path_base = os.path.basename(filename_base)
                    print("about to verify with truth db from {}"
                          .format(db_path_base))
                    sqlite.load_truth_db(db_path_base)
                if config.truth_db is not None:
                    # truth DB may contain multiple experiments for
                    # different subimages; series not included in exp name
                    # since in ROI
                    rois = config.truth_db.get_rois(exp_name)
                    if rois is None:
                        # exp may have been named by ROI
                        print("{} experiment name not found, will try with "
                              "ROI offset/size".format(exp_name))
                        exp_name = make_subimage_name(
                            exp_name, offset, size)
                        rois = config.truth_db.get_rois(exp_name)
                    if rois is None:
                        raise LookupError(
                            "No truth set ROIs found for experiment {}, "
                            "will skip detection verification".format(
                                exp_name))
                    print("load ROIs from exp: {}".format(exp_name))
                    exp_id = sqlite.insert_experiment(
                        config.verified_db.conn, config.verified_db.cur,
                        exp_name, None)
                    verify_tol = np.multiply(
                        overlap_base, settings["verify_tol_factor"])
                    stats_detection, fdbk = detector.verify_rois(
                        rois, segments_all, config.truth_db.blobs_truth,
                        verify_tol, config.verified_db, exp_id,
                        config.channel)
            except FileNotFoundError:
                libmag.warn("Could not load truth DB from {}; "
                            "will not verify ROIs".format(db_path_base))
            except LookupError as e:
                libmag.warn(str(e))

    file_time_start = time()
    if config.save_subimg:
        if (isinstance(config.image5d, np.memmap)
                and config.image5d.filename == os.path.abspath(
                    filename_subimg)):
            # file at sub-image save path may have been opened as a memmap
            # file, in which case saving would fail
            libmag.warn("{} is currently open, cannot save sub-image"
                        .format(filename_subimg))
        else:
            # write sub-image, which is in ROI (3D) format
            with open(filename_subimg, "wb") as f:
                np.save(f, roi)

    # save blobs
    # TODO: only segments used; consider removing the rest except ver
    outfile_blobs = open(filename_blobs, "wb")
    np.savez(outfile_blobs,
             ver=BLOBS_NP_VER,
             segments=segments_all,
             resolutions=config.resolutions,
             basename=os.path.basename(config.filename),  # only save name
             offset=offset, roi_size=size)  # None unless explicitly set
    outfile_blobs.close()
    file_save_time = time() - file_time_start

    # whole image benchmarking time
    times = (
        [detection_time],
        [pruning_time],
        time() - time_start)
    times_dict = {}
    for key, val in zip(StackTimes, times):
        times_dict[key] = val
    if segments_all is None:
        print("\nNo blobs detected")
    else:
        print("\nTotal blobs found:", len(segments_all))
        detector.show_blobs_per_channel(segments_all)
    print("file save time:", file_save_time)
    print("\nTotal detection processing times (s):")
    path_times = "stack_detection_times.csv" if save_dfs else None
    df_io.dict_to_data_frame(times_dict, path_times, show=" ")

    return stats_detection, fdbk, segments_all
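
# Hedged usage sketch for detect_blobs_large_image: the image and base path
# are hypothetical; passing `offset` and `size` as None processes the whole
# stack, while `verify=True` requires a matching truth database.
def _example_detect_blobs_large_image(image5d):
    stats, fdbk, segments = detect_blobs_large_image(
        "sample_image", image5d, None, None,
        verify=False, save_dfs=True, full_roi=False)
    return segments
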