def verify_stack(filename_base, subimg_path_base, settings, segments_all,
                 channels, overlap_base):
    """Verify detected blobs for a stack against truth-set ROIs.

    Truth ROIs are looked up through :func:`_get_truth_db_rois`, first
    using any truth database already set in ``config.truth_db`` and then
    falling back to a database named after the sub-image. Each ROI is
    verified by :func:`verify_rois`, with results stored in
    ``config.verified_db`` and the verification data frame written to
    ``<exp_name>_verify.csv``-style path built by ``libmag.combine_paths``.

    Args:
        filename_base: Base path of the full image, passed to
            :func:`_get_truth_db_rois`.
        subimg_path_base: Base path of the sub-image; its basename is
            used as the name of the alternate truth database.
        settings: Mapping that provides ``verify_tol_factor``, which
            scales ``overlap_base`` into the verification tolerance.
        segments_all: Detected blobs, passed to :func:`verify_rois`.
        channels: Channels, passed to :func:`verify_rois`.
        overlap_base: Base overlap that is multiplied by the tolerance
            factor.

    Returns:
        Tuple of the detection stats and feedback from
        :func:`verify_rois`. Both are None if the truth database or its
        ROIs could not be loaded, in which case a warning is issued
        instead of raising.

    """
    db_path_base = os.path.basename(subimg_path_base)
    stats_detection = None
    fdbk = None
    try:
        # Truth databases are any database stored with manually
        # verified blobs and loaded at command-line with the
        # `--truth_db` flag or loaded here. While all experiments
        # can be stored in a single database, this verification also
        # supports experiments saved to separate databases in the
        # software root directory and named as a sub-image but with
        # the `sqlite.DB_SUFFIX_TRUTH` suffix. Experiments in the
        # database are also assumed to be named based on the full
        # image or the sub-image filename, without any directories.

        # load ROIs from previously loaded truth database or one loaded
        # based on sub-image filename
        exp_name, rois = _get_truth_db_rois(
            subimg_path_base, filename_base,
            db_path_base if config.truth_db is None else None)
        if rois is None:
            # load alternate truth database based on sub-image filename
            print("Loading truth ROIs from experiment:", exp_name)
            exp_name, rois = _get_truth_db_rois(
                subimg_path_base, filename_base, db_path_base)
        if config.truth_db is None:
            raise LookupError(
                "No truth database found for experiment {}, will "
                "skip detection verification".format(exp_name))
        if rois is None:
            raise LookupError(
                "No truth set ROIs found for experiment {}, will "
                "skip detection verification".format(exp_name))

        # verify each ROI and store results in a separate database
        exp_id = sqlite.insert_experiment(
            config.verified_db.conn, config.verified_db.cur,
            exp_name, None)
        verify_tol = np.multiply(
            overlap_base, settings["verify_tol_factor"])
        # NOTE(review): ``channels`` is assumed to be the final argument
        # to ``verify_rois`` -- confirm against its signature
        stats_detection, fdbk, df_verify = verify_rois(
            rois, segments_all, config.truth_db.blobs_truth,
            verify_tol, config.verified_db, exp_id, exp_name,
            channels)
        df_io.data_frames_to_csv(df_verify, libmag.combine_paths(
            exp_name, "verify.csv"))
    except FileNotFoundError:
        libmag.warn("Could not load truth DB from {}; "
                    "will not verify ROIs".format(db_path_base))
    except LookupError as e:
        # missing truth database or ROIs only skips verification
        libmag.warn(str(e))
    return stats_detection, fdbk
def meas_plot_zscores(path,
    """Measure and plot z-scores for given columns in a data frame.
        path (str): Path to data frame.
        metric_cols (List[str]): Sequence of column names for which to 
            compute z-scores.
        extra_cols (List[str]): Additional columns to included in the 
            output data frame.
        composites (List[Enum]): Sequence of enums specifying the 
            combination, typically from :class:`vols.MetricCombos`.
        size (List[int]): Sequence of ``width, height`` to size the figure; 
            defaults to None.
        show (bool): True to display the image; defaults to True.

    # NOTE(review): the signature stops after ``path`` and the docstring
    # above has no closing quotes or ``Args:`` header here -- confirm both
    # against the original definition
    # generate z-scores
    df = pd.read_csv(path)
    df = df_io.zscore_df(df, "Region", metric_cols, extra_cols, True)

    # generate composite score column
    df_comb = df_io.combine_cols(df, composites)
    # NOTE(review): the opening of the call that takes the next line's
    # arguments is not present here -- confirm the original statement
        df_comb, libmag.insert_before_ext(config.filename, "_zhomogeneity"))

    # shift metrics from each condition to separate columns
    # the unique conditions are read before the data frame is reshaped
    conds = np.unique(df["Condition"])
    df = df_io.cond_to_cols_df(df, ["Sample", "Region"], "Condition",
                               "original", metric_cols)
    # output path is derived from ``config.filename`` rather than ``path``
    path = libmag.insert_before_ext(config.filename, "_zscore")
    df_io.data_frames_to_csv(df, path)

    # display as probability plot
    lims = (-3, 3)
    # NOTE(review): only a fragment of the plotting call remains below; the
    # callee and its other arguments are not visible here
                             title="Region Match Z-Scores",
    def select_blob_matches_by_blob_id(self, row_id, blobn, blob_ids):
        """Select blob matches corresponding to the given blob IDs in the
        given blob column.

        The IDs are queried in blocks so that a single ``SELECT`` does not
        exceed the sqlite limit on bound parameters.

        Args:
            row_id (int): Row ID.
            blobn (int): 1 or 2 to indicate the first or second blob column,
                respectively.
            blob_ids (List[int]): Blob IDs, given as any sequence or a
                NumPy array.

        Returns:
            :class:`colocalizer.BlobMatch`: Blob match object,
            which is empty if no matches are found.

        """
        # work on a plain list so that arrays, tuples, and lists can all be
        # sliced into blocks and combined with the ROI ID
        if isinstance(blob_ids, np.ndarray):
            blob_ids = blob_ids.tolist()
        else:
            blob_ids = list(blob_ids)

        matches = []
        max_params = 990  # max params of 999 in sqlite < v3.32.0
        for start in range(0, len(blob_ids), max_params):
            # select blob matches by block to avoid exceeding sqlite parameter
            # limit; stepping by block size avoids a trailing empty block
            ids = blob_ids[start:start + max_params]
            self.cur.execute(
                "SELECT {}, id FROM blob_matches WHERE roi_id = ? "
                "AND blob{} IN ({})".format(
                    _COLS_BLOB_MATCHES, blobn, ",".join("?" * len(ids))),
                [row_id, *ids])
            df = self._parse_blob_matches(self.cur.fetchall()).df
            if df is not None:
                matches.append(df)

        if len(matches) > 0:
            # called without a path to combine the per-block data frames
            return colocalizer.BlobMatch(df=df_io.data_frames_to_csv(matches))
        return colocalizer.BlobMatch()
def meas_plot_coefvar(path, id_cols, cond_col, cond_base, metric_cols, 
                      composites, size_col=None, size=None, show=True):
    """Measure and plot coefficient of variation (CV) as a scatter plot.
    CV is computed two ways:
    - Based on columns and equation specified in ``composites``, applied 
      across all samples regardless of group
    - For each metric in ``metric_cols``, separated by groups
        path (str): Path to data frame.
        id_cols (List[str]): Sequence of columns to serve as index/indices.
        cond_col (str): Name of the condition column.
        cond_base (str): Name of the condition to which all other conditions 
            will be normalized.
        metric_cols (List[str]): Sequence of column names for which to 
            compute z-scores.
        composites (List[Enum]): Sequence of enums specifying the 
            combination, typically from :class:`vols.MetricCombos`.
        size_col (str): Name of weighting column for coefficient of 
            variation measurement; defaults to None.
        size (List[int]): Sequence of ``width, height`` to size the figure; 
            defaults to None.
        show (bool): True to display the image; defaults to True.

    # NOTE(review): the docstring above has no closing quotes or ``Args:``
    # header here -- confirm against the original definition
    # measure coefficient of variation per sample-region regardless of group
    df = pd.read_csv(path)
    df = df_io.combine_cols(df, composites)
    # NOTE(review): the opening of the call that takes the next line's
    # arguments is not present here -- confirm the original statement
        df, libmag.insert_before_ext(config.filename, "_coefvar"))
    # measure CV within each condition and shift metrics from each 
    # condition to separate columns
    df = df_io.coefvar_df(df, [*id_cols, cond_col], metric_cols, size_col)
    # the unique conditions are read before the data frame is reshaped
    conds = np.unique(df[cond_col])
    df = df_io.cond_to_cols_df(df, id_cols, cond_col, cond_base, metric_cols)
    # output path is derived from ``config.filename`` rather than ``path``
    path = libmag.insert_before_ext(config.filename, "_coefvartransp")
    df_io.data_frames_to_csv(df, path)
    # display CV measured by condition as probability plot
    lims = (0, 0.7)
    # NOTE(review): the callee for the argument list below is not visible
    # here; the same limits are passed for both axes
        path, conds, metric_cols, "Volume",
        xlim=lims, ylim=lims, title="Coefficient of Variation", 
        fig_size=size, show=show, suffix=None, df=df)
def plot_knns(img_paths, suffix=None, show=False, names=None):
    """Plot k-nearest-neighbor distances for multiple sets of blobs,
    overlaying on a single plot.

        img_paths (List[str]): Base paths from which registered labels and
            blobs files will be found and output blobs file save location
            will be constructed.
        suffix (str): Suffix for ``path``; defaults to None.
        show (bool): True to plot the distances; defaults to False.
        names (List[str]): Sequence of names corresponding to ``img_paths``
            for the plot legend.

    # NOTE(review): the docstring above has no closing quotes or ``Args:``
    # header here -- confirm against the original definition
    cluster_settings = config.atlas_profile[profiles.RegKeys.METRICS_CLUSTER]
    knn_n = cluster_settings[profiles.RegKeys.KNN_N]
    if not knn_n:
        # no neighbor count configured, so derive it from the DBSCAN
        # minimum points setting
        knn_n = cluster_settings[profiles.RegKeys.DBSCAN_MINPTS] - 1
    print("Calculating k-nearest-neighbor distances and plotting distances "
          "for neighbor {}".format(knn_n))

    # set up combined data frames for all samples at each zoom level
    df_keys = ("ov", "zoom")
    dfs_comb = {key: [] for key in df_keys}
    # display names start from the caller's names, or empty if none given
    names_disp = names if names else []
    for i, img_path in enumerate(img_paths):
        # load blobs associated with image
        mod_path = img_path
        if suffix is not None:
            mod_path = libmag.insert_before_ext(img_path, suffix)
        # labels are loaded from the suffixed path, while blobs and scaling
        # use the unmodified ``img_path``
        labels_img_np = sitk_io.load_registered_img(
            mod_path, config.RegNames.IMG_LABELS.value)
        blobs = detector.Blobs().load_blobs(np_io.img_to_blobs_path(img_path))
        scaling, res = np_io.find_scaling(img_path, labels_img_np.shape)
        if blobs is None:
            libmag.warn("unable to load nuclei coordinates for", img_path)
        # NOTE(review): nothing here skips the iteration after the warning,
        # so ``blobs.blobs`` below is reached with ``blobs`` as None --
        # confirm whether a ``continue`` belongs in the branch above
        # convert to physical units and display k-nearest-neighbors for nuclei
        blobs_phys = np.multiply(blobs.blobs[:, :3], res)
        # TESTING: given the same blobs, simply shift
        #blobs = np.multiply(blobs[i*10000000:, :3], res)
        # only the third returned value, the data frames, is used
        _, _, dfs = knn_dist(blobs_phys, knn_n, 2, 1000000, False)
        if names is None:
            # default to naming from filename
        # NOTE(review): the bodies of the ``if`` above and the ``for`` below
        # are not present here -- confirm the original statements
        for j, df in enumerate(dfs):

    for key in dfs_comb:
        # combine data frames at each zoom level, save, and plot with
        # different colors for each image
        df = df_io.join_dfs(dfs_comb[key], "point")
        dist_cols = [col for col in df.columns if col.startswith("dist")]
        # ``zip`` pairs columns with names only up to the shorter sequence
        rename_cols = {col: name for col, name in zip(dist_cols, names_disp)}
        df = df.rename(rename_cols, axis=1)
        out_path = "knn_dist_combine_{}".format(key)
        df_io.data_frames_to_csv(df, out_path)
def plot_intensity_nuclei(paths, labels, size=None, show=True, unit=None):
    """Plot nuclei vs. intensity as a scatter plot.
        paths (List[str]): Sequence of paths to CSV files.
        labels (List[str]): Sequence of label metrics corresponding to 
        size (List[int]): Sequence of ``width, height`` to size the figure; 
            defaults to None.
        show (bool): True to display the image; defaults to True.
        unit (str): Denominator unit for density plot; defaults to None.
        :obj:`pd.DataFrame`: Data frame with columns matching ``labels``
        for the given ``paths`` concatenated.

    # NOTE(review): the docstring above has no closing quotes or section
    # headers here -- confirm against the original definition
    def plot(lbls, suffix=None, unit=None):
        # nested helper; reads ``df`` from the enclosing function's scope
        cols_xy = []
        for label in lbls:
            # get columns for the given label to plot on a given axis; assume
            # same order of labels for each group of columns so they correspond
            cols_xy.append([c for c in df.columns if c.split(".")[0] == label])

        names_group = None
        if cols_xy:
            # extract legend names assuming label.cond format
            names_group = np.unique([c.split(".")[1] for c in cols_xy[0]])
        # axis units use the text before the first underscore of each label
        units = (["{}/{}".format(l.split("_")[0], unit)
                  for l in lbls] if unit else (None, None))
        lbls = [l.replace("_", " ") for l in lbls]
        # the title format consumes the first two labels
        title = "{} Vs. {} By Region".format(*lbls)
        # NOTE(review): the plotting call that used the values above is not
        # present here; only the commented-out argument below remains
            # col_annot=config.AtlasMetrics.REGION_ABBR.value,

    # at least two paths and two labels are required; otherwise returns None
    if len(paths) < 2 or len(labels) < 2: return
    dfs = [pd.read_csv(path) for path in paths]
    # merge data frames with all columns ending with .mean, prepending labels
    # NOTE(review): the ``extra_cols`` list and the ``append_cols`` call below
    # are cut off before their closing brackets here
    extra_cols = [
    tag = ".mean"
    # only the first two data frames are merged
    df = df_io.append_cols(dfs[:2], labels, lambda x: x.lower().endswith(tag),
    dens = "{}_density"
    for col in df.columns:
        # NOTE(review): ``str.startswith`` accepts a str or a tuple of str,
        # so ``labels`` given as a list would raise TypeError -- confirm that
        # callers pass a tuple
        if col.startswith(labels):
            col_split = col.split(".")
            col_split[0] = dens.format(col_split[0])
            # NOTE(review): the density assignment below is truncated on
            # both sides here
                   ".".join(col_split)] = (df[col] /
    # strip the tag from column names
    # keeps only the text before the first occurrence of the tag
    names = {col: col.rsplit(tag)[0] for col in df.columns}
    df = df.rename(columns=names)
    df_io.data_frames_to_csv(df, "vols_stats_intensVnuc.csv")

    # plot labels and density labels
    plot([dens.format(l) for l in labels], "_density", unit)

    return df
def merge_atlas_segmentations(img_paths, show=True, atlas=True, suffix=None):
    """Merge atlas segmentations for a list of files as a multiprocessing 
    wrapper for :func:``merge_atlas_segmentations``, after which 
    edge image post-processing is performed separately since it 
    contains tasks also performed in multiprocessing.
        img_paths (List[str]): Sequence of image paths to load.
        show (bool): True if the output images should be displayed; defaults 
            to True.
        atlas (bool): True if the image is an atlas; defaults to True.
        suffix (str): Modifier to append to end of ``img_path`` basename for 
            registered image files that were output to a modified name; 
            defaults to None.
    # NOTE(review): the docstring above has no closing quotes or ``Args:``
    # header here, and it names this function as the one it wraps --
    # confirm the intended target against the original definition
    start_time = time()
    # erode all labels images into markers for watershed; not multiprocessed
    # since erosion is itself multiprocessed
    erode = config.atlas_profile["erode_labels"]
    erosion = config.atlas_profile[profiles.RegKeys.EDGE_AWARE_REANNOTATION]
    erosion_frac = config.atlas_profile["erosion_frac"]
    # mirroring is only considered when the image is an atlas
    mirrored = atlas and _is_profile_mirrored()
    mirror_mult = _get_mirror_mult()
    dfs_eros = []
    for img_path in img_paths:
        mod_path = img_path
        if suffix is not None:
            mod_path = libmag.insert_before_ext(mod_path, suffix)
        labels_sitk = sitk_io.load_registered_img(
            mod_path, config.RegNames.IMG_LABELS.value, get_sitk=True)
        print("Eroding labels to generate markers for atlas segmentation")
        df = None
        if erode["markers"]:
            # use default minimal post-erosion size (not setting erosion frac)
            markers, df = erode_labels(
                sitk.GetArrayFromImage(labels_sitk), erosion,
                mirrored=mirrored, mirror_mult=mirror_mult)
            labels_sitk_markers = sitk_io.replace_sitk_with_numpy(
                labels_sitk, markers)
            # NOTE(review): the calls that take the two argument lines below
            # are not present here, and nothing here appends ``df`` to
            # ``dfs_eros`` -- confirm the original statements
                {config.RegNames.IMG_LABELS_MARKERS.value: labels_sitk_markers},
                df, "{}_markers.csv".format(os.path.splitext(mod_path)[0]))
    pool = chunking.get_mp_pool()
    pool_results = []
    for img_path, df in zip(img_paths, dfs_eros):
        print("setting up atlas segmentation merge for", img_path)
        # convert labels image into markers
        # NOTE(review): the ``exclude`` selection and the pool submission
        # below are truncated here
        exclude = df.loc[
        print("excluding these labels from re-segmentation:\n", exclude)
            args=(img_path, show, atlas, suffix, exclude, mirror_mult)))
    for result in pool_results:
        # edge distance calculation and labels interior image generation 
        # are multiprocessed, so run them as post-processing tasks to 
        # avoid nested multiprocessing
        path = result.get()
        mod_path = path
        if suffix is not None:
            mod_path = libmag.insert_before_ext(path, suffix)
        # make edge distance images and stats
        labels_sitk = sitk_io.load_registered_img(
            mod_path, config.RegNames.IMG_LABELS.value, get_sitk=True)
        labels_np = sitk.GetArrayFromImage(labels_sitk)
        # the image spacing is passed in reversed order
        dist_to_orig, labels_edge = edge_distances(
            labels_np, path=path, spacing=labels_sitk.GetSpacing()[::-1])
        dist_sitk = sitk_io.replace_sitk_with_numpy(labels_sitk, dist_to_orig)
        labels_sitk_edge = sitk_io.replace_sitk_with_numpy(
            labels_sitk, labels_edge)

        # the interior image stays None unless interior erosion is enabled
        labels_sitk_interior = None
        if erode["interior"]:
            # make interior images from labels using given targeted 
            # post-erosion frac
            interior, _ = erode_labels(
                labels_np, erosion, erosion_frac=erosion_frac, 
                mirrored=mirrored, mirror_mult=mirror_mult)
            labels_sitk_interior = sitk_io.replace_sitk_with_numpy(
                labels_sitk, interior)
        # write images to same directory as atlas
        # NOTE(review): the closing brace of the dict below is not present
        # here
        imgs_write = {
            config.RegNames.IMG_LABELS_DIST.value: dist_sitk, 
            config.RegNames.IMG_LABELS_EDGE.value: labels_sitk_edge, 
            config.RegNames.IMG_LABELS_INTERIOR.value: labels_sitk_interior, 
        sitk_io.write_reg_images(imgs_write, mod_path)
        if show:
            for img in imgs_write.values():
                # skips entries left as None, such as a disabled interior
                if img: sitk.Show(img)
        print("finished {}".format(path))
    print("time elapsed for merging atlas segmentations:", time() - start_time)
    def select_blob_matches_by_blob_id(
            self,
            row_id: int,
            blobn: int,
            blob_ids: Sequence[int],
            max_params: int = 100000
    ) -> "colocalizer.BlobMatch":
        """Select blob matches corresponding to the given blob IDs in the
        given blob column.

        Args:
            row_id: Row ID.
            blobn: 1 or 2 to indicate the first or second blob column,
                respectively.
            blob_ids: Blob IDs, given as any sequence or a NumPy array.
            max_params: Maximum number of parameters for the `SELECT`
                statements; defaults to 100000. The max is determined by
                `SQLITE_MAX_VARIABLE_NUMBER` set at the sqlite3 compile
                time. If this number is exceeded, this function is called
                recursively with half the given `max_params`.

        Returns:
            Blob match object, which is empty if no matches are found.

        Raises:
            :class:`sqlite3.OperationalError`: if the maximum number of
            parameters is < 1.

        Deprecated: 1.6.0
            Use :meth:`select_blob_matches` instead.

        """
        # imported locally so that the method does not depend on a
        # module-level logger name
        import logging
        logger = logging.getLogger(__name__)

        if max_params < 1:
            raise sqlite3.OperationalError(
                "Could not determine number of parameters for selecting blob "
                "matches")

        # work on a plain list so that arrays, tuples, and lists can all be
        # sliced into blocks and combined with the ROI ID
        if isinstance(blob_ids, np.ndarray):
            blob_ids = blob_ids.tolist()
        else:
            blob_ids = list(blob_ids)

        matches = []
        try:
            # select matches by block to avoid exceeding sqlite parameter
            # limit; stepping by block size avoids a trailing empty block
            starts = range(0, len(blob_ids), max_params)
            nblocks = len(starts)
            for i, start in enumerate(starts):
                logger.info(
                    "Selecting blob matches block %s of %s", i, nblocks - 1)
                ids = blob_ids[start:start + max_params]
                self.cur.execute(
                    f"SELECT {_COLS_BLOB_MATCHES}, id FROM blob_matches "
                    f"WHERE roi_id = ? AND blob{blobn} "
                    f"IN ({','.join('?' * len(ids))})",
                    [row_id, *ids])
                df = self._parse_blob_matches(self.cur.fetchall()).df
                if df is not None:
                    matches.append(df)
        except sqlite3.OperationalError:
            # call recursively with halved number of parameters; partial
            # results are discarded since the retry starts from the first ID
            logger.debug(
                "Exceeded max sqlite query parameters; trying with smaller "
                "max params: %s", max_params // 2)
            return self.select_blob_matches_by_blob_id(
                row_id, blobn, blob_ids, max_params // 2)

        if len(matches) > 0:
            # called without a path to combine the per-block data frames
            return colocalizer.BlobMatch(df=df_io.data_frames_to_csv(matches))
        return colocalizer.BlobMatch()
def export_rois(db,
    """Export all ROIs from database.
    If the current processing profile includes isotropic interpolation, the 
    ROIs will be resized to make isotropic according to this factor.
        db: Database from which to export.
        image5d: The image with the ROIs.
        channel (List[int]): Channels to export; currently only the first
            channel is used.
        path: Path with filename base from which to save the exported files.
        padding (List[int]): Padding in x,y,z to exclude from the ROI;
            defaults to None.
        unit_factor (float): Linear conversion factor for units (eg 1000.0
            to convert um to mm).
        truth_mode (:obj:`config.TruthDBModes`): Truth mode enum; defaults
            to None.
        exp_name (str): Name of experiment to export; defaults to None to
            export all experiments in ``db``.
        :obj:`pd.DataFrame`: ROI metrics in a data frame.
    # NOTE(review): the signature stops after ``db`` and the docstring above
    # has no closing quotes or section headers here -- confirm both against
    # the original definition
    if padding is not None:
        # as an array so that arithmetic on the padding is element-wise
        padding = np.array(padding)

    # TODO: consider iterating through all channels
    # uses the first given channel, or channel 0 if none is given
    channel = channel[0] if channel else 0

    # convert volume base on scaling and unit factor
    # NOTE(review): the right-hand side of the assignment below is not
    # present here
    phys_mult =
    # the linear unit factor is cubed to apply to volumes
    if unit_factor: phys_mult /= unit_factor**3

    metrics_all = {}
    exps = sqlite.select_experiment(db.cur, None)
    for exp in exps:
        if exp_name and exp["name"] != exp_name:
            # DBs may contain many experiments, which may not correspond to
            # image5d, eg verified DBs from many truth sets
        # NOTE(review): the ``if`` above has no statement in its body here;
        # presumably it skips non-matching experiments -- confirm
        rois = sqlite.select_rois(db.cur, exp["id"])
        for roi in rois:
            # get ROI as a small image
            size = sqlite.get_roi_size(roi)
            offset = sqlite.get_roi_offset(roi)
            img3d = plot_3d.prepare_roi(image5d, size, offset)

            # get blobs and change confirmation flag to avoid confirmation
            # color in 2D plots
            roi_id = roi["id"]
            blobs = sqlite.select_blobs(db.cur, roi_id)
            blobs_detected = None
            if truth_mode is config.TruthDBModes.VERIFIED:
                # verified DBs use a truth value of -1 to indicate "detected",
                # non-truth blobs, including both correct and incorrect
                # detections, while the rest of blobs are "truth" blobs
                truth_vals = detector.get_blob_truth(blobs)
                blobs_detected = blobs[truth_vals == -1]
                blobs = blobs[truth_vals != -1]
                # NOTE(review): the comment below describes a default case,
                # but no ``else:`` is present here, so the confirmed-blob
                # filter also runs in verified mode -- confirm
                # default to include only confirmed blobs; truth sets
                # ironically do not use the truth flag but instead
                # assume all confirmed blobs are "truth"
                blobs = blobs[detector.get_blob_confirmed(blobs) == 1]
            blobs[:, 4] = -1

            # adjust ROI size and offset if border set
            if padding is not None:
                # shape is reversed to match the order of ``padding``, and
                # the padding is removed from both sides of each axis
                size = np.subtract(img3d.shape[::-1], 2 * padding)
                img3d = plot_3d.prepare_roi(img3d, size, padding)
                blobs[:, 0:3] = np.subtract(blobs[:, 0:3],
                                            np.add(offset, padding)[::-1])
            print("exporting ROI of shape {}".format(img3d.shape))

            isotropic = config.roi_profile["isotropic"]
            blobs_orig = blobs
            if isotropic is not None:
                # interpolation for isotropy if set in first processing profile
                img3d = cv_nd.make_isotropic(img3d, isotropic)
                isotropic_factor = cv_nd.calc_isotropic_factor(isotropic)
                # copied so that the unscaled blobs remain available below
                blobs_orig = np.copy(blobs)
                blobs = detector.multiply_blob_rel_coords(
                    blobs, isotropic_factor)

            # export ROI and 2D plots
            path_base, path_dir_nifti, path_img, path_img_nifti, path_blobs, \
                path_img_annot, path_img_annot_nifti = make_roi_paths(
                    path, roi_id, channel, make_dirs=True)
            # NOTE(review): the start of the statement on the next line is
            # not present here -- confirm the original call
  , img3d)
            print("saved 3D image to {}".format(path_img))
            # WORKAROUND: for some reason SimpleITK gives a conversion error
            # when converting from uint16 (>u2) Numpy array
            img3d = img3d.astype(np.float64)
            img3d_sitk = sitk.GetImageFromArray(img3d)
            # the array is converted back only to print it for comparison
            print("orig img:\n{}".format(img3d[0]))
            img3d_back = sitk.GetArrayFromImage(img3d_sitk)
            print(img3d.shape, img3d.dtype, img3d_back.shape, img3d_back.dtype)
            print("sitk img:\n{}".format(img3d_back[0]))
            sitk.WriteImage(img3d_sitk, path_img_nifti, False)
            roi_ed = roi_editor.ROIEditor(img3d)

            # export image and blobs, stripping blob flags and adjusting
            # user-added segments' radii; use original rather than blobs with
            # any interpolation since the ground truth will itself be
            # interpolated
            blobs = blobs_orig
            blobs = blobs[:, 0:4]
            # prior to v.0.5.0, user-added segments had a radius of 0.0
            blobs[np.isclose(blobs[:, 3], 0), 3] = 5.0
            # as of v.0.5.0, user-added segments have neg radii whose abs
            # value corresponds to the displayed radius
            blobs[:, 3] = np.abs(blobs[:, 3])
            # make more rounded since near-integer values appear to give
            # edges of 5 straight pixels
            #blobs[:, 3] += 1E-1
            blobs[:, 3] -= 0.5
            # NOTE(review): the start of the statement on the next line is
            # not present here -- confirm the original call
  , blobs)

            # convert blobs to ground truth
            img3d_truth = plot_3d.build_ground_truth(
                np.zeros(size[::-1], dtype=np.uint8), blobs)
            if isotropic is not None:
                img3d_truth = cv_nd.make_isotropic(img3d_truth, isotropic)
                # remove fancy blending since truth set must be binary
                img3d_truth[img3d_truth >= 0.5] = 1
                img3d_truth[img3d_truth < 0.5] = 0
            print("exporting truth ROI of shape {}".format(img3d_truth.shape))
            # NOTE(review): the starts of the statements on the next two
            # lines are not present here -- confirm the original calls
  , img3d_truth)
                            path_img_annot_nifti, False)
            # avoid smoothing interpolation, using "nearest" instead

            # measure ROI metrics and export to data frame; use AtlasMetrics
            # enum vals since will need LabelMetrics names instead
            # NOTE(review): the closing brace of the dict below is not
            # present here
            metrics = {
                config.AtlasMetrics.SAMPLE.value: exp["name"],
                config.AtlasMetrics.CONDITION.value: "truth",
                config.AtlasMetrics.CHANNEL.value: channel,
                config.AtlasMetrics.OFFSET.value: offset,
                config.AtlasMetrics.SIZE.value: size,
            # get basic counts for ROI and update volume for physical units
            vols.MeasureLabel.set_data(img3d, np.ones_like(img3d,
            _, metrics_counts = vols.MeasureLabel.measure_counts(1)
            metrics_counts[vols.LabelMetrics.Volume] *= phys_mult
            # NOTE(review): the subscripts inside ``metrics[]`` and
            # ``metrics_detected[]`` below are empty here -- confirm the
            # intended keys
            for key, val in metrics_counts.items():
                # convert LabelMetrics to their name
                metrics[] = val
            metrics[] = len(blobs)
            metrics_dicts = [metrics]
            if blobs_detected is not None:
                # add another row for detected blobs
                metrics_detected = dict(metrics)
                    config.AtlasMetrics.CONDITION.value] = "detected"
                metrics_detected[] = len(
            # collect each metric into a list of values per key
            for m in metrics_dicts:
                for key, val in m.items():
                    metrics_all.setdefault(key, []).append(val)

            print("exported {}".format(path_base))

    #_test_loading_rois(db, channel, path)

    # convert to data frame and compute densities for nuclei and intensity
    df = df_io.dict_to_data_frame(metrics_all)
    # NOTE(review): the column keys in the subscripts below are empty here
    # -- confirm the intended columns
    vol = df[]
    df.loc[:,] = (
        df[] / vol)
    df.loc[:,] = (
        df[] / vol)
    df = df_io.data_frames_to_csv(df, "{}_rois.csv".format(path))
    return df
def detect_blobs_blocks(filename_base, image5d, offset, size, channels,
                        verify=False, save_dfs=True, full_roi=False,
    """Detect blobs by block processing of a large image.
    All channels are processed in the same blocks.
        filename_base: Base path to use file output.
        image5d: Large image to process as a Numpy array of t,z,y,x,[c]
        offset: Sub-image offset given as coordinates in z,y,x.
        size: Sub-image shape given in z,y,x.
        channels (Sequence[int]): Sequence of channels, where None detects
            in all channels.
        verify: True to verify detections against truth database; defaults 
            to False.
        save_dfs: True to save data frames to file; defaults to True.
        full_roi (bool): True to treat ``image5d`` as the full ROI; defaults
            to False.
        coloc (bool): True to perform blob co-localizations; defaults to False.
        tuple[int, int, int], str, :class:``:
        Accuracy metrics from :class:``,
        feedback message from this same function, and detected blobs.
    # NOTE(review): the signature stops before the ``coloc`` parameter that
    # the docstring describes, and the docstring has no closing quotes or
    # section headers here -- confirm both against the original definition
    time_start = time()
    subimg_path_base = filename_base
    if size is None or offset is None:
        # uses the entire stack if no size or offset specified
        size = image5d.shape[1:4]
        offset = (0, 0, 0)
        # NOTE(review): no ``else:`` is present here, so the sub-image name
        # below is also built when the whole stack is used -- confirm
        # get base path for sub-image
        subimg_path_base = naming.make_subimage_name(
            filename_base, offset, size)
    filename_blobs = libmag.combine_paths(subimg_path_base, config.SUFFIX_BLOBS)
    # get ROI for given region, including all channels
    if full_roi:
        # treat the full image as the ROI
        roi = image5d[0]
        # NOTE(review): no ``else:`` is present here, so the line below
        # replaces the full-image ROI just assigned -- confirm
        roi = plot_3d.prepare_subimg(image5d, offset, size)
    # a ROI with fewer than 4 dimensions is treated as single-channel
    num_chls_roi = 1 if len(roi.shape) < 4 else roi.shape[3]
    if num_chls_roi < 2:
        coloc = False
        print("Unable to co-localize as image has only 1 channel")
    # prep chunking ROI into sub-ROIs with size based on segment_size, scaling
    # by physical units to make more independent of resolution; use profile
    # from first channel to be processed for block settings
    time_detection_start = time()
    settings = config.get_roi_profile(channels[0])
    print("Profile for block settings:", settings[settings.NAME_KEY])
    sub_roi_slices, sub_rois_offsets, denoise_max_shape, exclude_border, \
        tol, overlap_base, overlap, overlap_padding = setup_blocks(
            settings, roi.shape)
    # TODO: option to distribute groups of sub-ROIs to different servers 
    # for blob detection
    seg_rois = StackDetector.detect_blobs_sub_rois(
        roi, sub_roi_slices, sub_rois_offsets, denoise_max_shape,
        exclude_border, coloc, channels)
    detection_time = time() - time_detection_start
    print("blob detection time (s):", detection_time)
    # prune blobs in overlapping portions of sub-ROIs
    time_pruning_start = time()
    # NOTE(review): the argument list of the call below is not closed here
    segments_all, df_pruning = StackPruner.prune_blobs_mp(
        roi, seg_rois, overlap, tol, sub_roi_slices, sub_rois_offsets, channels,
    pruning_time = time() - time_pruning_start
    print("blob pruning time (s):", pruning_time)
    #print("maxes:", np.amax(segments_all, axis=0))
    # get weighted mean of ratios
    if df_pruning is not None:
        print("\nBlob pruning ratios:")
        # a path of None is passed when data frames should not be saved
        path_pruning = "blob_ratios.csv" if save_dfs else None
        df_pruning_all = df_io.data_frames_to_csv(
            df_pruning, path_pruning, show=" ")
        cols = df_pruning_all.columns.tolist()
        blob_pruning_means = {}
        if "blobs" in cols:
            blobs_unpruned = df_pruning_all["blobs"]
            num_blobs_unpruned = np.sum(blobs_unpruned)
            # every column after the first is averaged, weighted by the
            # values in the "blobs" column
            for col in cols[1:]:
                blob_pruning_means["mean_{}".format(col)] = [
                    np.sum(np.multiply(df_pruning_all[col], blobs_unpruned)) 
                    / num_blobs_unpruned]
            path_pruning_means = "blob_ratios_means.csv" if save_dfs else None
            df_pruning_means = df_io.dict_to_data_frame(
                blob_pruning_means, path_pruning_means, show=" ")
            # NOTE(review): no ``else:`` is present here, so the message
            # below prints even when ratios were found -- confirm
            print("no blob ratios found")
    # NOTE(review): the triple-quoted block opened on the next line is not
    # closed here -- confirm where the disabled duplicate report ends
    '''# report any remaining duplicates
    np.set_printoptions(linewidth=500, threshold=10000000)
    print("all blobs (len {}):".format(len(segments_all)))
    sort = np.lexsort(
        (segments_all[:, 2], segments_all[:, 1], segments_all[:, 0]))
    blobs = segments_all[sort]
    print("checking for duplicates in all:")
    print(detector.remove_duplicate_blobs(blobs, slice(0, 3)))
    stats_detection = None
    fdbk = None
    colocs = None
    if segments_all is not None:
        # remove the duplicated elements that were used for pruning
        if coloc:
            # one column per channel, starting at column 10
            colocs = segments_all[:, 10:10+num_chls_roi].astype(np.uint8)
        # remove absolute coordinate and any co-localization columns
        segments_all = detector.remove_abs_blob_coords(segments_all)
        # compare detected blobs with truth blobs
        # TODO: assumes ground truth is relative to any ROI offset,
        # but should make customizable
        if verify:
            stats_detection, fdbk = verifier.verify_stack(
                filename_base, subimg_path_base, settings, segments_all,
                channels, overlap_base)
    if config.save_subimg:
        subimg_base_path = libmag.combine_paths(
            subimg_path_base, config.SUFFIX_SUBIMG)
        if (isinstance(config.image5d, np.memmap) and 
                config.image5d.filename == os.path.abspath(subimg_base_path)):
            # file at sub-image save path may have been opened as a memmap
            # file, in which case saving would fail
            # NOTE(review): the warning call below is cut off, and neither
            # the ``else:`` nor the start of the save statement is present
            # here -- confirm the original statements
            libmag.warn("{} is currently open, cannot save sub-image"
            # write sub-image, which is in ROI (3D) format
            with open(subimg_base_path, "wb") as f:
      , roi)

    # store blobs in Blobs instance
    # TODO: consider separating into blobs and blobs metadata archives
    blobs = detector.Blobs(
        segments_all, colocalizations=colocs, path=filename_blobs)
    blobs.resolutions = config.resolutions
    # the basename comes from ``config.filename`` rather than
    # ``filename_base``
    blobs.basename = os.path.basename(config.filename)
    blobs.roi_offset = offset
    blobs.roi_size = size
    # whole image benchmarking time
    # NOTE(review): only the total time remains in the tuple below, leaving
    # a single number that ``zip`` cannot iterate -- confirm the other
    # entries
    times = (
        time() - time_start)
    times_dict = {}
    for key, val in zip(StackTimes, times):
        times_dict[key] = val
    if segments_all is None:
        print("\nNo blobs detected")
        # NOTE(review): no ``else:`` is present here, so the count below is
        # taken even when ``segments_all`` is None -- confirm
        print("\nTotal blobs found:", len(segments_all))
    print("\nTotal detection processing times (s):")
    path_times = "stack_detection_times.csv" if save_dfs else None
    df_io.dict_to_data_frame(times_dict, path_times, show=" ")
    return stats_detection, fdbk, blobs
# Re-segment atlas labels from marker and edge images loaded by path,
# optionally smooth the result, measure overlap against the original labels,
# and return ``path_atlas`` unchanged.
#
# NOTE(review): this function looks like it lost lines at some point and does
# not parse as written. The docstring below has no closing triple quote and
# no "Args:" header, its sentence "Edge files are assumed to have been
# generated by" is cut off, and several statements further down are missing
# their opening line or a branch header. Each spot is marked inline;
# recover the missing lines from version control rather than guessing.
def edge_aware_segmentation(
        path_atlas: str, atlas_profile: atlas_prof.AtlasProfile,
        show: bool = True, atlas: bool = True, suffix: Optional[str] = None,
        exclude_labels: Optional[pd.DataFrame] = None, mirror_mult: int = -1):
    """Segment an atlas using its previously generated edge map.
    Labels may not match their own underlying atlas image well, 
    particularly in the orthogonal directions in which the labels 
    were not constructed. To improve alignment between the labels 
    and the atlas itself, register the labels to an automated, roughly 
    segmented version of the atlas. The goal is to improve the 
    labels' alignment so that the atlas/labels combination can be 
    used for another form of automated segmentation by registering 
    them to experimental brains via :func:``register``.
    Edge files are assumed to have been generated by 
        path_atlas: Path to the fixed file, typically the atlas file 
            with stained sections. The corresponding edge and labels 
            files will be loaded based on this path.
        atlas_profile: Atlas profile.
        show: True if the output images should be displayed; defaults 
            to True.
        atlas: True if the primary image is an atlas, which is assumed 
            to be symmetrical. False if the image is an experimental/sample 
            image, in which case segmentation will be performed on the full 
            images, and stats will not be performed.
        suffix: Modifier to append to end of ``path_atlas`` basename for 
            registered image files that were output to a modified name; 
            defaults to None. If ``atlas`` is True, ``suffix`` will only 
            be applied to saved files, with files still loaded based on the 
            original path.
        exclude_labels: Sequence of labels to exclude from the
            segmentation; defaults to None.
        mirror_mult: Multiplier for mirrored labels; defaults to -1
            to make mirrored labels the inverse of their source labels.
    # adjust image path with suffix
    load_path = path_atlas
    mod_path = path_atlas
    if suffix is not None:
        mod_path = libmag.insert_before_ext(mod_path, suffix)
        # NOTE(review): the ``suffix`` description says files are still
        # loaded from the original path when ``atlas`` is True, but this
        # line switches loading to the suffixed path in exactly that case;
        # confirm whether the condition or the description is inverted
        if atlas: load_path = mod_path
    # load corresponding files via SimpleITK
    atlas_sitk = sitk_io.load_registered_img(
        load_path, config.RegNames.IMG_ATLAS.value, get_sitk=True)
    atlas_sitk_edge = sitk_io.load_registered_img(
        load_path, config.RegNames.IMG_ATLAS_EDGE.value, get_sitk=True)
    labels_sitk = sitk_io.load_registered_img(
        load_path, config.RegNames.IMG_LABELS.value, get_sitk=True)
    labels_sitk_markers = sitk_io.load_registered_img(
        load_path, config.RegNames.IMG_LABELS_MARKERS.value, get_sitk=True)
    # get Numpy arrays of images
    atlas_img_np = sitk.GetArrayFromImage(atlas_sitk)
    atlas_edge = sitk.GetArrayFromImage(atlas_sitk_edge)
    labels_img_np = sitk.GetArrayFromImage(labels_sitk)
    markers = sitk.GetArrayFromImage(labels_sitk_markers)
    # segment image from markers
    sym_axis = atlas_refiner.find_symmetric_axis(atlas_img_np)
    # only segment one half when the image is flagged as an atlas and the
    # symmetric axis index is non-negative (presumably negative means no
    # axis was found -- confirm against ``find_symmetric_axis``)
    mirrorred = atlas and sym_axis >= 0
    len_half = None
    # keyword arguments forwarded to every ``segment_from_labels`` call below
    seg_args = {"exclude_labels": exclude_labels}
    edge_prof = atlas_profile[profiles.RegKeys.EDGE_AWARE_REANNOTATION]
    if edge_prof:
        edge_filt = edge_prof[profiles.RegKeys.WATERSHED_MASK_FILTER]
        if edge_filt and len(edge_filt) > 1:
            # watershed mask filter settings from atlas profile
            seg_args["mask_filt"] = edge_filt[0]
            seg_args["mask_filt_size"] = edge_filt[1]
    if mirrorred:
        # segment only half of image, assuming symmetry
        len_half = atlas_img_np.shape[sym_axis] // 2
        # full slice on every axis except the symmetric one, which is cut
        # to its first ``len_half`` planes
        slices = [slice(None)] * labels_img_np.ndim
        slices[sym_axis] = slice(len_half)
        sl = tuple(slices)
        labels_seg = segmenter.segment_from_labels(
            atlas_edge[sl], markers[sl], labels_img_np[sl], **seg_args)
        # NOTE(review): an ``else:`` header appears to be missing here; as
        # written the full-image segmentation below also runs in the
        # mirrored case and overwrites the half-image result
        # segment the full image, including excluded labels on the opposite side
        # NOTE(review): ``list.extend`` returns None, so ``exclude_labels``
        # is rebound to None and the exclusion list is lost; this also
        # raises AttributeError when ``exclude_labels`` is left at its
        # default of None
        exclude_labels = exclude_labels.tolist().extend(
            (mirror_mult * exclude_labels).tolist())
        seg_args["exclude_labels"] = exclude_labels
        labels_seg = segmenter.segment_from_labels(
            atlas_edge, markers, labels_img_np, **seg_args)
    smoothing = atlas_profile["smooth"]
    smoothing_mode = atlas_profile["smoothing_mode"]
    # condition tags, later joined with "|" for the metrics table
    cond = ["edge-aware_seg"]
    if smoothing is not None:
        # smoothing by opening operation based on profile setting
        meas_smoothing = atlas_profile["meas_smoothing"]
        # spacing is reversed from SimpleITK order before being passed on
        # (presumably to match the Numpy z,y,x axis order -- confirm)
        df_aggr, df_raw = atlas_refiner.smooth_labels(
            labels_seg, smoothing, smoothing_mode,
            meas_smoothing, labels_sitk.GetSpacing()[::-1])
        df_base_path = os.path.splitext(mod_path)[0]
        if df_raw is not None:
            # write raw smoothing metrics
            # NOTE(review): the opening line of the call that takes these
            # arguments is missing
                df_raw, f"{df_base_path}_{config.PATH_SMOOTHING_RAW_METRICS}")
        if df_aggr is not None:
            # write aggregated smoothing metrics
            # NOTE(review): the opening line of the call that takes these
            # arguments is missing
                df_aggr, f"{df_base_path}_{config.PATH_SMOOTHING_METRICS}")
    if mirrorred:
        # mirror back to other half
        labels_seg = _mirror_imported_labels(
            labels_seg, len_half, mirror_mult, sym_axis)
    # expand background to smoothed background of original labels to 
    # roughly match background while still allowing holes to be filled
    crop = atlas_profile["crop_to_orig"]
    # NOTE(review): the opening line of the call that takes these arguments
    # is missing
        labels_img_np, labels_seg, crop)
    if labels_seg.dtype != labels_img_np.dtype:
        # watershed may give different output type, so cast back if so
        labels_seg = labels_seg.astype(labels_img_np.dtype)
    labels_sitk_seg = sitk_io.replace_sitk_with_numpy(labels_sitk, labels_seg)
    # show DSCs for labels
    # NOTE(review): the calls that emit the two messages below are missing
    # their opening lines, and the second ``measure_overlap_labels`` call is
    # missing its arguments. The "individual" message precedes
    # ``dsc_lbls_comb`` and the "combined" message precedes
    # ``dsc_lbls_indiv``, so the variable names look swapped -- confirm
    # before relying on the metric columns they feed
        "\nMeasuring overlap of individual original and watershed labels:")
    dsc_lbls_comb = atlas_refiner.measure_overlap_labels(
        labels_sitk, labels_sitk_seg)
        "\nMeasuring overlap of combined original and watershed labels:")
    dsc_lbls_indiv = atlas_refiner.measure_overlap_labels(
    # measure and save whole atlas metrics
    metrics = {
        config.AtlasMetrics.SAMPLE: [os.path.basename(mod_path)],
        config.AtlasMetrics.REGION: config.REGION_ALL,
        config.AtlasMetrics.CONDITION: "|".join(cond),
        config.AtlasMetrics.DSC_LABELS_ORIG_NEW_COMBINED: dsc_lbls_comb,
        config.AtlasMetrics.DSC_LABELS_ORIG_NEW_INDIV: dsc_lbls_indiv,
    # NOTE(review): the closing brace of ``metrics`` is missing above, and
    # the call that consumes ``metrics`` and ``df_metrics_path`` below is
    # missing its opening line
    df_metrics_path = libmag.combine_paths(
        mod_path, config.PATH_ATLAS_IMPORT_METRICS)
        metrics, atlas_sitk, labels_sitk_seg, atlas_profile, df_metrics_path)

    # show and write image to same directory as atlas with appropriate suffix
    # NOTE(review): the opening line of the call that writes the labels
    # image is missing
        {config.RegNames.IMG_LABELS.value: labels_sitk_seg}, mod_path)
    if show: sitk.Show(labels_sitk_seg)
    # the input path is returned as given, not the suffixed ``mod_path``
    return path_atlas
# Detect blobs in an ROI of a large image: split the ROI into overlapping
# sub-ROIs, detect blobs per sub-ROI, prune blobs in the overlaps, optionally
# verify against a truth database, save the blobs archive, and return
# ``(stats_detection, fdbk, segments_all)``.
#
# NOTE(review): this function looks like it lost lines at some point and does
# not parse as written. The docstring below has no closing triple quote and
# no "Args:"/"Returns:" headers, and several ``else:``/``try:`` headers and
# call continuations are missing further down. Each spot is marked inline;
# recover the missing lines from version control rather than guessing.
def detect_blobs_large_image(filename_base, image5d, offset, size,
                             verify=False, save_dfs=True, full_roi=False):
    """Detect blobs within a large image through parallel processing of 
    smaller chunks.
        filename_base: Base path to use file output.
        image5d: Large image to process as a Numpy array of t,z,y,x,[c]
        offset: Sub-image offset given as coordinates in z,y,x.
        size: Sub-image shape given in z,y,x.
        verify: True to verify detections against truth database; defaults 
            to False.
        save_dfs: True to save data frames to file; defaults to True.
        full_roi (bool): True to treat ``image5d`` as the full ROI; defaults
            to False.
    time_start = time()
    if size is None or offset is None:
        # uses the entire stack if no size or offset specified
        size = image5d.shape[1:4]
        offset = (0, 0, 0)
        # NOTE(review): an ``else:`` header appears to be missing here; as
        # written the base filename is renamed for the whole-stack case only
        # change base filename for ROI-based partial stack
        filename_base = make_subimage_name(filename_base, offset, size)
    filename_subimg = libmag.combine_paths(filename_base, config.SUFFIX_SUBIMG)
    filename_blobs = libmag.combine_paths(filename_base, config.SUFFIX_BLOBS)
    # get ROI for given region, including all channels
    if full_roi:
        # treat the full image as the ROI
        roi = image5d[0]
        # NOTE(review): an ``else:`` header appears to be missing here; as
        # written the full-image ROI is immediately replaced
        roi = plot_3d.prepare_subimg(image5d, size, offset)
    # NOTE(review): the doubled comma is a syntax error; an argument between
    # ``roi`` and ``3`` appears to have been dropped
    _, channels = plot_3d.setup_channels(roi,, 3)
    # prep chunking ROI into sub-ROIs with size based on segment_size, scaling
    # by physical units to make more independent of resolution
    time_detection_start = time()
    settings = config.roi_profile  # use default settings
    scaling_factor = detector.calc_scaling_factor()
    # NOTE(review): this ``print`` call is never closed; its ``.format``
    # continuation is missing
    print("microsope scaling factor based on resolutions: {}"
    denoise_size = config.roi_profile["denoise_size"]
    denoise_max_shape = None
    if denoise_size:
        # further subdivide each sub-ROI for local preprocessing
        denoise_max_shape = np.ceil(
            np.multiply(scaling_factor, denoise_size)).astype(int)

    # overlap sub-ROIs to minimize edge effects
    overlap_base = chunking.calc_overlap()
    # pruning tolerance is the base overlap scaled by the profile factor
    tol = np.multiply(overlap_base, settings["prune_tol_factor"]).astype(int)
    overlap_padding = np.copy(tol)
    overlap = np.copy(overlap_base)
    exclude_border = config.roi_profile["exclude_border"]
    if exclude_border is not None:
        # exclude border to avoid blob detector edge effects, where blobs
        # often collect at the faces of the sub-ROI;
        # ensure that overlap is greater than twice the border exclusion per
        # axis so that no plane will be excluded from both overlapping sub-ROIs
        exclude_border_thresh = np.multiply(2, exclude_border)
        overlap_less = np.less(overlap, exclude_border_thresh)
        overlap[overlap_less] = exclude_border_thresh[overlap_less]
        excluded = np.greater(exclude_border, 0)
        overlap[excluded] += 1  # additional padding
        overlap_padding[excluded] = 0  # no need to prune past excluded border
    print("sub-ROI overlap: {}, pruning tolerance: {}, padding beyond "
          "overlap for pruning: {}, exclude borders: {}"
          .format(overlap, tol, overlap_padding, exclude_border))
    # NOTE(review): the ``max_pixels`` expression is never closed; its
    # operands are missing
    max_pixels = np.ceil(np.multiply(
    print("preprocessing max shape: {}, detection max pixels: {}"
          .format(denoise_max_shape, max_pixels))
    sub_roi_slices, sub_rois_offsets = chunking.stack_splitter(
        roi.shape, max_pixels, overlap)
    # TODO: option to distribute groups of sub-ROIs to different servers 
    # for blob detection
    seg_rois = detect_blobs_sub_rois(
        roi, sub_roi_slices, sub_rois_offsets, denoise_max_shape, exclude_border)
    detection_time = time() - time_detection_start
    print("blob detection time (s):", detection_time)
    # prune blobs in overlapping portions of sub-ROIs
    time_pruning_start = time()
    # NOTE(review): this call is never closed; its remaining arguments are
    # missing
    segments_all, df_pruning = _prune_blobs_mp(
        roi, seg_rois, overlap, tol, sub_roi_slices, sub_rois_offsets, channels,
    pruning_time = time() - time_pruning_start
    print("blob pruning time (s):", pruning_time)
    #print("maxes:", np.amax(segments_all, axis=0))
    # get weighted mean of ratios
    if df_pruning is not None:
        print("\nBlob pruning ratios:")
        path_pruning = "blob_ratios.csv" if save_dfs else None
        df_pruning_all = df_io.data_frames_to_csv(
            df_pruning, path_pruning, show=" ")
        cols = df_pruning_all.columns.tolist()
        blob_pruning_means = {}
        if "blobs" in cols:
            blobs_unpruned = df_pruning_all["blobs"]
            num_blobs_unpruned = np.sum(blobs_unpruned)
            # mean of each column after the first, weighted by the "blobs"
            # column (presumably the first column is "blobs" itself -- confirm)
            for col in cols[1:]:
                blob_pruning_means["mean_{}".format(col)] = [
                    np.sum(np.multiply(df_pruning_all[col], blobs_unpruned)) 
                    / num_blobs_unpruned]
            path_pruning_means = "blob_ratios_means.csv" if save_dfs else None
            df_pruning_means = df_io.dict_to_data_frame(
                blob_pruning_means, path_pruning_means, show=" ")
            # NOTE(review): an ``else:`` header appears to be missing here;
            # as written this message prints whenever ratios were found
            print("no blob ratios found")
    # NOTE(review): the triple-quoted block below looks like disabled
    # debugging code, but its closing ''' is missing
    '''# report any remaining duplicates
    np.set_printoptions(linewidth=500, threshold=10000000)
    print("all blobs (len {}):".format(len(segments_all)))
    sort = np.lexsort(
        (segments_all[:, 2], segments_all[:, 1], segments_all[:, 0]))
    blobs = segments_all[sort]
    print("checking for duplicates in all:")
    print(detector.remove_duplicate_blobs(blobs, slice(0, 3)))
    stats_detection = None
    fdbk = None
    if segments_all is not None:
        # remove the duplicated elements that were used for pruning
        segments_all = detector.remove_abs_blob_coords(segments_all)
        # compare detected blobs with truth blobs
        # TODO: assumes ground truth is relative to any ROI offset,
        # but should make customizable
        if verify:
            db_path_base = None
            exp_name = os.path.splitext(os.path.basename(config.filename))[0]
            # NOTE(review): a ``try:`` header appears to be missing here;
            # the indentation jumps and ``except`` clauses follow below
                if config.truth_db is None:
                    # find and load truth DB based on filename and subimage
                    db_path_base = os.path.basename(filename_base)
                    # NOTE(review): this ``print`` call is never closed, and
                    # no statement that loads the truth DB is visible
                    print("about to verify with truth db from {}"
                if config.truth_db is not None:
                    # truth DB may contain multiple experiments for different
                    # subimages; series not included in exp name since in ROI
                    rois = config.truth_db.get_rois(exp_name)
                    if rois is None:
                        # exp may have been named by ROI
                        print("{} experiment name not found, will try with"
                              "ROI offset/size".format(exp_name))
                        exp_name = make_subimage_name(exp_name, offset, size)
                        rois = config.truth_db.get_rois(exp_name)
                    if rois is None:
                        raise LookupError(
                            "No truth set ROIs found for experiment {}, will "
                            "skip detection verification".format(exp_name))
                    print("load ROIs from exp: {}".format(exp_name))
                    exp_id = sqlite.insert_experiment(
                        config.verified_db.conn, config.verified_db.cur, 
                        exp_name, None)
                    verify_tol = np.multiply(
                        overlap_base, settings["verify_tol_factor"])
                    # NOTE(review): this call is never closed; its remaining
                    # arguments are missing
                    stats_detection, fdbk = detector.verify_rois(
                        rois, segments_all, config.truth_db.blobs_truth, 
                        verify_tol, config.verified_db, exp_id,
            except FileNotFoundError:
                libmag.warn("Could not load truth DB from {}; "
                            "will not verify ROIs".format(db_path_base))
            # NOTE(review): this handler has no body in the visible code
            except LookupError as e:
    file_time_start = time()
    if config.save_subimg:
        if (isinstance(config.image5d, np.memmap) and 
                config.image5d.filename == os.path.abspath(filename_subimg)):
            # file at sub-image save path may have been opened as a memmap
            # file, in which case saving would fail
            # NOTE(review): the warning call below is never closed, the
            # ``else:`` header before the write is missing, and the save
            # call inside the ``with`` block is reduced to a fragment
            libmag.warn("{} is currently open, cannot save sub-image"
            # write sub-image, which is in ROI (3D) format
            with open(filename_subimg, "wb") as f:
      , roi)

    # save blobs
    # TODO: only segments used; consider removing the rest except ver
    # NOTE(review): this file handle is not closed anywhere in the visible
    # code; a ``with`` block would guarantee it is closed after writing
    outfile_blobs = open(filename_blobs, "wb")
    np.savez(outfile_blobs, ver=BLOBS_NP_VER, segments=segments_all,
             basename=os.path.basename(config.filename),  # only save name
             offset=offset, roi_size=size)  # None unless explicitly set
    file_save_time = time() - file_time_start
    # whole image benchmarking time
    # NOTE(review): as written ``times`` is a single float, not a tuple, so
    # the ``zip`` below would raise TypeError; earlier tuple entries appear
    # to be missing
    times = (
        time() - time_start)
    times_dict = {}
    for key, val in zip(StackTimes, times):
        times_dict[key] = val
    if segments_all is None:
        print("\nNo blobs detected")
        # NOTE(review): an ``else:`` header appears to be missing here; as
        # written ``len(None)`` raises TypeError on this branch
        print("\nTotal blobs found:", len(segments_all))
    print("file save time:", file_save_time)
    print("\nTotal detection processing times (s):")
    path_times = "stack_detection_times.csv" if save_dfs else None
    df_io.dict_to_data_frame(times_dict, path_times, show=" ")
    return stats_detection, fdbk, segments_all