Example #1
def _test_blob_verification(a, b, tol):
    # test verifying blobs by checking for closest matches within a tolerance
    print("test (b):\n{}".format(b))
    print("master (a):\n{}".format(a))
    #found_truth, detected = _find_closest_blobs(b, a, tol)
    #dists = np.zeros(len(blobs)
    detected, found_truth, dists = find_closest_blobs_cdist(b, a, tol)
    df_io.dict_to_data_frame(
        {"Testi": detected, "Masteri": found_truth, "Dist": dists}, show=True)
Example #2
    def get_ref_lookup_as_df(self) -> Optional[pd.DataFrame]:
        """Get the reference lookup dict as a data frame.
        
        Returns:
            :attr:`ref_lookup` converted to a data frame. Returns the object
            as-is if it is already a data frame.

        """
        if self.ref_lookup is None:
            # return immediately if no reference dict to convert
            return None

        if isinstance(self.ref_lookup, pd.DataFrame):
            # return existing data frame
            return self.ref_lookup

        # convert dict reference to data frame with main columns
        labels_ref_regions = {}
        keys_node = (
            config.ABAKeys.NAME.value,
            config.ABAKeys.LEVEL.value,
            config.ABAKeys.ACRONYM.value,
        )
        for key, val in self.ref_lookup.items():
            # extract a subset of entries
            labels_ref_regions.setdefault(config.ABAKeys.ABA_ID.value,
                                          []).append(key)
            node = val[NODE]
            for node_k in keys_node:
                labels_ref_regions.setdefault(
                    node_k, []).append(node.get(node_k) if node else None)
            labels_ref_regions.setdefault(PARENT_IDS,
                                          []).append(val.get(PARENT_IDS))
        df_regions = df_io.dict_to_data_frame(labels_ref_regions)
        return df_regions
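The conversion above leans on dict.setdefault to grow one list per column while walking the lookup. A self-contained sketch of that accumulation pattern on a toy reference dict (keys and nested fields are invented for illustration, not the real ABA format):

# toy stand-in for ref_lookup; structure is assumed for illustration only
ref_lookup = {
    1: {"node": {"name": "root", "level": 0, "acronym": "R"}},
    2: {"node": {"name": "child", "level": 1, "acronym": "C"}},
}

columns = {}
for key, val in ref_lookup.items():
    node = val["node"]
    columns.setdefault("id", []).append(key)
    for field in ("name", "level", "acronym"):
        # append the field value, or None if the node is missing
        columns.setdefault(field, []).append(node.get(field) if node else None)

print(columns)  # dict of equal-length lists, ready for a data frame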
Example #3
    def _parse_blob_matches(self, rows):
        """Parse blob match selection.
        
        Args:
            rows (List[:obj:`sqlite3.Row`]): Sequence of rows.

        Returns:
            :class:`magmap.cv.colocalizer.BlobMatch`: Blob match object.
        
        Deprecated: 1.6.0
            Use :meth:`select_blob_matches` instead.

        """
        # build list of blob matches, which contain matching blobs and their
        # distances, converting blob IDs to full blobs
        matches = []
        for row in rows:
            matches.append((
                self.select_blob_by_id(row["blob1"])[0],
                self.select_blob_by_id(row["blob2"])[0], row["dist"]))
        
        if len(rows) > 0:
            # convert to data frame to access by named columns
            df = df_io.dict_to_data_frame(rows, records_cols=rows[0].keys())
            blob_matches = colocalizer.BlobMatch(
                matches, df["id"], df["roi_id"], df["blob1"], df["blob2"])
        else:
            blob_matches = colocalizer.BlobMatch()
        return blob_matches
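The rows[0].keys() and row["blob1"] accesses above work because the cursor yields sqlite3.Row objects, which expose column names. A minimal self-contained sketch of that behavior; the table and columns are made up:

import sqlite3

conn = sqlite3.connect(":memory:")
conn.row_factory = sqlite3.Row  # fetched rows become sqlite3.Row objects
conn.execute(
    "CREATE TABLE blob_matches (id INTEGER, blob1 INTEGER, blob2 INTEGER, dist REAL)")
conn.execute("INSERT INTO blob_matches VALUES (1, 10, 20, 1.5)")

rows = conn.execute("SELECT * FROM blob_matches").fetchall()
print(rows[0].keys())    # ['id', 'blob1', 'blob2', 'dist']
print(rows[0]["blob1"])  # 10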
Example #4
    def __init__(self,
                 matches=None,
                 match_id=None,
                 roi_id=None,
                 blob1_id=None,
                 blob2_id=None,
                 df=None):
        """Initialize blob match object.

        Args:
            matches (list[list[
                :class:`numpy.ndarray`, :class:`numpy.ndarray`, float]]):
                List of blob match lists, which each contain,
                ``blob1, blob2, distance``. Defaults to None, which
                sets the data frame to None.
            match_id (Sequence[int]): Sequence of match IDs, which should be
                of the same length as ``matches``; defaults to None.
            roi_id (Sequence[int]): Sequence of ROI IDs, which should be
                of the same length as ``matches``; defaults to None.
            blob1_id (Sequence[int]): Sequence of blob 1 IDs, which should be
                of the same length as ``matches``; defaults to None.
            blob2_id (Sequence[int]): Sequence of blob 2 IDs, which should be
                of the same length as ``matches``; defaults to None.
            df (:class:`pandas.DataFrame`): Pandas data frame to set in
                place of any other arguments; defaults to None.
        """
        self.df: Optional[pd.DataFrame] = None
        self.coords: Optional[np.ndarray] = None
        self.cmap: Optional[np.ndarray] = None

        if df is not None:
            # set data frame directly and ignore any other arguments
            self.df = df
            return
        if matches is None:
            # return since any other arguments must correspond to matches
            return

        matches_dict = {}
        for i, match in enumerate(matches):
            # assumes that all first sequences are of the same length
            vals = {
                BlobMatch.Cols.BLOB1: match[0],
                BlobMatch.Cols.BLOB2: match[1],
                BlobMatch.Cols.DIST: match[2],
            }
            if match_id is not None:
                vals[BlobMatch.Cols.MATCH_ID] = match_id[i]
            if roi_id is not None:
                vals[BlobMatch.Cols.ROI_ID] = roi_id[i]
            if blob1_id is not None:
                vals[BlobMatch.Cols.BLOB1_ID] = blob1_id[i]
            if blob2_id is not None:
                vals[BlobMatch.Cols.BLOB2_ID] = blob2_id[i]
            for key in BlobMatch.Cols:
                matches_dict.setdefault(
                    key, []).append(vals[key] if key in vals else None)
        self.df = df_io.dict_to_data_frame(matches_dict)
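To make the expected matches format concrete, here is a stand-alone mirror of the loop above using plain lists, numpy, and pandas; the blob arrays and IDs are invented, and the column names are simplified stand-ins for BlobMatch.Cols:

import numpy as np
import pandas as pd

# each match is (blob1, blob2, distance); values are arbitrary examples
matches = [
    (np.array([5, 10, 10, 3.0]), np.array([5, 11, 10, 2.5]), 1.0),
    (np.array([8, 20, 15, 3.0]), np.array([9, 20, 16, 3.5]), 1.4),
]
match_id = [0, 1]

matches_dict = {}
for i, (blob1, blob2, dist) in enumerate(matches):
    vals = {"match_id": match_id[i], "blob1": blob1, "blob2": blob2, "dist": dist}
    for key in ("match_id", "blob1", "blob2", "dist"):
        # one list per column; absent values would become None via .get
        matches_dict.setdefault(key, []).append(vals.get(key))

print(pd.DataFrame(matches_dict))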
Example #5
def parse_grid_stats(stats_dict):
    """Parse stats from multiple grid searches.
    
    Args:
        stats_dict: Dictionary where key is a string with the parameters
            up to the last parameter group, and each value is a tuple of 
            the raw stats as (pos, true_pos, false_pos); the array of
            values for the last parameter; the last parameter key; and an 
            ``OrderedDict`` of the parent parameters and their values for 
            the given set of stats.
    """
    parsed_stats = {}
    dfs = []
    param_keys = []
    for group, iterable_dicts in stats_dict.items():
        # parse a grid search
        stats_for_df = {}
        headers = None
        print("{}:".format(group))
        group_dict = {}
        parsed_stats[group] = group_dict
        for key, value in iterable_dicts.items():
            # parse stats from a set of parameters
            grid_stats = np.array(value[0])  # raw stats
            # last parameter is given separately since it is actively varying
            last_param_vals, last_param_key, parent_params = value[1:]
            if not headers:
                # set up headers for each stat and insert parameter headers
                # at the start
                headers = [e.value for e in GridSearchStats]
                headers[0] = "_".join((headers[0], last_param_key))
                for i, parent in enumerate(parent_params.keys()):
                    headers.insert(
                        i, "_".join((GridSearchStats.PARAM.value, parent)))
                    param_keys.append(parent)
                param_keys.append(last_param_key)
            # false discovery rate (1 - PPV) since true negatives are unknown
            fdr = np.subtract(
                1,
                np.divide(grid_stats[:, 1],
                          np.add(grid_stats[:, 1], grid_stats[:, 2])))
            sens = np.divide(grid_stats[:, 1], grid_stats[:, 0])
            for i, n in enumerate(last_param_vals):
                stat_list = []
                for parent_val in parent_params.values():
                    stat_list.append(parent_val)
                stat_list.extend((last_param_vals[i], 1 - fdr[i], sens[i],
                                  *grid_stats[i].astype(int), fdr[i]))
                for header, stat in zip(headers, stat_list):
                    stats_for_df.setdefault(header, []).append(stat)
            group_dict[key] = (fdr, sens, last_param_vals)
        print()
        path_df = "gridsearch_{}.csv".format("_".join(param_keys))
        dfs.append(df_io.dict_to_data_frame(stats_for_df, path_df, show=" "))
    return parsed_stats, dfs
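The core arithmetic above turns each (pos, true_pos, false_pos) row into sensitivity and a false discovery rate. An isolated numpy sketch with invented counts:

import numpy as np

# rows of (pos, true_pos, false_pos) for three parameter values (invented)
grid_stats = np.array([
    [100, 80, 30],
    [100, 85, 20],
    [100, 90, 40],
])

sens = grid_stats[:, 1] / grid_stats[:, 0]  # true pos / condition pos
fdr = 1 - grid_stats[:, 1] / (grid_stats[:, 1] + grid_stats[:, 2])  # 1 - PPV
print(sens)  # 0.8, 0.85, 0.9
print(fdr)   # used in place of specificity since true negatives are unknown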
Example #6
def verify_rois(rois, blobs, blobs_truth, tol, output_db, exp_id, exp_name,
                channel):
    """Verify blobs in ROIs by comparing detected blobs with truth sets
    of blobs stored in a database.
    
    Save the verifications to a separate database with a name in the same
    format as saved processed files but with "_verified.db" at the end.
    Prints basic statistics on the verification.
    
    Note that blobs are found from ROI parameters rather than loading from 
    database, so blobs recorded within these ROI bounds but from different 
    ROIs will be included in the verification.
    
    Args:
        rois: Rows of ROIs from sqlite database.
        blobs (:obj:`np.ndarray`): The blobs to be checked for accuracy,
            given as 2D array of ``[[z, row, column, radius, ...], ...]``.
        blobs_truth (:obj:`np.ndarray`): The list by which to check for
            accuracy, in the same format as blobs.
        tol: Tolerance as a z,y,x sequence of floats specifying padding for
            the inner ROI, also used to generate a single tolerance distance
            within which a detected and a ground truth blob are considered
            potential matches.
        output_db: Database in which to save the verification flags, typically
            the database in :attr:`config.verified_db`.
        exp_id: Experiment ID in ``output_db``.
        exp_name (str): Name of experiment to store as the sample name for
            each row in the output data frame.
        channel (List[int]): Filter ``blobs_truth`` by this channel.
    
    Returns:
        tuple[int, int, int], str, :class:`pandas.DataFrame`: Tuple of
        ``pos, true_pos, false_pos`` stats, feedback message, and accuracy
        metrics in a data frame.
    
    """
    blobs_truth = detector.blobs_in_channel(blobs_truth, channel)
    blobs_truth_rois = None
    blobs_rois = None
    rois_falsehood = []
    thresh, scaling, inner_padding, resize, blobs = setup_match_blobs_roi(
        blobs, tol)
    
    # set up metrics dict for accuracy metrics of each ROI
    metrics = {}
    cols = (
        config.AtlasMetrics.SAMPLE,
        config.AtlasMetrics.CHANNEL,
        config.AtlasMetrics.OFFSET,
        config.AtlasMetrics.SIZE,
        mlearn.GridSearchStats.POS,
        mlearn.GridSearchStats.TP,
        mlearn.GridSearchStats.FP,
        mlearn.GridSearchStats.FN,
    )
    
    for roi in rois:
        # get ROI from database for ground truth blobs
        offset = (roi["offset_x"], roi["offset_y"], roi["offset_z"])
        size = (roi["size_x"], roi["size_y"], roi["size_z"])
        series = roi["series"]
        
        # find matches between truth and detected blobs
        blobs_inner_plus, blobs_truth_inner_plus, offset_inner, size_inner, \
            matches = match_blobs_roi(
                blobs, blobs_truth, offset, size, thresh, scaling,
                inner_padding, resize)
        
        # store blobs in separate verified DB
        roi_id, _ = sqlite.insert_roi(output_db.conn, output_db.cur, exp_id,
                                      series, offset_inner, size_inner)
        sqlite.insert_blobs(output_db.conn, output_db.cur, roi_id,
                            blobs_inner_plus)
        sqlite.insert_blobs(output_db.conn, output_db.cur, roi_id,
                            blobs_truth_inner_plus)
        output_db.insert_blob_matches(roi_id, matches)
        
        # compute accuracy metrics for the ROI
        pos = len(blobs_truth_inner_plus)  # condition pos
        true_pos = np.sum(blobs_inner_plus[:, 4] == 1)
        false_pos = np.sum(blobs_inner_plus[:, 4] == 0)
        false_neg = len(blobs_truth_inner_plus) - true_pos
        if false_neg > 0 or false_pos > 0:
            rois_falsehood.append((offset_inner, false_pos, false_neg))
        vals = (exp_name, channel[0] if channel else 0,
                tuple(offset_inner.astype(int)), tuple(size_inner.astype(int)),
                pos, true_pos, false_pos, pos - true_pos)
        for key, val in zip(cols, vals):
            metrics.setdefault(key, []).append(val)
        
        # combine blobs into total lists for stats across ROIs
        if blobs_truth_rois is None:
            blobs_truth_rois = blobs_truth_inner_plus
        else:
            blobs_truth_rois = np.concatenate(
                (blobs_truth_inner_plus, blobs_truth_rois))
        if blobs_rois is None:
            blobs_rois = blobs_inner_plus
        else:
            blobs_rois = np.concatenate((blobs_inner_plus, blobs_rois))
    
    # generate and show data frame of accuracy metrics for each ROI
    df = df_io.dict_to_data_frame(metrics, show=" ")
    
    # show accuracy metrics of blobs combined across ROIs
    true_pos = df[mlearn.GridSearchStats.TP.value].sum()
    false_pos = df[mlearn.GridSearchStats.FP.value].sum()
    pos = df[mlearn.GridSearchStats.POS.value].sum()
    false_neg = pos - true_pos
    print("Automated verification using tol {}:\n".format(tol))
    fdbk = "Accuracy metrics for channel {}:\n{}".format(
        channel, atlas_stats.calc_sens_ppv(
            pos, true_pos, false_pos, false_neg)[2])
    print(fdbk)
    print("ROIs with falsehood:\n{}".format(rois_falsehood))
    return (pos, true_pos, false_pos), fdbk, df
Example #7
def export_region_ids(labels_ref_lookup,
                      path,
                      level=None,
                      drawn_labels_only=False):
    """Export region IDs from annotation reference reverse mapped dictionary 
    to CSV and Excel files.

    Use a ``level`` of None to export labels only for the currently loaded
    atlas. The RGB values used for the currently loaded atlas will also be
    shown, with cell colors corresponding to these values in the Excel file.
    
    Args:
        labels_ref_lookup: The labels reference lookup, assumed to be an 
            OrderedDict generated by :func:`ontology.create_reverse_lookup` 
            to look up by ID while preserving key order to ensure that 
            parents of any child will be reached prior to the child.
        path: Path to output CSV file; if does not end with ``.csv``, it will 
            be added.
        level: Level at which to find parent for each label; defaults to None
            to get the immediate parent.
        drawn_labels_only (bool): True to export only the drawn labels for
            atlas labels in the same folder as ``labels_ref_lookup``.
            Defaults to False to use the full set of labels in
            ``labels_ref_lookup``.
    
    Returns:
        Pandas data frame of the region IDs and corresponding names.
    """
    def color_cells(s):
        # convert RGB to hex values since Pandas Excel export only supports
        # named colors or hex (as of v0.22)
        css = ["background-color: #{:02x}{:02x}{:02x}".format(*c) for c in s]
        return css

    ext = ".csv"
    path_csv = path if path.endswith(ext) else path + ext

    # find ancestor for each label at the given level
    label_parents = ontology.labels_to_parent(labels_ref_lookup,
                                              level,
                                              allow_parent_same_level=True)

    cols = [
        config.AtlasMetrics.REGION.value,
        config.AtlasMetrics.REGION_ABBR.value,
        config.AtlasMetrics.REGION_NAME.value, config.AtlasMetrics.LEVEL.value,
        config.AtlasMetrics.PARENT.value
    ]
    data = OrderedDict()
    label_ids = sitk_io.find_atlas_labels(config.load_labels,
                                          drawn_labels_only, labels_ref_lookup)
    cm = colormaps.get_labels_discrete_colormap(None, 0, use_orig_labels=True)
    rgbs = cm.cmap_labels
    if rgbs is not None:
        cols.append("RGB")
    for i, key in enumerate(label_ids):
        # get label dict
        label = labels_ref_lookup.get(key)
        if label is None: continue

        # ID of parent at label_parents' level
        parent = label_parents[key]
        vals = [
            key, label[ontology.NODE][config.ABAKeys.ACRONYM.value],
            label[ontology.NODE][config.ABAKeys.NAME.value],
            label[ontology.NODE][config.ABAKeys.LEVEL.value], parent
        ]
        if rgbs is not None:
            vals.append(rgbs[i, :3])
        for col, val in zip(cols, vals):
            data.setdefault(col, []).append(val)
    df = df_io.dict_to_data_frame(data, path_csv)
    if rgbs is not None:
        df = df.style.apply(color_cells, subset="RGB")
    path_xlsx = "{}.xlsx".format(os.path.splitext(path)[0])
    df.to_excel(path_xlsx)
    print("exported regions to styled spreadsheet: \"{}\"".format(path_xlsx))
    return df
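The Excel step above relies on pandas' Styler.apply with a column subset and on exporting the styled frame. A minimal sketch of just that mechanism, assuming openpyxl is installed; the columns, colors, and output path are invented:

import pandas as pd

def color_cells(s):
    # map each (R, G, B) tuple in the column to a CSS background color
    return ["background-color: #{:02x}{:02x}{:02x}".format(*c) for c in s]

df = pd.DataFrame({
    "Region": [1, 2],
    "RGB": [(255, 0, 0), (0, 128, 255)],
})
styled = df.style.apply(color_cells, subset="RGB")
styled.to_excel("regions_demo.xlsx")  # hypothetical output path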
Example #8
    def select_blob_matches(
            self, roi_id: int, offset: Optional[Sequence[int]] = None,
            shape: Optional[Sequence[int]] = None) -> "colocalizer.BlobMatch":
        """Select blob matches for the given ROI.
        
        Args:
            roi_id: ROI ID.
            offset: ROI offset in ``z,y,x``; defaults to None.
            shape: ROI shape in ``z,y,x``; defaults to None.

        Returns:
            Blob matches.

        """
        _logger.debug("Selecting blob matches for ROI ID: %s", roi_id)
        start = time()
        
        # set up columns for each table
        cols_matches = _specify_table_cols(
            _COLS_BLOB_MATCHES + ', id', ', ', 'bm')
        cols_blobs = _COLS_BLOBS + ", id"
        cols_blobs1 = _specify_table_cols(cols_blobs, ', ', 'b1')
        cols_blobs2 = _specify_table_cols(cols_blobs, ', ', 'b2')
        
        # set up select statement
        stmnt = (
            f"SELECT {cols_matches}, "
            f"{cols_blobs1}, "
            f"{cols_blobs2} "
            f"FROM blob_matches bm "
            f"INNER JOIN blobs b1 ON bm.blob1 = b1.id "
            f"INNER JOIN blobs b2 ON bm.blob2 = b2.id "
            f"WHERE bm.roi_id = ?")
        args = [roi_id, ]
        
        if offset is not None and shape is not None:
            # add ROI parameters
            bounds = zip(offset, np.add(offset, shape))
            bounds = [str(b) for bound in bounds for b in bound]
            stmnt += (
                " AND b1.z >= ? AND b1.z < ?"
                " AND b1.y >= ? AND b1.y < ? AND b1.x >= ? AND b1.x < ?"
                " AND b2.z >= ? AND b2.z < ?"
                " AND b2.y >= ? AND b2.y < ? AND b2.x >= ? AND b2.x < ?")
            args.extend(bounds)
            args.extend(bounds)
        
        # execute query
        self.cur.execute(stmnt, args)
        rows = self.cur.fetchall()
        
        df_matches = None
        if len(rows) > 0:
            # convert to data frame to access by named columns
            df = df_io.dict_to_data_frame(rows, records_cols=rows[0].keys())
            
            def get_cols(col_full):
                # extract column aliases
                return [c.split(" ")[1] for c in col_full.split(", ")]
            
            # extract columns for blob matches
            df_matches = df[get_cols(cols_matches)]
            df_matches = df_matches.rename(columns={
                "bm_blob1": colocalizer.BlobMatch.Cols.BLOB1_ID.value,
                "bm_blob2": colocalizer.BlobMatch.Cols.BLOB2_ID.value,
                "bm_id": colocalizer.BlobMatch.Cols.MATCH_ID.value,
                "bm_roi_id": colocalizer.BlobMatch.Cols.ROI_ID.value,
                "bm_dist": colocalizer.BlobMatch.Cols.DIST.value,
            })
            
            # merge each set of blob columns into a single column of blob lists
            cols_dict = {
                colocalizer.BlobMatch.Cols.BLOB1.value: cols_blobs1,
                colocalizer.BlobMatch.Cols.BLOB2.value: cols_blobs2,
            }
            for col, cols in cols_dict.items():
                cols = get_cols(cols)[1:]
                df_matches[col] = df[cols].to_numpy().tolist()
            
        blob_matches = colocalizer.BlobMatch(df=df_matches)
        _logger.debug("Finished selecting blob matches in %s s", time() - start)
        return blob_matches
Example #9
def export_rois(db,
                image5d,
                channel,
                path,
                padding=None,
                unit_factor=None,
                truth_mode=None,
                exp_name=None):
    """Export all ROIs from database.
    
    If the current processing profile includes isotropic interpolation, the 
    ROIs will be resized to make isotropic according to this factor.
    
    Args:
        db: Database from which to export.
        image5d: The image with the ROIs.
        channel (List[int]): Channels to export; currently only the first
            channel is used.
        path: Path with filename base from which to save the exported files.
        padding (List[int]): Padding in x,y,z to exclude from the ROI;
            defaults to None.
        unit_factor (float): Linear conversion factor for units (e.g., 1000.0
            to convert um to mm).
        truth_mode (:obj:`config.TruthDBModes`): Truth mode enum; defaults
            to None.
        exp_name (str): Name of experiment to export; defaults to None to
            export all experiments in ``db``.
    
    Returns:
        :obj:`pd.DataFrame`: ROI metrics in a data frame.
    
    """
    if padding is not None:
        padding = np.array(padding)

    # TODO: consider iterating through all channels
    channel = channel[0] if channel else 0

    # convert volume based on scaling and unit factor
    phys_mult = np.prod(detector.calc_scaling_factor())
    if unit_factor: phys_mult /= unit_factor**3

    metrics_all = {}
    exps = sqlite.select_experiment(db.cur, None)
    for exp in exps:
        if exp_name and exp["name"] != exp_name:
            # DBs may contain many experiments, which may not correspond to
            # image5d, e.g., verified DBs from many truth sets
            continue
        rois = sqlite.select_rois(db.cur, exp["id"])
        for roi in rois:
            # get ROI as a small image
            size = sqlite.get_roi_size(roi)
            offset = sqlite.get_roi_offset(roi)
            img3d = plot_3d.prepare_roi(image5d, size, offset)

            # get blobs and change confirmation flag to avoid confirmation
            # color in 2D plots
            roi_id = roi["id"]
            blobs = sqlite.select_blobs(db.cur, roi_id)
            blobs_detected = None
            if truth_mode is config.TruthDBModes.VERIFIED:
                # verified DBs use a truth value of -1 to indicate "detected",
                # non-truth blobs, including both correct and incorrect
                # detections, while the rest of blobs are "truth" blobs
                truth_vals = detector.get_blob_truth(blobs)
                blobs_detected = blobs[truth_vals == -1]
                blobs = blobs[truth_vals != -1]
            else:
                # default to include only confirmed blobs; truth sets
                # ironically do not use the truth flag but instead
                # assume all confirmed blobs are "truth"
                blobs = blobs[detector.get_blob_confirmed(blobs) == 1]
            blobs[:, 4] = -1

            # adjust ROI size and offset if border set
            if padding is not None:
                size = np.subtract(img3d.shape[::-1], 2 * padding)
                img3d = plot_3d.prepare_roi(img3d, size, padding)
                blobs[:, 0:3] = np.subtract(blobs[:, 0:3],
                                            np.add(offset, padding)[::-1])
            print("exporting ROI of shape {}".format(img3d.shape))

            isotropic = config.roi_profile["isotropic"]
            blobs_orig = blobs
            if isotropic is not None:
                # interpolation for isotropy if set in first processing profile
                img3d = cv_nd.make_isotropic(img3d, isotropic)
                isotropic_factor = cv_nd.calc_isotropic_factor(isotropic)
                blobs_orig = np.copy(blobs)
                blobs = detector.multiply_blob_rel_coords(
                    blobs, isotropic_factor)

            # export ROI and 2D plots
            path_base, path_dir_nifti, path_img, path_img_nifti, path_blobs, \
                path_img_annot, path_img_annot_nifti = make_roi_paths(
                    path, roi_id, channel, make_dirs=True)
            np.save(path_img, img3d)
            print("saved 3D image to {}".format(path_img))
            # WORKAROUND: for some reason SimpleITK gives a conversion error
            # when converting from uint16 (>u2) Numpy array
            img3d = img3d.astype(np.float64)
            img3d_sitk = sitk.GetImageFromArray(img3d)
            '''
            print(img3d_sitk)
            print("orig img:\n{}".format(img3d[0]))
            img3d_back = sitk.GetArrayFromImage(img3d_sitk)
            print(img3d.shape, img3d.dtype, img3d_back.shape, img3d_back.dtype)
            print("sitk img:\n{}".format(img3d_back[0]))
            '''
            sitk.WriteImage(img3d_sitk, path_img_nifti, False)
            roi_ed = roi_editor.ROIEditor(img3d)
            roi_ed.plot_roi(blobs,
                            channel,
                            show=False,
                            title=os.path.splitext(path_img)[0])
            libmag.show_full_arrays()

            # export image and blobs, stripping blob flags and adjusting
            # user-added segments' radii; use original rather than blobs with
            # any interpolation since the ground truth will itself be
            # interpolated
            blobs = blobs_orig
            blobs = blobs[:, 0:4]
            # prior to v.0.5.0, user-added segments had a radius of 0.0
            blobs[np.isclose(blobs[:, 3], 0), 3] = 5.0
            # as of v.0.5.0, user-added segments have neg radii whose abs
            # value corresponds to the displayed radius
            blobs[:, 3] = np.abs(blobs[:, 3])
            # make more rounded since near-integer values appear to give
            # edges of 5 straight pixels
            # https://github.com/scikit-image/scikit-image/issues/2112
            #blobs[:, 3] += 1E-1
            blobs[:, 3] -= 0.5
            libmag.printv("blobs:\n{}".format(blobs))
            np.save(path_blobs, blobs)

            # convert blobs to ground truth
            img3d_truth = plot_3d.build_ground_truth(
                np.zeros(size[::-1], dtype=np.uint8), blobs)
            if isotropic is not None:
                img3d_truth = cv_nd.make_isotropic(img3d_truth, isotropic)
                # remove fancy blending since truth set must be binary
                img3d_truth[img3d_truth >= 0.5] = 1
                img3d_truth[img3d_truth < 0.5] = 0
            print("exporting truth ROI of shape {}".format(img3d_truth.shape))
            np.save(path_img_annot, img3d_truth)
            #print(img3d_truth)
            sitk.WriteImage(sitk.GetImageFromArray(img3d_truth),
                            path_img_annot_nifti, False)
            # avoid smoothing interpolation, using "nearest" instead
            with plt.style.context(config.rc_params_mpl2_img_interp):
                roi_ed.plot_roi(img3d_truth,
                                None,
                                channel,
                                show=False,
                                title=os.path.splitext(path_img_annot)[0])

            # measure ROI metrics and export to data frame; use AtlasMetrics
            # enum vals since will need LabelMetrics names instead
            metrics = {
                config.AtlasMetrics.SAMPLE.value: exp["name"],
                config.AtlasMetrics.CONDITION.value: "truth",
                config.AtlasMetrics.CHANNEL.value: channel,
                config.AtlasMetrics.OFFSET.value: offset,
                config.AtlasMetrics.SIZE.value: size,
            }
            # get basic counts for ROI and update volume for physical units
            vols.MeasureLabel.set_data(img3d, np.ones_like(img3d,
                                                           dtype=np.int8))
            _, metrics_counts = vols.MeasureLabel.measure_counts(1)
            metrics_counts[vols.LabelMetrics.Volume] *= phys_mult
            for key, val in metrics_counts.items():
                # convert LabelMetrics to their name
                metrics[key.name] = val
            metrics[vols.LabelMetrics.Nuclei.name] = len(blobs)
            metrics_dicts = [metrics]
            if blobs_detected is not None:
                # add another row for detected blobs
                metrics_detected = dict(metrics)
                metrics_detected[
                    config.AtlasMetrics.CONDITION.value] = "detected"
                metrics_detected[vols.LabelMetrics.Nuclei.name] = len(
                    blobs_detected)
                metrics_dicts.append(metrics_detected)
            for m in metrics_dicts:
                for key, val in m.items():
                    metrics_all.setdefault(key, []).append(val)

            print("exported {}".format(path_base))

    #_test_loading_rois(db, channel, path)

    # convert to data frame and compute densities for nuclei and intensity
    df = df_io.dict_to_data_frame(metrics_all)
    vol = df[vols.LabelMetrics.Volume.name]
    df.loc[:, vols.LabelMetrics.DensityIntens.name] = (
        df[vols.LabelMetrics.Intensity.name] / vol)
    df.loc[:, vols.LabelMetrics.Density.name] = (
        df[vols.LabelMetrics.Nuclei.name] / vol)
    df = df_io.data_frames_to_csv(df, "{}_rois.csv".format(path))
    return df
Example #10
def list_s3_bucket(name, keys=None, prefix=None, suffix=None, versions=False):
    """List all objects or object versions in an AWS S3 bucket.

    Args:
        name (str): Name of bucket.
        keys (List[str]): Sequence of keys within the bucket to include
            sizes of only these files; defaults to None.
        prefix (str): Filter only keys starting with this string; defaults
            to None.
        suffix (str): String to append to output CSV file; defaults to None.
        versions (bool): True to get all object versions, including
            deleted objects; False to get only the current versions; defaults
            to False.

    Returns:
        float, :obj:`pd.DataFrame`, :obj:`pd.DataFrame`: Size of bucket in
        bytes; a dataframe of keys and associated sizes; and a dataframe
        of missing keys from ``keys``, or None if ``keys`` is not given.

    """
    s3 = boto3.resource("s3")
    bucket = s3.Bucket(name)
    tot_size = 0
    obj_sizes = {}
    # get latest version of objects or all object versions, filtering
    # for paths starting with prefix if set
    objs = bucket.object_versions if versions else bucket.objects
    objs = objs.filter(Prefix=prefix) if prefix else objs.all()
    for obj in objs:
        if not keys or obj.key in keys:
            # only check keys in list if given
            obj_sizes.setdefault("Bucket", []).append(bucket.name)
            obj_sizes.setdefault("Key", []).append(obj.key)
            size = obj.size
            obj_sizes.setdefault("Size", []).append(size)
            if size:
                # skip delete markers, which have a size of None
                tot_size += obj.size
            if versions:
                # add columns for version info
                obj_sizes.setdefault("Version_id", []).append(obj.version_id)
                obj_sizes.setdefault("Last_modified",
                                     []).append(obj.last_modified)

    out_path = "bucket_{}".format(bucket.name)
    if suffix:
        out_path = libmag.insert_before_ext(out_path, suffix, "_")
    df_missing = None
    if keys:
        # if list of keys given, show all keys that were not found
        keys_missing = []
        obj_keys = obj_sizes.get("Key", [])  # S3 keys actually found above
        for key in keys:
            if key not in obj_keys:
                keys_missing.append(key)
        # print("Missing keys:\n", "\n".join(keys_missing))
        df_missing = df_io.dict_to_data_frame({"Keys_missing": keys_missing},
                                              libmag.insert_before_ext(
                                                  out_path, "_missing"))

    df = df_io.dict_to_data_frame(obj_sizes, out_path)
    print("{} bucket total tot_size (GiB): {}".format(
        bucket.name, libmag.convert_bin_magnitude(tot_size, 3)))
    return tot_size, df, df_missing
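A pared-down sketch of the boto3 listing pattern used above; the bucket name and prefix are placeholders, and valid AWS credentials are assumed:

import boto3

s3 = boto3.resource("s3")
bucket = s3.Bucket("my-example-bucket")  # hypothetical bucket

tot_size = 0
for obj in bucket.objects.filter(Prefix="data/"):  # hypothetical prefix
    # each ObjectSummary exposes the key and size of a listed object
    print(obj.key, obj.size)
    tot_size += obj.size
print("total bytes:", tot_size)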
Example #11
def detect_blobs_blocks(filename_base, image5d, offset, size, channels,
                        verify=False, save_dfs=True, full_roi=False,
                        coloc=False):
    """Detect blobs by block processing of a large image.
    
    All channels are processed in the same blocks.
    
    Args:
        filename_base: Base path to use file output.
        image5d: Large image to process as a Numpy array of t,z,y,x,[c]
        offset: Sub-image offset given as coordinates in z,y,x.
        size: Sub-image shape given in z,y,x.
        channels (Sequence[int]): Sequence of channels, where None detects
            in all channels.
        verify: True to verify detections against truth database; defaults 
            to False.
        save_dfs: True to save data frames to file; defaults to True.
        full_roi (bool): True to treat ``image5d`` as the full ROI; defaults
            to False.
        coloc (bool): True to perform blob co-localizations; defaults to False.
    
    Returns:
        tuple[int, int, int], str, :class:`magmap.cv.detector.Blobs`:
        Accuracy metrics from :class:`magmap.cv.detector.verify_rois`,
        feedback message from this same function, and detected blobs.
    
    """
    time_start = time()
    subimg_path_base = filename_base
    if size is None or offset is None:
        # uses the entire stack if no size or offset specified
        size = image5d.shape[1:4]
        offset = (0, 0, 0)
    else:
        # get base path for sub-image
        subimg_path_base = naming.make_subimage_name(
            filename_base, offset, size)
    filename_blobs = libmag.combine_paths(subimg_path_base, config.SUFFIX_BLOBS)
    
    # get ROI for given region, including all channels
    if full_roi:
        # treat the full image as the ROI
        roi = image5d[0]
    else:
        roi = plot_3d.prepare_subimg(image5d, offset, size)
    num_chls_roi = 1 if len(roi.shape) < 4 else roi.shape[3]
    if num_chls_roi < 2:
        coloc = False
        print("Unable to co-localize as image has only 1 channel")
    
    # prep chunking ROI into sub-ROIs with size based on segment_size, scaling
    # by physical units to make more independent of resolution; use profile
    # from first channel to be processed for block settings
    time_detection_start = time()
    settings = config.get_roi_profile(channels[0])
    print("Profile for block settings:", settings[settings.NAME_KEY])
    sub_roi_slices, sub_rois_offsets, denoise_max_shape, exclude_border, \
        tol, overlap_base, overlap, overlap_padding = setup_blocks(
            settings, roi.shape)
    
    # TODO: option to distribute groups of sub-ROIs to different servers 
    # for blob detection
    seg_rois = StackDetector.detect_blobs_sub_rois(
        roi, sub_roi_slices, sub_rois_offsets, denoise_max_shape,
        exclude_border, coloc, channels)
    detection_time = time() - time_detection_start
    print("blob detection time (s):", detection_time)
    
    # prune blobs in overlapping portions of sub-ROIs
    time_pruning_start = time()
    segments_all, df_pruning = StackPruner.prune_blobs_mp(
        roi, seg_rois, overlap, tol, sub_roi_slices, sub_rois_offsets, channels,
        overlap_padding)
    pruning_time = time() - time_pruning_start
    print("blob pruning time (s):", pruning_time)
    #print("maxes:", np.amax(segments_all, axis=0))
    
    # get weighted mean of ratios
    if df_pruning is not None:
        print("\nBlob pruning ratios:")
        path_pruning = "blob_ratios.csv" if save_dfs else None
        df_pruning_all = df_io.data_frames_to_csv(
            df_pruning, path_pruning, show=" ")
        cols = df_pruning_all.columns.tolist()
        blob_pruning_means = {}
        if "blobs" in cols:
            blobs_unpruned = df_pruning_all["blobs"]
            num_blobs_unpruned = np.sum(blobs_unpruned)
            for col in cols[1:]:
                blob_pruning_means["mean_{}".format(col)] = [
                    np.sum(np.multiply(df_pruning_all[col], blobs_unpruned)) 
                    / num_blobs_unpruned]
            path_pruning_means = "blob_ratios_means.csv" if save_dfs else None
            df_pruning_means = df_io.dict_to_data_frame(
                blob_pruning_means, path_pruning_means, show=" ")
        else:
            print("no blob ratios found")
    
    '''# report any remaining duplicates
    np.set_printoptions(linewidth=500, threshold=10000000)
    print("all blobs (len {}):".format(len(segments_all)))
    sort = np.lexsort(
        (segments_all[:, 2], segments_all[:, 1], segments_all[:, 0]))
    blobs = segments_all[sort]
    print(blobs)
    print("checking for duplicates in all:")
    print(detector.remove_duplicate_blobs(blobs, slice(0, 3)))
    '''
    
    stats_detection = None
    fdbk = None
    colocs = None
    if segments_all is not None:
        # remove the duplicated elements that were used for pruning
        detector.replace_rel_with_abs_blob_coords(segments_all)
        if coloc:
            colocs = segments_all[:, 10:10+num_chls_roi].astype(np.uint8)
        # remove absolute coordinate and any co-localization columns
        segments_all = detector.remove_abs_blob_coords(segments_all)
        
        # compare detected blobs with truth blobs
        # TODO: assumes ground truth is relative to any ROI offset,
        # but should make customizable
        if verify:
            stats_detection, fdbk = verifier.verify_stack(
                filename_base, subimg_path_base, settings, segments_all,
                channels, overlap_base)
    
    if config.save_subimg:
        subimg_base_path = libmag.combine_paths(
            subimg_path_base, config.SUFFIX_SUBIMG)
        if (isinstance(config.image5d, np.memmap) and 
                config.image5d.filename == os.path.abspath(subimg_base_path)):
            # file at sub-image save path may have been opened as a memmap
            # file, in which case saving would fail
            libmag.warn("{} is currently open, cannot save sub-image"
                        .format(subimg_base_path))
        else:
            # write sub-image, which is in ROI (3D) format
            with open(subimg_base_path, "wb") as f:
                np.save(f, roi)

    # store blobs in Blobs instance
    # TODO: consider separating into blobs and blobs metadata archives
    blobs = detector.Blobs(
        segments_all, colocalizations=colocs, path=filename_blobs)
    blobs.resolutions = config.resolutions
    blobs.basename = os.path.basename(config.filename)
    blobs.roi_offset = offset
    blobs.roi_size = size
    
    # whole image benchmarking time
    times = (
        [detection_time], 
        [pruning_time], 
        time() - time_start)
    times_dict = {}
    for key, val in zip(StackTimes, times):
        times_dict[key] = val
    if segments_all is None:
        print("\nNo blobs detected")
    else:
        print("\nTotal blobs found:", len(segments_all))
        detector.show_blobs_per_channel(segments_all)
    print("\nTotal detection processing times (s):")
    path_times = "stack_detection_times.csv" if save_dfs else None
    df_io.dict_to_data_frame(times_dict, path_times, show=" ")
    
    return stats_detection, fdbk, blobs
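The pruning summary above is a weighted mean: each ratio column is weighted by its block's unpruned blob count. An isolated sketch with made-up numbers:

import numpy as np
import pandas as pd

# per-block pruning stats; blob counts and ratios are invented
df_pruning_all = pd.DataFrame(
    {"blobs": [120, 80, 200], "pruning_ratio": [0.90, 0.85, 0.95]})

blobs_unpruned = df_pruning_all["blobs"]
num_blobs_unpruned = np.sum(blobs_unpruned)
mean_ratio = np.sum(
    np.multiply(df_pruning_all["pruning_ratio"], blobs_unpruned)) / num_blobs_unpruned
print(mean_ratio)  # blocks with more blobs contribute more to the mean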
Example #12
def labels_to_markers_erosion(
        labels_img: np.ndarray,
        filter_size: int = 8,
        target_frac: Optional[float] = None,
        min_filter_size: Optional[int] = None,
        use_min_filter: bool = False,
        skel_eros_filt_size: Optional[int] = None,
        wt_dists: Optional[np.ndarray] = None,
        multiprocess: bool = True) -> Tuple[np.ndarray, pd.DataFrame]:
    """Convert a labels image to markers as eroded labels via multiprocessing.
    
    These markers can be used in segmentation algorithms such as 
    watershed.
    
    Args:
        labels_img: Labels image as an integer Numpy array,
            where each unique int is a separate label.
        filter_size: Size of structuring element for erosion, which should
            be > 0; defaults to 8.
        target_frac: Target fraction of original label to erode,
            passed to :func:`LabelToMarkerErosion.erode_label`. Defaults
            to None.
        min_filter_size: Minimum erosion filter size; defaults to None
            to use half of ``filter_size``, rounded down.
        use_min_filter: True to erode even if ``min_filter_size``
            is reached; defaults to False to avoid any erosion if this size
            is reached.
        skel_eros_filt_size: Erosion filter size before skeletonization
            in :func:`LabelToMarkerErosion.erode_labels`. Defaults to None to
            use the minimum filter size, which is half of ``filter_size``.
        wt_dists: Array of distances by which to weight
            the filter size, such as a distance transform to the outer
            perimeter of ``labels_img`` to weight central labels more
            heavily. Defaults to None.
        multiprocess: True to use multiprocessing; defaults to True.
    
    Returns:
        Tuple of an image array of the same shape as ``labels_img``, containing
        the same labels but eroded, and a data frame of erosion metrics.
    
    """
    def handle_eroded_label():
        # mutate markers outside of mp for changes to persist and collect stats
        markers[tuple(slices)][filtered] = stats_eros[0]
        for col, stat in zip(cols, stats_eros):
            sizes_dict.setdefault(col, []).append(stat)

    # set up labels erosion
    start_time = time()
    _logger.info(
        "Eroding labels to markers with filter size %s, min filter size %s, "
        "and target fraction %s", filter_size, min_filter_size, target_frac)
    markers = np.zeros_like(labels_img)
    labels_unique = np.unique(labels_img)
    if min_filter_size is None:
        min_filter_size = filter_size // 2
    if skel_eros_filt_size is None:
        skel_eros_filt_size = filter_size // 2
    sizes_dict = {}
    cols = (config.AtlasMetrics.REGION.value, "SizeOrig", "SizeMarker",
            config.SmoothingMetrics.FILTER_SIZE.value)

    # share large images as class attributes for forked or non-multiprocessing
    LabelToMarkerErosion.set_labels_img(labels_img, wt_dists)

    is_fork = False
    pool_results = None
    pool = None
    if multiprocess:
        # set up multiprocessing
        is_fork = chunking.is_fork()
        initializer = None
        initargs = None
        if not is_fork:
            # set up labels image as a shared array for spawned mode
            initializer, initargs = LabelToMarkerErosion.build_pool_init(
                {config.RegNames.IMG_LABELS: labels_img})

        pool = chunking.get_mp_pool(initializer, initargs)
        pool_results = []

    for label_id in labels_unique:
        if label_id == 0: continue
        # erode labels to generate markers, excluding labels small enough
        # that they would require a filter smaller than half of original size
        args = [
            label_id, filter_size, target_frac, min_filter_size,
            use_min_filter, skel_eros_filt_size
        ]
        if not is_fork:
            # pickle distance weight directly in spawned mode (not necessary
            # for non-multiprocessed but equivalent)
            if wt_dists is not None:
                args.append(
                    LabelToMarkerErosion.meas_wt(labels_img, label_id,
                                                 wt_dists))
        if pool is None:
            # process labels without multiprocessing
            stats_eros, slices, filtered = LabelToMarkerErosion.erode_label(
                *args)
            handle_eroded_label()
        else:
            # process in multiprocessing
            pool_results.append(
                pool.apply_async(LabelToMarkerErosion.erode_label, args=args))

    if multiprocess:
        # handle multiprocessing output
        for result in pool_results:
            stats_eros, slices, filtered = result.get()
            handle_eroded_label()
        pool.close()
        pool.join()

    # show erosion stats
    df = df_io.dict_to_data_frame(sizes_dict, show=True)

    _logger.info("Time elapsed to erode labels into markers: %s",
                 time() - start_time)
    return markers, df
Example #13
def export_region_ids(labels_ref_lookup, path, level):
    """Export region IDs from annotation reference reverse mapped dictionary 
    to CSV and Excel files.

    Use a ``level`` of None to export labels only for the currently loaded
    atlas. The RGB values used for the currently loaded atlas will also be
    shown, with cell colors corresponding to these values in the Excel file.
    
    Args:
        labels_ref_lookup: The labels reference lookup, assumed to be an 
            OrderedDict generated by :func:`ontology.create_reverse_lookup` 
            to look up by ID while preserving key order to ensure that 
            parents of any child will be reached prior to the child.
        path: Path to output CSV file; if does not end with ``.csv``, it will 
            be added.
        level: Level at which to find parent for each label. If None, 
            a parent level of -1 will be used, and label IDs will be 
            taken from the labels image rather than the full set of 
            labels from the ``labels_ref_lookup``.
    
    Returns:
        Pandas data frame of the region IDs and corresponding names.
    """
    def color_cells(s):
        # convert RGB to hex values since Pandas Excel export only supports
        # named colors or hex (as of v0.22)
        css = ["background-color: #{:02x}{:02x}{:02x}".format(*c) for c in s]
        return css

    ext = ".csv"
    path_csv = path if path.endswith(ext) else path + ext

    # find parents for label at the given level
    parent_level = -1 if level is None else level
    label_parents = ontology.labels_to_parent(labels_ref_lookup, parent_level)

    cols = ["Region", "RegionAbbr", "RegionName", "Level", "Parent"]
    data = OrderedDict()
    label_ids = sitk_io.find_atlas_labels(config.load_labels, level,
                                          labels_ref_lookup)
    cm = colormaps.get_labels_discrete_colormap(None, 0, use_orig_labels=True)
    rgbs = cm.cmap_labels
    if rgbs is not None:
        cols.append("RGB")
    for i, key in enumerate(label_ids):
        # does not include laterality distinction, only using original IDs
        if key <= 0: continue
        label = labels_ref_lookup[key]
        # ID of parent at label_parents' level
        parent = label_parents[key]
        vals = [
            key, label[ontology.NODE][config.ABAKeys.ACRONYM.value],
            label[ontology.NODE][config.ABAKeys.NAME.value],
            label[ontology.NODE][config.ABAKeys.LEVEL.value], parent
        ]
        if rgbs is not None:
            vals.append(rgbs[i, :3])
        for col, val in zip(cols, vals):
            data.setdefault(col, []).append(val)
    df = df_io.dict_to_data_frame(data, path_csv)
    if rgbs is not None:
        df = df.style.apply(color_cells, subset="RGB")
    path_xlsx = "{}.xlsx".format(os.path.splitext(path)[0])
    df.to_excel(path_xlsx)
    print("exported regions to styled spreadsheet: \"{}\"".format(path_xlsx))
    return df
Example #14
def meas_improvement(path,
                     col_effect,
                     col_p,
                     thresh_impr=0,
                     thresh_p=0.05,
                     col_wt=None,
                     suffix=None,
                     df=None):
    """Measure overall improvement and worsening for a column in a data frame.
    
    Args:
        path (str): Path of file to load into data frame.
        col_effect (str): Name of column with metric to measure.
        col_p (str): Name of column with p-values.
        thresh_impr (float): Threshold of effects below which are considered
            improved.
        thresh_p (float): Threshold of p-values below which are considered
            statistically significant.
        col_wt (str): Name of column for weighting.
        suffix (str): Output path suffix; defaults to None.
        df (:obj:`pd.DataFrame`): Data frame to use instead of loading from
            ``path``; defaults to None.

    Returns:
        :obj:`pd.DataFrame`: Data frame with improvement measurements.
        The data frame will be saved to a filename based on ``path``.

    """
    def add_wt(mask_cond, mask_cond_ss, name):
        # add weighted metrics for the given condition, such as improved
        # vs. worsened
        metrics[col_wt] = [np.sum(df[col_wt])]
        wt_cond = df.loc[mask_cond, col_wt]
        wt_cond_ss = df.loc[mask_cond_ss, col_wt]
        # sum of weighting column fitting the condition (all and statistically
        # significant)
        metrics["{}_{}".format(col_wt, name)] = [np.sum(wt_cond)]
        metrics["{}_{}_ss".format(col_wt, name)] = [np.sum(wt_cond_ss)]
        # sum of filtered effect multiplied by weighting
        metrics["{}_{}_by_{}".format(col_effect, name, col_wt)] = [
            np.sum(wt_cond.multiply(df.loc[mask_cond, col_effect]))
        ]
        metrics["{}_{}_by_{}_ss".format(col_effect, name, col_wt)] = [
            np.sum(wt_cond_ss.multiply(df.loc[mask_cond_ss, col_effect]))
        ]

    if df is None:
        df = pd.read_csv(path)

    # masks of improved and worsened, all and statistically significant
    # for each, where improvement is above the given threshold
    effects = df[col_effect]
    mask_impr = effects > thresh_impr
    mask_ss = df[col_p] < thresh_p
    mask_impr_ss = mask_impr & mask_ss
    mask_wors = effects < thresh_impr
    mask_wors_ss = mask_wors & mask_ss
    metrics = {
        "n": [len(effects)],
        "n_impr": [np.sum(mask_impr)],
        "n_impr_ss": [np.sum(mask_impr_ss)],
        "n_wors": [np.sum(mask_wors)],
        "n_wors_ss": [np.sum(mask_wors_ss)],
        col_effect: [np.sum(effects)],
        "{}_impr".format(col_effect): [np.sum(effects[mask_impr])],
        "{}_impr_ss".format(col_effect): [np.sum(effects[mask_impr_ss])],
        "{}_wors".format(col_effect): [np.sum(effects[mask_wors])],
        "{}_wors_ss".format(col_effect): [np.sum(effects[mask_wors_ss])],
    }
    if col_wt:
        # add columns based on weighting column
        add_wt(mask_impr, mask_impr_ss, "impr")
        add_wt(mask_wors, mask_wors_ss, "wors")

    out_path = libmag.insert_before_ext(path, "_impr")
    if suffix:
        out_path = libmag.insert_before_ext(out_path, suffix)
    df_impr = df_io.dict_to_data_frame(metrics, out_path)
    # display transposed version for more compact view given large number
    # of columns, but save un-transposed to preserve data types
    df_io.print_data_frame(df_impr.T, index=True, header=False)
    return df_impr
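The improvement counts above come from pandas boolean masks combined with &. A compact sketch on invented effect sizes and p-values:

import numpy as np
import pandas as pd

df = pd.DataFrame({
    "effect": [0.3, -0.1, 0.5, -0.4],  # invented effect sizes
    "p": [0.01, 0.20, 0.04, 0.03],     # invented p-values
})

mask_impr = df["effect"] > 0   # improved
mask_ss = df["p"] < 0.05       # statistically significant
mask_impr_ss = mask_impr & mask_ss

print(int(np.sum(mask_impr)), int(np.sum(mask_impr_ss)))  # 2 2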
Example #15
def parse_grid_stats(
    stats: OrderedDict[str, Tuple[Sequence, Sequence, str, OrderedDict]]
) -> Tuple[Dict[str, Tuple[Sequence, Sequence, Sequence]], pd.DataFrame]:
    """Parse stats from a grid search.
    
    Args:
        stats: Dictionary where key is a string with the parameters
            up to the last parameter group, and each value is a tuple of 
            the raw stats as (pos, true_pos, false_pos); the array of
            values for the last parameter; the last parameter key; and an 
            ``OrderedDict`` of the parent parameters and their values for 
            the given set of stats.
    
    Returns:
        Tuple of ``group_stats`` and ``df``:
        - ``group_stats`` is a dictionary of stats, where keys
          correspond to ``stats`` keys, and values are tuples of the
          false discovery rate, sensitivity, and last parameter group value,
          each as sequences
        - ``df`` is a data frame summarizing the stats
    
    """

    # parse a grid search
    stats_for_df = {}
    headers = None
    group_dict = {}
    param_keys = []
    for key, value in stats.items():
        # parse stats from a set of parameters
        grid_stats = np.array(value[0])  # raw stats
        # last parameter is given separately since it is actively varying
        last_param_vals, last_param_key, parent_params = value[1:]
        if not headers:
            # set up headers for each stat and insert parameter headers
            # at the start
            headers = [
                GridSearchStats.PARAM.value,
                GridSearchStats.PPV,
                GridSearchStats.SENS,
                GridSearchStats.POS,
                GridSearchStats.TP,
                GridSearchStats.FP,
                GridSearchStats.FDR,
            ]
            headers[0] = "_".join((headers[0], last_param_key))
            for i, parent in enumerate(parent_params.keys()):
                headers.insert(i, "_".join(
                    (GridSearchStats.PARAM.value, parent)))
                param_keys.append(parent)
            param_keys.append(last_param_key)
        # false discovery rate (1 - PPV) since true negatives are unknown
        fdr = np.subtract(
            1,
            np.divide(grid_stats[:, 1],
                      np.add(grid_stats[:, 1], grid_stats[:, 2])))
        sens = np.divide(grid_stats[:, 1], grid_stats[:, 0])
        for i, n in enumerate(last_param_vals):
            stat_list = []
            for parent_val in parent_params.values():
                stat_list.append(parent_val)
            stat_list.extend((last_param_vals[i], 1 - fdr[i], sens[i],
                              *grid_stats[i].astype(int), fdr[i]))
            for header, stat in zip(headers, stat_list):
                stats_for_df.setdefault(header, []).append(stat)
        group_dict[key] = (fdr, sens, last_param_vals)
    print()

    # generate a data frame to summarize stats and save to file
    path_df = libmag.make_out_path("gridsearch_{}.csv".format(
        "_".join(param_keys)))
    df = df_io.dict_to_data_frame(stats_for_df, path_df, show=" ")
    return group_dict, df
Example #16
def labels_to_markers_erosion(labels_img,
                              filter_size=8,
                              target_frac=None,
                              min_filter_size=None,
                              use_min_filter=False,
                              skel_eros_filt_size=None,
                              wt_dists=None):
    """Convert a labels image to markers as eroded labels via multiprocessing.
    
    These markers can be used in segmentation algorithms such as 
    watershed.
    
    Args:
        labels_img (:obj:`np.ndarray`): Labels image as an integer Numpy array,
            where each unique int is a separate label.
        filter_size (int): Size of structuring element for erosion, which should
            be > 0; defaults to 8.
        target_frac (float): Target fraction of original label to erode,
            passed to :func:`LabelToMarkerErosion.erode_label`. Defaults
            to None.
        min_filter_size (int): Minimum erosion filter size; defaults to None
            to use half of ``filter_size``, rounded down.
        use_min_filter (bool): True to erode even if ``min_filter_size``
            is reached; defaults to False to avoid any erosion if this size
            is reached.
        skel_eros_filt_size (int): Erosion filter size before skeletonization
            in :func:`LabelToMarkerErosion.erode_labels`. Defaults to None to
            use the minimum filter size, which is half of ``filter_size``.
        wt_dists (:obj:`np.ndarray`): Array of distances by which to weight
            the filter size, such as a distance transform to the outer
            perimeter of ``labels_img`` to weight central labels more
            heavily. Defaults to None.
    
    Returns:
        Tuple[:obj:`np.ndarray`, :obj:`pd.DataFrame`]: Image array of the same
        shape as ``labels_img`` with eroded labels, and a data frame of
        erosion metrics.
    """
    start_time = time()
    markers = np.zeros_like(labels_img)
    labels_unique = np.unique(labels_img)
    if min_filter_size is None:
        min_filter_size = filter_size // 2
    if skel_eros_filt_size is None:
        skel_eros_filt_size = filter_size // 2
    #labels_unique = np.concatenate((labels_unique[:5], labels_unique[-5:]))
    sizes_dict = {}
    cols = (config.AtlasMetrics.REGION.value, "SizeOrig", "SizeMarker",
            config.SmoothingMetrics.FILTER_SIZE.value)

    # erode labels via multiprocessing
    print("Eroding labels to markers with filter size {}, min filter size {}, "
          "and target fraction {}".format(filter_size, min_filter_size,
                                          target_frac))
    LabelToMarkerErosion.set_labels_img(labels_img, wt_dists)
    pool = chunking.get_mp_pool()
    pool_results = []
    for label_id in labels_unique:
        if label_id == 0: continue
        # erode labels to generate markers, excluding labels small enough
        # that they would require a filter smaller than half of original size
        pool_results.append(
            pool.apply_async(LabelToMarkerErosion.erode_label,
                             args=(label_id, filter_size, target_frac,
                                   min_filter_size, use_min_filter,
                                   skel_eros_filt_size)))
    for result in pool_results:
        stats_eros, slices, filtered = result.get()
        # can only mutate markers outside of mp for changes to persist
        markers[tuple(slices)][filtered] = stats_eros[0]
        for col, stat in zip(cols, stats_eros):
            sizes_dict.setdefault(col, []).append(stat)
    pool.close()
    pool.join()

    # show erosion stats
    df = df_io.dict_to_data_frame(sizes_dict, show=True)

    print("time elapsed to erode labels into markers:", time() - start_time)
    return markers, df
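A minimal single-process sketch of the erosion-to-markers idea, assuming SciPy's ``ndimage`` and a hypothetical ``erode_labels_simple`` helper (the real pipeline distributes ``LabelToMarkerErosion.erode_label`` across a multiprocessing pool and records per-label stats):

import numpy as np
from scipy import ndimage

def erode_labels_simple(labels_img, filter_size=8, min_filter_size=4):
    """Erode each label independently, shrinking the filter for small labels."""
    markers = np.zeros_like(labels_img)
    for label_id in np.unique(labels_img):
        if label_id == 0:
            continue  # skip background
        mask = labels_img == label_id
        size = filter_size
        eroded = ndimage.binary_erosion(
            mask, np.ones((size,) * mask.ndim, dtype=bool))
        # relax the filter toward the minimum if erosion removed the label
        while not eroded.any() and size > min_filter_size:
            size -= 1
            eroded = ndimage.binary_erosion(
                mask, np.ones((size,) * mask.ndim, dtype=bool))
        # fall back to the unchanged label if even the minimum filter failed
        markers[eroded if eroded.any() else mask] = label_id
    return markers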
Beispiel #17
0
def detect_blobs_large_image(filename_base, image5d, offset, size,
                             verify=False, save_dfs=True, full_roi=False):
    """Detect blobs within a large image through parallel processing of 
    smaller chunks.
    
    Args:
        filename_base: Base path to use for file output.
        image5d: Large image to process as a Numpy array of t,z,y,x,[c].
        offset: Sub-image offset given as coordinates in z,y,x.
        size: Sub-image shape given in z,y,x.
        verify: True to verify detections against truth database; defaults 
            to False.
        save_dfs: True to save data frames to file; defaults to True.
        full_roi (bool): True to treat ``image5d`` as the full ROI; defaults
            to False.
    
    Returns:
        Tuple of verification stats from :meth:`detector.verify_rois` (None
        unless verification is run), the corresponding feedback string (None
        likewise), and the array of all detected blobs.
    """
    time_start = time()
    if size is None or offset is None:
        # uses the entire stack if no size or offset specified
        size = image5d.shape[1:4]
        offset = (0, 0, 0)
    else:
        # change base filename for ROI-based partial stack
        filename_base = make_subimage_name(filename_base, offset, size)
    filename_subimg = libmag.combine_paths(filename_base, config.SUFFIX_SUBIMG)
    filename_blobs = libmag.combine_paths(filename_base, config.SUFFIX_BLOBS)
    
    # get ROI for given region, including all channels
    if full_roi:
        # treat the full image as the ROI
        roi = image5d[0]
    else:
        roi = plot_3d.prepare_subimg(image5d, size, offset)
    _, channels = plot_3d.setup_channels(roi, config.channel, 3)
    
    # prepare to chunk the ROI into sub-ROIs sized by segment_size, scaled
    # by physical units to be more independent of resolution
    time_detection_start = time()
    settings = config.roi_profile  # use default settings
    scaling_factor = detector.calc_scaling_factor()
    print("microsope scaling factor based on resolutions: {}"
          .format(scaling_factor))
    denoise_size = config.roi_profile["denoise_size"]
    denoise_max_shape = None
    if denoise_size:
        # further subdivide each sub-ROI for local preprocessing
        denoise_max_shape = np.ceil(
            np.multiply(scaling_factor, denoise_size)).astype(int)

    # overlap sub-ROIs to minimize edge effects
    overlap_base = chunking.calc_overlap()
    tol = np.multiply(overlap_base, settings["prune_tol_factor"]).astype(int)
    overlap_padding = np.copy(tol)
    overlap = np.copy(overlap_base)
    exclude_border = config.roi_profile["exclude_border"]
    if exclude_border is not None:
        # exclude border to avoid blob detector edge effects, where blobs
        # often collect at the faces of the sub-ROI;
        # ensure that overlap is greater than twice the border exclusion per
        # axis so that no plane will be excluded from both overlapping sub-ROIs
        exclude_border_thresh = np.multiply(2, exclude_border)
        overlap_less = np.less(overlap, exclude_border_thresh)
        overlap[overlap_less] = exclude_border_thresh[overlap_less]
        excluded = np.greater(exclude_border, 0)
        overlap[excluded] += 1  # additional padding
        overlap_padding[excluded] = 0  # no need to prune past excluded border
    print("sub-ROI overlap: {}, pruning tolerance: {}, padding beyond "
          "overlap for pruning: {}, exclude borders: {}"
          .format(overlap, tol, overlap_padding, exclude_border))
    max_pixels = np.ceil(np.multiply(
        scaling_factor, 
        config.roi_profile["segment_size"])).astype(int)
    print("preprocessing max shape: {}, detection max pixels: {}"
          .format(denoise_max_shape, max_pixels))
    sub_roi_slices, sub_rois_offsets = chunking.stack_splitter(
        roi.shape, max_pixels, overlap)
    # TODO: option to distribute groups of sub-ROIs to different servers 
    # for blob detection
    seg_rois = detect_blobs_sub_rois(
        roi, sub_roi_slices, sub_rois_offsets, denoise_max_shape, exclude_border)
    detection_time = time() - time_detection_start
    print("blob detection time (s):", detection_time)
    
    # prune blobs in overlapping portions of sub-ROIs
    time_pruning_start = time()
    segments_all, df_pruning = _prune_blobs_mp(
        roi, seg_rois, overlap, tol, sub_roi_slices, sub_rois_offsets, channels,
        overlap_padding)
    pruning_time = time() - time_pruning_start
    print("blob pruning time (s):", pruning_time)
    #print("maxes:", np.amax(segments_all, axis=0))
    
    # get weighted mean of ratios
    if df_pruning is not None:
        print("\nBlob pruning ratios:")
        path_pruning = "blob_ratios.csv" if save_dfs else None
        df_pruning_all = df_io.data_frames_to_csv(
            df_pruning, path_pruning, show=" ")
        cols = df_pruning_all.columns.tolist()
        blob_pruning_means = {}
        if "blobs" in cols:
            blobs_unpruned = df_pruning_all["blobs"]
            num_blobs_unpruned = np.sum(blobs_unpruned)
            for col in cols[1:]:
                blob_pruning_means["mean_{}".format(col)] = [
                    np.sum(np.multiply(df_pruning_all[col], blobs_unpruned)) 
                    / num_blobs_unpruned]
            path_pruning_means = "blob_ratios_means.csv" if save_dfs else None
            df_pruning_means = df_io.dict_to_data_frame(
                blob_pruning_means, path_pruning_means, show=" ")
        else:
            print("no blob ratios found")
    
    '''# report any remaining duplicates
    np.set_printoptions(linewidth=500, threshold=10000000)
    print("all blobs (len {}):".format(len(segments_all)))
    sort = np.lexsort(
        (segments_all[:, 2], segments_all[:, 1], segments_all[:, 0]))
    blobs = segments_all[sort]
    print(blobs)
    print("checking for duplicates in all:")
    print(detector.remove_duplicate_blobs(blobs, slice(0, 3)))
    '''
    
    stats_detection = None
    fdbk = None
    if segments_all is not None:
        # remove the duplicated elements that were used for pruning
        detector.replace_rel_with_abs_blob_coords(segments_all)
        segments_all = detector.remove_abs_blob_coords(segments_all)
        
        # compare detected blobs with truth blobs
        # TODO: assumes ground truth is relative to any ROI offset,
        # but should make customizable
        if verify:
            db_path_base = None
            exp_name = os.path.splitext(os.path.basename(config.filename))[0]
            try:
                if config.truth_db is None:
                    # find and load truth DB based on filename and subimage
                    db_path_base = os.path.basename(filename_base)
                    print("about to verify with truth db from {}"
                          .format(db_path_base))
                    sqlite.load_truth_db(db_path_base)
                if config.truth_db is not None:
                    # truth DB may contain multiple experiments for different
                    # subimages; series not included in exp name since in ROI
                    rois = config.truth_db.get_rois(exp_name)
                    if rois is None:
                        # exp may have been named by ROI
                        print("{} experiment name not found, will try with"
                              "ROI offset/size".format(exp_name))
                        exp_name = make_subimage_name(exp_name, offset, size)
                        rois = config.truth_db.get_rois(exp_name)
                    if rois is None:
                        raise LookupError(
                            "No truth set ROIs found for experiment {}, will "
                            "skip detection verification".format(exp_name))
                    print("load ROIs from exp: {}".format(exp_name))
                    exp_id = sqlite.insert_experiment(
                        config.verified_db.conn, config.verified_db.cur, 
                        exp_name, None)
                    verify_tol = np.multiply(
                        overlap_base, settings["verify_tol_factor"])
                    stats_detection, fdbk = detector.verify_rois(
                        rois, segments_all, config.truth_db.blobs_truth, 
                        verify_tol, config.verified_db, exp_id, config.channel)
            except FileNotFoundError:
                libmag.warn("Could not load truth DB from {}; "
                            "will not verify ROIs".format(db_path_base))
            except LookupError as e:
                libmag.warn(str(e))
    
    file_time_start = time()
    if config.save_subimg:
        if (isinstance(config.image5d, np.memmap) and 
                config.image5d.filename == os.path.abspath(filename_subimg)):
            # file at sub-image save path may have been opened as a memmap
            # file, in which case saving would fail
            libmag.warn("{} is currently open, cannot save sub-image"
                        .format(filename_subimg))
        else:
            # write sub-image, which is in ROI (3D) format
            with open(filename_subimg, "wb") as f:
                np.save(f, roi)

    # save blobs
    # TODO: only segments used; consider removing the rest except ver
    with open(filename_blobs, "wb") as outfile_blobs:
        np.savez(outfile_blobs, ver=BLOBS_NP_VER, segments=segments_all,
                 resolutions=config.resolutions,
                 basename=os.path.basename(config.filename),  # only save name
                 offset=offset, roi_size=size)  # None unless explicitly set
    file_save_time = time() - file_time_start
    
    # whole image benchmarking time
    times = (
        [detection_time], 
        [pruning_time], 
        time() - time_start)
    times_dict = {}
    for key, val in zip(StackTimes, times):
        times_dict[key] = val
    if segments_all is None:
        print("\nNo blobs detected")
    else:
        print("\nTotal blobs found:", len(segments_all))
        detector.show_blobs_per_channel(segments_all)
    print("file save time:", file_save_time)
    print("\nTotal detection processing times (s):")
    path_times = "stack_detection_times.csv" if save_dfs else None
    df_io.dict_to_data_frame(times_dict, path_times, show=" ")
    
    return stats_detection, fdbk, segments_all
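The overlap adjustment for border exclusion in the example above can be checked in isolation; a minimal sketch with hypothetical per-axis values, independent of the chunking module:

import numpy as np

# hypothetical per-axis (z, y, x) values
overlap = np.array([10, 10, 10])
exclude_border = np.array([0, 3, 8])

# overlap must exceed twice the excluded border so that no plane is
# dropped from both of two neighboring sub-ROIs
thresh = 2 * exclude_border
overlap = np.where(overlap < thresh, thresh, overlap)
overlap[exclude_border > 0] += 1  # extra padding on excluded axes
print(overlap)  # [10 11 17]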
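The weighted mean of pruning ratios above is a blob-count-weighted average; a minimal sketch with hypothetical counts and a hypothetical ratio column:

import pandas as pd

# hypothetical pruning stats: unpruned blob counts and one ratio column
df_pruning_all = pd.DataFrame(
    {"blobs": [200, 50], "pruning_ratio": [0.10, 0.30]})
num_blobs_unpruned = df_pruning_all["blobs"].sum()
mean_ratio = (df_pruning_all["pruning_ratio"]
              * df_pruning_all["blobs"]).sum() / num_blobs_unpruned
print(mean_ratio)  # -> 0.14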
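The blobs archive written above can be read back with NumPy; a minimal usage sketch assuming a hypothetical saved path, with keys matching those passed to ``np.savez``:

import numpy as np

archive = np.load("example_blobs.npz", allow_pickle=True)
print(archive["ver"])           # blobs archive format version
segments = archive["segments"]  # one row per detected blob
print(segments.shape)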