def _test_blob_verification(a, b, tol):
    """Print two blob sets and tabulate their closest matches.

    The matches are found by :func:`find_closest_blobs_cdist` and shown
    through :func:`df_io.dict_to_data_frame`.

    Args:
        a: Master blobs, printed under the "master (a)" heading.
        b: Test blobs, printed under the "test (b)" heading.
        tol: Tolerance passed through to the closest-blob search.

    """
    # show the test set first, then the master set
    for template, blob_set in (("test (b):\n{}", b), ("master (a):\n{}", a)):
        print(template.format(blob_set))

    # the test blobs are given as the first argument to the matcher
    test_inds, master_inds, match_dists = find_closest_blobs_cdist(b, a, tol)
    table = {"Testi": test_inds, "Masteri": master_inds, "Dist": match_dists}
    df_io.dict_to_data_frame(table, show=True)
def get_ref_lookup_as_df(self) -> Optional[pd.DataFrame]:
    """Get the reference lookup as a data frame.

    Returns:
        :attr:`ref_lookup` converted to a data frame with one row per
        entry. The attribute is returned unchanged if it is None or is
        already a data frame.

    """
    ref = self.ref_lookup
    if ref is None or isinstance(ref, pd.DataFrame):
        # nothing to convert
        return ref

    # column names: region ID, then the subset of node fields to extract
    id_col = config.ABAKeys.ABA_ID.value
    node_cols = tuple(
        aba_key.value for aba_key in (
            config.ABAKeys.NAME, config.ABAKeys.LEVEL,
            config.ABAKeys.ACRONYM))

    columns = {}
    for region_id, entry in ref.items():
        columns.setdefault(id_col, []).append(region_id)
        node = entry[NODE]
        for node_col in node_cols:
            # an empty/missing node yields None for each node column
            node_val = node.get(node_col) if node else None
            columns.setdefault(node_col, []).append(node_val)
        columns.setdefault(PARENT_IDS, []).append(entry.get(PARENT_IDS))
    return df_io.dict_to_data_frame(columns)
def _parse_blob_matches(self, rows):
    """Parse selected blob match rows into a blob match object.

    Args:
        rows (List[:obj:`sqlite3.Row`]): Sequence of rows, each with
            ``blob1``, ``blob2``, and ``dist`` entries.

    Returns:
        :class:`magmap.cv.colocalizer.BlobMatch`: Blob match object, which
        is constructed without arguments if ``rows`` is empty.

    Deprecated: 1.6.0
        Use :meth:`select_blob_matches` instead.

    """
    # look up the full blob for each blob ID and pair it with the distance
    matches = [
        (self.select_blob_by_id(row["blob1"])[0],
         self.select_blob_by_id(row["blob2"])[0],
         row["dist"])
        for row in rows]

    if len(rows) == 0:
        return colocalizer.BlobMatch()

    # convert rows to a data frame to access values by column name
    df = df_io.dict_to_data_frame(rows, records_cols=rows[0].keys())
    return colocalizer.BlobMatch(
        matches, df["id"], df["roi_id"], df["blob1"], df["blob2"])
def __init__(self, matches=None, match_id=None, roi_id=None, blob1_id=None,
             blob2_id=None, df=None):
    """Initialize blob match object.

    Args:
        matches (list[list[:class:`numpy.ndarray`,
            :class:`numpy.ndarray`, float]]): List of blob matches, each
            given as ``blob1, blob2, distance``. Defaults to None, which
            leaves the data frame as None.
        match_id (Sequence[int]): Match IDs, indexed in parallel with
            ``matches``; defaults to None.
        roi_id (Sequence[int]): ROI IDs, indexed in parallel with
            ``matches``; defaults to None.
        blob1_id (Sequence[int]): Blob 1 IDs, indexed in parallel with
            ``matches``; defaults to None.
        blob2_id (Sequence[int]): Blob 2 IDs, indexed in parallel with
            ``matches``; defaults to None.
        df (:class:`pandas.DataFrame`): Data frame to store directly, in
            which case all other arguments are ignored; defaults to None.

    """
    self.df: Optional[pd.DataFrame] = None
    self.coords: Optional[np.ndarray] = None
    self.cmap: Optional[np.ndarray] = None

    if df is not None:
        # a given data frame takes precedence over everything else
        self.df = df
        return
    if matches is None:
        # the ID sequences are only meaningful alongside matches
        return

    # optional ID columns paired with their source sequences
    optional_ids = (
        (BlobMatch.Cols.MATCH_ID, match_id),
        (BlobMatch.Cols.ROI_ID, roi_id),
        (BlobMatch.Cols.BLOB1_ID, blob1_id),
        (BlobMatch.Cols.BLOB2_ID, blob2_id),
    )

    columns = {}
    for row_i, match in enumerate(matches):
        row = {
            BlobMatch.Cols.BLOB1: match[0],
            BlobMatch.Cols.BLOB2: match[1],
            BlobMatch.Cols.DIST: match[2],
        }
        for col, ids in optional_ids:
            if ids is not None:
                row[col] = ids[row_i]
        # fill every known column, using None where no value was given
        for col in BlobMatch.Cols:
            columns.setdefault(col, []).append(row.get(col))
    self.df = df_io.dict_to_data_frame(columns)
def parse_grid_stats(stats_dict):
    """Parse stats from multiple grid searches.

    Args:
        stats_dict: Dictionary where key is a string with the parameters
            up to the last parameter group, and each value is a dictionary
            of parameter sets. Each parameter set is a sequence of the raw
            stats as rows of ``(pos, true_pos, false_pos)``; the values
            for the last parameter; the last parameter key; and an
            ``OrderedDict`` of the parent parameters and their values.

    Returns:
        Tuple of a dictionary keyed by group, whose values are
        dictionaries of ``(fdr, sens, last_param_vals)`` for each
        parameter set, and a list with one data frame per group.

    """
    parsed_stats = {}
    dfs = []
    # NOTE(review): shared across groups, so the CSV name for a later group
    # also contains the keys of earlier groups -- confirm this is intended
    param_keys = []
    for group, param_sets in stats_dict.items():
        # parse a single grid search
        print("{}:".format(group))
        columns = {}
        headers = None
        group_stats = {}
        parsed_stats[group] = group_stats

        for key, value in param_sets.items():
            # the last parameter is given separately since it is the one
            # actively varying
            raw_stats, last_param_vals, last_param_key, parent_params = value
            grid_stats = np.array(raw_stats)

            if not headers:
                # headers for each stat, with parent parameter headers
                # inserted at the start
                headers = [stat.value for stat in GridSearchStats]
                headers[0] = "_".join((headers[0], last_param_key))
                for pos, parent in enumerate(parent_params.keys()):
                    headers.insert(
                        pos, "_".join((GridSearchStats.PARAM.value, parent)))
                    param_keys.append(parent)
                param_keys.append(last_param_key)

            # false discovery rate as the inverse of PPV since true
            # negatives are not available
            true_pos = grid_stats[:, 1]
            ppv = np.divide(true_pos, np.add(true_pos, grid_stats[:, 2]))
            fdr = np.subtract(1, ppv)
            sens = np.divide(true_pos, grid_stats[:, 0])

            parent_vals = list(parent_params.values())
            for row_i, last_val in enumerate(last_param_vals):
                row = [
                    *parent_vals, last_val, 1 - fdr[row_i], sens[row_i],
                    *grid_stats[row_i].astype(int), fdr[row_i]]
                for header, stat in zip(headers, row):
                    columns.setdefault(header, []).append(stat)
            group_stats[key] = (fdr, sens, last_param_vals)

        print()
        path_df = "gridsearch_{}.csv".format("_".join(param_keys))
        dfs.append(df_io.dict_to_data_frame(columns, path_df, show=" "))
    return parsed_stats, dfs
def verify_rois(rois, blobs, blobs_truth, tol, output_db, exp_id, exp_name,
                channel):
    """Verify blobs in ROIs by comparing detected blobs with truth sets
    of blobs stored in a database.

    Save the verifications to a separate database with a name in the same
    format as saved processed files but with "_verified.db" at the end.
    Prints basic statistics on the verification.

    Note that blobs are found from ROI parameters rather than loading from
    database, so blobs recorded within these ROI bounds but from different
    ROIs will be included in the verification.

    Args:
        rois: Rows of ROIs from sqlite database.
        blobs (:obj:`np.ndarray`): The blobs to be checked for accuracy,
            given as 2D array of ``[[z, row, column, radius, ...], ...]``.
        blobs_truth (:obj:`np.ndarray`): The list by which to check for
            accuracy, in the same format as blobs.
        tol: Tolerance as z,y,x of floats specifying padding for the inner
            ROI and used to generate a single tolerance distance within
            which a detected and ground truth blob will be considered
            potential matches.
        output_db: Database in which to save the verification flags,
            typically the database in :attr:``config.verified_db``.
        exp_id: Experiment ID in ``output_db``.
        exp_name (str): Name of experiment to store as the sample name for
            each row in the output data frame.
        channel (List[int]): Filter ``blobs_truth`` by this channel.

    Returns:
        tuple[int, int, int], str, :class:`pandas.DataFrame`: Tuple of
        ``pos, true_pos, false_pos`` stats summed across ROIs, feedback
        message, and accuracy metrics for each ROI in a data frame.

    """
    blobs_truth = detector.blobs_in_channel(blobs_truth, channel)
    rois_falsehood = []
    thresh, scaling, inner_padding, resize, blobs = setup_match_blobs_roi(
        blobs, tol)

    # set up metrics dict for accuracy metrics of each ROI
    metrics = {}
    cols = (
        config.AtlasMetrics.SAMPLE,
        config.AtlasMetrics.CHANNEL,
        config.AtlasMetrics.OFFSET,
        config.AtlasMetrics.SIZE,
        mlearn.GridSearchStats.POS,
        mlearn.GridSearchStats.TP,
        mlearn.GridSearchStats.FP,
        mlearn.GridSearchStats.FN,
    )

    for roi in rois:
        # get ROI from database for ground truth blobs
        offset = (roi["offset_x"], roi["offset_y"], roi["offset_z"])
        size = (roi["size_x"], roi["size_y"], roi["size_z"])
        series = roi["series"]

        # find matches between truth and detected blobs
        blobs_inner_plus, blobs_truth_inner_plus, offset_inner, size_inner, \
            matches = match_blobs_roi(
                blobs, blobs_truth, offset, size, thresh, scaling,
                inner_padding, resize)

        # store blobs in separate verified DB
        roi_id, _ = sqlite.insert_roi(
            output_db.conn, output_db.cur, exp_id, series, offset_inner,
            size_inner)
        sqlite.insert_blobs(
            output_db.conn, output_db.cur, roi_id, blobs_inner_plus)
        sqlite.insert_blobs(
            output_db.conn, output_db.cur, roi_id, blobs_truth_inner_plus)
        output_db.insert_blob_matches(roi_id, matches)

        # compute accuracy metrics for the ROI; column 4 of the detected
        # blobs is read as the match flag (1 = true pos, 0 = false pos)
        pos = len(blobs_truth_inner_plus)  # condition pos
        true_pos = np.sum(blobs_inner_plus[:, 4] == 1)
        false_pos = np.sum(blobs_inner_plus[:, 4] == 0)
        false_neg = pos - true_pos
        if false_neg > 0 or false_pos > 0:
            rois_falsehood.append((offset_inner, false_pos, false_neg))
        vals = (exp_name, channel[0] if channel else 0,
                tuple(offset_inner.astype(int)),
                tuple(size_inner.astype(int)),
                pos, true_pos, false_pos, false_neg)
        for key, val in zip(cols, vals):
            metrics.setdefault(key, []).append(val)

    # generate and show data frame of accuracy metrics for each ROI
    df = df_io.dict_to_data_frame(metrics, show=" ")

    # show accuracy metrics of blobs combined across ROIs
    true_pos = df[mlearn.GridSearchStats.TP.value].sum()
    false_pos = df[mlearn.GridSearchStats.FP.value].sum()
    pos = df[mlearn.GridSearchStats.POS.value].sum()
    false_neg = pos - true_pos
    print("Automated verification using tol {}:\n".format(tol))
    fdbk = "Accuracy metrics for channel {}:\n{}".format(
        channel, atlas_stats.calc_sens_ppv(
            pos, true_pos, false_pos, false_neg)[2])
    print(fdbk)
    print("ROIs with falsehood:\n{}".format(rois_falsehood))
    return (pos, true_pos, false_pos), fdbk, df
def export_region_ids(labels_ref_lookup, path, level=None,
                      drawn_labels_only=False):
    """Export region IDs from an annotation reference reverse mapped
    dictionary to CSV and Excel files.

    Use a ``level`` of None to export labels only for the currently loaded
    atlas. The RGB values used for the currently loaded atlas will also be
    shown, with cell colors corresponding to these values in the Excel
    file.

    Args:
        labels_ref_lookup: The labels reference lookup, assumed to be an
            OrderedDict generated by
            :func:`ontology.create_reverse_lookup` to look up by ID while
            preserving key order to ensure that parents of any child will
            be reached prior to the child.
        path: Path to output CSV file; if does not end with ``.csv``, it
            will be added.
        level: Level at which to find parent for each label; defaults to
            None to get the immediate parent.
        drawn_labels_only (bool): True to export only the drawn labels for
            atlas labels in the same folder as ``labels_ref_lookup``.
            Defaults to False to use the full set of labels in
            ``labels_ref_lookup``.

    Returns:
        Pandas data frame of the region IDs and corresponding names. If
        label colors are available, the returned object is instead the
        styled output of ``df.style.apply`` used for the Excel export.

    """
    def color_cells(s):
        # Pandas Excel export only supports named colors or hex values
        # (as of v0.22), so convert each RGB value to hex
        return [
            "background-color: #{:02x}{:02x}{:02x}".format(*rgb)
            for rgb in s]

    ext = ".csv"
    path_csv = path if path.endswith(ext) else path + ext

    # ancestor of each label at the given level
    label_parents = ontology.labels_to_parent(
        labels_ref_lookup, level, allow_parent_same_level=True)

    cols = [
        config.AtlasMetrics.REGION.value,
        config.AtlasMetrics.REGION_ABBR.value,
        config.AtlasMetrics.REGION_NAME.value,
        config.AtlasMetrics.LEVEL.value,
        config.AtlasMetrics.PARENT.value,
    ]
    data = OrderedDict()
    label_ids = sitk_io.find_atlas_labels(
        config.load_labels, drawn_labels_only, labels_ref_lookup)
    cm = colormaps.get_labels_discrete_colormap(None, 0, use_orig_labels=True)
    rgbs = cm.cmap_labels
    has_rgb = rgbs is not None
    if has_rgb:
        cols.append("RGB")

    for label_i, label_id in enumerate(label_ids):
        label = labels_ref_lookup.get(label_id)
        if label is None:
            # skip labels absent from the reference
            continue

        # ID of parent at label_parents' level
        parent = label_parents[label_id]
        row = [
            label_id,
            label[ontology.NODE][config.ABAKeys.ACRONYM.value],
            label[ontology.NODE][config.ABAKeys.NAME.value],
            label[ontology.NODE][config.ABAKeys.LEVEL.value],
            parent,
        ]
        if has_rgb:
            # colors are indexed by position in label_ids
            row.append(rgbs[label_i, :3])
        for col, val in zip(cols, row):
            data.setdefault(col, []).append(val)

    df = df_io.dict_to_data_frame(data, path_csv)
    if has_rgb:
        df = df.style.apply(color_cells, subset="RGB")
    path_xlsx = "{}.xlsx".format(os.path.splitext(path)[0])
    df.to_excel(path_xlsx)
    print("exported regions to styled spreadsheet: \"{}\"".format(path_xlsx))
    return df
def select_blob_matches(
        self, roi_id: int, offset: Optional[Sequence[int]] = None,
        shape: Optional[Sequence[int]] = None) -> "colocalizer.BlobMatch":
    """Select blob matches for the given ROI.

    Matches are joined with both of their blobs in a single query. If both
    ``offset`` and ``shape`` are given, only matches whose two blobs each
    lie within ``[offset, offset + shape)`` along every axis are selected.

    Args:
        roi_id: ROI ID.
        offset: ROI offset in ``z,y,x``; defaults to None.
        shape: ROI shape in ``z,y,x``; defaults to None.

    Returns:
        Blob matches, whose data frame is None if no rows were found.

    """
    _logger.debug("Selecting blob matches for ROI ID: %s", roi_id)
    start = time()

    # set up columns for each table
    cols_matches = _specify_table_cols(
        _COLS_BLOB_MATCHES + ', id', ', ', 'bm')
    cols_blobs = _COLS_BLOBS + ", id"
    cols_blobs1 = _specify_table_cols(cols_blobs, ', ', 'b1')
    cols_blobs2 = _specify_table_cols(cols_blobs, ', ', 'b2')

    # set up select statement
    stmnt = (
        f"SELECT {cols_matches}, "
        f"{cols_blobs1}, "
        f"{cols_blobs2} "
        f"FROM blob_matches bm "
        f"INNER JOIN blobs b1 ON bm.blob1 = b1.id "
        f"INNER JOIN blobs b2 ON bm.blob2 = b2.id "
        f"WHERE bm.roi_id = ?")
    args = [roi_id, ]
    if offset is not None and shape is not None:
        # add ROI parameters as (lower, upper) bounds flattened in z,y,x
        # order; values are bound as strings, as before
        bounds = zip(offset, np.add(offset, shape))
        bounds = [str(b) for bound in bounds for b in bound]
        # each fragment starts with a space so that the concatenated
        # clauses stay whitespace-separated
        stmnt += (
            " AND b1.z >= ? AND b1.z < ?"
            " AND b1.y >= ? AND b1.y < ? AND b1.x >= ? AND b1.x < ?"
            " AND b2.z >= ? AND b2.z < ?"
            " AND b2.y >= ? AND b2.y < ? AND b2.x >= ? AND b2.x < ?")
        # the same bounds apply to both blobs of each match
        args.extend(bounds)
        args.extend(bounds)

    # execute query
    self.cur.execute(stmnt, args)
    rows = self.cur.fetchall()

    df_matches = None
    if len(rows) > 0:
        # convert to data frame to access by named columns
        df = df_io.dict_to_data_frame(rows, records_cols=rows[0].keys())

        def get_cols(col_full):
            # extract column aliases, taken as the second space-separated
            # token of each comma-separated column spec
            return [c.split(" ")[1] for c in col_full.split(", ")]

        # extract columns for blob matches
        df_matches = df[get_cols(cols_matches)]
        df_matches = df_matches.rename(columns={
            "bm_blob1": colocalizer.BlobMatch.Cols.BLOB1_ID.value,
            "bm_blob2": colocalizer.BlobMatch.Cols.BLOB2_ID.value,
            "bm_id": colocalizer.BlobMatch.Cols.MATCH_ID.value,
            "bm_roi_id": colocalizer.BlobMatch.Cols.ROI_ID.value,
            "bm_dist": colocalizer.BlobMatch.Cols.DIST.value,
        })

        # merge each set of blob columns into a single column of blob lists
        cols_dict = {
            colocalizer.BlobMatch.Cols.BLOB1.value: cols_blobs1,
            colocalizer.BlobMatch.Cols.BLOB2.value: cols_blobs2,
        }
        for col, cols in cols_dict.items():
            # the first blob column is left out of the merged blob
            cols = get_cols(cols)[1:]
            df_matches[col] = df[cols].to_numpy().tolist()

    blob_matches = colocalizer.BlobMatch(df=df_matches)
    _logger.debug(
        "Finished selecting blob matches in %s s", time() - start)
    return blob_matches
def export_rois(db, image5d, channel, path, padding=None, unit_factor=None,
                truth_mode=None, exp_name=None):
    """Export all ROIs from database.

    For each ROI, the ROI image and its blobs are saved as Numpy archives,
    the image and a ground truth image built from the blobs are written
    through SimpleITK, 2D plots are generated through the ROI editor, and
    basic metrics are collected into a data frame that is written to
    ``<path>_rois.csv``.

    If the current processing profile includes isotropic interpolation,
    the ROIs will be resized to make isotropic according to this factor.

    Args:
        db: Database from which to export.
        image5d: The image with the ROIs.
        channel (List[int]): Channels to export; currently only the first
            channel is used.
        path: Path with filename base from which to save the exported
            files.
        padding (List[int]): Padding in x,y,z to exclude from the ROI;
            defaults to None.
        unit_factor (float): Linear conversion factor for units (eg 1000.0
            to convert um to mm).
        truth_mode (:obj:`config.TruthDBModes`): Truth mode enum; defaults
            to None.
        exp_name (str): Name of experiment to export; defaults to None to
            export all experiments in ``db``.

    Returns:
        :obj:`pd.DataFrame`: ROI metrics in a data frame, as returned by
        :func:`df_io.data_frames_to_csv`.

    """
    if padding is not None:
        padding = np.array(padding)

    # TODO: consider iterating through all channels
    channel = channel[0] if channel else 0

    # convert volume base on scaling and unit factor; the linear unit
    # factor is cubed since it is applied to a volume
    phys_mult = np.prod(detector.calc_scaling_factor())
    if unit_factor:
        phys_mult /= unit_factor**3

    # metrics for all ROIs, as a dict of column lists
    metrics_all = {}
    exps = sqlite.select_experiment(db.cur, None)
    for exp in exps:
        if exp_name and exp["name"] != exp_name:
            # DBs may contain many experiments, which may not correspond to
            # image5d, eg verified DBs from many truth sets
            continue
        rois = sqlite.select_rois(db.cur, exp["id"])
        for roi in rois:
            # get ROI as a small image
            size = sqlite.get_roi_size(roi)
            offset = sqlite.get_roi_offset(roi)
            img3d = plot_3d.prepare_roi(image5d, size, offset)

            # get blobs and change confirmation flag to avoid confirmation
            # color in 2D plots
            roi_id = roi["id"]
            blobs = sqlite.select_blobs(db.cur, roi_id)
            blobs_detected = None
            if truth_mode is config.TruthDBModes.VERIFIED:
                # verified DBs use a truth value of -1 to indicate "detected",
                # non-truth blobs, including both correct and incorrect
                # detections, while the rest of blobs are "truth" blobs
                truth_vals = detector.get_blob_truth(blobs)
                blobs_detected = blobs[truth_vals == -1]
                blobs = blobs[truth_vals != -1]
            else:
                # default to include only confirmed blobs; truth sets
                # ironically do not use the truth flag but instead
                # assume all confirmed blobs are "truth"
                blobs = blobs[detector.get_blob_confirmed(blobs) == 1]
            # reset column 4 of all remaining blobs (the flag change noted
            # above)
            blobs[:, 4] = -1

            # adjust ROI size and offset if border set
            if padding is not None:
                # shrink by the padding on both sides of each axis; the
                # image shape is reversed to match the order of padding
                size = np.subtract(img3d.shape[::-1], 2 * padding)
                img3d = plot_3d.prepare_roi(img3d, size, padding)
                # shift blob coordinates to be relative to the padded ROI;
                # offset and padding are reversed to match the blob
                # coordinate order
                blobs[:, 0:3] = np.subtract(
                    blobs[:, 0:3], np.add(offset, padding)[::-1])
            print("exporting ROI of shape {}".format(img3d.shape))

            isotropic = config.roi_profile["isotropic"]
            blobs_orig = blobs
            if isotropic is not None:
                # interpolation for isotropy if set in first processing profile
                img3d = cv_nd.make_isotropic(img3d, isotropic)
                isotropic_factor = cv_nd.calc_isotropic_factor(isotropic)
                # keep a copy of the blobs for the truth image built below
                blobs_orig = np.copy(blobs)
                blobs = detector.multiply_blob_rel_coords(
                    blobs, isotropic_factor)

            # export ROI and 2D plots
            path_base, path_dir_nifti, path_img, path_img_nifti, path_blobs, \
                path_img_annot, path_img_annot_nifti = make_roi_paths(
                    path, roi_id, channel, make_dirs=True)
            np.save(path_img, img3d)
            print("saved 3D image to {}".format(path_img))
            # WORKAROUND: for some reason SimpleITK gives a conversion error
            # when converting from uint16 (>u2) Numpy array
            img3d = img3d.astype(np.float64)
            img3d_sitk = sitk.GetImageFromArray(img3d)
            '''
            print(img3d_sitk)
            print("orig img:\n{}".format(img3d[0]))
            img3d_back = sitk.GetArrayFromImage(img3d_sitk)
            print(img3d.shape, img3d.dtype, img3d_back.shape, img3d_back.dtype)
            print("sitk img:\n{}".format(img3d_back[0]))
            '''
            sitk.WriteImage(img3d_sitk, path_img_nifti, False)
            roi_ed = roi_editor.ROIEditor(img3d)
            roi_ed.plot_roi(blobs, channel, show=False,
                            title=os.path.splitext(path_img)[0])
            libmag.show_full_arrays()

            # export image and blobs, stripping blob flags and adjusting
            # user-added segments' radii; use original rather than blobs with
            # any interpolation since the ground truth will itself be
            # interpolated
            blobs = blobs_orig
            # keep only the first 4 columns; column 3 is handled as the
            # radius below
            blobs = blobs[:, 0:4]
            # prior to v.0.5.0, user-added segments had a radius of 0.0
            blobs[np.isclose(blobs[:, 3], 0), 3] = 5.0
            # as of v.0.5.0, user-added segments have neg radii whose abs
            # value corresponds to the displayed radius
            blobs[:, 3] = np.abs(blobs[:, 3])
            # make more rounded since near-integer values appear to give
            # edges of 5 straight pixels
            # https://github.com/scikit-image/scikit-image/issues/2112
            #blobs[:, 3] += 1E-1
            blobs[:, 3] -= 0.5
            libmag.printv("blobs:\n{}".format(blobs))
            np.save(path_blobs, blobs)

            # convert blobs to ground truth
            img3d_truth = plot_3d.build_ground_truth(
                np.zeros(size[::-1], dtype=np.uint8), blobs)
            if isotropic is not None:
                img3d_truth = cv_nd.make_isotropic(img3d_truth, isotropic)
                # remove fancy blending since truth set must be binary
                img3d_truth[img3d_truth >= 0.5] = 1
                img3d_truth[img3d_truth < 0.5] = 0
            print("exporting truth ROI of shape {}".format(img3d_truth.shape))
            np.save(path_img_annot, img3d_truth)
            #print(img3d_truth)
            sitk.WriteImage(sitk.GetImageFromArray(img3d_truth),
                            path_img_annot_nifti, False)
            # avoid smoothing interpolation, using "nearest" instead
            with plt.style.context(config.rc_params_mpl2_img_interp):
                # NOTE(review): called with different positional arguments
                # than the plot_roi call above -- confirm against the
                # plot_roi signature
                roi_ed.plot_roi(img3d_truth, None, channel, show=False,
                                title=os.path.splitext(path_img_annot)[0])

            # measure ROI metrics and export to data frame; use AtlasMetrics
            # enum vals since will need LabelMetrics names instead
            metrics = {
                config.AtlasMetrics.SAMPLE.value: exp["name"],
                config.AtlasMetrics.CONDITION.value: "truth",
                config.AtlasMetrics.CHANNEL.value: channel,
                config.AtlasMetrics.OFFSET.value: offset,
                config.AtlasMetrics.SIZE.value: size,
            }
            # get basic counts for ROI and update volume for physical units;
            # the whole ROI is measured as a single label of 1
            vols.MeasureLabel.set_data(
                img3d, np.ones_like(img3d, dtype=np.int8))
            _, metrics_counts = vols.MeasureLabel.measure_counts(1)
            metrics_counts[vols.LabelMetrics.Volume] *= phys_mult
            for key, val in metrics_counts.items():
                # convert LabelMetrics to their name
                metrics[key.name] = val
            metrics[vols.LabelMetrics.Nuclei.name] = len(blobs)
            metrics_dicts = [metrics]
            if blobs_detected is not None:
                # add another row for detected blobs, which differs from
                # the truth row only in condition and nuclei count
                metrics_detected = dict(metrics)
                metrics_detected[
                    config.AtlasMetrics.CONDITION.value] = "detected"
                metrics_detected[vols.LabelMetrics.Nuclei.name] = len(
                    blobs_detected)
                metrics_dicts.append(metrics_detected)
            for m in metrics_dicts:
                for key, val in m.items():
                    metrics_all.setdefault(key, []).append(val)

            print("exported {}".format(path_base))

    #_test_loading_rois(db, channel, path)

    # convert to data frame and compute densities for nuclei and intensity
    df = df_io.dict_to_data_frame(metrics_all)
    vol = df[vols.LabelMetrics.Volume.name]
    df.loc[:, vols.LabelMetrics.DensityIntens.name] = (
        df[vols.LabelMetrics.Intensity.name] / vol)
    df.loc[:, vols.LabelMetrics.Density.name] = (
        df[vols.LabelMetrics.Nuclei.name] / vol)
    df = df_io.data_frames_to_csv(df, "{}_rois.csv".format(path))
    return df
def list_s3_bucket(name, keys=None, prefix=None, suffix=None, versions=False):
    """List all objects or object versions in an AWS S3 bucket.

    Args:
        name (str): Name of bucket.
        keys (List[str]): Sequence of keys within the bucket to include
            sizes of only these files; defaults to None.
        prefix (str): Filter only keys starting with this string; defaults
            to None.
        suffix (str): String to append to output CSV file; defaults to
            None.
        versions (bool): True to get all object versions, including
            deleted objects; False to get only the current versions;
            defaults to False.

    Returns:
        float, :obj:`pd.DataFrame`, :obj:`pd.DataFrame`: Size of bucket in
        bytes; a dataframe of keys and associated sizes; and a dataframe
        of missing keys from ``keys``, or None if ``keys`` is not given.

    """
    s3 = boto3.resource("s3")
    bucket = s3.Bucket(name)
    tot_size = 0
    obj_sizes = {}

    # get latest version of objects or all object version, filtering
    # for paths starting with prefix if set
    objs = bucket.object_versions if versions else bucket.objects
    objs = objs.filter(Prefix=prefix) if prefix else objs.all()
    for obj in objs:
        if keys and obj.key not in keys:
            # only check keys in list if given
            continue
        obj_sizes.setdefault("Bucket", []).append(bucket.name)
        obj_sizes.setdefault("Key", []).append(obj.key)
        size = obj.size
        obj_sizes.setdefault("Size", []).append(size)
        if size:
            # skip delete markers, which have a size of None
            tot_size += size
        if versions:
            # add columns for version info
            obj_sizes.setdefault("Version_id", []).append(obj.version_id)
            obj_sizes.setdefault("Last_modified", []).append(
                obj.last_modified)

    out_path = "bucket_{}".format(bucket.name)
    if suffix:
        out_path = libmag.insert_before_ext(out_path, suffix, "_")
    df_missing = None
    if keys:
        # if list of keys given, show all keys that were not found; compare
        # against the object keys collected in the "Key" column rather than
        # the column names of the output dict
        keys_found = set(obj_sizes.get("Key", ()))
        keys_missing = [key for key in keys if key not in keys_found]
        # print("Missing keys:\n", "\n".join(keys_missing))
        df_missing = df_io.dict_to_data_frame(
            {"Keys_missing": keys_missing},
            libmag.insert_before_ext(out_path, "_missing"))

    df = df_io.dict_to_data_frame(obj_sizes, out_path)
    print("{} bucket total tot_size (GiB): {}".format(
        bucket.name, libmag.convert_bin_magnitude(tot_size, 3)))
    return tot_size, df, df_missing
def detect_blobs_blocks(filename_base, image5d, offset, size, channels,
                        verify=False, save_dfs=True, full_roi=False,
                        coloc=False):
    """Detect blobs by block processing of a large image.

    All channels are processed in the same blocks. Detection is performed
    on sub-ROIs, after which blobs in the overlapping portions of sub-ROIs
    are pruned. Pruning ratios and processing times are shown and
    optionally saved as CSV files.

    Args:
        filename_base: Base path to use file output.
        image5d: Large image to process as a Numpy array of t,z,y,x,[c]
        offset: Sub-image offset given as coordinates in z,y,x.
        size: Sub-image shape given in z,y,x.
        channels (Sequence[int]): Sequence of channels, where None detects
            in all channels.
        verify: True to verify detections against truth database; defaults
            to False.
        save_dfs: True to save data frames to file; defaults to True.
        full_roi (bool): True to treat ``image5d`` as the full ROI;
            defaults to False.
        coloc (bool): True to perform blob co-localizations; defaults to
            False. Turned off if the ROI has fewer than 2 channels.

    Returns:
        tuple[int, int, int], str, :class:`magmap.cv.detector.Blobs`:
        Accuracy metrics from :class:`magmap.cv.detector.verify_rois`,
        feedback message from this same function, and detected blobs.
        The metrics and feedback are None unless ``verify`` is True and
        blobs were found.

    """
    time_start = time()
    subimg_path_base = filename_base
    if size is None or offset is None:
        # uses the entire stack if no size or offset specified
        size = image5d.shape[1:4]
        offset = (0, 0, 0)
    else:
        # get base path for sub-image
        subimg_path_base = naming.make_subimage_name(
            filename_base, offset, size)
    filename_blobs = libmag.combine_paths(subimg_path_base, config.SUFFIX_BLOBS)

    # get ROI for given region, including all channels
    if full_roi:
        # treat the full image as the ROI
        roi = image5d[0]
    else:
        roi = plot_3d.prepare_subimg(image5d, offset, size)
    # a ROI with fewer than 4 dimensions is treated as single-channel
    num_chls_roi = 1 if len(roi.shape) < 4 else roi.shape[3]
    if num_chls_roi < 2:
        # NOTE(review): the message is printed even when co-localization
        # was not requested -- confirm whether this is intended
        coloc = False
        print("Unable to co-localize as image has only 1 channel")

    # prep chunking ROI into sub-ROIs with size based on segment_size, scaling
    # by physical units to make more independent of resolution; use profile
    # from first channel to be processed for block settings
    time_detection_start = time()
    # NOTE(review): indexing channels here fails if channels is None, which
    # the docstring describes as a supported value -- confirm with callers
    settings = config.get_roi_profile(channels[0])
    print("Profile for block settings:", settings[settings.NAME_KEY])
    sub_roi_slices, sub_rois_offsets, denoise_max_shape, exclude_border, \
        tol, overlap_base, overlap, overlap_padding = setup_blocks(
            settings, roi.shape)

    # TODO: option to distribute groups of sub-ROIs to different servers
    # for blob detection
    seg_rois = StackDetector.detect_blobs_sub_rois(
        roi, sub_roi_slices, sub_rois_offsets, denoise_max_shape,
        exclude_border, coloc, channels)
    detection_time = time() - time_detection_start
    print("blob detection time (s):", detection_time)

    # prune blobs in overlapping portions of sub-ROIs
    time_pruning_start = time()
    segments_all, df_pruning = StackPruner.prune_blobs_mp(
        roi, seg_rois, overlap, tol, sub_roi_slices, sub_rois_offsets,
        channels, overlap_padding)
    pruning_time = time() - time_pruning_start
    print("blob pruning time (s):", pruning_time)
    #print("maxes:", np.amax(segments_all, axis=0))

    # get weighted mean of ratios
    if df_pruning is not None:
        print("\nBlob pruning ratios:")
        path_pruning = "blob_ratios.csv" if save_dfs else None
        df_pruning_all = df_io.data_frames_to_csv(
            df_pruning, path_pruning, show=" ")
        cols = df_pruning_all.columns.tolist()
        blob_pruning_means = {}
        if "blobs" in cols:
            # weight each column after the first by the "blobs" column
            blobs_unpruned = df_pruning_all["blobs"]
            num_blobs_unpruned = np.sum(blobs_unpruned)
            for col in cols[1:]:
                blob_pruning_means["mean_{}".format(col)] = [
                    np.sum(np.multiply(df_pruning_all[col], blobs_unpruned))
                    / num_blobs_unpruned]
            path_pruning_means = "blob_ratios_means.csv" if save_dfs else None
            # the data frame is generated for display/saving only
            df_pruning_means = df_io.dict_to_data_frame(
                blob_pruning_means, path_pruning_means, show=" ")
        else:
            print("no blob ratios found")

    '''# report any remaining duplicates
    np.set_printoptions(linewidth=500, threshold=10000000)
    print("all blobs (len {}):".format(len(segments_all)))
    sort = np.lexsort(
        (segments_all[:, 2], segments_all[:, 1], segments_all[:, 0]))
    blobs = segments_all[sort]
    print(blobs)
    print("checking for duplicates in all:")
    print(detector.remove_duplicate_blobs(blobs, slice(0, 3)))
    '''

    stats_detection = None
    fdbk = None
    colocs = None
    if segments_all is not None:
        # remove the duplicated elements that were used for pruning
        detector.replace_rel_with_abs_blob_coords(segments_all)
        if coloc:
            # extract one column per ROI channel, starting at column 10,
            # before these columns are removed below
            colocs = segments_all[:, 10:10+num_chls_roi].astype(np.uint8)
        # remove absolute coordinate and any co-localization columns
        segments_all = detector.remove_abs_blob_coords(segments_all)

        # compare detected blobs with truth blobs
        # TODO: assumes ground truth is relative to any ROI offset,
        # but should make customizable
        if verify:
            stats_detection, fdbk = verifier.verify_stack(
                filename_base, subimg_path_base, settings, segments_all,
                channels, overlap_base)

    if config.save_subimg:
        subimg_base_path = libmag.combine_paths(
            subimg_path_base, config.SUFFIX_SUBIMG)
        if (isinstance(config.image5d, np.memmap)
                and config.image5d.filename == os.path.abspath(
                    subimg_base_path)):
            # file at sub-image save path may have been opened as a memmap
            # file, in which case saving would fail
            libmag.warn("{} is currently open, cannot save sub-image"
                        .format(subimg_base_path))
        else:
            # write sub-image, which is in ROI (3D) format
            with open(subimg_base_path, "wb") as f:
                np.save(f, roi)

    # store blobs in Blobs instance
    # TODO: consider separating into blobs and blobs metadata archives
    blobs = detector.Blobs(
        segments_all, colocalizations=colocs, path=filename_blobs)
    blobs.resolutions = config.resolutions
    blobs.basename = os.path.basename(config.filename)
    blobs.roi_offset = offset
    blobs.roi_size = size

    # whole image benchmarking time; values are paired in order with the
    # members of StackTimes
    times = (
        [detection_time],
        [pruning_time],
        time() - time_start)
    times_dict = {}
    for key, val in zip(StackTimes, times):
        times_dict[key] = val
    if segments_all is None:
        print("\nNo blobs detected")
    else:
        print("\nTotal blobs found:", len(segments_all))
        detector.show_blobs_per_channel(segments_all)
    print("\nTotal detection processing times (s):")
    path_times = "stack_detection_times.csv" if save_dfs else None
    df_io.dict_to_data_frame(times_dict, path_times, show=" ")

    return stats_detection, fdbk, blobs
def labels_to_markers_erosion(
        labels_img: np.ndarray, filter_size: int = 8,
        target_frac: Optional[float] = None,
        min_filter_size: Optional[int] = None,
        use_min_filter: bool = False,
        skel_eros_filt_size: Optional[int] = None,
        wt_dists: Optional[np.ndarray] = None,
        multiprocess: bool = True) -> Tuple[np.ndarray, pd.DataFrame]:
    """Convert a labels image to markers as eroded labels.

    Each non-zero label is eroded through
    :meth:`LabelToMarkerErosion.erode_label`, either in a multiprocessing
    pool or sequentially in the current process. These markers can be used
    in segmentation algorithms such as watershed.

    Args:
        labels_img: Labels image as an integer Numpy array, where each
            unique int is a separate label.
        filter_size: Size of structuring element for erosion, which should
            be > 0; defaults to 8.
        target_frac: Target fraction of original label to erode, passed to
            :meth:`LabelToMarkerErosion.erode_label`. Defaults to None.
        min_filter_size: Minimum erosion filter size; defaults to None to
            use half of ``filter_size``, rounded down.
        use_min_filter: True to erode even if ``min_filter_size`` is
            reached; defaults to False to avoid any erosion if this size
            is reached.
        skel_eros_filt_size: Erosion filter size before skeletonization,
            passed to :meth:`LabelToMarkerErosion.erode_label`. Defaults to
            None to use half of ``filter_size``, rounded down.
        wt_dists: Array of distances by which to weight the filter size,
            such as a distance transform to the outer perimeter of
            ``labels_img`` to weight central labels more heavily.
            Defaults to None.
        multiprocess: True to use multiprocessing; defaults to True.

    Returns:
        Tuple of an image array of the same shape as ``labels_img`` holding
        the eroded labels, and a data frame of erosion metrics with one
        row per eroded label.

    """
    def record_eroded_label(stats_eros, slices, filtered):
        # mutate markers in the parent process for changes to persist,
        # since arrays modified within pool workers are not shared back;
        # the first stat is the label ID, written into the eroded mask
        markers[tuple(slices)][filtered] = stats_eros[0]
        for col, stat in zip(cols, stats_eros):
            sizes_dict.setdefault(col, []).append(stat)

    start_time = time()

    # resolve default filter sizes before logging so that the log shows
    # the sizes actually used rather than None
    if min_filter_size is None:
        min_filter_size = filter_size // 2
    if skel_eros_filt_size is None:
        skel_eros_filt_size = filter_size // 2
    _logger.info(
        "Eroding labels to markers with filter size %s, min filter size %s, "
        "and target fraction %s", filter_size, min_filter_size, target_frac)

    markers = np.zeros_like(labels_img)
    labels_unique = np.unique(labels_img)
    sizes_dict = {}
    cols = (config.AtlasMetrics.REGION.value, "SizeOrig", "SizeMarker",
            config.SmoothingMetrics.FILTER_SIZE.value)

    # share large images as class attributes for forked or
    # non-multiprocessing modes
    LabelToMarkerErosion.set_labels_img(labels_img, wt_dists)

    is_fork = False
    pool = None
    pool_results = []
    if multiprocess:
        # set up multiprocessing
        is_fork = chunking.is_fork()
        initializer = None
        initargs = None
        if not is_fork:
            # set up labels image as a shared array for spawned mode
            initializer, initargs = LabelToMarkerErosion.build_pool_init(
                {config.RegNames.IMG_LABELS: labels_img})
        pool = chunking.get_mp_pool(initializer, initargs)

    try:
        for label_id in labels_unique:
            if label_id == 0:
                # skip background
                continue

            # erode labels to generate markers, excluding labels small
            # enough that they would require a filter smaller than half of
            # original size
            args = [
                label_id, filter_size, target_frac, min_filter_size,
                use_min_filter, skel_eros_filt_size,
            ]
            if not is_fork and wt_dists is not None:
                # pickle distance weight directly in spawned mode (not
                # necessary for non-multiprocessed but equivalent)
                args.append(LabelToMarkerErosion.meas_wt(
                    labels_img, label_id, wt_dists))

            if pool is None:
                # process labels without multiprocessing
                stats_eros, slices, filtered = (
                    LabelToMarkerErosion.erode_label(*args))
                record_eroded_label(stats_eros, slices, filtered)
            else:
                # queue the label for processing in the pool
                pool_results.append(pool.apply_async(
                    LabelToMarkerErosion.erode_label, args=args))

        # collect multiprocessing output; empty when not multiprocessing
        for result in pool_results:
            stats_eros, slices, filtered = result.get()
            record_eroded_label(stats_eros, slices, filtered)
    finally:
        if pool is not None:
            # release the pool even if a task failed
            pool.close()
            pool.join()

    # show erosion stats
    df = df_io.dict_to_data_frame(sizes_dict, show=True)

    _logger.info(
        "Time elapsed to erode labels into markers: %s", time() - start_time)
    return markers, df
def export_region_ids(labels_ref_lookup, path, level):
    """Export region IDs and their metadata to CSV and Excel files.

    Region IDs are taken from an annotation reference reverse lookup
    dictionary. Use a ``level`` of None to export labels only for the
    currently loaded atlas. When the labels colormap provides RGB values,
    they are added as a column and used as cell background colors in the
    Excel file.

    Args:
        labels_ref_lookup: The labels reference lookup, assumed to be an
            OrderedDict generated by :func:`ontology.create_reverse_lookup`
            to look up by ID while preserving key order to ensure that
            parents of any child will be reached prior to the child.
        path: Path to output CSV file; ``.csv`` is appended if the path
            does not already end with it. The Excel file is written to
            the same path with its extension replaced by ``.xlsx``.
        level: Level at which to find the parent for each label. If None,
            a parent level of -1 is used, and None is passed on as the
            level when finding the atlas labels.

    Returns:
        Pandas data frame of the region IDs and corresponding names. If
        RGB values are available, the styled (Styler) version of the data
        frame is returned instead.

    """
    def color_cells(rgb_col):
        # Pandas Excel export only supports named colors or hex
        # (as of v0.22), so format each RGB triplet as a hex background
        return ["background-color: #{:02x}{:02x}{:02x}".format(*rgb)
                for rgb in rgb_col]

    csv_ext = ".csv"
    path_csv = path if path.endswith(csv_ext) else path + csv_ext

    # map each label to its parent at the requested level
    parent_level = level if level is not None else -1
    label_parents = ontology.labels_to_parent(labels_ref_lookup, parent_level)

    cols = ["Region", "RegionAbbr", "RegionName", "Level", "Parent"]
    data = OrderedDict()
    label_ids = sitk_io.find_atlas_labels(
        config.load_labels, level, labels_ref_lookup)
    cm = colormaps.get_labels_discrete_colormap(None, 0, use_orig_labels=True)
    rgbs = cm.cmap_labels
    has_rgbs = rgbs is not None
    if has_rgbs:
        cols.append("RGB")

    aba_keys = config.ABAKeys
    for i, label_id in enumerate(label_ids):
        # laterality is not distinguished; only original (positive) IDs
        # are exported
        if label_id <= 0:
            continue
        label = labels_ref_lookup[label_id]
        # ID of parent at the level used for label_parents
        parent = label_parents[label_id]
        node = label[ontology.NODE]
        row = [
            label_id,
            node[aba_keys.ACRONYM.value],
            node[aba_keys.NAME.value],
            node[aba_keys.LEVEL.value],
            parent,
        ]
        if has_rgbs:
            row.append(rgbs[i, :3])
        for col, val in zip(cols, row):
            data.setdefault(col, []).append(val)

    # save to CSV, then to Excel with colored cells if RGB is available
    df = df_io.dict_to_data_frame(data, path_csv)
    if has_rgbs:
        df = df.style.apply(color_cells, subset="RGB")
    path_xlsx = "{}.xlsx".format(os.path.splitext(path)[0])
    df.to_excel(path_xlsx)
    print("exported regions to styled spreadsheet: \"{}\"".format(path_xlsx))
    return df
def meas_improvement(path, col_effect, col_p, thresh_impr=0, thresh_p=0.05,
                     col_wt=None, suffix=None, df=None):
    """Summarize overall improvement and worsening for an effect column.

    Counts and sums are taken over all rows, improved rows, and worsened
    rows, each also restricted to statistically significant rows.

    Args:
        path (str): Path of file to load into a data frame; also the base
            for the output path.
        col_effect (str): Name of column with metric to measure.
        col_p (str): Name of column with p-values.
        thresh_impr (float): Effects strictly above this threshold are
            counted as improved, and effects strictly below it as
            worsened.
        thresh_p (float): P-values strictly below this threshold are
            considered statistically significant.
        col_wt (str): Name of column for weighting; defaults to None to
            skip weighted metrics.
        suffix (str): Output path suffix; defaults to None.
        df (:obj:`pd.DataFrame`): Data frame to use instead of loading
            from ``path``; defaults to None.

    Returns:
        :obj:`pd.DataFrame`: Data frame with improvement measurements.
        The data frame will be saved to a filename based on ``path``.

    """
    if df is None:
        df = pd.read_csv(path)

    # boolean masks for improved/worsened rows and their statistically
    # significant subsets
    effects = df[col_effect]
    is_impr = effects > thresh_impr
    is_sig = df[col_p] < thresh_p
    is_impr_sig = is_impr & is_sig
    is_wors = effects < thresh_impr
    is_wors_sig = is_wors & is_sig

    metrics = {
        "n": [len(effects)],
        "n_impr": [np.sum(is_impr)],
        "n_impr_ss": [np.sum(is_impr_sig)],
        "n_wors": [np.sum(is_wors)],
        "n_wors_ss": [np.sum(is_wors_sig)],
        col_effect: [np.sum(effects)],
        "{}_impr".format(col_effect): [np.sum(effects[is_impr])],
        "{}_impr_ss".format(col_effect): [np.sum(effects[is_impr_sig])],
        "{}_wors".format(col_effect): [np.sum(effects[is_wors])],
        "{}_wors_ss".format(col_effect): [np.sum(effects[is_wors_sig])],
    }

    if col_wt:
        # add weighted metrics for each condition, starting with the
        # total of the weighting column
        metrics[col_wt] = [np.sum(df[col_wt])]
        conditions = (
            ("impr", is_impr, is_impr_sig),
            ("wors", is_wors, is_wors_sig),
        )
        for name, mask, mask_sig in conditions:
            wts = df.loc[mask, col_wt]
            wts_sig = df.loc[mask_sig, col_wt]

            # sum of weights fitting the condition (all and significant)
            metrics["{}_{}".format(col_wt, name)] = [np.sum(wts)]
            metrics["{}_{}_ss".format(col_wt, name)] = [np.sum(wts_sig)]

            # sum of filtered effects multiplied by their weights
            wtd = wts.multiply(df.loc[mask, col_effect])
            metrics["{}_{}_by_{}".format(col_effect, name, col_wt)] = [
                np.sum(wtd)]
            wtd_sig = wts_sig.multiply(df.loc[mask_sig, col_effect])
            metrics["{}_{}_by_{}_ss".format(col_effect, name, col_wt)] = [
                np.sum(wtd_sig)]

    out_path = libmag.insert_before_ext(path, "_impr")
    if suffix:
        out_path = libmag.insert_before_ext(out_path, suffix)
    df_impr = df_io.dict_to_data_frame(metrics, out_path)

    # save un-transposed to preserve data types, but display transposed
    # for a more compact view given the large number of columns
    df_io.print_data_frame(df_impr.T, index=True, header=False)
    return df_impr
def parse_grid_stats(
        stats: OrderedDict[str, Tuple[Sequence, Sequence, str, OrderedDict]]
) -> Tuple[Dict[str, Tuple[Sequence, Sequence, Sequence]], pd.DataFrame]:
    """Parse stats from a grid search into rates and a summary data frame.

    Args:
        stats: Dictionary where each key is a string with the parameters
            up to the last parameter group, and each value is a tuple of
            the raw stats as rows of (pos, true_pos, false_pos); the array
            of values for the last parameter; the last parameter key; and
            an ``OrderedDict`` of the parent parameters and their values
            for the given set of stats.

    Returns:
        Tuple of ``group_stats`` and ``df``:
        - ``group_stats`` is a dictionary of stats, where keys correspond
          to ``stats`` keys, and values are tuples of the false discovery
          rate, sensitivity, and last parameter group value, each as
          sequences
        - ``df`` is a data frame summarizing the stats, saved to a path
          named after the parameter keys

    """
    stats_for_df = {}
    headers = None
    group_dict = {}
    param_keys = []
    param_prefix = GridSearchStats.PARAM.value

    for group_key, group in stats.items():
        # the last parameter is given separately from its parents since it
        # is the one actively varying within the group
        raw_stats, last_param_vals, last_param_key, parent_params = group
        grid_stats = np.array(raw_stats)

        if not headers:
            # parameter headers go first, parents before the last
            # parameter, followed by a header for each stat
            headers = [
                "_".join((param_prefix, parent)) for parent in parent_params]
            headers.append("_".join((param_prefix, last_param_key)))
            headers.extend((
                GridSearchStats.PPV, GridSearchStats.SENS,
                GridSearchStats.POS, GridSearchStats.TP, GridSearchStats.FP,
                GridSearchStats.FDR,
            ))
            param_keys.extend(parent_params)
            param_keys.append(last_param_key)

        # false discovery rate, inverse of PPV, since don't have true negs
        pos = grid_stats[:, 0]
        true_pos = grid_stats[:, 1]
        false_pos = grid_stats[:, 2]
        fdr = 1 - true_pos / (true_pos + false_pos)
        sens = true_pos / pos

        # one data frame row per value of the last parameter
        parent_vals = list(parent_params.values())
        for i, last_val in enumerate(last_param_vals):
            row = [
                *parent_vals, last_val, 1 - fdr[i], sens[i],
                *grid_stats[i].astype(int), fdr[i],
            ]
            for header, stat in zip(headers, row):
                stats_for_df.setdefault(header, []).append(stat)

        group_dict[group_key] = (fdr, sens, last_param_vals)
    print()

    # generate a data frame to summarize stats and save to file
    path_df = libmag.make_out_path(
        "gridsearch_{}.csv".format("_".join(param_keys)))
    df = df_io.dict_to_data_frame(stats_for_df, path_df, show=" ")
    return group_dict, df
def labels_to_markers_erosion(labels_img, filter_size=8, target_frac=None,
                              min_filter_size=None, use_min_filter=False,
                              skel_eros_filt_size=None, wt_dists=None):
    """Convert a labels image to markers as eroded labels via multiprocessing.

    These markers can be used in segmentation algorithms such as watershed.

    Args:
        labels_img (:obj:`np.ndarray`): Labels image as an integer Numpy
            array, where each unique int is a separate label.
        filter_size (int): Size of structuring element for erosion, which
            should be > 0; defaults to 8.
        target_frac (float): Target fraction of original label to erode,
            passed to :meth:`LabelToMarkerErosion.erode_label`. Defaults
            to None.
        min_filter_size (int): Minimum erosion filter size; defaults to
            None to use half of ``filter_size``, rounded down.
        use_min_filter (bool): True to erode even if ``min_filter_size``
            is reached; defaults to False to avoid any erosion if this
            size is reached.
        skel_eros_filt_size (int): Erosion filter size before
            skeletonization, passed to
            :meth:`LabelToMarkerErosion.erode_label`. Defaults to None to
            use half of ``filter_size``, rounded down.
        wt_dists (:obj:`np.ndarray`): Array of distances by which to
            weight the filter size, such as a distance transform to the
            outer perimeter of ``labels_img`` to weight central labels
            more heavily. Defaults to None.

    Returns:
        :obj:`np.ndarray`, :obj:`pd.DataFrame`: Image array of the same
        shape as ``labels_img`` holding the eroded labels, and a data
        frame of erosion metrics with one row per eroded label.

    """
    start_time = time()
    markers = np.zeros_like(labels_img)
    labels_unique = np.unique(labels_img)
    if min_filter_size is None:
        min_filter_size = filter_size // 2
    if skel_eros_filt_size is None:
        skel_eros_filt_size = filter_size // 2
    sizes_dict = {}
    cols = (config.AtlasMetrics.REGION.value, "SizeOrig", "SizeMarker",
            config.SmoothingMetrics.FILTER_SIZE.value)

    # erode labels via multiprocessing
    print("Eroding labels to markers with filter size {}, min filter size {}, "
          "and target fraction {}"
          .format(filter_size, min_filter_size, target_frac))
    LabelToMarkerErosion.set_labels_img(labels_img, wt_dists)
    pool = chunking.get_mp_pool()
    try:
        # erode labels to generate markers, excluding labels small enough
        # that they would require a filter smaller than half of original
        # size; the background label (0) is skipped
        pool_results = [
            pool.apply_async(
                LabelToMarkerErosion.erode_label,
                args=(label_id, filter_size, target_frac, min_filter_size,
                      use_min_filter, skel_eros_filt_size))
            for label_id in labels_unique if label_id != 0
        ]
        for result in pool_results:
            stats_eros, slices, filtered = result.get()
            # can only mutate markers outside of mp for changes to persist;
            # the first stat is the label ID, written into the eroded mask
            markers[tuple(slices)][filtered] = stats_eros[0]
            for col, stat in zip(cols, stats_eros):
                sizes_dict.setdefault(col, []).append(stat)
    finally:
        # release the pool even if a task failed
        pool.close()
        pool.join()

    # show erosion stats
    df = df_io.dict_to_data_frame(sizes_dict, show=True)

    print("time elapsed to erode labels into markers:", time() - start_time)
    return markers, df
def detect_blobs_large_image(filename_base, image5d, offset, size,
                             verify=False, save_dfs=True, full_roi=False):
    """Detect blobs within a large image through parallel processing of
    smaller chunks.

    The ROI is split into overlapping sub-ROIs for detection, blobs in the
    overlapping regions are pruned, detections are optionally verified
    against a truth database, and the blobs are saved to an archive whose
    path is based on ``filename_base``.

    Args:
        filename_base: Base path to use for file output.
        image5d: Large image to process as a Numpy array of t,z,y,x,[c].
        offset: Sub-image offset given as coordinates in z,y,x.
        size: Sub-image shape given in z,y,x. If either ``offset`` or
            ``size`` is None, the entire stack is used.
        verify: True to verify detections against truth database; defaults
            to False.
        save_dfs: True to save data frames to file; defaults to True.
        full_roi (bool): True to treat ``image5d`` as the full ROI;
            defaults to False.

    Returns:
        Tuple of the detection stats and feedback message from
        :func:`detector.verify_rois`, both None if verification was not
        performed, and the array of detected blobs, which is None if no
        blobs were found.

    """
    time_start = time()
    if size is None or offset is None:
        # uses the entire stack if no size or offset specified
        size = image5d.shape[1:4]
        offset = (0, 0, 0)
    else:
        # change base filename for ROI-based partial stack
        filename_base = make_subimage_name(filename_base, offset, size)
    filename_subimg = libmag.combine_paths(filename_base, config.SUFFIX_SUBIMG)
    filename_blobs = libmag.combine_paths(filename_base, config.SUFFIX_BLOBS)

    # get ROI for given region, including all channels
    if full_roi:
        # treat the full image as the ROI
        roi = image5d[0]
    else:
        roi = plot_3d.prepare_subimg(image5d, size, offset)
    _, channels = plot_3d.setup_channels(roi, config.channel, 3)

    # prep chunking ROI into sub-ROIs with size based on segment_size,
    # scaling by physical units to make more independent of resolution
    time_detection_start = time()
    settings = config.roi_profile  # use default settings
    scaling_factor = detector.calc_scaling_factor()
    print("microsope scaling factor based on resolutions: {}"
          .format(scaling_factor))
    denoise_size = config.roi_profile["denoise_size"]
    denoise_max_shape = None
    if denoise_size:
        # further subdivide each sub-ROI for local preprocessing
        denoise_max_shape = np.ceil(
            np.multiply(scaling_factor, denoise_size)).astype(int)

    # overlap sub-ROIs to minimize edge effects
    overlap_base = chunking.calc_overlap()
    tol = np.multiply(overlap_base, settings["prune_tol_factor"]).astype(int)
    overlap_padding = np.copy(tol)
    overlap = np.copy(overlap_base)
    exclude_border = config.roi_profile["exclude_border"]
    if exclude_border is not None:
        # exclude border to avoid blob detector edge effects, where blobs
        # often collect at the faces of the sub-ROI;
        # ensure that overlap is greater than twice the border exclusion
        # per axis so that no plane will be excluded from both overlapping
        # sub-ROIs
        exclude_border_thresh = np.multiply(2, exclude_border)
        overlap_less = np.less(overlap, exclude_border_thresh)
        overlap[overlap_less] = exclude_border_thresh[overlap_less]
        excluded = np.greater(exclude_border, 0)
        overlap[excluded] += 1  # additional padding
        overlap_padding[excluded] = 0  # no need to prune past excluded border
    print("sub-ROI overlap: {}, pruning tolerance: {}, padding beyond "
          "overlap for pruning: {}, exclude borders: {}"
          .format(overlap, tol, overlap_padding, exclude_border))
    max_pixels = np.ceil(np.multiply(
        scaling_factor, config.roi_profile["segment_size"])).astype(int)
    print("preprocessing max shape: {}, detection max pixels: {}"
          .format(denoise_max_shape, max_pixels))
    sub_roi_slices, sub_rois_offsets = chunking.stack_splitter(
        roi.shape, max_pixels, overlap)

    # TODO: option to distribute groups of sub-ROIs to different servers
    # for blob detection
    seg_rois = detect_blobs_sub_rois(
        roi, sub_roi_slices, sub_rois_offsets, denoise_max_shape,
        exclude_border)
    detection_time = time() - time_detection_start
    print("blob detection time (s):", detection_time)

    # prune blobs in overlapping portions of sub-ROIs
    time_pruning_start = time()
    segments_all, df_pruning = _prune_blobs_mp(
        roi, seg_rois, overlap, tol, sub_roi_slices, sub_rois_offsets,
        channels, overlap_padding)
    pruning_time = time() - time_pruning_start
    print("blob pruning time (s):", pruning_time)

    # get weighted mean of ratios
    if df_pruning is not None:
        print("\nBlob pruning ratios:")
        path_pruning = "blob_ratios.csv" if save_dfs else None
        df_pruning_all = df_io.data_frames_to_csv(
            df_pruning, path_pruning, show=" ")
        cols = df_pruning_all.columns.tolist()
        if "blobs" in cols:
            # weight each ratio by the number of unpruned blobs
            blobs_unpruned = df_pruning_all["blobs"]
            num_blobs_unpruned = np.sum(blobs_unpruned)
            blob_pruning_means = {
                "mean_{}".format(col): [
                    np.sum(np.multiply(df_pruning_all[col], blobs_unpruned))
                    / num_blobs_unpruned]
                for col in cols[1:]
            }
            path_pruning_means = (
                "blob_ratios_means.csv" if save_dfs else None)
            df_io.dict_to_data_frame(
                blob_pruning_means, path_pruning_means, show=" ")
        else:
            print("no blob ratios found")

    stats_detection = None
    fdbk = None
    if segments_all is not None:
        # remove the duplicated elements that were used for pruning
        detector.replace_rel_with_abs_blob_coords(segments_all)
        segments_all = detector.remove_abs_blob_coords(segments_all)

        # compare detected blobs with truth blobs
        # TODO: assumes ground truth is relative to any ROI offset,
        # but should make customizable
        if verify:
            db_path_base = None
            exp_name = os.path.splitext(os.path.basename(config.filename))[0]
            try:
                if config.truth_db is None:
                    # find and load truth DB based on filename and subimage
                    db_path_base = os.path.basename(filename_base)
                    print("about to verify with truth db from {}"
                          .format(db_path_base))
                    sqlite.load_truth_db(db_path_base)
                if config.truth_db is not None:
                    # truth DB may contain multiple experiments for
                    # different subimages; series not included in exp name
                    # since in ROI
                    rois = config.truth_db.get_rois(exp_name)
                    if rois is None:
                        # exp may have been named by ROI
                        print("{} experiment name not found, will try with "
                              "ROI offset/size".format(exp_name))
                        exp_name = make_subimage_name(exp_name, offset, size)
                        rois = config.truth_db.get_rois(exp_name)
                    if rois is None:
                        raise LookupError(
                            "No truth set ROIs found for experiment {}, will "
                            "skip detection verification".format(exp_name))
                    print("load ROIs from exp: {}".format(exp_name))
                    exp_id = sqlite.insert_experiment(
                        config.verified_db.conn, config.verified_db.cur,
                        exp_name, None)
                    verify_tol = np.multiply(
                        overlap_base, settings["verify_tol_factor"])
                    stats_detection, fdbk = detector.verify_rois(
                        rois, segments_all, config.truth_db.blobs_truth,
                        verify_tol, config.verified_db, exp_id,
                        config.channel)
            except FileNotFoundError:
                libmag.warn("Could not load truth DB from {}; "
                            "will not verify ROIs".format(db_path_base))
            except LookupError as e:
                libmag.warn(str(e))

    file_time_start = time()
    if config.save_subimg:
        if (isinstance(config.image5d, np.memmap)
                and config.image5d.filename
                == os.path.abspath(filename_subimg)):
            # file at sub-image save path may have been opened as a memmap
            # file, in which case saving would fail
            libmag.warn("{} is currently open, cannot save sub-image"
                        .format(filename_subimg))
        else:
            # write sub-image, which is in ROI (3D) format
            with open(filename_subimg, "wb") as f:
                np.save(f, roi)

    # save blobs, using a context manager so that the file is closed even
    # if saving fails
    # TODO: only segments used; consider removing the rest except ver
    with open(filename_blobs, "wb") as outfile_blobs:
        np.savez(outfile_blobs, ver=BLOBS_NP_VER, segments=segments_all,
                 resolutions=config.resolutions,
                 basename=os.path.basename(config.filename),  # only save name
                 offset=offset, roi_size=size)  # None unless explicitly set
    file_save_time = time() - file_time_start

    # whole image benchmarking time
    times = (
        [detection_time],
        [pruning_time],
        time() - time_start)
    times_dict = dict(zip(StackTimes, times))
    if segments_all is None:
        print("\nNo blobs detected")
    else:
        print("\nTotal blobs found:", len(segments_all))
        detector.show_blobs_per_channel(segments_all)
    print("file save time:", file_save_time)
    print("\nTotal detection processing times (s):")
    path_times = "stack_detection_times.csv" if save_dfs else None
    df_io.dict_to_data_frame(times_dict, path_times, show=" ")

    return stats_detection, fdbk, segments_all