def plot(mod=""):
    """Plot the displayed distances as a line and return them as a table.

    NOTE(review): ``dist_disp`` and ``show`` are not defined in this
    function; they are presumably provided by the enclosing scope.

    Args:
        mod (str): String appended to the ``knn_dist`` base output name;
            defaults to an empty string.

    Returns:
        :class:`pandas.DataFrame`: Data frame with a ``point`` column of
        sequential indices and a ``dist`` column of the plotted distances.

    """
    # pair each distance with its positional index
    point_ids = np.arange(len(dist_disp))
    df_dists = pd.DataFrame({"point": point_ids, "dist": dist_disp})

    # draw the single distance column against the point indices
    out_name = "knn_dist{}".format(mod)
    plot_title = config.plot_labels[config.PlotLabels.TITLE]
    plot_2d.plot_lines(
        out_name, "point", ("dist", ), df=df_dists, show=show,
        title=plot_title)
    return df_dists
def plot_unlabeled_hemisphere(path, cols, size=None, show=True):
    """Plot fractions of unlabeled hemisphere as line and bar plots.

    One line plot and one bar plot are generated for each metric column.

    Args:
        path (str): Path to data frame.
        cols (List[str]): Sequence of columns to plot.
        size (List[int]): Sequence of ``width, height`` to size the figure;
            defaults to None. Passed to the line plots only.
        show (bool): True to display the image; defaults to True.

    """
    # read the stats and add an age column derived from the sample names
    df = pd.read_csv(path)
    sample_ages = ontology.rel_to_abs_ages(df["Sample"].unique())
    df["Age"] = df["Sample"].map(sample_ages)

    conds = df["Condition"].unique()
    y_label = "Fraction of hemisphere unlabeled"

    # each metric gets its own pair of graphs
    for col in cols:
        title = "{}".format(col).replace("_", " ")

        # line plot of the metric across ages, grouped by condition
        df_lines, regions = df_io.pivot_with_conditions(
            df, ["Age", "Condition"], "Region", col)
        line_suffix = "_{}".format(col)
        plot_2d.plot_lines(
            config.filename, "Age", regions, linestyles=("--", "-"),
            labels=(y_label, "Post-Conceptional Age"), title=title,
            size=size, show=show, ignore_invis=True, suffix=line_suffix,
            df=df_lines, groups=conds)

        # bar plot with the metric spread into one column per condition
        df_pivoted = df.pivot(index="Sample", columns="Condition", values=col)
        df_bars = df_pivoted.reset_index()
        bar_prefix = "{}_{}".format(os.path.splitext(config.filename)[0], col)
        # NOTE(review): the bar plot is given ``size=None`` rather than
        # ``size``; confirm whether this is intentional
        plot_2d.plot_bars(
            config.filename, conds, col_groups="Sample", y_label=y_label,
            title=title, size=None, show=show, df=df_bars,
            prefix=bar_prefix)
def plot_knns(img_paths, suffix=None, show=False, names=None):
    """Plot k-nearest-neighbor distances for multiple sets of blobs,
    overlaying on a single plot.

    The neighbor number is taken from the ``KNN_N`` cluster setting of the
    atlas profile, falling back to ``DBSCAN_MINPTS - 1`` if it is unset.
    Distances from each image are combined into one data frame per key
    (``ov`` and ``zoom``), saved to ``knn_dist_combine_<key>``, and plotted.

    Args:
        img_paths (List[str]): Base paths from which registered labels and
            blobs files will be found and output blobs file save location
            will be constructed.
        suffix (str): Suffix for ``path``; defaults to None.
        show (bool): True to plot the distances; defaults to False.
        names (List[str]): Sequence of names corresponding to ``img_paths``
            for the plot legend. Defaults to None, in which case the
            filename of each image path is used. Filenames are also used
            for any image without a corresponding entry.

    """
    cluster_settings = config.atlas_profile[profiles.RegKeys.METRICS_CLUSTER]
    knn_n = cluster_settings[profiles.RegKeys.KNN_N]
    if not knn_n:
        knn_n = cluster_settings[profiles.RegKeys.DBSCAN_MINPTS] - 1
    print("Calculating k-nearest-neighbor distances and plotting distances "
          "for neighbor {}".format(knn_n))

    # set up combined data frames for all samples at each zoom level
    df_keys = ("ov", "zoom")
    dfs_comb = {key: [] for key in df_keys}

    # legend names are collected only for images that loaded successfully
    # so that they stay aligned with the combined distance columns even
    # when an image is skipped
    names_disp = []
    for i, img_path in enumerate(img_paths):
        # load blobs associated with image
        mod_path = img_path
        if suffix is not None:
            mod_path = libmag.insert_before_ext(img_path, suffix)
        labels_img_np = sitk_io.load_registered_img(
            mod_path, config.RegNames.IMG_LABELS.value)
        blobs = detector.Blobs().load_blobs(np_io.img_to_blobs_path(img_path))
        _, res = np_io.find_scaling(img_path, labels_img_np.shape)
        if blobs is None:
            # pass a single, pre-formatted message so that the path is not
            # interpreted as a separate argument by the warning function
            libmag.warn(
                "unable to load nuclei coordinates for {}".format(img_path))
            continue

        # convert to physical units and display k-nearest-neighbors for nuclei
        blobs_phys = np.multiply(blobs.blobs[:, :3], res)
        _, _, dfs = knn_dist(blobs_phys, knn_n, 2, 1000000, False)

        # use the name given for this image, defaulting to the filename
        if names and i < len(names):
            names_disp.append(names[i])
        else:
            names_disp.append(os.path.basename(mod_path))
        for key, df in zip(df_keys, dfs):
            dfs_comb[key].append(df)

    for key, dfs_key in dfs_comb.items():
        if not dfs_key:
            # nothing was loaded, such as when all blobs files are missing
            libmag.warn(
                "no k-nearest-neighbor distances to combine for {}"
                .format(key))
            continue

        # combine data frames at each zoom level, save, and plot with
        # different colors for each image
        df = df_io.join_dfs(dfs_key, "point")
        dist_cols = [col for col in df.columns if col.startswith("dist")]
        rename_cols = dict(zip(dist_cols, names_disp))
        df = df.rename(rename_cols, axis=1)
        out_path = "knn_dist_combine_{}".format(key)
        df_io.data_frames_to_csv(df, out_path)
        plot_2d.plot_lines(
            out_path, "point", rename_cols.values(), df=df, show=show,
            title=config.plot_labels[config.PlotLabels.TITLE])
def plot_region_development(metric, size=None, show=True):
    """Plot regions across development for the given metric.

    Three sets of line plots are generated: the metric for whole brain
    tissue, the raw metric for the regions at each ontology level, and the
    metric at each level normalized to whole brain tissue.

    Volume stats are read from ``config.filename`` and region names and
    levels from ``config.filenames[1]``.

    Args:
        metric (str): Column name of metric to track.
        size (List[int]): Sequence of ``width, height`` to size the figure;
            defaults to None.
        show (bool): True to display the image; defaults to True.

    """
    # set up access to data frame columns
    id_cols = ["Age", "Condition"]
    extra_cols = ["RegionName"]
    cond_col = "Region"

    # assume that vol stats file is given first, then region IDs;
    # merge in region names and levels
    df_regions = pd.read_csv(config.filenames[1])
    df = pd.read_csv(config.filename).merge(
        df_regions[["Region", "RegionName", "Level"]], on="Region",
        how="left")

    # convert sample names to ages
    ages = ontology.rel_to_abs_ages(df["Sample"].unique())
    df["Age"] = df["Sample"].map(ages)

    # get large super-structures for normalization to brain tissue, where
    # "non-brain" are spinal cord and ventricles, which are variably labeled
    df_base = df[df["Region"] == 15564]
    ids_nonbr_large = (17651, 126651558)
    dfs_nonbr_large = [df[df["Region"] == n] for n in ids_nonbr_large]

    # get data frame with region IDs of all non-brain structures removed
    labels_ref_lookup = ontology.LabelsRef(
        config.load_labels).load().ref_lookup
    ids_nonbr = []
    for n in ids_nonbr_large:
        ids_nonbr.extend(ontology.get_children_from_id(labels_ref_lookup, n))

    label_id = config.atlas_labels[config.AtlasLabels.ID]
    if label_id is not None:
        # show only selected region and its children
        ids = ontology.get_children_from_id(labels_ref_lookup, label_id)
        df = df[np.isin(df["Region"], ids)]
    df_brain = df.loc[~df["Region"].isin(ids_nonbr)]

    levels = np.sort(df["Level"].unique())
    conds = df["Condition"].unique()

    # get aggregated whole brain tissue for normalization
    cols_show = (*id_cols, cond_col, *extra_cols, metric)
    if dfs_nonbr_large:
        # add all large non-brain structures
        df_nonbr = dfs_nonbr_large[0]
        for df_out in dfs_nonbr_large[1:]:
            df_nonbr = df_io.normalize_df(
                df_nonbr, id_cols, cond_col, None, [metric], extra_cols,
                df_out, df_io.df_add)
        # subtract them from whole organism to get brain tissue alone,
        # updating given metric in df_base
        df_base = df_io.normalize_df(
            df_base, id_cols, cond_col, None, [metric], extra_cols,
            df_nonbr, df_io.df_subtract)
    df_base.loc[:, "RegionName"] = "Brain tissue"
    print("Brain {}:".format(metric))
    df_io.print_data_frame(df_base.loc[:, cols_show], "\t")
    df_base_piv, regions = df_io.pivot_with_conditions(
        df_base, id_cols, "RegionName", metric)

    # plot lines with separate styles for each condition and colors for
    # each region name
    base_styles = ("--", "-.", ":", "-")
    num_conds = len(conds)
    # repeat the base styles enough times to cover every condition, using
    # ceiling division so that no condition is left without a style
    num_cycles = max(1, -(-num_conds // len(base_styles)))
    linestyles = base_styles * num_cycles
    if num_conds < len(linestyles):
        # trim to one style per condition while keeping the final style
        # solid; with at least two conditions, the first remains dashed
        linestyles = (*linestyles[:num_conds - 1], linestyles[-1])
    lines_params = {
        "labels": (metric, "Post-Conceptional Age"),
        "linestyles": linestyles,
        "size": size,
        "show": show,
        "ignore_invis": True,
        "groups": conds,
        "marker": ".",
    }
    # same settings except for the y-axis label of normalized plots
    line_params_norm = lines_params.copy()
    line_params_norm["labels"] = ("Fraction", "Post-Conceptional Age")
    plot_2d.plot_lines(
        config.filename, "Age", regions,
        title="Whole Brain Development ({})".format(metric),
        suffix="_dev_{}_brain".format(metric),
        df=df_base_piv, **lines_params)

    for level in levels:
        # plot raw metric at given level
        df_level = df.loc[df["Level"] == level]
        print("Raw {}:".format(metric))
        df_io.print_data_frame(df_level.loc[:, cols_show], "\t")
        df_level_piv, regions = df_io.pivot_with_conditions(
            df_level, id_cols, "RegionName", metric)
        plot_2d.plot_lines(
            config.filename, "Age", regions,
            title="Structure Development ({}, Level {})".format(
                metric, level),
            suffix="_dev_{}_level{}".format(metric, level),
            df=df_level_piv, **lines_params)

        # plot metric normalized to whole brain tissue; structures
        # above removed regions will still contain them
        df_brain_level = df_brain.loc[df_brain["Level"] == level]
        df_norm = df_io.normalize_df(
            df_brain_level, id_cols, cond_col, None, [metric], extra_cols,
            df_base)
        print("{} normalized to whole brain:".format(metric))
        df_io.print_data_frame(df_norm.loc[:, cols_show], "\t")
        df_norm_piv, regions = df_io.pivot_with_conditions(
            df_norm, id_cols, "RegionName", metric)
        plot_2d.plot_lines(
            config.filename, "Age", regions,
            units=(None, config.plot_labels[config.PlotLabels.X_UNIT]),
            title=("Structure Development Normalized to Whole "
                   "Brain ({}, Level {})".format(metric, level)),
            suffix="_dev_{}_level{}_norm".format(metric, level),
            df=df_norm_piv, **line_params_norm)