def plot_region_development(metric, size=None, show=True):
    """Plot regions across development for the given metric.

    Loads the volume stats file (``config.filename``) and the region IDs
    file (``config.filenames[1]``), merges in region names/levels, and
    plots the metric per region across post-conceptional age, both raw
    and normalized to whole-brain tissue.

    Args:
        metric (str): Column name of metric to track.
        size (List[int]): Sequence of ``width, height`` to size the
            figure; defaults to None.
        show (bool): True to display the image; defaults to True.
    """
    # set up access to data frame columns
    id_cols = ["Age", "Condition"]
    extra_cols = ["RegionName"]
    cond_col = "Region"

    # assume that vol stats file is given first, then region IDs;
    # merge in region names and levels
    df_regions = pd.read_csv(config.filenames[1])
    df = pd.read_csv(config.filename).merge(
        df_regions[["Region", "RegionName", "Level"]],
        on="Region", how="left")

    # convert sample names to ages
    ages = ontology.rel_to_abs_ages(df["Sample"].unique())
    df["Age"] = df["Sample"].map(ages)

    # get large super-structures for normalization to brain tissue, where
    # "non-brain" are spinal cord and ventricles, which are variably labeled
    # NOTE(review): 15564 is presumably the whole-organism/root label ID and
    # 17651/126651558 the spinal cord and ventricle IDs -- confirm against
    # the atlas ontology in use
    df_base = df[df["Region"] == 15564]
    ids_nonbr_large = (17651, 126651558)
    dfs_nonbr_large = [df[df["Region"] == n] for n in ids_nonbr_large]

    # get data frame with region IDs of all non-brain structures removed;
    # children of each large non-brain structure are collected from the
    # labels reference lookup
    labels_ref_lookup = ontology.LabelsRef(
        config.load_labels).load().ref_lookup
    ids_nonbr = []
    for n in ids_nonbr_large:
        ids_nonbr.extend(ontology.get_children_from_id(labels_ref_lookup, n))

    label_id = config.atlas_labels[config.AtlasLabels.ID]
    if label_id is not None:
        # show only selected region and its children
        ids = ontology.get_children_from_id(labels_ref_lookup, label_id)
        df = df[np.isin(df["Region"], ids)]
    # brain-only frame: drop all non-brain structure rows
    df_brain = df.loc[~df["Region"].isin(ids_nonbr)]

    levels = np.sort(df["Level"].unique())
    conds = df["Condition"].unique()

    # get aggregated whole brain tissue for normalization
    cols_show = (*id_cols, cond_col, *extra_cols, metric)
    if dfs_nonbr_large:
        # add all large non-brain structures
        df_nonbr = dfs_nonbr_large[0]
        for df_out in dfs_nonbr_large[1:]:
            df_nonbr = df_io.normalize_df(
                df_nonbr, id_cols, cond_col, None, [metric],
                extra_cols, df_out, df_io.df_add)
        # subtract them from whole organism to get brain tissue alone,
        # updating given metric in db_base
        df_base = df_io.normalize_df(
            df_base, id_cols, cond_col, None, [metric],
            extra_cols, df_nonbr, df_io.df_subtract)
    df_base.loc[:, "RegionName"] = "Brain tissue"
    print("Brain {}:".format(metric))
    df_io.print_data_frame(df_base.loc[:, cols_show], "\t")
    df_base_piv, regions = df_io.pivot_with_conditions(
        df_base, id_cols, "RegionName", metric)

    # plot lines with separate styles for each condition and colors for
    # each region name
    linestyles = ("--", "-.", ":", "-")
    num_conds = len(conds)
    # repeat the style cycle enough times to cover all conditions
    # NOTE(review): dividing by len(linestyles) + 1 rather than
    # len(linestyles) may under-provision styles for some condition
    # counts (e.g. 9 conditions yields only 8 styles) -- verify intent
    linestyles = linestyles * (num_conds // (len(linestyles) + 1) + 1)
    if num_conds < len(linestyles):
        # ensure that 1st and last styles are dashed and solid unless
        # fewer styles than conditions (original comment truncated;
        # trims the repeated cycle to num_conds, keeping the solid style
        # as the last entry)
        linestyles = (*linestyles[:num_conds - 1], linestyles[-1])
    lines_params = {
        "labels": (metric, "Post-Conceptional Age"),
        "linestyles": linestyles,
        "size": size,
        "show": show,
        "ignore_invis": True,
        "groups": conds,
        "marker": ".",
    }
    # normalized plots share the same params but relabel the y-axis
    line_params_norm = lines_params.copy()
    line_params_norm["labels"] = ("Fraction", "Post-Conceptional Age")

    # plot whole-brain development
    plot_2d.plot_lines(
        config.filename, "Age", regions,
        title="Whole Brain Development ({})".format(metric),
        suffix="_dev_{}_brain".format(metric),
        df=df_base_piv, **lines_params)

    for level in levels:
        # plot raw metric at given level
        df_level = df.loc[df["Level"] == level]
        print("Raw {}:".format(metric))
        df_io.print_data_frame(df_level.loc[:, cols_show], "\t")
        df_level_piv, regions = df_io.pivot_with_conditions(
            df_level, id_cols, "RegionName", metric)
        plot_2d.plot_lines(
            config.filename, "Age", regions,
            title="Structure Development ({}, Level {})".format(
                metric, level),
            suffix="_dev_{}_level{}".format(metric, level),
            df=df_level_piv, **lines_params)

        # plot metric normalized to whole brain tissue; structures
        # above removed regions will still contain them
        df_brain_level = df_brain.loc[df_brain["Level"] == level]
        df_norm = df_io.normalize_df(
            df_brain_level, id_cols, cond_col, None, [metric],
            extra_cols, df_base)
        print("{} normalized to whole brain:".format(metric))
        df_io.print_data_frame(df_norm.loc[:, cols_show], "\t")
        df_norm_piv, regions = df_io.pivot_with_conditions(
            df_norm, id_cols, "RegionName", metric)
        plot_2d.plot_lines(
            config.filename, "Age", regions,
            units=(None, config.plot_labels[config.PlotLabels.X_UNIT]),
            title=("Structure Development Normalized to Whole "
                   "Brain ({}, Level {})".format(metric, level)),
            suffix="_dev_{}_level{}_norm".format(metric, level),
            df=df_norm_piv, **line_params_norm)
def meas_improvement(path, col_effect, col_p, thresh_impr=0, thresh_p=0.05,
                     col_wt=None, suffix=None, df=None):
    """Measure overall improvement and worsening for a column in a data frame.

    Args:
        path (str): Path of file to load into data frame.
        col_effect (str): Name of column with metric to measure.
        col_p (str): Name of column with p-values.
        thresh_impr (float): Threshold of effects above which are considered
            improved (effects below it are considered worsened).
        thresh_p (float): Threshold of p-values below which are considered
            statistically significant.
        col_wt (str): Name of column for weighting; defaults to None, in
            which case no weighted metrics are computed.
        suffix (str): Output path suffix; defaults to None.
        df (:obj:`pd.DataFrame`): Data frame to use instead of loading from
            ``path``; defaults to None.

    Returns:
        :obj:`pd.DataFrame`: Data frame with improvement measurements.
        The data frame will be saved to a filename based on ``path``.
    """

    def add_wt(mask_cond, mask_cond_ss, name):
        # add weighted metrics for the given condition, such as improved
        # vs. worsened
        wt_cond = df.loc[mask_cond, col_wt]
        wt_cond_ss = df.loc[mask_cond_ss, col_wt]
        # sum of weighting column fitting the condition (all and
        # statistically significant)
        metrics["{}_{}".format(col_wt, name)] = [np.sum(wt_cond)]
        metrics["{}_{}_ss".format(col_wt, name)] = [np.sum(wt_cond_ss)]
        # sum of filtered effect multiplied by weighting
        metrics["{}_{}_by_{}".format(col_effect, name, col_wt)] = [
            np.sum(wt_cond.multiply(df.loc[mask_cond, col_effect]))
        ]
        metrics["{}_{}_by_{}_ss".format(col_effect, name, col_wt)] = [
            np.sum(wt_cond_ss.multiply(df.loc[mask_cond_ss, col_effect]))
        ]

    if df is None:
        df = pd.read_csv(path)

    # masks of improved and worsened, all and statistically significant
    # for each, where improvement is above the given threshold
    effects = df[col_effect]
    mask_impr = effects > thresh_impr
    mask_ss = df[col_p] < thresh_p
    mask_impr_ss = mask_impr & mask_ss
    mask_wors = effects < thresh_impr
    mask_wors_ss = mask_wors & mask_ss
    metrics = {
        "n": [len(effects)],
        "n_impr": [np.sum(mask_impr)],
        "n_impr_ss": [np.sum(mask_impr_ss)],
        "n_wors": [np.sum(mask_wors)],
        "n_wors_ss": [np.sum(mask_wors_ss)],
        col_effect: [np.sum(effects)],
        "{}_impr".format(col_effect): [np.sum(effects[mask_impr])],
        "{}_impr_ss".format(col_effect): [np.sum(effects[mask_impr_ss])],
        "{}_wors".format(col_effect): [np.sum(effects[mask_wors])],
        "{}_wors_ss".format(col_effect): [np.sum(effects[mask_wors_ss])],
    }

    if col_wt:
        # total of the weighting column, computed once rather than
        # redundantly per condition, followed by weighted metrics for
        # improved and worsened conditions
        metrics[col_wt] = [np.sum(df[col_wt])]
        add_wt(mask_impr, mask_impr_ss, "impr")
        add_wt(mask_wors, mask_wors_ss, "wors")

    out_path = libmag.insert_before_ext(path, "_impr")
    if suffix:
        out_path = libmag.insert_before_ext(out_path, suffix)
    df_impr = df_io.dict_to_data_frame(metrics, out_path)
    # display transposed version for more compact view given large number
    # of columns, but save un-transposed to preserve data types
    df_io.print_data_frame(df_impr.T, index=True, header=False)
    return df_impr
def __repr__(self):
    """Format the underlying data frame as a string."""
    matches = self.df
    if matches is None:
        # nothing stored yet; give a human-readable placeholder
        return "Empty blob matches"
    # render the frame without printing it to stdout
    return df_io.print_data_frame(matches, show=False)