def plot_region_development(metric, size=None, show=True):
    """Plot regions across development for the given metric.

    Loads the volume stats file (``config.filename``) and region ID file
    (``config.filenames[1]``), normalizes structures to whole brain tissue,
    and plots the raw and normalized metric by ontology level via
    :meth:`plot_2d.plot_lines`.

    Args:
        metric (str): Column name of metric to track.
        size (List[int]): Sequence of ``width, height`` to size the figure; 
            defaults to None.
        show (bool): True to display the image; defaults to True.

    """
    # set up access to data frame columns
    id_cols = ["Age", "Condition"]
    extra_cols = ["RegionName"]
    cond_col = "Region"

    # assume that vol stats file is given first, then region IDs;
    # merge in region names and levels
    df_regions = pd.read_csv(config.filenames[1])
    df = pd.read_csv(config.filename).merge(
        df_regions[["Region", "RegionName", "Level"]], on="Region", how="left")

    # convert sample names to ages
    ages = ontology.rel_to_abs_ages(df["Sample"].unique())
    df["Age"] = df["Sample"].map(ages)

    # get large super-structures for normalization to brain tissue, where
    # "non-brain" are spinal cord and ventricles, which are variably labeled;
    # 15564 is treated as the whole-organism label (see subtraction below)
    # and the IDs below as spinal cord and ventricles -- atlas-specific,
    # confirm against the loaded labels reference
    df_base = df[df["Region"] == 15564]
    ids_nonbr_large = (17651, 126651558)
    dfs_nonbr_large = [df[df["Region"] == n] for n in ids_nonbr_large]

    # get data frame with region IDs of all non-brain structures removed
    labels_ref_lookup = ontology.LabelsRef(
        config.load_labels).load().ref_lookup
    ids_nonbr = []
    for n in ids_nonbr_large:
        ids_nonbr.extend(ontology.get_children_from_id(labels_ref_lookup, n))

    label_id = config.atlas_labels[config.AtlasLabels.ID]
    if label_id is not None:
        # show only selected region and its children
        ids = ontology.get_children_from_id(labels_ref_lookup, label_id)
        df = df[np.isin(df["Region"], ids)]
    df_brain = df.loc[~df["Region"].isin(ids_nonbr)]

    levels = np.sort(df["Level"].unique())
    conds = df["Condition"].unique()

    # get aggregated whole brain tissue for normalization
    cols_show = (*id_cols, cond_col, *extra_cols, metric)
    if dfs_nonbr_large:
        # add all large non-brain structures
        df_nonbr = dfs_nonbr_large[0]
        for df_out in dfs_nonbr_large[1:]:
            df_nonbr = df_io.normalize_df(df_nonbr, id_cols, cond_col, None,
                                          [metric], extra_cols, df_out,
                                          df_io.df_add)
        # subtract them from whole organism to get brain tissue alone,
        # updating given metric in df_base
        df_base = df_io.normalize_df(df_base, id_cols, cond_col, None,
                                     [metric], extra_cols, df_nonbr,
                                     df_io.df_subtract)
    df_base.loc[:, "RegionName"] = "Brain tissue"
    print("Brain {}:".format(metric))
    df_io.print_data_frame(df_base.loc[:, cols_show], "\t")
    df_base_piv, regions = df_io.pivot_with_conditions(df_base, id_cols,
                                                       "RegionName", metric)

    # plot lines with separate styles for each condition and colors for
    # each region name
    linestyles = ("--", "-.", ":", "-")
    num_conds = len(conds)
    # tile the base styles so at least one style exists per condition;
    # dividing by len(linestyles) (not len + 1, which under-filled, e.g.
    # 9 conditions got only 8 styles) guarantees full coverage
    linestyles = linestyles * (num_conds // len(linestyles) + 1)
    if num_conds < len(linestyles):
        # trim to the first num_conds - 1 styles plus the final solid style
        # so that the 1st and last styles are dashed and solid, respectively
        linestyles = (*linestyles[:num_conds - 1], linestyles[-1])
    lines_params = {
        "labels": (metric, "Post-Conceptional Age"),
        "linestyles": linestyles,
        "size": size,
        "show": show,
        "ignore_invis": True,
        "groups": conds,
        "marker": ".",
    }
    line_params_norm = lines_params.copy()
    line_params_norm["labels"] = ("Fraction", "Post-Conceptional Age")
    plot_2d.plot_lines(config.filename,
                       "Age",
                       regions,
                       title="Whole Brain Development ({})".format(metric),
                       suffix="_dev_{}_brain".format(metric),
                       df=df_base_piv,
                       **lines_params)

    for level in levels:
        # plot raw metric at given level
        df_level = df.loc[df["Level"] == level]
        print("Raw {}:".format(metric))
        df_io.print_data_frame(df_level.loc[:, cols_show], "\t")
        df_level_piv, regions = df_io.pivot_with_conditions(
            df_level, id_cols, "RegionName", metric)
        plot_2d.plot_lines(config.filename,
                           "Age",
                           regions,
                           title="Structure Development ({}, Level {})".format(
                               metric, level),
                           suffix="_dev_{}_level{}".format(metric, level),
                           df=df_level_piv,
                           **lines_params)

        # plot metric normalized to whole brain tissue; structures
        # above removed regions will still contain them
        df_brain_level = df_brain.loc[df_brain["Level"] == level]
        df_norm = df_io.normalize_df(df_brain_level, id_cols, cond_col, None,
                                     [metric], extra_cols, df_base)
        print("{} normalized to whole brain:".format(metric))
        df_io.print_data_frame(df_norm.loc[:, cols_show], "\t")
        df_norm_piv, regions = df_io.pivot_with_conditions(
            df_norm, id_cols, "RegionName", metric)
        plot_2d.plot_lines(
            config.filename,
            "Age",
            regions,
            units=(None, config.plot_labels[config.PlotLabels.X_UNIT]),
            title=("Structure Development Normalized to Whole "
                   "Brain ({}, Level {})".format(metric, level)),
            suffix="_dev_{}_level{}_norm".format(metric, level),
            df=df_norm_piv,
            **line_params_norm)
def meas_improvement(path,
                     col_effect,
                     col_p,
                     thresh_impr=0,
                     thresh_p=0.05,
                     col_wt=None,
                     suffix=None,
                     df=None):
    """Measure overall improvement and worsening for a column in a data frame.
    
    Args:
        path (str): Path of file to load into data frame.
        col_effect (str): Name of column with metric to measure.
        col_p (str): Name of column with p-values.
        thresh_impr (float): Threshold of effects above which are considered
            improved and below which are considered worsened; effects exactly
            at the threshold count as neither. Defaults to 0.
        thresh_p (float): Threshold of p-values below which are considered
            statistically significant; defaults to 0.05.
        col_wt (str): Name of column for weighting; defaults to None.
        suffix (str): Output path suffix; defaults to None.
        df (:obj:`pd.DataFrame`): Data frame to use instead of loading from
            ``path``; defaults to None.

    Returns:
        :obj:`pd.DataFrame`: Data frame with improvement measurements.
        The data frame will be saved to a filename based on ``path``.

    """
    def add_wt(mask_cond, mask_cond_ss, name):
        # add weighted metrics for the given condition, such as improved
        # vs. worsened
        wt_cond = df.loc[mask_cond, col_wt]
        wt_cond_ss = df.loc[mask_cond_ss, col_wt]
        # sum of weighting column fitting the condition (all and statistically
        # significant)
        metrics["{}_{}".format(col_wt, name)] = [np.sum(wt_cond)]
        metrics["{}_{}_ss".format(col_wt, name)] = [np.sum(wt_cond_ss)]
        # sum of filtered effect multiplied by weighting
        metrics["{}_{}_by_{}".format(col_effect, name, col_wt)] = [
            np.sum(wt_cond.multiply(df.loc[mask_cond, col_effect]))
        ]
        metrics["{}_{}_by_{}_ss".format(col_effect, name, col_wt)] = [
            np.sum(wt_cond_ss.multiply(df.loc[mask_cond_ss, col_effect]))
        ]

    if df is None:
        df = pd.read_csv(path)

    # masks of improved and worsened, all and statistically significant
    # for each, where improvement is above the given threshold
    effects = df[col_effect]
    mask_impr = effects > thresh_impr
    mask_ss = df[col_p] < thresh_p
    mask_impr_ss = mask_impr & mask_ss
    mask_wors = effects < thresh_impr
    mask_wors_ss = mask_wors & mask_ss
    metrics = {
        "n": [len(effects)],
        "n_impr": [np.sum(mask_impr)],
        "n_impr_ss": [np.sum(mask_impr_ss)],
        "n_wors": [np.sum(mask_wors)],
        "n_wors_ss": [np.sum(mask_wors_ss)],
        col_effect: [np.sum(effects)],
        "{}_impr".format(col_effect): [np.sum(effects[mask_impr])],
        "{}_impr_ss".format(col_effect): [np.sum(effects[mask_impr_ss])],
        "{}_wors".format(col_effect): [np.sum(effects[mask_wors])],
        "{}_wors_ss".format(col_effect): [np.sum(effects[mask_wors_ss])],
    }
    if col_wt:
        # add columns based on weighting column; the total weight is set
        # once here rather than redundantly on each add_wt call
        metrics[col_wt] = [np.sum(df[col_wt])]
        add_wt(mask_impr, mask_impr_ss, "impr")
        add_wt(mask_wors, mask_wors_ss, "wors")

    out_path = libmag.insert_before_ext(path, "_impr")
    if suffix:
        out_path = libmag.insert_before_ext(out_path, suffix)
    df_impr = df_io.dict_to_data_frame(metrics, out_path)
    # display transposed version for more compact view given large number
    # of columns, but save un-transposed to preserve data types
    df_io.print_data_frame(df_impr.T, index=True, header=False)
    return df_impr
# Exemple #3
# 0
# NOTE(review): the two lines above are residue from a snippet-sharing site
# paste (example header and vote count), commented out to keep the module
# importable; the method below belongs to a class whose header is missing.
 def __repr__(self):
     """Return a printable rendering of the underlying match data frame."""
     if self.df is not None:
         # delegate formatting to df_io without printing to stdout
         return df_io.print_data_frame(self.df, show=False)
     return "Empty blob matches"