예제 #1
0
def get_rel_resp_stats_df(sessions,
                          analyspar,
                          stimpar,
                          permpar,
                          rel_sess=1,
                          randst=None,
                          parallel=False):
    """
    get_rel_resp_stats_df(sessions, analyspar, stimpar, permpar)

    Returns relative response stats dataframe for requested sessions.

    Required args:
        - sessions (list): 
            session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - permpar (PermPar): 
            named tuple containing permutation parameters

    Optional args:
        - rel_sess (int):
            number of session relative to which data should be scaled, for each 
            mouse
            default: 1
        - randst (int or np.random.RandomState): 
            random state or seed value to use. (-1 treated as None)
            default: None
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores 
            default: False

    Returns:
        - rel_resp_data_df (pd.DataFrame):
            dataframe with one row per session/line/plane, and the following 
            columns, in addition to the basic sess_df columns: 
            - rel_reg or rel_exp (list): data stats for regular data (me, err)
            - rel_unexp (list): data stats for unexpected data (me, err)
            for reg/exp/unexp data types, session comparisons, e.g. 1v2:
            - {data_type}_raw_p_vals_{}v{} (float): uncorrected p-value for 
                data differences between sessions 
            - {data_type}_p_vals_{}v{} (float): p-value for data between 
                sessions, corrected for multiple comparisons and tails
            Each between-session p-value is stored in the rows of both 
            sessions involved in the comparison.
    """

    nanpol = None if analyspar.rem_bad else "omit"

    initial_columns = misc_analys.get_sess_df_columns(sessions[0], analyspar)

    resp_data_df = get_resp_df(sessions,
                               analyspar,
                               stimpar,
                               rel_sess=rel_sess,
                               parallel=parallel)

    # prepare target dataframe
    source_cols = ["rel_exp", "rel_unexp"]
    if stimpar.stimtype == "gabors":
        # regular means only A, B, C are included
        targ_cols = ["rel_reg", "rel_unexp"]
    else:
        targ_cols = ["rel_exp", "rel_unexp"]
    rel_resp_data_df = pd.DataFrame(columns=initial_columns + targ_cols)

    group_columns = ["lines", "planes"]
    aggreg_cols = [
        col for col in initial_columns
        if col not in group_columns + ["sess_ns"]
    ]
    for grp_vals, resp_grp_df in resp_data_df.groupby(group_columns):
        sess_ns = sorted(resp_grp_df["sess_ns"].unique())

        # take stats across frame types
        for e, (data_col, source_col) in enumerate(zip(targ_cols,
                                                       source_cols)):
            sess_data = []
            if e == 0:
                # rows are created on the first data type only, and reused
                # (via row_indices) for the following data types
                row_indices = []
            for s, sess_n in enumerate(sess_ns):
                sess_grp_df = resp_grp_df.loc[resp_grp_df["sess_ns"] == sess_n]
                sess_grp_df = sess_grp_df.sort_values("mouse_ns")
                if e == 0:
                    row_idx = len(rel_resp_data_df)
                    row_indices.append(row_idx)
                    rel_resp_data_df.loc[row_idx, "sess_ns"] = sess_n
                    for g, group_column in enumerate(group_columns):
                        rel_resp_data_df.loc[row_idx,
                                             group_column] = grp_vals[g]

                    # add aggregated values for initial columns
                    rel_resp_data_df = misc_analys.aggreg_columns(
                        sess_grp_df,
                        rel_resp_data_df,
                        aggreg_cols,
                        row_idx=row_idx,
                        in_place=True)
                else:
                    row_idx = row_indices[s]

                if stimpar.stimtype == "gabors":
                    # average across Gabor frames included in reg or unexp data
                    cols = [f"{source_col}_{fr}" for fr in stimpar.gabfr[e]]
                    data = sess_grp_df[cols].values.tolist()
                    # sess x frs x ROIs -> sess x ROIs
                    data = [
                        math_util.mean_med(sub,
                                           stats=analyspar.stats,
                                           axis=0,
                                           nanpol=nanpol) for sub in data
                    ]
                else:
                    # sess x ROIs
                    data = sess_grp_df[source_col].tolist()

                data = np.concatenate(data, axis=0)

                # take stats across ROIs, grouped
                rel_resp_data_df.at[row_idx, data_col] = \
                    math_util.get_stats(
                        data,
                        stats=analyspar.stats,
                        error=analyspar.error,
                        nanpol=nanpol
                        ).tolist()

                sess_data.append(data)  # for p-value calculation

            # calculate p-values between sessions (0-1, 0-2, 1-2...)
            p_vals = rand_util.comp_vals_acr_groups(sess_data,
                                                    n_perms=permpar.n_perms,
                                                    stats=analyspar.stats,
                                                    paired=analyspar.tracked,
                                                    nanpol=nanpol,
                                                    randst=randst)
            p = 0
            for i, sess_n in enumerate(sess_ns):
                # j is an absolute position in sess_ns / row_indices, so that
                # the second session of each pair maps to its own row
                for j in range(i + 1, len(sess_ns)):
                    sess_n2 = sess_ns[j]
                    key = f"{data_col}_p_vals_{int(sess_n)}v{int(sess_n2)}"
                    rel_resp_data_df.loc[row_indices[i], key] = p_vals[p]
                    rel_resp_data_df.loc[row_indices[j], key] = p_vals[p]
                    p += 1

    rel_resp_data_df["sess_ns"] = rel_resp_data_df["sess_ns"].astype(int)

    # corrected p-values
    rel_resp_data_df = misc_analys.add_corr_p_vals(rel_resp_data_df, permpar)

    return rel_resp_data_df
예제 #2
0
def get_sess_grped_diffs_df(sessions,
                            analyspar,
                            stimpar,
                            basepar,
                            permpar,
                            split="by_exp",
                            randst=None,
                            parallel=False):
    """
    get_sess_grped_diffs_df(sessions, analyspar, stimpar, basepar, permpar)

    Returns split difference statistics for specific sessions, grouped across 
    mice.

    Required args:
        - sessions (list): 
            session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - basepar (BasePar): 
            named tuple containing baseline parameters
        - permpar (PermPar): 
            named tuple containing permutation parameters

    Optional args:
        - split (str): 
            how to split data:
            "by_exp" (all exp, all unexp), 
            "unexp_lock" (unexp, preceding exp), 
            "exp_lock" (exp, preceding unexp),
            "stim_onset" (grayscr, stim on), 
            "stim_offset" (stim off, grayscr)
            default: "by_exp"
        - randst (int or np.random.RandomState): 
            random state or seed value to use. (-1 treated as None)
            default: None
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores 
            default: False

    Returns:
        - diffs_df (pd.DataFrame):
            dataframe with one row per session/line/plane, and the following 
            columns, in addition to the basic sess_df columns: 
            - diff_stats (list): split difference stats (me, err)
            - null_CIs (list): adjusted null CI for split differences 
            - raw_p_vals (float): uncorrected p-value for differences within 
                sessions
            - p_vals (float): p-value for differences within sessions, 
                corrected for multiple comparisons and tails
            for session comparisons, e.g. 1v2:
            - raw_p_vals_{}v{} (float): uncorrected p-value for differences
                between sessions 
            - p_vals_{}v{} (float): p-value for differences between sessions, 
                corrected for multiple comparisons and tails
            Each between-session p-value is stored in the rows of both 
            sessions involved in the comparison.
    """

    nanpol = None if analyspar.rem_bad else "omit"

    if analyspar.tracked:
        misc_analys.check_sessions_complete(sessions, raise_err=True)

    sess_diffs_df = misc_analys.get_check_sess_df(sessions, None, analyspar)
    initial_columns = sess_diffs_df.columns.tolist()

    # retrieve ROI index information
    args_dict = {
        "analyspar": analyspar,
        "stimpar": stimpar,
        "basepar": basepar,
        "split": split,
        "return_data": True,
    }

    # sess x split x ROI
    split_stats, split_data = gen_util.parallel_wrap(get_sess_roi_split_stats,
                                                     sessions,
                                                     args_dict=args_dict,
                                                     parallel=parallel,
                                                     zip_output=True)

    # presumably re-validates that the dataframe rows still match the
    # sessions before per-session data is attached -- TODO confirm
    misc_analys.get_check_sess_df(sessions, sess_diffs_df)
    sess_diffs_df["roi_split_stats"] = list(split_stats)
    sess_diffs_df["roi_split_data"] = list(split_data)

    columns = initial_columns + ["diff_stats", "null_CIs"]
    diffs_df = pd.DataFrame(columns=columns)

    group_columns = ["lines", "planes", "sess_ns"]
    aggreg_cols = [col for col in initial_columns if col not in group_columns]
    for lp_grp_vals, lp_grp_df in sess_diffs_df.groupby(["lines", "planes"]):
        lp_grp_df = lp_grp_df.sort_values(["sess_ns", "mouse_ns"])
        line, plane = lp_grp_vals
        lp_name = plot_helper_fcts.get_line_plane_name(line, plane)
        logger.info(f"Running permutation tests for {lp_name} sessions...",
                    extra={"spacing": TAB})

        # obtain ROI random split differences per session
        # done here to avoid OOM errors
        lp_rand_diffs = gen_util.parallel_wrap(
            get_rand_split_data,
            lp_grp_df["roi_split_data"].tolist(),
            args_list=[analyspar, permpar, randst],
            parallel=parallel,
            zip_output=False)

        # positions in lp_rand_diffs follow the (sorted) row order of
        # lp_grp_df, so index labels are mapped back to positions below
        lp_index_labels = lp_grp_df.index.tolist()

        sess_diffs = []
        row_indices = []
        sess_ns = sorted(lp_grp_df["sess_ns"].unique())
        for sess_n in sess_ns:
            row_idx = len(diffs_df)
            row_indices.append(row_idx)
            sess_grp_df = lp_grp_df.loc[lp_grp_df["sess_ns"] == sess_n]

            grp_vals = list(lp_grp_vals) + [sess_n]
            for g, group_column in enumerate(group_columns):
                diffs_df.loc[row_idx, group_column] = grp_vals[g]

            # add aggregated values for initial columns
            diffs_df = misc_analys.aggreg_columns(sess_grp_df,
                                                  diffs_df,
                                                  aggreg_cols,
                                                  row_idx=row_idx,
                                                  in_place=True)

            # group ROI split stats across mice: split x ROIs
            grp_split_stats = np.concatenate(
                sess_grp_df["roi_split_stats"].to_numpy(), axis=-1)

            # take diff and stats across ROIs
            diffs = grp_split_stats[1] - grp_split_stats[0]
            diff_stats = math_util.get_stats(diffs,
                                             stats=analyspar.stats,
                                             error=analyspar.error,
                                             nanpol=nanpol)
            diffs_df.at[row_idx, "diff_stats"] = diff_stats.tolist()
            sess_diffs.append(diffs)

            # group random ROI split diffs across mice, and take stat
            rand_idxs = [
                lp_index_labels.index(idx) for idx in sess_grp_df.index
            ]
            rand_diffs = math_util.mean_med(np.concatenate(
                [lp_rand_diffs[r] for r in rand_idxs], axis=0),
                                            axis=0,
                                            stats=analyspar.stats,
                                            nanpol=nanpol)

            # get CIs and p-values
            p_val, null_CI = rand_util.get_p_val_from_rand(
                diff_stats[0],
                rand_diffs,
                return_CIs=True,
                p_thresh=permpar.p_val,
                tails=permpar.tails,
                multcomp=permpar.multcomp,
                nanpol=nanpol)
            diffs_df.loc[row_idx, "p_vals"] = p_val
            diffs_df.at[row_idx, "null_CIs"] = null_CI

        del lp_rand_diffs  # free up memory

        # calculate p-values between sessions (0-1, 0-2, 1-2...)
        p_vals = rand_util.comp_vals_acr_groups(sess_diffs,
                                                n_perms=permpar.n_perms,
                                                stats=analyspar.stats,
                                                paired=analyspar.tracked,
                                                nanpol=nanpol,
                                                randst=randst)
        p = 0
        for i, sess_n in enumerate(sess_ns):
            # j is an absolute position in sess_ns / row_indices, so that
            # the second session of each pair maps to its own row
            for j in range(i + 1, len(sess_ns)):
                sess_n2 = sess_ns[j]
                key = f"p_vals_{int(sess_n)}v{int(sess_n2)}"
                diffs_df.loc[row_indices[i], key] = p_vals[p]
                diffs_df.loc[row_indices[j], key] = p_vals[p]
                p += 1

    # add corrected p-values
    diffs_df = misc_analys.add_corr_p_vals(diffs_df, permpar)

    diffs_df["sess_ns"] = diffs_df["sess_ns"].astype(int)

    return diffs_df
예제 #3
0
def get_basic_idx_corr_df(lp_idx_df, consec_only=False, null_CI_cols=True):
    """
    get_basic_idx_corr_df(lp_idx_df)

    Builds the skeleton of the index correlation dataframe: one row per 
    line/plane, with session information aggregated, and optionally with 
    (unfilled) null confidence interval columns for each session pair.

    Required args:
        - lp_idx_df (pd.DataFrame):
            dataframe with one row per line/plane/session, and the following 
            columns, in addition to the basic sess_df columns:
            - roi_idxs (list): index for each ROI

    Optional args:
        - consec_only (bool):
            if True, only consecutive session numbers are correlated
            (passed on to get_corr_pairs())
            default: False
        - null_CI_cols (bool):
            if True, null CI columns are included in the dataframe.
            default: True

    Returns:
        - idx_corr_df (pd.DataFrame):
            dataframe with one row per line/plane, and the columns of 
            lp_idx_df, except "roi_idxs", aggregated across sessions.

            if null_CI_cols:
            for session comparisons, e.g. 1v2
            - {}v{}_null_CIs (object): created here, but left unfilled

    Raises:
        - RuntimeError, if the mouse information aggregated for a line/plane 
          is not the same for all entries.
    """

    initial_columns = [col for col in lp_idx_df.columns if col != "roi_idxs"]

    # session pairs to correlate
    corr_ns = get_corr_pairs(lp_idx_df, consec_only=consec_only)

    # rows are aggregated by line/plane
    group_columns = ["lines", "planes"]

    all_columns = list(initial_columns)
    if null_CI_cols:
        all_columns.extend(
            f"{pair[0]}v{pair[1]}_null_CIs" for pair in corr_ns
        )

    idx_corr_df = pd.DataFrame(columns=all_columns)
    aggreg_cols = [
        col for col in initial_columns if col not in group_columns
    ]

    for lp_vals, lp_df in lp_idx_df.groupby(group_columns):
        lp_df = lp_df.sort_values("sess_ns")  # mice already aggregated
        row_idx = len(idx_corr_df)

        for group_column, lp_val in zip(group_columns, lp_vals):
            idx_corr_df.loc[row_idx, group_column] = lp_val

        # fill in the aggregated values for the remaining initial columns
        idx_corr_df = misc_analys.aggreg_columns(
            lp_df,
            idx_corr_df,
            aggreg_cols,
            row_idx=row_idx,
            in_place=True,
            sort_by="sess_ns",
        )

        # collapse mouse info, which must be identical for all entries
        for col in ("mouse_ns", "mouseids"):
            mouse_vals = [tuple(ns) for ns in idx_corr_df.loc[row_idx, col]]
            n_distinct = len(set(mouse_vals))
            if n_distinct != 1:
                raise RuntimeError(
                    "Aggregated sessions should share same mouse "
                    "information."
                )
            idx_corr_df.at[row_idx, col] = list(mouse_vals[0])

    return idx_corr_df
예제 #4
0
def get_lp_idx_df(sessions, analyspar, stimpar, basepar, idxpar, permpar=None, 
                  sig_only=False, randst=None, parallel=False):
    """
    get_lp_idx_df(sessions, analyspar, stimpar, basepar, idxpar)

    Computes ROI indices for the sessions, and pools them across mice into 
    one row per line/plane/session.

    Required args:
        - sessions (list): 
            Session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - basepar (BasePar): 
            named tuple containing baseline parameters
        - idxpar (IdxPar): 
            named tuple containing index parameters
    
    Optional args:
        - permpar (PermPar): 
            named tuple containing permutation parameters, required if 
            sig_only is True
            default: None
        - sig_only (bool):
            if True, only the ROIs listed under "sig_idxs" in the index 
            dataframe are retained 
            (documented as only possible if analyspar.tracked is True; this 
            is not checked here)
            default: False
        - randst (int or np.random.RandomState): 
            random state or seed value to use. (-1 treated as None)
            default: None
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores 
            default: False

    Returns:
        - lp_idx_df (pd.DataFrame):
            dataframe with one row per line/plane/session, and the following 
            columns, in addition to the basic sess_df columns:
            - roi_idxs (list): index for each ROI 
              (or each ROI that is significant in at least one session, 
              if sig_only)

    Raises:
        - ValueError, if sig_only is True, but permpar is None.
    """

    if analyspar.tracked:
        misc_analys.check_sessions_complete(sessions, raise_err=True)

    if sig_only and permpar is None:
        raise ValueError("If sig_only is True, permpar cannot be None.")

    initial_columns = misc_analys.get_sess_df_columns(sessions[0], analyspar)

    # arguments shared by both index dataframe functions
    shared_kwargs = dict(
        analyspar=analyspar,
        stimpar=stimpar,
        basepar=basepar,
        idxpar=idxpar,
        parallel=parallel,
    )

    if not sig_only:
        idx_df = usi_analys.get_idx_only_df(sessions, **shared_kwargs)
    else:
        idx_df = usi_analys.get_idx_sig_df(
            sessions,
            permpar=permpar,
            randst=randst,
            aggreg_sess=True,
            **shared_kwargs
        )

    # target dataframe: one row per line/plane/session
    lp_idx_df = pd.DataFrame(columns=initial_columns + ["roi_idxs"])

    group_columns = ["lines", "planes", "sess_ns"]
    aggreg_cols = [col for col in initial_columns if col not in group_columns]
    for lps_vals, lps_df in idx_df.groupby(group_columns):
        lps_df = lps_df.sort_values("mouse_ns")
        row_idx = len(lp_idx_df)
        for group_column, lps_val in zip(group_columns, lps_vals):
            lp_idx_df.loc[row_idx, group_column] = lps_val

        # fill in the aggregated values for the remaining initial columns
        lp_idx_df = misc_analys.aggreg_columns(
            lps_df, lp_idx_df, aggreg_cols, row_idx=row_idx, in_place=True
        )

        per_mouse_idxs = lps_df["roi_idxs"].tolist()
        if sig_only:
            # keep only the ROIs at the positions listed in sig_idxs
            retained = []
            for idx_vals, sig_ns in zip(per_mouse_idxs, lps_df["sig_idxs"]):
                keep = np.asarray(sig_ns).astype(int)
                retained.append(np.asarray(idx_vals)[keep])
            per_mouse_idxs = retained

        # pool ROIs across mice
        pooled_idxs = np.concatenate(per_mouse_idxs)
        lp_idx_df.at[row_idx, "roi_idxs"] = pooled_idxs.tolist()

    lp_idx_df["sess_ns"] = lp_idx_df["sess_ns"].astype(int)

    return lp_idx_df
예제 #5
0
def get_roi_tracking_df(sessions,
                        analyspar,
                        reg_only=False,
                        proj=False,
                        crop_info=False,
                        parallel=False):
    """
    get_roi_tracking_df(sessions, analyspar)

    Collects ROI tracking (mask, and optionally projection and cropping) 
    information for the requested sessions, with one row per mouse.

    Required args:
        - sessions (list): 
            Session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters 
            (analyspar.tracked must be True)

    Optional args:
        - reg_only (bool):
            if True, only registered masks, and projections if proj is True, 
            are included in the output dataframe
            default: False
        - proj (bool):
            if True, max projections are included in the output dataframe
            default: False
        - crop_info (bool or str):
            if not False, the type of cropping information to include 
            ("small" for the small plots, "large" for the large plots)
            default: False
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores 
            default: False

    Returns:
        - roi_mask_df (pd.DataFrame):
            dataframe with a row for each plane/line/mouse group, and the 
            following columns, in addition to the basic sess_df columns: 

            - "registered_roi_mask_idxs" (list): list of mask indices, 
                registered across sessions, for each session 
                (flattened across ROIs) ((sess, hei, wid) x val)
            - "roi_mask_shapes" (tuple): shape of the registered ROI mask 
                array of a session, checked to be the same for all sessions 
                of the mouse

            if not reg_only:
            - "roi_mask_idxs" (list): list of mask indices for each session, 
                and each ROI (sess x ((ROI, hei, wid) x val)) (not registered)

            if proj:
            - "registered_max_projections" (list): pixel intensities of maximum 
                projection for the plane (hei x wid), after registration across 
                sessions

            if proj and not reg_only:
            - "max_projections" (list): pixel intensities of maximum projection 
                for the plane (hei x wid)
                
            if crop_info:
            - "crop_fact" (num): factor by which to crop masks (> 1) 
            - "shift_prop_hei" (float): proportion by which to shift cropped 
                mask center vertically [0, 1]
            - "shift_prop_wid" (float): proportion by which to shift cropped 
                mask center horizontally [0, 1]

    Raises:
        - ValueError, if analyspar.tracked is False.
        - NotImplementedError, if crop_info is requested, but no preset 
          cropping values exist for one of the mice.
        - RuntimeError, if registered mask shapes differ across the sessions 
          of a mouse.
    """

    if not analyspar.tracked:
        raise ValueError("analyspar.tracked must be True for this analysis.")

    misc_analys.check_sessions_complete(sessions, raise_err=True)

    sess_df = misc_analys.get_check_sess_df(sessions, analyspar=analyspar)

    # select and validate the preset cropping values before collecting any 
    # mask data, so that unsupported requests fail early
    if crop_info:
        if crop_info == "small":
            crop_dict = SMALL_CROP_DICT
        elif crop_info == "large":
            crop_dict = LARGE_CROP_DICT
        else:
            gen_util.accepted_values_error(
                "crop_info", crop_info, ["small", "large"]
            )
        for mouse_n in sess_df["mouse_ns"].unique():
            if int(mouse_n) in crop_dict:
                continue
            raise NotImplementedError(
                f"No preset cropping information found for mouse {mouse_n}."
            )

    # mask information, in the same order as sessions
    sess_dicts = gen_util.parallel_wrap(get_sess_reg_mask_info,
                                        sessions,
                                        args_list=[analyspar, True, proj],
                                        parallel=parallel)
    all_sessids = [sess.sessid for sess in sessions]

    include_unreg = not reg_only

    group_columns = ["planes", "lines", "mouse_ns"]
    initial_columns = sess_df.columns.tolist()

    obj_columns = ["registered_roi_mask_idxs", "roi_mask_shapes"]
    if include_unreg:
        obj_columns += ["roi_mask_idxs"]
    if proj:
        obj_columns += ["registered_max_projections"]
        if include_unreg:
            obj_columns += ["max_projections"]

    roi_mask_df = pd.DataFrame(columns=initial_columns + obj_columns)

    aggreg_cols = [col for col in initial_columns if col not in group_columns]
    for grp_vals, grp_df in sess_df.groupby(group_columns):
        row_idx = len(roi_mask_df)
        for group_column, grp_val in zip(group_columns, grp_vals):
            roi_mask_df.loc[row_idx, group_column] = grp_val

        # fill in the aggregated values for the remaining initial columns
        roi_mask_df = misc_analys.aggreg_columns(grp_df,
                                                 roi_mask_df,
                                                 aggreg_cols,
                                                 row_idx=row_idx,
                                                 in_place=True,
                                                 by_mouse=True)

        flat_reg_masks = []
        unreg_mask_idxs = []
        reg_projs = []
        unreg_projs = []
        ref_shape = None  # shape of the first session's registered masks

        # sessions are visited in order of increasing session ID
        for sessid in sorted(grp_df["sessids"].tolist()):
            sess_dict = sess_dicts[all_sessids.index(sessid)]
            reg_masks = sess_dict["registered_roi_masks"]

            # flatten masks across ROIs
            flat_reg_masks.append(np.max(reg_masks, axis=0))

            if ref_shape is None:
                ref_shape = reg_masks.shape
            elif ref_shape != reg_masks.shape:
                raise RuntimeError(
                    "ROI mask shapes across sessions should match, for the "
                    "same mouse.")

            if include_unreg:
                mask_where = np.where(sess_dict["roi_masks"])
                unreg_mask_idxs.append([idxs.tolist() for idxs in mask_where])

            if proj:
                reg_projs.append(
                    sess_dict["registered_max_projection"].tolist())
                if include_unreg:
                    unreg_projs.append(sess_dict["max_projection"].tolist())

        # record the collected data in the dataframe
        reg_where = np.where(flat_reg_masks)
        roi_mask_df.at[row_idx, "registered_roi_mask_idxs"] = \
            [idxs.tolist() for idxs in reg_where]
        roi_mask_df.at[row_idx, "roi_mask_shapes"] = ref_shape

        if include_unreg:
            roi_mask_df.at[row_idx, "roi_mask_idxs"] = unreg_mask_idxs
        if proj:
            roi_mask_df.at[row_idx, "registered_max_projections"] = reg_projs
            if include_unreg:
                roi_mask_df.at[row_idx, "max_projections"] = unreg_projs

        # record the preset cropping values for the mouse
        if crop_info:
            mouse_n = grp_vals[group_columns.index("mouse_ns")]
            crop_fact, shift_prop_hei, shift_prop_wid = crop_dict[mouse_n]
            crop_vals = (
                ("crop_fact", crop_fact),
                ("shift_prop_hei", shift_prop_hei),
                ("shift_prop_wid", shift_prop_wid),
            )
            for crop_col, crop_val in crop_vals:
                roi_mask_df.at[row_idx, crop_col] = crop_val

    roi_mask_df["mouse_ns"] = roi_mask_df["mouse_ns"].astype(int)

    return roi_mask_df
예제 #6
0
def check_init_stim_data_df(data_df,
                            sessions,
                            stimpar,
                            comp_sess=[1, 3],
                            stim_data_df=None,
                            analyspar=None):
    """
    check_init_stim_data_df(data_df, sessions, stimpar)

    Checks existing stimulus dataframe or creates one for each line/plane.

    Required args:
        - data_df (pd.DataFrame):
            dataframe with one row per session, and the basic sess_df columns
        - sessions (list): 
            session objects
        - stimpar (StimPar): 
            named tuple containing stimulus parameters

    Optional args:
        - comp_sess (list):
            sessions for which to obtain absolute fractional change 
            [x, y] => |(y - x) / x|
            (only read here, to name the stimulus columns)
            default: [1, 3]
        - stim_data_df (pd.DataFrame):
            dataframe with one row per line/plane, and the basic sess_df 
            columns
            default: None
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters 
            (required if stim_data_df is None)
            default: None

    Returns:
        - stim_data_df (pd.DataFrame):
            dataframe with one row per line/plane, and the basic sess_df 
            columns, as well as stimulus columns for each comp_sess:
            - {stimpar.stimtype}_s{comp_sess[0]}: for first comp_sess data
            - {stimpar.stimtype}_s{comp_sess[1]}: for second comp_sess data

    Raises:
        - ValueError, if stim_data_df and analyspar are both None, or if an 
          existing stim_data_df does not contain exactly one row for a 
          line/plane.
        - KeyError, if stim_data_df already contains a column named 
          stimpar.stimtype.
        - RuntimeError, if the aggregated columns of an existing stim_data_df 
          do not match those generated from data_df.
    """

    initial_columns = misc_analys.get_sess_df_columns(sessions[0], analyspar)

    # NOTE: the comp_sess default is a list, but it is never modified here
    stimtype_cols = [f"{stimpar.stimtype}_s{i}" for i in comp_sess]
    if stim_data_df is None:
        new_df = True
        if analyspar is None:
            raise ValueError(
                "If stim_data_df is None, analyspar must be provided.")
        columns = initial_columns + stimtype_cols
        stim_data_df = pd.DataFrame(columns=columns)
    else:
        new_df = False
        # NOTE(review): this checks for a column named exactly
        # stimpar.stimtype, whereas the columns added below are named
        # "{stimtype}_s{i}" -- confirm whether stimtype_cols should be
        # checked instead.
        if stimpar.stimtype in stim_data_df:
            raise KeyError(
                f"{stimpar.stimtype} should not already be in stim_data_df.")
        stim_data_df = gen_util.set_object_columns(stim_data_df,
                                                   stimtype_cols,
                                                   in_place=True)

    group_columns = ["lines", "planes"]
    aggreg_cols = [
        col for col in initial_columns
        if col not in group_columns + ["sess_ns"]
    ]

    # ensures that a new dataframe can still be returned (empty) if data_df
    # contains no line/plane groups
    ext_stim_data_df = stim_data_df

    # populate dataframe
    for grp_vals, grp_df in data_df.groupby(group_columns):
        grp_df = grp_df.sort_values(["sess_ns", "mouse_ns"])
        line, plane = grp_vals
        if new_df:
            row_idx = len(stim_data_df)
            for g, group_column in enumerate(group_columns):
                stim_data_df.loc[row_idx, group_column] = grp_vals[g]
        else:
            row_idxs = stim_data_df.loc[(stim_data_df["lines"] == line) & (
                stim_data_df["planes"] == plane)].index
            if len(row_idxs) != 1:
                raise ValueError(
                    "Expected exactly one row to match line/plane.")
            row_idx = row_idxs[0]

        # add aggregated values for initial columns
        # (if the dataframe is not new, in_place is False, and the result is
        # only used to check the existing values)
        ext_stim_data_df = misc_analys.aggreg_columns(grp_df,
                                                      stim_data_df,
                                                      aggreg_cols,
                                                      row_idx=row_idx,
                                                      in_place=new_df)

        # check data was added correctly
        if not new_df:
            for col in aggreg_cols:
                if (ext_stim_data_df.loc[row_idx, col] !=
                        stim_data_df.loc[row_idx, col]):
                    raise RuntimeError(
                        "If stim_data_df is not None, it must contain columns "
                        "generated from data_df. This does not appear to be "
                        f"the case, as the values in {col} do not match the "
                        "values that would be added if stim_data_df was None.")

    if new_df:
        stim_data_df = ext_stim_data_df

    return stim_data_df