def get_rel_resp_stats_df(sessions, analyspar, stimpar, permpar, rel_sess=1,
                          randst=None, parallel=False):
    """
    get_rel_resp_stats_df(sessions, analyspar, stimpar, permpar)

    Returns relative response stats dataframe for requested sessions.

    Required args:
        - sessions (list):
            session objects
        - analyspar (AnalysPar):
            named tuple containing analysis parameters
        - stimpar (StimPar):
            named tuple containing stimulus parameters
        - permpar (PermPar):
            named tuple containing permutation parameters

    Optional args:
        - rel_sess (int):
            number of session relative to which data should be scaled,
            for each mouse
            default: 1
        - randst (int or np.random.RandomState):
            random state or seed value to use. (-1 treated as None)
            default: None
        - parallel (bool):
            if True, some of the analysis is run in parallel across CPU cores
            default: False

    Returns:
        - rel_resp_data_df (pd.DataFrame):
            dataframe with one row per session/line/plane, and the following
            columns, in addition to the basic sess_df columns:
            - rel_reg or rel_exp (list): data stats for regular data (me, err)
            - rel_unexp (list): data stats for unexpected data (me, err)
            for reg/exp/unexp data types, session comparisons, e.g. 1v2:
            - {data_type}_raw_p_vals_{}v{} (float): uncorrected p-value for
                data differences between sessions
            - {data_type}_p_vals_{}v{} (float): p-value for data between
                sessions, corrected for multiple comparisons and tails
    """

    nanpol = None if analyspar.rem_bad else "omit"

    initial_columns = misc_analys.get_sess_df_columns(sessions[0], analyspar)

    resp_data_df = get_resp_df(
        sessions, analyspar, stimpar, rel_sess=rel_sess, parallel=parallel
        )

    # prepare target dataframe
    source_cols = ["rel_exp", "rel_unexp"]
    if stimpar.stimtype == "gabors":
        # regular means only A, B, C are included
        targ_cols = ["rel_reg", "rel_unexp"]
    else:
        targ_cols = ["rel_exp", "rel_unexp"]
    rel_resp_data_df = pd.DataFrame(columns=initial_columns + targ_cols)

    group_columns = ["lines", "planes"]
    aggreg_cols = [
        col for col in initial_columns
        if col not in group_columns + ["sess_ns"]
        ]
    for grp_vals, resp_grp_df in resp_data_df.groupby(group_columns):
        sess_ns = sorted(resp_grp_df["sess_ns"].unique())

        # take stats across frame types
        for e, (data_col, source_col) in enumerate(
            zip(targ_cols, source_cols)
            ):
            sess_data = []
            if e == 0:
                # rows are created on the first pass, and reused for the
                # second data column
                row_indices = []
            for s, sess_n in enumerate(sess_ns):
                sess_grp_df = resp_grp_df.loc[resp_grp_df["sess_ns"] == sess_n]
                sess_grp_df = sess_grp_df.sort_values("mouse_ns")
                if e == 0:
                    row_idx = len(rel_resp_data_df)
                    row_indices.append(row_idx)
                    rel_resp_data_df.loc[row_idx, "sess_ns"] = sess_n
                    for g, group_column in enumerate(group_columns):
                        rel_resp_data_df.loc[row_idx, group_column] = \
                            grp_vals[g]

                    # add aggregated values for initial columns
                    rel_resp_data_df = misc_analys.aggreg_columns(
                        sess_grp_df, rel_resp_data_df, aggreg_cols,
                        row_idx=row_idx, in_place=True
                        )
                else:
                    row_idx = row_indices[s]

                if stimpar.stimtype == "gabors":
                    # average across Gabor frames included in reg or unexp data
                    cols = [f"{source_col}_{fr}" for fr in stimpar.gabfr[e]]
                    data = sess_grp_df[cols].values.tolist()
                    # sess x frs x ROIs -> sess x ROIs
                    data = [
                        math_util.mean_med(
                            sub, stats=analyspar.stats, axis=0, nanpol=nanpol
                            ) for sub in data
                        ]
                else:
                    # sess x ROIs
                    data = sess_grp_df[source_col].tolist()

                data = np.concatenate(data, axis=0)

                # take stats across ROIs, grouped
                rel_resp_data_df.at[row_idx, data_col] = \
                    math_util.get_stats(
                        data,
                        stats=analyspar.stats,
                        error=analyspar.error,
                        nanpol=nanpol
                        ).tolist()

                sess_data.append(data)  # for p-value calculation

            # calculate p-values between sessions (0-1, 0-2, 1-2...)
            p_vals = rand_util.comp_vals_acr_groups(
                sess_data,
                n_perms=permpar.n_perms,
                stats=analyspar.stats,
                paired=analyspar.tracked,
                nanpol=nanpol,
                randst=randst
                )

            # record each pairwise p-value in the rows of BOTH sessions of the
            # pair. j is offset by i + 1, so that it indexes into sess_ns /
            # row_indices directly (a 0-based j would point back to an earlier
            # session for any i >= 1).
            p = 0
            for i, sess_n in enumerate(sess_ns):
                for j, sess_n2 in enumerate(sess_ns[i + 1:], start=i + 1):
                    key = f"{data_col}_p_vals_{int(sess_n)}v{int(sess_n2)}"
                    rel_resp_data_df.loc[row_indices[i], key] = p_vals[p]
                    rel_resp_data_df.loc[row_indices[j], key] = p_vals[p]
                    p += 1

    rel_resp_data_df["sess_ns"] = rel_resp_data_df["sess_ns"].astype(int)

    # corrected p-values
    rel_resp_data_df = misc_analys.add_corr_p_vals(rel_resp_data_df, permpar)

    return rel_resp_data_df
def get_sess_grped_diffs_df(sessions, analyspar, stimpar, basepar, permpar,
                            split="by_exp", randst=None, parallel=False):
    """
    get_sess_grped_diffs_df(sessions, analyspar, stimpar, basepar, permpar)

    Returns split difference statistics for specific sessions, grouped across
    mice.

    Required args:
        - sessions (list):
            session objects
        - analyspar (AnalysPar):
            named tuple containing analysis parameters
        - stimpar (StimPar):
            named tuple containing stimulus parameters
        - basepar (BasePar):
            named tuple containing baseline parameters
        - permpar (PermPar):
            named tuple containing permutation parameters

    Optional args:
        - split (str):
            how to split data:
            "by_exp" (all exp, all unexp),
            "unexp_lock" (unexp, preceeding exp),
            "exp_lock" (exp, preceeding unexp),
            "stim_onset" (grayscr, stim on),
            "stim_offset" (stim off, grayscr)
            default: "by_exp"
        - randst (int or np.random.RandomState):
            random state or seed value to use. (-1 treated as None)
            default: None
        - parallel (bool):
            if True, some of the analysis is run in parallel across CPU cores
            default: False

    Returns:
        - diffs_df (pd.DataFrame):
            dataframe with one row per session/line/plane, and the following
            columns, in addition to the basic sess_df columns:
            - diff_stats (list): split difference stats (me, err)
            - null_CIs (list): adjusted null CI for split differences
            - raw_p_vals (float): uncorrected p-value for differences within
                sessions
            - p_vals (float): p-value for differences within sessions,
                corrected for multiple comparisons and tails
            for session comparisons, e.g. 1v2:
            - raw_p_vals_{}v{} (float): uncorrected p-value for differences
                between sessions
            - p_vals_{}v{} (float): p-value for differences between sessions,
                corrected for multiple comparisons and tails
    """

    nanpol = None if analyspar.rem_bad else "omit"

    if analyspar.tracked:
        misc_analys.check_sessions_complete(sessions, raise_err=True)

    sess_diffs_df = misc_analys.get_check_sess_df(sessions, None, analyspar)
    initial_columns = sess_diffs_df.columns.tolist()

    # retrieve ROI index information
    args_dict = {
        "analyspar": analyspar,
        "stimpar": stimpar,
        "basepar": basepar,
        "split": split,
        "return_data": True,
    }

    # sess x split x ROI
    split_stats, split_data = gen_util.parallel_wrap(
        get_sess_roi_split_stats, sessions,
        args_dict=args_dict, parallel=parallel, zip_output=True
        )

    misc_analys.get_check_sess_df(sessions, sess_diffs_df)
    sess_diffs_df["roi_split_stats"] = list(split_stats)
    sess_diffs_df["roi_split_data"] = list(split_data)

    columns = initial_columns + ["diff_stats", "null_CIs"]
    diffs_df = pd.DataFrame(columns=columns)

    group_columns = ["lines", "planes", "sess_ns"]
    aggreg_cols = [col for col in initial_columns if col not in group_columns]
    for lp_grp_vals, lp_grp_df in sess_diffs_df.groupby(["lines", "planes"]):
        lp_grp_df = lp_grp_df.sort_values(["sess_ns", "mouse_ns"])
        line, plane = lp_grp_vals
        lp_name = plot_helper_fcts.get_line_plane_name(line, plane)
        logger.info(
            f"Running permutation tests for {lp_name} sessions...",
            extra={"spacing": TAB}
            )

        # obtain ROI random split differences per session
        # done here to avoid OOM errors
        lp_rand_diffs = gen_util.parallel_wrap(
            get_rand_split_data, lp_grp_df["roi_split_data"].tolist(),
            args_list=[analyspar, permpar, randst], parallel=parallel,
            zip_output=False
            )

        # positions in lp_rand_diffs follow the row order of lp_grp_df, so
        # the index labels are listed once here for positional look-ups
        lp_index = lp_grp_df.index.tolist()

        sess_diffs = []
        row_indices = []
        sess_ns = sorted(lp_grp_df["sess_ns"].unique())
        for sess_n in sess_ns:
            row_idx = len(diffs_df)
            row_indices.append(row_idx)
            sess_grp_df = lp_grp_df.loc[lp_grp_df["sess_ns"] == sess_n]

            grp_vals = list(lp_grp_vals) + [sess_n]
            for g, group_column in enumerate(group_columns):
                diffs_df.loc[row_idx, group_column] = grp_vals[g]

            # add aggregated values for initial columns
            diffs_df = misc_analys.aggreg_columns(
                sess_grp_df, diffs_df, aggreg_cols, row_idx=row_idx,
                in_place=True
                )

            # group ROI split stats across mice: split x ROIs
            grp_split_stats = np.concatenate(
                sess_grp_df["roi_split_stats"].to_numpy(), axis=-1
                )

            # take diff and stats across ROIs
            diffs = grp_split_stats[1] - grp_split_stats[0]
            diff_stats = math_util.get_stats(
                diffs, stats=analyspar.stats, error=analyspar.error,
                nanpol=nanpol
                )
            diffs_df.at[row_idx, "diff_stats"] = diff_stats.tolist()
            sess_diffs.append(diffs)

            # group random ROI split diffs across mice, and take stat
            rand_idxs = [lp_index.index(idx) for idx in sess_grp_df.index]
            rand_diffs = math_util.mean_med(
                np.concatenate(
                    [lp_rand_diffs[r] for r in rand_idxs], axis=0
                    ),
                axis=0, stats=analyspar.stats, nanpol=nanpol
                )

            # get CIs and p-values
            p_val, null_CI = rand_util.get_p_val_from_rand(
                diff_stats[0], rand_diffs, return_CIs=True,
                p_thresh=permpar.p_val, tails=permpar.tails,
                multcomp=permpar.multcomp, nanpol=nanpol
                )
            diffs_df.loc[row_idx, "p_vals"] = p_val
            diffs_df.at[row_idx, "null_CIs"] = null_CI

        del lp_rand_diffs  # free up memory

        # calculate p-values between sessions (0-1, 0-2, 1-2...)
        p_vals = rand_util.comp_vals_acr_groups(
            sess_diffs,
            n_perms=permpar.n_perms,
            stats=analyspar.stats,
            paired=analyspar.tracked,
            nanpol=nanpol,
            randst=randst
            )

        # record each pairwise p-value in the rows of BOTH sessions of the
        # pair. j is offset by i + 1, so that it indexes into sess_ns /
        # row_indices directly (a 0-based j would point back to an earlier
        # session for any i >= 1).
        p = 0
        for i, sess_n in enumerate(sess_ns):
            for j, sess_n2 in enumerate(sess_ns[i + 1:], start=i + 1):
                key = f"p_vals_{int(sess_n)}v{int(sess_n2)}"
                diffs_df.loc[row_indices[i], key] = p_vals[p]
                diffs_df.loc[row_indices[j], key] = p_vals[p]
                p += 1

    # add corrected p-values
    diffs_df = misc_analys.add_corr_p_vals(diffs_df, permpar)

    diffs_df["sess_ns"] = diffs_df["sess_ns"].astype(int)

    return diffs_df
def get_basic_idx_corr_df(lp_idx_df, consec_only=False, null_CI_cols=True):
    """
    get_basic_idx_corr_df(lp_idx_df)

    Builds the skeleton of the index correlation dataframe: one row per
    line/plane, with session information aggregated across sessions and,
    optionally, one (empty) null CI column per session pair to correlate.

    Required args:
        - lp_idx_df (pd.DataFrame):
            dataframe with one row per line/plane/session, and the following
            columns, in addition to the basic sess_df columns:
            - roi_idxs (list): index for each ROI

    Optional args:
        - consec_only (bool):
            if True, only consecutive session numbers are correlated
            (passed on to get_corr_pairs())
            default: False
        - null_CI_cols (bool):
            if True, null CI columns are included in the dataframe.
            default: True

    Returns:
        - idx_corr_df (pd.DataFrame):
            dataframe with one row per line/plane, and the columns of
            lp_idx_df, except "roi_idxs"
            if null_CI_cols:
            for session comparisons, e.g. 1v2
            - {}v{}_null_CIs (object): empty

    Raises:
        - RuntimeError, if the sessions aggregated into a row do not all
          share the same mouse information.
    """

    grouping = ["lines", "planes"]
    base_cols = [name for name in lp_idx_df.columns if name != "roi_idxs"]

    # session pairs that will be correlated
    corr_ns = get_corr_pairs(lp_idx_df, consec_only=consec_only)

    # columns for the line/plane dataframe
    df_cols = base_cols
    if null_CI_cols:
        df_cols = base_cols + [
            f"{corr_pair[0]}v{corr_pair[1]}_null_CIs" for corr_pair in corr_ns
            ]
    idx_corr_df = pd.DataFrame(columns=df_cols)

    to_aggregate = [name for name in base_cols if name not in grouping]

    for lp_vals, lp_df in lp_idx_df.groupby(grouping):
        # mice are already aggregated, so only session order matters here
        lp_df = lp_df.sort_values("sess_ns")

        new_row = len(idx_corr_df)
        for grp_col, grp_val in zip(grouping, lp_vals):
            idx_corr_df.loc[new_row, grp_col] = grp_val

        # fill in the remaining basic columns, aggregated across sessions
        idx_corr_df = misc_analys.aggreg_columns(
            lp_df, idx_corr_df, to_aggregate, row_idx=new_row,
            in_place=True, sort_by="sess_ns"
            )

        # mouse info is repeated once per session: collapse it to a single
        # copy, after checking that all sessions agree
        for mouse_col in ("mouse_ns", "mouseids"):
            per_sess = [
                tuple(entry) for entry in idx_corr_df.loc[new_row, mouse_col]
                ]
            if len(set(per_sess)) != 1:
                raise RuntimeError(
                    "Aggregated sessions should share same mouse "
                    "information."
                    )
            idx_corr_df.at[new_row, mouse_col] = list(per_sess[0])

    return idx_corr_df
def get_lp_idx_df(sessions, analyspar, stimpar, basepar, idxpar, permpar=None,
                  sig_only=False, randst=None, parallel=False):
    """
    get_lp_idx_df(sessions, analyspar, stimpar, basepar, idxpar)

    Collects ROI indices for the requested sessions, and pools them across
    mice within each line/plane/session.

    Required args:
        - sessions (list):
            Session objects
        - analyspar (AnalysPar):
            named tuple containing analysis parameters
        - stimpar (StimPar):
            named tuple containing stimulus parameters
        - basepar (BasePar):
            named tuple containing baseline parameters
        - idxpar (IdxPar):
            named tuple containing index parameters

    Optional args:
        - permpar (PermPar):
            named tuple containing permutation parameters, required if
            sig_only is True
            default: None
        - sig_only (bool):
            if True, ROIs with significant USIs are included
            (only possible if analyspar.tracked is True)
            default: False
        - randst (int or np.random.RandomState):
            random state or seed value to use. (-1 treated as None)
            default: None
        - parallel (bool):
            if True, some of the analysis is run in parallel across CPU cores
            default: False

    Returns:
        - lp_idx_df (pd.DataFrame):
            dataframe with one row per line/plane/session, and the following
            columns, in addition to the basic sess_df columns:
            - roi_idxs (list): index for each ROI
              (or each ROI that is significant in at least one session,
              if sig_only)

    Raises:
        - ValueError, if sig_only is True, but permpar is None.
    """

    if analyspar.tracked:
        misc_analys.check_sessions_complete(sessions, raise_err=True)

    if sig_only and permpar is None:
        raise ValueError("If sig_only is True, permpar cannot be None.")

    base_cols = misc_analys.get_sess_df_columns(sessions[0], analyspar)

    # arguments shared by both index retrieval functions
    shared_kwargs = dict(
        analyspar=analyspar,
        stimpar=stimpar,
        basepar=basepar,
        idxpar=idxpar,
        parallel=parallel,
        )

    if not sig_only:
        idx_df = usi_analys.get_idx_only_df(sessions, **shared_kwargs)
    else:
        idx_df = usi_analys.get_idx_sig_df(
            sessions,
            permpar=permpar,
            randst=randst,
            aggreg_sess=True,
            **shared_kwargs
            )

    # pool rows within each line/plane/session
    grouping = ["lines", "planes", "sess_ns"]
    to_aggregate = [name for name in base_cols if name not in grouping]
    lp_idx_df = pd.DataFrame(columns=base_cols + ["roi_idxs"])

    for lps_vals, lps_df in idx_df.groupby(grouping):
        lps_df = lps_df.sort_values("mouse_ns")

        new_row = len(lp_idx_df)
        for grp_col, grp_val in zip(grouping, lps_vals):
            lp_idx_df.loc[new_row, grp_col] = grp_val

        # fill in the remaining basic columns, aggregated across mice
        lp_idx_df = misc_analys.aggreg_columns(
            lps_df, lp_idx_df, to_aggregate, row_idx=new_row, in_place=True
            )

        mouse_idxs = lps_df["roi_idxs"].tolist()
        if sig_only:
            # for each mouse, retain only the ROIs listed in "sig_idxs"
            retained = []
            for idx_vals, sig_ns in zip(mouse_idxs, lps_df["sig_idxs"]):
                keep = np.asarray(sig_ns).astype(int)
                retained.append(np.asarray(idx_vals)[keep])
            mouse_idxs = retained

        # concatenate across mice into one flat list
        lp_idx_df.at[new_row, "roi_idxs"] = \
            np.concatenate(mouse_idxs).tolist()

    lp_idx_df["sess_ns"] = lp_idx_df["sess_ns"].astype(int)

    return lp_idx_df
def get_roi_tracking_df(sessions, analyspar, reg_only=False, proj=False,
                        crop_info=False, parallel=False):
    """
    get_roi_tracking_df(sessions, analyspar)

    Return ROI tracking information for the requested sessions.

    Required args:
        - sessions (list):
            Session objects
        - analyspar (AnalysPar):
            named tuple containing analysis parameters
            (analyspar.tracked must be True)

    Optional args:
        - reg_only (bool):
            if True, only registered masks, and projections if proj is True,
            are included in the output dataframe
            default: False
        - proj (bool):
            if True, max projections are included in the output dataframe
            default: False
        - crop_info (bool or str):
            if not False, the type of cropping information to include
            ("small" for the small plots, "large" for the large plots)
            default: False
        - parallel (bool):
            if True, some of the analysis is run in parallel across CPU cores
            default: False

    Returns:
        - roi_mask_df (pd.DataFrame):
            dataframe with a row for each mouse, and the following columns,
            in addition to the basic sess_df columns:
            - "registered_roi_mask_idxs" (list): list of mask indices,
                registered across sessions, for each session
                (flattened across ROIs) ((sess, hei, wid) x val)
            - "roi_mask_shapes" (tuple): shape of the registered ROI mask
                array of a single session, taken before it is flattened
                across ROIs (checked to be identical across the sessions of
                a mouse)
            if not reg_only:
            - "roi_mask_idxs" (list): list of mask indices for each session,
                and each ROI (sess x ((ROI, hei, wid) x val)) (not registered)
            if proj:
            - "registered_max_projections" (list): pixel intensities of
                maximum projection for the plane (hei x wid), after
                registration across sessions
            if proj and not reg_only:
            - "max_projections" (list): pixel intensities of maximum
                projection for the plane (hei x wid)
            if crop_info:
            - "crop_fact" (num): factor by which to crop masks (> 1)
            - "shift_prop_hei" (float): proportion by which to shift cropped
                mask center vertically [0, 1]
                (NOTE(review): reference edge presumably the top - confirm)
            - "shift_prop_wid" (float): proportion by which to shift cropped
                mask center horizontally from left edge [0, 1]

    Raises:
        - ValueError, if analyspar.tracked is False.
        - NotImplementedError, if crop_info is requested, but no preset
          cropping values exist for one of the mice.
        - RuntimeError, if registered ROI mask shapes differ across the
          sessions of a mouse.
    """

    if not analyspar.tracked:
        raise ValueError("analyspar.tracked must be True for this analysis.")

    misc_analys.check_sessions_complete(sessions, raise_err=True)

    sess_df = misc_analys.get_check_sess_df(sessions, analyspar=analyspar)

    # if cropping, check right away for dictionary with the preset parameters
    if crop_info:
        if crop_info == "small":
            crop_dict = SMALL_CROP_DICT
        elif crop_info == "large":
            crop_dict = LARGE_CROP_DICT
        else:
            gen_util.accepted_values_error(
                "crop_info", crop_info, ["small", "large"]
                )
        for mouse_n in sess_df["mouse_ns"].unique():
            if int(mouse_n) not in crop_dict:
                raise NotImplementedError(
                    f"No preset cropping information found for mouse {mouse_n}."
                    )

    # collect ROI mask data
    sess_dicts = gen_util.parallel_wrap(
        get_sess_reg_mask_info, sessions,
        args_list=[analyspar, True, proj], parallel=parallel
        )
    all_sessids = [sess.sessid for sess in sessions]

    group_columns = ["planes", "lines", "mouse_ns"]
    initial_columns = sess_df.columns.tolist()
    obj_columns = ["registered_roi_mask_idxs", "roi_mask_shapes"]
    if not reg_only:
        obj_columns.append("roi_mask_idxs")
    if proj:
        obj_columns.append("registered_max_projections")
        if not reg_only:
            obj_columns.append("max_projections")

    roi_mask_df = pd.DataFrame(columns=initial_columns + obj_columns)

    aggreg_cols = [col for col in initial_columns if col not in group_columns]
    for grp_vals, grp_df in sess_df.groupby(group_columns):
        row_idx = len(roi_mask_df)
        for g, group_column in enumerate(group_columns):
            roi_mask_df.loc[row_idx, group_column] = grp_vals[g]

        # add aggregated values for initial columns
        roi_mask_df = misc_analys.aggreg_columns(
            grp_df, roi_mask_df, aggreg_cols, row_idx=row_idx, in_place=True,
            by_mouse=True
            )

        sessids = sorted(grp_df["sessids"].tolist())
        reg_roi_masks, roi_mask_idxs = [], []
        if proj:
            reg_max_projs, max_projs = [], []

        roi_mask_shape = None
        for sessid in sessids:
            # sess_dicts is ordered like sessions
            sess_dict = sess_dicts[all_sessids.index(sessid)]
            reg_roi_mask = sess_dict["registered_roi_masks"]
            # flatten masks across ROIs
            reg_roi_masks.append(np.max(reg_roi_mask, axis=0))
            if roi_mask_shape is None:
                roi_mask_shape = reg_roi_mask.shape
            elif roi_mask_shape != reg_roi_mask.shape:
                raise RuntimeError(
                    "ROI mask shapes across sessions should match, for the "
                    "same mouse."
                    )
            if not reg_only:
                roi_mask_idxs.append([
                    idxs.tolist() for idxs in np.where(sess_dict["roi_masks"])
                    ])
            if proj:
                reg_max_projs.append(
                    sess_dict["registered_max_projection"].tolist()
                    )
                if not reg_only:
                    max_projs.append(sess_dict["max_projection"].tolist())

        # add to the dataframe
        roi_mask_df.at[row_idx, "registered_roi_mask_idxs"] = \
            [idxs.tolist() for idxs in np.where(reg_roi_masks)]
        roi_mask_df.at[row_idx, "roi_mask_shapes"] = roi_mask_shape
        if not reg_only:
            roi_mask_df.at[row_idx, "roi_mask_idxs"] = roi_mask_idxs
        if proj:
            roi_mask_df.at[row_idx, "registered_max_projections"] = \
                reg_max_projs
            if not reg_only:
                roi_mask_df.at[row_idx, "max_projections"] = max_projs

        # add cropping info
        if crop_info:
            mouse_n = grp_vals[group_columns.index("mouse_ns")]
            # use the same int key as in the preset check above, so that the
            # look-up cannot fail for a mouse that passed the check
            crop_fact, shift_prop_hei, shift_prop_wid = \
                crop_dict[int(mouse_n)]
            roi_mask_df.at[row_idx, "crop_fact"] = crop_fact
            roi_mask_df.at[row_idx, "shift_prop_hei"] = shift_prop_hei
            roi_mask_df.at[row_idx, "shift_prop_wid"] = shift_prop_wid

    roi_mask_df["mouse_ns"] = roi_mask_df["mouse_ns"].astype(int)

    return roi_mask_df
def check_init_stim_data_df(data_df, sessions, stimpar, comp_sess=None,
                            stim_data_df=None, analyspar=None):
    """
    check_init_stim_data_df(data_df, sessions, stimpar)

    Checks existing stimulus dataframe or creates one for each line/plane.

    Required args:
        - data_df (pd.DataFrame):
            dataframe with one row per session, and the basic sess_df columns
        - sessions (list):
            session objects
        - stimpar (StimPar):
            named tuple containing stimulus parameters

    Optional args:
        - comp_sess (list):
            sessions for which to obtain absolute fractional change
            [x, y] => |(y - x) / x|
            default: None (treated as [1, 3])
        - stim_data_df (pd.DataFrame):
            dataframe with one row per line/plane, and the basic sess_df
            columns
            default: None
        - analyspar (AnalysPar):
            named tuple containing analysis parameters
            (required if stim_data_df is None)
            default: None

    Returns:
        - stim_data_df (pd.DataFrame):
            dataframe with one row per line/plane, and the basic sess_df
            columns, as well as stimulus columns for each comp_sess:
            - {stimpar.stimtype}_s{comp_sess[0]}: for first comp_sess data
            - {stimpar.stimtype}_s{comp_sess[1]}: for second comp_sess data

    Raises:
        - ValueError, if stim_data_df and analyspar are both None, or if an
          existing stim_data_df does not contain exactly one row for a
          line/plane found in data_df.
        - KeyError, if an existing stim_data_df already has a column named
          stimpar.stimtype.
        - RuntimeError, if the aggregated values in an existing stim_data_df
          do not match those generated from data_df.
    """

    # None sentinel, to avoid sharing a mutable default across calls
    if comp_sess is None:
        comp_sess = [1, 3]

    initial_columns = misc_analys.get_sess_df_columns(sessions[0], analyspar)
    stimtype_cols = [f"{stimpar.stimtype}_s{i}" for i in comp_sess]
    if stim_data_df is None:
        new_df = True
        if analyspar is None:
            raise ValueError(
                "If stim_data_df is None, analyspar must be provided."
                )
        columns = initial_columns + stimtype_cols
        stim_data_df = pd.DataFrame(columns=columns)
    else:
        new_df = False
        if stimpar.stimtype in stim_data_df:
            raise KeyError(
                f"{stimpar.stimtype} should not already be in stim_data_df."
                )
        stim_data_df = gen_util.set_object_columns(
            stim_data_df, stimtype_cols, in_place=True
            )

    group_columns = ["lines", "planes"]
    aggreg_cols = [
        col for col in initial_columns
        if col not in group_columns + ["sess_ns"]
        ]

    # populate dataframe
    for grp_vals, grp_df in data_df.groupby(group_columns):
        grp_df = grp_df.sort_values(["sess_ns", "mouse_ns"])
        line, plane = grp_vals
        if new_df:
            row_idx = len(stim_data_df)
            for g, group_column in enumerate(group_columns):
                stim_data_df.loc[row_idx, group_column] = grp_vals[g]
        else:
            row_idxs = stim_data_df.loc[
                (stim_data_df["lines"] == line) &
                (stim_data_df["planes"] == plane)
                ].index
            if len(row_idxs) != 1:
                raise ValueError(
                    "Expected exactly one row to match line/plane."
                    )
            row_idx = row_idxs[0]

        # add aggregated values for initial columns
        # (only done in place for a new dataframe: an existing one is left
        # untouched, and is compared against the returned dataframe below)
        ext_stim_data_df = misc_analys.aggreg_columns(
            grp_df, stim_data_df, aggreg_cols, row_idx=row_idx,
            in_place=new_df
            )

        if new_df:
            # keep building on the dataframe returned for this group
            stim_data_df = ext_stim_data_df
        else:
            # check data was added correctly
            for col in aggreg_cols:
                if (ext_stim_data_df.loc[row_idx, col] !=
                    stim_data_df.loc[row_idx, col]):
                    raise RuntimeError(
                        "If stim_data_df is not None, it must contain columns "
                        "generated from data_df. This does not appear to be "
                        f"the case, as the values in {col} do not match the "
                        "values that would be added if stim_data_df was None."
                        )

    return stim_data_df