def roi_overlays_sess123_enlarged(sessions, analyspar, sesspar, figpar, 
                                  parallel=False):
    """
    roi_overlays_sess123_enlarged(sessions, analyspar, sesspar, figpar)

    Retrieves enlarged ROI mask overlay examples for sessions 1 to 3.

    Saves results and parameters relevant to analysis in a dictionary.

    Required args:
        - sessions (list): 
            Session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - sesspar (SessPar): 
            named tuple containing session parameters
        - figpar (dict): 
            dictionary containing figure parameters

    Optional args:
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores
            default: False
    """

    logger.info("Compiling enlarged ROI mask overlay examples.", 
        extra={"spacing": "\n"})

    # overlays require ROIs tracked across sessions
    if not analyspar.tracked:
        raise ValueError("analyspar.tracked should be set to True.")

    # remove incomplete session series and warn
    sessions = misc_analys.check_sessions_complete(sessions)

    mask_df = roi_analys.get_roi_tracking_df(
        sessions, 
        analyspar=analyspar, 
        reg_only=True, 
        proj=False, 
        crop_info="large", 
        parallel=parallel, 
        )

    info = {
        "analyspar"  : analyspar._asdict(),
        "sesspar"    : sesspar._asdict(),
        "extrapar"   : dict(),
        "roi_mask_df": mask_df.to_dict(),
    }

    helper_fcts.plot_save_all(info, figpar)
def get_resp_df(sessions, analyspar, stimpar, rel_sess=1, parallel=False):
    """
    get_resp_df(sessions, analyspar, stimpar)

    Returns relative response dataframe for requested sessions.

    Required args:
        - sessions (list): 
            session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters

    Optional args:
        - rel_sess (int): 
            number of session relative to which data should be scaled, for 
            each mouse. If None, relative data is not added.
            default: 1
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores
            default: False

    Returns:
        - resp_data_df (pd.DataFrame):
            data dictionary with response stats (2D array, ROI x stats) under 
            keys for expected ("exp") and unexpected ("unexp") data, separated 
            by Gabor frame (e.g., "exp_3", "unexp_G") 
            if stimpar.stimtype == "gabors", and 
            with "rel_{}" columns added for each input column with "exp" in 
            its name if rel_sess is not None.

    Raises:
        - NotImplementedError: if the session order in the dataframe does not 
          match the input session order (internal consistency check).
    """

    # tracked analyses require complete session series
    if analyspar.tracked:
        misc_analys.check_sessions_complete(sessions, raise_err=True)

    sessids = [sess.sessid for sess in sessions]
    resp_data_df = misc_analys.get_check_sess_df(sessions, analyspar=analyspar)

    # double check that sessions are in correct order
    if resp_data_df["sessids"].tolist() != sessids:
        raise NotImplementedError(
            "Implementation error. Sessions must appear in correct order in "
            "resp_data_df.")

    # fix: plain string (was an f-string with no placeholders)
    logger.info("Loading data for each session...", extra={"spacing": TAB})
    data_dicts = gen_util.parallel_wrap(
        get_sess_integ_resp_dict, sessions, args_list=[analyspar, stimpar], 
        parallel=parallel)

    # add data to df
    misc_analys.get_check_sess_df(sessions, resp_data_df)
    for i, idx in enumerate(resp_data_df.index):
        for key, value in data_dicts[i].items():
            # create object columns on the first pass only
            if i == 0:
                resp_data_df = gen_util.set_object_columns(
                    resp_data_df, [key], in_place=True)
            resp_data_df.at[idx, key] = value[:, 0]  # retain stat only, not error

    # add relative data
    if rel_sess is not None:
        resp_data_df = add_relative_resp_data(
            resp_data_df, analyspar, rel_sess=rel_sess, in_place=True)

    return resp_data_df
def get_sess_grped_diffs_df(sessions, analyspar, stimpar, basepar, permpar, 
                            split="by_exp", randst=None, parallel=False):
    """
    get_sess_grped_diffs_df(sessions, analyspar, stimpar, basepar)

    Returns split difference statistics for specific sessions, grouped across 
    mice.

    Required args:
        - sessions (list): 
            session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - basepar (BasePar): 
            named tuple containing baseline parameters
        - permpar (PermPar): 
            named tuple containing permutation parameters

    Optional args:
        - split (str): 
            how to split data:
            "by_exp" (all exp, all unexp), 
            "unexp_lock" (unexp, preceeding exp), 
            "exp_lock" (exp, preceeding unexp),
            "stim_onset" (grayscr, stim on), 
            "stim_offset" (stim off, grayscr)
            default: "by_exp"
        - randst (int or np.random.RandomState): 
            random state or seed value to use. (-1 treated as None)
            default: None
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores 
            default: False

    Returns:
        - diffs_df (pd.DataFrame):
            dataframe with one row per session/line/plane, and the following 
            columns, in addition to the basic sess_df columns: 
            - diff_stats (list): split difference stats (me, err)
            - null_CIs (list): adjusted null CI for split differences 
            - raw_p_vals (float): uncorrected p-value for differences within 
                sessions
            - p_vals (float): p-value for differences within sessions, 
                corrected for multiple comparisons and tails
            for session comparisons, e.g. 1v2:
            - raw_p_vals_{}v{} (float): uncorrected p-value for differences
                between sessions 
            - p_vals_{}v{} (float): p-value for differences between sessions, 
                corrected for multiple comparisons and tails
    """

    nanpol = None if analyspar.rem_bad else "omit"

    if analyspar.tracked:
        misc_analys.check_sessions_complete(sessions, raise_err=True)

    sess_diffs_df = misc_analys.get_check_sess_df(sessions, None, analyspar)
    initial_columns = sess_diffs_df.columns.tolist()

    # retrieve ROI index information
    args_dict = {
        "analyspar"  : analyspar, 
        "stimpar"    : stimpar, 
        "basepar"    : basepar, 
        "split"      : split, 
        "return_data": True,
    }

    # sess x split x ROI
    split_stats, split_data = gen_util.parallel_wrap(
        get_sess_roi_split_stats, sessions, args_dict=args_dict, 
        parallel=parallel, zip_output=True)

    misc_analys.get_check_sess_df(sessions, sess_diffs_df)
    sess_diffs_df["roi_split_stats"] = list(split_stats)
    sess_diffs_df["roi_split_data"] = list(split_data)

    columns = initial_columns + ["diff_stats", "null_CIs"]
    diffs_df = pd.DataFrame(columns=columns)

    group_columns = ["lines", "planes", "sess_ns"]
    aggreg_cols = [col for col in initial_columns if col not in group_columns]
    for lp_grp_vals, lp_grp_df in sess_diffs_df.groupby(["lines", "planes"]):
        lp_grp_df = lp_grp_df.sort_values(["sess_ns", "mouse_ns"])
        line, plane = lp_grp_vals
        lp_name = plot_helper_fcts.get_line_plane_name(line, plane)
        logger.info(f"Running permutation tests for {lp_name} sessions...", 
            extra={"spacing": TAB})

        # obtain ROI random split differences per session
        # done here to avoid OOM errors
        lp_rand_diffs = gen_util.parallel_wrap(
            get_rand_split_data, lp_grp_df["roi_split_data"].tolist(), 
            args_list=[analyspar, permpar, randst], parallel=parallel, 
            zip_output=False)

        sess_diffs = []
        row_indices = []
        sess_ns = sorted(lp_grp_df["sess_ns"].unique())
        for sess_n in sess_ns:
            row_idx = len(diffs_df)
            row_indices.append(row_idx)
            sess_grp_df = lp_grp_df.loc[lp_grp_df["sess_ns"] == sess_n]

            grp_vals = list(lp_grp_vals) + [sess_n]
            for g, group_column in enumerate(group_columns):
                diffs_df.loc[row_idx, group_column] = grp_vals[g]

            # add aggregated values for initial columns
            diffs_df = misc_analys.aggreg_columns(
                sess_grp_df, diffs_df, aggreg_cols, row_idx=row_idx, 
                in_place=True)

            # group ROI split stats across mice: split x ROIs
            split_stats = np.concatenate(
                sess_grp_df["roi_split_stats"].to_numpy(), axis=-1)

            # take diff and stats across ROIs
            diffs = split_stats[1] - split_stats[0]
            diff_stats = math_util.get_stats(
                diffs, stats=analyspar.stats, error=analyspar.error, 
                nanpol=nanpol)
            diffs_df.at[row_idx, "diff_stats"] = diff_stats.tolist()
            sess_diffs.append(diffs)

            # group random ROI split diffs across mice, and take stat
            rand_idxs = [
                lp_grp_df.index.tolist().index(idx) 
                for idx in sess_grp_df.index
            ]
            rand_diffs = math_util.mean_med(
                np.concatenate([lp_rand_diffs[r] for r in rand_idxs], axis=0), 
                axis=0, stats=analyspar.stats, nanpol=nanpol)

            # get CIs and p-values
            p_val, null_CI = rand_util.get_p_val_from_rand(
                diff_stats[0], rand_diffs, return_CIs=True, 
                p_thresh=permpar.p_val, tails=permpar.tails, 
                multcomp=permpar.multcomp, nanpol=nanpol)
            diffs_df.loc[row_idx, "p_vals"] = p_val
            diffs_df.at[row_idx, "null_CIs"] = null_CI

        del lp_rand_diffs  # free up memory

        # calculate p-values between sessions (0-1, 0-2, 1-2...)
        p_vals = rand_util.comp_vals_acr_groups(
            sess_diffs, n_perms=permpar.n_perms, stats=analyspar.stats, 
            paired=analyspar.tracked, nanpol=nanpol, randst=randst)
        p = 0
        for i, sess_n in enumerate(sess_ns):
            for j, sess_n2 in enumerate(sess_ns[i + 1:]):
                key = f"p_vals_{int(sess_n)}v{int(sess_n2)}"
                diffs_df.loc[row_indices[i], key] = p_vals[p]
                # sess_n2 is sess_ns[i + 1 + j], so the second row for this 
                # comparison is row_indices[i + j + 1] (previously 
                # row_indices[j + 1], which is only correct when i == 0)
                diffs_df.loc[row_indices[i + j + 1], key] = p_vals[p]
                p += 1

    # add corrected p-values
    diffs_df = misc_analys.add_corr_p_vals(diffs_df, permpar)

    diffs_df["sess_ns"] = diffs_df["sess_ns"].astype(int)

    return diffs_df
def tracked_roi_usis_stimulus_comp_sess1v3(sessions, analyspar, sesspar, 
                                           stimpar, basepar, idxpar, permpar, 
                                           figpar, seed=None, parallel=False):
    """
    tracked_roi_usis_stimulus_comp_sess1v3(sessions, analyspar, sesspar, 
                                           stimpar, basepar, idxpar, permpar, 
                                           figpar)

    Retrieves changes in tracked ROI USIs for Gabors vs visual flow stimuli.
        
    Saves results and parameters relevant to analysis in a dictionary.

    Required args:
        - sessions (list): 
            Session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - sesspar (SessPar): 
            named tuple containing session parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - basepar (BasePar): 
            named tuple containing baseline parameters
        - idxpar (IdxPar): 
            named tuple containing index parameters
        - permpar (PermPar): 
            named tuple containing permutation parameters
        - figpar (dict): 
            dictionary containing figure parameters
    
    Optional args:
        - seed (int): 
            seed value to use. (-1 treated as None)
            default: None
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores
            default: False
    """

    logger.info(
        ("Compiling changes in ROI USIs for Gabors vs visual flow stimuli."), 
        extra={"spacing": "\n"})

    # comparison requires ROIs tracked across sessions
    if not analyspar.tracked:
        raise ValueError("analyspar.tracked should be set to True.")

    # remove incomplete session series and warn
    sessions = misc_analys.check_sessions_complete(sessions)

    # calculate multiple comparisons: one per line/plane, plus one overall
    lp_df = misc_analys.get_check_sess_df(
        sessions, None, analyspar).drop_duplicates(subset=["lines", "planes"])
    permpar = sess_ntuple_util.get_modif_ntuple(
        permpar, "multcomp", len(lp_df) + 1)

    # analysis settings, recorded in extrapar
    extrapar = {
        "comp_sess": [1, 3],
        "datatype" : "usis",
        "pop_stats": True,
        "seed"     : seed,
    }

    usi_comp_df = stim_analys.get_stim_stats_df(
        sessions, 
        analyspar=analyspar, 
        stimpar=stimpar, 
        basepar=basepar, 
        idxpar=idxpar, 
        permpar=permpar, 
        comp_sess=extrapar["comp_sess"], 
        datatype=extrapar["datatype"], 
        pop_stats=extrapar["pop_stats"], 
        randst=seed, 
        parallel=parallel, 
        )

    info = {
        "analyspar"  : analyspar._asdict(),
        "sesspar"    : sesspar._asdict(),
        "stimpar"    : stimpar._asdict(),
        "basepar"    : basepar._asdict(),
        "idxpar"     : idxpar._asdict(),
        "permpar"    : permpar._asdict(),
        "extrapar"   : extrapar,
        "usi_comp_df": usi_comp_df.to_dict(),
    }

    helper_fcts.plot_save_all(info, figpar)
def gabor_corrs_sess123_comps(sessions, analyspar, sesspar, stimpar, basepar, 
                              idxpar, permpar, figpar, seed=None, 
                              parallel=False):
    """
    gabor_corrs_sess123_comps(sessions, analyspar, sesspar, stimpar, basepar, 
                              idxpar, permpar, figpar)

    Retrieves tracked ROI Gabor USI correlations for session 1 to 3.
        
    Saves results and parameters relevant to analysis in a dictionary.

    Required args:
        - sessions (list): 
            Session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - sesspar (SessPar): 
            named tuple containing session parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - basepar (BasePar): 
            named tuple containing baseline parameters
        - idxpar (IdxPar): 
            named tuple containing index parameters
        - permpar (PermPar): 
            named tuple containing permutation parameters
        - figpar (dict): 
            dictionary containing figure parameters
    
    Optional args:
        - seed (int): 
            seed value to use. (-1 treated as None)
            default: None
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores
            default: False
    """

    logger.info(
        "Compiling tracked ROI Gabor USI correlations for sessions 1 to 3.", 
        extra={"spacing": "\n"})

    # correlations require ROIs tracked across sessions
    if not analyspar.tracked:
        raise ValueError("analyspar.tracked should be set to True.")

    # remove incomplete session series and warn
    sessions = misc_analys.check_sessions_complete(sessions)

    consec_only = True
    permpar = corr_analys.set_multcomp(
        permpar, sessions, analyspar, consec_only=consec_only)

    # R-squared measures are tested one-tailed (high)
    if "R_sqr" in CORR_TYPE:
        permpar = sess_ntuple_util.get_modif_ntuple(permpar, "tails", "hi")

    idx_corr_df = corr_analys.get_idx_corrs_df(
        sessions, 
        analyspar=analyspar, 
        stimpar=stimpar, 
        basepar=basepar, 
        idxpar=idxpar, 
        permpar=permpar, 
        consec_only=consec_only, 
        permute=PERMUTE, 
        corr_type=CORR_TYPE, 
        sig_only=SIG_ONLY, 
        randst=seed, 
        parallel=parallel, 
        )

    extrapar = {
        "consec_only": consec_only,
        "corr_type"  : CORR_TYPE,
        "permute"    : PERMUTE,
        "seed"       : seed,
        "sig_only"   : SIG_ONLY,
    }

    info = {
        "analyspar"  : analyspar._asdict(),
        "sesspar"    : sesspar._asdict(),
        "stimpar"    : stimpar._asdict(),
        "basepar"    : basepar._asdict(),
        "idxpar"     : idxpar._asdict(),
        "permpar"    : permpar._asdict(),
        "extrapar"   : extrapar,
        "idx_corr_df": idx_corr_df.to_dict(),
    }

    helper_fcts.plot_save_all(info, figpar)
def corr_scatterplots(sessions, analyspar, sesspar, stimpar, basepar, idxpar, 
                      permpar, figpar, seed=None, parallel=False):
    """
    corr_scatterplots(sessions, analyspar, sesspar, stimpar, basepar, idxpar, 
                      permpar, figpar)

    Retrieves tracked ROI USI correlation scatterplot data for a session pair.
        
    Saves results and parameters relevant to analysis in a dictionary.

    Required args:
        - sessions (list): 
            Session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - sesspar (SessPar): 
            named tuple containing session parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - basepar (BasePar): 
            named tuple containing baseline parameters
        - idxpar (IdxPar): 
            named tuple containing index parameters
        - permpar (PermPar): 
            named tuple containing permutation parameters
        - figpar (dict): 
            dictionary containing figure parameters
    
    Optional args:
        - seed (int): 
            seed value to use. (-1 treated as None)
            default: None
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores
            default: False
    """

    # added for consistency with the other analysis functions, which all log 
    # their compile step
    logger.info(
        "Compiling tracked ROI USI correlation scatterplot data.", 
        extra={"spacing": "\n"})

    # correlations require ROIs tracked across sessions
    # (fix: this check was previously duplicated verbatim further down)
    if not analyspar.tracked:
        raise ValueError("analyspar.tracked should be set to True.")

    # remove incomplete session series and warn
    sessions = misc_analys.check_sessions_complete(sessions)

    permpar = corr_analys.set_multcomp(permpar, sessions, analyspar, factor=2)

    permute = PERMUTE
    sig_only = SIG_ONLY

    idx_corr_df = corr_analys.corr_scatterplots(
        sessions, 
        analyspar=analyspar, 
        stimpar=stimpar, 
        basepar=basepar, 
        idxpar=idxpar, 
        permpar=permpar, 
        permute=permute, 
        sig_only=sig_only, 
        randst=seed, 
        parallel=parallel, 
        )

    extrapar = {
        "permute" : permute,
        "seed"    : seed,
        "sig_only": sig_only,
    }

    info = {
        "analyspar"  : analyspar._asdict(),
        "sesspar"    : sesspar._asdict(),
        "stimpar"    : stimpar._asdict(),
        "basepar"    : basepar._asdict(),
        "idxpar"     : idxpar._asdict(),
        "permpar"    : permpar._asdict(),
        "extrapar"   : extrapar,
        "idx_corr_df": idx_corr_df.to_dict(),
    }

    helper_fcts.plot_save_all(info, figpar)
def get_lp_idx_df(sessions, analyspar, stimpar, basepar, idxpar, permpar=None, 
                  sig_only=False, randst=None, parallel=False):
    """
    get_lp_idx_df(sessions, analyspar, stimpar, basepar, idxpar)

    Returns ROI index dataframe, grouped by line/plane/session.

    Required args:
        - sessions (list): 
            Session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - basepar (BasePar): 
            named tuple containing baseline parameters
        - idxpar (IdxPar): 
            named tuple containing index parameters

    Optional args:
        - permpar (PermPar): 
            named tuple containing permutation parameters, required if 
            sig_only is True
            default: None
        - sig_only (bool): 
            if True, ROIs with significant USIs are included (only possible if 
            analyspar.tracked is True)
            default: False
        - randst (int or np.random.RandomState): 
            random state or seed value to use. (-1 treated as None)
            default: None
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores
            default: False

    Returns:
        - lp_idx_df (pd.DataFrame):
            dataframe with one row per line/plane/session, and the following 
            columns, in addition to the basic sess_df columns:
            - roi_idxs (list): index for each ROI (or each ROI that is 
                significant in at least one session, if sig_only)
    """

    if analyspar.tracked:
        misc_analys.check_sessions_complete(sessions, raise_err=True)

    if sig_only and permpar is None:
        raise ValueError("If sig_only is True, permpar cannot be None.")

    initial_columns = misc_analys.get_sess_df_columns(sessions[0], analyspar)

    shared_args = {
        "analyspar": analyspar,
        "stimpar"  : stimpar,
        "basepar"  : basepar,
        "idxpar"   : idxpar,
        "parallel" : parallel,
    }

    if sig_only:
        # restrict to ROIs significant in at least one session
        idx_df = usi_analys.get_idx_sig_df(
            sessions, permpar=permpar, randst=randst, aggreg_sess=True, 
            **shared_args)
    else:
        idx_df = usi_analys.get_idx_only_df(sessions, **shared_args)

    group_columns = ["lines", "planes", "sess_ns"]
    aggreg_cols = [col for col in initial_columns if col not in group_columns]

    # aggregate within each line/plane/session group
    lp_idx_df = pd.DataFrame(columns=initial_columns + ["roi_idxs"])
    for grp_vals, grp_df in idx_df.groupby(group_columns):
        grp_df = grp_df.sort_values("mouse_ns")
        row_idx = len(lp_idx_df)
        for column, value in zip(group_columns, grp_vals):
            lp_idx_df.loc[row_idx, column] = value

        # add aggregated values for initial columns
        lp_idx_df = misc_analys.aggreg_columns(
            grp_df, lp_idx_df, aggreg_cols, row_idx=row_idx, in_place=True)

        roi_idxs = grp_df["roi_idxs"].tolist()
        if sig_only:
            # keep only the significant ROI indices for each mouse
            roi_idxs = [
                np.asarray(idx_vals)[np.asarray(sig_ns).astype(int)] 
                for idx_vals, sig_ns in zip(roi_idxs, grp_df["sig_idxs"])
            ]

        lp_idx_df.at[row_idx, "roi_idxs"] = np.concatenate(roi_idxs).tolist()

    lp_idx_df["sess_ns"] = lp_idx_df["sess_ns"].astype(int)

    return lp_idx_df
def gabor_rel_resp_tracked_rois_sess123(sessions, analyspar, sesspar, stimpar, 
                                        permpar, figpar, seed=None, 
                                        parallel=False):
    """
    gabor_rel_resp_tracked_rois_sess123(sessions, analyspar, sesspar, stimpar, 
                                        permpar, figpar)

    Retrieves ROI responses to regular and unexpected Gabor frames, relative 
    to session 1, for tracked ROIs.
        
    Saves results and parameters relevant to analysis in a dictionary.

    Required args:
        - sessions (list): 
            Session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - sesspar (SessPar): 
            named tuple containing session parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - permpar (PermPar): 
            named tuple containing permutation parameters
        - figpar (dict): 
            dictionary containing figure parameters
    
    Optional args:
        - seed (int): 
            seed value to use. (-1 treated as None)
            default: None
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores
            default: False
    """

    logger.info("Compiling tracked ROI Gabor responses relative to session 1.", 
        extra={"spacing": "\n"})

    # relative responses require tracked, unscaled data
    if not analyspar.tracked:
        raise ValueError("analyspar.tracked should be set to True.")
    if analyspar.scale:
        raise ValueError("analyspar.scale should be set to False.")

    # remove incomplete session series and warn
    sessions = misc_analys.check_sessions_complete(sessions)

    # calculate multiple comparisons, based on unique line/plane/session 
    # combinations
    dummy_df = misc_analys.get_check_sess_df(
        sessions, None, analyspar).drop_duplicates(
            subset=["lines", "planes", "sess_ns"])
    permpar = misc_analys.set_multcomp(
        permpar, sess_df=dummy_df, CIs=False, factor=2)

    rel_sess = 1  # scale responses relative to session 1
    rel_resp_df = seq_analys.get_rel_resp_stats_df(
        sessions, 
        analyspar=analyspar, 
        stimpar=stimpar, 
        permpar=permpar, 
        rel_sess=rel_sess, 
        randst=seed, 
        parallel=parallel, 
        )

    extrapar = {
        "rel_sess": rel_sess,
        "seed"    : seed,
    }

    info = {
        "analyspar"  : analyspar._asdict(),
        "sesspar"    : sesspar._asdict(),
        "stimpar"    : stimpar._asdict(),
        "permpar"    : permpar._asdict(),
        "extrapar"   : extrapar,
        "rel_resp_df": rel_resp_df.to_dict(),
    }

    helper_fcts.plot_save_all(info, figpar)
def visual_flow_tracked_roi_usis_sess123(sessions, analyspar, sesspar, 
                                         stimpar, basepar, idxpar, figpar, 
                                         parallel=False):
    """
    visual_flow_tracked_roi_usis_sess123(sessions, analyspar, sesspar, 
                                         stimpar, basepar, idxpar, figpar)

    Retrieves tracked ROI visual flow USIs for session 1 to 3.
        
    Saves results and parameters relevant to analysis in a dictionary.

    Required args:
        - sessions (list): 
            Session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - sesspar (SessPar): 
            named tuple containing session parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - basepar (BasePar): 
            named tuple containing baseline parameters
        - idxpar (IdxPar): 
            named tuple containing index parameters
        - figpar (dict): 
            dictionary containing figure parameters
    
    Optional args:
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores
            default: False
    """

    logger.info("Compiling tracked ROI visual flow USIs for sessions 1 to 3.", 
        extra={"spacing": "\n"})

    # USI tracking requires tracked ROIs
    if not analyspar.tracked:
        raise ValueError("analyspar.tracked should be set to True.")

    # remove incomplete session series and warn
    sessions = misc_analys.check_sessions_complete(sessions)

    idx_only_df = usi_analys.get_idx_only_df(
        sessions, 
        analyspar=analyspar, 
        stimpar=stimpar, 
        basepar=basepar, 
        idxpar=idxpar, 
        parallel=parallel, 
        )

    info = {
        "analyspar"  : analyspar._asdict(),
        "sesspar"    : sesspar._asdict(),
        "stimpar"    : stimpar._asdict(),
        "basepar"    : basepar._asdict(),
        "idxpar"     : idxpar._asdict(),
        "extrapar"   : dict(),
        "idx_only_df": idx_only_df.to_dict(),
    }

    helper_fcts.plot_save_all(info, figpar)
def visual_flow_tracked_roi_abs_usi_means_sess123(
        sessions, analyspar, sesspar, stimpar, basepar, idxpar, permpar, 
        figpar, seed=None, parallel=False):
    """
    visual_flow_tracked_roi_abs_usi_means_sess123(
        sessions, analyspar, sesspar, stimpar, basepar, idxpar, permpar, 
        figpar)

    Retrieves mean absolute values of tracked ROI visual flow USIs for 
    session 1 to 3.
        
    Saves results and parameters relevant to analysis in a dictionary.

    Required args:
        - sessions (list): 
            Session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - sesspar (SessPar): 
            named tuple containing session parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - basepar (BasePar): 
            named tuple containing baseline parameters
        - idxpar (IdxPar): 
            named tuple containing index parameters
        - permpar (PermPar): 
            named tuple containing permutation parameters
        - figpar (dict): 
            dictionary containing figure parameters
    
    Optional args:
        - seed (int): 
            seed value to use. (-1 treated as None)
            default: None
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores
            default: False
    """

    # fix: log message previously said "Gabor USIs", copied from the Gabor 
    # variant of this analysis — this function compiles visual flow USIs
    logger.info(
        ("Compiling absolute means of tracked ROI visual flow USIs for "
        "sessions 1 to 3."), 
        extra={"spacing": "\n"})

    if not analyspar.tracked:
        raise ValueError("analyspar.tracked should be set to True.")

    # remove incomplete session series and warn
    sessions = misc_analys.check_sessions_complete(sessions)

    # calculate multiple comparisons, based on unique line/plane/session 
    # combinations
    dummy_df = misc_analys.get_check_sess_df(
        sessions, None, analyspar).drop_duplicates(
            subset=["lines", "planes", "sess_ns"])
    permpar = misc_analys.set_multcomp(permpar, sess_df=dummy_df, CIs=False)

    absolute = True
    by_mouse = False
    idx_stats_df = usi_analys.get_idx_stats_df(
        sessions, 
        analyspar=analyspar, 
        stimpar=stimpar, 
        basepar=basepar, 
        idxpar=idxpar, 
        permpar=permpar, 
        absolute=absolute, 
        by_mouse=by_mouse, 
        randst=seed, 
        parallel=parallel, 
        )

    extrapar = {
        "absolute": absolute,
        "by_mouse": by_mouse,
        "seed"    : seed,
    }

    info = {
        "analyspar"   : analyspar._asdict(),
        "sesspar"     : sesspar._asdict(),
        "stimpar"     : stimpar._asdict(),
        "basepar"     : basepar._asdict(),
        "idxpar"      : idxpar._asdict(),
        "permpar"     : permpar._asdict(),
        "extrapar"    : extrapar,
        "idx_stats_df": idx_stats_df.to_dict(),
    }

    helper_fcts.plot_save_all(info, figpar)
def gabor_tracked_roi_abs_usi_means_sess123_by_mouse(
        sessions, analyspar, sesspar, stimpar, basepar, idxpar, figpar, 
        parallel=False):
    """
    gabor_tracked_roi_abs_usi_means_sess123_by_mouse(
        sessions, analyspar, sesspar, stimpar, basepar, idxpar, figpar)

    Retrieves mean absolute for tracked ROI Gabor USIs, for each mouse, for 
    session 1 to 3.
        
    Saves results and parameters relevant to analysis in a dictionary.

    Required args:
        - sessions (list): 
            Session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - sesspar (SessPar): 
            named tuple containing session parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - basepar (BasePar): 
            named tuple containing baseline parameters
        - idxpar (IdxPar): 
            named tuple containing index parameters
        - figpar (dict): 
            dictionary containing figure parameters
    
    Optional args:
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores
            default: False
    """

    logger.info(
        ("Compiling absolute means per mouse of tracked ROI Gabor USIs for "
        "sessions 1 to 3."), 
        extra={"spacing": "\n"})

    # per-mouse means require ROIs tracked across sessions
    if not analyspar.tracked:
        raise ValueError("analyspar.tracked should be set to True.")

    # remove incomplete session series and warn
    sessions = misc_analys.check_sessions_complete(sessions)

    # analysis settings, recorded in extrapar
    extrapar = {
        "absolute": True,
        "by_mouse": True,
    }

    idx_stats_df = usi_analys.get_idx_stats_df(
        sessions, 
        analyspar=analyspar, 
        stimpar=stimpar, 
        basepar=basepar, 
        idxpar=idxpar, 
        absolute=extrapar["absolute"], 
        by_mouse=extrapar["by_mouse"], 
        parallel=parallel, 
        )

    info = {
        "analyspar"   : analyspar._asdict(),
        "sesspar"     : sesspar._asdict(),
        "stimpar"     : stimpar._asdict(),
        "basepar"     : basepar._asdict(),
        "idxpar"      : idxpar._asdict(),
        "extrapar"    : extrapar,
        "idx_stats_df": idx_stats_df.to_dict(),
    }

    helper_fcts.plot_save_all(info, figpar)
def get_roi_tracking_df(sessions, analyspar, reg_only=False, proj=False, 
                        crop_info=False, parallel=False):
    """
    get_roi_tracking_df(sessions, analyspar)

    Return ROI tracking information for the requested sessions.

    Required args:
        - sessions (list): 
            Session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters

    Optional args:
        - proj (bool):
            if True, max projections are included in the output dataframe
            default: False
        - reg_only (bool):
            if True, only registered masks, and projections if proj is True, 
            are included in the output dataframe
            default: False
        - crop_info (bool or str):
            if not False, the type of cropping information to include 
            ("small" for the small plots, "large" for the large plots)
            default: False
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores
            default: False

    Returns:
        - roi_mask_df (pd.DataFrame in dict format):
            dataframe with a row for each mouse, and the following columns, 
            in addition to the basic sess_df columns:
            - "registered_roi_mask_idxs" (list): list of mask indices, 
                registered across sessions, for each session 
                (flattened across ROIs) ((sess, hei, wid) x val)
            - "roi_mask_shapes" (list): shape into which ROI mask indices index 
                (sess x hei x wid)
            if not reg_only:
            - "roi_mask_idxs" (list): list of mask indices for each session, 
                and each ROI (sess x ((ROI, hei, wid) x val)) (not registered)
            if proj:
            - "registered_max_projections" (list): pixel intensities of maximum 
                projection for the plane (hei x wid), after registration across 
                sessions
            if proj and not reg_only:
            - "max_projections" (list): pixel intensities of maximum projection 
                for the plane (hei x wid)
            if crop_info:
            - "crop_fact" (num): factor by which to crop masks (> 1) 
            - "shift_prop_hei" (float): proportion by which to shift cropped 
                mask center vertically from left edge [0, 1]
            - "shift_prop_wid" (float): proportion by which to shift cropped 
                mask center horizontally from left edge [0, 1]

    Raises:
        - ValueError: if analyspar.tracked is False.
        - NotImplementedError: if crop_info is set and no preset cropping 
          parameters exist for one of the mice.
        - RuntimeError: if registered mask shapes differ across a mouse's 
          sessions.
    """

    # tracking info only exists for tracked analyses
    if not analyspar.tracked:
        raise ValueError("analyspar.tracked must be True for this analysis.")

    misc_analys.check_sessions_complete(sessions, raise_err=True)

    sess_df = misc_analys.get_check_sess_df(sessions, analyspar=analyspar)

    # if cropping, check right away for dictionary with the preset parameters
    if crop_info:
        if crop_info == "small":
            crop_dict = SMALL_CROP_DICT
        elif crop_info == "large":
            crop_dict = LARGE_CROP_DICT
        else:
            gen_util.accepted_values_error(
                "crop_info", crop_info, ["small", "large"])
        for mouse_n in sess_df["mouse_ns"].unique():
            # NOTE(review): membership test casts with int(mouse_n), but the 
            # lookup in the group loop below uses mouse_n as-is — confirm the 
            # dict keys and column dtype align
            if int(mouse_n) not in crop_dict.keys():
                raise NotImplementedError(
                    f"No preset cropping information found for mouse {mouse_n}."
                    )

    # collect ROI mask data for each session (in parallel, if requested)
    sess_dicts = gen_util.parallel_wrap(get_sess_reg_mask_info, sessions, 
        args_list=[analyspar, True, proj], parallel=parallel)
    # session IDs, in the same order as sess_dicts, for lookup below
    all_sessids = [sess.sessid for sess in sessions]

    group_columns = ["planes", "lines", "mouse_ns"]
    initial_columns = sess_df.columns.tolist()
    # object columns to create, depending on the requested outputs
    obj_columns = ["registered_roi_mask_idxs", "roi_mask_shapes"]
    if not reg_only:
        obj_columns.append("roi_mask_idxs")
    if proj:
        obj_columns.append("registered_max_projections")
        if not reg_only:
            obj_columns.append("max_projections")

    roi_mask_df = pd.DataFrame(columns=initial_columns + obj_columns)

    aggreg_cols = [col for col in initial_columns if col not in group_columns]
    # one output row per plane/line/mouse group
    for grp_vals, grp_df in sess_df.groupby(group_columns):
        row_idx = len(roi_mask_df)
        for g, group_column in enumerate(group_columns):
            roi_mask_df.loc[row_idx, group_column] = grp_vals[g]

        # add aggregated values for initial columns
        roi_mask_df = misc_analys.aggreg_columns(
            grp_df, roi_mask_df, aggreg_cols, row_idx=row_idx, in_place=True, 
            by_mouse=True)

        # sessions for this mouse, in sorted session ID order
        sessids = sorted(grp_df["sessids"].tolist())

        reg_roi_masks, roi_mask_idxs = [], []
        if proj:
            reg_max_projs, max_projs = [], []

        roi_mask_shape = None
        for sessid in sessids:
            sess_dict = sess_dicts[all_sessids.index(sessid)]
            reg_roi_mask = sess_dict["registered_roi_masks"]

            # flatten masks across ROIs
            reg_roi_masks.append(np.max(reg_roi_mask, axis=0))
            # all of a mouse's sessions must share one registered mask shape
            if roi_mask_shape is None:
                roi_mask_shape = reg_roi_mask.shape
            elif roi_mask_shape != reg_roi_mask.shape:
                raise RuntimeError(
                    "ROI mask shapes across sessions should match, for the "
                    "same mouse.")

            if not reg_only:
                # store unregistered masks as sparse (ROI, hei, wid) indices
                roi_mask_idxs.append(
                    [idxs.tolist() for idxs in np.where(sess_dict["roi_masks"])]
                    )

            if proj:
                reg_max_projs.append(
                    sess_dict["registered_max_projection"].tolist())
                if not reg_only:
                    max_projs.append(sess_dict["max_projection"].tolist())

        # add to the dataframe (registered masks stored as sparse 
        # (sess, hei, wid) indices)
        roi_mask_df.at[row_idx, "registered_roi_mask_idxs"] = \
            [idxs.tolist() for idxs in np.where(reg_roi_masks)]
        roi_mask_df.at[row_idx, "roi_mask_shapes"] = roi_mask_shape

        if not reg_only:
            roi_mask_df.at[row_idx, "roi_mask_idxs"] = roi_mask_idxs

        if proj:
            roi_mask_df.at[row_idx, "registered_max_projections"] = \
                reg_max_projs
            if not reg_only:
                roi_mask_df.at[row_idx, "max_projections"] = max_projs

        # add cropping info
        if crop_info:
            mouse_n = grp_vals[group_columns.index("mouse_ns")]
            crop_fact, shift_prop_hei, shift_prop_wid = crop_dict[mouse_n]
            roi_mask_df.at[row_idx, "crop_fact"] = crop_fact
            roi_mask_df.at[row_idx, "shift_prop_hei"] = shift_prop_hei
            roi_mask_df.at[row_idx, "shift_prop_wid"] = shift_prop_wid

    roi_mask_df["mouse_ns"] = roi_mask_df["mouse_ns"].astype(int)

    return roi_mask_df
def get_stim_stats_df(sessions, analyspar, stimpar, permpar, comp_sess=[1, 3], 
                      datatype="rel_unexp_resp", rel_sess=1, basepar=None, 
                      idxpar=None, pop_stats=True, randst=None, 
                      parallel=False):
    """
    get_stim_stats_df(sessions, analyspar, stimpar, permpar)

    Returns dataframe with comparison of absolute fractional data changes 
    between sessions for different stimuli.

    Required args:
        - sessions (list): 
            session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - permpar (PermPar): 
            named tuple containing permutation parameters

    Optional args:
        - comp_sess (int):
            sessions for which to obtain absolute fractional change 
            [x, y] => |(y - x) / x|
            default: [1, 3]
        - datatype (str):
            type of data to retrieve
            default: "rel_unexp_resp"
        - rel_sess (int):
            number of session relative to which data should be scaled, for 
            each mouse
            default: 1
        - basepar (BasePar): 
            named tuple containing baseline parameters 
            (needed if datatype is "usis")
            default: None
        - idxpar (IdxPar): 
            named tuple containing index parameters 
            (needed if datatype is "usis")
            default: None
        - pop_stats (bool):
            if True, analyses are run on population statistics, and not 
            individual tracked ROIs
            default: True
        - randst (int or np.random.RandomState): 
            random state or seed value to use. (-1 treated as None)
            default: None
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores 
            default: False

    Returns:
        - stim_stats_df (pd.DataFrame):
            dataframe with one row per line/plane and one for all line/planes 
            together, and the basic sess_df columns, in addition to, 
            for each stimtype:
            - {stimtype} (list): absolute fractional change statistics (me, err)
            - raw_p_vals (float): uncorrected p-value for data differences 
                between stimulus types 
            - p_vals (float): p-value for data differences between stimulus 
                types, corrected for multiple comparisons and tails
    """

    # individual-ROI analysis requires tracked ROIs, complete across sessions
    if not pop_stats:
        if analyspar.tracked:
            misc_analys.check_sessions_complete(sessions, raise_err=True)
        else:
            raise ValueError(
                "If analysis is run for individual ROIs and not population "
                "statistics, analyspar.tracked must be set to True.")

    # this comparison is defined for exactly these two stimulus types
    if set(stimpar.stimtype) != set(["gabors", "visflow"]):
        raise ValueError(
            "Expected stimpar.stimtype to list 'gabors' and 'visflow'.")

    # pre/post windows must be given per stimtype, in the same order
    if (not (isinstance(stimpar.pre, list) and isinstance(stimpar.post, list)) 
        or not (len(stimpar.pre) == 2 and len(stimpar.post) == 2)):
        raise ValueError(
            "stimpar.pre and stimpar.post must be provided as lists of "
            "length 2 (one value per stimpar.stimtype, in order).")

    if datatype == "usis":
        # USI features are also specified per stimtype
        if (not isinstance(idxpar.feature, list) 
            or not len(idxpar.feature) == 2):
            raise ValueError(
                "idxpar.feature must be provided as a list of length 2 "
                "(one value per stimpar.stimtype, in order).")

    # collect data per stimtype, accumulating into one dataframe
    stim_stats_df = None
    for s, stimtype in enumerate(stimpar.stimtype):
        # build a single-stimtype stimpar with that stimtype's pre/post window
        stim_stimpar = sess_ntuple_util.get_modif_ntuple(
            stimpar, ["stimtype", "pre", "post"], 
            [stimtype, stimpar.pre[s], stimpar.post[s]]
            )

        stim_idxpar = idxpar
        if datatype == "usis":
            # select this stimtype's USI feature
            stim_idxpar = sess_ntuple_util.get_modif_ntuple(
                idxpar, "feature", idxpar.feature[s]
                )

        stim_stats_df = get_stim_data_df(
            sessions, analyspar, stim_stimpar, stim_data_df=stim_stats_df, 
            comp_sess=comp_sess, datatype=datatype, rel_sess=rel_sess, 
            basepar=basepar, idxpar=stim_idxpar, abs_usi=pop_stats, 
            parallel=parallel)

    # add statistics and p-values (population or per-ROI variant)
    add_stim_stats = add_stim_pop_stats if pop_stats else add_stim_roi_stats
    stim_stats_df = add_stim_stats(
        stim_stats_df, sessions, analyspar, stimpar, permpar, 
        comp_sess=comp_sess, in_place=True, randst=randst)

    # drop the raw per-session data columns, keeping only the statistics
    data_cols = []
    for s, stimtype in enumerate(stimpar.stimtype):
        for n in comp_sess:
            data_cols.append(f"{stimtype}_s{n}")
    stim_stats_df = stim_stats_df.drop(data_cols, axis=1)

    # label the session comparison, e.g. "comp1v3"
    stim_stats_df["sess_ns"] = f"comp{comp_sess[0]}v{comp_sess[1]}"

    # corrected p-values (multiple comparisons and tails)
    stim_stats_df = misc_analys.add_corr_p_vals(stim_stats_df, permpar)

    return stim_stats_df
def add_stim_roi_stats(stim_stats_df, sessions, analyspar, stimpar, permpar, 
                       comp_sess=[1, 3], in_place=False, randst=None):
    """
    add_stim_roi_stats(stim_stats_df, sessions, analyspar, stimpar, permpar)

    Adds to dataframe comparison of absolute fractional data changes 
    between sessions for different stimuli, calculated for individual ROIs.

    Required args:
        - stim_stats_df (pd.DataFrame):
            dataframe with one row per line/plane, and the basic sess_df 
            columns, as well as stimulus columns for each comp_sess:
            - {stimpar.stimtype}_s{comp_sess[0]}: 
                first comp_sess data for each ROI
            - {stimpar.stimtype}_s{comp_sess[1]}: 
                second comp_sess data for each ROI
        - sessions (list): 
            session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - permpar (PermPar): 
            named tuple containing permutation parameters

    Optional args:
        - comp_sess (int):
            sessions for which to obtain absolute fractional change 
            [x, y] => |(y - x) / x|
            default: [1, 3]
        - in_place (bool):
            if True, targ_df is modified in place. Otherwise, a deep copy is 
            modified. targ_df is returned in either case.
            default: False
        - randst (int or np.random.RandomState): 
            random state or seed value to use. (-1 treated as None)
            default: None

    Returns:
        - stim_stats_df (pd.DataFrame):
            dataframe with one row per line/plane and one for all line/planes 
            together, and the basic sess_df columns, in addition to the input 
            columns, and for each stimtype:
            - {stimtype} (list): absolute fractional change statistics 
                (me, err)
            - p_vals (float): p-value for data differences between stimulus 
                types, corrected for multiple comparisons and tails

    Raises:
        - ValueError if analyspar.tracked is False (per-ROI comparisons 
          require ROIs tracked across sessions).
    """

    nanpol = None if analyspar.rem_bad else "omit"

    # per-ROI pairing across sessions requires tracked, complete sessions
    if analyspar.tracked:
        misc_analys.check_sessions_complete(sessions, raise_err=True)
    else:
        raise ValueError(
            "If analysis is run for individual ROIs and not population "
            "statistics, analyspar.tracked must be set to True.")

    if not in_place:
        stim_stats_df = stim_stats_df.copy(deep=True)

    stimtypes = gen_util.list_if_not(stimpar.stimtype)
    stim_stats_df = gen_util.set_object_columns(
        stim_stats_df, stimtypes, in_place=True)

    # compile all data, pooled across line/planes, for the "all" row
    full_data = dict()
    for stimtype in stimpar.stimtype:
        for n in comp_sess:
            stim_col = f"{stimtype}_s{n}"
            full_data[stim_col] = np.concatenate(stim_stats_df[stim_col])

    # add a row pooling all line/planes ("all" in non-data columns)
    row_idx = len(stim_stats_df)
    for col in stim_stats_df.columns:
        stim_stats_df.loc[row_idx, col] = "all"
        if col in full_data.keys():
            stim_stats_df.loc[row_idx, col] = full_data[col]

    # take statistics per row (each line/plane, plus the "all" row)
    for row_idx in stim_stats_df.index:
        comp_data = [None, None]
        for s, stimtype in enumerate(stimpar.stimtype):
            stim_data = []
            for n in comp_sess:
                data_col = f"{stimtype}_s{n}"
                stim_data.append(stim_stats_df.loc[row_idx, data_col])

            # FIX: retain the per-ROI absolute fractional change 
            # (previously the return value was discarded, leaving 
            # comp_data[s] as None for the stats and p-value computations)
            comp_data[s] = abs_fractional_diff(stim_data)

            # get stats and add to dataframe
            stim_stats_df.at[row_idx, stimtype] = \
                math_util.get_stats(
                    comp_data[s], analyspar.stats, analyspar.error, 
                    nanpol=nanpol
                    ).tolist()

        # obtain p-values (paired comparison between the two stimtypes)
        stim_stats_df.loc[row_idx, "p_vals"] = rand_util.get_op_p_val(
            comp_data, permpar.n_perms, stats=analyspar.stats, paired=True, 
            nanpol=nanpol, randst=randst)

    # remove full data columns
    data_cols = []
    for stimtype in stimpar.stimtype:
        for n in comp_sess:
            data_cols.append(f"{stimtype}_s{n}")
    stim_stats_df = stim_stats_df.drop(data_cols, axis=1)

    return stim_stats_df
def add_stim_pop_stats(stim_stats_df, sessions, analyspar, stimpar, permpar, 
                       comp_sess=[1, 3], in_place=False, randst=None):
    """
    add_stim_pop_stats(stim_stats_df, sessions, analyspar, stimpar, permpar)

    Adds to dataframe comparison of absolute fractional data changes 
    between sessions for different stimuli, calculated for population 
    statistics.

    Required args:
        - stim_stats_df (pd.DataFrame):
            dataframe with one row per line/plane, and the basic sess_df 
            columns, as well as stimulus columns for each comp_sess:
            - {stimpar.stimtype}_s{comp_sess[0]}: 
                first comp_sess data for each ROI
            - {stimpar.stimtype}_s{comp_sess[1]}: 
                second comp_sess data for each ROI
        - sessions (list): 
            session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - permpar (PermPar): 
            named tuple containing permutation parameters

    Optional args:
        - comp_sess (int):
            sessions for which to obtain absolute fractional change 
            [x, y] => |(y - x) / x|
            default: [1, 3]
        - in_place (bool):
            if True, targ_df is modified in place. Otherwise, a deep copy is 
            modified. targ_df is returned in either case.
            default: False
        - randst (int or np.random.RandomState): 
            random state or seed value to use. (-1 treated as None)
            default: None

    Returns:
        - stim_stats_df (pd.DataFrame):
            dataframe with one row per line/plane and one for all line/planes 
            together, and the basic sess_df columns, in addition to the input 
            columns, and for each stimtype:
            - {stimtype} (list): absolute fractional change statistics 
                (me, err)
            - p_vals (float): p-value for data differences between stimulus 
                types, corrected for multiple comparisons and tails
    """

    nanpol = None if analyspar.rem_bad else "omit"

    # NOTE(review): unlike the per-ROI variant, incomplete session series 
    # only trigger a warning here (raise_err=False) — presumably deliberate 
    # for population-level statistics; confirm against callers.
    if analyspar.tracked:
        misc_analys.check_sessions_complete(sessions, raise_err=False)

    if not in_place:
        stim_stats_df = stim_stats_df.copy(deep=True)

    stimtypes = gen_util.list_if_not(stimpar.stimtype)
    stim_stats_df = gen_util.set_object_columns(
        stim_stats_df, stimtypes, in_place=True)

    # bootstrapped error computation below assumes mean/std statistics
    if analyspar.stats != "mean" or analyspar.error != "std":
        raise NotImplementedError("For population statistics analysis, "
            "analyspar.stats must be set to 'mean', and "
            "analyspar.error must be set to 'std'.")

    # initialize arrays for all data
    n_linpla = len(stim_stats_df)
    n_stims = len(stimpar.stimtype)
    n_bootstrp = misc_analys.N_BOOTSTRP

    all_stats = np.full((n_linpla, n_stims), np.nan)
    all_btstrap_stats = np.full((n_linpla, n_stims, n_bootstrp), np.nan)
    all_rand_stat_diffs = np.full((n_linpla, permpar.n_perms), np.nan)

    for i, row_idx in enumerate(stim_stats_df.index):
        # full_comp_data: per stimtype, the raw per-ROI data for each 
        # comp_sess (assumes exactly 2 stimtypes, as validated upstream)
        full_comp_data = [[], []]
        for s, stimtype in enumerate(stimpar.stimtype):
            comp_data, btstrap_comp_data = [], []
            choices = None
            for n in comp_sess:
                data_col = f"{stimtype}_s{n}"

                # get data
                data = stim_stats_df.loc[row_idx, data_col]

                # get session stats
                comp_data.append(
                    math_util.mean_med(data, analyspar.stats, nanpol=nanpol)
                    )

                # get bootstrapped data
                returns = rand_util.bootstrapped_std(
                    data, randst=randst, n_samples=n_bootstrp, 
                    return_rand=True, return_choices=analyspar.tracked, 
                    choices=choices, nanpol=nanpol)
                btstrap_data = returns[1]
                if analyspar.tracked:
                    # for tracked ROIs, resample the same ROIs in both 
                    # sessions
                    choices = returns[-1] # use same choices across sessions

                btstrap_comp_data.append(btstrap_data)
                full_comp_data[s].append(data) # retain full data

            # compute absolute fractional change stats (bootstrapped std)
            all_stats[i, s] = abs_fractional_diff(comp_data)
            all_btstrap_stats[i, s] = abs_fractional_diff(btstrap_comp_data)
            error = np.std(all_btstrap_stats[i, s])

            # add to dataframe
            stim_stats_df.at[row_idx, stimtype] = [all_stats[i, s], error]

        # obtain p-values for real data wrt random data
        stim_stat_diff = all_stats[i, 1] - all_stats[i, 0]

        # permute data for each session across stimtypes
        sess_rand_stats = [] # sess x stim
        for j in range(len(comp_sess)):
            rand_concat = [stim_data[j] for stim_data in full_comp_data]
            rand_concat = np.stack(rand_concat).T
            rand_stats = rand_util.permute_diff_ratio(
                rand_concat, div=None, n_perms=permpar.n_perms, 
                stats=analyspar.stats, op="none", paired=True, # pair stimuli
                nanpol=nanpol, randst=randst
                )
            sess_rand_stats.append(rand_stats)

        # obtain stats per stimtypes, then differences between stimtypes
        stim_rand_stats = list(zip(*sess_rand_stats)) # stim x sess
        all_rand_stats = []
        for rand_stats in stim_rand_stats:
            all_rand_stats.append(abs_fractional_diff(rand_stats))
        all_rand_stat_diffs[i] = all_rand_stats[1] - all_rand_stats[0]

        # calculate p-value
        p_val = rand_util.get_p_val_from_rand(
            stim_stat_diff, all_rand_stat_diffs[i], tails=permpar.tails, 
            nanpol=nanpol)
        stim_stats_df.loc[row_idx, "p_vals"] = p_val

    # collect stats for all line/planes
    row_idx = len(stim_stats_df)
    for col in stim_stats_df.columns:
        stim_stats_df.loc[row_idx, col] = "all"

    # average across line/planes
    all_data = []
    for data in [all_stats, all_btstrap_stats, all_rand_stat_diffs]:
        all_data.append(
            math_util.mean_med(data, analyspar.stats, nanpol=nanpol, axis=0)
            )
    stat, btstrap_stats, rand_stat_diffs = all_data

    for s, stimtype in enumerate(stimpar.stimtype):
        error = np.std(btstrap_stats[s])
        stim_stats_df.at[row_idx, stimtype] = [stat[s], error]

    # p-value for the averaged statistics, from the averaged random 
    # differences
    p_val = rand_util.get_p_val_from_rand(
        stat[1] - stat[0], rand_stat_diffs, tails=permpar.tails, 
        nanpol=nanpol)
    stim_stats_df.loc[row_idx, "p_vals"] = p_val

    return stim_stats_df