def get_pupil_run_block_diffs_df(sessions, analyspar, stimpar, parallel=False):
    """
    get_pupil_run_block_diffs_df(sessions, analyspar, stimpar)

    Returns pupil and running statistic differences (unexp - exp) by block.

    Required args:
        - sessions (list): session objects
        - analyspar (AnalysPar): named tuple containing analysis parameters
        - stimpar (StimPar): named tuple containing stimulus parameters

    Optional args:
        - parallel (bool): if True, some of the analysis is run in parallel
          across CPU cores
          default: False

    Returns:
        - block_df (pd.DataFrame): dataframe with a row for each session,
          and the following columns, in addition to the basic sess_df
          columns:
            - run_block_diffs (1D array): split differences per block
            - run_block_stats (3D array): block statistics
              (split x block x stats (me, err))
            - pupil_block_diffs (1D array): split differences per block
            - pupil_block_stats (3D array): block statistics
              (split x block x stats (me, err))
    """
    block_df = misc_analys.get_check_sess_df(
        sessions, None, analyspar, roi=False)

    shared_args = {
        "analyspar": analyspar,
        "stimpar": stimpar,
    }

    # confirm that session order still matches the dataframe
    misc_analys.get_check_sess_df(sessions, block_df)

    for datatype in ["pupil", "run"]:
        shared_args["datatype"] = datatype

        # sess x split x block x stats
        per_sess_stats = gen_util.parallel_wrap(
            basic_analys.get_block_data, sessions, args_dict=shared_args,
            parallel=parallel)

        # difference of the statistic (unexp - exp) for each block
        per_sess_diffs = [
            sess_data[1, ..., 0] - sess_data[0, ..., 0]
            for sess_data in per_sess_stats
        ]

        block_df[f"{datatype}_block_stats"] = per_sess_stats
        block_df[f"{datatype}_block_diffs"] = per_sess_diffs

    return block_df
def nrois_sess123(sessions, analyspar, sesspar, figpar):
    """
    nrois_sess123(sessions, analyspar, sesspar, figpar)

    Retrieves number of ROIs for sessions 1 to 3, and saves the results and
    analysis parameters in a dictionary passed to the plotting/saving helper.

    Required args:
        - sessions (list): Session objects
        - analyspar (AnalysPar): named tuple containing analysis parameters
        - sesspar (SessPar): named tuple containing session parameters
        - figpar (dict): dictionary containing figure parameters
    """
    logger.info(
        "Compiling ROI numbers from session 1 to 3.",
        extra={"spacing": "\n"})

    nrois_df = misc_analys.get_check_sess_df(sessions, analyspar=analyspar)

    info = {
        "analyspar": analyspar._asdict(),
        "sesspar": sesspar._asdict(),
        "extrapar": dict(),  # no extra parameters for this analysis
        "nrois_df": nrois_df.to_dict(),
    }

    helper_fcts.plot_save_all(info, figpar)
def mean_signal_sess123(sessions, analyspar, sesspar, figpar, parallel=False):
    """
    mean_signal_sess123(sessions, analyspar, sesspar, figpar)

    Retrieves ROI mean signal values for sessions 1 to 3, and saves the
    results and analysis parameters in a dictionary passed to the
    plotting/saving helper.

    Required args:
        - sessions (list): Session objects
        - analyspar (AnalysPar): named tuple containing analysis parameters
        - sesspar (SessPar): named tuple containing session parameters
        - figpar (dict): dictionary containing figure parameters

    Optional args:
        - parallel (bool): if True, some of the analysis is run in parallel
          across CPU cores
          default: False
    """
    logger.info(
        "Compiling ROI signal means from session 1 to 3.",
        extra={"spacing": "\n"})

    logger.info(
        "Calculating ROI signal means for each session...",
        extra={"spacing": TAB})

    # one array of per-ROI signal means per session
    per_sess_means = gen_util.parallel_wrap(
        misc_analys.get_snr, sessions, [analyspar, "signal_means"],
        parallel=parallel)

    sig_mean_df = misc_analys.get_check_sess_df(sessions, analyspar=analyspar)
    sig_mean_df["signal_means"] = [means.tolist() for means in per_sess_means]

    info = {
        "analyspar": analyspar._asdict(),
        "sesspar": sesspar._asdict(),
        "extrapar": dict(),  # no extra parameters for this analysis
        "sig_mean_df": sig_mean_df.to_dict(),
    }

    helper_fcts.plot_save_all(info, figpar)
def imaging_planes(sessions, sesspar, figpar, parallel=False):
    """
    imaging_planes(sessions, sesspar, figpar)

    Retrieves imaging plane image examples, and saves the results and
    analysis parameters in a dictionary passed to the plotting/saving helper.

    Required args:
        - sessions (list): Session objects
        - sesspar (SessPar): named tuple containing session parameters
        - figpar (dict): dictionary containing figure parameters

    Optional args:
        - parallel (bool): if True, some of the analysis is run in parallel
          across CPU cores (currently unused here; kept for interface
          consistency with sibling analyses)
          default: False
    """
    logger.info(
        "Compiling imaging plane projection examples.",
        extra={"spacing": "\n"})

    imaging_plane_df = misc_analys.get_check_sess_df(sessions, roi=False)
    imaging_plane_df["max_projections"] = [
        sess.max_proj.tolist() for sess in sessions
    ]

    info = {
        "sesspar": sesspar._asdict(),
        "extrapar": dict(),  # no extra parameters for this analysis
        "imaging_plane_df": imaging_plane_df.to_dict(),
    }

    helper_fcts.plot_save_all(info, figpar)
def set_multcomp(permpar, sessions, analyspar, consec_only=True, factor=1):
    """
    set_multcomp(permpar, sessions, analyspar)

    Returns permpar updated with the number of comparisons computed from the
    sessions.

    Required args:
        - permpar (PermPar or dict): named tuple containing permutation
          parameters
        - sessions (list): Session objects
        - analyspar (AnalysPar): named tuple containing analysis parameters

    Optional args:
        - consec_only (bool): if True, only consecutive session numbers are
          correlated
          default: True
        - factor (int): multiplicative factor
          default: 1

    Returns:
        - permpar (PermPar): updated permutation parameter named tuple
    """
    sess_df = misc_analys.get_check_sess_df(sessions, analyspar=analyspar)

    # one comparison per correlated session pair, per line/plane grouping
    n_comps = sum(
        len(get_corr_pairs(lp_df, consec_only=consec_only))
        for _, lp_df in sess_df.groupby(["lines", "planes"])
    )

    return sess_ntuple_util.get_modif_ntuple(
        permpar, "multcomp", n_comps * factor)
def get_sess_grped_diffs_df(sessions, analyspar, stimpar, basepar, permpar,
                            split="by_exp", randst=None, parallel=False):
    """
    get_sess_grped_diffs_df(sessions, analyspar, stimpar, basepar)

    Returns split difference statistics for specific sessions, grouped across
    mice.

    Required args:
        - sessions (list): session objects
        - analyspar (AnalysPar): named tuple containing analysis parameters
        - stimpar (StimPar): named tuple containing stimulus parameters
        - basepar (BasePar): named tuple containing baseline parameters
        - permpar (PermPar): named tuple containing permutation parameters

    Optional args:
        - split (str): how to split data:
            "by_exp" (all exp, all unexp),
            "unexp_lock" (unexp, preceeding exp),
            "exp_lock" (exp, preceeding unexp),
            "stim_onset" (grayscr, stim on),
            "stim_offset" (stim off, grayscr)
            default: "by_exp"
        - randst (int or np.random.RandomState): random state or seed value
          to use. (-1 treated as None)
          default: None
        - parallel (bool): if True, some of the analysis is run in parallel
          across CPU cores
          default: False

    Returns:
        - diffs_df (pd.DataFrame): dataframe with one row per
          session/line/plane, and the following columns, in addition to the
          basic sess_df columns:
            - diff_stats (list): split difference stats (me, err)
            - null_CIs (list): adjusted null CI for split differences
            - raw_p_vals (float): uncorrected p-value for differences within
              sessions
            - p_vals (float): p-value for differences within sessions,
              corrected for multiple comparisons and tails
            for session comparisons, e.g. 1v2:
            - raw_p_vals_{}v{} (float): uncorrected p-value for differences
              between sessions
            - p_vals_{}v{} (float): p-value for differences between sessions,
              corrected for multiple comparisons and tails
    """
    nanpol = None if analyspar.rem_bad else "omit"

    if analyspar.tracked:
        misc_analys.check_sessions_complete(sessions, raise_err=True)

    sess_diffs_df = misc_analys.get_check_sess_df(sessions, None, analyspar)
    initial_columns = sess_diffs_df.columns.tolist()

    args_dict = {
        "analyspar": analyspar,
        "stimpar": stimpar,
        "basepar": basepar,
        "split": split,
        "return_data": True,
    }

    # sess x split x ROI
    split_stats, split_data = gen_util.parallel_wrap(
        get_sess_roi_split_stats, sessions, args_dict=args_dict,
        parallel=parallel, zip_output=True)

    # confirm that session order still matches the dataframe
    misc_analys.get_check_sess_df(sessions, sess_diffs_df)
    sess_diffs_df["roi_split_stats"] = list(split_stats)
    sess_diffs_df["roi_split_data"] = list(split_data)

    columns = initial_columns + ["diff_stats", "null_CIs"]
    diffs_df = pd.DataFrame(columns=columns)

    group_columns = ["lines", "planes", "sess_ns"]
    aggreg_cols = [col for col in initial_columns if col not in group_columns]
    for lp_grp_vals, lp_grp_df in sess_diffs_df.groupby(["lines", "planes"]):
        lp_grp_df = lp_grp_df.sort_values(["sess_ns", "mouse_ns"])
        line, plane = lp_grp_vals
        lp_name = plot_helper_fcts.get_line_plane_name(line, plane)
        logger.info(
            f"Running permutation tests for {lp_name} sessions...",
            extra={"spacing": TAB})

        # obtain ROI random split differences per session
        # done here to avoid OOM errors
        lp_rand_diffs = gen_util.parallel_wrap(
            get_rand_split_data, lp_grp_df["roi_split_data"].tolist(),
            args_list=[analyspar, permpar, randst], parallel=parallel,
            zip_output=False)

        sess_diffs = []
        row_indices = []
        sess_ns = sorted(lp_grp_df["sess_ns"].unique())
        for sess_n in sess_ns:
            row_idx = len(diffs_df)
            row_indices.append(row_idx)
            sess_grp_df = lp_grp_df.loc[lp_grp_df["sess_ns"] == sess_n]

            grp_vals = list(lp_grp_vals) + [sess_n]
            for g, group_column in enumerate(group_columns):
                diffs_df.loc[row_idx, group_column] = grp_vals[g]

            # add aggregated values for initial columns
            diffs_df = misc_analys.aggreg_columns(
                sess_grp_df, diffs_df, aggreg_cols, row_idx=row_idx,
                in_place=True)

            # group ROI split stats across mice: split x ROIs
            split_stats = np.concatenate(
                sess_grp_df["roi_split_stats"].to_numpy(), axis=-1)

            # take diff and stats across ROIs
            diffs = split_stats[1] - split_stats[0]
            diff_stats = math_util.get_stats(
                diffs, stats=analyspar.stats, error=analyspar.error,
                nanpol=nanpol)
            diffs_df.at[row_idx, "diff_stats"] = diff_stats.tolist()
            sess_diffs.append(diffs)

            # group random ROI split diffs across mice, and take stat
            rand_idxs = [
                lp_grp_df.index.tolist().index(idx)
                for idx in sess_grp_df.index
            ]
            rand_diffs = math_util.mean_med(
                np.concatenate(
                    [lp_rand_diffs[r] for r in rand_idxs], axis=0),
                axis=0, stats=analyspar.stats, nanpol=nanpol)

            # get CIs and p-values
            p_val, null_CI = rand_util.get_p_val_from_rand(
                diff_stats[0], rand_diffs, return_CIs=True,
                p_thresh=permpar.p_val, tails=permpar.tails,
                multcomp=permpar.multcomp, nanpol=nanpol)
            diffs_df.loc[row_idx, "p_vals"] = p_val
            diffs_df.at[row_idx, "null_CIs"] = null_CI

        del lp_rand_diffs  # free up memory

        # calculate p-values between sessions (0-1, 0-2, 1-2...)
        p_vals = rand_util.comp_vals_acr_groups(
            sess_diffs, n_perms=permpar.n_perms, stats=analyspar.stats,
            paired=analyspar.tracked, nanpol=nanpol, randst=randst)
        p = 0
        for i, sess_n in enumerate(sess_ns):
            for j, sess_n2 in enumerate(sess_ns[i + 1:]):
                key = f"p_vals_{int(sess_n)}v{int(sess_n2)}"
                diffs_df.loc[row_indices[i], key] = p_vals[p]
                # sess_n2 is sess_ns[i + 1 + j], so its row is
                # row_indices[i + 1 + j] (previously row_indices[j + 1],
                # which mis-assigned p-values for all pairs with i > 0)
                diffs_df.loc[row_indices[i + 1 + j], key] = p_vals[p]
                p += 1

    # add corrected p-values
    diffs_df = misc_analys.add_corr_p_vals(diffs_df, permpar)

    diffs_df["sess_ns"] = diffs_df["sess_ns"].astype(int)

    return diffs_df
def get_sess_roi_trace_df(sessions, analyspar, stimpar, basepar,
                          split="by_exp", parallel=False):
    """
    get_sess_roi_trace_df(sess, analyspar, stimpar, basepar)

    Returns ROI trace statistics for specific sessions, split as requested.

    Required args:
        - sessions (list): session objects
        - analyspar (AnalysPar): named tuple containing analysis parameters
        - stimpar (StimPar): named tuple containing stimulus parameters
        - basepar (BasePar): named tuple containing baseline parameters

    Optional args:
        - split (str): how to split data:
            "by_exp" (all exp, all unexp),
            "unexp_lock" (unexp, preceeding exp),
            "exp_lock" (exp, preceeding unexp),
            "stim_onset" (grayscr, stim on),
            "stim_offset" (stim off, grayscr)
            default: "by_exp"
        - parallel (bool): if True, some of the analysis is run in parallel
          across CPU cores
          default: False

    Returns:
        - trace_df (pd.DataFrame): dataframe with a row for each session,
          and the following columns, in addition to the basic sess_df
          columns:
            - roi_trace_stats (list): ROI trace stats
              (split x ROIs x frames x stat (me, err))
            - time_values (list): values for each frame, in seconds
              (only 0 to stimpar.post, unless split is "by_exp")
    """
    trace_df = misc_analys.get_check_sess_df(sessions, None, analyspar)

    analysis_args = {
        "analyspar": analyspar,
        "stimpar": stimpar,
        "basepar": basepar,
        "split": split,
    }

    # sess x split x ROIs x frames
    per_sess_stats, per_sess_time_values = gen_util.parallel_wrap(
        basic_analys.get_sess_roi_trace_stats, sessions,
        args_dict=analysis_args, parallel=parallel, zip_output=True)

    # confirm that session order still matches the dataframe
    misc_analys.get_check_sess_df(sessions, trace_df)

    trace_df["roi_trace_stats"] = [
        sess_stats.tolist() for sess_stats in per_sess_stats
    ]
    trace_df["time_values"] = [
        sess_times.tolist() for sess_times in per_sess_time_values
    ]

    return trace_df
def unexp_resp_stimulus_comp_sess1v3(sessions, analyspar, sesspar, stimpar,
                                     permpar, figpar, seed=None,
                                     parallel=False):
    """
    unexp_resp_stimulus_comp_sess1v3(sessions, analyspar, sesspar, stimpar,
                                     permpar, figpar)

    Retrieves changes in tracked ROI responses to unexpected sequences for
    Gabors vs visual flow stimuli, and saves the results and analysis
    parameters in a dictionary passed to the plotting/saving helper.

    Required args:
        - sessions (list): Session objects
        - analyspar (AnalysPar): named tuple containing analysis parameters
        - sesspar (SessPar): named tuple containing session parameters
        - stimpar (StimPar): named tuple containing stimulus parameters
        - permpar (PermPar): named tuple containing permutation parameters
        - figpar (dict): dictionary containing figure parameters

    Optional args:
        - seed (int): seed value to use. (-1 treated as None)
          default: None
        - parallel (bool): if True, some of the analysis is run in parallel
          across CPU cores
          default: False
    """
    logger.info(
        ("Compiling changes in unexpected responses to Gabor vs visual "
         "flow stimuli."),
        extra={"spacing": "\n"})

    if analyspar.scale:
        raise ValueError("analyspar.scale should be set to False.")

    # calculate multiple comparisons: one per line/plane, plus one
    dummy_df = misc_analys.get_check_sess_df(
        sessions, None, analyspar).drop_duplicates(subset=["lines", "planes"])
    permpar = sess_ntuple_util.get_modif_ntuple(
        permpar, "multcomp", len(dummy_df) + 1)

    comp_sess = [1, 3]
    datatype = "rel_unexp_resp"
    rel_sess = 1
    pop_stats = True

    unexp_comp_df = stim_analys.get_stim_stats_df(
        sessions,
        analyspar=analyspar,
        stimpar=stimpar,
        permpar=permpar,
        comp_sess=comp_sess,
        datatype=datatype,
        rel_sess=rel_sess,
        pop_stats=pop_stats,
        randst=seed,
        parallel=parallel,
    )

    info = {
        "analyspar": analyspar._asdict(),
        "sesspar": sesspar._asdict(),
        "stimpar": stimpar._asdict(),
        "permpar": permpar._asdict(),
        "extrapar": {
            "comp_sess": comp_sess,
            "datatype": datatype,
            "rel_sess": rel_sess,
            "pop_stats": pop_stats,
            "seed": seed,
        },
        "unexp_comp_df": unexp_comp_df.to_dict(),
    }

    helper_fcts.plot_save_all(info, figpar)
def visual_flow_tracked_roi_abs_usi_means_sess123(
        sessions, analyspar, sesspar, stimpar, basepar, idxpar, permpar,
        figpar, seed=None, parallel=False):
    """
    visual_flow_tracked_roi_abs_usi_means_sess123(
        sessions, analyspar, sesspar, stimpar, basepar, idxpar, permpar,
        figpar)

    Retrieves mean absolute for tracked ROI visual flow USIs for session
    1 to 3. Saves results and parameters relevant to analysis in a
    dictionary.

    Required args:
        - sessions (list): Session objects
        - analyspar (AnalysPar): named tuple containing analysis parameters
        - sesspar (SessPar): named tuple containing session parameters
        - stimpar (StimPar): named tuple containing stimulus parameters
        - basepar (BasePar): named tuple containing baseline parameters
        - idxpar (IdxPar): named tuple containing index parameters
        - permpar (PermPar): named tuple containing permutation parameters
        - figpar (dict): dictionary containing figure parameters

    Optional args:
        - seed (int): seed value to use. (-1 treated as None)
          default: None
        - parallel (bool): if True, some of the analysis is run in parallel
          across CPU cores
          default: False
    """
    # fixed copy-paste defect: message previously said "Gabor USIs",
    # but this analysis is for visual flow USIs
    logger.info(
        ("Compiling absolute means of tracked ROI visual flow USIs for "
         "sessions 1 to 3."),
        extra={"spacing": "\n"})

    if not analyspar.tracked:
        raise ValueError("analyspar.tracked should be set to True.")

    # remove incomplete session series and warn
    sessions = misc_analys.check_sessions_complete(sessions)

    # calculate multiple comparisons
    dummy_df = misc_analys.get_check_sess_df(
        sessions, None, analyspar).drop_duplicates(
            subset=["lines", "planes", "sess_ns"])

    permpar = misc_analys.set_multcomp(permpar, sess_df=dummy_df, CIs=False)

    absolute = True
    by_mouse = False
    idx_stats_df = usi_analys.get_idx_stats_df(
        sessions,
        analyspar=analyspar,
        stimpar=stimpar,
        basepar=basepar,
        idxpar=idxpar,
        permpar=permpar,
        absolute=absolute,
        by_mouse=by_mouse,
        randst=seed,
        parallel=parallel,
    )

    extrapar = {
        "absolute": absolute,
        "by_mouse": by_mouse,
        "seed": seed,
    }

    info = {"analyspar" : analyspar._asdict(),
            "sesspar"   : sesspar._asdict(),
            "stimpar"   : stimpar._asdict(),
            "basepar"   : basepar._asdict(),
            "idxpar"    : idxpar._asdict(),
            "permpar"   : permpar._asdict(),
            "extrapar"  : extrapar,
            "idx_stats_df": idx_stats_df.to_dict()
            }

    helper_fcts.plot_save_all(info, figpar)
def run_sess_logreg(sess, analyspar, stimpar, logregpar, n_splits=100,
                    n_shuff_splits=300, seed=None, parallel=False):
    """
    run_sess_logreg(sess, analyspar, stimpar, logregpar)

    Runs logistic regressions on a session (real data and shuffled), and
    returns statistics dataframes.

    Required args:
        - sess (Session): Session object
        - analyspar (AnalysPar): named tuple containing analysis parameters
        - stimpar (StimPar): named tuple containing stimulus parameters
        - logregpar (LogRegPar): named tuple containing logistic regression
          parameters

    Optional args:
        - n_splits (int): number of data splits to run logistic regressions
          on
          default: 100
        - n_shuff_splits (int): number of shuffled data splits to run
          logistic regressions on
          default: 300
        - seed (int): seed value to use. (-1 treated as None)
          default: None
        - parallel (bool): if True, some of the analysis is run in parallel
          across CPU cores
          default: False

    Returns:
        - data_stats_df (pd.DataFrame): dataframe with only one data row
          containing data stats for each score and data subset.
        - shuffle_df (pd.DataFrame): dataframe where each row contains data
          for different data shuffles, and each column contains data for
          each score and data subset.
    """
    # resolve the seed without seeding yet; NOTE(review): seed + b below
    # assumes seed_all returns a concrete int here — confirm
    seed = rand_util.seed_all(seed, log_seed=False, seed_now=False)

    # retrieve data
    input_data, target_data, ctrl_ns = get_decoding_data(
        sess, analyspar, stimpar, comp=logregpar.comp, ctrl=logregpar.ctrl)

    scores_df = misc_analys.get_check_sess_df([sess], None, analyspar)
    common_columns = scores_df.columns.tolist()
    logreg_columns = ["comp", "ctrl", "bal", "shuffle"]

    # do checks
    if logregpar.q1v4 or logregpar.exp_v_unexp:
        raise NotImplementedError("q1v4 and exp_v_unexp are not implemented.")
    if n_splits <= 0 or n_shuff_splits <= 0:
        raise ValueError("n_splits and n_shuff_splits must be greater than 0.")

    set_types = ["train", "test"]
    score_types = ["neg_log_loss", "accuracy", "balanced_accuracy"]
    # all (set, score) combinations, e.g. ("train", "accuracy")
    set_score_types = list(itertools.product(set_types, score_types))

    # extrapar is mutated per batch below ("shuffle", "n_runs") and passed
    # into each regression run
    extrapar = dict()
    for shuffle in [False, True]:
        n_runs = n_shuff_splits if shuffle else n_splits
        extrapar["shuffle"] = shuffle
        temp_dfs = []
        # run regressions in batches of at most MAX_SIMULT_RUNS
        for b, n in enumerate(range(0, n_runs, MAX_SIMULT_RUNS)):
            # runs remaining in this batch
            extrapar["n_runs"] = int(np.min([MAX_SIMULT_RUNS, n_runs - n]))
            # suppress info-level logging during the regression runs
            with logger_util.TempChangeLogLevel(level="warning"):
                # distinct seed per batch (seed + b) for distinct splits
                mod_cvs, _, _ = logreg_util.run_logreg_cv_sk(
                    input_data, target_data, logregpar._asdict(), extrapar,
                    analyspar.scale, ctrl_ns, randst=seed + b,
                    parallel=parallel, save_models=False,
                    catch_set_prob=False)
            # collect one column per (set, score) combination
            temp_df = pd.DataFrame()
            for set_type, score_type in set_score_types:
                key = f"{set_type}_{score_type}"
                temp_df[key] = mod_cvs[key]
            temp_dfs.append(temp_df)

        # compile batch scores, and get session stats for non shuffled data
        temp_df = pd.concat(temp_dfs, ignore_index=True)
        if not shuffle:
            temp_df = get_df_stats(temp_df, analyspar)

        # add columns to df
        score_columns = temp_df.columns.tolist()
        for col in common_columns:
            # broadcast the single session row to all rows
            temp_df[col] = scores_df.loc[0, col]
        for col in logreg_columns:
            if col != "shuffle":
                temp_df[col] = logregpar._asdict()[col]
            else:
                temp_df[col] = shuffle

        # re-sort columns
        temp_df = temp_df.reindex(
            common_columns + logreg_columns + score_columns, axis=1)

        if shuffle:
            shuffle_df = temp_df
        else:
            data_stats_df = temp_df

    return data_stats_df, shuffle_df
def get_pupil_run_trace_df(sessions, analyspar, stimpar, basepar,
                           split="by_exp", parallel=False):
    """
    get_pupil_run_trace_df(sessions, analyspar, stimpar, basepar)

    Returns pupil and running traces for specific sessions, split as
    requested.

    Required args:
        - sessions (list): session objects
        - analyspar (AnalysPar): named tuple containing analysis parameters
        - stimpar (StimPar): named tuple containing stimulus parameters
        - basepar (BasePar): named tuple containing baseline parameters

    Optional args:
        - split (str): how to split data:
            "by_exp" (all exp, all unexp),
            "unexp_lock" (unexp, preceeding exp),
            "exp_lock" (exp, preceeding unexp),
            "stim_onset" (grayscr, stim on),
            "stim_offset" (stim off, grayscr)
            default: "by_exp"
        - parallel (bool): if True, some of the analysis is run in parallel
          across CPU cores
          default: False

    Returns:
        - trace_df (pd.DataFrame): dataframe with a row for each session,
          and the following columns, in addition to the basic sess_df
          columns:
            - run_traces (list): running velocity traces
              (split x seqs x frames)
            - run_time_values (list): values for each frame, in seconds
              (only 0 to stimpar.post, unless split is "by_exp")
            - pupil_traces (list): pupil diameter traces
              (split x seqs x frames)
            - pupil_time_values (list): values for each frame, in seconds
              (only 0 to stimpar.post, unless split is "by_exp")
    """
    trace_df = misc_analys.get_check_sess_df(
        sessions, None, analyspar, roi=False)

    shared_args = {
        "analyspar": analyspar,
        "stimpar": stimpar,
        "baseline": basepar.baseline,
        "split": split,
    }

    # confirm that session order still matches the dataframe
    misc_analys.get_check_sess_df(sessions, trace_df)

    for datatype in ["pupil", "run"]:
        shared_args["datatype"] = datatype

        # sess x split x seq x frames
        per_sess_traces, per_sess_time_values = gen_util.parallel_wrap(
            basic_analys.get_split_data_by_sess, sessions,
            args_dict=shared_args, parallel=parallel, zip_output=True)

        # add columns to dataframe
        trace_df[f"{datatype}_traces"] = list(per_sess_traces)
        trace_df[f"{datatype}_time_values"] = list(per_sess_time_values)

    return trace_df
def gabor_sequence_diffs_sess123(sessions, analyspar, sesspar, stimpar,
                                 basepar, permpar, figpar, seed=None,
                                 parallel=False):
    """
    gabor_sequence_diffs_sess123(sessions, analyspar, sesspar, stimpar,
                                 basepar, permpar, figpar)

    Retrieves differences in ROI responses to Gabor sequences from sessions
    1 to 3, and saves the results and analysis parameters in a dictionary
    passed to the plotting/saving helper.

    Required args:
        - sessions (list): Session objects
        - analyspar (AnalysPar): named tuple containing analysis parameters
        - sesspar (SessPar): named tuple containing session parameters
        - stimpar (StimPar): named tuple containing stimulus parameters
        - basepar (BasePar): named tuple containing baseline parameters
        - permpar (PermPar): named tuple containing permutation parameters
        - figpar (dict): dictionary containing figure parameters

    Optional args:
        - seed (int): seed value to use. (-1 treated as None)
          default: None
        - parallel (bool): if True, some of the analysis is run in parallel
          across CPU cores
          default: False
    """
    logger.info(
        "Compiling Gabor sequence differences from session 1 to 3.",
        extra={"spacing": "\n"})

    # calculate multiple comparisons from unique line/plane/session combos
    dummy_df = misc_analys.get_check_sess_df(
        sessions, None, analyspar).drop_duplicates(
            subset=["lines", "planes", "sess_ns"])
    permpar = misc_analys.set_multcomp(permpar, sess_df=dummy_df)

    split = "by_exp"
    diffs_df = seq_analys.get_sess_grped_diffs_df(
        sessions,
        analyspar=analyspar,
        stimpar=stimpar,
        basepar=basepar,
        permpar=permpar,
        split=split,
        randst=seed,
        parallel=parallel,
    )

    info = {
        "analyspar": analyspar._asdict(),
        "sesspar": sesspar._asdict(),
        "stimpar": stimpar._asdict(),
        "basepar": basepar._asdict(),
        "permpar": permpar._asdict(),
        "extrapar": {"split": split, "seed": seed},
        "diffs_df": diffs_df.to_dict(),
    }

    helper_fcts.plot_save_all(info, figpar)
def visual_flow_rel_resp_sess123(sessions, analyspar, sesspar, stimpar,
                                 permpar, figpar, seed=None, parallel=False):
    """
    visual_flow_rel_resp_sess123(sessions, analyspar, sesspar, stimpar,
                                 permpar, figpar)

    Retrieves ROI responses to expected and unexpected visual flow, relative
    to session 1, and saves the results and analysis parameters in a
    dictionary passed to the plotting/saving helper.

    Required args:
        - sessions (list): Session objects
        - analyspar (AnalysPar): named tuple containing analysis parameters
        - sesspar (SessPar): named tuple containing session parameters
        - stimpar (StimPar): named tuple containing stimulus parameters
        - permpar (PermPar): named tuple containing permutation parameters
        - figpar (dict): dictionary containing figure parameters

    Optional args:
        - seed (int): seed value to use. (-1 treated as None)
          default: None
        - parallel (bool): if True, some of the analysis is run in parallel
          across CPU cores
          default: False
    """
    logger.info(
        "Compiling ROI visual flow responses relative to session 1.",
        extra={"spacing": "\n"})

    if analyspar.scale:
        raise ValueError("analyspar.scale should be set to False.")

    # calculate multiple comparisons from unique line/plane/session combos
    dummy_df = misc_analys.get_check_sess_df(
        sessions, None, analyspar).drop_duplicates(
            subset=["lines", "planes", "sess_ns"])
    permpar = misc_analys.set_multcomp(
        permpar, sess_df=dummy_df, CIs=False, factor=2)

    rel_sess = 1
    rel_resp_df = seq_analys.get_rel_resp_stats_df(
        sessions,
        analyspar=analyspar,
        stimpar=stimpar,
        permpar=permpar,
        rel_sess=rel_sess,
        randst=seed,
        parallel=parallel,
    )

    info = {
        "analyspar": analyspar._asdict(),
        "sesspar": sesspar._asdict(),
        "stimpar": stimpar._asdict(),
        "permpar": permpar._asdict(),
        "extrapar": {"rel_sess": rel_sess, "seed": seed},
        "rel_resp_df": rel_resp_df.to_dict(),
    }

    helper_fcts.plot_save_all(info, figpar)
def gabor_decoding_sess123(sessions, analyspar, sesspar, stimpar, logregpar,
                           permpar, figpar, seed=None, parallel=False):
    """
    gabor_decoding_sess123(sessions, analyspar, sesspar, stimpar, logregpar,
                           permpar, figpar)

    Runs decoding analyses (D and U orientations), and saves the results and
    analysis parameters in a dictionary passed to the plotting/saving helper.

    Required args:
        - sessions (list): Session objects
        - analyspar (AnalysPar): named tuple containing analysis parameters
        - sesspar (SessPar): named tuple containing session parameters
        - stimpar (StimPar): named tuple containing stimulus parameters
        - logregpar (LogRegPar): named tuple containing logistic regression
          parameters
        - permpar (PermPar): named tuple containing permutation parameters
        - figpar (dict): dictionary containing figure parameters

    Optional args:
        - seed (int): seed value to use. (-1 treated as None)
          default: None
        - parallel (bool): if True, some of the analysis is run in parallel
          across CPU cores
          default: False
    """
    comp_str = logregpar.comp.replace("ori", " orientation")
    logger.info(
        f"Compiling Gabor {comp_str} decoder performances for sessions 1 to 3.",
        extra={"spacing": "\n"})

    if not analyspar.scale:
        raise ValueError("analyspar.scale should be True.")

    # calculate multiple comparisons from unique line/plane/session combos
    dummy_df = misc_analys.get_check_sess_df(
        sessions, None, analyspar).drop_duplicates(
            subset=["lines", "planes", "sess_ns"])
    permpar = misc_analys.set_multcomp(
        permpar, sess_df=dummy_df, pairs=False, factor=2)

    n_splits = 100
    score_df = decoding_analys.run_sess_logregs(
        sessions,
        analyspar=analyspar,
        stimpar=stimpar,
        logregpar=logregpar,
        permpar=permpar,
        n_splits=n_splits,
        seed=seed,
        parallel=parallel,
    )

    info = {
        "analyspar": analyspar._asdict(),
        "sesspar": sesspar._asdict(),
        "stimpar": stimpar._asdict(),
        "logregpar": logregpar._asdict(),
        "permpar": permpar._asdict(),
        "extrapar": {"n_splits": n_splits, "seed": seed},
        "scores_df": score_df.to_dict(),
    }

    helper_fcts.plot_save_all(info, figpar)
def get_ex_traces_df(sessions, analyspar, stimpar, basepar, n_ex=6,
                     rolling_win=4, randst=None, parallel=False):
    """
    get_ex_traces_df(sessions, analyspar, stimpar, basepar)

    Returns example ROI traces dataframe.

    Required args:
        - sessions (list): Session objects
        - analyspar (AnalysPar): named tuple containing analysis parameters
        - stimpar (StimPar): named tuple containing stimulus parameters
        - basepar (BasePar): named tuple containing baseline parameters

    Optional args:
        - n_ex (int): number of example traces to retain
          default: 6
        - rolling_win (int): window to use in rolling mean over individual
          trial traces
          default: 4
        - randst (int or np.random.RandomState): random state or seed value
          to use. (-1 treated as None)
          default: None
        - parallel (bool): if True, some of the analysis is run in parallel
          across CPU cores
          default: False

    Returns:
        - selected_roi_data (pd.DataFrame): dataframe with a row for each
          ROI, and the following columns, in addition to the basic sess_df
          columns:
            - time_values (list): values for each frame, in seconds
              (only 0 to stimpar.post, unless split is "by_exp")
            - roi_ns (list): selected ROI number
            - traces_sm (list): selected ROI sequence traces, smoothed, with
              dims: seq x frames
            - trace_stats (list): selected ROI trace mean or median
    """
    retained_traces_df = misc_analys.get_check_sess_df(
        sessions, None, analyspar)
    initial_columns = retained_traces_df.columns

    logger.info(f"Identifying example ROIs for each session...",
                extra={"spacing": TAB})

    # one dict of per-ROI data per session
    retained_roi_data = gen_util.parallel_wrap(
        get_sess_ex_traces, sessions,
        [analyspar, stimpar, basepar, rolling_win], parallel=parallel)

    randst = rand_util.get_np_rand_state(randst, set_none=True)

    # add data to dataframe
    new_columns = list(retained_roi_data[0])
    retained_traces_df = gen_util.set_object_columns(
        retained_traces_df, new_columns, in_place=True)

    # copy each session's retained ROI data into its dataframe row,
    # matched by session ID
    for i, sess in enumerate(sessions):
        row_idx = retained_traces_df.loc[
            retained_traces_df["sessids"] == sess.sessid].index
        if len(row_idx) != 1:
            raise RuntimeError(
                "Expected exactly one dataframe row to match session ID.")
        row_idx = row_idx[0]
        for column, value in retained_roi_data[i].items():
            retained_traces_df.at[row_idx, column] = value

    # select a few ROIs per line/plane/session
    columns = retained_traces_df.columns.tolist()
    # output columns drop the "roi_" prefix from trace columns
    columns = [column.replace("roi_trace", "trace") for column in columns]
    selected_traces_df = pd.DataFrame(columns=columns)

    group_columns = ["lines", "planes", "sess_ns"]
    for _, trace_grp_df in retained_traces_df.groupby(group_columns):
        trace_grp_df = trace_grp_df.sort_values("mouse_ns")
        grp_indices = trace_grp_df.index
        # number of retained ROIs per session row in this group
        n_per = np.asarray([len(roi_ns) for roi_ns in trace_grp_df["roi_ns"]])
        roi_ns = np.concatenate(trace_grp_df["roi_ns"].tolist())
        # sample n_ex ROIs (without replacement) from the group's pooled
        # ROIs; raises if fewer than n_ex ROIs are available
        concat_idxs = np.sort(
            randst.choice(len(roi_ns), n_ex, replace=False))
        for concat_idx in concat_idxs:
            row_idx = len(selected_traces_df)
            # which session row the pooled index falls into
            sess_idx = np.where(concat_idx < np.cumsum(n_per))[0][0]
            source_row = trace_grp_df.loc[grp_indices[sess_idx]]
            for column in initial_columns:
                selected_traces_df.at[row_idx, column] = source_row[column]
            selected_traces_df.at[row_idx, "time_values"] = \
                source_row["time_values"].tolist()
            # position of the ROI within its own session's arrays
            roi_idx = concat_idx - n_per[: sess_idx].sum()
            for col in ["roi_ns", "traces_sm", "trace_stats"]:
                source_col = col.replace("trace", "roi_trace")
                selected_traces_df.at[row_idx, col] = \
                    source_row[source_col][roi_idx].tolist()

    for column in [
        "mouse_ns", "mouseids", "sess_ns", "sessids", "nrois", "roi_ns"
    ]:
        selected_traces_df[column] = selected_traces_df[column].astype(int)

    return selected_traces_df
def get_resp_df(sessions, analyspar, stimpar, rel_sess=1, parallel=False):
    """
    get_resp_df(sessions, analyspar, stimpar)

    Returns relative response dataframe for requested sessions.

    Required args:
        - sessions (list):
            session objects
        - analyspar (AnalysPar):
            named tuple containing analysis parameters
        - stimpar (StimPar):
            named tuple containing stimulus parameters

    Optional args:
        - rel_sess (int):
            number of session relative to which data should be scaled, for
            each mouse. If None, relative data is not added.
            default: 1
        - parallel (bool):
            if True, some of the analysis is run in parallel across CPU cores
            default: False

    Returns:
        - resp_data_df (pd.DataFrame):
            dataframe with response stats (2D array, ROI x stats) under
            keys for expected ("exp") and unexpected ("unexp") data,
            separated by Gabor frame (e.g., "exp_3", "unexp_G")
            if stimpar.stimtype == "gabors", and
            with "rel_{}" columns added for each input column with "exp" in
            its name if rel_sess is not None.

    Raises:
        - NotImplementedError:
            if sessions do not appear in the expected order in the dataframe
    """

    if analyspar.tracked:
        misc_analys.check_sessions_complete(sessions, raise_err=True)

    sessids = [sess.sessid for sess in sessions]
    resp_data_df = misc_analys.get_check_sess_df(sessions, analyspar=analyspar)

    # double check that sessions are in correct order
    if resp_data_df["sessids"].tolist() != sessids:
        raise NotImplementedError(
            "Implementation error. Sessions must appear in correct order in "
            "resp_data_df.")

    # no placeholders, so a plain string (not an f-string) is used
    logger.info("Loading data for each session...", extra={"spacing": TAB})
    data_dicts = gen_util.parallel_wrap(
        get_sess_integ_resp_dict, sessions, args_list=[analyspar, stimpar],
        parallel=parallel)

    # add data to df
    misc_analys.get_check_sess_df(sessions, resp_data_df)
    if data_dicts:
        # create the object columns once, based on the first session's keys
        # (all sessions are expected to share the same keys)
        resp_data_df = gen_util.set_object_columns(
            resp_data_df, list(data_dicts[0]), in_place=True)
    for i, idx in enumerate(resp_data_df.index):
        for key, value in data_dicts[i].items():
            # retain stat only, not error
            resp_data_df.at[idx, key] = value[:, 0]

    # add relative data
    if rel_sess is not None:
        resp_data_df = add_relative_resp_data(
            resp_data_df, analyspar, rel_sess=rel_sess, in_place=True)

    return resp_data_df
def tracked_roi_usis_stimulus_comp_sess1v3(sessions, analyspar, sesspar,
                                           stimpar, basepar, idxpar, permpar,
                                           figpar, seed=None, parallel=False):
    """
    tracked_roi_usis_stimulus_comp_sess1v3(sessions, analyspar, sesspar,
                                           stimpar, basepar, idxpar, permpar,
                                           figpar)

    Retrieves changes in tracked ROI USIs for Gabors vs visual flow stimuli.

    Saves results and parameters relevant to analysis in a dictionary.

    Required args:
        - sessions (list):
            Session objects
        - analyspar (AnalysPar):
            named tuple containing analysis parameters
        - sesspar (SessPar):
            named tuple containing session parameters
        - stimpar (StimPar):
            named tuple containing stimulus parameters
        - basepar (BasePar):
            named tuple containing baseline parameters
        - idxpar (IdxPar):
            named tuple containing index parameters
        - permpar (PermPar):
            named tuple containing permutation parameters
        - figpar (dict):
            dictionary containing figure parameters

    Optional args:
        - seed (int):
            seed value to use. (-1 treated as None)
            default: None
        - parallel (bool):
            if True, some of the analysis is run in parallel across CPU cores
            default: False
    """

    logger.info(
        ("Compiling changes in ROI USIs for Gabors vs visual flow stimuli."),
        extra={"spacing": "\n"})

    if not analyspar.tracked:
        raise ValueError("analyspar.tracked should be set to True.")

    # drop (and warn about) any incomplete session series
    sessions = misc_analys.check_sessions_complete(sessions)

    # one comparison per unique line/plane combination, plus one
    lp_df = misc_analys.get_check_sess_df(sessions, None, analyspar)
    n_comps = len(lp_df.drop_duplicates(subset=["lines", "planes"])) + 1
    permpar = sess_ntuple_util.get_modif_ntuple(permpar, "multcomp", n_comps)

    # analysis settings, recorded alongside the results
    extrapar = {
        "comp_sess": [1, 3],
        "datatype": "usis",
        "pop_stats": True,
        "seed": seed,
    }

    usi_comp_df = stim_analys.get_stim_stats_df(
        sessions,
        analyspar=analyspar,
        stimpar=stimpar,
        basepar=basepar,
        idxpar=idxpar,
        permpar=permpar,
        comp_sess=extrapar["comp_sess"],
        datatype=extrapar["datatype"],
        pop_stats=extrapar["pop_stats"],
        randst=seed,
        parallel=parallel,
    )

    info = {
        "analyspar": analyspar._asdict(),
        "sesspar": sesspar._asdict(),
        "stimpar": stimpar._asdict(),
        "basepar": basepar._asdict(),
        "idxpar": idxpar._asdict(),
        "permpar": permpar._asdict(),
        "extrapar": extrapar,
        "usi_comp_df": usi_comp_df.to_dict(),
    }

    helper_fcts.plot_save_all(info, figpar)
def run_sess_logregs(sessions, analyspar, stimpar, logregpar, permpar,
                     n_splits=100, seed=None, parallel=False):
    """
    run_sess_logregs(sessions, analyspar, stimpar, logregpar, permpar)

    Runs logistic regressions on sessions (real data and shuffled), and
    returns statistics dataframe. Number of shuffles is determined by
    permpar.n_perms.

    Required args:
        - sessions (list):
            Session objects
        - analyspar (AnalysPar):
            named tuple containing analysis parameters
        - stimpar (StimPar):
            named tuple containing stimulus parameters
        - logregpar (LogRegPar):
            named tuple containing logistic regression parameters
        - permpar (PermPar):
            named tuple containing permutation parameters

    Optional args:
        - n_splits (int):
            number of data splits to run logistic regressions on
            default: 100
        - seed (int):
            seed value to use. (-1 treated as None)
            default: None
        - parallel (bool):
            if True, some of the analysis is run in parallel across CPU cores
            default: False

    Returns:
        - score_stats_df (pd.DataFrame):
            dataframe with logistic regression score statistics, shuffled
            score confidence intervals, and test set p-values for each
            line/plane/session.
    """

    all_sessids = [sess.sessid for sess in sessions]
    sess_df = misc_analys.get_check_sess_df(sessions, None, analyspar)
    n_total = len(sess_df)

    group_columns = ["lines", "planes", "sess_ns"]
    grp_result_dfs = []
    n_run = 0  # running count of sessions processed, for progress logging
    for _, grp_df in sess_df.groupby(group_columns):
        grp_df = grp_df.sort_values("mouse_ns")
        grp_sessions = [
            sessions[all_sessids.index(sessid)]
            for sessid in grp_df["sessids"]
        ]

        per_sess_stats, per_sess_shuffles = [], []
        for sess in grp_sessions:
            logger.info(
                f"Running decoders for session {n_run + 1}/{n_total}...",
                extra={"spacing": f"\n{TAB}"})
            data_stats_df, shuffle_df = run_sess_logreg(
                sess,
                analyspar=analyspar,
                stimpar=stimpar,
                logregpar=logregpar,
                n_splits=n_splits,
                n_shuff_splits=permpar.n_perms,
                seed=seed,
                parallel=parallel)
            per_sess_stats.append(data_stats_df)
            per_sess_shuffles.append(shuffle_df)
            n_run += 1

        # collate real and shuffled results for this line/plane/session group
        grp_stats_df = pd.concat(per_sess_stats, ignore_index=True)
        grp_result_dfs.append(
            collate_results(
                grp_stats_df, per_sess_shuffles, analyspar, permpar))

    score_stats_df = pd.concat(grp_result_dfs, ignore_index=True)
    score_stats_df = misc_analys.add_corr_p_vals(score_stats_df, permpar)

    # record the numbers of real and shuffled splits used
    score_stats_df["n_splits_per"] = n_splits
    score_stats_df["n_shuffled_splits_per"] = permpar.n_perms

    # retain unique (first) values for grouping and log reg parameter columns
    for col in group_columns + list(logregpar._asdict().keys()):
        if col in score_stats_df.columns:
            score_stats_df[col] = score_stats_df[col].apply(
                lambda vals: vals[0])

    score_stats_df["sess_ns"] = score_stats_df["sess_ns"].astype(int)

    return score_stats_df
def get_roi_tracking_df(sessions, analyspar, reg_only=False, proj=False,
                        crop_info=False, parallel=False):
    """
    get_roi_tracking_df(sessions, analyspar)

    Return ROI tracking information for the requested sessions.

    Required args:
        - sessions (list):
            Session objects
        - analyspar (AnalysPar):
            named tuple containing analysis parameters

    Optional args:
        - proj (bool):
            if True, max projections are included in the output dataframe
            default: False
        - reg_only (bool):
            if True, only registered masks, and projections if proj is True,
            are included in the output dataframe
            default: False
        - crop_info (bool or str):
            if not False, the type of cropping information to include
            ("small" for the small plots, "large" for the large plots)
            default: False
        - parallel (bool):
            if True, some of the analysis is run in parallel across CPU cores
            default: False

    Returns:
        - roi_mask_df (pd.DataFrame):
            dataframe with a row for each mouse, and the following columns,
            in addition to the basic sess_df columns:
            - "registered_roi_mask_idxs" (list): list of mask indices,
                registered across sessions, for each session
                (flattened across ROIs) ((sess, hei, wid) x val)
            - "roi_mask_shapes" (list): shape into which ROI mask indices
                index (sess x hei x wid)

            if not reg_only:
            - "roi_mask_idxs" (list): list of mask indices for each session,
                and each ROI (sess x ((ROI, hei, wid) x val)) (not registered)

            if proj:
            - "registered_max_projections" (list): pixel intensities of maximum
                projection for the plane (hei x wid), after registration across
                sessions

            if proj and not reg_only:
            - "max_projections" (list): pixel intensities of maximum projection
                for the plane (hei x wid)

            if crop_info:
            - "crop_fact" (num): factor by which to crop masks (> 1)
            - "shift_prop_hei" (float): proportion by which to shift cropped
                mask center vertically from left edge [0, 1]
            - "shift_prop_wid" (float): proportion by which to shift cropped
                mask center horizontally from left edge [0, 1]

    Raises:
        - ValueError:
            if analyspar.tracked is False
        - NotImplementedError:
            if no preset cropping information exists for a mouse
        - RuntimeError:
            if registered ROI mask shapes differ across a mouse's sessions
    """

    if not analyspar.tracked:
        raise ValueError("analyspar.tracked must be True for this analysis.")

    misc_analys.check_sessions_complete(sessions, raise_err=True)

    sess_df = misc_analys.get_check_sess_df(sessions, analyspar=analyspar)

    # if cropping, check right away for dictionary with the preset parameters
    if crop_info:
        if crop_info == "small":
            crop_dict = SMALL_CROP_DICT
        elif crop_info == "large":
            crop_dict = LARGE_CROP_DICT
        else:
            # raises an error for unrecognized crop_info values
            gen_util.accepted_values_error(
                "crop_info", crop_info, ["small", "large"])
        for mouse_n in sess_df["mouse_ns"].unique():
            # cast to int, as crop_dict is keyed by int mouse numbers
            if int(mouse_n) not in crop_dict:
                raise NotImplementedError(
                    f"No preset cropping information found for mouse {mouse_n}."
                )

    # collect ROI mask data, in parallel across sessions
    sess_dicts = gen_util.parallel_wrap(
        get_sess_reg_mask_info, sessions, args_list=[analyspar, True, proj],
        parallel=parallel)
    all_sessids = [sess.sessid for sess in sessions]

    group_columns = ["planes", "lines", "mouse_ns"]
    initial_columns = sess_df.columns.tolist()
    obj_columns = ["registered_roi_mask_idxs", "roi_mask_shapes"]
    if not reg_only:
        obj_columns.append("roi_mask_idxs")
    if proj:
        obj_columns.append("registered_max_projections")
        if not reg_only:
            obj_columns.append("max_projections")

    roi_mask_df = pd.DataFrame(columns=initial_columns + obj_columns)

    aggreg_cols = [col for col in initial_columns if col not in group_columns]
    for grp_vals, grp_df in sess_df.groupby(group_columns):
        row_idx = len(roi_mask_df)
        for g, group_column in enumerate(group_columns):
            roi_mask_df.loc[row_idx, group_column] = grp_vals[g]

        # add aggregated values for initial columns
        roi_mask_df = misc_analys.aggreg_columns(
            grp_df, roi_mask_df, aggreg_cols, row_idx=row_idx, in_place=True,
            by_mouse=True)

        sessids = sorted(grp_df["sessids"].tolist())
        reg_roi_masks, roi_mask_idxs = [], []
        if proj:
            reg_max_projs, max_projs = [], []
        roi_mask_shape = None
        for sessid in sessids:
            sess_dict = sess_dicts[all_sessids.index(sessid)]
            reg_roi_mask = sess_dict["registered_roi_masks"]
            # flatten masks across ROIs
            reg_roi_masks.append(np.max(reg_roi_mask, axis=0))
            if roi_mask_shape is None:
                roi_mask_shape = reg_roi_mask.shape
            elif roi_mask_shape != reg_roi_mask.shape:
                raise RuntimeError(
                    "ROI mask shapes across sessions should match, for the "
                    "same mouse.")
            if not reg_only:
                # store sparse (where) indices instead of full boolean masks
                roi_mask_idxs.append([
                    idxs.tolist() for idxs in np.where(sess_dict["roi_masks"])
                ])
            if proj:
                reg_max_projs.append(
                    sess_dict["registered_max_projection"].tolist())
                if not reg_only:
                    max_projs.append(sess_dict["max_projection"].tolist())

        # add to the dataframe
        roi_mask_df.at[row_idx, "registered_roi_mask_idxs"] = \
            [idxs.tolist() for idxs in np.where(reg_roi_masks)]
        roi_mask_df.at[row_idx, "roi_mask_shapes"] = roi_mask_shape

        if not reg_only:
            roi_mask_df.at[row_idx, "roi_mask_idxs"] = roi_mask_idxs

        if proj:
            roi_mask_df.at[row_idx, "registered_max_projections"] = \
                reg_max_projs
            if not reg_only:
                roi_mask_df.at[row_idx, "max_projections"] = max_projs

        # add cropping info
        if crop_info:
            mouse_n = grp_vals[group_columns.index("mouse_ns")]
            # index with int(mouse_n), matching the validation check above
            crop_fact, shift_prop_hei, shift_prop_wid = crop_dict[int(mouse_n)]
            roi_mask_df.at[row_idx, "crop_fact"] = crop_fact
            roi_mask_df.at[row_idx, "shift_prop_hei"] = shift_prop_hei
            roi_mask_df.at[row_idx, "shift_prop_wid"] = shift_prop_wid

    roi_mask_df["mouse_ns"] = roi_mask_df["mouse_ns"].astype(int)

    return roi_mask_df