# Imports assumed by the functions below (an addition; the original excerpt
# omitted them). Helpers such as fitted_params_per_batch, param_scatter_batch,
# get_dataframes, get_filtered_cellids, improved_cells_to_list, etc. come
# from the surrounding NEMS-based codebase.
import copy

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as st

import nems.db as nd


def mean_contrast_variables(batch, modelname):
    df1 = fitted_params_per_batch(batch, modelname, mod_key='fn')
    amplitude_mods = df1[df1.index.str.contains('amplitude_mod')]
    base_mods = df1[df1.index.str.contains('base_mod')]
    kappa_mods = df1[df1.index.str.contains('kappa_mod')]
    shift_mods = df1[df1.index.str.contains('shift_mod')]

    avg_amp = amplitude_mods['mean'][0]
    avg_base = base_mods['mean'][0]
    avg_kappa = kappa_mods['mean'][0]
    avg_shift = shift_mods['mean'][0]

    max_amp = amplitude_mods['max'][0]
    max_base = base_mods['max'][0]
    max_kappa = kappa_mods['max'][0]
    max_shift = shift_mods['max'][0]

#    raw_amp = amplitude_mods.values[0][5:]
#    raw_base = base_mods.values[0][5:]
#    raw_kappa = kappa_mods.values[0][5:]
#    raw_shift = shift_mods.values[0][5:]

    print("Mean amplitude_mod: %.06f\n"
          "Mean base_mod: %.06f\n"
          "Mean kappa_mod: %.06f\n"
          "Mean shift_mod: %.06f\n"
          % (avg_amp, avg_base, avg_kappa, avg_shift))

    # Better way to tell which ones are being modulated?
    # Can't really tell just from the average.
    print("ratio of mean to max: %.06f, %.06f, %.06f, %.06f"
          % (avg_amp/max_amp, avg_base/max_base, avg_kappa/max_kappa,
             avg_shift/max_shift))
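# A minimal usage sketch (not part of the original file). The batch number
# and modelname are hypothetical; they assume a dynamic-sigmoid fit with
# '*_mod' parameters exists in the database for that batch.
def _example_mean_contrast():
    batch = 289                                                # hypothetical
    modelname = 'ozgf100ch18_dlog_wcg18x2_fir2x15_lvl1_dsig1'  # hypothetical
    mean_contrast_variables(batch, modelname)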
def get_valid_improvements(batch, model1, model2, threshold=2.5):
    # TODO: a threshold of 2.5 works for removing outliers in the correlation
    #       scatter and maximizes r, but we need an unbiased way to pick this
    #       number; otherwise the cutoff is just cherry-picked to make the
    #       correlation look better.

    # NOTE: It also helps to do this for both gc and stp, then take
    #       list(set(gc_cells) & set(stp_cells))
    #       to get the intersection.

    df1 = fitted_params_per_batch(batch, model1, stats_keys=[])
    df2 = fitted_params_per_batch(batch, model2, stats_keys=[])

    # fill in missing cellids w/ nan
    celldata = nd.get_batch_cells(batch=batch)
    cellids = celldata['cellid'].tolist()

    nrows = len(df1.index.values.tolist())
    df1_cells = df1.loc['meta--r_test'].index.values.tolist()[5:]
    df2_cells = df2.loc['meta--r_test'].index.values.tolist()[5:]

    nan_series = pd.Series(np.full(nrows, np.nan))

    df1_nans = 0
    df2_nans = 0
    for c in cellids:
        if c not in df1_cells:
            df1[c] = nan_series
            df1_nans += 1
        if c not in df2_cells:
            df2[c] = nan_series
            df2_nans += 1
    print("# missing cells: %d, %d" % (df1_nans, df2_nans))

    # Force same cellid order now that cols are filled in
    df1 = df1[cellids]
    df2 = df2[cellids]

    ratio = df1.loc['meta--r_test'] / df2.loc['meta--r_test']
    valid_improvements = ratio.loc[ratio < threshold].loc[ratio > 1/threshold]

    return valid_improvements.index.values.tolist()
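# Sketch of the NOTE above (an addition, not original code): run
# get_valid_improvements for both the gc and stp models against the same LN
# model, then intersect the two cell lists. Model names are caller-supplied
# and assumed to have fits in the database.
def _example_improvement_overlap(batch, gc_model, stp_model, LN_model):
    gc_cells = get_valid_improvements(batch, gc_model, LN_model)
    stp_cells = get_valid_improvements(batch, stp_model, LN_model)
    return list(set(gc_cells) & set(stp_cells))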
# param_scatter_batch
batch = 308
limit = None
modelname1 = 'ozgf100ch18_dlog_wcg18x2_fir2x15_lvl1_dexp1_basic'
modelname2 = ('ozgf100ch18_dlog_wcg18x2_fir2x15_lvl1_dexp1_'
              'iter01-T3-T4-T5-T6-T7-ti100-fi15')
#batch = 303
#limit = None
#modelname = 'nostim20pupbeh_stategain3_basic-nf'

# Can use mod_key='fn', mod_key='id', etc. to display more info in the index.
# Formatted as: '<mspec_index--mod_key--parameter_name>'
# So mod_key='id' gives something like: '0--wc15x1--coefficients'.
df = fitted_params_per_batch(batch, modelname1, mod_key='',
                             #stats_keys=[],
                             limit=limit, multi='mean')
print(df)

param_scatter_batch(batch, modelname1, modelname2, param='shift',
                    multi='mean', limit=limit, mod_key='')

# Not handling arrays yet, just scalar params
plot_all_params(df, only_scalars=True)

# example output (truncated)
"""
                                                           mean \
0--mean          [0.6428007712025126, 1.0079999163612767]
0--sd            [0.4607000818990278, 0.41440913122937123]
1--coefficients  [[0.22104853498428548, 0.3174402233420055, 0.0...
"""
def gd_scatter(batch, model1, model2, se_filter=True, gd_threshold=0,
               param='kappa', log_gd=False):
    df_r = nd.batch_comp(batch, [model1, model2], stat='r_ceiling')
    df_e = nd.batch_comp(batch, [model1, model2], stat='se_test')
    # Remove any cellids that have NaN for 1 or more models
    df_r.dropna(axis=0, how='any', inplace=True)
    df_e.dropna(axis=0, how='any', inplace=True)

    cellids = df_r.index.values.tolist()

    gc_test = df_r[model1]
    gc_se = df_e[model1]
    ln_test = df_r[model2]
    ln_se = df_e[model2]

    if se_filter:
        # Remove if performance not significant at all
        good_cells = ((gc_test > gc_se*2) & (ln_test > ln_se*2))
    else:
        # Set to series w/ all True, so none are skipped
        good_cells = (gc_test != np.nan)

    df1 = fitted_params_per_batch(batch, model1, stats_keys=[])
    df2 = fitted_params_per_batch(batch, model2, stats_keys=[])

    # fill in missing cellids w/ nan
    celldata = nd.get_batch_cells(batch=batch)
    cellids = celldata['cellid'].tolist()
    # keep only cells that passed the significance filter
    # (was `if c in good_cells`, which only checked index membership)
    cellids = [c for c in cellids
               if c in good_cells.index and good_cells[c]]

    nrows = len(df1.index.values.tolist())
    df1_cells = df1.loc['meta--r_test'].index.values.tolist()[5:]
    df2_cells = df2.loc['meta--r_test'].index.values.tolist()[5:]

    nan_series = pd.Series(np.full(nrows, np.nan))

    df1_nans = 0
    df2_nans = 0
    for c in cellids:
        if c not in df1_cells:
            df1[c] = nan_series
            df1_nans += 1
        if c not in df2_cells:
            df2[c] = nan_series
            df2_nans += 1
    print("# missing cells: %d, %d" % (df1_nans, df2_nans))

    # Force same cellid order now that missing cols are filled in
    df1 = df1[cellids]
    df2 = df2[cellids]

    gc_vs_ln = df1.loc['meta--r_test'].values / df2.loc['meta--r_test'].values
    gc_vs_ln = gc_vs_ln.astype('float32')

    kappa_mod = df1[df1.index.str.contains('%s_mod' % param)]
    kappa = df1[df1.index.str.contains('%s$' % param)]
    gd_ratio = (np.abs(kappa_mod.values / kappa.values)
                .astype('float32').flatten())

    ff = np.isfinite(gc_vs_ln) & np.isfinite(gd_ratio)
    gc_vs_ln = gc_vs_ln[ff]
    gd_ratio = gd_ratio[ff]
    if log_gd:
        gd_ratio = np.log(gd_ratio)

    # drop cells with excessively large/small gd_ratio or gc_vs_ln
    gcd_big = gd_ratio > 10
    gc_vs_ln_big = gc_vs_ln > 10
    gc_vs_ln_small = gc_vs_ln < 0.1
    keep = ~gcd_big & ~gc_vs_ln_big & ~gc_vs_ln_small
    gd_ratio = gd_ratio[keep]
    gc_vs_ln = gc_vs_ln[keep]

    r = np.corrcoef(gc_vs_ln, gd_ratio)[0, 1]
    n = gc_vs_ln.size

    # Separately, do the same comparison but only with cells that had a
    # Gd ratio at least a little greater than 1 (i.e. had *some* GC effect)
    gd_ratio2 = copy.deepcopy(gd_ratio)
    gc_vs_ln2 = copy.deepcopy(gc_vs_ln)
    if log_gd:
        gd_threshold = np.log(gd_threshold)
    thresholded = (gd_ratio2 > gd_threshold)
    gd_ratio2 = gd_ratio2[thresholded]
    gc_vs_ln2 = gc_vs_ln2[thresholded]
    r2 = np.corrcoef(gc_vs_ln2, gd_ratio2)[0, 1]
    n2 = gc_vs_ln2.size

    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(8, 9))

    ax1.scatter(gd_ratio, gc_vs_ln, c='black', s=1)
    ax1.set_ylabel("GC/LN R")
    ax1.set_xlabel("Gd ratio")
    ax1.set_title("Performance Improvement vs Gd ratio\nr: %.02f, n: %d"
                  % (r, n))

    ax2.hist(gd_ratio, bins=30, histtype='bar', color=['gray'])
    ax2.set_title('Gd ratio distribution')
    ax2.set_xlabel('Gd ratio')
    ax2.set_ylabel('Count')

    ax3.scatter(gd_ratio2, gc_vs_ln2, c='black', s=1)
    ax3.set_ylabel("GC/LN R")
    ax3.set_xlabel("Gd ratio")
    ax3.set_title("Same, only cells w/ Gd > %.02f\nr: %.02f, n: %d"
                  % (gd_threshold, r2, n2))

    ax4.hist(gd_ratio2, bins=30, histtype='bar', color=['gray'])
    ax4.set_title('Gd ratio distribution, only Gd > %.02f' % gd_threshold)
    ax4.set_xlabel('Gd ratio')
    ax4.set_ylabel('Count')

    fig.suptitle('param: %s' % param)
    fig.tight_layout()
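# Worked toy example (hypothetical values, an addition) of the Gd ratio
# defined above: the absolute ratio of the contrast-modulated parameter
# (e.g. kappa_mod) to its baseline counterpart.
def _example_gd_ratio():
    kappa = np.array([0.5, -0.2])
    kappa_mod = np.array([0.6, -0.1])
    return np.abs(kappa_mod / kappa)  # -> array([1.2, 0.5])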
def gain_by_contrast_slopes(batch, gc, stp, LN, combined, se_filter=True,
                            good_LN=0, bins=30, use_exp=True):
    df_r, df_c, df_e = get_dataframes(batch, gc, stp, LN, combined)
    #cellids = df_r[LN] > good_LN
    cellids = df_r[LN] > df_e[LN]*2
    gc_LN_SE = (df_e[gc] + df_e[LN])
#    stp_LN_SE = (df_e[stp] + df_e[LN])
    gc_cells = cellids & ((df_r[gc] - df_r[LN]) > gc_LN_SE)
#    stp_cells = (df_r[LN] > good_LN) & ((df_r[stp] - df_r[LN]) > stp_LN_SE)
#    both_cells = gc_cells & stp_cells
#    gc_cells = gc_cells & np.logical_not(both_cells)
#    stp_cells = stp_cells & np.logical_not(both_cells)
    LN_cells = cellids & np.logical_not(gc_cells)  # | stp_cells | both_cells

    meta = ['r_test', 'ctmax_val', 'ctmax_est', 'ctmin_val', 'ctmin_est']
    # NOTE: batch is hardcoded to 289 here, not taken from the batch argument
    gc_params = fitted_params_per_batch(289, gc, stats_keys=[], meta=meta)

    # drop cellids that haven't been fit for all models
    gc_params_cells = gc_params.transpose().index.values.tolist()
    for c in gc_params_cells:
        if c not in LN_cells:
            LN_cells[c] = False
        if c not in gc_cells:
            gc_cells[c] = False
#        if c not in stp_cells:
#            stp_cells[c] = False
#        if c not in both_cells:
#            both_cells[c] = False

    # index keys are formatted like "4--dsig.d--kappa"
    mod_keys = gc.split('_')[1]
    for i, k in enumerate(mod_keys.split('-')):
        if 'dsig' in k:
            break
    k_key = f'{i}--{k}--kappa'
    ka_key = k_key + '_mod'
    meta_keys = ['meta--' + k for k in meta]
    all_keys = [k_key, ka_key] + meta_keys

    phi_dfs = [gc_params[gc_params.index == k]
               .transpose()[LN_cells].transpose()
               for k in all_keys]
    sep_dfs = [df.values.flatten().astype(np.float64) for df in phi_dfs]
    gc_dfs = [gc_params[gc_params.index == k]
              .transpose()[gc_cells].transpose()
              for k in all_keys]
    gc_sep_dfs = [df.values.flatten().astype(np.float64) for df in gc_dfs]
#    stp_dfs = [gc_params[gc_params.index == k]
#               .transpose()[stp_cells].transpose()
#               for k in all_keys]
#    stp_sep_dfs = [df.values.flatten().astype(np.float64) for df in stp_dfs]
#    both_dfs = [gc_params[gc_params.index == k]
#                .transpose()[both_cells].transpose()
#                for k in all_keys]
#    both_sep_dfs = [df.values.flatten().astype(np.float64)
#                    for df in both_dfs]

    low, high, r_test, ctmax_val, ctmax_est, ctmin_val, ctmin_est = sep_dfs
    gc_low, gc_high, gc_r, gc_ctmax_val, \
        gc_ctmax_est, gc_ctmin_val, gc_ctmin_est = gc_sep_dfs
#    stp_low, stp_high, stp_r, stp_ctmax_val, \
#        stp_ctmax_est, stp_ctmin_val, stp_ctmin_est = stp_sep_dfs
#    both_low, both_high, both_r, both_ctmax_val, \
#        both_ctmax_est, both_ctmin_val, both_ctmin_est = both_sep_dfs

    ctmax = np.maximum(ctmax_val, ctmax_est)
    gc_ctmax = np.maximum(gc_ctmax_val, gc_ctmax_est)
    ctmin = np.minimum(ctmin_val, ctmin_est)
    gc_ctmin = np.minimum(gc_ctmin_val, gc_ctmin_est)
#    stp_ctmax = np.maximum(stp_ctmax_val, stp_ctmax_est)
#    stp_ctmin = np.minimum(stp_ctmin_val, stp_ctmin_est)
#    both_ctmax = np.maximum(both_ctmax_val, both_ctmax_est)
#    both_ctmin = np.minimum(both_ctmin_val, both_ctmin_est)

    ct_range = ctmax - ctmin
    gc_ct_range = gc_ctmax - gc_ctmin
#    stp_ct_range = stp_ctmax - stp_ctmin
#    both_ct_range = both_ctmax - both_ctmin

    gain = (high - low)*ct_range
    gc_gain = (gc_high - gc_low)*gc_ct_range
    # test hyp. that gc gains are more negative than LN
    # (note: the test below is two-sided, not directional)
    gc_LN_p = st.mannwhitneyu(gc_gain, gain, alternative='two-sided')[1]
    med_gain = np.median(gain)
    gc_med_gain = np.median(gc_gain)
#    stp_gain = (stp_high - stp_low)*stp_ct_range
#    both_gain = (both_high - both_low)*both_ct_range

    k_low = low + (high - low)*ctmin
    k_high = low + (high - low)*ctmax
    gc_k_low = gc_low + (gc_high - gc_low)*gc_ctmin
    gc_k_high = gc_low + (gc_high - gc_low)*gc_ctmax
#    stp_k_low = stp_low + (stp_high - stp_low)*stp_ctmin
#    stp_k_high = stp_low + (stp_high - stp_low)*stp_ctmax
#    both_k_low = both_low + (both_high - both_low)*both_ctmin
#    both_k_high = both_low + (both_high - both_low)*both_ctmax

    if use_exp:
        k_low = np.exp(k_low)
        k_high = np.exp(k_high)
        gc_k_low = np.exp(gc_k_low)
        gc_k_high = np.exp(gc_k_high)
#        stp_k_low = np.exp(stp_k_low)
#        stp_k_high = np.exp(stp_k_high)
#        both_k_low = np.exp(both_k_low)
#        both_k_high = np.exp(both_k_high)

#    fig = plt.figure()  # fig, axes = plt.subplots(1, 2)
#    #axes[0].plot([ctmin, ctmax], [k_low, k_high], color='black', alpha=0.5)
#    plt.hist(high-low, bins=bins, color='black', alpha=0.5)
#    #axes[0].plot([gc_ctmin, gc_ctmax], [gc_k_low, gc_k_high], color='red',
#    #             alpha=0.3)
#    plt.hist(gc_high-gc_low, bins=bins, color='red', alpha=0.3)
#    #axes[0].plot([stp_ctmin, stp_ctmax], [stp_k_low, stp_k_high],
#    #             color='blue', alpha=0.3)
#    plt.hist(stp_high-stp_low, bins=bins, color='blue', alpha=0.3)
#    plt.xlabel('gain slope')
#    plt.ylabel('count')
#    plt.title(f'raw counts, LN > {good_LN}')
#    plt.legend([f'LN, {len(low)}', f'gc, {len(gc_low)}',
#                f'stp, {len(stp_low)}', f'Both, {len(both_low)}'])

    smallest_slope = min(np.min(gain), np.min(gc_gain))
    #                    , np.min(stp_gain), np.min(both_gain))
    largest_slope = max(np.max(gain), np.max(gc_gain))
    #                   , np.max(stp_gain), np.max(both_gain))
    slope_range = (smallest_slope, largest_slope)
    bins = np.linspace(smallest_slope, largest_slope, bins)
    bar_width = bins[1] - bins[0]
    axis_locs = bins[:-1]

    hist = np.histogram(gain, bins=bins, range=slope_range)
    gc_hist = np.histogram(gc_gain, bins=bins, range=slope_range)
#    stp_hist = np.histogram(stp_gain, bins=bins, range=slope_range)
#    both_hist = np.histogram(both_gain, bins=bins, range=slope_range)
    raw = hist[0]
    gc_raw = gc_hist[0]
#    stp_raw = stp_hist[0]
#    both_raw = both_hist[0]
    #prop_hist = hist[0] / np.sum(hist[0])
    #prop_gc_hist = gc_hist[0] / np.sum(gc_hist[0])
#    prop_stp_hist = stp_hist[0] / np.sum(stp_hist[0])
#    prop_both_hist = both_hist[0] / np.sum(both_hist[0])

    fig1 = plt.figure()
    plt.bar(axis_locs, raw, width=bar_width, color='gray', alpha=0.8)
    plt.bar(axis_locs, gc_raw, width=bar_width, color='maroon', alpha=0.8,
            bottom=raw)
#    plt.bar(axis_locs, stp_raw, width=bar_width, color='teal', alpha=0.8,
#            bottom=raw+gc_raw)
#    plt.bar(axis_locs, both_raw, width=bar_width, color='goldenrod',
#            alpha=0.8, bottom=raw+gc_raw+stp_raw)
    plt.xlabel('gain slope')
    plt.ylabel('count')
    plt.title(f'raw counts, LN > {good_LN}')
    plt.legend([f'LN, {len(low)}, md={med_gain:.4f}',
                f'gc, {len(gc_low)}, md={gc_med_gain:.4f}, p={gc_LN_p:.4f}'])
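# Worked toy example (hypothetical numbers, an addition) of the gain-slope
# computation above: kappa is interpolated between its low- and high-contrast
# values, so the slope is (high - low) scaled by the observed contrast range.
def _example_gain_slope():
    low, high = -0.2, 0.4      # kappa at minimum / maximum contrast
    ctmin, ctmax = 0.1, 0.9    # observed contrast range
    gain = (high - low) * (ctmax - ctmin)  # 0.6 * 0.8 = 0.48
    k_low = low + (high - low) * ctmin     # kappa evaluated at ctmin: -0.14
    k_high = low + (high - low) * ctmax    # kappa evaluated at ctmax: 0.34
    return gain, k_low, k_high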
def gd_ratio(batch, gc, stp, LN, combined, se_filter=True, good_LN=0,
             bins=30, use_exp=True):
    df_r, df_c, df_e = get_dataframes(batch, gc, stp, LN, combined)
    #cellids = df_r[LN] > good_LN
    cellids = df_r[LN] > df_e[LN]*2
    gc_LN_SE = (df_e[gc] + df_e[LN])
    #stp_LN_SE = (df_e[stp] + df_e[LN])
    gc_cells = cellids & ((df_r[gc] - df_r[LN]) > gc_LN_SE)
    #stp_cells = (df_r[LN] > good_LN) & ((df_r[stp] - df_r[LN]) > stp_LN_SE)
    #both_cells = gc_cells & stp_cells
    LN_cells = cellids & np.logical_not(gc_cells)
    #stp_cells = stp_cells & np.logical_not(both_cells)

    meta = ['r_test', 'ctmax_val', 'ctmax_est', 'ctmin_val', 'ctmin_est']
    # NOTE: batch is hardcoded to 289 here, not taken from the batch argument
    gc_params = fitted_params_per_batch(289, gc, stats_keys=[], meta=meta)

    # drop cellids that haven't been fit for all models
    gc_params_cells = gc_params.transpose().index.values.tolist()
    for c in gc_params_cells:
        if c not in LN_cells:
            LN_cells[c] = False
        if c not in gc_cells:
            gc_cells[c] = False
#        if c not in stp_cells:
#            stp_cells[c] = False
#        if c not in both_cells:
#            both_cells[c] = False

    # index keys are formatted like "4--dsig.d--kappa"
    mod_keys = gc.split('_')[1]
    for i, k in enumerate(mod_keys.split('-')):
        if 'dsig' in k:
            break
    k_key = f'{i}--{k}--kappa'
    ka_key = k_key + '_mod'
    meta_keys = ['meta--' + k for k in meta]
    all_keys = [k_key, ka_key] + meta_keys

    phi_dfs = [gc_params[gc_params.index == k]
               .transpose()[LN_cells].transpose()
               for k in all_keys]
    sep_dfs = [df.values.flatten().astype(np.float64) for df in phi_dfs]
    gc_dfs = [gc_params[gc_params.index == k]
              .transpose()[gc_cells].transpose()
              for k in all_keys]
    gc_sep_dfs = [df.values.flatten().astype(np.float64) for df in gc_dfs]
#    stp_dfs = [gc_params[gc_params.index == k]
#               .transpose()[stp_cells].transpose()
#               for k in all_keys]
#    stp_sep_dfs = [df.values.flatten().astype(np.float64) for df in stp_dfs]
#    both_dfs = [gc_params[gc_params.index == k]
#                .transpose()[both_cells].transpose()
#                for k in all_keys]
#    both_sep_dfs = [df.values.flatten().astype(np.float64)
#                    for df in both_dfs]

    low, high, r_test, ctmax_val, ctmax_est, ctmin_val, ctmin_est = sep_dfs
    gc_low, gc_high, gc_r, gc_ctmax_val, \
        gc_ctmax_est, gc_ctmin_val, gc_ctmin_est = gc_sep_dfs
#    stp_low, stp_high, stp_r, stp_ctmax_val, \
#        stp_ctmax_est, stp_ctmin_val, stp_ctmin_est = stp_sep_dfs
#    both_low, both_high, both_r, both_ctmax_val, \
#        both_ctmax_est, both_ctmin_val, both_ctmin_est = both_sep_dfs

    ctmax = np.maximum(ctmax_val, ctmax_est)
    gc_ctmax = np.maximum(gc_ctmax_val, gc_ctmax_est)
    ctmin = np.minimum(ctmin_val, ctmin_est)
    gc_ctmin = np.minimum(gc_ctmin_val, gc_ctmin_est)
#    stp_ctmax = np.maximum(stp_ctmax_val, stp_ctmax_est)
#    stp_ctmin = np.minimum(stp_ctmin_val, stp_ctmin_est)
#    both_ctmax = np.maximum(both_ctmax_val, both_ctmax_est)
#    both_ctmin = np.minimum(both_ctmin_val, both_ctmin_est)

    k_low = low + (high - low)*ctmin
    k_high = low + (high - low)*ctmax
    gc_k_low = gc_low + (gc_high - gc_low)*gc_ctmin
    gc_k_high = gc_low + (gc_high - gc_low)*gc_ctmax
#    stp_k_low = stp_low + (stp_high - stp_low)*stp_ctmin
#    stp_k_high = stp_low + (stp_high - stp_low)*stp_ctmax
#    both_k_low = both_low + (both_high - both_low)*both_ctmin
#    both_k_high = both_low + (both_high - both_low)*both_ctmax

    if use_exp:
        k_low = np.exp(k_low)
        k_high = np.exp(k_high)
        gc_k_low = np.exp(gc_k_low)
        gc_k_high = np.exp(gc_k_high)
#        stp_k_low = np.exp(stp_k_low)
#        stp_k_high = np.exp(stp_k_high)
#        both_k_low = np.exp(both_k_low)
#        both_k_high = np.exp(both_k_high)

    ratio = k_low / k_high
    gc_ratio = gc_k_low / gc_k_high
#    stp_ratio = stp_k_low / stp_k_high
#    both_ratio = both_k_low / both_k_high

    fig1, (ax1, ax2) = plt.subplots(1, 2)
    ax1.hist(ratio, bins=bins)
    ax1.set_title('all cells')
    ax2.hist(gc_ratio, bins=bins)
    ax2.set_title('gc')
#    ax3.hist(stp_ratio, bins=bins)
#    ax3.set_title('stp')
    if not use_exp:
        title = 'k_low / k_high'
    else:
        title = 'e^(k_low - k_high)'
    fig1.suptitle(title)

    fig3 = plt.figure()
    plt.scatter(ratio, r_test)
    plt.title('low/high vs r_test')

    fig4 = plt.figure()
    plt.scatter(gc_ratio, gc_r)
    plt.title('low/high vs r_test, gc improvements only')
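# Small check (an addition) of the identity behind the use_exp title above:
# np.exp(k_low) / np.exp(k_high) == np.exp(k_low - k_high), so with
# use_exp=True the plotted ratio is e^(k_low - k_high).
def _check_exp_ratio(k_low=0.3, k_high=0.8):
    assert np.isclose(np.exp(k_low) / np.exp(k_high), np.exp(k_low - k_high))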
def stp_distributions(batch, gc, stp, LN, combined, se_filter=True,
                      good_ln=0, log_scale=False, legend=False,
                      use_combined=False):
    df_r, df_c, df_e = get_dataframes(batch, gc, stp, LN, combined)
    cellids, under_chance, less_LN = get_filtered_cellids(batch, gc, stp,
                                                          LN, combined,
                                                          as_lists=False)
    _, _, _, _, c = improved_cells_to_list(batch, gc, stp, LN, combined,
                                           good_ln=good_ln)
    if use_combined:
        params_model = combined
    else:
        params_model = stp

    stp_params = fitted_params_per_batch(batch, params_model, stats_keys=[],
                                         meta=[])
    stp_params_cells = stp_params.transpose().index.values.tolist()
    for cell in stp_params_cells:
        if cell not in cellids:
            cellids[cell] = False
    not_c = list(set(stp_params.transpose()[cellids].index.values) - set(c))

    # index keys are formatted like "2--stp.2--tau"
    mod_keys = stp.split('_')[1]
    for i, k in enumerate(mod_keys.split('-')):
        if 'stp' in k:
            break
    tau_key = '%d--%s--tau' % (i, k)
    u_key = '%d--%s--u' % (i, k)

    all_taus = (stp_params[stp_params.index == tau_key]
                .transpose()[cellids].transpose())
    all_us = (stp_params[stp_params.index == u_key]
              .transpose()[cellids].transpose())
    dims = all_taus.values.flatten()[0].shape[0]

    # convert to dims x cells array instead of cells array w/ multidim values
    #sep_taus = _df_to_array(all_taus, dims).mean(axis=0)
    #sep_us = _df_to_array(all_us, dims).mean(axis=0)
    #med_tau = np.median(sep_taus)
    #med_u = np.median(sep_us)
    sep_taus = _df_to_array(all_taus[not_c], dims).mean(axis=0)
    sep_us = _df_to_array(all_us[not_c], dims).mean(axis=0)
    med_tau = np.median(sep_taus)
    med_u = np.median(sep_us)

    stp_taus = all_taus[c]
    stp_us = all_us[c]
    stp_sep_taus = _df_to_array(stp_taus, dims).mean(axis=0)
    stp_sep_us = _df_to_array(stp_us, dims).mean(axis=0)
    stp_med_tau = np.median(stp_sep_taus)
    stp_med_u = np.median(stp_sep_us)

    #tau_t, tau_p = st.ttest_ind(sep_taus, stp_sep_taus)
    #u_t, u_p = st.ttest_ind(sep_us, stp_sep_us)
    # NOTE: despite the *_t names, these are now Mann-Whitney U statistics,
    # not t statistics; names were kept in case the t-test is reinstated.
    tau_t, tau_p = st.mannwhitneyu(sep_taus, stp_sep_taus,
                                   alternative='two-sided')
    u_t, u_p = st.mannwhitneyu(sep_us, stp_sep_us, alternative='two-sided')

    sep_taus, sep_us = drop_common_outliers(sep_taus, sep_us)
    stp_sep_taus, stp_sep_us = drop_common_outliers(stp_sep_taus, stp_sep_us)
    not_imp_outliers = len(sep_taus)
    imp_outliers = len(stp_sep_taus)

    fig1, (a1, a2) = plt.subplots(2, 1, sharex=True, sharey=True)
    color = model_colors['LN']
    imp_color = model_colors['max']
    stp_label = 'STP ++ (%d)' % len(c)
    total_cells = len(c) + len(not_c)
    bin_count = 30
    hist_kwargs = {'linewidth': 1, 'label': ['not imp', 'stp imp']}

    plt.sca(a1)
    weights1 = [np.ones(len(sep_taus)) / len(sep_taus)]
    weights2 = [np.ones(len(stp_sep_taus)) / len(stp_sep_taus)]
    upper = max(sep_taus.max(), stp_sep_taus.max())
    lower = min(sep_taus.min(), stp_sep_taus.min())
    bins = np.linspace(lower, upper, bin_count + 1)
#    if log_scale:
#        lower_bound = min(sep_taus.min(), stp_sep_taus.min())
#        upper_bound = max(sep_taus.max(), stp_sep_taus.max())
#        bins = np.logspace(lower_bound, upper_bound, bin_count+1)
#        hist_kwargs['bins'] = bins
#    plt.hist([sep_taus, stp_sep_taus], weights=weights, **hist_kwargs)
    a1.hist(sep_taus, weights=weights1, fc=faded_LN, edgecolor=dark_LN,
            bins=bins, **hist_kwargs)
    a2.hist(stp_sep_taus, weights=weights2, fc=faded_max, edgecolor=dark_max,
            bins=bins, **hist_kwargs)
    a1.axes.axvline(med_tau, color=dark_LN, linewidth=2, linestyle='dashed',
                    dashes=dash_spacing)
    a1.axes.axvline(stp_med_tau, color=dark_max, linewidth=2,
                    linestyle='dashed', dashes=dash_spacing)
    a2.axes.axvline(med_tau, color=dark_LN, linewidth=2, linestyle='dashed',
                    dashes=dash_spacing)
    a2.axes.axvline(stp_med_tau, color=dark_max, linewidth=2,
                    linestyle='dashed', dashes=dash_spacing)
    ax_remove_box(a1)
    ax_remove_box(a2)
    #plt.title('tau, sig diff?: p=%.4E' % tau_p)
    #plt.xlabel('tau (ms)')

    fig2 = plt.figure(figsize=text_fig)
    text = ("tau distributions, n: %d\n"
            "n stp imp (bot): %d, med: %.4f\n"
            "n not imp (top): %d, med: %.4f\n"
            "yaxes: fraction of cells\n"
            "xaxis: tau (ms)\n"
            "st.mannwhitneyu u: %.4E,\np: %.4E\n"
            "not imp after outliers: %d\n"
            "imp after outliers: %d\n"
            % (total_cells, len(c), stp_med_tau, len(not_c), med_tau,
               tau_t, tau_p, not_imp_outliers, imp_outliers))
    plt.text(0.1, 0.5, text)

    fig3, (a3, a4) = plt.subplots(2, 1, sharex=True, sharey=True)
    weights3 = [np.ones(len(sep_us)) / len(sep_us)]
    weights4 = [np.ones(len(stp_sep_us)) / len(stp_sep_us)]
    upper = max(sep_us.max(), stp_sep_us.max())
    lower = min(sep_us.min(), stp_sep_us.min())
    bins = np.linspace(lower, upper, bin_count + 1)
#    if log_scale:
#        lower_bound = min(sep_us.min(), stp_sep_us.min())
#        upper_bound = max(sep_us.max(), stp_sep_us.max())
#        bins = np.logspace(lower_bound, upper_bound, bin_count+1)
#        hist_kwargs['bins'] = bins
#    plt.hist([sep_us, stp_sep_us], weights=weights, **hist_kwargs)
    a3.hist(sep_us, weights=weights3, fc=faded_LN, edgecolor=dark_LN,
            bins=bins, **hist_kwargs)
    a4.hist(stp_sep_us, weights=weights4, fc=faded_max, edgecolor=dark_max,
            bins=bins, **hist_kwargs)
    a3.axes.axvline(med_u, color=dark_LN, linewidth=2, linestyle='dashed',
                    dashes=dash_spacing)
    a3.axes.axvline(stp_med_u, color=dark_max, linewidth=2,
                    linestyle='dashed', dashes=dash_spacing)
    a4.axes.axvline(med_u, color=dark_LN, linewidth=2, linestyle='dashed',
                    dashes=dash_spacing)
    a4.axes.axvline(stp_med_u, color=dark_max, linewidth=2,
                    linestyle='dashed', dashes=dash_spacing)
    ax_remove_box(a3)
    ax_remove_box(a4)
    #plt.title('u, sig diff?: p=%.4E' % u_p)
    #plt.xlabel('u (fractional change in gain\n'
    #           'per unit of stimulus amplitude)')
    #plt.ylabel('proportion within group')

    fig4 = plt.figure(figsize=text_fig)
    text = ("u distributions, n: %d\n"
            "n stp imp (bot): %d, med: %.4f\n"
            "n not imp (top): %d, med: %.4f\n"
            "yaxes: fraction of cells\n"
            "xaxis: u (fractional change in gain per unit stimulus"
            " amplitude)\n"
            "st.mannwhitneyu u: %.4E,\np: %.4E"
            % (total_cells, len(c), stp_med_u, len(not_c), med_u, u_t, u_p))
    plt.text(0.1, 0.5, text)

    stp_mag, stp_yin, stp_out = stp_magnitude(np.array([[stp_med_tau]]),
                                              np.array([[stp_med_u]]))
    mag, yin, out = stp_magnitude(np.array([[med_tau]]), np.array([[med_u]]))
    fig5 = plt.figure(figsize=short_fig)
    plt.plot(stp_out.as_continuous().flatten(), color=imp_color,
             label='STP ++')
    plt.plot(out.as_continuous().flatten(), color=color)
    if legend:
        plt.legend()
    ax_remove_box()

    return fig1, fig2, fig3, fig4, fig5
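# Sketch (an addition) of the histogram weighting used above: giving every
# cell weight 1/n makes plt.hist display fraction-of-cells on the y-axis
# instead of raw counts. The function above wraps the array in a list to
# match the weights1/weights2 format it passes to hist.
def _fraction_weights(values):
    return np.ones(len(values)) / len(values)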
def gc_distributions(batch, gc, stp, LN, combined, se_filter=True,
                     good_ln=0, use_combined=False):
    df_r, df_c, df_e = get_dataframes(batch, gc, stp, LN, combined)
    cellids, under_chance, less_LN = get_filtered_cellids(batch, gc, stp,
                                                          LN, combined,
                                                          as_lists=False)
    _, _, _, _, c = improved_cells_to_list(batch, gc, stp, LN, combined,
                                           good_ln=good_ln)
    if use_combined:
        params_model = combined
    else:
        params_model = gc

    gc_params = fitted_params_per_batch(batch, params_model, stats_keys=[],
                                        meta=[])
    gc_params_cells = gc_params.transpose().index.values.tolist()
    for cell in gc_params_cells:
        if cell not in cellids:
            cellids[cell] = False
    not_c = list(set(gc_params.transpose()[cellids].index.values) - set(c))

    # index keys are formatted like "4--dsig.d--kappa"
    mod_keys = params_model.split('_')[1]
    for i, k in enumerate(mod_keys.split('-')):
        if 'dsig' in k:
            break
    b_key = f'{i}--{k}--base'
    a_key = f'{i}--{k}--amplitude'
    s_key = f'{i}--{k}--shift'
    k_key = f'{i}--{k}--kappa'
    ka_key = k_key + '_mod'
    ba_key = b_key + '_mod'
    aa_key = a_key + '_mod'
    sa_key = s_key + '_mod'
    all_keys = [b_key, a_key, s_key, k_key, ba_key, aa_key, sa_key, ka_key]

    phi_dfs = [gc_params[gc_params.index == k]
               .transpose()[cellids].transpose()
               for k in all_keys]
    sep_dfs = [df[not_c].values.flatten().astype(np.float64)
               for df in phi_dfs]
    gc_sep_dfs = [df[c].values.flatten().astype(np.float64)
                  for df in phi_dfs]

    # removing extreme outliers b/c kept getting one or two cells with
    # values that were multiple orders of magnitude different than all others
#    diffs = [sep_dfs[i+1] - sep_dfs[i]
#             for i, _ in enumerate(sep_dfs[:-1])
#             if i % 2 == 0]
    #diffs = sep_dfs[1::2] - sep_dfs[::2]
#    gc_diffs = [gc_sep_dfs[i+1] - gc_sep_dfs[i]
#                for i, _ in enumerate(gc_sep_dfs[:-1])
#                if i % 2 == 0]
    #gc_diffs = gc_sep_dfs[1::2] - gc_sep_dfs[::2]
    raw_low, raw_high = sep_dfs[:4], sep_dfs[4:]
    diffs = [high - low for low, high in zip(raw_low, raw_high)]
    medians = [np.median(d) for d in diffs]
    medians_low = [np.median(d) for d in raw_low]
    medians_high = [np.median(d) for d in raw_high]

    gc_raw_low, gc_raw_high = gc_sep_dfs[:4], gc_sep_dfs[4:]
    gc_diffs = [high - low for low, high in zip(gc_raw_low, gc_raw_high)]
    gc_medians = [np.median(d) for d in gc_diffs]
    gc_medians_low = [np.median(d) for d in gc_raw_low]
    gc_medians_high = [np.median(d) for d in gc_raw_high]

    ts, ps = zip(*[st.mannwhitneyu(diff, gc_diff, alternative='two-sided')
                   for diff, gc_diff in zip(diffs, gc_diffs)])

    diffs = drop_common_outliers(*diffs)
    gc_diffs = drop_common_outliers(*gc_diffs)
    not_imp_outliers = len(diffs[0])
    imp_outliers = len(gc_diffs[0])

    color = model_colors['LN']
    c_color = model_colors['max']
    gc_label = 'GC ++ (%d)' % len(c)
    total_cells = len(c) + len(not_c)
    hist_kwargs = {'label': ['no imp', 'sig imp'], 'linewidth': 1}

    figs = []
    for i, name in zip([0, 1, 2, 3], ['base', 'amplitude', 'shift', 'kappa']):
        f1 = _stacked_hists(diffs[i], gc_diffs[i], medians[i], gc_medians[i],
                            color, c_color, hist_kwargs=hist_kwargs)
        f2 = plt.figure(figsize=text_fig)
        text = ("%s distributions, n: %d\n"
                "n gc imp (bot): %d, med: %.4f\n"
                "n not imp (top): %d, med: %.4f\n"
                "yaxes: fraction of cells\n"
                "xaxis: 'fractional change in parameter per unit contrast'\n"
                "st.mannwhitneyu u: %.4E,\np: %.4E\n"
                "not imp w/o outliers: %d\n"
                "imp w/o outliers: %d"
                % (name, total_cells, len(c), gc_medians[i], len(not_c),
                   medians[i], ts[i], ps[i], not_imp_outliers, imp_outliers))
        plt.text(0.1, 0.5, text)
        figs.append(f1)
        figs.append(f2)

    f3 = plt.figure(figsize=small_fig)
    # median gc effect plots
    yin1, out1 = gc_dummy_sigmoid(*medians_low, low=0.0, high=0.3)
    yin2, out2 = gc_dummy_sigmoid(*medians_high, low=0.0, high=0.3)
    plt.scatter(yin1, out1, color=color, s=big_scatter, alpha=0.3)
    plt.scatter(yin2, out2, color=color, s=big_scatter*2)
    figs.append(f3)
    plt.tight_layout()
    ax_remove_box()

    f3a = plt.figure(figsize=text_fig)
    text = ("non improved cells\n"
            "median low contrast:\n"
            "base: %.4f, amplitude: %.4f\n"
            "shift: %.4f, kappa: %.4f\n"
            "median high contrast:\n"
            "base: %.4f, amplitude: %.4f\n"
            "shift: %.4f, kappa: %.4f\n"
            % (*medians_low, *medians_high))
    plt.text(0.1, 0.5, text)
    figs.append(f3a)

    f4 = plt.figure(figsize=small_fig)
    gc_yin1, gc_out1 = gc_dummy_sigmoid(*gc_medians_low, low=0.0, high=0.3)
    gc_yin2, gc_out2 = gc_dummy_sigmoid(*gc_medians_high, low=0.0, high=0.3)
    plt.scatter(gc_yin1, gc_out1, color=c_color, s=big_scatter, alpha=0.3)
    plt.scatter(gc_yin2, gc_out2, color=c_color, s=big_scatter*2)
    figs.append(f4)
    plt.tight_layout()
    ax_remove_box()

    f4a = plt.figure(figsize=text_fig)
    text = ("improved cells\n"
            "median low contrast:\n"
            "base: %.4f, amplitude: %.4f\n"
            "shift: %.4f, kappa: %.4f\n"
            "median high contrast:\n"
            "base: %.4f, amplitude: %.4f\n"
            "shift: %.4f, kappa: %.4f\n"
            % (*gc_medians_low, *gc_medians_high))
    plt.text(0.1, 0.5, text)
    figs.append(f4a)

    return figs
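# Toy illustration (hypothetical values, an addition) of the low/high pairing
# above: sep_dfs holds [base, amplitude, shift, kappa] followed by their
# '_mod' counterparts, so zipping sep_dfs[:4] with sep_dfs[4:] gives each
# parameter's high-contrast minus low-contrast difference.
def _example_param_diffs():
    low = [np.array([0.1]), np.array([1.0])]   # e.g. base, amplitude
    high = [np.array([0.3]), np.array([0.7])]  # their '_mod' values
    return [h - l for l, h in zip(low, high)]  # [array([0.2]), array([-0.3])]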
def kitchen_sink(batch, gc, stp, LN, combined, equivalence_path,
                 drop_outliers=True, cell_mask=None, mask_name=''):
    # 0. Get auditory-responsive cells
    _, a, _, _, _ = improved_cells_to_list(batch, gc, stp, LN, combined,
                                           as_lists=False)
    a_list = a[a == True].index.values.tolist()

    # 1. Load batch parameters (shouldn't need to load models)
    # NOTE: batch is hardcoded to 289 here, not taken from the batch argument
    stp_params = fitted_params_per_batch(289, stp, stats_keys=[],
                                         meta=['r_test'],
                                         manual_cellids=a_list)
    gc_params = fitted_params_per_batch(289, gc, stats_keys=[],
                                        meta=['r_test'],
                                        manual_cellids=a_list)
    LN_params = fitted_params_per_batch(289, LN, stats_keys=[],
                                        meta=['r_test'],
                                        manual_cellids=a_list)
    df = pd.read_pickle(equivalence_path)
    equivalence = df.sort_index()['equivalence'].values

#    for c in gc_params_cells:
#        if c not in LN_cells:
#            LN_cells[c] = False

    # assemble each attribute as a vector
    # index keys are formatted like "2--stp.2--tau"
    mod_keys = stp.split('_')[1]
    for i, k in enumerate(mod_keys.split('-')):
        if 'stp' in k:
            break
    tau_key = '%d--%s--tau' % (i, k)
    u_key = '%d--%s--u' % (i, k)

    mod_keys = gc.split('_')[1]
    for i, k in enumerate(mod_keys.split('-')):
        if 'dsig' in k:
            break
    b_key = f'{i}--{k}--base'
    a_key = f'{i}--{k}--amplitude'
    s_key = f'{i}--{k}--shift'
    k_key = f'{i}--{k}--kappa'
    ba_key = b_key + '_mod'
    aa_key = a_key + '_mod'
    sa_key = s_key + '_mod'
    ka_key = k_key + '_mod'

    stp_keys = [tau_key, u_key]
    gc_keys = [b_key, a_key, s_key, k_key, ba_key, aa_key, sa_key, ka_key]
    stp_dfs = [stp_params[stp_params.index == k].transpose().sort_index()[a]
               for k in stp_keys]
    gc_dfs = [gc_params[gc_params.index == k].transpose().sort_index()[a]
              .astype(np.float64).values.flatten()
              for k in gc_keys]
    r_dfs = [df[df.index == 'meta--r_test'].transpose().sort_index()[a]
             for df in [gc_params, stp_params, LN_params]]

    # pair each parameter with its '_mod' counterpart: gc_dfs is ordered
    # [base, amplitude, shift, kappa] then their '_mod' versions, so the
    # contrast effect is gc_dfs[i+4] - gc_dfs[i] (was gc_dfs[i+1] - gc_dfs[i],
    # which paired unrelated parameters)
    diffs = [gc_dfs[i + 4] - gc_dfs[i] for i in range(4)]
    for i, k in enumerate(gc_keys):
        if '_mod' in k:
            gc_keys[i] = k[:-3] + 'diff'
    #gc_dfs = gc_dfs[:4] + diffs
    gc_dfs = diffs
    gc_keys = gc_keys[4:]
    dims = 3

    gc_vs_LN = (r_dfs[0] - r_dfs[2]).values.astype(np.float64).flatten()
    stp_vs_LN = (r_dfs[1] - r_dfs[2]).values.astype(np.float64).flatten()

    to_corr = [gc_vs_LN, stp_vs_LN, equivalence]
    to_corr.extend(gc_dfs)
    to_corr.extend([_df_to_array(df, dims).mean(axis=0) for df in stp_dfs])

    if cell_mask is not None:
        replace = []
        for v in to_corr:
            replace.append(v[cell_mask])
        to_corr = replace

    if drop_outliers:
        # drop any cells that are an outlier for at least one variable
        out = np.zeros_like(to_corr[0], dtype='bool')
        for v in to_corr:
            out = out | is_outlier(v)
        replace = []
        for v in to_corr:
            replace.append(v[~out])
        to_corr = replace
    n_cells = len(to_corr[0])

    matrix = np.vstack(to_corr)
    labels = ['gc_vs_LN', 'stp_vs_LN', 'equivalence']
    for k in gc_keys + stp_keys:
        labels.append(k.split('-')[-1])
    corr = np.corrcoef(matrix)

    fig1, ax = plt.subplots()
    plt.imshow(corr)
    plt.colorbar()
    ax.set_xticks(np.arange(len(labels)))
    ax.set_yticks(np.arange(len(labels)))
    ax.set_xticklabels(labels)
    ax.set_yticklabels(labels)
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right',
             rotation_mode='anchor')
    fig1.suptitle("Correlations, mask: %s\n"
                  "n: %d\n"
                  "outliers dropped?: %s"
                  % (mask_name, n_cells, drop_outliers))
    for i in range(len(corr)):
        for j in range(len(corr)):
            v = str('%.3f' % corr[i, j])
            ax.text(j, i, v, ha='center', va='center', color='w')

    ps = np.empty_like(corr)
    p_correction = ps.shape[0]  # do a Bonferroni correction since it's easy
    for i in range(len(ps)):
        for j in range(len(ps)):
            r, p = st.pearsonr(matrix[i], matrix[j])
            ps[i][j] = p * p_correction

    fig2, ax = plt.subplots()
    plt.imshow(ps)
    plt.colorbar()
    ax.set_xticks(np.arange(len(labels)))
    ax.set_yticks(np.arange(len(labels)))
    ax.set_xticklabels(labels)
    ax.set_yticklabels(labels)
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right',
             rotation_mode='anchor')
    for i in range(len(corr)):
        for j in range(len(corr)):
            v = str('%.1E' % ps[i, j])
            ax.text(j, i, v, size=12, ha='center', va='center', color='w')
    fig2.suptitle("P-values * %d (Bonferroni correction)\n"
                  "mask: %s\n"
                  "n: %d\n"
                  "outliers dropped?: %s"
                  % (p_correction, mask_name, n_cells, drop_outliers))

    return fig1, fig2
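# Sketch (an addition) of the Bonferroni correction used above: each raw
# p-value is multiplied by the number of comparisons (here, one per matrix
# row, matching p_correction = ps.shape[0]). The clip at 1.0 is an extra
# safeguard not present in the original, since a probability cannot exceed 1.
def _bonferroni(p_values):
    n = len(p_values)
    return [min(p * n, 1.0) for p in p_values]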