def OLD_regress(density):
    """Deprecated: logistic regression of choice on float-named density columns.

    Fits ``Logit(choice ~ density bins)`` where the regressor columns are the
    DataFrame columns whose names are floats. Returns a DataFrame indexed by
    those column names with a 'weight' column (fitted coefficients) and a
    'yerr' column (half-width of the 95% confidence interval, or NaN if the
    fit failed).
    """
    float_cols = sorted([c for c in density.columns if isinstance(c, float)])
    choices = density.choice.values
    design = density[float_cols].values

    # Drop trials with an undefined choice before fitting.
    bad = np.isnan(choices)
    if np.sum(bad) > 0:
        print('Excluding {:0.0f}% nans.'.format(np.mean(bad) * 100))
    choices = choices[~bad]
    design = design[~bad]

    try:
        fit_result = Logit(choices, design).fit(disp=False)
        coefs = fit_result.params
        # conf_int gives [lower, upper] per coefficient; distance from the
        # point estimate yields the (symmetric) error-bar half-widths.
        errs = np.abs(fit_result.conf_int(alpha=0.05).T - fit_result.params)
    except (np.linalg.LinAlgError, sm.tools.sm_exceptions.PerfectSeparationError):
        # Degenerate design or perfectly separable data: report NaNs.
        coefs = np.nan * np.zeros(design.shape[1])
        errs = np.nan * np.zeros([2, len(coefs)])

    out = pd.DataFrame(index=float_cols)
    out.loc[:, 'weight'] = coefs
    if np.any(np.isnan(errs)):
        out.loc[:, 'yerr'] = np.nan
    else:
        assert np.allclose(errs[0], errs[1])  # symmetrical errorbars
        out.loc[:, 'yerr'] = errs[0]  # half of confidence interval
    return out
def regress(density, nbins=None, only_dur=None, error='95ci', fit_intercept=False, r_and_l=False):
    """Logistic regression of choice on time-binned L/R density regressors.

    Parameters
    ----------
    density : pd.DataFrame
        Must contain a 'choice' column, a 'dur' column, and per-time-bin
        density columns named 'L_<t>' / 'R_<t>' (t parseable as float).
    nbins : int, optional
        If given, downsample the time bins to this many via `downsample_bins`.
    only_dur : float, optional
        Restrict to trials of this duration; if None, all trials must share
        one duration (asserted).
    error : {'95ci', '99ci', 'se', 'bootstrap'}
        Which error estimate to report in 'yerr'.
    fit_intercept : bool
        Prepend a constant regressor (indexed as -1 in the result).
    r_and_l : bool
        If True, fit L and R densities as separate regressors (result columns
        get '_L'/'_R' suffixes); otherwise fit the difference R - L.

    Returns
    -------
    pd.DataFrame indexed by bin time with 'weight' and 'yerr' columns
    (or 'weight_L'/'yerr_L'/'weight_R'/'yerr_R' when r_and_l=True).

    Raises
    ------
    ValueError if `error` is not a recognized option.
    """
    if error not in ('95ci', '99ci', 'se', 'bootstrap'):
        raise ValueError('Unknown error type: {!r}'.format(error))

    if only_dur is not None:
        density = density[density.dur == only_dur]
    else:
        only_dur = density.dur.values[0]
        assert np.all(density.dur.values == density.dur.values[0]), 'Cannot regress on different trial lengths at once.'

    dcols_L = np.array(sorted([c for c in density.columns if c.startswith('L_')]))
    dcols_R = np.array(sorted([c for c in density.columns if c.startswith('R_')]))
    dcols_L_num = np.array([float(c[2:]) for c in dcols_L])
    # BUG FIX: previously computed from dcols_L, which made the L/R bin
    # consistency assert below vacuous and could misalign the R-bin mask.
    dcols_R_num = np.array([float(c[2:]) for c in dcols_R])

    # Restrict to bins that exist for the duration of interest.
    is_indur_L = dcols_L_num <= only_dur
    is_indur_R = dcols_R_num <= only_dur
    dcols_L = dcols_L[is_indur_L]
    dcols_R = dcols_R[is_indur_R]
    dcols_L_num = dcols_L_num[is_indur_L]
    dcols_R_num = dcols_R_num[is_indur_R]
    assert np.all(dcols_L_num == dcols_R_num)
    dcols_num = dcols_L_num

    uchoice = density.choice.values
    udens_L = density[dcols_L].values
    udens_R = density[dcols_R].values
    udens = udens_R - udens_L  # evidence difference: R minus L

    if nbins is not None:
        udens = downsample_bins(udens, nbins)
        udens_L = downsample_bins(udens_L, nbins)
        udens_R = downsample_bins(udens_R, nbins)
        # Bin labels are averaged rather than summed.
        dcols_num = downsample_bins(dcols_num, nbins, method=np.mean)

    # Drop trials with an undefined choice before fitting.
    isnan = np.isnan(uchoice)
    if np.sum(isnan) > 0:
        print('Excluding {:0.0f}% nans.'.format(np.mean(isnan) * 100))
    uchoice = uchoice[~isnan]
    udens = udens[~isnan]
    udens_L = udens_L[~isnan]
    udens_R = udens_R[~isnan]

    if r_and_l:
        # Use R and L as separate regressors instead of their difference.
        udens = np.concatenate([udens_L, udens_R], axis=1)
    if fit_intercept:
        udens = sm.add_constant(udens)

    try:
        reg = Logit(uchoice, udens, missing='drop').fit(disp=False)
        reg_params = reg.params
        if error == '95ci':
            reg_err = np.abs(reg.conf_int(alpha=0.05).T - reg.params)  # 95% CI
        elif error == '99ci':
            reg_err = np.abs(reg.conf_int(alpha=0.01).T - reg.params)  # 99% CI
        elif error == 'se':
            reg_err = reg.bse  # standard error
        elif error == 'bootstrap':
            # Resample trials with replacement and refit; spread of the
            # bootstrap coefficients is the error estimate.
            boots = []
            for i in range(1000):
                samp = np.random.choice(np.arange(len(udens)), replace=True, size=len(udens))
                udb = udens[samp]
                uch = uchoice[samp]
                boots.append(Logit(uch, udb, missing='drop').fit(disp=False).params)
            reg_err = np.std(boots, axis=0)  # / np.sqrt(len(boots))
    except (np.linalg.LinAlgError, sm.tools.sm_exceptions.PerfectSeparationError):
        # Degenerate design or perfectly separable data: report NaNs.
        reg_params = np.nan * np.zeros(udens.shape[1])
        reg_err = np.nan * np.zeros([2, len(reg_params)])

    index = dcols_num
    if r_and_l:
        index = np.tile(index, 2)  # L bins followed by R bins
    if fit_intercept:
        index = np.append(-1, index)  # intercept is indexed as -1

    res = pd.DataFrame(index=index)
    res.loc[:, 'weight'] = reg_params
    if not np.any(np.isnan(reg_err)):
        if reg_err.ndim == 2:
            assert np.allclose(reg_err[0], reg_err[1])  # symmetrical errorbars
            res.loc[:, 'yerr'] = reg_err[0]  # half of confidence interval
        elif reg_err.ndim == 1:
            res.loc[:, 'yerr'] = reg_err
    else:
        res.loc[:, 'yerr'] = np.nan

    if r_and_l:
        # Split the stacked result into side-by-side L and R columns.
        resl = res.iloc[:len(res) // 2]
        resl.columns = [c + '_L' for c in resl.columns]
        resr = res.iloc[len(res) // 2:]
        resr.columns = [c + '_R' for c in resr.columns]
        res = resl.join(resr)
    return res