Example #1
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.api import Logit


def OLD_regress(density):
    """Logistic regression of choice on the per-bin density values (float-named columns)."""

    # float-valued column names are the time bins, in temporal order
    dcols = sorted([c for c in density.columns if isinstance(c, float)])

    uchoice = density.choice.values
    uvals = density[dcols].values

    # drop trials where the choice is undefined (NaN)
    isnan = np.isnan(uchoice)
    if np.sum(isnan)>0:
        print('Excluding {:0.0f}% nans.'.format(np.mean(isnan)*100))
    uchoice = uchoice[~isnan]
    uvals = uvals[~isnan]

    try:
        # logistic regression (no intercept) of choice on the per-bin densities
        reg = Logit(uchoice,uvals).fit(disp=False)
        reg_params = reg.params
        reg_err = np.abs(reg.conf_int(alpha=0.05).T - reg.params) # 95% CI half-widths
    except (np.linalg.LinAlgError, sm.tools.sm_exceptions.PerfectSeparationError):
        # fall back to NaNs if the design matrix is singular or the classes are perfectly separable
        reg_params = np.nan * np.zeros(uvals.shape[1])
        reg_err = np.nan * np.zeros([2, len(reg_params)])

    res = pd.DataFrame(index=dcols)
    res.loc[:,'weight'] = reg_params
    if not np.any(np.isnan(reg_err)):
        assert np.allclose(reg_err[0],reg_err[1]) # symmetrical errorbars
        res.loc[:,'yerr'] = reg_err[0] # half of confidence interval
    else:
        res.loc[:,'yerr'] = np.nan

    return res
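
A minimal usage sketch (not from the source): it builds a hypothetical toy density DataFrame whose layout, float-named bin columns plus a binary choice column, is inferred from the function body, and regresses simulated choices on simulated densities.

import numpy as np
import pandas as pd

# hypothetical toy data: 200 trials, 5 time bins labeled by their (float) end times
rng = np.random.default_rng(0)
bins = [0.2, 0.4, 0.6, 0.8, 1.0]
dens = rng.normal(size=(200, len(bins)))
density = pd.DataFrame(dens, columns=bins)
# simulated choices driven by the summed density plus noise (the noise keeps the fit
# away from perfect separation)
density['choice'] = (dens.sum(axis=1) + rng.normal(scale=2.0, size=200) > 0).astype(float)

res = OLD_regress(density)
print(res)  # one 'weight' and one 'yerr' per time bin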
Example #2
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.api import Logit


def regress(density, nbins=None, only_dur=None, error='95ci', fit_intercept=False, r_and_l=False):
    """Logistic regression of choice on the per-bin R-minus-L density.

    nbins: optionally downsample the time bins to this many bins.
    only_dur: restrict to trials of this duration (defaults to the first trial's duration).
    error: '95ci', '99ci', 'se', or 'bootstrap' error bars on the weights.
    fit_intercept: prepend a constant regressor (indexed as -1 in the result).
    r_and_l: fit separate weights for the L and R densities instead of their difference.
    """

    if only_dur is not None:
        density = density[density.dur==only_dur]
    else:
        only_dur = density.dur.values[0]

    assert np.all(density.dur.values == density.dur.values[0]), 'Cannot regress on different trial lengths at once.'

    # columns named 'L_<time>' / 'R_<time>' hold the per-bin densities for each side
    dcols_L = np.array(sorted([c for c in density.columns if c.startswith('L_')]))
    dcols_R = np.array(sorted([c for c in density.columns if c.startswith('R_')]))
    dcols_L_num = np.array([float(i[2:]) for i in dcols_L])
    dcols_R_num = np.array([float(i[2:]) for i in dcols_R])
    
    # restrict to bins that exist for duration of interest
    is_indur_L = dcols_L_num <= only_dur
    is_indur_R = dcols_R_num <= only_dur
    dcols_L = dcols_L[is_indur_L]
    dcols_R = dcols_R[is_indur_R]
    dcols_L_num = dcols_L_num[is_indur_L]
    dcols_R_num = dcols_R_num[is_indur_R]

    assert np.all(dcols_L_num == dcols_R_num)
    dcols_num = dcols_L_num

    uchoice = density.choice.values
    udens_L = density[dcols_L].values
    udens_R = density[dcols_R].values
    udens = udens_R - udens_L # per-bin difference in density (R minus L)

    if nbins is not None:
        udens = downsample_bins(udens, nbins)
        udens_L = downsample_bins(udens_L, nbins)
        udens_R = downsample_bins(udens_R, nbins)
        dcols_num = downsample_bins(dcols_num, nbins, method=np.mean)

    isnan = np.isnan(uchoice)
    if np.sum(isnan)>0:
        print('Excluding {:0.0f}% nans.'.format(np.mean(isnan)*100))
    uchoice = uchoice[~isnan]
    udens = udens[~isnan]
    udens_L = udens_L[~isnan]
    udens_R = udens_R[~isnan]

    if r_and_l: # use r and l as separate regressors
        udens = np.concatenate([udens_L, udens_R], axis=1)
    
    if fit_intercept:
        udens = sm.add_constant(udens)

    try:
        reg = Logit(uchoice, udens, missing='drop').fit(disp=False)
        reg_params = reg.params
        if error == '95ci':
            reg_err = np.abs(reg.conf_int(alpha=0.05).T - reg.params) # 95% CI
        elif error == '99ci':
            reg_err = np.abs(reg.conf_int(alpha=0.01).T - reg.params) # 99% CI
        elif error == 'se':
            reg_err = reg.bse # standard error
        elif error == 'bootstrap':
            # bootstrap: refit on trials resampled with replacement, take the SD of the weights
            boots = []
            for i in range(1000):
                samp = np.random.choice(np.arange(len(udens)), replace=True, size=len(udens))
                udb = udens[samp]
                uch = uchoice[samp]
                boots.append(Logit(uch, udb, missing='drop').fit(disp=False).params)
            reg_err = np.std(boots, axis=0) #/ np.sqrt(len(boots))
    except (np.linalg.LinAlgError, sm.tools.sm_exceptions.PerfectSeparationError):
        reg_params = np.nan * np.zeros(udens.shape[1])
        reg_err = np.nan * np.zeros([2, len(reg_params)])
    
    index = dcols_num
    if r_and_l:
        index = np.tile(index, 2)
    if fit_intercept:
        index = np.append(-1, index)

    res = pd.DataFrame(index=index)
    res.loc[:,'weight'] = reg_params

    if not np.any(np.isnan(reg_err)):
        if reg_err.ndim == 2:
            assert np.allclose(reg_err[0],reg_err[1]) # symmetrical errorbars
            res.loc[:,'yerr'] = reg_err[0] # half of confidence interval
        elif reg_err.ndim == 1:
            res.loc[:,'yerr'] = reg_err
    else:
        res.loc[:,'yerr'] = np.nan
    
    if r_and_l:
        # split the stacked result back into separate L and R columns, aligned on time bin
        resl = res.iloc[:len(res)//2].copy()
        resl.columns = [c+'_L' for c in resl.columns]
        resr = res.iloc[len(res)//2:].copy()
        resr.columns = [c+'_R' for c in resr.columns]
        res = resl.join(resr)

    return res
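
The downsample_bins helper used above is not shown in this example. A minimal sketch, assuming it aggregates contiguous groups of time bins along the last axis; the default aggregator np.sum is an assumption, the method=np.mean call above only shows that the aggregator is configurable.

import numpy as np

def downsample_bins(arr, nbins, method=np.sum):
    # Split the last axis into `nbins` contiguous groups and aggregate each group.
    arr = np.asarray(arr)
    groups = np.array_split(np.arange(arr.shape[-1]), nbins)
    return np.stack([method(arr[..., g], axis=-1) for g in groups], axis=-1)

And a minimal usage sketch with hypothetical toy data; the 'L_<time>' / 'R_<time>' / 'dur' / 'choice' column layout is inferred from the function body, not taken from the source.

import numpy as np
import pandas as pd

# hypothetical toy data: 300 trials of duration 1.0 with 5 left/right time bins each
rng = np.random.default_rng(1)
times = [0.2, 0.4, 0.6, 0.8, 1.0]
L = rng.poisson(2, size=(300, len(times))).astype(float)
R = rng.poisson(2, size=(300, len(times))).astype(float)
density = pd.DataFrame(L, columns=['L_{:0.1f}'.format(t) for t in times])
for j, t in enumerate(times):
    density['R_{:0.1f}'.format(t)] = R[:, j]
density['dur'] = 1.0
# simulated choices driven by the R-minus-L totals plus noise (avoids perfect separation)
density['choice'] = ((R - L).sum(axis=1) + rng.normal(scale=3.0, size=300) > 0).astype(float)

res = regress(density, error='se')
print(res)  # one 'weight' and one 'yerr' per time bin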