Ejemplo n.º 1
0
def findErrorBarsBindingCurve(subSeries):
    """Return lower/upper error-bar lengths around the median of each column.

    Rows are treated as replicate measurements (i.e. clusters). For every
    column, NaNs are dropped and a bootstrapped confidence interval on the
    median is requested from ``bootstrap.ci`` using 1000 resamples.

    Returns:
        Tuple ``(eminus, eplus)`` of pandas Series indexed by the columns of
        ``subSeries``. ``eminus`` is ``median - lower bound`` and ``eplus`` is
        ``upper bound - median``. Both are NaN for a column that has fewer
        than two non-NaN values or for which the bootstrap raises IndexError.
    """
    below = []
    above = []
    for column in subSeries:
        values = subSeries.loc[:, column].dropna()

        # A single observation (or none) cannot be bootstrapped.
        if len(values) < 2:
            below.append(np.nan)
            above.append(np.nan)
            continue

        try:
            interval = bootstrap.ci(values, np.median, n_samples=1000)
        except IndexError:
            # The bootstrap could not produce bounds; record missing values.
            below.append(np.nan)
            above.append(np.nan)
            continue

        center = values.median()
        below.append(center - interval[0])
        above.append(interval[1] - center)

    eminus = pd.Series(below, index=subSeries.columns)
    eplus = pd.Series(above, index=subSeries.columns)
    return eminus, eplus
def returnFractionGroupedBy(mat, param_in, param_out):
    """Return the mean of ``param_out`` within each group of ``param_in``.

    Returns:
        Tuple ``(x, y, yerr)`` where ``y`` is the per-group mean, ``x`` is the
        list of group labels taken from ``y``'s index, and ``yerr`` is the
        transposed array of absolute distances between each group's
        bootstrapped interval bounds (``method='pi'``, 100 resamples) and
        that group's mean.
    """
    groups = mat.groupby(param_in)[param_out]
    y = groups.mean()

    # Warnings raised while bootstrapping are deliberately silenced.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        distances = []
        for label, values in groups:
            interval = bootstrap.ci(values, method='pi', n_samples=100)
            distances.append(np.abs(interval - y.loc[label]))
        yerr = np.array(distances).transpose()

    x = y.index.tolist()
    return x, y, yerr
 def findAlpha(self, n):
     """Estimate alpha as the median of (y/x)**(1/n) and plot its distribution.

     Only entries where both ``self.x`` and ``self.y`` are positive are kept.
     A bootstrap confidence interval on the median is computed with 1000
     resamples. A histogram/KDE figure is drawn with a solid line at the
     median and dotted lines at the interval bounds.

     Returns:
         Tuple ``(alpha, lb, ub)``: the median and the lower and upper bounds
         returned by ``bootstrap.ci``.
     """
     xs, ys = self.x, self.y
     positive = (xs > 0) & (ys > 0)
     per_image = np.power(ys / xs, 1 / float(n)).loc[positive]

     alpha = per_image.median()
     lb, ub = bootstrap.ci(per_image, statfunction=np.median, n_samples=1000)

     # Fixed plotting window; the KDE is clipped to the same range.
     xlim = [0.9, 1.1]
     bins = np.arange(0.898, 1.1, 0.001)

     plt.figure(figsize=(4, 3))
     sns.distplot(per_image, bins=bins, hist_kws={'histtype': 'stepfilled'},
                  kde_kws={'clip': xlim})
     plt.axvline(alpha, color='k', linewidth=0.5)
     for bound in (lb, ub):
         plt.axvline(bound, color='k', linestyle=':', linewidth=0.5)
     plt.xlabel('photobleach fraction per image')
     plt.ylabel('probability')
     plt.xlim(xlim)
     fix_axes(plt.gca())
     plt.tight_layout()
     return alpha, lb, ub
Ejemplo n.º 4
0
                   'substrate']].dropna()
    merge.rename(columns={'substrate_x': 'substrate'}, inplace=True)
    del df1, df2, df3
    grouped = merge.groupby('substrate')
    sand = grouped.get_group('sand')
    gravel = grouped.get_group('gravel')
    boulders = grouped.get_group('boulders')
    del merge

    print 'Calculating calibrations metrics for lsq classifications...'
    calib_df = pd.DataFrame(columns=['ent', 'h**o', 'var'],
                            index=['sand', 'gravel', 'boulders'])

    calib_df.loc['sand'] = pd.Series({
        'h**o':
        1 - np.average(boot.ci(sand['homo_median'], np.median, alpha=0.05)),
        'ent':
        np.average(boot.ci(sand['entropy_median'], np.median, alpha=0.05)),
        'var':
        np.average(boot.ci(sand['var_median'], np.median, alpha=0.05))
    })
    calib_df.loc['gravel'] = pd.Series({
        'h**o':
        1 - np.average(boot.ci(gravel['homo_median'], np.median, alpha=0.05)),
        'ent':
        np.average(boot.ci(gravel['entropy_median'], np.median, alpha=0.05)),
        'var':
        np.average(boot.ci(gravel['var_median'], np.median, alpha=0.05))
    })
    calib_df.loc['boulders'] = pd.Series({
        'h**o':
Ejemplo n.º 5
0
    'std': 'var_std'
},
           inplace=True)

# Left-join df2 and then df3 onto df1 by index, so every row of df1 is kept.
merge = df1.merge(df2, left_index=True, right_index=True, how='left')
merge = merge.merge(df3, left_index=True, right_index=True, how='left')
# Keep only the three median columns and the substrate label, then drop any
# row that has a missing value in one of them.
merge = merge[['homo_median', 'entropy_median', 'var_median',
               'substrate']].dropna()
# NOTE(review): only 'substrate' was selected above, so no 'substrate_x'
# column should remain and this rename looks like a no-op -- confirm.
merge.rename(columns={'substrate_x': 'substrate'}, inplace=True)
del df1, df2, df3
# Split the merged table into one frame per substrate class.
grouped = merge.groupby('substrate')
sand = grouped.get_group('sand')
gravel = grouped.get_group('gravel')
boulders = grouped.get_group('boulders')

# Bootstrap intervals on the median (alpha=0.05) for each metric of each
# substrate. The return values are not assigned here.
# NOTE(review): presumably evaluated for interactive display -- confirm.
boot.ci(sand['homo_median'], np.median, alpha=0.05)
boot.ci(sand['entropy_median'], np.median, alpha=0.05)
boot.ci(sand['var_median'], np.median, alpha=0.05)

boot.ci(gravel['homo_median'], np.median, alpha=0.05)
boot.ci(gravel['entropy_median'], np.median, alpha=0.05)
boot.ci(gravel['var_median'], np.median, alpha=0.05)

boot.ci(boulders['homo_median'], np.median, alpha=0.05)
boot.ci(boulders['entropy_median'], np.median, alpha=0.05)
boot.ci(boulders['var_median'], np.median, alpha=0.05)

homo_df = pd.DataFrame(index=['Sand', 'Gravel', 'Boulders'],
                       columns=['lbound', 'ubound'])
homo_df.loc['Sand'] = pd.Series({
    'lbound':
Ejemplo n.º 6
0
def get_final_ci(series):
    """Summarise ``series`` by its mean, sample std and a bootstrap interval.

    Returns:
        Tuple ``(mean, std, low_ci, high_ci)``. ``std`` uses ``ddof=1``;
        ``low_ci`` and ``high_ci`` are the two bounds returned by
        ``bootstrap.ci`` with 1000 resamples.
    """
    # Point estimates are computed before the bootstrap call.
    summary = (series.mean(), series.std(ddof=1))
    bounds = bootstrap.ci(series, n_samples=1000)
    low_ci, high_ci = bounds
    return summary + (low_ci, high_ci)