コード例 #1
0
ファイル: plotter.py プロジェクト: imclab/climate
def _nice_intervals(data, nlevs):
    '''
    Purpose::
        Build "nice" level values for colorbars and contour plots. The
        level range is bounded by the 5th and 95th percentiles of the
        data, so values in the tails do not stretch it.

    Input::
        data - an array of data to be plotted
        nlevs - an int handed to MaxNLocator as the target number of
                intervals

    Output::
        clevs - the MaxNLocator tick values that lie inside the
                [5th percentile, 95th percentile] range
    '''
    flat = data.ravel()

    # Percentile bounds instead of min/max, to mitigate outliers
    lower, upper = [mstats.scoreatpercentile(flat, pct) for pct in (5, 95)]

    ticks = mpl.ticker.MaxNLocator(nlevs).tick_values(lower, upper)

    # MaxNLocator sometimes returns ticks outside the requested range;
    # keep only those inside it
    inside = (ticks >= lower) & (ticks <= upper)
    return ticks[inside]
コード例 #2
0
ファイル: sample_props.py プロジェクト: ajmejia/notebooks
def binner(x, y, w_sta, nbins, rang = None, ebar = False, per = None) :
    """
    Bin ``y`` along ``x`` in ``nbins`` equal-width bins and summarise each bin.

    Parameters
    ----------
    x, y : numpy arrays of equal length
        Abscissa and ordinate samples (both are indexed with an index array).
    w_sta : {"median", "mean", "mode"}
        Statistic computed on the ``y`` values of every bin.
    nbins : int
        Number of bins between the lower and upper limit.
    rang : (min, max), optional
        Binning limits; defaults to the minimum and maximum of ``x``.
        Samples outside the limits are ignored.
    ebar : bool, optional
        When true, also return percentile-based error bars.
    per : (low, high), optional
        Percentiles used for the error bars; defaults to (15.8, 84.0).

    Returns
    -------
    (x_cen, y_sta) when ``ebar`` is false, otherwise (x_cen, y_sta, yer)
    where ``yer`` is a 2 x nbins array of absolute distances between the
    statistic and the low/high percentile of each bin.

    Raises
    ------
    ValueError
        If ``w_sta`` is not one of the supported statistics.
    """
    from numpy import array, digitize, lexsort, linspace
    from numpy.ma import average, median

    # ``mode`` and ``scoreatpercentile`` are resolved from the enclosing
    # module, exactly as in the original implementation.
    if w_sta == "median":
        statistic = median
    elif w_sta == "mean":
        statistic = average
    elif w_sta == "mode":
        def statistic(values):
            return mode(values)[0]
    else:
        raise ValueError("unknown statistic %r: expected 'median', 'mean' "
                         "or 'mode'" % (w_sta,))

    ind = lexsort((y, x))
    xs, ys = x[ind], y[ind]

    if rang is None:
        mn, mx = min(xs), max(xs)
    else:
        mn, mx = rang

    # Use ONE set of edges for both the centres and the grouping, so that
    # y_sta[k] really describes the interval centred on x_cen[k].
    edges = linspace(mn, mx, nbins + 1)
    x_cen = (edges[:-1] + edges[1:]) * 0.5
    ibins = digitize(xs, edges)
    # digitize puts x == mx into the overflow slot; fold it into the last bin
    ibins[xs == mx] = nbins

    groups = [ys[ibins == i] for i in range(1, nbins + 1)]
    y_sta = array([statistic(group) for group in groups])

    if not ebar:
        return x_cen, y_sta

    # ``is None`` avoids an element-wise comparison when ``per`` is an array
    if per is None:
        low, high = 15.8, 84.0
    else:
        low, high = per[0], per[1]

    myer = abs(array([scoreatpercentile(group, low) for group in groups]) - y_sta)
    pyer = abs(array([scoreatpercentile(group, high) for group in groups]) - y_sta)
    yer = array([myer, pyer])
    return x_cen, y_sta, yer
コード例 #3
0
ファイル: plotter.py プロジェクト: imclab/climate
def _nice_intervals(data, nlevs):
    '''
    Purpose::
        Compute "nice" color levels for colorbars and contour plots.
        The lowest and highest admissible levels are the 5th and 95th
        percentiles of the data, which trims the tails of the
        distribution and so limits the effect of outliers.

    Input::
        data - an array of data to be plotted
        nlevs - an int giving the target number of intervals

    Output::
        clevs - the tick values produced by MaxNLocator, restricted to
                the percentile range
    '''
    values = data.ravel()
    low = mstats.scoreatpercentile(values, 5)
    high = mstats.scoreatpercentile(values, 95)

    candidates = mpl.ticker.MaxNLocator(nlevs).tick_values(low, high)

    # Discard ticks that MaxNLocator placed outside [low, high]
    keep = (candidates >= low) & (candidates <= high)
    return candidates[keep]
コード例 #4
0
def nbins(sample, range_ = None) :
  """
  Estimate a histogram bin width as 2 * IQR / n**(1/3).

  sample : numpy array of values.
  range_ : optional (min, max) pair; defaults to the sample extremes.
           Only values inside the range enter the IQR and the count n.

  Returns (number_of_bins, mn, mx, binsize); the number of bins is the
  unrounded ratio (mx - mn) / binsize.
  """
  def iqr(values) :
    upper_quartile = st.scoreatpercentile(values, 75.0)
    lower_quartile = st.scoreatpercentile(values, 25.0)
    return upper_quartile - lower_quartile

  if range_ is not None :
    mn, mx = range_
  else :
    mn, mx = sample.min(), sample.max()

  inside  = (sample >= mn) & (sample <= mx)
  count   = inside.sum()
  binsize = 2 * iqr(sample[inside]) / count ** (1. / 3)

  return (mx - mn) / binsize, mn, mx, binsize
コード例 #5
0
def nbins(sample, range_=None):
    """
    Derive a histogram bin width from the interquartile range.

    The width is 2 * IQR / n**(1/3), where the IQR and the count n are
    taken over the values of ``sample`` lying inside ``range_`` (or inside
    the sample's own min/max when ``range_`` is None).

    Returns a tuple (number_of_bins, mn, mx, binsize); the number of bins
    is (mx - mn) / binsize and is not rounded.
    """
    mn, mx = (sample.min(), sample.max()) if range_ is None else range_

    selected = sample[(sample >= mn) & (sample <= mx)]
    n_selected = ((sample >= mn) & (sample <= mx)).sum()

    q75 = st.scoreatpercentile(selected, 75.0)
    q25 = st.scoreatpercentile(selected, 25.0)
    binsize = (2 * (q75 - q25) / n_selected**(1. / 3))

    return (mx - mn) / binsize, mn, mx, binsize
コード例 #6
0
ファイル: sample_props.py プロジェクト: ajmejia/notebooks
def binner(x, y, w_sta, nbins, rang=None, ebar=False, per=None):
    """
    Bin ``y`` along ``x`` in ``nbins`` equal-width bins and summarise each bin.

    Parameters
    ----------
    x, y : numpy arrays of equal length
        Abscissa and ordinate samples (both are indexed with an index array).
    w_sta : {"median", "mean", "mode"}
        Statistic computed on the ``y`` values of every bin.
    nbins : int
        Number of bins between the lower and upper limit.
    rang : (min, max), optional
        Binning limits; defaults to the minimum and maximum of ``x``.
        Samples outside the limits are ignored.
    ebar : bool, optional
        When true, also return percentile-based error bars.
    per : (low, high), optional
        Percentiles used for the error bars; defaults to (15.8, 84.0).

    Returns
    -------
    (x_cen, y_sta) when ``ebar`` is false, otherwise (x_cen, y_sta, yer)
    where ``yer`` is a 2 x nbins array of absolute distances between the
    statistic and the low/high percentile of each bin.

    Raises
    ------
    ValueError
        If ``w_sta`` is not one of the supported statistics.
    """
    from numpy import array, digitize, lexsort, linspace
    from numpy.ma import average, median

    # ``mode`` and ``scoreatpercentile`` are resolved from the enclosing
    # module, exactly as in the original implementation.
    if w_sta == "median":
        statistic = median
    elif w_sta == "mean":
        statistic = average
    elif w_sta == "mode":

        def statistic(values):
            return mode(values)[0]
    else:
        raise ValueError("unknown statistic %r: expected 'median', 'mean' "
                         "or 'mode'" % (w_sta, ))

    ind = lexsort((y, x))
    xs, ys = x[ind], y[ind]

    if rang is None:
        mn, mx = min(xs), max(xs)
    else:
        mn, mx = rang

    # The original grouped the data with an ``nbins``-point edge grid while
    # computing the centres from ``nbins + 1`` edges, so statistics and
    # centres described different intervals. One edge grid is used for both.
    edges = linspace(mn, mx, nbins + 1)
    x_cen = (edges[:-1] + edges[1:]) * 0.5
    ibins = digitize(xs, edges)
    # digitize puts x == mx into the overflow slot; fold it into the last bin
    ibins[xs == mx] = nbins

    groups = [ys[ibins == i] for i in range(1, nbins + 1)]
    y_sta = array([statistic(group) for group in groups])

    if not ebar:
        return x_cen, y_sta

    # ``is None`` avoids an element-wise comparison when ``per`` is an array
    if per is None:
        low, high = 15.8, 84.0
    else:
        low, high = per[0], per[1]

    myer = abs(
        array([scoreatpercentile(group, low) for group in groups]) - y_sta)
    pyer = abs(
        array([scoreatpercentile(group, high) for group in groups]) - y_sta)
    yer = array([myer, pyer])
    return x_cen, y_sta, yer
コード例 #7
0
ファイル: clean_data.py プロジェクト: calanoue/GFIN_Data_Work
    def clean_outliers(self):
        """
        Mask outliers in every row of ``self.xs``, in place.

        For each row the bounds are the scores at the
        ``100 - self.outlier_perc`` and ``self.outlier_perc`` percentiles
        (computed over the whole row). Values outside these bounds are
        masked, except for the last ``self.keep_n_values`` entries of the
        row, which are always left untouched.

        Parameters
        ----------
        self.xs : 2-D masked array
            Data to clean; replaced by the cleaned masked array.
        self.rows_N : integer
            Number of rows of ``self.xs`` to process.
        self.outlier_perc : integer
            Percentile value for mstats.scoreatpercentile function.
        self.keep_n_values : integer
            Number of trailing values per row that are never masked.
        """
        # Outliers using percentiles - num_rows * [min, max]
        outlier_all = ma.array([[mstats.scoreatpercentile(self.xs[i, :], 100 - self.outlier_perc),
               mstats.scoreatpercentile(self.xs[i, :], self.outlier_perc)] for i in range(self.rows_N)])

        # Use an explicit split index instead of negative slicing: with
        # keep_n_values == 0, ``[:-0]`` is empty and ``[-0:]`` is the whole
        # row, which silently disabled the outlier masking altogether.
        split = max(self.xs.shape[1] - self.keep_n_values, 0)

        cleaned_rows = []
        for i in range(self.rows_N):
            checked = ma.masked_outside(self.xs[i, :split], outlier_all[i, 0], outlier_all[i, 1])
            protected = self.xs[i, split:]
            cleaned_rows.append(ma.hstack((checked, protected)))
        self.xs = ma.array(cleaned_rows)
コード例 #8
0
ファイル: test_mstats_basic.py プロジェクト: andycasey/scipy
 def test_2D(self):
     x = ma.array([[1, 1, 1],
                   [1, 1, 1],
                   [4, 4, 3],
                   [1, 1, 1],
                   [1, 1, 1]])
     assert_equal(mstats.scoreatpercentile(x,50), [1,1,1])
コード例 #9
0
ファイル: test_mstats_basic.py プロジェクト: alouisos/scipy
 def test_2D(self):
     x = ma.array([[1, 1, 1],
                   [1, 1, 1],
                   [4, 4, 3],
                   [1, 1, 1],
                   [1, 1, 1]])
     assert_equal(mstats.scoreatpercentile(x,50), [1,1,1])
コード例 #10
0
def _nice_intervals(data, nlevs):
    '''
    Purpose::
        Calculates nice intervals between each color level for colorbars
        and contour plots. The target minimum and maximum color levels are
        the 5th and 95th percentiles of the data, which cuts off the tails
        of the distribution to remove outliers. When those percentiles
        straddle zero, the range is made symmetric about zero so that zero
        sits at the center of the color bar.

    Input::
        data - an array of data to be plotted
        nlevs - an int giving the target number of intervals

    Output::
        clevs - an array of the resultant colorbar levels, restricted to
                the target range
    '''
    # Find the min and max levels by cutting off the tails of the distribution
    # This mitigates the influence of outliers
    data = data.ravel()
    mn = mstats.scoreatpercentile(data, 5)
    mx = mstats.scoreatpercentile(data, 95)
    # if the range straddles 0, make it symmetric so 0 is the center of
    # the color bar
    if mn < 0 and mx > 0:
        level = max(abs(mn), abs(mx))
        mnlvl = -1 * level
        mxlvl = level
    # otherwise have the color bar run between min and max
    else:
        mnlvl = mn
        mxlvl = mx

    # hack to make generated intervals from mpl the same for all versions
    # (the rcParam is absent on versions that do not know it, hence .get)
    autolimit_mode = mpl.rcParams.get('axes.autolimit_mode')
    if autolimit_mode:
        mpl.rc('axes', autolimit_mode='round_numbers')

    try:
        locator = mpl.ticker.MaxNLocator(nlevs)
        clevs = locator.tick_values(mnlvl, mxlvl)
    finally:
        # Always restore the caller's global setting, even if the locator
        # raises; otherwise the override would leak into later plots.
        if autolimit_mode:
            mpl.rc('axes', autolimit_mode=autolimit_mode)

    # Make sure the bounds of clevs are reasonable since sometimes
    # MaxNLocator gives values outside the domain of the input data
    clevs = clevs[(clevs >= mnlvl) & (clevs <= mxlvl)]
    return clevs
コード例 #11
0
def _nice_intervals(data, nlevs):
    '''
    Purpose::
        Calculates nice intervals between each color level for colorbars
        and contour plots. The target minimum and maximum color levels are
        the 5th and 95th percentiles of the data, which cuts off the tails
        of the distribution to remove outliers. When those percentiles
        straddle zero, the range is made symmetric about zero so that zero
        sits at the center of the color bar.

    Input::
        data - an array of data to be plotted
        nlevs - an int giving the target number of intervals

    Output::
        clevs - an array of the resultant colorbar levels, restricted to
                the target range
    '''
    # Find the min and max levels by cutting off the tails of the distribution
    # This mitigates the influence of outliers
    data = data.ravel()
    mn = mstats.scoreatpercentile(data, 5)
    mx = mstats.scoreatpercentile(data, 95)
    # if the range straddles 0, make it symmetric so 0 is the center of
    # the color bar
    if mn < 0 and mx > 0:
        level = max(abs(mn), abs(mx))
        mnlvl = -1 * level
        mxlvl = level
    # otherwise have the color bar run between min and max
    else:
        mnlvl = mn
        mxlvl = mx

    # hack to make generated intervals from mpl the same for all versions
    # (the rcParam is absent on versions that do not know it, hence .get)
    autolimit_mode = mpl.rcParams.get('axes.autolimit_mode')
    if autolimit_mode:
        mpl.rc('axes', autolimit_mode='round_numbers')

    try:
        locator = mpl.ticker.MaxNLocator(nlevs)
        clevs = locator.tick_values(mnlvl, mxlvl)
    finally:
        # Always restore the caller's global setting, even if the locator
        # raises; otherwise the override would leak into later plots.
        if autolimit_mode:
            mpl.rc('axes', autolimit_mode=autolimit_mode)

    # Make sure the bounds of clevs are reasonable since sometimes
    # MaxNLocator gives values outside the domain of the input data
    clevs = clevs[(clevs >= mnlvl) & (clevs <= mxlvl)]
    return clevs
コード例 #12
0
ファイル: test_mstats_basic.py プロジェクト: alouisos/scipy
 def test_percentile(self):
     x = np.arange(8) * 0.5
     assert_equal(mstats.scoreatpercentile(x, 0), 0.)
     assert_equal(mstats.scoreatpercentile(x, 100), 3.5)
     assert_equal(mstats.scoreatpercentile(x, 50), 1.75)
コード例 #13
0
#solution attempt 2 - heatmap
plt.clf()
gridsize = 20
plt.hexbin(xlist, ylist, gridsize=gridsize, cmap=cm.jet, bins=None)
cb = plt.colorbar()
cb.set_label('frequency')

xlabel('percentile by variation in first five plays')
ylabel('percentile by average in second five plays')
print "r = %.3f, p = %.5f" % pearsonr(xlist, ylist)

savefig('explore_exploit_scatterheatmap.png',
        dpi=300,
        facecolor='w',
        edgecolor='w',
        orientation='portrait',
        papertype=None,
        format=None,
        transparent=False,
        bbox_inches='tight',
        pad_inches=0.1)

#now do CI for r value

bootrec = pickle.load(open('save_a5_boot_bootrec.p', 'rb'))
bootrec = bootrec[0]
ci_upper = ssm.scoreatpercentile(bootrec, 97.5)
ci_lower = ssm.scoreatpercentile(bootrec, 02.5)
ci_mean = np.mean(bootrec)
print "Bootstrapped confidence intervals were Upper = %0.3f, Lower = %0.3f" % (
    ci_upper, ci_lower)
コード例 #14
0
ファイル: ps_fig3obs.py プロジェクト: ErwinHaasnoot/axongame
# Write through a context manager so the pickle file is flushed and closed
# deterministically rather than whenever the handle is garbage collected.
with open('save_a4_1_a.p', 'wb') as a_file:
    pickle.dump(a, a_file)

# --------------------------------------------
#calc dict of maximum score for each player(=each key)
maxscore={}

for key in big:
    # element [0] of every attempt record is taken as that attempt's score
    maxscore[key]= max([big[key][attempt][0] for attempt in big[key]])

# sort maximum scores, smallest to biggest
ranked_maxscore=sorted(maxscore[key] for key in maxscore)

#calc the score at each integer percentile 0..99 of the maximum scores
prcentiles=[]
for p in range(100):
    prcentiles.append(ssm.scoreatpercentile(ranked_maxscore,p))


decile={}

# For each player, keep the position of the last percentile score that the
# player's maximum exceeds. Players whose maximum exceeds none of the
# percentile scores get no entry. Despite the name, the stored value is an
# index into the 100 percentile scores, not a decile.
for key in big:
    for i in prcentiles:
        if maxscore[key]>i:
            decile[key]=prcentiles.index(float(i))

#------------------------------------------------
# now calculate some index of spread
# - the simplest one is range

timespread={}
コード例 #15
0
ファイル: sup_ee_boot.py プロジェクト: leungwk/axongame
        for attempt in second_plays:
            second.append(sample_wr(bootdata[attempt], 1))
        av2[key] = sp.mean(second)
        var2[key] = sp.var(second)

    #make list of summary stats
    x = []
    y = []
    for key in big:
        x.append(var1[key])
        y.append(av2[key])

    #find percentile values
    prcentiles_x = []
    for p in range(100):
        prcentiles_x.append(ssm.scoreatpercentile(x, p))

    prcentiles_y = []
    for p in range(100):
        prcentiles_y.append(ssm.scoreatpercentile(y, p))

    #make dict of prcentile values for each statistic for each player
    prcentile_xindex = {
        key: bisect.bisect(prcentiles_x, var1[key])
        for key in big
    }
    prcentile_yindex = {
        key: bisect.bisect(prcentiles_y, av2[key])
        for key in big
    }
コード例 #16
0
ファイル: make_fig6.py プロジェクト: goryszewskig/axongame
        
#solution attempt 2 - heatmap
plt.clf()
gridsize=20
plt.hexbin(xlist, ylist,gridsize=gridsize, cmap=cm.jet, bins=None)
cb = plt.colorbar()
cb.set_label('frequency')

xlabel('percentile by variation in first five plays')
ylabel('percentile by average in second five plays')
print "r = %.3f, p = %.5f" % pearsonr(xlist,ylist)

savefig('Figure6.png', dpi=300, facecolor='w', edgecolor='w',
        orientation='portrait', papertype=None, format=None,
        transparent=False, bbox_inches='tight', pad_inches=0.1) 

generatepaperfigs=0
if generatepaperfigs:
    savefig('../cogsci13/figures/a5_e-e_heatscatter.png', dpi=300, facecolor='w', edgecolor='w',
        orientation='portrait', papertype=None, format=None,
        transparent=False, bbox_inches='tight', pad_inches=0.1) 
        
#now do CI for r value

bootrec=pickle.load(open('save_a5_boot_bootrec.p', 'rb'))
bootrec=bootrec[0]
ci_upper=ssm.scoreatpercentile(bootrec,97.5)
ci_lower=ssm.scoreatpercentile(bootrec,02.5)
ci_mean=np.mean(bootrec)
print "Bootstrapped confidence intervals were Upper = %0.3f, Lower = %0.3f" % (ci_upper,ci_lower)
コード例 #17
0
ファイル: test_mstats_basic.py プロジェクト: andycasey/scipy
 def test_percentile(self):
     x = np.arange(8) * 0.5
     assert_equal(mstats.scoreatpercentile(x, 0), 0.)
     assert_equal(mstats.scoreatpercentile(x, 100), 3.5)
     assert_equal(mstats.scoreatpercentile(x, 50), 1.75)
コード例 #18
0
ファイル: funcs.py プロジェクト: ErwinHaasnoot/axongame
def drawGraphs(outFolder, bootName,  windowSizes1, windowSizes2, zBottom = -1, zTop = 1):
    import matplotlib
    matplotlib.use('PDF')
    from matplotlib import pyplot as plt
    from mpl_toolkits.mplot3d import Axes3D
    print 'Drawing bootstrap graphs for: {}'.format(bootName)
    bootrec = pickle.load(open('{}/{}/bootrec.p'.format(outFolder,bootName),'rb'))
    plt.close('all')
    Z_obs = np.zeros((len(windowSizes1),len(windowSizes2)))
    Z_lower = np.zeros((len(windowSizes1),len(windowSizes2)))
    Z_boot = np.zeros((len(windowSizes1),len(windowSizes2)))
    Z_upper = np.zeros((len(windowSizes1),len(windowSizes2)))
    Z_std = np.zeros((len(windowSizes1),len(windowSizes2)))
    
    currentFolder = outFolder + '/' + bootName
    
    for i1 in xrange(len(windowSizes1)):
        for i2 in xrange(len(windowSizes2)):
            groupn_i = windowSizes1[i1]       
            groupn_j = windowSizes2[i2]
            curbootrec=bootrec[0,i1,i2]
            print
            print "Analyzing %i - %i" % (groupn_i,groupn_j)
            xlist= pickle.load(open(currentFolder + '/save_a5_xlist' + str(groupn_i) + "," + str(groupn_j) +'.p', 'rb'))
            ylist= pickle.load(open(currentFolder + '/save_a5_ylist' + str(groupn_i) + "," + str(groupn_j) +'.p', 'rb'))
            a,b = pearsonr(xlist,ylist)
                    
            #now do CI for r value
            ci_upper=ssm.scoreatpercentile(curbootrec,97.5)
            ci_lower=ssm.scoreatpercentile(curbootrec,02.5)
            ci_mean=np.mean(curbootrec)
            ci_std=np.var(curbootrec)
            print scipy.stats.norm(ci_mean,ci_std).cdf(abs(a))
            print "r = %.3f, p = %.5f, %s of confidence interval" % (a,b, 'outside' if a > ci_upper or a < ci_lower else 'inside')
            print "Bootstrapped confidence intervals were Upper = %0.3f, Lower = %0.3f" % (ci_upper,ci_lower)
            
            Z_obs[i1][i2] = a 
            Z_upper[i1][i2] = ci_upper         
            Z_lower[i1][i2] = ci_lower    
            Z_boot[i1][i2] = ci_mean
            Z_std[i1][i2] = ci_std
            
            
    X = [[k for j in windowSizes2] for k in windowSizes1]  
    Y = [[j for j in windowSizes2] for k in windowSizes1]
    #One-sided Z value to p value
    Z_p = [[st.norm.sf((Z_obs[i][j] - Z_boot[i][j])/Z_std[i][j]) for j in range(len(windowSizes1))] for i in range(len(windowSizes1))]
    
    fig1 = plt.figure()
    
    fontsize = 16
    ax = fig1.add_subplot(111, projection='3d')
    ax.plot_surface(X, Y, Z_obs, rstride=1, cstride=1)
    fig1.suptitle('Observed Correlations', fontsize=20)
    ax.set_xlabel('Size window 1', fontsize = fontsize)
    ax.set_ylabel('Size window 2', fontsize = fontsize)
    ax.set_zlabel('r', fontsize = fontsize)
    ax.set_zlim(bottom = zBottom, top = zTop)
    
    plt.savefig('{}/figures/{}_corObs.pdf'.format(outFolder,bootName), bbox_inches='tight')
        
    fig2 = plt.figure()
    
    ax = fig2.add_subplot(111, projection='3d')
    ax.plot_surface(X, Y, Z_boot, rstride=1, cstride=1)
    fig2.suptitle('Bootstrap Average Correlations', fontsize=20)
    ax.set_xlabel('Size window 1', fontsize = fontsize)
    ax.set_ylabel('Size window 2', fontsize = fontsize)
    ax.set_zlabel('r', fontsize = fontsize)
    ax.set_zlim(bottom = zBottom, top = zTop)
    
    plt.savefig('{}/figures/{}_corBoot.pdf'.format(outFolder,bootName), bbox_inches='tight')
コード例 #19
0
# One residual array per pair of adjacent table columns (0,1), (2,3), ...,
# (8,9). The first pair calls rt.err with its default third argument, the
# others pass False.
# NOTE(review): rt.err is defined elsewhere; the meaning of that third
# argument is not visible here - confirm against its definition.
res = [rt.err(table[:, i], table[:, i + 1]) if i == 0 else rt.err(table[:, i], table[:, i + 1], False) for i in xrange(0, 10, 2)]
# Axis label for each entry of res, in the same order
lab = ["mass residuals", "mass weighted age residuals", "flux weighted age residuals", "metallicity residuals", "dust extinction residuals"]

# One figure per residual type: a 5 x 13 grid of histograms (65 panels)
for j in xrange(len(res)) :
  fig, axs = plt.subplots(5, 13, sharex = True, sharey = True, figsize = (20, 15))

  plt.xlim(-1.5, +1.5)
  plt.ylim(0, 40)

  # Flatten the grid so that panel i can be addressed as axs[i]
  axs = np.ravel(axs)

  for i in xrange(65) :
    # Panel i shows the i-th consecutive slice of 100 residuals
    data   = res[j][i * 100:(i + 1) * 100]
    median = np.median(data)
    p16    = st.scoreatpercentile(data, 16.0)
    p84    = st.scoreatpercentile(data, 84.0)

    counts, bins, patches = axs[i].hist(data, 30, histtype = "step", hatch = "///", lw = 1, color = "#1A1A1A", range = (-1.5, +1.5))

    # Dashed line at the median, dash-dotted lines at the 16th/84th percentiles
    axs[i].axvline(median, ls = "--", lw = 1.5, color = "#000080")
    axs[i].axvline(p16, ls = "-.", lw = 1.5, color = "#000080")
    axs[i].axvline(p84, ls = "-.", lw = 1.5, color = "#000080")

  axs[-1].set_xticks([-1., 0, +1.])
  # `i` is the value left over from the loop above (64, the last panel);
  # its y ticks minus the first and last are applied to panel 52
  # (row 4, column 0 of the 5 x 13 grid).
  axs[52].set_yticks(list(axs[i].get_yticks()[1:-1]))
  # Panel 58 is row 4, column 6; panel 26 is row 2, column 0
  axs[58].set_xlabel(lab[j], fontsize = 16)
  axs[26].set_ylabel("counts", fontsize = 16)

  plt.tight_layout()
  plt.subplots_adjust(wspace = 0.01, hspace = 0.01, bottom = 0.06)
コード例 #20
0

# --------------------------------------------
# Build a dict holding the maximum score of each player (= each key of big)
maxscore={}
    
for key in big:
    # element [0] of every attempt record is taken as that attempt's score
    maxscore[key]= max([big[key][attempt][0] for attempt in big[key]])

# sort maximum scores, smallest to biggest
ranked_maxscore=sorted(maxscore[key] for key in maxscore)

# Score at each integer percentile 0..99 of the maximum scores
# (a list of 100 cut-off values, not a per-player ranking)
prcentiles=[]
for p in range(100):
    prcentiles.append(ssm.scoreatpercentile(ranked_maxscore,p))


# Disabled: per-player lookup of the last percentile score exceeded
#decile={}
#    
#for key in big:
#    for i in prcentiles:
#        if maxscore[key]>i:
#            decile[key]=prcentiles.index(float(i))

#so now we know how good each player is

#now let's calc variance

# Containers filled by code that follows this excerpt
av1={}
var1={}
コード例 #21
0
ファイル: sup_ee_observed.py プロジェクト: leungwk/axongame
# Keep only the players (keys of data) that have more than 9 entries
big = {k: data[k] for k in data if len(data[k]) > 9}  #pythonic

# --------------------------------------------
# Build a dict holding the maximum score of each player (= each key of big)
maxscore = {}

for key in big:
    # element [0] of every attempt record is taken as that attempt's score
    maxscore[key] = max([big[key][attempt][0] for attempt in big[key]])

# sort maximum scores, smallest to biggest
ranked_maxscore = sorted(maxscore[key] for key in maxscore)

# Score at each integer percentile 0..99 of the maximum scores
# (a list of 100 cut-off values, not a per-player ranking)
prcentiles = []
for p in range(100):
    prcentiles.append(ssm.scoreatpercentile(ranked_maxscore, p))

# Disabled: per-player lookup of the last percentile score exceeded
#decile={}
#
#for key in big:
#    for i in prcentiles:
#        if maxscore[key]>i:
#            decile[key]=prcentiles.index(float(i))

#so now we know how good each player is

#now let's calc variance

# Containers filled by code that follows this excerpt
av1 = {}
var1 = {}
av2 = {}
コード例 #22
0
ファイル: make_fig4.py プロジェクト: goryszewskig/axongame
execfile("fig4_boot.py") #this can take a long time (e.g. 24 hours) if you use many (e.g. 2000) resamples

#load 
#observed data
plot_timespread = pickle.load(open('save_plot_timespread.p', 'rb'))
#bootstrap data
bootdata = pickle.load(open('save_a4_2boot_bootdata.p','rb'))

#find CIs, using ssm

ci_upper=np.zeros( (1,100))
ci_lower=np.zeros( (1,100))
m_boot=np.zeros( (1,100))
 
for i in range(100):
    ci_upper[0,i]=ssm.scoreatpercentile(bootdata[i,:],97.5)
    ci_lower[0,i]=ssm.scoreatpercentile(bootdata[i,:],02.5)
    m_boot[0,i]=np.mean(bootdata[i,:])

print "PLOTTING"

# plot -------------------------------------------
# thank you tomas http://www.staff.ncl.ac.uk/tom.holderness/software/pythonlinearfit
plt.clf()
    
# plot sample data
plot(plot_timespread,'ro',label='Sample observations')
 
# plot line of best fit
plot(m_boot[(0,)],'b-',label='bootstrap_mean')
コード例 #23
0
ファイル: ps_fig3boot.py プロジェクト: leungwk/axongame
bootdata = np.zeros((100, boot_n))

print "Starting bootstrap calculations"
for n in range(boot_n):

    print "iteration " + str(n) + " of " + str(boot_n)
    #find maxscores, when actual scores are a sample [attempts] long of a
    #maxscore_boot={key: max(random.sample(a,len(big[key]))) for key in big}
    maxscore_boot = {key: max(sample_wr(a, len(big[key]))) for key in big}

    # sort maximum scores, smallest to biggest, put in list
    ranked_maxscore_boot = sorted(maxscore_boot[key] for key in maxscore_boot)

    #calculate percentiles on these bootstrapped maximum scores
    prcentiles_boot = [
        ssm.scoreatpercentile(ranked_maxscore_boot, p) for p in range(100)
    ]

    #assign prcentile to key in decile_boot
    decile_boot = {
        key: bisect.bisect(prcentiles_boot, maxscore_boot[key])
        for key in big
    }

    #now calculate timespread to score percentile, using these
    #bootstrapped maximum scores
    spreads_b = np.zeros((100, 1))  #holding var for the time
    counts_b = np.zeros((100, 1))  #holding var for the number of players' data

    #sort timespread into holding variables according to decile value
    for key in decile_boot:
コード例 #24
0
ファイル: ps_fig3.py プロジェクト: leungwk/axongame
    #load
    #observed data
    plot_timespread = pickle.load(open('save_plot_timespread.p', 'rb'))
    #bootstrap data
    bootdata = pickle.load(open('save_a4_2boot_bootdata.p', 'rb'))

print "finding CIs"

#find CIs, using ssm

# 1 x 100 holders: one column per row of bootdata
ci_upper = np.zeros((1, 100))
ci_lower = np.zeros((1, 100))
m_boot = np.zeros((1, 100))

# For each of the 100 rows of bootdata: 97.5th / 2.5th percentile scores
# and the mean of that row
for i in range(100):
    ci_upper[0, i] = ssm.scoreatpercentile(bootdata[i, :], 97.5)
    ci_lower[0, i] = ssm.scoreatpercentile(bootdata[i, :], 02.5)
    m_boot[0, i] = np.mean(bootdata[i, :])

print "running t-test"

#make the same shape
# Both arrays are flattened to length 100. The chained assignment also
# rebinds the name `a` each time (it ends up bound to the observed values).
expt = a = np.reshape(m_boot[(0, )],
                      100)  #expected values (from bootstrap, ie under H0)
obsv = a = np.reshape(plot_timespread, 100)  #observed values

#recode so that a positive difference supports the theory (ie that spacing helps)
#for the bottom 50% this means their observed is lower than expected
#for the top 50% this means their observed is higher than expected
diffs = np.concatenate([expt[0:50] - obsv[0:50], obsv[50:100] - expt[50:100]])
コード例 #25
0
    "flux weighted age residuals", "metallicity residuals",
    "dust extinction residuals"
]

# One figure per entry of res: a 5 x 13 grid of histograms (65 panels)
for j in xrange(len(res)):
    fig, axs = plt.subplots(5, 13, sharex=True, sharey=True, figsize=(20, 15))

    plt.xlim(-1.5, +1.5)
    plt.ylim(0, 40)

    # Flatten the grid so that panel i can be addressed as axs[i]
    axs = np.ravel(axs)

    for i in xrange(65):
        # Panel i shows the i-th consecutive slice of 100 residuals
        data = res[j][i * 100:(i + 1) * 100]
        median = np.median(data)
        p16 = st.scoreatpercentile(data, 16.0)
        p84 = st.scoreatpercentile(data, 84.0)

        counts, bins, patches = axs[i].hist(data,
                                            30,
                                            histtype="step",
                                            hatch="///",
                                            lw=1,
                                            color="#1A1A1A",
                                            range=(-1.5, +1.5))

        # Dashed line at the median, dash-dotted lines at the 16th/84th
        # percentiles
        axs[i].axvline(median, ls="--", lw=1.5, color="#000080")
        axs[i].axvline(p16, ls="-.", lw=1.5, color="#000080")
        axs[i].axvline(p84, ls="-.", lw=1.5, color="#000080")

    axs[-1].set_xticks([-1., 0, +1.])
コード例 #26
0
boot_n = 2000  # define how many resamples the bootstrap uses
# One row per percentile slot (100), one column per bootstrap resample
bootdata = np.zeros((100, boot_n))

print "Starting bootstrap calculations"
for n in range(boot_n):

    print "iteration " + str(n) + " of " + str(boot_n)
    # find maxscores, when actual scores are a sample [attempts] long of a
    # maxscore_boot={key: max(random.sample(a,len(big[key]))) for key in big}
    # NOTE(review): sample_wr is defined elsewhere; its name suggests
    # sampling with replacement - confirm against its definition.
    maxscore_boot = {key: max(sample_wr(a, len(big[key]))) for key in big}

    # sort maximum scores, smallest to biggest, put in list
    ranked_maxscore_boot = sorted(maxscore_boot[key] for key in maxscore_boot)

    # calculate percentiles on these bootstrapped maximum scores
    prcentiles_boot = [ssm.scoreatpercentile(ranked_maxscore_boot, p) for p in range(100)]

    # assign prcentile to key in decile_boot
    # (the insertion position of the player's maximum within the 100
    # percentile scores)
    decile_boot = {key: bisect.bisect(prcentiles_boot, maxscore_boot[key]) for key in big}

    # now calculate timespread to score percentile, using these
    # bootstrapped maximum scores
    spreads_b = np.zeros((100, 1))  # holding var for the time
    counts_b = np.zeros((100, 1))  # holding var for the number of players' data

    # sort timespread into holding variables according to decile value
    # NOTE(review): a bisect position of 0 would turn into index -1 here,
    # i.e. the last slot - confirm that position 0 cannot occur.
    for key in decile_boot:
        spreads_b[decile_boot[key] - 1] += timespread[key]
        counts_b[decile_boot[key] - 1] += 1

    # NOTE(review): slots with a zero count divide by zero here - confirm
    # how empty slots are handled downstream.
    t = spreads_b / counts_b  # find average
コード例 #27
0
ファイル: sup_ee_boot.py プロジェクト: ErwinHaasnoot/axongame
            second.append(sample_wr(bootdata[attempt],1))      
        av2[key]=sp.mean(second) 
        var2[key]=sp.var(second)
    
    
    #make list of summary stats
    x=[]
    y=[]
    for key in big:
        x.append(var1[key])
        y.append(av2[key])
    
    #find percentile values
    prcentiles_x=[]
    for p in range(100):
        prcentiles_x.append(ssm.scoreatpercentile(x,p))
    
    prcentiles_y=[]
    for p in range(100):
        prcentiles_y.append(ssm.scoreatpercentile(y,p))
    
    
    #make dict of prcentile values for each statistic for each player
    prcentile_xindex={key: bisect.bisect(prcentiles_x,var1[key]) for key in big}
    prcentile_yindex={key: bisect.bisect(prcentiles_y,av2[key]) for key in big}
           
#    #plot subset       
#    i=1001
#    for key in big:
#        i+=1
#        plot(prcentile_xindex[key],prcentile_yindex[key],'b.')