Python scoreatpercentileの例、scipy.stats.mstats.scoreatpercentile Pythonの例

コード例 #1

0

ファイルを表示

ファイル: plotter.py プロジェクト: imclab/climate

def _nice_intervals(data, nlevs):
    '''
    Purpose::
        Build "nice" color levels for colorbars and contour plots. The
        level range is bounded by the 5th and 95th percentiles of the
        data, so values in the tails (outliers) do not stretch it.

    Input::
        data - an array of data to be plotted
        nlevs - an int giving the target number of intervals

    Output::
        clevs - the levels proposed by MaxNLocator that lie inside the
                [5th percentile, 95th percentile] range
    '''
    flat = data.ravel()

    # Percentile bounds: trimming the tails limits the effect of outliers.
    lower, upper = [mstats.scoreatpercentile(flat, pct) for pct in (5, 95)]

    ticks = mpl.ticker.MaxNLocator(nlevs).tick_values(lower, upper)

    # MaxNLocator may propose ticks beyond the requested bounds; drop them.
    inside = (ticks >= lower) & (ticks <= upper)
    return ticks[inside]

コード例 #2

0

ファイルを表示

ファイル: sample_props.py プロジェクト: ajmejia/notebooks

def binner(x, y, w_sta, nbins, rang = None, ebar = False, per = None) :
    """Group ``y`` by bins along ``x`` and return one statistic per bin.

    Parameters
    ----------
    x, y : arrays that accept an integer index array (e.g. numpy arrays)
        Paired samples; they are sorted by ``x`` (ties broken by ``y``).
    w_sta : {"median", "mean", "mode"}
        Statistic computed over the ``y`` values of each bin.
    nbins : int
        Number of bin centres returned.
    rang : (min, max), optional
        Range covered by the bins; defaults to the min and max of ``x``.
    ebar : bool, optional
        When true, also return percentile-based error bars.
    per : pair of percentiles, optional
        Lower and upper percentile used for the error bars; defaults to
        (15.8, 84.0).

    Returns
    -------
    (x_cen, y_sta) or (x_cen, y_sta, yer)
        ``yer`` stacks the absolute distances between ``y_sta`` and the
        lower / upper percentile of each bin, in that order.

    Raises
    ------
    ValueError
        If ``w_sta`` is not one of the supported statistics (this used to
        surface later as an UnboundLocalError).
    """
    from numpy import array, digitize, lexsort, linspace
    from numpy.ma import average, median

    # `mode` and `scoreatpercentile` are resolved from the enclosing module.
    statistics = {
        "median": median,
        "mean": average,
        "mode": lambda values: mode(values)[0],
    }
    if w_sta not in statistics:
        raise ValueError("w_sta must be 'median', 'mean' or 'mode', got %r" % (w_sta,))
    statistic = statistics[w_sta]

    ind = lexsort((y, x))
    xs, ys = x[ind], y[ind]

    if rang is None:
        mn, mx = min(xs), max(xs)
    else:
        mn, mx = rang

    # NOTE(review): the centres are midpoints of nbins + 1 edges, while the
    # samples are digitized against nbins edges, so centres and groups do
    # not describe the same intervals. Kept as-is to preserve results --
    # confirm whether this is intended.
    edges = linspace(mn, mx, nbins + 1)
    x_cen = (edges[:-1] + edges[1:]) * 0.5
    bins = linspace(mn, mx, nbins)
    ibins = digitize(xs, bins)

    # Select the y values of each bin once; the statistic and both error
    # bars reuse the same selections.
    groups = [ys[ibins == i] for i in range(1, bins.size + 1)]
    y_sta = array([statistic(group) for group in groups])

    if not ebar:
        return x_cen, y_sta

    if per is None:
        lower, upper = 15.8, 84.0
    else:
        lower, upper = per[0], per[1]

    myer = abs(array([scoreatpercentile(group, lower) for group in groups]) - y_sta)
    pyer = abs(array([scoreatpercentile(group, upper) for group in groups]) - y_sta)
    yer = array([myer, pyer])
    return x_cen, y_sta, yer

コード例 #3

0

ファイルを表示

ファイル: plotter.py プロジェクト: imclab/climate

def _nice_intervals(data, nlevs):
    '''
    Purpose::
        Pick the color levels used by colorbars and contour plots. The
        lowest and highest admissible levels are the 5th and 95th
        percentiles of the data, which keeps outliers in the tails from
        dominating the color range.

    Input::
        data - an array of data to be plotted
        nlevs - an int giving the target number of intervals

    Output::
        clevs - the MaxNLocator tick values that fall between the two
                percentile bounds (inclusive)
    '''
    values = data.ravel()

    # Clip the range at the 5th / 95th percentiles to ignore outliers.
    mnlvl = mstats.scoreatpercentile(values, 5)
    mxlvl = mstats.scoreatpercentile(values, 95)

    candidates = mpl.ticker.MaxNLocator(nlevs).tick_values(mnlvl, mxlvl)

    # The locator can return ticks outside [mnlvl, mxlvl]; keep only the
    # ones inside the data-driven bounds.
    not_below = candidates >= mnlvl
    not_above = candidates <= mxlvl
    return candidates[not_below & not_above]

コード例 #4

0

ファイルを表示

ファイル: workingset_selection.py プロジェクト: ajmejia/notebooks

def nbins(sample, range_ = None) :
  """Suggest a histogram binning for `sample`.

  The bin width is 2 * IQR / n ** (1/3) (the Freedman-Diaconis rule),
  where IQR and n are taken from the values lying inside [mn, mx].

  Returns a tuple (number of bins, mn, mx, binsize); the number of bins
  is (mx - mn) / binsize and is not rounded.
  """
  if range_ is None :
    mn = sample.min()
    mx = sample.max()
  else :
    mn, mx = range_

  inside   = (sample >= mn) & (sample <= mx)
  selected = sample[inside]

  # Interquartile range of the selected values
  q75 = st.scoreatpercentile(selected, 75.0)
  q25 = st.scoreatpercentile(selected, 25.0)

  binsize = 2 * (q75 - q25) / inside.sum() ** (1. / 3)

  return (mx - mn) / binsize, mn, mx, binsize

コード例 #5

0

ファイルを表示

def nbins(sample, range_=None):
    """Compute a histogram bin count and width from the interquartile range.

    The values of `sample` inside [mn, mx] (the sample extremes unless
    `range_` supplies them) give the bin width 2 * IQR / n ** (1/3).

    Returns (bin count as an unrounded ratio, mn, mx, bin width).
    """
    def interquartile_range(values):
        upper = st.scoreatpercentile(values, 75.0)
        lower = st.scoreatpercentile(values, 25.0)
        return upper - lower

    mn, mx = (sample.min(), sample.max()) if range_ is None else range_

    in_range = (sample >= mn) & (sample <= mx)
    count = in_range.sum()
    binsize = 2 * interquartile_range(sample[in_range]) / count ** (1. / 3)

    return (mx - mn) / binsize, mn, mx, binsize

コード例 #6

0

ファイルを表示

ファイル: sample_props.py プロジェクト: ajmejia/notebooks

def binner(x, y, w_sta, nbins, rang=None, ebar=False, per=None):
    """Group ``y`` by bins along ``x`` and return one statistic per bin.

    Parameters
    ----------
    x, y : arrays that accept an integer index array (e.g. numpy arrays)
        Paired samples; they are sorted by ``x`` (ties broken by ``y``).
    w_sta : {"median", "mean", "mode"}
        Statistic computed over the ``y`` values of each bin.
    nbins : int
        Number of bin centres returned.
    rang : (min, max), optional
        Range covered by the bins; defaults to the min and max of ``x``.
    ebar : bool, optional
        When true, also return percentile-based error bars.
    per : pair of percentiles, optional
        Lower and upper percentile used for the error bars; defaults to
        (15.8, 84.0).

    Returns
    -------
    (x_cen, y_sta) or (x_cen, y_sta, yer)
        ``yer`` stacks the absolute distances between ``y_sta`` and the
        lower / upper percentile of each bin, in that order.

    Raises
    ------
    ValueError
        If ``w_sta`` is not one of the supported statistics (this used to
        surface later as an UnboundLocalError).
    """
    from numpy import array, digitize, lexsort, linspace
    from numpy.ma import average, median

    # `mode` and `scoreatpercentile` are resolved from the enclosing module.
    statistics = {
        "median": median,
        "mean": average,
        "mode": lambda values: mode(values)[0],
    }
    if w_sta not in statistics:
        raise ValueError(
            "w_sta must be 'median', 'mean' or 'mode', got %r" % (w_sta,))
    statistic = statistics[w_sta]

    ind = lexsort((y, x))
    xs, ys = x[ind], y[ind]

    if rang is None:
        mn, mx = min(xs), max(xs)
    else:
        mn, mx = rang

    # NOTE(review): the centres are midpoints of nbins + 1 edges, while the
    # samples are digitized against nbins edges, so centres and groups do
    # not describe the same intervals. Kept as-is to preserve results --
    # confirm whether this is intended.
    edges = linspace(mn, mx, nbins + 1)
    x_cen = (edges[:-1] + edges[1:]) * 0.5
    bins = linspace(mn, mx, nbins)
    ibins = digitize(xs, bins)

    # Select the y values of each bin once; the statistic and both error
    # bars reuse the same selections.
    groups = [ys[ibins == i] for i in range(1, bins.size + 1)]
    y_sta = array([statistic(group) for group in groups])

    if not ebar:
        return x_cen, y_sta

    if per is None:
        lower, upper = 15.8, 84.0
    else:
        lower, upper = per[0], per[1]

    myer = abs(
        array([scoreatpercentile(group, lower) for group in groups]) - y_sta)
    pyer = abs(
        array([scoreatpercentile(group, upper) for group in groups]) - y_sta)
    yer = array([myer, pyer])
    return x_cen, y_sta, yer

コード例 #7

0

ファイルを表示

ファイル: clean_data.py プロジェクト: calanoue/GFIN_Data_Work

    def clean_outliers(self):
        """
        Mask outliers in every row of ``self.xs`` using percentile bounds.

        For each row the bounds are the ``100 - self.outlier_perc`` and
        ``self.outlier_perc`` percentiles of the whole row. Values outside
        the bounds are masked, except the last ``self.keep_n_values``
        entries of the row, which are appended unchanged. ``self.xs`` is
        replaced by the resulting masked array.

        Parameters
        ----------
        self.xs : 2-D array indexed as [row, column]
            Data to clean.
        self.rows_N : integer
            Number of rows that are processed.
        self.outlier_perc : integer
            Percentile value for mstats.scoreatpercentile function; the
            complementary percentile gives the other bound.
        self.keep_n_values : integer
            Number of trailing values per row that are never masked.
        """
        # Outliers using percentiles - num_rows * [min, max]
        outlier_all = ma.array([[mstats.scoreatpercentile(self.xs[i, :], 100 - self.outlier_perc),
               mstats.scoreatpercentile(self.xs[i, :], self.outlier_perc)] for i in range(self.rows_N)])

        # Split each row at an explicit column index. Slicing with
        # ``-keep_n_values`` breaks for keep_n_values == 0: ``[:-0]`` is
        # empty and ``[-0:]`` is the whole row, so nothing was ever masked.
        split = max(self.xs.shape[1] - self.keep_n_values, 0)

        cleaned_rows = []
        for i in range(self.rows_N):
            head = ma.masked_outside(self.xs[i, :split], outlier_all[i, 0], outlier_all[i, 1])
            cleaned_rows.append(ma.hstack((head, self.xs[i, split:])))
        self.xs = ma.array(cleaned_rows)

コード例 #8

0

ファイルを表示

ファイル: test_mstats_basic.py プロジェクト: andycasey/scipy

 def test_2D(self):
     x = ma.array([[1, 1, 1],
                   [1, 1, 1],
                   [4, 4, 3],
                   [1, 1, 1],
                   [1, 1, 1]])
     assert_equal(mstats.scoreatpercentile(x,50), [1,1,1])

コード例 #9

0

ファイルを表示

ファイル: test_mstats_basic.py プロジェクト: alouisos/scipy

 def test_2D(self):
     x = ma.array([[1, 1, 1],
                   [1, 1, 1],
                   [4, 4, 3],
                   [1, 1, 1],
                   [1, 1, 1]])
     assert_equal(mstats.scoreatpercentile(x,50), [1,1,1])

コード例 #10

0

ファイルを表示

def _nice_intervals(data, nlevs):
    '''
    Purpose::
        Calculates nice intervals between each color level for colorbars
        and contour plots. The target minimum and maximum color levels are
        the 5th and 95th percentiles of the data, which cuts off the tails
        of the distribution to remove outliers. When that range straddles
        zero it is made symmetric so that 0 sits in the center.

    Input::
        data - an array of data to be plotted
        nlevs - an int giving the target number of intervals

    Output::
        clevs - the MaxNLocator tick values lying within the target
                minimum and maximum levels (inclusive)
    '''
    # Find the min and max levels by cutting off the tails of the distribution
    # This mitigates the influence of outliers
    data = data.ravel()
    mn = mstats.scoreatpercentile(data, 5)
    mx = mstats.scoreatpercentile(data, 95)
    if mn < 0 and mx > 0:
        # Range straddles zero: use symmetric bounds so 0 is centered
        level = max(abs(mn), abs(mx))
        mnlvl = -1 * level
        mxlvl = level
    else:
        # Range is entirely on one side of zero: use it as-is
        mnlvl = mn
        mxlvl = mx

    # hack to make generated intervals from mpl the same for all versions
    autolimit_mode = mpl.rcParams.get('axes.autolimit_mode')
    if autolimit_mode:
        mpl.rc('axes', autolimit_mode='round_numbers')
    try:
        locator = mpl.ticker.MaxNLocator(nlevs)
        clevs = locator.tick_values(mnlvl, mxlvl)
    finally:
        # Restore the global rc setting even when the locator raises, so a
        # failure here cannot leak 'round_numbers' into later plots.
        if autolimit_mode:
            mpl.rc('axes', autolimit_mode=autolimit_mode)

    # Make sure the bounds of clevs are reasonable since sometimes
    # MaxNLocator gives values outside the domain of the input data
    clevs = clevs[(clevs >= mnlvl) & (clevs <= mxlvl)]
    return clevs

コード例 #11

0

ファイルを表示

ファイル: plotter.py プロジェクト: MichaelArthurAnderson/climate

def _nice_intervals(data, nlevs):
    '''
    Purpose::
        Calculates nice intervals between each color level for colorbars
        and contour plots. The target minimum and maximum color levels are
        the 5th and 95th percentiles of the data, which cuts off the tails
        of the distribution to remove outliers. When that range straddles
        zero it is made symmetric so that 0 sits in the center.

    Input::
        data - an array of data to be plotted
        nlevs - an int giving the target number of intervals

    Output::
        clevs - the MaxNLocator tick values lying within the target
                minimum and maximum levels (inclusive)
    '''
    # Find the min and max levels by cutting off the tails of the distribution
    # This mitigates the influence of outliers
    data = data.ravel()
    mn = mstats.scoreatpercentile(data, 5)
    mx = mstats.scoreatpercentile(data, 95)
    if mn < 0 and mx > 0:
        # Range straddles zero: use symmetric bounds so 0 is centered
        level = max(abs(mn), abs(mx))
        mnlvl = -1 * level
        mxlvl = level
    else:
        # Range is entirely on one side of zero: use it as-is
        mnlvl = mn
        mxlvl = mx

    # hack to make generated intervals from mpl the same for all versions
    autolimit_mode = mpl.rcParams.get('axes.autolimit_mode')
    if autolimit_mode:
        mpl.rc('axes', autolimit_mode='round_numbers')
    try:
        locator = mpl.ticker.MaxNLocator(nlevs)
        clevs = locator.tick_values(mnlvl, mxlvl)
    finally:
        # Restore the global rc setting even when the locator raises, so a
        # failure here cannot leak 'round_numbers' into later plots.
        if autolimit_mode:
            mpl.rc('axes', autolimit_mode=autolimit_mode)

    # Make sure the bounds of clevs are reasonable since sometimes
    # MaxNLocator gives values outside the domain of the input data
    clevs = clevs[(clevs >= mnlvl) & (clevs <= mxlvl)]
    return clevs

コード例 #12

0

ファイルを表示

ファイル: test_mstats_basic.py プロジェクト: alouisos/scipy

 def test_percentile(self):
     x = np.arange(8) * 0.5
     assert_equal(mstats.scoreatpercentile(x, 0), 0.)
     assert_equal(mstats.scoreatpercentile(x, 100), 3.5)
     assert_equal(mstats.scoreatpercentile(x, 50), 1.75)

コード例 #13

0

ファイルを表示

ファイル: sup_make_eeheatmap.py プロジェクト: leungwk/axongame

#solution attempt 2 - heatmap
plt.clf()
gridsize = 20
plt.hexbin(xlist, ylist, gridsize=gridsize, cmap=cm.jet, bins=None)
cb = plt.colorbar()
cb.set_label('frequency')

xlabel('percentile by variation in first five plays')
ylabel('percentile by average in second five plays')
print "r = %.3f, p = %.5f" % pearsonr(xlist, ylist)

savefig('explore_exploit_scatterheatmap.png',
        dpi=300,
        facecolor='w',
        edgecolor='w',
        orientation='portrait',
        papertype=None,
        format=None,
        transparent=False,
        bbox_inches='tight',
        pad_inches=0.1)

#now do CI for r value

bootrec = pickle.load(open('save_a5_boot_bootrec.p', 'rb'))
bootrec = bootrec[0]
ci_upper = ssm.scoreatpercentile(bootrec, 97.5)
ci_lower = ssm.scoreatpercentile(bootrec, 02.5)
ci_mean = np.mean(bootrec)
print "Bootstrapped confidence intervals were Upper = %0.3f, Lower = %0.3f" % (
    ci_upper, ci_lower)

コード例 #14

0

ファイルを表示

ファイル: ps_fig3obs.py プロジェクト: ErwinHaasnoot/axongame

# Write through a context manager so the pickle is flushed and the file
# handle closed right away (the bare open() left that to the garbage
# collector).
with open('save_a4_1_a.p', 'wb') as a_file:
    pickle.dump(a, a_file)

# --------------------------------------------
#calc dict of maximum score for each player(=each key)
maxscore={}

for key in big:
    # element [0] of every attempt record is compared; the largest is kept
    maxscore[key]= max([big[key][attempt][0] for attempt in big[key]])

# sort maximum scores, smallest to biggest
ranked_maxscore=sorted(maxscore[key] for key in maxscore)

#calc percentile ranking for each player (=each key)
# scores at the integer percentiles 0..99 (100 is not included)
prcentiles=[]
for p in range(100):
    prcentiles.append(ssm.scoreatpercentile(ranked_maxscore,p))


decile={}

for key in big:
    # Every cut point below the player's maximum overwrites the entry, so
    # the last match wins; .index() returns the first position holding
    # that value. Keys whose maximum exceeds no cut point get no entry.
    for i in prcentiles:
        if maxscore[key]>i:
            decile[key]=prcentiles.index(float(i))

#------------------------------------------------
# now calculate some index of spread
# - the simplest one is range

timespread={}

コード例 #15

0

ファイルを表示

ファイル: sup_ee_boot.py プロジェクト: leungwk/axongame

        # One sample_wr draw of size 1 per attempt in second_plays
        # (sample_wr is defined elsewhere; presumably sampling with
        # replacement -- verify), then summarise the draws for this key.
        for attempt in second_plays:
            second.append(sample_wr(bootdata[attempt], 1))
        av2[key] = sp.mean(second)
        var2[key] = sp.var(second)

    #make list of summary stats
    # x collects var1 and y collects av2, both in the iteration order of big
    x = []
    y = []
    for key in big:
        x.append(var1[key])
        y.append(av2[key])

    #find percentile values
    # scores at the integer percentiles 0..99 (100 is not included)
    prcentiles_x = []
    for p in range(100):
        prcentiles_x.append(ssm.scoreatpercentile(x, p))

    prcentiles_y = []
    for p in range(100):
        prcentiles_y.append(ssm.scoreatpercentile(y, p))

    #make dict of prcentile values for each statistic for each player
    # bisect.bisect returns the insertion point to the right of equal
    # entries, so each index lies between 0 and 100 inclusive
    prcentile_xindex = {
        key: bisect.bisect(prcentiles_x, var1[key])
        for key in big
    }
    prcentile_yindex = {
        key: bisect.bisect(prcentiles_y, av2[key])
        for key in big
    }

コード例 #16

0

ファイルを表示

ファイル: make_fig6.py プロジェクト: goryszewskig/axongame

        
#solution attempt 2 - heatmap
plt.clf()
gridsize=20
plt.hexbin(xlist, ylist,gridsize=gridsize, cmap=cm.jet, bins=None)
cb = plt.colorbar()
cb.set_label('frequency')

xlabel('percentile by variation in first five plays')
ylabel('percentile by average in second five plays')
print "r = %.3f, p = %.5f" % pearsonr(xlist,ylist)

savefig('Figure6.png', dpi=300, facecolor='w', edgecolor='w',
        orientation='portrait', papertype=None, format=None,
        transparent=False, bbox_inches='tight', pad_inches=0.1) 

generatepaperfigs=0
if generatepaperfigs:
    savefig('../cogsci13/figures/a5_e-e_heatscatter.png', dpi=300, facecolor='w', edgecolor='w',
        orientation='portrait', papertype=None, format=None,
        transparent=False, bbox_inches='tight', pad_inches=0.1) 
        
#now do CI for r value

bootrec=pickle.load(open('save_a5_boot_bootrec.p', 'rb'))
bootrec=bootrec[0]
ci_upper=ssm.scoreatpercentile(bootrec,97.5)
ci_lower=ssm.scoreatpercentile(bootrec,02.5)
ci_mean=np.mean(bootrec)
print "Bootstrapped confidence intervals were Upper = %0.3f, Lower = %0.3f" % (ci_upper,ci_lower)

コード例 #17

0

ファイルを表示

ファイル: test_mstats_basic.py プロジェクト: andycasey/scipy

 def test_percentile(self):
     x = np.arange(8) * 0.5
     assert_equal(mstats.scoreatpercentile(x, 0), 0.)
     assert_equal(mstats.scoreatpercentile(x, 100), 3.5)
     assert_equal(mstats.scoreatpercentile(x, 50), 1.75)

コード例 #18

0

ファイルを表示

ファイル: funcs.py プロジェクト: ErwinHaasnoot/axongame

def drawGraphs(outFolder, bootName,  windowSizes1, windowSizes2, zBottom = -1, zTop = 1):
    import matplotlib
    matplotlib.use('PDF')
    from matplotlib import pyplot as plt
    from mpl_toolkits.mplot3d import Axes3D
    print 'Drawing bootstrap graphs for: {}'.format(bootName)
    bootrec = pickle.load(open('{}/{}/bootrec.p'.format(outFolder,bootName),'rb'))
    plt.close('all')
    Z_obs = np.zeros((len(windowSizes1),len(windowSizes2)))
    Z_lower = np.zeros((len(windowSizes1),len(windowSizes2)))
    Z_boot = np.zeros((len(windowSizes1),len(windowSizes2)))
    Z_upper = np.zeros((len(windowSizes1),len(windowSizes2)))
    Z_std = np.zeros((len(windowSizes1),len(windowSizes2)))
    
    currentFolder = outFolder + '/' + bootName
    
    for i1 in xrange(len(windowSizes1)):
        for i2 in xrange(len(windowSizes2)):
            groupn_i = windowSizes1[i1]       
            groupn_j = windowSizes2[i2]
            curbootrec=bootrec[0,i1,i2]
            print
            print "Analyzing %i - %i" % (groupn_i,groupn_j)
            xlist= pickle.load(open(currentFolder + '/save_a5_xlist' + str(groupn_i) + "," + str(groupn_j) +'.p', 'rb'))
            ylist= pickle.load(open(currentFolder + '/save_a5_ylist' + str(groupn_i) + "," + str(groupn_j) +'.p', 'rb'))
            a,b = pearsonr(xlist,ylist)
                    
            #now do CI for r value
            ci_upper=ssm.scoreatpercentile(curbootrec,97.5)
            ci_lower=ssm.scoreatpercentile(curbootrec,02.5)
            ci_mean=np.mean(curbootrec)
            ci_std=np.var(curbootrec)
            print scipy.stats.norm(ci_mean,ci_std).cdf(abs(a))
            print "r = %.3f, p = %.5f, %s of confidence interval" % (a,b, 'outside' if a > ci_upper or a < ci_lower else 'inside')
            print "Bootstrapped confidence intervals were Upper = %0.3f, Lower = %0.3f" % (ci_upper,ci_lower)
            
            Z_obs[i1][i2] = a 
            Z_upper[i1][i2] = ci_upper         
            Z_lower[i1][i2] = ci_lower    
            Z_boot[i1][i2] = ci_mean
            Z_std[i1][i2] = ci_std
            
            
    X = [[k for j in windowSizes2] for k in windowSizes1]  
    Y = [[j for j in windowSizes2] for k in windowSizes1]
    #One-sided Z value to p value
    Z_p = [[st.norm.sf((Z_obs[i][j] - Z_boot[i][j])/Z_std[i][j]) for j in range(len(windowSizes1))] for i in range(len(windowSizes1))]
    
    fig1 = plt.figure()
    
    fontsize = 16
    ax = fig1.add_subplot(111, projection='3d')
    ax.plot_surface(X, Y, Z_obs, rstride=1, cstride=1)
    fig1.suptitle('Observed Correlations', fontsize=20)
    ax.set_xlabel('Size window 1', fontsize = fontsize)
    ax.set_ylabel('Size window 2', fontsize = fontsize)
    ax.set_zlabel('r', fontsize = fontsize)
    ax.set_zlim(bottom = zBottom, top = zTop)
    
    plt.savefig('{}/figures/{}_corObs.pdf'.format(outFolder,bootName), bbox_inches='tight')
        
    fig2 = plt.figure()
    
    ax = fig2.add_subplot(111, projection='3d')
    ax.plot_surface(X, Y, Z_boot, rstride=1, cstride=1)
    fig2.suptitle('Bootstrap Average Correlations', fontsize=20)
    ax.set_xlabel('Size window 1', fontsize = fontsize)
    ax.set_ylabel('Size window 2', fontsize = fontsize)
    ax.set_zlabel('r', fontsize = fontsize)
    ax.set_zlim(bottom = zBottom, top = zTop)
    
    plt.savefig('{}/figures/{}_corBoot.pdf'.format(outFolder,bootName), bbox_inches='tight')

コード例 #19

0

ファイルを表示

ファイル: residual_hists_grid.py プロジェクト: ajmejia/notebooks

# One rt.err result per adjacent column pair of table (columns 0/1, 2/3,
# ..., 8/9); every pair except the first passes False as third argument.
res = [rt.err(table[:, i], table[:, i + 1]) if i == 0 else rt.err(table[:, i], table[:, i + 1], False) for i in xrange(0, 10, 2)]
# Axis labels, one per entry of res
lab = ["mass residuals", "mass weighted age residuals", "flux weighted age residuals", "metallicity residuals", "dust extinction residuals"]

# One figure per residual type: a 5 x 13 grid of histograms sharing both axes
for j in xrange(len(res)) :
  fig, axs = plt.subplots(5, 13, sharex = True, sharey = True, figsize = (20, 15))

  plt.xlim(-1.5, +1.5)
  plt.ylim(0, 40)

  # Flatten the grid so the 65 panels can be addressed with a single index
  axs = np.ravel(axs)

  for i in xrange(65) :
    # Panel i shows the i-th consecutive slice of 100 values
    data   = res[j][i * 100:(i + 1) * 100]
    median = np.median(data)
    p16    = st.scoreatpercentile(data, 16.0)
    p84    = st.scoreatpercentile(data, 84.0)

    counts, bins, patches = axs[i].hist(data, 30, histtype = "step", hatch = "///", lw = 1, color = "#1A1A1A", range = (-1.5, +1.5))

    # Vertical markers: dashed line at the median, dash-dot lines at the
    # 16th and 84th percentiles
    axs[i].axvline(median, ls = "--", lw = 1.5, color = "#000080")
    axs[i].axvline(p16, ls = "-.", lw = 1.5, color = "#000080")
    axs[i].axvline(p84, ls = "-.", lw = 1.5, color = "#000080")

  axs[-1].set_xticks([-1., 0, +1.])
  # NOTE: i still holds its final loop value (64) here, so the y ticks are
  # taken from the last panel, minus the first and last tick.
  axs[52].set_yticks(list(axs[i].get_yticks()[1:-1]))
  axs[58].set_xlabel(lab[j], fontsize = 16)
  axs[26].set_ylabel("counts", fontsize = 16)

  plt.tight_layout()
  plt.subplots_adjust(wspace = 0.01, hspace = 0.01, bottom = 0.06)

コード例 #20

0

ファイルを表示

ファイル: sup_ee_observed.py プロジェクト: ErwinHaasnoot/axongame


# --------------------------------------------
#calc dict of maximum score for each player(=each key)
maxscore={}
    
for key in big:
    # element [0] of every attempt record is compared; the largest is kept
    maxscore[key]= max([big[key][attempt][0] for attempt in big[key]])

# sort maximum scores, smallest to biggest
ranked_maxscore=sorted(maxscore[key] for key in maxscore)

#calc percentile ranking for each player (=each key)
# scores at the integer percentiles 0..99 (100 is not included)
prcentiles=[]
for p in range(100):
    prcentiles.append(ssm.scoreatpercentile(ranked_maxscore,p))


#decile={}
#    
#for key in big:
#    for i in prcentiles:
#        if maxscore[key]>i:
#            decile[key]=prcentiles.index(float(i))

#so now we know how good each player is

#now let's calc variance

# empty containers, filled by code that follows this excerpt
av1={}
var1={}

コード例 #21

0

ファイルを表示

ファイル: sup_ee_observed.py プロジェクト: leungwk/axongame

# Keep only the entries of data that hold more than nine items
big = {k: data[k] for k in data if len(data[k]) > 9}  #pythonic

# --------------------------------------------
#calc dict of maximum score for each player(=each key)
maxscore = {}

for key in big:
    # element [0] of every attempt record is compared; the largest is kept
    maxscore[key] = max([big[key][attempt][0] for attempt in big[key]])

# sort maximum scores, smallest to biggest
ranked_maxscore = sorted(maxscore[key] for key in maxscore)

#calc percentile ranking for each player (=each key)
# scores at the integer percentiles 0..99 (100 is not included)
prcentiles = []
for p in range(100):
    prcentiles.append(ssm.scoreatpercentile(ranked_maxscore, p))

#decile={}
#
#for key in big:
#    for i in prcentiles:
#        if maxscore[key]>i:
#            decile[key]=prcentiles.index(float(i))

#so now we know how good each player is

#now let's calc variance

# empty containers, filled by code that follows this excerpt
av1 = {}
var1 = {}
av2 = {}

コード例 #22

0

ファイルを表示

ファイル: make_fig4.py プロジェクト: goryszewskig/axongame

execfile("fig4_boot.py") #this can take a long time (e.g. 24 hours) if you use many (e.g. 2000) resamples

#load 
#observed data
plot_timespread = pickle.load(open('save_plot_timespread.p', 'rb'))
#bootstrap data
bootdata = pickle.load(open('save_a4_2boot_bootdata.p','rb'))

#find CIs, using ssm

ci_upper=np.zeros( (1,100))
ci_lower=np.zeros( (1,100))
m_boot=np.zeros( (1,100))
 
for i in range(100):
    ci_upper[0,i]=ssm.scoreatpercentile(bootdata[i,:],97.5)
    ci_lower[0,i]=ssm.scoreatpercentile(bootdata[i,:],02.5)
    m_boot[0,i]=np.mean(bootdata[i,:])

print "PLOTTING"

# plot -------------------------------------------
# thank you tomas http://www.staff.ncl.ac.uk/tom.holderness/software/pythonlinearfit
plt.clf()
    
# plot sample data
plot(plot_timespread,'ro',label='Sample observations')
 
# plot line of best fit
plot(m_boot[(0,)],'b-',label='bootstrap_mean')

コード例 #23

0

ファイルを表示

ファイル: ps_fig3boot.py プロジェクト: leungwk/axongame

# One column per bootstrap iteration, 100 rows
bootdata = np.zeros((100, boot_n))

print "Starting bootstrap calculations"
for n in range(boot_n):

    print "iteration " + str(n) + " of " + str(boot_n)
    #find maxscores, when actual scores are a sample [attempts] long of a
    #maxscore_boot={key: max(random.sample(a,len(big[key]))) for key in big}
    # sample_wr is defined elsewhere; presumably sampling with
    # replacement -- verify
    maxscore_boot = {key: max(sample_wr(a, len(big[key]))) for key in big}

    # sort maximum scores, smallest to biggest, put in list
    ranked_maxscore_boot = sorted(maxscore_boot[key] for key in maxscore_boot)

    #calculate percentiles on these bootstrapped maximum scores
    # scores at the integer percentiles 0..99 (100 is not included)
    prcentiles_boot = [
        ssm.scoreatpercentile(ranked_maxscore_boot, p) for p in range(100)
    ]

    #assign prcentile to key in decile_boot
    # bisect.bisect returns the insertion point to the right of equal
    # entries, so each value lies between 0 and 100 inclusive
    decile_boot = {
        key: bisect.bisect(prcentiles_boot, maxscore_boot[key])
        for key in big
    }

    #now calculate timespread to score percentile, using these
    #bootstrapped maximum scores
    spreads_b = np.zeros((100, 1))  #holding var for the time
    counts_b = np.zeros((100, 1))  #holding var for the number of players' data

    #sort timespread into holding variables according to decile value
    for key in decile_boot:

コード例 #24

0

ファイルを表示

ファイル: ps_fig3.py プロジェクト: leungwk/axongame

    #load
    #observed data
    plot_timespread = pickle.load(open('save_plot_timespread.p', 'rb'))
    #bootstrap data
    bootdata = pickle.load(open('save_a4_2boot_bootdata.p', 'rb'))

print "finding CIs"

#find CIs, using ssm

# One column per row of bootdata: 97.5th / 2.5th percentile and mean
ci_upper = np.zeros((1, 100))
ci_lower = np.zeros((1, 100))
m_boot = np.zeros((1, 100))

for i in range(100):
    ci_upper[0, i] = ssm.scoreatpercentile(bootdata[i, :], 97.5)
    ci_lower[0, i] = ssm.scoreatpercentile(bootdata[i, :], 02.5)
    m_boot[0, i] = np.mean(bootdata[i, :])

print "running t-test"

#make the same shape
# NOTE: each chained assignment also binds the name a; the second line
# rebinds it, so a ends up equal to obsv.
expt = a = np.reshape(m_boot[(0, )],
                      100)  #expected values (from bootstrap, ie under H0)
obsv = a = np.reshape(plot_timespread, 100)  #observed values

#recode so that a positive difference supports the theory (ie that spacing helps)
#for the bottom 50% this means their observed is lower than expected
#for the top 50% this means their obseved is higher than expected
# first 50 entries: expected - observed; last 50 entries: observed - expected
diffs = np.concatenate([expt[0:50] - obsv[0:50], obsv[50:100] - expt[50:100]])

コード例 #25

0

ファイルを表示

    "flux weighted age residuals", "metallicity residuals",
    "dust extinction residuals"
]

# One figure per entry of res: a 5 x 13 grid of histograms sharing both axes
for j in xrange(len(res)):
    fig, axs = plt.subplots(5, 13, sharex=True, sharey=True, figsize=(20, 15))

    plt.xlim(-1.5, +1.5)
    plt.ylim(0, 40)

    # Flatten the grid so the 65 panels can be addressed with a single index
    axs = np.ravel(axs)

    for i in xrange(65):
        # Panel i shows the i-th consecutive slice of 100 values
        data = res[j][i * 100:(i + 1) * 100]
        median = np.median(data)
        p16 = st.scoreatpercentile(data, 16.0)
        p84 = st.scoreatpercentile(data, 84.0)

        counts, bins, patches = axs[i].hist(data,
                                            30,
                                            histtype="step",
                                            hatch="///",
                                            lw=1,
                                            color="#1A1A1A",
                                            range=(-1.5, +1.5))

        # Vertical markers: dashed line at the median, dash-dot lines at
        # the 16th and 84th percentiles
        axs[i].axvline(median, ls="--", lw=1.5, color="#000080")
        axs[i].axvline(p16, ls="-.", lw=1.5, color="#000080")
        axs[i].axvline(p84, ls="-.", lw=1.5, color="#000080")

    axs[-1].set_xticks([-1., 0, +1.])

コード例 #26

0

ファイルを表示

ファイル: ps_fig3boot.py プロジェクト: digital-carver/axongame

boot_n = 2000  # define how many resamples the bootstrap uses
# One column per bootstrap iteration, 100 rows
bootdata = np.zeros((100, boot_n))

print "Starting bootstrap calculations"
for n in range(boot_n):

    print "iteration " + str(n) + " of " + str(boot_n)
    # find maxscores, when actual scores are a sample [attempts] long of a
    # maxscore_boot={key: max(random.sample(a,len(big[key]))) for key in big}
    # sample_wr is defined elsewhere; presumably sampling with
    # replacement -- verify
    maxscore_boot = {key: max(sample_wr(a, len(big[key]))) for key in big}

    # sort maximum scores, smallest to biggest, put in list
    ranked_maxscore_boot = sorted(maxscore_boot[key] for key in maxscore_boot)

    # calculate percentiles on these bootstrapped maximum scores
    # (scores at the integer percentiles 0..99; 100 is not included)
    prcentiles_boot = [ssm.scoreatpercentile(ranked_maxscore_boot, p) for p in range(100)]

    # assign prcentile to key in decile_boot
    # bisect.bisect returns the insertion point to the right of equal
    # entries, so each value lies between 0 and 100 inclusive
    decile_boot = {key: bisect.bisect(prcentiles_boot, maxscore_boot[key]) for key in big}

    # now calculate timespread to score percentile, using these
    # bootstrapped maximum scores
    spreads_b = np.zeros((100, 1))  # holding var for the time
    counts_b = np.zeros((100, 1))  # holding var for the number of players' data

    # sort timespread into holding variables according to decile value
    # NOTE(review): a decile value of 0 gives index -1, which addresses the
    # last row -- confirm this wrap-around is intended.
    for key in decile_boot:
        spreads_b[decile_boot[key] - 1] += timespread[key]
        counts_b[decile_boot[key] - 1] += 1

    # NOTE(review): rows that received no player divide 0 by 0 here --
    # confirm how the resulting values are handled downstream.
    t = spreads_b / counts_b  # find average

コード例 #27

0

ファイルを表示

ファイル: sup_ee_boot.py プロジェクト: ErwinHaasnoot/axongame

            # one sample_wr draw of size 1 for this attempt (sample_wr is
            # defined elsewhere; presumably sampling with replacement --
            # verify)
            second.append(sample_wr(bootdata[attempt],1))      
        av2[key]=sp.mean(second) 
        var2[key]=sp.var(second)
    
    
    #make list of summary stats
    # x collects var1 and y collects av2, both in the iteration order of big
    x=[]
    y=[]
    for key in big:
        x.append(var1[key])
        y.append(av2[key])
    
    #find percentile values
    # scores at the integer percentiles 0..99 (100 is not included)
    prcentiles_x=[]
    for p in range(100):
        prcentiles_x.append(ssm.scoreatpercentile(x,p))
    
    prcentiles_y=[]
    for p in range(100):
        prcentiles_y.append(ssm.scoreatpercentile(y,p))
    
    
    #make dict of prcentile values for each statistic for each player
    # bisect.bisect returns the insertion point to the right of equal
    # entries, so each index lies between 0 and 100 inclusive
    prcentile_xindex={key: bisect.bisect(prcentiles_x,var1[key]) for key in big}
    prcentile_yindex={key: bisect.bisect(prcentiles_y,av2[key]) for key in big}
           
#    #plot subset       
#    i=1001
#    for key in big:
#        i+=1
#        plot(prcentile_xindex[key],prcentile_yindex[key],'b.')