Exemple #1
0
def test_bootstrap_pass_indices():
    """Check that ``pass_indices=True`` reproduces the plain bootstrap result.

    The same statistic (row-wise mean) is computed once by handing ``np.mean``
    to ``bootstrap`` and once through a callable that indexes the data itself;
    both runs use ``random_state=0`` and are asserted to agree.
    """
    np.random.seed(0)
    x = np.random.normal(0, 1, 100)

    def mean_of_selected(indices):
        return np.mean(x[indices], axis=1)

    from_values = bootstrap(x, 100, np.mean, kwargs={'axis': 1},
                            random_state=0)
    from_indices = bootstrap(x, 100, mean_of_selected, pass_indices=True,
                             random_state=0)

    assert_allclose(from_values, from_indices)
Exemple #2
0
def test_bootstrap_multiple():
    """Check a multi-output statistic against the single-output bootstraps.

    ``mean_sigma`` is bootstrapped with the same ``random_state`` as
    ``np.mean`` and ``np.std``; its first and second outputs are asserted to
    agree with those two distributions respectively.
    """
    np.random.seed(0)
    x = np.random.normal(0, 1, 100)
    row_wise = {'axis': 1}

    expected_mean = bootstrap(x, 100, np.mean, kwargs=row_wise, random_state=0)
    expected_std = bootstrap(x, 100, np.std, kwargs=row_wise, random_state=0)
    combined = bootstrap(x, 100, mean_sigma, kwargs=row_wise, random_state=0)

    assert_allclose(combined[0], expected_mean)
    assert_allclose(combined[1], expected_std)
Exemple #3
0
def test_bootstrap_multiple():
    """Bootstrapping ``mean_sigma`` must match separate mean/std bootstraps.

    All three calls share ``random_state=0``; element 0 of the combined
    result is compared with the mean distribution and element 1 with the
    standard-deviation distribution.
    """
    np.random.seed(0)
    x = np.random.normal(0, 1, 100)

    def run(statistic):
        return bootstrap(x, 100, statistic, kwargs=dict(axis=1),
                         random_state=0)

    dist_mean = run(np.mean)
    dist_std = run(np.std)
    res = run(mean_sigma)

    assert_allclose(res[0], dist_mean)
    assert_allclose(res[1], dist_std)
Exemple #4
0
def test_bootstrap_pass_indices():
    """Index-based and value-based bootstraps of the mean must agree.

    With ``pass_indices=True`` the user callable does the indexing of ``x``
    itself; with identical ``random_state`` the result is asserted to equal
    the one obtained by passing ``np.mean`` directly.
    """
    np.random.seed(0)
    x = np.random.normal(0, 1, 100)
    shared = dict(random_state=0)

    dist_direct = bootstrap(x, 100, np.mean, kwargs=dict(axis=1), **shared)
    dist_indexed = bootstrap(x, 100, lambda idx: np.mean(x[idx], axis=1),
                             pass_indices=True, **shared)

    assert_allclose(dist_direct, dist_indexed)
def get_line(x,y,bins=12,ranges=None,use_bootstrap=False,percentiles=(16,84)):
    """Summarise ``x`` and ``y`` by percentile within bins of ``x``.

    Parameters
    ----------
    x, y : array-like
        Paired samples; both must support boolean-mask indexing.
    bins : int or sequence
        Either the number of equal-width bins between ``ranges[0]`` and
        ``ranges[1]``, or the bin edges themselves.
    ranges : sequence, optional
        ``(xmin, xmax, ymin, ymax)``.  Only the first two entries are read
        here.  Defaults to the extent of the data.
    use_bootstrap : bool
        When true, the per-bin statistics are obtained by passing the
        percentile function to ``bootstrap`` (10 resamplings,
        ``random_state=0``) instead of calling it on the bin directly.
    percentiles : pair of numbers
        Lower and upper percentile reported next to the median.

    Returns
    -------
    stats_table : Table
        One row per bin with columns ``x``, ``x_upper``, ``x_lower``, ``y``,
        ``y_upper``, ``y_lower`` and ``x_centres``.  Bins without any points
        hold NaN in the six statistic columns.
    bins : sequence
        The bin edges that were used.
    """

    def percentile_function(values):
        lower = np.percentile(values, percentiles[0])
        median = np.median(values)
        upper = np.percentile(values, percentiles[1])
        return lower, median, upper

    def summarise(values):
        # An empty bin has no statistics; NaN keeps the row count equal to
        # the number of bins so the table columns stay aligned.
        if len(values) == 0:
            return np.nan, np.nan, np.nan
        if use_bootstrap:
            return bootstrap(values, 10, percentile_function, random_state=0)
        return percentile_function(values)

    # ``is None`` rather than ``== None``: the latter is evaluated
    # element-wise (and then fails in ``if``) when ``ranges`` is an array.
    if ranges is None:
        ranges = (np.min(x), np.max(x), np.min(y), np.max(y))

    if isinstance(bins, (int, np.integer)):
        bins = np.linspace(ranges[0], ranges[1], bins + 1)

    edges = np.asarray(bins)
    xbin_centres = edges[:-1] + np.diff(edges) / 2

    # Only the bin number of every point is needed from binned_statistic.
    _, _, bin_assignment = binned_statistic(x, x, statistic='count', bins=bins)

    # Walk over every real bin (numbers 1..n_bins).  Bin numbers 0 and
    # len(edges) mark points outside the edges and are left out, so the rows
    # always line up with ``xbin_centres``.
    rows = []
    for b in range(1, len(edges)):
        in_bin = bin_assignment == b
        x_lower, x_median, x_upper = summarise(x[in_bin])
        y_lower, y_median, y_upper = summarise(y[in_bin])
        rows.append((x_median, x_upper, x_lower, y_median, y_upper, y_lower))

    columns = np.array(rows, dtype=float).reshape(-1, 6).T

    stats_table = Table()
    names = ('x', 'x_upper', 'x_lower', 'y', 'y_upper', 'y_lower')
    for name, column in zip(names, columns):
        stats_table[name] = column
    stats_table['x_centres'] = xbin_centres

    return stats_table, bins
Exemple #6
0
def test_bootstrap_covar():
    """Bootstrap ``np.cov`` on bivariate normal draws and check one entry.

    The ``[0][0]`` element of the bootstrap result is compared with the
    ``[0][0]`` element of the covariance used to generate the data, within an
    absolute tolerance of ``2 * 0.4``.
    """
    np.random.seed(0)
    centre = [0., 0.]
    covar = [[10., 3.], [3., 20.]]
    x = np.random.multivariate_normal(centre, covar, 1000)

    cov_kwargs = {'rowvar': 0}
    dist_cov = bootstrap(x, 10000, np.cov, kwargs=cov_kwargs, random_state=0)

    tolerance = 2. * 0.4
    assert_allclose(covar[0][0], dist_cov[0][0], atol=tolerance)
Exemple #7
0
def test_bootstrap_results():
    """Pin the mean and sigma of a seeded bootstrap-of-the-mean distribution."""
    np.random.seed(0)
    x = np.random.normal(0, 1, 100)

    distribution = bootstrap(x, 100, np.mean, kwargs={'axis': 1},
                             random_state=0)
    mu, sigma = mean_sigma(distribution)

    # Reference values for seed 0 / random_state 0.
    expected = [0.08139846, 0.10465327]
    assert_allclose([mu, sigma], expected)
Exemple #8
0
def test_bootstrap_results():
    """Regression test: seeded bootstrap of the mean gives fixed (mu, sigma)."""
    np.random.seed(0)
    x = np.random.normal(0, 1, 100)
    options = dict(kwargs=dict(axis=1), random_state=0)
    distribution = bootstrap(x, 100, np.mean, **options)

    summary = mean_sigma(distribution)
    mu, sigma = summary

    assert_allclose([mu, sigma], [0.08139846, 0.10465327])
Exemple #9
0
def test_bootstrap_covar():
    """Compare a bootstrapped covariance entry with the generating value.

    Data are drawn from a bivariate normal with a known covariance; the
    ``[0][0]`` entry of the ``np.cov`` bootstrap must lie within ``2 * 0.4``
    of the corresponding input entry.
    """
    np.random.seed(0)
    mean = [0., 0.]
    covar = [[10., 3.], [3., 20.]]
    x = np.random.multivariate_normal(mean, covar, 1000)

    n_bootstraps = 10000
    dist_cov = bootstrap(x, n_bootstraps, np.cov, kwargs=dict(rowvar=0),
                         random_state=0)

    expected_var = covar[0][0]
    assert_allclose(expected_var, dist_cov[0][0], atol=2. * 0.4)
Exemple #10
0
    # Per-magnitude-bin scatter estimate (fragment of a loop body; the loop
    # header is outside this view).
    # 	plt.figure()
    # 	plt.plot(temp_mag_out['MAG'],temp_mag_out['MAG_AUTO']-temp_mag_out['MAG'],',')

    # Sigma-clip the MAG_AUTO - MAG differences of the objects in this bin.
    temp_diffs = sigma_clip(temp_mag_out['MAG_AUTO'] - temp_mag_out['MAG'],
                            sigma=2.3,
                            iters=None)
    # Keep only the entries that the clipping did not mask.
    mask = (ma.getmaskarray(temp_diffs) == False)
    # 	temp_mag_out = temp_mag_out[mask]
    temp_diffs = temp_diffs[mask]

    # 	plt.plot(temp_mag_out['MAG'],temp_diffs,'r,')

    print len(temp_diffs)
    # Bootstrap np.std over the surviving differences; with fewer than two
    # values fall back to 0.0.
    if len(temp_diffs) > 1:
        temp_boots = bootstrap(temp_diffs, 10000, np.std)
    else:
        temp_boots = 0.0
    # One tab-separated line per bin: lower edge, upper edge, bootstrap result.
    print >> error_fileout, "%.2f	%.2f	%.2e" % (mag_chk, mag_chk + dmag,
                                                temp_boots)
    boots_out.append(temp_boots)
    # 	temp_x = mag_chk+dmag/2.
    # 	plt.plot([mag_chk,mag_chk+dmag],[quad_fit(mag_chk),quad_fit(mag_chk+dmag)],'g',lw=2)
    # 	plt.plot([temp_x,temp_x],[quad_fit(temp_x)-3.*boots,quad_fit(temp_x)+3.*boots],'k',lw=2)
    # 	plt.plot([temp_x,temp_x],[quad_fit(temp_x)-boots,quad_fit(temp_x)+boots],'g',lw=2)
    # 	print 3.*boots
    #
    # 	plt.show()
    # 	exit()

    # Record the centre of the magnitude bin.
    mag_outs.append(mag_chk + dmag / 2.)
Exemple #11
0
# Number of bootstrap resamplings requested per run.
Nbootstrap = 2000

# Parameters of the normal distribution the data are drawn from.
truemean = 0.
truesigma = 1.

# Number of independent runs; one result per run is stored in each array.
nruns = 1000
sigstddev = np.zeros(nruns)  # np.std(data, ddof=1) of each run
sigboot = np.zeros(nruns)  # median of the mlre.bootstrap output of each run
sigsmboot = np.zeros(nruns)  # median of the smoothedbootstrap output of each run

for irun in xrange(nruns):
    np.random.seed()
    data = stats.norm(truemean, truesigma).rvs(Ndata)
    sigstddev[irun] = np.std(data, ddof=1)
    sigboot[irun] = np.median(
        mlre.bootstrap(data, Nbootstrap, np.std, kwargs=dict(axis=1, ddof=1)))
    sigsmboot[irun] = np.median(
        smoothedbootstrap(data,
                          Nbootstrap,
                          np.std,
                          kwargs=dict(axis=1, ddof=1)))
    #    code here in loop looks at all bootstrap results for a single run (single data set)
    if irun == 17:  # choose 17 as an example run for no particular reason
        fig0, ax0 = plt.subplots(figsize=(5, 3.75))
        ax0.hist(smoothedbootstrap(data,
                                   Nbootstrap,
                                   np.std,
                                   kwargs=dict(axis=1, ddof=1)),
                 bins=50,
                 normed=True,
                 histtype='step',
Exemple #12
0
def get_line(x,
             y,
             bins=12,
             ranges=None,
             use_bootstrap=False,
             percentiles=(16, 84)):
    """Compute median and percentile bands of ``x`` and ``y`` in bins of ``x``.

    Parameters
    ----------
    x, y : array-like
        Paired samples; both must support boolean-mask indexing.
    bins : int or sequence
        Number of equal-width bins between ``ranges[0]`` and ``ranges[1]``,
        or explicit bin edges.
    ranges : sequence, optional
        ``(xmin, xmax, ymin, ymax)``; only the first two entries are read
        here.  Defaults to the extent of the data.
    use_bootstrap : bool
        When true, the percentile function is evaluated through
        ``bootstrap`` (10 resamplings, ``random_state=0``) for every bin.
    percentiles : pair of numbers
        Lower and upper percentile reported next to the median.

    Returns
    -------
    stats_table : Table
        One row per bin with columns ``x``, ``x_upper``, ``x_lower``, ``y``,
        ``y_upper``, ``y_lower`` and ``x_centres``; empty bins hold NaN in
        the six statistic columns.
    bins : sequence
        The bin edges that were used.
    """

    def percentile_function(values):
        lower = np.percentile(values, percentiles[0])
        median = np.median(values)
        upper = np.percentile(values, percentiles[1])
        return lower, median, upper

    def bin_statistics(values):
        # Empty bins yield NaN so every bin contributes exactly one row.
        if len(values) == 0:
            return np.nan, np.nan, np.nan
        if use_bootstrap:
            return bootstrap(values,
                             10,
                             percentile_function,
                             random_state=0)
        return percentile_function(values)

    # Identity test: ``ranges == None`` is element-wise for array input.
    if ranges is None:
        ranges = (np.min(x), np.max(x), np.min(y), np.max(y))

    if isinstance(bins, (int, np.integer)):
        bins = np.linspace(ranges[0], ranges[1], bins + 1)

    edges = np.asarray(bins)
    xbin_centres = edges[:-1] + np.diff(edges) / 2

    # Only the per-point bin numbers are needed.
    _, _, bin_assignment = binned_statistic(x,
                                            x,
                                            statistic='count',
                                            bins=bins)

    x_lowers, x_medians, x_uppers = [], [], []
    y_lowers, y_medians, y_uppers = [], [], []
    # Iterate over the real bins 1..n_bins.  Numbers 0 and len(edges) flag
    # points outside the edges; skipping them (and emitting NaN for empty
    # bins) keeps every column the same length as ``xbin_centres``.
    for b in range(1, len(edges)):
        in_bin = bin_assignment == b
        x_lower, x_median, x_upper = bin_statistics(x[in_bin])
        y_lower, y_median, y_upper = bin_statistics(y[in_bin])

        x_lowers.append(x_lower)
        x_medians.append(x_median)
        x_uppers.append(x_upper)
        y_lowers.append(y_lower)
        y_medians.append(y_median)
        y_uppers.append(y_upper)

    stats_table = Table()
    stats_table['x'] = np.array(x_medians, dtype=float)
    stats_table['x_upper'] = np.array(x_uppers, dtype=float)
    stats_table['x_lower'] = np.array(x_lowers, dtype=float)
    stats_table['y'] = np.array(y_medians, dtype=float)
    stats_table['y_upper'] = np.array(y_uppers, dtype=float)
    stats_table['y_lower'] = np.array(y_lowers, dtype=float)
    stats_table['x_centres'] = xbin_centres

    return stats_table, bins
Exemple #13
0
from astroML.resample import bootstrap


def fit_samples(sample):
    """Fit every bootstrap resample by minimising ``neg_log_likelihood``.

    ``sample`` is an array of size [n_bootstraps, n_samples].  Each row is
    passed to ``optimize.fmin`` (starting from ``theta_guess``) together with
    its square root as the extra arguments; the best-fit results are returned
    stacked in one array, one entry per resample.
    """
    best_fits = []
    for resampled in sample:
        fit_args = (resampled, np.sqrt(resampled))
        best_fits.append(
            optimize.fmin(neg_log_likelihood, theta_guess, args=fit_args,
                          disp=0))
    return np.array(best_fits)


samples = bootstrap(F, 1000, fit_samples)  # 1000 bootstrap resamplings

# Now in a similar manner to what we did above for the MCMC Bayesian posterior, we'll compute the sample mean and standard deviation to determine the errors on the parameters.

# In[11]:

# Column 0 holds the fitted mu, column 1 the fitted sigma (taken as |sigma|).
mu_samp = samples[:, 0]
sig_samp = abs(samples[:, 1])

# Single-argument print(...) is valid on both Python 2 and Python 3.
print(" mu    = {0:.0f} +/- {1:.0f}".format(mu_samp.mean(), mu_samp.std()))
print(" sigma = {0:.0f} +/- {1:.0f}".format(sig_samp.mean(), sig_samp.std()))

# I should note that there is a **huge** literature on the details of bootstrap resampling, and there are definitely some subtleties of the approach that I am glossing over here. One obvious piece is that there is potential for errors to be correlated or non-Gaussian, neither of which is reflected by simply finding the mean and standard deviation of each model parameter. Nevertheless, I trust that this gives the basic idea of the frequentist approach to this problem.

#### Varying Photon Counts: The Bayesian Approach
# you can set usetex to False.
if "setup_text_plots" not in globals():
    from astroML.plotting import setup_text_plots
setup_text_plots(fontsize=8, usetex=True)

m = 1000  # number of points
n = 10000  # number of bootstraps

#------------------------------------------------------------
# sample values from a normal distribution
np.random.seed(123)
data = norm(0, 1).rvs(m)

#------------------------------------------------------------
# Compute bootstrap resamplings of data
# mu1_bootstrap: bootstrap of np.std (ddof=1); mu2_bootstrap: bootstrap of sigmaG.
mu1_bootstrap = bootstrap(data, n, np.std, kwargs=dict(axis=1, ddof=1))
mu2_bootstrap = bootstrap(data, n, sigmaG, kwargs=dict(axis=1))

#------------------------------------------------------------
# Compute the theoretical expectations for the two distributions
x = np.linspace(0.8, 1.2, 1000)

# Gaussian centred on 1 with width 1 / sqrt(2 (m - 1)).
sigma1 = 1. / np.sqrt(2 * (m - 1))
pdf1 = norm(1, sigma1).pdf(x)

# Gaussian centred on 1 with width 1.06 / sqrt(m).
# NOTE(review): presumably the expectation for sigmaG -- confirm.
sigma2 = 1.06 / np.sqrt(m)
pdf2 = norm(1, sigma2).pdf(x)

#------------------------------------------------------------
# Plot the results
fig, ax = plt.subplots(figsize=(5, 3.75))
Exemple #15
0
# Likelihood on the (gamma, mu) grid: gamma varies along axis 0 (via
# np.newaxis), mu along axis 1.
likelihood = np.exp(cauchy_logL(xi, gamma[:, np.newaxis], mu))

# Sum over gamma and normalise so that pmu sums to 1 / dmu.
pmu = likelihood.sum(0)
pmu /= pmu.sum() * dmu

# Sum over mu and normalise so that pgamma sums to 1 / dgamma.
pgamma = likelihood.sum(1)
pgamma /= pgamma.sum() * dgamma

#------------------------------------------------------------
# bootstrap estimate
# Histogram bin edges used for the bootstrap results.
mu_bins = np.linspace(-3, 3, 21)
gamma_bins = np.linspace(0, 5, 17)

# 20000 resamplings of xi; estimate_mu_gamma returns the two outputs
# unpacked here.
mu_bootstrap, gamma_bootstrap = bootstrap(xi,
                                          20000,
                                          estimate_mu_gamma,
                                          kwargs=dict(axis=1),
                                          random_state=0)

#------------------------------------------------------------
# Plot results
fig = plt.figure(figsize=(5, 5))
fig.subplots_adjust(wspace=0.35, right=0.95, hspace=0.2, top=0.95)

# first axes: mu posterior
ax1 = fig.add_subplot(221)
ax1.plot(mu, pmu, '-k')
ax1.hist(mu_bootstrap,
         mu_bins,
         normed=True,
         histtype='step',
Exemple #16
0
# Grid spacing in mu, used to normalise the marginal below.
dmu = mu[1] - mu[0]

# Likelihood on the (gamma, mu) grid: gamma varies along axis 0, mu along
# axis 1.
likelihood = np.exp(cauchy_logL(xi, gamma[:, np.newaxis], mu))

# Sum over gamma and normalise so that pmu sums to 1 / dmu.
pmu = likelihood.sum(0)
pmu /= pmu.sum() * dmu

# Sum over mu and normalise so that pgamma sums to 1 / dgamma.
pgamma = likelihood.sum(1)
pgamma /= pgamma.sum() * dgamma

#------------------------------------------------------------
# bootstrap estimate
mu_bins = np.linspace(-3, 3, 21)
gamma_bins = np.linspace(0, 5, 17)

mu_bootstrap, gamma_bootstrap = bootstrap(xi, 20000, estimate_mu_gamma,
                                          kwargs=dict(axis=1), random_state=0)

#------------------------------------------------------------
# Plot results
fig = plt.figure(figsize=(8, 8))
fig.subplots_adjust(wspace=0.35, right=0.95,
                    hspace=0.2, top=0.95)

# first axes: mu posterior
ax1 = fig.add_subplot(221)
ax1.plot(mu, pmu, '-k')
# ``density=True`` replaces the ``normed`` keyword, which current matplotlib
# no longer accepts.
ax1.hist(mu_bootstrap, mu_bins, density=True,
         histtype='step', color='b', linestyle='dashed')
ax1.set_xlabel(r'$\mu$')
ax1.set_ylabel(r'$p(\mu|x,I)$')
Exemple #17
0
# [2]: http://en.wikipedia.org/wiki/Jackknife_(statistics)
# [3]: http://en.wikipedia.org/wiki/Bootstrapping_(statistics)
# [4]: http://astroML.org

# In[10]:

from astroML.resample import bootstrap

def fit_samples(sample):
    """Return the maximum-likelihood fit of each bootstrap resample.

    ``sample`` is an array of size [n_bootstraps, n_samples].  For every row,
    ``optimize.fmin`` minimises ``neg_log_likelihood`` from ``theta_guess``
    with the row and its square root as extra arguments; the per-row results
    are collected into a single array.
    """

    def fit_one(resample):
        return optimize.fmin(neg_log_likelihood, theta_guess,
                             args=(resample, np.sqrt(resample)), disp=0)

    return np.array([fit_one(row) for row in sample])

samples = bootstrap(F, 1000, fit_samples)  # 1000 bootstrap resamplings


# Now in a similar manner to what we did above for the MCMC Bayesian posterior, we'll compute the sample mean and standard deviation to determine the errors on the parameters.

# In[11]:

# Column 0 holds the fitted mu, column 1 the fitted sigma (taken as |sigma|).
mu_samp = samples[:, 0]
sig_samp = abs(samples[:, 1])

# Single-argument print(...) is valid on both Python 2 and Python 3.
print(" mu    = {0:.0f} +/- {1:.0f}".format(mu_samp.mean(), mu_samp.std()))
print(" sigma = {0:.0f} +/- {1:.0f}".format(sig_samp.mean(), sig_samp.std()))


# I should note that there is a **huge** literature on the details of bootstrap resampling, and there are definitely some subtleties of the approach that I am glossing over here. One obvious piece is that there is potential for errors to be correlated or non-Gaussian, neither of which is reflected by simply finding the mean and standard deviation of each model parameter. Nevertheless, I trust that this gives the basic idea of the frequentist approach to this problem.
Exemple #18
0
from astroML.resample import bootstrap
import numpy as np
from scipy.stats import *
import matplotlib.pyplot as plt
plt.ion()
N = 1000  # number of samples drawn from the unit normal

samples = np.random.normal(0, 1, N)
# Bootstrap distribution of the sample median (10000 resamplings).
median_BS = bootstrap(samples, 10000, np.median, kwargs=dict(axis=1))
standard_dev = np.std(median_BS)
# Width used for the theoretically expected Gaussian below.
std = np.sqrt(np.pi / (2 * N))
print('deviation of bootstrap sammples', standard_dev)

x = np.linspace(-0.5, 0.5, 1000)
mu = np.mean(median_BS)
sigma = np.std(median_BS)
pdf = norm(0, std).pdf(x)  # theoretically expected Gaussian
pdf1 = norm(mu, sigma).pdf(x)  # Gaussian built from the bootstrap samples
# ``density=True`` replaces the ``normed`` keyword, which current matplotlib
# no longer accepts.
plt.hist(median_BS, bins=30, density=True, label='Histogram')  # histogram of bootstrap samples
# The two legend labels were attached to the wrong curves: ``pdf`` is the
# theoretical curve and ``pdf1`` the sample-based one.
plt.plot(x, pdf, '-k', label='Theoritical Plot')
plt.plot(x, pdf1, '-r', label='sample calculated plot')
plt.legend()
Exemple #19
0
	# Per-magnitude-bin scatter and completeness (fragment of a loop body; the
	# loop header is outside this view).
	# Select recovered objects whose MAG lies in [mag_chk, mag_chk + dmag).
	mask = np.logical_and(mock_out_data['MAG'] >= mag_chk, mock_out_data['MAG'] < mag_chk+dmag)
	temp_mag_out = mock_out_data[mask]

# 	plt.figure()
# 	plt.plot(temp_mag_out['MAG'],temp_mag_out['MAG_AUTO']-temp_mag_out['MAG'],',')

	# Sigma-clip the MAG_AUTO - MAG differences and keep the unmasked entries.
 	temp_diffs = sigma_clip(temp_mag_out['MAG_AUTO']-temp_mag_out['MAG'],sigma=2.3,iters=None)
	mask = (ma.getmaskarray(temp_diffs) == False)	
# 	temp_mag_out = temp_mag_out[mask]
	temp_diffs = temp_diffs[mask]

# 	plt.plot(temp_mag_out['MAG'],temp_diffs,'r,')

	print len(temp_diffs)
	# Bootstrap np.std over the surviving differences; with fewer than two
	# values fall back to 0.0.
	if len(temp_diffs) > 1:
		temp_boots = bootstrap(temp_diffs,10000,np.std)
	else:
		temp_boots = 0.0
	# One tab-separated line per bin: lower edge, upper edge, bootstrap result.
	print >> error_fileout, "%.2f	%.2f	%.2e" %	(mag_chk, mag_chk+dmag, temp_boots)
	boots_out.append(temp_boots)
# 	temp_x = mag_chk+dmag/2.
# 	plt.plot([mag_chk,mag_chk+dmag],[quad_fit(mag_chk),quad_fit(mag_chk+dmag)],'g',lw=2)
# 	plt.plot([temp_x,temp_x],[quad_fit(temp_x)-3.*boots,quad_fit(temp_x)+3.*boots],'k',lw=2)
# 	plt.plot([temp_x,temp_x],[quad_fit(temp_x)-boots,quad_fit(temp_x)+boots],'g',lw=2)
# 	print 3.*boots
# 
# 	plt.show()
# 	exit()

	# Bin centre, and the ratio of objects in this bin to len(temp_mag_in).
	mag_outs.append(mag_chk+dmag/2.)
	comps.append(float(len(temp_mag_out['MAG'])/float(len(temp_mag_in))))
Exemple #20
0
Ndata = 5  # points drawn per run
Nbootstrap = 1000  # bootstrap resamplings per run

# Parameters of the normal distribution the data are drawn from.
truemean = 0.
truesigma = 1.

# One entry per run in each result array.
nruns = 10000
sigstddev = np.zeros(nruns)
sigboot = np.zeros(nruns)
sigsmboot = np.zeros(nruns)

# ``range`` instead of ``xrange`` so the loop runs on Python 2 and Python 3.
for irun in range(nruns):
    np.random.seed()
    data = stats.norm(truemean, truesigma).rvs(Ndata)
    # Direct estimate, then the median of the plain and of the smoothed
    # bootstrap distribution of the same statistic.
    sigstddev[irun] = np.std(data, ddof=1)
    sigboot[irun] = np.median(
        mlre.bootstrap(data, Nbootstrap, np.std, kwargs=dict(axis=1, ddof=1)))
    sigsmboot[irun] = np.median(
        smoothedbootstrap(data, Nbootstrap, np.std,
                          kwargs=dict(axis=1, ddof=1)))
# code below looks at all bootstrap results for a single data set
#    fig0, ax0 = plt.subplots(figsize=(5, 3.75))
#    ax0.hist(smoothedbootstrap(data, Nbootstrap,  np.std, kwargs=dict(axis=1, ddof=1)), bins=50, normed=True, histtype='step',
#        color='green', lw=2)
#    ax0.hist(mlre.bootstrap(data, Nbootstrap,  np.std, kwargs=dict(axis=1, ddof=1)), bins=50, normed=True, histtype='step',
#       color='red', lw=2)

# code below looks at individual median bootstrap results for all nruns data sets
fig, ax = plt.subplots(figsize=(5, 3.75))
ax.hist(sigstddev, bins=50, normed=True, histtype='step',
        color='blue', ls='dashed', label=r'$\sigma\ {\rm (stddev)}$')
ax.hist(sigboot, bins=50, normed=True, histtype='step',
        color='red', label=r'$\sigma\ {\rm (bootstrap)}$')
ax.hist(sigsmboot, bins=50, normed=True, histtype='step',
Exemple #21
0
from astroML.resample import bootstrap
from bootstrap_comp import smoothedbootstrap
from bootstrap_comp import smoothedbootstrap2

#==============================================================================
#Unsmoothed Section
plt.close('all')
# Parameters of the normal distribution and the number of points drawn.
sigma = 1
numdat = 5
mean = 0
rndsamp = npr.normal(mean, sigma, numdat)
# Direct estimate of the spread.
# NOTE(review): this uses np.std's default ddof while both bootstrap calls
# below pass ddof=1 -- confirm the mismatch is intended.
dev = np.std(rndsamp)
print('Direct Method:')
print(dev)

# Plain bootstrap of np.std with 5 resamplings; report the median.
boot1 = bootstrap(rndsamp, 5, np.std, kwargs=dict(axis=1, ddof=1))
bootsort = sorted(boot1)
bootmed = np.median(bootsort)
print('Bootstrap Method:')
print(bootmed)

# Smoothed bootstrap of the same statistic; report the median.
# NOTE(review): the meaning of the third positional argument (0) is defined
# in bootstrap_comp.smoothedbootstrap, which is not visible here.
boot2 = smoothedbootstrap(rndsamp,
                          numdat,
                          0,
                          np.std,
                          kwargs=dict(axis=1, ddof=1))
bootsort2 = sorted(boot2)
bootmed2 = np.median(bootsort2)
print('Smoothed Bootstrap Method:')
print(bootmed2)
Ndata = 5  # points drawn per run
Nbootstrap = 2000  # bootstrap resamplings per run

# Parameters of the normal distribution the data are drawn from.
truemean = 0.
truesigma = 1.

# One entry per run in each result array.
nruns = 1000
sigstddev = np.zeros(nruns)
sigboot = np.zeros(nruns)
sigsmboot = np.zeros(nruns)

# Options of the bootstrapped statistic (np.std along each resample).
std_kwargs = dict(axis=1, ddof=1)

# ``range`` instead of ``xrange`` so the loop runs on Python 2 and Python 3.
for irun in range(nruns):
    np.random.seed()
    data = stats.norm(truemean, truesigma).rvs(Ndata)
    sigstddev[irun] = np.std(data, ddof=1)
    sigboot[irun] = np.median(
        mlre.bootstrap(data, Nbootstrap, np.std, kwargs=std_kwargs))
    sigsmboot[irun] = np.median(
        smoothedbootstrap(data, Nbootstrap, np.std, kwargs=std_kwargs))
    # code here in loop looks at all bootstrap results for a single run
    # (single data set)
    if irun == 17:  # choose 17 as an example run for no particular reason
        fig0, ax0 = plt.subplots(figsize=(5, 3.75))
        # ``density=True`` replaces the ``normed`` keyword, which current
        # matplotlib no longer accepts.
        ax0.hist(smoothedbootstrap(data, Nbootstrap, np.std,
                                   kwargs=std_kwargs),
                 bins=50, density=True, histtype='step',
                 color='green', lw=2, label=r'$\sigma\ {\rm (sm. bootstrap)}$')
        ax0.hist(mlre.bootstrap(data, Nbootstrap, np.std, kwargs=std_kwargs),
                 bins=50, density=True, histtype='step',
                 color='red', lw=2, label=r'$\sigma\ {\rm (bootstrap)}$')

# code below looks at the median bootstrap results for all nruns data sets (one median per run)
fig, ax = plt.subplots(figsize=(5, 3.75))
ax.hist(sigstddev, bins=50, density=True, histtype='step',
        color='blue', ls='dashed', label=r'$\sigma\ {\rm (stddev)}$')
ax.hist(sigboot, bins=50, density=True, histtype='step',
        color='red', label=r'$\sigma\ {\rm (bootstrap)}$')
def plotFoM(validationData, zbins, pzs, metric, n_bootstrap=50):
    """Plot a photo-z figure of merit per redshift bin for several estimators.

    For every photo-z column in ``pzs`` and every ``(zmin, zmax)`` pair in
    ``zbins``, the rows with ``zmin < photo-z < zmax`` are selected and the
    requested metric is computed against the reference column ``'Z'``.  The
    values are drawn with error bars at the bin centres, the figure is saved
    to ``<metric>_pz_test.png`` and shown.

    Parameters
    ----------
    validationData : table-like
        Indexable by column name and by boolean mask.
    zbins : iterable of (zmin, zmax)
        Photo-z bin limits (both exclusive).
    pzs : iterable of str
        Photo-z column names; each must be a key of ``label_dict``.
    metric : {'bias', 's68', 's681pz'}
        ``'bias'``: mean of ``delta_z`` with its standard error;
        ``'s68'``: ``sigma_68`` of photo-z minus reference;
        ``'s681pz'``: ``sigma_68_1pz`` of the same residual.
        The two sigma metrics take their error from the standard deviation
        of a bootstrap.
    n_bootstrap : int, optional
        Number of bootstrap resamplings for the sigma metrics (default 50,
        the previously hard-coded value).

    Raises
    ------
    ValueError
        If ``metric`` is not one of the supported names.
    """
    ylabels = {
        'bias': '$z_{photo}-z_{spec}$',
        's68': 'Sigma_68',
        's681pz': r'$\sigma_{68}/(1+z_{spec})$',
    }
    # Reject unknown metrics up front; otherwise val/errval/ylab would never
    # be assigned and the function would fail later with a NameError.
    if metric not in ylabels:
        raise ValueError('unknown metric %r; expected one of %s' %
                         (metric, sorted(ylabels)))
    ylab = ylabels[metric]

    refZ = 'Z'
    plt.figure()
    for refPz in pzs:
        print(refPz)
        metric_list = []
        errmetric_list = []
        zmid_list = []
        for zmin, zmax in zbins:
            print('Bin ', zmin, zmax)
            zsel = (validationData[refPz] > zmin) & (validationData[refPz] <
                                                     zmax)
            selection = validationData[zsel]
            # The "basic" and the final selection currently use the same
            # mask, so both printed counts are identical.
            print(len(selection), len(selection))
            zmid_list.append((zmin + zmax) / 2.0)

            if metric == 'bias':
                dz = delta_z(selection[refPz], selection[refZ])
                val = np.mean(dz)
                errval = np.std(dz) / np.sqrt(len(selection[refPz]))
            elif metric == 's68':
                residual = selection[refPz] - selection[refZ]
                val = sigma_68(residual)
                errval = np.std(
                    bootstrap(residual,
                              n_bootstrap,
                              sigma_68,
                              kwargs=dict(axis=1)))
            else:  # 's681pz'
                residual = selection[refPz] - selection[refZ]
                val = sigma_68_1pz(residual, selection[refZ])
                # NOTE(review): z_spec is handed over un-resampled while the
                # residuals are resampled -- confirm sigma_68_1pz expects
                # this.
                errval = np.std(
                    bootstrap(residual,
                              n_bootstrap,
                              sigma_68_1pz,
                              kwargs=dict(axis=1, z_spec=selection[refZ])))

            print(metric, 'val', val, 'errval', errval)
            errmetric_list.append(errval)
            metric_list.append(val)

        # Shift the Z_MEAN points slightly in x before plotting.
        if refPz == 'Z_MEAN':
            zmid_list = list(np.array(zmid_list) + 0.02)
        plt.errorbar(zmid_list,
                     metric_list,
                     yerr=errmetric_list,
                     label=label_dict[refPz],
                     marker='o',
                     ls='')
    plt.xlabel('Photo-z bin')
    plt.ylabel(ylab)

    plt.grid()
    plt.legend(loc="upper left")
    plt.savefig(metric + '_pz_test.png')
    plt.show()
#------------------------------------------------------------
# Addition by M. DeCesar
# Take a look at your data
xx = np.linspace(1, m, num=m)
plt.figure()
plt.plot(xx, data)
plt.xlabel('Data Bin')
plt.ylabel('Data Value')
plt.title('Sample Values from a Normal Distribution')
#plt.savefig('normdist_sample')
plt.show()

#------------------------------------------------------------
# Compute bootstrap resamplings of data
mu1_bootstrap = bootstrap(data, n, np.std, kwargs=dict(axis=1, ddof=1))
mu2_bootstrap = bootstrap(data, n, sigmaG, kwargs=dict(axis=1))

#------------------------------------------------------------
# Addition by M. DeCesar
# Take a look at what the bootstrap output is
# (single-argument print(...) is valid on both Python 2 and Python 3; the
# second length now refers to mu2_bootstrap, not mu1_bootstrap again)
print(len(mu1_bootstrap))
print(mu1_bootstrap)
print(len(mu2_bootstrap))
print(mu2_bootstrap)

#------------------------------------------------------------
# Compute the theoretical expectations for the two distributions
x = np.linspace(0.8, 1.2, 1000)     ## len(x) = m

sigma1 = 1. / np.sqrt(2 * (m - 1))  ## Standard error of the sample standard deviation, eq 3.35
# Parameters of the normal distribution the sample is drawn from.
xbar = 1
V = 4
sigma_x = np.sqrt(V)

np.random.seed(10)
xi = np.random.normal(xbar, sigma_x, size=n)

# Sample mean and sample standard deviation (ddof=1) of the draw.
mu_mean, sig_mean = mean_sigma(xi, ddof=1)

# compute the analytically expected spread in measurements
mu_std = sig_mean / np.sqrt(n)
sig_std = sig_mean / np.sqrt(2 * (n - 1))

#------------------------------------------------------------
# bootstrap estimates
# The number of resamplings is used as an array dimension, so it must be an
# integer: the float literal 1E6 is converted explicitly.
mu_bootstrap, sig_bootstrap = bootstrap(xi, int(1E6), mean_sigma,
                                        kwargs=dict(ddof=1, axis=1))

#------------------------------------------------------------
# Compute analytic posteriors

# distributions for the mean
mu = np.linspace(-3, 5, 1000)
dmu = mu[1] - mu[0]

# NOTE(review): the literals 1, 4, 10 presumably stand for xbar, V and n --
# confirm against compute_pmu and the value of n defined earlier.
pmu = compute_pmu(mu, 1, 4, 10)
pmu /= (dmu * pmu.sum())

pmu2 = compute_pmu_alt(mu, 1, 4, 10)
pmu2 /= (dmu * pmu2.sum())

# Gaussian centred on the sample mean with the analytic width mu_std.
pmu_norm = gaussian(mu, mu_mean, mu_std)