def test_bootstrap_pass_indices():
    """Check that index-based and value-based bootstraps agree.

    The same 100 normal draws are bootstrapped twice with
    ``random_state=0``: once letting ``bootstrap`` hand the resampled values
    to ``np.mean``, and once with ``pass_indices=True`` so that the statistic
    receives indices and looks the values up itself.
    """
    np.random.seed(0)
    x = np.random.normal(0, 1, 100)

    def mean_from_indices(idx):
        # Equivalent of np.mean(resampled, axis=1), expressed through indices.
        return np.mean(x[idx], axis=1)

    by_value = bootstrap(x, 100, np.mean, kwargs={"axis": 1}, random_state=0)
    by_index = bootstrap(x, 100, mean_from_indices, pass_indices=True,
                         random_state=0)

    assert_allclose(by_value, by_index)
def test_bootstrap_multiple():
    """Check a two-output statistic against two single-output bootstraps.

    ``mean_sigma`` is bootstrapped with the same data and ``random_state`` as
    separate ``np.mean`` and ``np.std`` runs; element 0 of its result must
    match the mean distribution and element 1 the std distribution.
    """
    np.random.seed(0)
    x = np.random.normal(0, 1, 100)

    def run(statistic):
        # Fresh kwargs dict on every call so the three runs share no state.
        return bootstrap(x, 100, statistic, kwargs={"axis": 1},
                         random_state=0)

    dist_mean = run(np.mean)
    dist_std = run(np.std)
    res = run(mean_sigma)

    for position, expected in enumerate((dist_mean, dist_std)):
        assert_allclose(res[position], expected)
def get_line(x, y, bins=12, ranges=None, use_bootstrap=False,
             percentiles=(16, 84)):
    """Summarise ``x`` and ``y`` with percentile statistics in bins of ``x``.

    Parameters
    ----------
    x, y : array-like supporting boolean-mask indexing
        Paired samples; both are grouped by the bin that ``x`` falls into.
    bins : int or sequence, optional
        Number of equal-width bins between ``ranges[0]`` and ``ranges[1]``,
        or the bin edges themselves.
    ranges : sequence, optional
        ``(x_min, x_max, y_min, y_max)``.  Only the first two entries are
        used here.  Defaults to the extent of the data.
    use_bootstrap : bool, optional
        If true, each statistic is obtained through
        ``bootstrap(sample, 10, ..., random_state=0)`` instead of being
        computed directly on the binned sample.
    percentiles : pair of numbers, optional
        Lower and upper percentiles reported around the median.

    Returns
    -------
    stats_table : Table
        One row per occupied bin with columns ``x``, ``x_upper``,
        ``x_lower``, ``y``, ``y_upper``, ``y_lower`` and ``x_centres``.
    bins : sequence
        The bin edges (the generated array when ``bins`` was an integer,
        otherwise the object that was passed in).
    """
    def percentile_function(values):
        lower = np.percentile(values, percentiles[0])
        median = np.median(values)
        upper = np.percentile(values, percentiles[1])
        return lower, median, upper

    if ranges is None:
        ranges = (np.min(x), np.max(x), np.min(y), np.max(y))
    # Accept NumPy integers as well as Python ints for the bin count.
    if isinstance(bins, (int, np.integer)):
        bins = np.linspace(ranges[0], ranges[1], bins + 1)

    edges = np.asarray(bins, dtype=float)
    centres = edges[:-1] + np.diff(edges) / 2
    n_bins = len(centres)

    # Only the bin numbers are needed; 'count' never looks at the values.
    _, _, bin_assignment = binned_statistic(x, x, bins=bins,
                                            statistic='count')

    # binned_statistic labels points outside the edges with 0 or n_bins + 1.
    # Those, and empty bins, are left out so that every column of the table
    # (including x_centres) describes the same set of bins.
    occupied = [int(b) for b in np.unique(bin_assignment)
                if 1 <= b <= n_bins]

    rows = []
    for b in occupied:
        in_bin = bin_assignment == b
        x_bin = x[in_bin]
        y_bin = y[in_bin]
        if use_bootstrap:
            x_lower, x_median, x_upper = bootstrap(
                x_bin, 10, percentile_function, random_state=0)
            y_lower, y_median, y_upper = bootstrap(
                y_bin, 10, percentile_function, random_state=0)
        else:
            x_lower, x_median, x_upper = percentile_function(x_bin)
            y_lower, y_median, y_upper = percentile_function(y_bin)
        rows.append((x_lower, x_median, x_upper,
                     y_lower, y_median, y_upper))

    # Build the columns once instead of growing six arrays with np.append.
    columns = np.array(rows, dtype=float).reshape(-1, 6)

    stats_table = Table()
    stats_table['x'] = columns[:, 1]
    stats_table['x_upper'] = columns[:, 2]
    stats_table['x_lower'] = columns[:, 0]
    stats_table['y'] = columns[:, 4]
    stats_table['y_upper'] = columns[:, 5]
    stats_table['y_lower'] = columns[:, 3]
    stats_table['x_centres'] = centres[np.array(occupied, dtype=int) - 1]
    return stats_table, bins
def test_bootstrap_covar():
    """Check the bootstrapped covariance against the generating covariance.

    1000 bivariate normal draws are bootstrapped 10000 times with ``np.cov``
    (``rowvar=0``); the ``[0][0]`` entry of the result must lie within
    ``2 * 0.4`` of the true variance of the first component.
    """
    np.random.seed(0)
    true_mean = [0., 0.]
    covar = [[10., 3.],
             [3., 20.]]
    draws = np.random.multivariate_normal(true_mean, covar, 1000)

    dist_cov = bootstrap(draws, 10000, np.cov, kwargs={"rowvar": 0},
                         random_state=0)

    tolerance = 2. * 0.4
    assert_allclose(covar[0][0], dist_cov[0][0], atol=tolerance)
def test_bootstrap_results():
    """Pin the mean and spread of a seeded bootstrap of the sample mean.

    With NumPy seeded to 0 and ``random_state=0``, the 100 bootstrap means of
    100 normal draws must reproduce the stored reference values returned by
    ``mean_sigma``.
    """
    expected = [0.08139846, 0.10465327]

    np.random.seed(0)
    sample = np.random.normal(0, 1, 100)
    distribution = bootstrap(sample, 100, np.mean, kwargs={"axis": 1},
                             random_state=0)

    mu, sigma = mean_sigma(distribution)
    assert_allclose([mu, sigma], expected)
def test_bootstrap_covar():
    """Compare a bootstrapped ``np.cov`` result with the true covariance.

    Draws 1000 points from a bivariate normal, bootstraps ``np.cov`` with
    ``rowvar=0`` 10000 times, and requires the ``[0][0]`` entry of the output
    to match the generating variance to an absolute tolerance of ``2 * 0.4``.
    """
    np.random.seed(0)
    centre = [0., 0.]
    covar = [[10., 3.], [3., 20.]]
    points = np.random.multivariate_normal(centre, covar, 1000)

    cov_kwargs = dict(rowvar=0)
    dist_cov = bootstrap(points, 10000, np.cov, kwargs=cov_kwargs,
                         random_state=0)

    expected_variance = covar[0][0]
    assert_allclose(expected_variance, dist_cov[0][0], atol=2. * 0.4)
# Scatter of (MAG_AUTO - MAG) for the current magnitude bin.
# Outliers are sigma-clipped first; only the surviving differences are kept.
# (Commented-out debugging plots that used to live here were removed.)
# NOTE(review): newer astropy releases spell the `iters` keyword `maxiters`
# -- confirm against the astropy version this script targets.
temp_diffs = sigma_clip(temp_mag_out['MAG_AUTO'] - temp_mag_out['MAG'],
                        sigma=2.3, iters=None)
mask = ~ma.getmaskarray(temp_diffs)  # True where the value survived clipping
temp_diffs = temp_diffs[mask]
print(len(temp_diffs))

# NOTE(review): np.std is passed without kwargs (no axis), so it is applied
# to whatever bootstrap hands it as a whole and yields the single number that
# the "%.2e" format below needs -- confirm this is the intended estimator.
if len(temp_diffs) > 1:
    temp_boots = bootstrap(temp_diffs, 10000, np.std)
else:
    # Too few points to resample; record zero scatter.
    temp_boots = 0.0

# One line per bin: lower edge, upper edge, scatter.  write() behaves the
# same under Python 2 and 3, unlike the `print >> file` statement.
error_fileout.write("%.2f %.2f %.2e\n" % (mag_chk, mag_chk + dmag, temp_boots))
boots_out.append(temp_boots)

# Bin centre.
mag_outs.append(mag_chk + dmag / 2.)
Nbootstrap = 2000 truemean = 0. truesigma = 1. nruns = 1000 sigstddev = np.zeros(nruns) sigboot = np.zeros(nruns) sigsmboot = np.zeros(nruns) for irun in xrange(nruns): np.random.seed() data = stats.norm(truemean, truesigma).rvs(Ndata) sigstddev[irun] = np.std(data, ddof=1) sigboot[irun] = np.median( mlre.bootstrap(data, Nbootstrap, np.std, kwargs=dict(axis=1, ddof=1))) sigsmboot[irun] = np.median( smoothedbootstrap(data, Nbootstrap, np.std, kwargs=dict(axis=1, ddof=1))) # code here in loop looks at all bootstrap results for a single run (single data set) if irun == 17: # choose 17 as an example run for no particular reason fig0, ax0 = plt.subplots(figsize=(5, 3.75)) ax0.hist(smoothedbootstrap(data, Nbootstrap, np.std, kwargs=dict(axis=1, ddof=1)), bins=50, normed=True, histtype='step',
def get_line(x, y, bins=12, ranges=None, use_bootstrap=False,
             percentiles=(16, 84)):
    """Summarise ``x`` and ``y`` with percentile statistics in bins of ``x``.

    Parameters
    ----------
    x, y : array-like supporting boolean-mask indexing
        Paired samples; both are grouped by the bin that ``x`` falls into.
    bins : int or sequence, optional
        Number of equal-width bins between ``ranges[0]`` and ``ranges[1]``,
        or the bin edges themselves.
    ranges : sequence, optional
        ``(x_min, x_max, y_min, y_max)``.  Only the first two entries are
        used here.  Defaults to the extent of the data.
    use_bootstrap : bool, optional
        If true, each statistic is obtained through
        ``bootstrap(sample, 10, ..., random_state=0)`` instead of being
        computed directly on the binned sample.
    percentiles : pair of numbers, optional
        Lower and upper percentiles reported around the median.

    Returns
    -------
    stats_table : Table
        One row per occupied bin with columns ``x``, ``x_upper``,
        ``x_lower``, ``y``, ``y_upper``, ``y_lower`` and ``x_centres``.
    bins : sequence
        The bin edges (the generated array when ``bins`` was an integer,
        otherwise the object that was passed in).
    """
    def percentile_function(values):
        lower = np.percentile(values, percentiles[0])
        median = np.median(values)
        upper = np.percentile(values, percentiles[1])
        return lower, median, upper

    if ranges is None:
        ranges = (np.min(x), np.max(x), np.min(y), np.max(y))
    # Accept NumPy integers as well as Python ints for the bin count.
    if isinstance(bins, (int, np.integer)):
        bins = np.linspace(ranges[0], ranges[1], bins + 1)

    edges = np.asarray(bins, dtype=float)
    centres = edges[:-1] + np.diff(edges) / 2
    n_bins = len(centres)

    # Only the bin numbers are needed; 'count' never looks at the values.
    _, _, bin_assignment = binned_statistic(x, x, bins=bins,
                                            statistic='count')

    # binned_statistic labels points outside the edges with 0 or n_bins + 1.
    # Those, and empty bins, are left out so that every column of the table
    # (including x_centres) describes the same set of bins.
    occupied = [int(b) for b in np.unique(bin_assignment)
                if 1 <= b <= n_bins]

    rows = []
    for b in occupied:
        in_bin = bin_assignment == b
        x_bin = x[in_bin]
        y_bin = y[in_bin]
        if use_bootstrap:
            x_lower, x_median, x_upper = bootstrap(
                x_bin, 10, percentile_function, random_state=0)
            y_lower, y_median, y_upper = bootstrap(
                y_bin, 10, percentile_function, random_state=0)
        else:
            x_lower, x_median, x_upper = percentile_function(x_bin)
            y_lower, y_median, y_upper = percentile_function(y_bin)
        rows.append((x_lower, x_median, x_upper,
                     y_lower, y_median, y_upper))

    # Build the columns once instead of growing six arrays with np.append.
    columns = np.array(rows, dtype=float).reshape(-1, 6)

    stats_table = Table()
    stats_table['x'] = columns[:, 1]
    stats_table['x_upper'] = columns[:, 2]
    stats_table['x_lower'] = columns[:, 0]
    stats_table['y'] = columns[:, 4]
    stats_table['y_upper'] = columns[:, 5]
    stats_table['y_lower'] = columns[:, 3]
    stats_table['x_centres'] = centres[np.array(occupied, dtype=int) - 1]
    return stats_table, bins
from astroML.resample import bootstrap


def fit_samples(sample):
    """Fit every bootstrap resample by maximum likelihood.

    Parameters
    ----------
    sample : array of size [n_bootstraps, n_samples]
        One resampled data set per row.

    Returns
    -------
    ndarray
        The ``optimize.fmin`` result for each row, started from the
        module-level ``theta_guess`` and minimising ``neg_log_likelihood``
        with ``sqrt(row)`` passed as the second extra argument.
    """
    # The loop variable is deliberately not called F, so it cannot be
    # confused with the module-level data array of that name.
    return np.array([
        optimize.fmin(neg_log_likelihood, theta_guess,
                      args=(resampled, np.sqrt(resampled)), disp=0)
        for resampled in sample
    ])


samples = bootstrap(F, 1000, fit_samples)  # 1000 bootstrap resamplings

# Now in a similar manner to what we did above for the MCMC Bayesian
# posterior, we'll compute the sample mean and standard deviation to
# determine the errors on the parameters.

# In[11]:

mu_samp = samples[:, 0]
sig_samp = abs(samples[:, 1])

# Parenthesised single-argument print works under both Python 2 and 3.
print(" mu = {0:.0f} +/- {1:.0f}".format(mu_samp.mean(), mu_samp.std()))
print(" sigma = {0:.0f} +/- {1:.0f}".format(sig_samp.mean(), sig_samp.std()))

# I should note that there is a **huge** literature on the details of
# bootstrap resampling, and there are definitely some subtleties of the
# approach that I am glossing over here. One obvious piece is that there is
# potential for errors to be correlated or non-Gaussian, neither of which is
# reflected by simply finding the mean and standard deviation of each model
# parameter. Nevertheless, I trust that this gives the basic idea of the
# frequentist approach to this problem.

# #### Varying Photon Counts: The Bayesian Approach
# you can set usetex to False. if "setup_text_plots" not in globals(): from astroML.plotting import setup_text_plots setup_text_plots(fontsize=8, usetex=True) m = 1000 # number of points n = 10000 # number of bootstraps #------------------------------------------------------------ # sample values from a normal distribution np.random.seed(123) data = norm(0, 1).rvs(m) #------------------------------------------------------------ # Compute bootstrap resamplings of data mu1_bootstrap = bootstrap(data, n, np.std, kwargs=dict(axis=1, ddof=1)) mu2_bootstrap = bootstrap(data, n, sigmaG, kwargs=dict(axis=1)) #------------------------------------------------------------ # Compute the theoretical expectations for the two distributions x = np.linspace(0.8, 1.2, 1000) sigma1 = 1. / np.sqrt(2 * (m - 1)) pdf1 = norm(1, sigma1).pdf(x) sigma2 = 1.06 / np.sqrt(m) pdf2 = norm(1, sigma2).pdf(x) #------------------------------------------------------------ # Plot the results fig, ax = plt.subplots(figsize=(5, 3.75))
likelihood = np.exp(cauchy_logL(xi, gamma[:, np.newaxis], mu)) pmu = likelihood.sum(0) pmu /= pmu.sum() * dmu pgamma = likelihood.sum(1) pgamma /= pgamma.sum() * dgamma #------------------------------------------------------------ # bootstrap estimate mu_bins = np.linspace(-3, 3, 21) gamma_bins = np.linspace(0, 5, 17) mu_bootstrap, gamma_bootstrap = bootstrap(xi, 20000, estimate_mu_gamma, kwargs=dict(axis=1), random_state=0) #------------------------------------------------------------ # Plot results fig = plt.figure(figsize=(5, 5)) fig.subplots_adjust(wspace=0.35, right=0.95, hspace=0.2, top=0.95) # first axes: mu posterior ax1 = fig.add_subplot(221) ax1.plot(mu, pmu, '-k') ax1.hist(mu_bootstrap, mu_bins, normed=True, histtype='step',
# Grid spacing in mu, used to normalise the marginal below.
dmu = mu[1] - mu[0]

# Likelihood on the (gamma, mu) grid: gamma varies along axis 0 and mu
# along axis 1.
likelihood = np.exp(cauchy_logL(xi, gamma[:, np.newaxis], mu))

# Marginalise over gamma (axis 0) and normalise to unit area in mu.
pmu = likelihood.sum(0)
pmu /= pmu.sum() * dmu

# Marginalise over mu (axis 1) and normalise to unit area in gamma.
pgamma = likelihood.sum(1)
pgamma /= pgamma.sum() * dgamma

#------------------------------------------------------------
# bootstrap estimate
mu_bins = np.linspace(-3, 3, 21)
gamma_bins = np.linspace(0, 5, 17)

mu_bootstrap, gamma_bootstrap = bootstrap(xi, 20000, estimate_mu_gamma,
                                          kwargs=dict(axis=1), random_state=0)

#------------------------------------------------------------
# Plot results
fig = plt.figure(figsize=(8, 8))
fig.subplots_adjust(wspace=0.35, right=0.95, hspace=0.2, top=0.95)

# first axes: mu posterior
ax1 = fig.add_subplot(221)
ax1.plot(mu, pmu, '-k')
# `density=True` replaces the `normed=True` keyword, which current matplotlib
# releases no longer accept.
ax1.hist(mu_bootstrap, mu_bins, density=True, histtype='step',
         color='b', linestyle='dashed')
ax1.set_xlabel(r'$\mu$')
ax1.set_ylabel(r'$p(\mu|x,I)$')
# [2]: http://en.wikipedia.org/wiki/Jackknife_(statistics)
# [3]: http://en.wikipedia.org/wiki/Bootstrapping_(statistics)
# [4]: http://astroML.org

# In[10]:

from astroML.resample import bootstrap


def fit_samples(sample):
    """Fit every bootstrap resample by maximum likelihood.

    Parameters
    ----------
    sample : array of size [n_bootstraps, n_samples]
        One resampled data set per row.

    Returns
    -------
    ndarray
        The ``optimize.fmin`` result for each row, started from the
        module-level ``theta_guess`` and minimising ``neg_log_likelihood``
        with ``sqrt(row)`` passed as the second extra argument.
    """
    # The loop variable is deliberately not called F, so it cannot be
    # confused with the module-level data array of that name.
    return np.array([
        optimize.fmin(neg_log_likelihood, theta_guess,
                      args=(resampled, np.sqrt(resampled)), disp=0)
        for resampled in sample
    ])


samples = bootstrap(F, 1000, fit_samples)  # 1000 bootstrap resamplings

# Now in a similar manner to what we did above for the MCMC Bayesian
# posterior, we'll compute the sample mean and standard deviation to
# determine the errors on the parameters.

# In[11]:

mu_samp = samples[:, 0]
sig_samp = abs(samples[:, 1])

# Parenthesised single-argument print works under both Python 2 and 3.
print(" mu = {0:.0f} +/- {1:.0f}".format(mu_samp.mean(), mu_samp.std()))
print(" sigma = {0:.0f} +/- {1:.0f}".format(sig_samp.mean(), sig_samp.std()))

# I should note that there is a **huge** literature on the details of
# bootstrap resampling, and there are definitely some subtleties of the
# approach that I am glossing over here. One obvious piece is that there is
# potential for errors to be correlated or non-Gaussian, neither of which is
# reflected by simply finding the mean and standard deviation of each model
# parameter. Nevertheless, I trust that this gives the basic idea of the
# frequentist approach to this problem.
from astroML.resample import bootstrap
import numpy as np
# NOTE(review): the star import is kept because code outside this chunk may
# rely on it; only `norm` is used below.
from scipy.stats import *
import matplotlib.pyplot as plt

plt.ion()

# Draw N standard-normal values and bootstrap their median 10000 times.
N = 1000
samples = np.random.normal(0, 1, N)
median_BS = bootstrap(samples, 10000, np.median, kwargs=dict(axis=1))

# Spread of the bootstrapped medians, and the reference width
# sqrt(pi / (2 N)) it is compared against.
standard_dev = np.std(median_BS)
std = np.sqrt(np.pi / (2 * N))
print('deviation of bootstrap sammples', standard_dev)

x = np.linspace(-0.5, 0.5, 1000)
mu = np.mean(median_BS)
sigma = standard_dev

pdf = norm(0, std).pdf(x)       # theoretically expected Gaussian
pdf1 = norm(mu, sigma).pdf(x)   # Gaussian built from the bootstrap samples

# `density=True` replaces the `normed=True` keyword, which current matplotlib
# releases no longer accept.
plt.hist(median_BS, bins=30, density=True, label='Histogram')

# The two legend labels were attached to the wrong curves: `pdf` is the
# theoretical curve and `pdf1` the sample-based one.
plt.plot(x, pdf, '-k', label='Theoritical Plot')
plt.plot(x, pdf1, '-r', label='sample calculated plot')
plt.legend()
# Select the recovered sources whose MAG falls in [mag_chk, mag_chk + dmag).
mask = np.logical_and(mock_out_data['MAG'] >= mag_chk,
                      mock_out_data['MAG'] < mag_chk + dmag)
temp_mag_out = mock_out_data[mask]

# Scatter of (MAG_AUTO - MAG) for this bin.  Outliers are sigma-clipped
# first; only the surviving differences are kept.  temp_mag_out itself is
# NOT clipped, so the ratio appended to `comps` below counts every source
# in the bin.
# (Commented-out debugging plots that used to live here were removed.)
# NOTE(review): newer astropy releases spell the `iters` keyword `maxiters`
# -- confirm against the astropy version this script targets.
temp_diffs = sigma_clip(temp_mag_out['MAG_AUTO'] - temp_mag_out['MAG'],
                        sigma=2.3, iters=None)
mask = ~ma.getmaskarray(temp_diffs)  # True where the value survived clipping
temp_diffs = temp_diffs[mask]
print(len(temp_diffs))

# NOTE(review): np.std is passed without kwargs (no axis), so it is applied
# to whatever bootstrap hands it as a whole and yields the single number that
# the "%.2e" format below needs -- confirm this is the intended estimator.
if len(temp_diffs) > 1:
    temp_boots = bootstrap(temp_diffs, 10000, np.std)
else:
    # Too few points to resample; record zero scatter.
    temp_boots = 0.0

# One line per bin: lower edge, upper edge, scatter.  write() behaves the
# same under Python 2 and 3, unlike the `print >> file` statement.
error_fileout.write("%.2f %.2f %.2e\n" % (mag_chk, mag_chk + dmag, temp_boots))
boots_out.append(temp_boots)

# Bin centre, and the number of sources in the bin relative to temp_mag_in.
mag_outs.append(mag_chk + dmag / 2.)
comps.append(len(temp_mag_out['MAG']) / float(len(temp_mag_in)))
Ndata=5 Nbootstrap=1000 truemean = 0. truesigma = 1. nruns = 10000 sigstddev = np.zeros(nruns) sigboot = np.zeros(nruns) sigsmboot = np.zeros(nruns) for irun in xrange(nruns): np.random.seed() data = stats.norm(truemean, truesigma).rvs(Ndata) sigstddev[irun] = np.std(data,ddof=1) sigboot[irun] = np.median(mlre.bootstrap(data, Nbootstrap, np.std, kwargs=dict(axis=1, ddof=1))) sigsmboot[irun] = np.median(smoothedbootstrap(data, Nbootstrap, np.std, kwargs=dict(axis=1, ddof=1))) # code below looks at all bootstrap results for a single data set # fig0, ax0 = plt.subplots(figsize=(5, 3.75)) # ax0.hist(smoothedbootstrap(data, Nbootstrap, np.std, kwargs=dict(axis=1, ddof=1)), bins=50, normed=True, histtype='step', # color='green', lw=2) # ax0.hist(mlre.bootstrap(data, Nbootstrap, np.std, kwargs=dict(axis=1, ddof=1)), bins=50, normed=True, histtype='step', # color='red', lw=2) # code below looks at individual median bootstrap results for all nruns data sets fig, ax = plt.subplots(figsize=(5, 3.75)) ax.hist(sigstddev, bins=50, normed=True, histtype='step', color='blue', ls='dashed', label=r'$\sigma\ {\rm (stddev)}$') ax.hist(sigboot, bins=50, normed=True, histtype='step', color='red', label=r'$\sigma\ {\rm (bootstrap)}$') ax.hist(sigsmboot, bins=50, normed=True, histtype='step',
from astroML.resample import bootstrap
from bootstrap_comp import smoothedbootstrap
from bootstrap_comp import smoothedbootstrap2

#==============================================================================
# Unsmoothed Section
# Compare three estimates of the scatter of one small normal sample: the
# direct np.std, the median of a plain bootstrap, and the median of a
# smoothed bootstrap.
plt.close('all')

sigma = 1
numdat = 5
mean = 0

rndsamp = npr.normal(mean, sigma, numdat)

# NOTE(review): this direct estimate uses np.std's default ddof, while both
# bootstrap calls below pass ddof=1 -- confirm the difference is intended.
dev = np.std(rndsamp)

print('Direct Method:')
print(dev)

# NOTE(review): the second argument is a literal 5 here but `numdat` in the
# smoothedbootstrap call below; they coincide only because numdat == 5.
boot1 = bootstrap(rndsamp, 5, np.std, kwargs=dict(axis=1, ddof=1))
# Sorting does not change the median; `bootsort` is kept as its own variable.
bootsort = sorted(boot1)
bootmed = np.median(bootsort)

print('Bootstrap Method:')
print(bootmed)

# smoothedbootstrap comes from the local bootstrap_comp module; the meaning
# of its third positional argument (0) is not visible from this chunk.
boot2 = smoothedbootstrap(rndsamp, numdat, 0, np.std, kwargs=dict(axis=1, ddof=1))
bootsort2 = sorted(boot2)
bootmed2 = np.median(bootsort2)

print('Smoothed Bootstrap Method:')
print(bootmed2)
# Experiment set-up: `nruns` independent data sets of `Ndata` normal draws,
# each resampled `Nbootstrap` times.
Ndata = 5
Nbootstrap = 2000
truemean = 0.
truesigma = 1.
nruns = 1000

# Per-run width estimates: direct std, median of the plain bootstrap, and
# median of the smoothed bootstrap.
sigstddev = np.zeros(nruns)
sigboot = np.zeros(nruns)
sigsmboot = np.zeros(nruns)

# range() instead of xrange() so the script also runs under Python 3.
for irun in range(nruns):
    # Re-seed without an argument on every run, so runs are not reproducible.
    np.random.seed()
    data = stats.norm(truemean, truesigma).rvs(Ndata)
    sigstddev[irun] = np.std(data, ddof=1)
    sigboot[irun] = np.median(
        mlre.bootstrap(data, Nbootstrap, np.std, kwargs=dict(axis=1, ddof=1)))
    sigsmboot[irun] = np.median(
        smoothedbootstrap(data, Nbootstrap, np.std,
                          kwargs=dict(axis=1, ddof=1)))

    # code here in loop looks at all bootstrap results for a single run
    # (single data set)
    if irun == 17:  # choose 17 as an example run for no particular reason
        # The bootstraps are drawn again here, so these histograms are not
        # the exact resamples whose medians were stored above.
        # `density=True` replaces `normed=True`, which current matplotlib
        # releases no longer accept.
        fig0, ax0 = plt.subplots(figsize=(5, 3.75))
        ax0.hist(smoothedbootstrap(data, Nbootstrap, np.std,
                                   kwargs=dict(axis=1, ddof=1)),
                 bins=50, density=True, histtype='step',
                 color='green', lw=2,
                 label=r'$\sigma\ {\rm (sm. bootstrap)}$')
        ax0.hist(mlre.bootstrap(data, Nbootstrap, np.std,
                                kwargs=dict(axis=1, ddof=1)),
                 bins=50, density=True, histtype='step',
                 color='red', lw=2,
                 label=r'$\sigma\ {\rm (bootstrap)}$')

# code below looks at the median bootstrap results for all nruns data sets
# (one median per run)
fig, ax = plt.subplots(figsize=(5, 3.75))
ax.hist(sigstddev, bins=50, density=True, histtype='step',
        color='blue', ls='dashed', label=r'$\sigma\ {\rm (stddev)}$')
ax.hist(sigboot, bins=50, density=True, histtype='step',
        color='red', label=r'$\sigma\ {\rm (bootstrap)}$')
def plotFoM(validationData, zbins, pzs, metric):
    """Plot a photo-z figure of merit per redshift bin for each estimator.

    Parameters
    ----------
    validationData : table-like
        Indexed both by column name and by boolean mask.  Must provide the
        reference-redshift column ``'Z'`` and every column named in ``pzs``.
    zbins : iterable of (zmin, zmax)
        Open intervals in the photo-z column that define the bins.
    pzs : iterable of str
        Photo-z column names; each is also used as a key into the
        module-level ``label_dict`` for the legend.
    metric : {'bias', 's68', 's681pz'}
        Which figure of merit to compute.

    Raises
    ------
    ValueError
        If ``metric`` is not one of the supported names.  (Previously this
        surfaced later as an unbound-variable error.)

    Notes
    -----
    Prints diagnostics, saves ``<metric>_pz_test.png`` and shows the figure.
    ``delta_z``, ``sigma_68``, ``sigma_68_1pz``, ``bootstrap`` and
    ``label_dict`` are taken from module scope.
    """
    # Raw string for the LaTeX label: '\s' is not a valid escape sequence.
    y_labels = {
        'bias': '$z_{photo}-z_{spec}$',
        's68': 'Sigma_68',
        's681pz': r'$\sigma_{68}/(1+z_{spec})$',
    }
    if metric not in y_labels:
        raise ValueError(
            "unknown metric %r; expected one of %s"
            % (metric, sorted(y_labels)))
    ylab = y_labels[metric]

    refZ = 'Z'   # reference (spectroscopic) redshift column
    n_boot = 50  # bootstrap resamplings used for the s68 error bars

    plt.figure()
    for refPz in pzs:
        print(refPz)
        metric_list = []
        errmetric_list = []
        zmid_list = []
        for zmin, zmax in zbins:
            print('Bin ', zmin, zmax)
            # Only the photo-z bin cut is applied.  Additional quality cuts
            # (FLAGS_GOLD, FLAGS_FOOTPRINT, FLAGS_FOREGROUND,
            # EXTENDED_CLASS_MASH_SOF, magnitude limits, spec-z flags) were
            # present only as commented-out code and are not applied.
            zsel = (validationData[refPz] > zmin) & (validationData[refPz] < zmax)
            selection = validationData[zsel]
            # The "basic" and final selections currently coincide, so the
            # same count is printed twice to keep the log format unchanged.
            print(len(selection), len(selection))

            zmid_list.append((zmin + zmax) / 2.0)

            if metric == 'bias':
                dz = delta_z(selection[refPz], selection[refZ])
                val = np.mean(dz)
                errval = np.std(dz) / np.sqrt(len(selection[refPz]))
            elif metric == 's68':
                residual = selection[refPz] - selection[refZ]
                val = sigma_68(residual)
                errval = np.std(
                    bootstrap(residual, n_boot, sigma_68,
                              kwargs=dict(axis=1)))
            else:  # 's681pz'
                residual = selection[refPz] - selection[refZ]
                val = sigma_68_1pz(residual, selection[refZ])
                # NOTE(review): z_spec is handed over un-resampled while the
                # residuals are resampled -- confirm sigma_68_1pz pairs them
                # as intended.
                errval = np.std(
                    bootstrap(residual, n_boot, sigma_68_1pz,
                              kwargs=dict(axis=1, z_spec=selection[refZ])))

            print(metric, 'val', val, 'errval', errval)
            errmetric_list.append(errval)
            metric_list.append(val)

        if refPz == 'Z_MEAN':
            # Shift this estimator's points so its error bars do not sit
            # exactly on top of the others.
            zmid_list = list(np.array(zmid_list) + 0.02)
        plt.errorbar(zmid_list, metric_list, yerr=errmetric_list,
                     label=label_dict[refPz], marker='o', ls='')

    plt.xlabel('Photo-z bin')
    plt.ylabel(ylab)
    plt.grid()
    plt.legend(loc="upper left")
    plt.savefig(metric + '_pz_test.png')
    plt.show()
#------------------------------------------------------------
# Addition by M. DeCesar
# Take a look at your data
xx = np.linspace(1, m, num=m)  # 1..m, one abscissa per data value
plt.figure()
plt.plot(xx, data)
plt.xlabel('Data Bin')
plt.ylabel('Data Value')
plt.title('Sample Values from a Normal Distribution')
#plt.savefig('normdist_sample')
plt.show()

#------------------------------------------------------------
# Compute bootstrap resamplings of data
mu1_bootstrap = bootstrap(data, n, np.std, kwargs=dict(axis=1, ddof=1))
mu2_bootstrap = bootstrap(data, n, sigmaG, kwargs=dict(axis=1))

#------------------------------------------------------------
# Addition by M. DeCesar
# Take a look at what the bootstrap output is
# (parenthesised single-argument print works under Python 2 and 3; the
# second length used to be taken from mu1_bootstrap again by mistake)
print(len(mu1_bootstrap))
print(mu1_bootstrap)
print(len(mu2_bootstrap))
print(mu2_bootstrap)

#------------------------------------------------------------
# Compute the theoretical expectations for the two distributions
# x is a fixed 1000-point grid; its length is set by the literal 1000 here,
# not by m.
x = np.linspace(0.8, 1.2, 1000)

# Expected scatter of the sample standard deviation for unit-sigma data,
# 1 / sqrt(2 (m - 1)) (the original note cites eq 3.35).
sigma1 = 1. / np.sqrt(2 * (m - 1))
# Parameters of the generating Gaussian: mean xbar and variance V.
xbar = 1
V = 4
sigma_x = np.sqrt(V)

# Fixed seed so the drawn sample is reproducible.
np.random.seed(10)
xi = np.random.normal(xbar, sigma_x, size=n)

mu_mean, sig_mean = mean_sigma(xi, ddof=1)

# compute the analytically expected spread in measurements
mu_std = sig_mean / np.sqrt(n)
sig_std = sig_mean / np.sqrt(2 * (n - 1))

#------------------------------------------------------------
# bootstrap estimates
# The number of resamplings is a count, so pass an int: the float 1E6 is
# rejected wherever NumPy uses it as an array dimension.
mu_bootstrap, sig_bootstrap = bootstrap(xi, int(1E6), mean_sigma,
                                        kwargs=dict(ddof=1, axis=1))

#------------------------------------------------------------
# Compute analytic posteriors

# distributions for the mean
mu = np.linspace(-3, 5, 1000)
dmu = mu[1] - mu[0]

# NOTE(review): the literals 1, 4, 10 look like xbar, V and n written out by
# hand -- confirm against the value of n defined earlier in the script.
pmu = compute_pmu(mu, 1, 4, 10)
pmu /= (dmu * pmu.sum())  # normalise to unit area on the mu grid

pmu2 = compute_pmu_alt(mu, 1, 4, 10)
pmu2 /= (dmu * pmu2.sum())

pmu_norm = gaussian(mu, mu_mean, mu_std)