def compute(self):
    var, sample, pairs, weighted, bins = self.config
    bin_scale, bin_edges = bins
    self.selection = self.parent.pair_list.eval_selection(sample, pairs)
    P = self.parent.pair_list.select(self.selection)
    if bin_scale == 'log':
        P.separations = np.log10(P.separations)
    mask = np.logical_and(bin_edges[0] < P.separations,
                          P.separations < bin_edges[-1])
    P = P.select(mask)
    P.sort(by='separation')
    f = self.parent.data[var]
    ff = f[P.first()] * f[P.second()]
    edges = self.config.bins.edges
    bin_counts = np.histogram(P.separations, edges)[0]
    ff = np.split(ff, np.cumsum(bin_counts)[:-1])
    rp = np.split(P.separations, np.cumsum(bin_counts)[:-1])
    if weighted:
        weights = P.downweight()
        weights = np.split(weights, np.cumsum(bin_counts)[:-1])
    else:
        weights = list(repeat(None, len(ff)))
    cf = []
    errorbars = []
    stdev = []
    for pairs, wts in zip(ff, weights):
        if len(pairs) == 0:
            cf.append(np.nan)
            errorbars.append(np.nan)
            stdev.append(np.nan)
        else:
            cf.append(np.average(pairs, weights=wts))
            if wts is None:
                errorbars.append(
                    np.std(bootstrap(pairs, bootfunc=np.average)))
                stdev.append(np.std(pairs))
            else:
                errorbars.append(
                    np.std(
                        bootstrap(np.array([pairs, wts]).T,
                                  bootfunc=lambda p: np.average(
                                      p[:, 0], weights=p[:, 1]))))
                wmean = np.average(pairs, weights=wts)
                stdev.append(
                    np.sqrt(np.average((pairs - wmean)**2, weights=wts)))
    self.results = cf
    self.errorbars = errorbars
    self.stdev = stdev
def calc_velstd_withnan(cum, dt_cum):
    """
    Calculate std of velocity by bootstrap for each point which may include nan.

    Inputs:
      cum    : Cumulative phase block for each point (n_pt, n_im)
               Can include nan.
      dt_cum : Cumulative days for each image (n_im)

    Returns:
      vstd   : Std of velocity for each point (n_pt)
    """
    global bootcount, bootnum
    n_pt, n_im = cum.shape
    bootnum = 100
    bootcount = 0

    vstd = np.zeros((n_pt), dtype=np.float32)
    G = np.stack((np.ones_like(dt_cum), dt_cum), axis=1)
    data = cum.transpose().copy()
    ixs_day = np.arange(n_im)
    mask = (~np.isnan(data))
    data[np.isnan(data)] = 0

    velinv = lambda x: censored_lstsq2(G[x, :], data[x, :], mask[x, :])[1]

    with NumpyRNGContext(1):
        bootresult = bootstrap(ixs_day, bootnum, bootfunc=velinv)

    vstd = np.nanstd(bootresult, axis=0)

    print('')
    return vstd
def bootstrap_curvefit(self, x, y, N=100):
    indices = np.arange(len(x))
    # get resampled indices
    boot_indices = bootstrap(indices, N)
    bslope = np.zeros(N, 'd')
    binter = np.zeros(N, 'd')
    for i, myindices in enumerate(boot_indices):
        myindices = np.array(myindices, 'i')
        popt, pcov = curve_fit(linear_func, x[myindices], y[myindices])
        bslope[i] = popt[0]
        binter[i] = popt[1]
    bslope_lower = scoreatpercentile(bslope, 16)
    bslope_upper = scoreatpercentile(bslope, 84)
    binter_lower = scoreatpercentile(binter, 16)
    binter_upper = scoreatpercentile(binter, 84)
    bslope_med = np.median(bslope)
    binter_med = np.median(binter)
    print('median slope = {:.2f}+{:.2f}-{:.2f}'.format(
        bslope_med, bslope_upper - bslope_med, bslope_med - bslope_lower))
    print('median inter = {:.2f}+{:.2f}-{:.2f}'.format(
        binter_med, binter_upper - binter_med, binter_med - binter_lower))
    return (bslope_med, bslope_med - bslope_lower, bslope_upper - bslope_med,
            binter_med, binter_med - binter_lower, binter_upper - binter_med)
def run_lenstool_parallel(folder, ini, ncores):
    backgx = np.loadtxt(folder + '/background_galaxies_main.lenstool')
    infile = open(folder + '/background_galaxies_main.lenstool', 'r')
    header = infile.readline()[2:-2]
    index = np.arange(len(backgx))

    with NumpyRNGContext(1):
        bootresult = (bootstrap(index, ncores)).astype(int)

    total_folders = []
    for j in np.arange(ini, ini + ncores):
        os.system('rm -r ' + folder + '_' + str(j))
        os.system('mkdir ' + folder + '_' + str(j))
        os.system('cp -r ' + folder + '/* ' + folder + '_' + str(j) + '/')
        total_folders = np.append(total_folders, folder + '_' + str(j))
        lenstool_catalogue = backgx[bootresult[j - ini, :]]
        lenstool_catalogue[:, 0] = np.arange(1, len(backgx) + 1)
        np.savetxt(folder + '_' + str(j) + '/background_galaxies_main.lenstool',
                   lenstool_catalogue,
                   fmt='%i %f %f %f %f %f %f %f', header=header)

    pool = Pool(processes=(ncores))
    salida = np.array(pool.map(run_lenstool, total_folders))
def _error_map(self, boot_n, data, nbins, box_size_hMpc, cosmo):
    if box_size_hMpc is None:
        raise ValueError('You need to specify a box_size_hMpc value '
                         'for the bootstrap analysis.')

    cube_shape = self.kappa.shape + (boot_n, )   # add extra dimension for each map
    kE_err_cube = np.zeros(cube_shape)
    kB_err_cube = np.zeros(cube_shape)

    index = np.arange(len(data))
    with NumpyRNGContext(seed=1):
        index_boot = bootstrap(index, boot_n).astype(int)

    for i in range(boot_n):
        if isinstance(data, pd.DataFrame):
            b_data = data.iloc[index_boot[i]]
        else:
            b_data = data[index_boot[i]]   # assuming numpy array
        b_kappa = self._kappa_map(b_data, nbins, box_size_hMpc, cosmo,
                                  save_ref=False)
        kE_err_cube[:, :, i] = b_kappa.real
        kB_err_cube[:, :, i] = b_kappa.imag

    kE_err = np.std(kE_err_cube, axis=2)
    kB_err = np.std(kB_err_cube, axis=2)
    error_map = kE_err + 1j * kB_err
    return error_map
def getbiweight(self, x, clipiters=None, nbootstrap=1000):
    #z=sigma_clip(x,sig=3,iters=clipiters,cenfunc=biweight_location,varfunc=biweight_midvariance)
    z = x   # skip sigma clipping
    #biweightlocation=biweight_location(z)
    #biweightscale=biweight_midvariance(z)
    biweightlocation, biweightscale = getbiweight(z)

    # calculate bootstrap errors
    nboot = nbootstrap
    boot = bootstrap(z, bootnum=nboot)
    row, col = boot.shape
    bootlocation = np.zeros(row, 'f')
    bootscale = np.zeros(row, 'f')
    for i in range(row):
        #bootlocation[i]=biweight_location(boot[i,:])
        #bootscale[i]=biweight_midvariance(boot[i,:])
        bootlocation[i], bootscale[i] = getbiweight(boot[i, :])

    # get percentiles
    location_lower = np.percentile(bootlocation, q=16)
    location_upper = np.percentile(bootlocation, q=82)
    location_median = np.percentile(bootlocation, q=50)
    scale_lower = np.percentile(bootscale, q=16)
    scale_upper = np.percentile(bootscale, q=82)
    scale_median = np.percentile(bootscale, q=50)

    return (biweightlocation, location_upper - biweightlocation,
            biweightlocation - location_lower, biweightscale,
            scale_upper - biweightscale, biweightscale - scale_lower,
            location_median, scale_median)
def bootstrapping(bootarr, bootfunc):
    # gives multiple velocity dispersions
    with NumpyRNGContext(1):
        bootresult = bootstrap(bootarr,
                               bootnum=100,
                               samples=len(bootarr) - 1,
                               bootfunc=bootfunc)
    return bootresult
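# Usage sketch for bootstrapping(): assumes the helper above and its imports
# (numpy as np, astropy.stats.bootstrap, astropy.utils.misc.NumpyRNGContext)
# are in scope. np.std stands in for a real velocity-dispersion estimator and
# the mock velocities are illustrative only.
import numpy as np

mock_velocities = np.random.normal(0.0, 150.0, size=80)   # mock line-of-sight velocities
dispersions = bootstrapping(mock_velocities, np.std)      # 100 bootstrap realisations
print(dispersions.mean(), dispersions.std())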
def _boot_error(self, shear, cero, weight, nboot):
    index = np.arange(len(shear))
    with NumpyRNGContext(seed=1):
        bootresult = bootstrap(index, nboot)
    index_boot = bootresult.astype(int)
    shear_boot = shear[index_boot]
    cero_boot = cero[index_boot]
    weight_boot = weight[index_boot]
    shear_means = np.average(shear_boot, weights=weight_boot, axis=1)
    cero_means = np.average(cero_boot, weights=weight_boot, axis=1)
    return np.std(shear_means), np.std(cero_means)
def fboot(y):
    """
    Helper function of trendplot. Returns the bootstrap error of an input 1d-array.

    Calculates the biweight mean of each bootstrap sample, and then gets the
    biweight standard deviation of all samples.
    """
    if len(y) > 5:
        bb = bootstrap(y, bootnum=250, bootfunc=biweight_location)
        res = biweight_midvariance(bb)
    else:
        res = np.nan
    return res
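# Usage sketch for fboot(): assumes the imports used above (numpy as np,
# astropy.stats.bootstrap, biweight_location, biweight_midvariance) are in
# scope; the input array is synthetic and purely illustrative.
import numpy as np

rng = np.random.default_rng(0)
y = rng.normal(loc=1.0, scale=0.3, size=200)
print(fboot(y))   # bootstrap spread of the biweight location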
def bootstrap_errors(et, ex, peso, nboot):
    index = np.arange(len(et))
    with NumpyRNGContext(1):
        bootresult = bootstrap(index, nboot)
    INDEX = bootresult.astype(int)
    ET = et[INDEX]
    EX = ex[INDEX]
    W = peso[INDEX]
    et_means = np.average(ET, axis=1, weights=W)
    ex_means = np.average(EX, axis=1, weights=W)
    return np.std(et_means), np.std(ex_means), et_means, ex_means
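# Usage sketch for bootstrap_errors(): synthetic tangential/cross ellipticities
# and weights; assumes numpy, astropy.stats.bootstrap and NumpyRNGContext are
# imported as in the function above.
import numpy as np

rng = np.random.default_rng(1)
et = rng.normal(0.05, 0.3, size=500)     # tangential ellipticity per source
ex = rng.normal(0.00, 0.3, size=500)     # cross ellipticity per source
peso = rng.uniform(0.5, 1.0, size=500)   # per-source weights
err_t, err_x, _, _ = bootstrap_errors(et, ex, peso, nboot=100)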
def lognormal(data, logfile=None, verbose=False, boot=True):
    if logfile:
        wlog("Fitting: Log-Normal", logfile, verbose, u=True)
    fit = stats.lognorm.fit(data, floc=0)
    if logfile:
        wlog("Completed fit", logfile, verbose)
    if boot:
        wlog("Performing bootstrap to estimate error in fit", logfile, verbose)
        rand_context = np.random.randint(0, 1e7)
        bootnum = 1000
        with NumpyRNGContext(rand_context):
            if logfile:
                wlog("Running Bootstrap", logfile, verbose, u=True)
                wlog("Bootstrap Parameters:", logfile, verbose)
                wlog("bootnum: {0}".format(bootnum), logfile, verbose)
                wlog("NumpyRNGContext: {0}".format(rand_context), logfile, verbose)
            boot_resample = bootstrap(data, bootnum=bootnum, num_samples=bootnum)
        bootstrap_shape = []
        bootstrap_loc = []
        bootstrap_scale = []
        for i in range(len(boot_resample)):
            resample_fit = stats.lognorm.fit(boot_resample[i])
            bootstrap_shape.append(resample_fit[0])
            bootstrap_loc.append(resample_fit[1])
            bootstrap_scale.append(resample_fit[2])
        err = (stats.norm.fit(bootstrap_shape)[1],
               stats.norm.fit(bootstrap_loc)[1],
               stats.norm.fit(bootstrap_scale)[1])
    if not boot:
        wlog("Did not perform bootstrap analysis for errors", logfile, verbose)
        err = ["NaN", "NaN", "NaN"]
    if logfile:
        wlog("Completed Bootstrap Analysis", logfile, verbose, u=True)
        wlog("{0:<15}{1:<15}{2:<15}".format("Parameter", "Fit", "BootUncert"),
             logfile, verbose)
        wlog("{0:<15}{1:<15.8}{2:<15.8}".format("shape", fit[0], err[0]),
             logfile, verbose)
        wlog("{0:<15}{1:<15}{2:<15}".format("loc", fit[1], err[1]),
             logfile, verbose)
        wlog("{0:<15}{1:<15.8}{2:<15.8}".format("scale", fit[2], err[2]),
             logfile, verbose)
    return fit, err
def bootstrap(self, cl, bkg, df, ra_boot_lims, dec_boot_lims, nboot=9,
              df_ra='ra', df_dec='dec', method="sample"):
    from astropy.stats import bootstrap
    import numpy as np

    if method == 'sample':
        radec = np.array(list(zip(df[df_ra], df[df_dec])))
        radec_boot = bootstrap(radec, bootnum=nboot)

    self.boot_dict = {}
    self.ra_boot_lims = ra_boot_lims
    self.dec_boot_lims = dec_boot_lims

    for i in range(nboot):
        print("Counting stars for bootstrap " + str(i + 1) + ' of ' + str(nboot))
        data_aux = df.copy()
        if method == "sample":
            data_aux.loc[:, df_ra] = radec_boot[i][:, 0]
            data_aux.loc[:, df_dec] = radec_boot[i][:, 1]
        elif method == "uniform":
            ra_aux = np.random.uniform(low=ra_boot_lims[0],
                                       high=ra_boot_lims[1],
                                       size=len(df))
            dec_aux = np.random.uniform(low=dec_boot_lims[0],
                                        high=dec_boot_lims[1],
                                        size=len(df))
            data_aux.loc[:, df_ra] = ra_aux
            data_aux.loc[:, df_dec] = dec_aux
        density_map_boot = DensityMap(self.obj_name, cl, bkg,
                                      ra_boot_lims, dec_boot_lims,
                                      self.ra_delta, self.dec_delta)
        density_map_boot.count_stars(data_aux)
        self.boot_dict['boot' + str(i + 1)] = density_map_boot
def calc_threshold(time, flux, bootnum=10000, percentile=99.9, parallel=False):
    # Define frequency array to run the LSP on
    # Oversample the baseline by a factor of 2
    fnyq = 0.005
    fres = 1. / (2 * (np.max(time) - np.min(time)))
    farr = np.linspace(1e-6, fnyq, int(fnyq / fres))

    # Bootstrap the time-series 10,000 times with replacement
    # and specify the random seed for replicability
    with NumpyRNGContext(1):
        bootstrapped_lcs = bootstrap(flux, bootnum=bootnum)

    # Initialize progress bar
    action = 'Performing Bootstrapping...'   # Progress bar message
    progress_bar(0, bootnum, action)

    # If the operation is to be parallelized:
    if parallel:
        # Calculate the maximum value for the periodogram of each bootstrapped light curve
        max_vals = Parallel(n_jobs=4)(delayed(calc_lsp_max)(
            time, bootstrapped_lcs[i], farr, i, bootnum, action)
            for i in range(bootnum))
    else:
        # Define a list to append the maximum values into
        max_vals = []
        # Calculate the maximum value for the periodogram of each bootstrapped light curve
        for i in range(bootnum):
            max_vals.append(np.max(calc_lsp(time, bootstrapped_lcs[i], farr)))
            progress_bar(i + 1, bootnum, action)

    # Estimate the false alarm probability
    fap = np.percentile(max_vals, percentile)

    # Compute the mean amplitude of the original periodogram
    og_mean_amp = np.mean(calc_lsp(time, flux, farr))

    print('\n')
    print('--------------------------------------------------------------------------')
    print("The 0.1 % False Alarm Probability threshold: {:.4f} %".format(fap * 1e2))
    print("This is equal to {:.4f} times the original periodogram's amplitude".format(
        fap / og_mean_amp))

    # Make python talk to you to let you know the script is finished
    # os.system("say 'Your bootstrapping routine is finished running.'")

    # Return the 0.1% false alarm probability in both percent and units of the
    # original periodogram's mean amplitude
    return fap * 1e2, fap / og_mean_amp
def generate_dataset(self):
    boot = []
    for i in range(len(self.train_dataset)):
        boot.append(i)
    with NumpyRNGContext(1):
        bootresult = bootstrap(np.array(boot), self.learners,
                               int(len(self.train_dataset) * self.partitions))
    dataset = []
    for samples in bootresult:
        d = wp.DataSet()
        for sample in samples:
            d.add(self.train_dataset.get(int(sample)),
                  self.train_dataset.getLabel(int(sample)))
        dataset.append(d)
    return dataset
def bootstrap_errors_stack(et, ex, peso, nboot, array):
    unique = np.unique(array)
    with NumpyRNGContext(1):
        bootresult = bootstrap(unique, nboot)
    et_means = np.array([
        np.average(et[np.in1d(array, x)], weights=peso[np.in1d(array, x)])
        for x in bootresult
    ])
    ex_means = np.array([
        np.average(ex[np.in1d(array, x)], weights=peso[np.in1d(array, x)])
        for x in bootresult
    ])
    return np.std(et_means), np.std(ex_means), et_means, ex_means
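# Sketch of the stacked variant above: `array` carries one lens/group id per
# source, so each bootstrap realisation resamples whole lenses rather than
# individual sources. Data are synthetic; same import assumptions as the
# function above.
import numpy as np

rng = np.random.default_rng(2)
group_id = np.repeat(np.arange(20), 25)          # 20 lenses, 25 sources each
et = rng.normal(0.05, 0.3, size=group_id.size)
ex = rng.normal(0.00, 0.3, size=group_id.size)
peso = np.ones_like(et)
err_t, err_x, _, _ = bootstrap_errors_stack(et, ex, peso, 50, group_id)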
def qbootstrap_errors(et, ex, peso, angle, nboot):
    index = np.arange(len(et))
    with NumpyRNGContext(1):
        bootresult = bootstrap(index, nboot)
    INDEX = bootresult.astype(int)
    ET = et[INDEX]
    EX = ex[INDEX]
    W = peso[INDEX]
    A = angle[INDEX]
    et_means = np.sum((ET * np.cos(2. * A) * W), axis=1) / np.sum(
        ((np.cos(2. * A)**2) * W), axis=1)
    ex_means = np.sum((EX * np.sin(2. * A) * W), axis=1) / np.sum(
        ((np.sin(2. * A)**2) * W), axis=1)
    return np.std(et_means), np.std(ex_means), et_means, ex_means
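# Sketch for the quadrupole estimator above: `angle` is the position angle of
# each source relative to the lens orientation, in radians. Inputs are
# synthetic; same import assumptions as the function above.
import numpy as np

rng = np.random.default_rng(3)
n = 400
et = rng.normal(0.05, 0.3, size=n)
ex = rng.normal(0.00, 0.3, size=n)
peso = np.ones(n)
angle = rng.uniform(0.0, np.pi, size=n)
err_t, err_x, _, _ = qbootstrap_errors(et, ex, peso, angle, 100)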
def gaussian(data, logfile=None, verbose=False):
    if logfile:
        wlog("Fitting: Gaussian", logfile, verbose, u=True)
    fit = stats.norm.fit(data)
    if logfile:
        wlog("Completed fit", logfile, verbose)
        wlog("Performing bootstrap to estimate error in fit", logfile, verbose)
    rand_context = np.random.randint(0, 1e7)
    bootnum = 1000
    with NumpyRNGContext(rand_context):
        if logfile:
            wlog("Running Bootstrap", logfile, verbose, u=True)
            wlog("Bootstrap Parameters:", logfile, verbose)
            wlog("bootnum: {0}".format(bootnum), logfile, verbose)
            wlog("NumpyRNGContext: {0}".format(rand_context), logfile, verbose)
        boot_resample = bootstrap(data, bootnum=bootnum, num_samples=bootnum)
    bootstrap_mean = []
    bootstrap_std = []
    for i in range(len(boot_resample)):
        resample_fit = stats.norm.fit(boot_resample[i])
        bootstrap_mean.append(resample_fit[0])
        bootstrap_std.append(resample_fit[1])
    err = (stats.norm.fit(bootstrap_mean)[1], stats.norm.fit(bootstrap_std)[1])
    if logfile:
        wlog("Completed Bootstrap Analysis", logfile, verbose, u=True)
        wlog("{0:<15}{1:<15}{2:<15}".format("Parameter", "Fit", "BootUncert"),
             logfile, verbose)
        wlog("{0:<15}{1:<15.8}{2:<15.8}".format("Mean", fit[0], err[0]),
             logfile, verbose)
        wlog("{0:<15}{1:<15.8}{2:<15.8}".format("Std", fit[1], err[1]),
             logfile, verbose)
    return fit, err
def _getd(self, el):
    md = [np.zeros(self.structures[0].frac_coords.shape)]
    for i in range(self.skip_first + 1, self.total_t):
        dx = self.structures[i].frac_coords - self.structures[i - 1].frac_coords
        dx -= np.round(dx)
        md.append(dx)
    self.md = np.array(md) * self.abc

    # remove other elements from the rest of the calculations
    s = set(self.structures[0].indices_from_symbol(el))
    self.md = np.delete(
        self.md, [x for x in list(range(self.natoms)) if x not in s], 1)

    msds = []

    # get the correlation time from the ACF
    #mean_md = [np.mean(np.mean(x, axis=1), axis=0) for x in self.md]
    #acf = autocorrelation(mean_md, normalize=True)
    #tao = np.ceil(np.trapz(acf, np.arange(0, len(acf))))
    #self.corr_t = int(tao)

    if self.sampling_method == 'block':
        for i in range(self.n_origins):
            su = np.square(
                np.cumsum(self.md[i * self.corr_t:i * self.corr_t + self.block_t],
                          axis=0))
            msds.append(np.mean(su, axis=1))
    elif self.sampling_method == 'bootstrap':
        boots = bootstrap(self.md, bootnum=self.n_trials(el),
                          samples=self.block_t)
        for boot in boots:
            su = np.square(np.cumsum(boot, axis=0))
            msds.append(np.mean(su, axis=1))

    self.msds = msds
def _getd(self, el):
    md = [np.zeros(self.structures[0].frac_coords.shape)]
    for i in range(self.skip_first + 1, self.total_t):
        dx = self.structures[i].frac_coords - self.structures[i - 1].frac_coords
        dx -= np.round(dx)
        md.append(dx)
    self.md = np.array(md) * self.abc

    # remove other elements from the rest of the calculations
    s = set(self.structures[0].indices_from_symbol(el))
    self.md = np.delete(
        self.md, [x for x in list(range(self.natoms)) if x not in s], 1)

    msds = []
    block = False
    boot_strap = True

    if self.sampling_method == 'block':
        for i in range(self.n_origins):
            su = np.square(
                np.cumsum(self.md[i * self.corr_t:i * self.corr_t + self.block_t],
                          axis=0))
            msds.append(np.mean(su, axis=1))
    elif self.sampling_method == 'bootstrap':
        boots = bootstrap(self.md, bootnum=self.n_trials(el))
        self.block_l = len(boots[0]) / self.corr_t
        for boot in boots:
            su = np.square(np.cumsum(boot, axis=0))
            msds.append(np.mean(su, axis=1))

    self.msds = msds
def calcstats(self, allgals=True, nboot=100, percentile=68.): if allgals: sampleflag = self.sfsampleflag & (self.logstellarmass > 9.7) else: sampleflag = self.sampleflag & (self.logstellarmass > 9.7) # calc mean, errormean, median_absolute_deviation, skew, kurtosis # SFR relative to main sequence # perpendicular distance from SF main sequence # offset in sSFR relative to best-fit sSFR, as a function of mass # errors - bootstrap resampling, calculate mean and 68% confidence interval #plt.figure(12,4) #plt.subplot(1,3,1) # hist of # keep seed of random generator constant, so numbers are the same each time with NumpyRNGContext(1): test_variables = [self.msdist, self.msperpdist, self.sSFRdist] names = [ 'MS DISTANCE', 'MS PERPENDICULAR DISTANCE', 'SSFR DISTANCE' ] for i in range(len(test_variables)): # core sample myvara = test_variables[i][self.membflag & sampleflag] test = bootstrap(myvara, bootnum=nboot, bootfunc=test_statistics) results = np.zeros((6, test.shape[1]), 'f') results[1] = np.mean(test, axis=0) results[0] = scoreatpercentile(test, (50. - percentile / 2.), axis=0) results[2] = scoreatpercentile(test, (50. + percentile / 2.), axis=0) # external sample myvarb = test_variables[i][~self.membflag & sampleflag] test = bootstrap(myvarb, bootnum=nboot, bootfunc=test_statistics) results[4] = np.mean(test, axis=0) results[3] = scoreatpercentile(test, (50. - percentile / 2.), axis=0) results[5] = scoreatpercentile(test, (50. + percentile / 2.), axis=0) # print K-S test print('##################################') print(names[i] + ' STATS') print('##################################\n') ks(myvara, myvarb) print('##################################\n') #print('the array below prints statistics for ') #print(results) # columns are (np.mean(x), np.var(x), MAD(x), st.skew(x), st.kurtosis(x)) # save results if i == 0: self.msdist_stats = results elif i == 1: self.msperpdist_stats = results elif i == 2: self.sSFRdist_stats = results #cols = ['mean','var','MAD','skew','kurt'] for j in range(results.shape[1]): print(stat_cols[j] + ' (conf interval = {:.1f} %'.format(percentile)) print('CORE: {:.3f} - {:.3f} - {:3f}'.format( results[0, j], results[1, j], results[2, j])) print('EXT : {:.3f} - {:.3f} - {:3f}'.format( results[3, j], results[4, j], results[5, j])) print('') print("") print("")
def partial_profile(backcat_ids,RA0,DEC0,Z, RIN,ROUT,ndots,h,nboot=100): cosmo = LambdaCDM(H0=100*h, Om0=0.3, Ode0=0.7) backcat = S.data.loc[backcat_ids] ndots = int(ndots) if 'KiDS' in np.array(backcat.CATNAME)[0]: mask = (backcat.Z_B > (Z + 0.1))*(backcat.ODDS >= 0.5)*(backcat.Z_B < 0.9)*(backcat.Z_B > 0.2) else: mask = (backcat.Z_B > (Z + 0.1))*(backcat.ODDS >= 0.5)*(backcat.Z_B > 0.2) catdata = backcat[mask] dl, ds, dls = gentools.compute_lensing_distances(np.array([Z]), catdata.Z_B, precomputed=True) dl = (dl*0.7)/h ds = (ds*0.7)/h dls = (dls*0.7)/h KPCSCALE = dl*(((1.0/3600.0)*np.pi)/180.0)*1000.0 BETA_array = dls/ds Dl = dl*1.e6*pc sigma_c = (((cvel**2.0)/(4.0*np.pi*G*Dl))*(1./BETA_array))*(pc**2/Msun) rads, theta, test1,test2 = eq2p2(np.deg2rad(catdata.RAJ2000), np.deg2rad(catdata.DECJ2000), np.deg2rad(RA0), np.deg2rad(DEC0)) #Correct polar angle for e1, e2 theta = theta+np.pi/2. e1 = catdata.e1 e2 = catdata.e2 #get tangential ellipticities et = (-e1*np.cos(2*theta)-e2*np.sin(2*theta))*sigma_c #get cross ellipticities ex = (-e1*np.sin(2*theta)+e2*np.cos(2*theta))*sigma_c del(e1) del(e2) r=np.rad2deg(rads)*3600*KPCSCALE del(rads) peso = catdata.weight peso = peso/(sigma_c**2) m = catdata.m Ntot = len(catdata) del(catdata) bines = np.logspace(np.log10(RIN),np.log10(ROUT),num=ndots+1) dig = np.digitize(r,bines) DSIGMAwsum_T = [] DSIGMAwsum_X = [] WEIGHTsum = [] Mwsum = [] BOOTwsum_T = np.zeros((nboot,ndots)) BOOTwsum_X = np.zeros((nboot,ndots)) BOOTwsum = np.zeros((nboot,ndots)) NGAL = [] for nbin in range(ndots): mbin = dig == nbin+1 DSIGMAwsum_T = np.append(DSIGMAwsum_T,(et[mbin]*peso[mbin]).sum()) DSIGMAwsum_X = np.append(DSIGMAwsum_X,(ex[mbin]*peso[mbin]).sum()) WEIGHTsum = np.append(WEIGHTsum,(peso[mbin]).sum()) Mwsum = np.append(Mwsum,(m[mbin]*peso[mbin]).sum()) NGAL = np.append(NGAL,mbin.sum()) index = np.arange(mbin.sum()) if mbin.sum() == 0: continue else: with NumpyRNGContext(1): bootresult = bootstrap(index, nboot) INDEX=bootresult.astype(int) BOOTwsum_T[:,nbin] = np.sum(np.array(et[mbin]*peso[mbin])[INDEX],axis=1) BOOTwsum_X[:,nbin] = np.sum(np.array(ex[mbin]*peso[mbin])[INDEX],axis=1) BOOTwsum[:,nbin] = np.sum(np.array(peso[mbin])[INDEX],axis=1) output = {'DSIGMAwsum_T':DSIGMAwsum_T,'DSIGMAwsum_X':DSIGMAwsum_X, 'WEIGHTsum':WEIGHTsum, 'Mwsum':Mwsum, 'BOOTwsum_T':BOOTwsum_T, 'BOOTwsum_X':BOOTwsum_X, 'BOOTwsum':BOOTwsum, 'Ntot':Ntot,'NGAL':NGAL} return output
if sfq_method == 'SSFR_MED':
    if sfq_type == 'sf':
        mock_cat = mock_cat[mock_cat['SSFR_MED'] > -11]
    elif sfq_type == 'q':
        mock_cat = mock_cat[mock_cat['SSFR_MED'] < -11]
elif sfq_method == 'sfq_nuvrk' or sfq_method == 'sfq_nuvrz':
    if sfq_type == 'sf':
        mock_cat = mock_cat[mock_cat[sfq_method] == 0]
    elif sfq_type == 'q':
        mock_cat = mock_cat[mock_cat[sfq_method] == 1]
elif sfq_method == 'sfProb_nuvrk' or sfq_method == 'sfProb_nuvrz':
    mock_cat = mock_cat[mock_cat[sfq_method] > 0]
    mock_cat = mock_cat[mock_cat[sfq_method] < 1]

# bootstrap resampling
boot_idx = bootstrap(np.arange(len(mock_cat)), bootnum=1)
mock_cat = mock_cat[boot_idx[0].astype(int)]

if as_func_of == 'mag':
    bin_number = 25
    bin_edges = np.linspace(15, 30, num=bin_number)
    mock_cat = mock_cat[~np.isnan(np.array(mock_cat['i']))]
    mag_list = np.array(mock_cat['i'])
    if 'sfProb' in sfq_method:
        if sfq_type == 'sf':
            all = np.histogram(mag_list, bins=bin_edges,
                               weights=mock_cat[sfq_method])[0]
        elif sfq_type == 'q':
            all = np.histogram(mag_list, bins=bin_edges,
def calc_zeropoints(base, verbose=False, fend='_flux'): fnu, efnu, fit_seds, wl = getSEDs(base, base+'.tempfilt') #phot = Table.read(catalog, format='ascii.commented_header') #loadzp = np.loadtxt('/home/duncan/code/eazy-photoz/inputs/'+base+'.zeropoint',dtype='str')[:,1].astype('float') flux = fnu fluxerr = efnu fit_flux = fit_seds fwhm = 0.1*wl translate = np.loadtxt(base+'.translate', dtype='str') fnames = translate[:,0] eazy_fno = translate[:,1] isflux = [filt.endswith(fend) for filt in fnames] fnames = fnames[np.array(isflux)] eazy_fno = eazy_fno[np.array(isflux)] medians = np.zeros(fnu.shape[1]) scatter = np.zeros(fnu.shape[1]) Nfilts = len(isflux) Fig, Ax = plt.subplots(5, int(Nfilts/5)-1, sharex=True, figsize = (12.*golden, 10)) for i, ax in enumerate(Ax.flatten()[:fnu.shape[1]]): cut = ((fnu > 3*efnu) * (efnu > 0.) * (fnu < 100.))[:,i] ratio = (fit_seds[cut,i]-fnu[cut,i])/fit_seds[cut,i] + 1 #ratio = (fit_seds[cut,i]-fnu[cut,i])/efnu[cut,i] c = np.invert(np.isnan(ratio)) ratio = ratio[c] if np.sum(c) > 10: medians[i] = np.nanmedian(ratio) bootresult = bootstrap(ratio, 100, samples=np.maximum(len(ratio)-1, int(0.1*len(ratio))), bootfunc=np.nanmedian) scatter[i] = np.std(bootresult) hist, bins, ob = ax.hist(ratio, bins=101, range=(0.,2.), histtype='stepfilled', normed=True) ax.text(1.5,1,'{0:.3f}'.format(medians[i]),size=10, bbox=dict(boxstyle="round", fc="w", alpha=0.7, lw=0.)) ax.set_xlim([0,2]) ax.set_ylim([0,np.max(hist)*1.33]) #ax.set_yscale('log') else: medians[i] = -99. scatter[i] = -99. if i % 9 == 0: ax.set_ylabel('Normalised counts') ax.set_xlabel(r'$F_{\rm{fit}}/F_{\rm{obs}}$') #ax.set_xticks([0.,0.5,1.,1.5]) ax.set_title(fnames[i],x=0.5,y=0.8,size=9, bbox=dict(boxstyle="round", fc="w", alpha=0.7, lw=0.)) if verbose: print ('{0}: {1:.3f} +/- {2:.3f}'.format(fnames[i], medians[i], scatter[i])) Fig.subplots_adjust(left=0.05,right=0.98,bottom=0.065,top=0.98,wspace=0,hspace=0) #plt.show() c = np.isnan(medians) medians[c] = 99. scatter[c] = 99. output_path = base+'.zeropoint' with open(output_path,'w') as file: for i, med in enumerate(medians): if np.logical_and(np.abs(med-1) > 2.*np.abs(scatter[i]), med > 0): file.write('{0} {1:.3f} {2}'.format(eazy_fno[i], med, '\n')) return output_path, Fig, medians, scatter
"Wild Type mean / std:", np.mean(df[df["Genotype"] == "Wild Type"][key]), np.std(df[df["Genotype"] == "Wild Type"][key]), ) print( key, "Messaging Knockout mean:", np.mean(df[df["Genotype"] == "Messaging Knockout"][key]), np.std(df[df["Genotype"] == "Messaging Knockout"][key]), ) # do bootstrap statistics bootsamples = 1000000 boots = zip( bootstrap(np.array(df[df["Genotype"] == "Wild Type"][key]), bootsamples), bootstrap(np.array(df[df["Genotype"] == "Messaging Knockout"][key]), bootsamples), ) bootstats = [ np.mean(std_boot) - np.mean(control_boot) for std_boot, control_boot in tqdm(boots, total=bootsamples) ] print( key, "p:", # divide by 100 b/c percentileofscore returns 0-100 and we want 0-1 sp.stats.percentileofscore(bootstats, 0, 'rank') / 100, )
def halo_value_list(virial_mass, property_plot, mean): bin_for_disk = np.arange(11, 16, 0.2) halo_mass = np.zeros(len(bin_for_disk)) prop_mass = np.zeros(len(bin_for_disk)) prop_mass_low = np.zeros(len(bin_for_disk)) prop_mass_high = np.zeros(len(bin_for_disk)) if mean == True: for i in range(1, len(bin_for_disk)): halo_mass[i - 1] = np.log10( np.mean( virial_mass[(virial_mass < 10**bin_for_disk[i]) & (virial_mass >= 10**bin_for_disk[i - 1])])) prop_mass[i - 1] = np.log10( np.mean( property_plot[(virial_mass < 10**bin_for_disk[i]) & (virial_mass >= 10**bin_for_disk[i - 1])])) bootarr = np.log10( property_plot[(virial_mass < 10**bin_for_disk[i]) & (virial_mass >= 10**bin_for_disk[i - 1])]) bootarr = bootarr[bootarr != float('-inf')] print( len(virial_mass[(virial_mass < 10**bin_for_disk[i]) & (virial_mass >= 10**bin_for_disk[i - 1])])) if len(bootarr) > 10: if bootarr != []: with NumpyRNGContext(1): bootresult = bootstrap(bootarr, 10, bootfunc=np.mean) bootresult_error = bootstrap( bootarr, 10, bootfunc=stats.tstd) / 2 prop_mass_low[ i - 1] = prop_mass[i - 1] - np.average(bootresult_error) prop_mass_high[ i - 1] = np.average(bootresult_error) + prop_mass[i - 1] else: for i in range(1, len(bin_for_disk)): halo_mass[i - 1] = np.log10( np.median( virial_mass[(virial_mass < 10**bin_for_disk[i]) & (virial_mass >= 10**bin_for_disk[i - 1])])) prop_mass[i - 1] = np.log10( np.median( property_plot[(virial_mass < 10**bin_for_disk[i]) & (virial_mass >= 10**bin_for_disk[i - 1])])) bootarr = np.log10( property_plot[(virial_mass < 10**bin_for_disk[i]) & (virial_mass >= 10**bin_for_disk[i - 1])]) bootarr = bootarr[bootarr != float('-inf')] print( len(virial_mass[(virial_mass < 10**bin_for_disk[i]) & (virial_mass >= 10**bin_for_disk[i - 1])])) if len(bootarr) > 10: if bootarr != []: with NumpyRNGContext(1): bootresult = bootstrap(bootarr, 10, bootfunc=np.median) bootresult_lower = bootstrap( bootarr, 10, bootfunc=nanpercentile_lower) bootresult_upper = bootstrap( bootarr, 10, bootfunc=nanpercentile_upper) prop_mass_low[i - 1] = np.mean(bootresult_lower) prop_mass_high[i - 1] = np.mean(bootresult_upper) return halo_mass, prop_mass, prop_mass_low, prop_mass_high
def bootstrapping():
    # read simulations of each model
    for model in sorted(population.keys()):
        if model[1] == 'model':
            din_hits = []    # list of column vectors, one vector per rule
            din_fluxes = []  # list of square numpy arrays, but not symmetrics

            model_key = model[0]
            files = sorted(glob.glob('./flux_{:s}_*json'.format(model_key)))
            for file in files:
                with open(file, 'r') as infile:
                    data = pandas.read_json(infile)
                # vector column of lists
                din_hits.append(data['din_hits'].iloc[1:].values)
                # reshape matrix of fluxes into a vector column of lists
                tmp = [x for x in data['din_fluxs']]
                # easy conversion of a list of lists into a numpy array
                din_fluxes.append(pandas.DataFrame(tmp).values)

            # DIN hits are easy to evaluate recursively (for-loop), parallelized
            # (multiprocessing) or distributed (dask)
            din_hits = [numpy.asarray(x) for x in numpy.transpose(din_hits)]

            # DIN fluxes are not that easy to evaluate recursively; data needs to be reshaped
            a, b = numpy.shape(din_fluxes[0][1:, 1:])
            din_fluxes = [
                x[0] for x in
                [numpy.reshape(x[1:, 1:], (1, a * b)) for x in din_fluxes]
            ]
            din_fluxes = [numpy.asarray(x) for x in numpy.transpose(din_fluxes)]

            # bootstrap
            tmp = []
            for row in numpy.array(din_hits):
                tmp.append(
                    numpy.mean(
                        bootstrap(row, opts['resamples'], bootfunc=numpy.mean)))
            with open('./hits_bootstrapped_{:s}.txt'.format(model_key), 'w') as outfile:
                pandas.DataFrame(data=tmp).to_csv(outfile, sep='\t',
                                                  index=False, header=False)

            tmp = []
            for row in numpy.array(din_fluxes):
                tmp.append(
                    numpy.mean(
                        bootstrap(row, opts['resamples'], bootfunc=numpy.mean)))
            with open('./fluxes_bootstrapped_{:s}.txt'.format(model_key), 'w') as outfile:
                pandas.DataFrame(data=tmp).to_csv(outfile, sep='\t',
                                                  index=False, header=False)

    return 0
def bootstrap(in_vec, num_samples=100, bootfunc=np.mean):
    from astropy import stats
    return stats.bootstrap(np.array(in_vec), bootnum=num_samples,
                           bootfunc=bootfunc)
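# Usage sketch for the thin wrapper above (note that it shadows the name
# `bootstrap` from astropy.stats); the input list and bootfunc are illustrative.
import numpy as np

samples = bootstrap([1.2, 0.9, 1.4, 1.1, 0.8], num_samples=500,
                    bootfunc=np.median)
print(samples.mean(), samples.std())   # bootstrap estimate of the median and its error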
def pspec(psd2, nbins=None, return_stddev=False, binsize=1.0, logspacing=True, max_bin=None, min_bin=None, return_freqs=True, theta_0=None, delta_theta=None, boot_iter=None): ''' Calculate the radial profile using scipy.stats.binned_statistic. Parameters ---------- psd2 : np.ndarray 2D Spectral power density. nbins : int, optional Number of bins to use. If None, it is calculated based on the size of the given arrays. return_stddev : bool, optional Return the standard deviations in each bin. binsize : float, optional Size of bins to be used. If logspacing is enabled, this will increase the number of bins used by the inverse of the given binsize. logspacing : bool, optional Use logarithmically spaces bins. max_bin : float, optional Give the maximum value to bin to. min_bin : float, optional Give the minimum value to bin to. return_freqs : bool, optional Return spatial frequencies. theta_0 : `~astropy.units.Quantity`, optional The center angle of the azimuthal mask. Must have angular units. delta_theta : `~astropy.units.Quantity`, optional The width of the azimuthal mask. This must be given when a `theta_0` is given. Must have angular units. boot_iter : int, optional Number of bootstrap iterations for estimating the standard deviation in each bin. Require `return_stddev=True`. Returns ------- bins_cents : np.ndarray Centre of the bins. ps1D : np.ndarray 1D binned power spectrum. ps1D_stddev : np.ndarray Returned when return_stddev is enabled. Standard deviations within each of the bins. ''' yy, xx = make_radial_arrays(psd2.shape) dists = np.sqrt(yy**2 + xx**2) if theta_0 is not None: if delta_theta is None: raise ValueError("Must give delta_theta.") theta_0 = theta_0.to(u.rad) delta_theta = delta_theta.to(u.rad) theta_limits = Angle([theta_0 - 0.5 * delta_theta, theta_0 + 0.5 * delta_theta]) # Define theta array thetas = Angle(np.arctan2(yy, xx) * u.rad) # Wrap around pi theta_limits = theta_limits.wrap_at(np.pi * u.rad) if nbins is None: nbins = int(np.round(dists.max() / binsize) + 1) if return_freqs: yy_freq, xx_freq = make_radial_freq_arrays(psd2.shape) freqs_dist = np.sqrt(yy_freq**2 + xx_freq**2) zero_freq_val = freqs_dist[np.nonzero(freqs_dist)].min() / 2. freqs_dist[freqs_dist == 0] = zero_freq_val if max_bin is None: if return_freqs: max_bin = 0.5 else: max_bin = dists.max() if min_bin is None: if return_freqs: min_bin = 1.0 / min(psd2.shape) else: min_bin = 0.5 if logspacing: bins = np.logspace(np.log10(min_bin), np.log10(max_bin), nbins + 1) else: bins = np.linspace(min_bin, max_bin, nbins + 1) if return_freqs: dist_arr = freqs_dist else: dist_arr = dists if theta_0 is not None: if theta_limits[0] < theta_limits[1]: azim_mask = np.logical_and(thetas >= theta_limits[0], thetas <= theta_limits[1]) else: azim_mask = np.logical_or(thetas >= theta_limits[0], thetas <= theta_limits[1]) azim_mask = np.logical_or(azim_mask, azim_mask[::-1, ::-1]) # Fill in the middle angles ny = np.floor(psd2.shape[0] / 2.).astype(int) nx = np.floor(psd2.shape[1] / 2.).astype(int) azim_mask[ny - 1:ny + 1, nx - 1:nx + 1] = True else: azim_mask = None ps1D, bin_edge, cts = binned_statistic(dist_arr[azim_mask].ravel(), psd2[azim_mask].ravel(), bins=bins, statistic=np.nanmean) bin_cents = (bin_edge[1:] + bin_edge[:-1]) / 2. 
if not return_stddev: if theta_0 is not None: return bin_cents, ps1D, azim_mask else: return bin_cents, ps1D else: if boot_iter is None: stat_func = lambda x: np.nanstd(x, ddof=1) else: from astropy.stats import bootstrap stat_func = lambda data: np.mean(bootstrap(data, boot_iter, bootfunc=np.std)) ps1D_stddev = binned_statistic(dist_arr[azim_mask].ravel(), psd2[azim_mask].ravel(), bins=bins, statistic=stat_func)[0] # We're dealing with variations in the number of samples for each bin. # Add a correction based on the t distribution bin_cts = binned_statistic(dist_arr[azim_mask].ravel(), psd2[azim_mask].ravel(), bins=bins, statistic='count')[0] # Two-tail CI for 85% (~1 sigma) alpha = 1 - (0.15 / 2.) # Correction factor to convert to the standard error A = t_dist.ppf(alpha, bin_cts - 1) / np.sqrt(bin_cts) # If the standard error is larger than the standard deviation, # use it instead ps1D_stddev[A > 1] *= A[A > 1] # Mask out bins that have 1 or fewer points mask = bin_cts <= 1 ps1D_stddev[mask] = np.NaN ps1D[mask] = np.NaN # ps1D_stddev[ps1D_stddev == 0.] = np.NaN if theta_0 is not None: return bin_cents, ps1D, ps1D_stddev, azim_mask else: return bin_cents, ps1D, ps1D_stddev
def binxycolor(x, y, color, nbin=5, yweights=None, yerr=True, use_median=False,
               equal_pop_bins=False, bins=None):
    '''
    - bin x in nbin equally spaced bins
    - calculate the median y value in each bin
    - calculate the median color in each bin
    '''
    if bins is not None:
        xbins = bins
        nbin = len(xbins)
    else:
        xbins = np.zeros(nbin, 'f')
    ybins = np.zeros(nbin, 'f')
    ybinerr = np.zeros(len(xbins), 'f')
    colorbins = np.zeros(len(xbins), 'f')

    if equal_pop_bins:
        sorted_indices = np.argsort(x)
        y = y[sorted_indices]
        x = x[sorted_indices]
        color = color[sorted_indices]
        n_per_bin = len(x) / nbin
        xbin_number = np.arange(len(x)) // int(n_per_bin)
        #print(xbin_number)
        #print(x)
    else:
        #xbin_number = np.array(((x-min(x))*nbin/(max(x)-min(x))),'i')
        xbin_number = -1 * np.ones(len(x), 'i')
        for i in range(len(xbins) - 1):
            flag = (x >= xbins[i]) & (x < xbins[i + 1])
            xbin_number[flag] = i * np.ones(sum(flag), 'i')
        xbins = xbins + 0.5 * (xbins[1] - xbins[0])

    for i in range(nbin):
        if sum(xbin_number == i) < 1:
            continue
        if use_median:
            if bins is None:
                xbins[i] = np.median(x[xbin_number == i])
            ybins[i] = np.median(y[xbin_number == i])
            colorbins[i] = np.median(color[xbin_number == i])
            t = bootstrap(y[xbin_number == i], bootnum=100, bootfunc=np.median)
            #print(t)
            # not worrying about asymmetric errors right now
            ybinerr[i] = (scoreatpercentile(t, 84) - scoreatpercentile(t, 16)) / 2.
        else:
            if bins is None:
                xbins[i] = np.mean(x[xbin_number == i])
            if yweights is not None:
                print(i)
                print('xbin = ', xbins[i])
                print('yweights = ', yweights[xbin_number == i])
                print('y = ', y[xbin_number == i])
                ybins[i] = np.average(y[xbin_number == i],
                                      weights=yweights[xbin_number == i])
                ybinerr[i] = np.std(y[xbin_number == i]) / np.sqrt(sum(xbin_number == i))
            else:
                ybins[i] = np.mean(y[xbin_number == i])
                ybinerr[i] = np.std(y[xbin_number == i]) / np.sqrt(sum(xbin_number == i))
            colorbins[i] = np.mean(color[xbin_number == i])

    if yerr:
        return xbins, ybins, ybinerr, colorbins
    else:
        return xbins, ybins, colorbins
def main(argv): num = 3000000 if sys.argv[1] == 'metacal': ##g1=0, g2=0 dirr = ['v2_noshear_offset_0', 'v2_noshear_offset_45'] shape = sys.argv[1] g1_0 = [] g2_0 = [] for i in range(len(dirr)): a = fio.FITS(dirr[i] + '_sim_0.fits')[-1].read() b = fio.FITS(dirr[i] + '_sim_1.fits')[-1].read() c = fio.FITS(dirr[i] + '_sim_2.fits')[-1].read() d = fio.FITS(dirr[i] + '_sim_3.fits')[-1].read() e = fio.FITS(dirr[i] + '_sim_4.fits')[-1].read() R11, R22, R12, R21, g1_obs, g2_obs = residual_bias([a, b, c, d, e], shape) g1_0.append(g1_obs[0:num]) g2_0.append(g2_obs[0:num]) del_g1_0 = g1_0[1] - g1_0[0] del_g2_0 = g2_0[1] - g2_0[0] ## g1=+-0.02, g2=0 dirr = ['v2_7_offset_0', 'v2_7_offset_45'] g_pos2 = [] g_neg2 = [] g_pos0 = [] g_neg0 = [] for i in range(len(dirr)): a = fio.FITS(dirr[i] + '_sim_0.fits')[-1].read() b = fio.FITS(dirr[i] + '_sim_1.fits')[-1].read() c = fio.FITS(dirr[i] + '_sim_2.fits')[-1].read() d = fio.FITS(dirr[i] + '_sim_3.fits')[-1].read() e = fio.FITS(dirr[i] + '_sim_4.fits')[-1].read() R11, R22, R12, R21, g1_obs, g2_obs = residual_bias([a, b, c, d, e], shape) g_pos2.append(g1_obs[0:num:2]) g_neg2.append(g1_obs[1:num:2]) g_pos0.append(g2_obs[0:num:2]) g_neg0.append(g2_obs[1:num:2]) del_g1_pos2 = g_pos2[1] - g_pos2[0] del_g1_neg2 = g_neg2[1] - g_neg2[0] del_g2_pos0 = g_pos0[1] - g_pos0[0] del_g2_neg0 = g_neg0[1] - g_neg0[0] #print('The difference of the measured g1, when sheared in g1 direction, is, \u0394\u03B3='+str("%6.6f"% np.mean(del_gamma1))+"+-"+str("%6.6f"% (np.std(del_gamma1)/np.sqrt(num)))) #print('The difference of the measured g2, when sheared in g1 direction, is, \u0394\u03B3='+str("%6.6f"% np.mean(del_gamma2))+"+-"+str("%6.6f"% (np.std(del_gamma2)/np.sqrt(num)))) ## g1=0, g2=+-0.02 dirr = ['v2_8_offset_0', 'v2_8_offset_45'] g_pos2 = [] g_neg2 = [] g_pos0 = [] g_neg0 = [] for i in range(len(dirr)): a = fio.FITS(dirr[i] + '_sim_0.fits')[-1].read() b = fio.FITS(dirr[i] + '_sim_1.fits')[-1].read() c = fio.FITS(dirr[i] + '_sim_2.fits')[-1].read() d = fio.FITS(dirr[i] + '_sim_3.fits')[-1].read() e = fio.FITS(dirr[i] + '_sim_4.fits')[-1].read() R11, R22, R12, R21, g1_obs, g2_obs = residual_bias([a, b, c, d, e], shape) g_pos2.append(g2_obs[0:num:2]) g_neg2.append(g2_obs[1:num:2]) g_pos0.append(g1_obs[0:num:2]) g_neg0.append(g1_obs[1:num:2]) del_g2_pos2 = g_pos2[1] - g_pos2[0] del_g2_neg2 = g_neg2[1] - g_neg2[0] del_g1_pos0 = g_pos0[1] - g_pos0[0] del_g1_neg0 = g_neg0[1] - g_neg0[0] #print('The difference of the measured g1, when sheared in g2 direction, is, \u0394\u03B3='+str("%6.6f"% np.mean(del_gamma1))+"+-"+str("%6.6f"% (np.std(del_gamma1)/np.sqrt(num)))) #print('The difference of the measured g2, when sheared in g2 direction, is, \u0394\u03B3='+str("%6.6f"% np.mean(del_gamma2))+"+-"+str("%6.6f"% (np.std(del_gamma2)/np.sqrt(num)))) dirr = ['v2_7_offset_0_rand360', 'v2_7_offset_45_rand360'] g_pos2 = [] g_neg2 = [] g_pos0 = [] g_neg0 = [] for i in range(len(dirr)): a = fio.FITS(dirr[i] + '_sim_0.fits')[-1].read() b = fio.FITS(dirr[i] + '_sim_1.fits')[-1].read() c = fio.FITS(dirr[i] + '_sim_2.fits')[-1].read() d = fio.FITS(dirr[i] + '_sim_3.fits')[-1].read() e = fio.FITS(dirr[i] + '_sim_4.fits')[-1].read() R11, R22, R12, R21, g1_obs, g2_obs = residual_bias([a, b, c, d, e], shape) g_pos2.append(g1_obs[0:num:2]) g_neg2.append(g1_obs[1:num:2]) g_pos0.append(g2_obs[0:num:2]) g_neg0.append(g2_obs[1:num:2]) del_g1_randpos2 = g_pos2[1] - g_pos2[0] del_g1_randneg2 = g_neg2[1] - g_neg2[0] del_g2_randpos0 = g_pos0[1] - g_pos0[0] del_g2_randneg0 = g_neg0[1] - g_neg0[0] dirr = 
['v2_7_offset_0_rand20', 'v2_7_offset_45_rand20'] g_pos2 = [] g_neg2 = [] g_pos0 = [] g_neg0 = [] for i in range(len(dirr)): a = fio.FITS(dirr[i] + '_sim_0.fits')[-1].read() b = fio.FITS(dirr[i] + '_sim_1.fits')[-1].read() c = fio.FITS(dirr[i] + '_sim_2.fits')[-1].read() d = fio.FITS(dirr[i] + '_sim_3.fits')[-1].read() e = fio.FITS(dirr[i] + '_sim_4.fits')[-1].read() R11, R22, R12, R21, g1_obs, g2_obs = residual_bias([a, b, c, d, e], shape) g_pos2.append(g1_obs[0:num:2]) g_neg2.append(g1_obs[1:num:2]) g_pos0.append(g2_obs[0:num:2]) g_neg0.append(g2_obs[1:num:2]) del_g1_rand2pos2 = g_pos2[1] - g_pos2[0] del_g1_rand2neg2 = g_neg2[1] - g_neg2[0] del_g2_rand2pos0 = g_pos0[1] - g_pos0[0] del_g2_rand2neg0 = g_neg0[1] - g_neg0[0] fig, ax1 = plt.subplots(figsize=(10, 8)) input_shear = [-0.02, 0, 0, 0.02] #ax1.plot([0.0, 0.0], [np.mean(del_g1_0), np.mean(del_g2_0)], 'o', c='m', label='No shear, a fixed angle orientation') #ax1.errorbar([0.0, 0.0], [np.mean(del_g1_0), np.mean(del_g2_0)], yerr=[np.std(del_g1_0)/np.sqrt(len(del_g1_0)), np.std(del_g2_0)/np.sqrt(len(del_g2_0))], fmt='o', c='m') ax1.plot([0.0], [np.mean(del_g1_0)], 'o', c='m', label='No shear, a fixed angle orientation') ax1.errorbar([0.0], [np.mean(del_g1_0)], yerr=[np.std(del_g1_0) / np.sqrt(len(del_g1_0))], fmt='o', c='m') error_g1 = [ np.std(del_g1_neg2) / np.sqrt(len(del_g1_neg2)), np.std(del_g1_neg0) / np.sqrt(len(del_g1_neg0)), np.std(del_g1_pos0) / np.sqrt(len(del_g1_pos0)), np.std(del_g1_pos2) / np.sqrt(len(del_g1_pos2)) ] mean_difference_g1 = [ np.mean(del_g1_neg2), np.mean(del_g1_neg0), np.mean(del_g1_pos0), np.mean(del_g1_pos2) ] ax1.plot(input_shear, mean_difference_g1, 'o', c='r', label='g1, a fixed angle orientation') ax1.errorbar(input_shear, mean_difference_g1, yerr=error_g1, c='r', fmt='o') #error_g2=[np.std(del_g2_neg2)/np.sqrt(len(del_g2_neg2)), np.std(del_g2_neg0)/np.sqrt(len(del_g2_neg0)), np.std(del_g2_pos0)/np.sqrt(len(del_g2_pos0)), np.std(del_g2_pos2)/np.sqrt(len(del_g2_pos2))] #mean_difference_g2 = [np.mean(del_g2_neg2), np.mean(del_g2_neg0), np.mean(del_g2_pos0), np.mean(del_g2_pos2)] #ax1.plot(input_shear, mean_difference_g2, 'o', c='b', label='g2') #ax1.errorbar(input_shear, mean_difference_g2, yerr=error_g2, c='b', fmt='o') input2 = [-0.02, 0.02] error_randg1 = [ np.std(del_g1_randneg2) / np.sqrt(len(del_g1_randneg2)), np.std(del_g1_randpos2) / np.sqrt(len(del_g1_randpos2)) ] mean_randdiff = [np.mean(del_g1_randneg2), np.mean(del_g1_randpos2)] ax1.plot(input2, mean_randdiff, 'o', c='b', label='g1, perfectly randomized orientations') ax1.errorbar(input2, mean_randdiff, yerr=error_randg1, c='b', fmt='o') error_rand2g1 = [ np.std(del_g1_rand2neg2) / np.sqrt(len(del_g1_rand2neg2)), np.std(del_g1_rand2pos2) / np.sqrt(len(del_g1_rand2pos2)) ] mean_rand2diff = [np.mean(del_g1_rand2neg2), np.mean(del_g1_rand2pos2)] ax1.plot(input2, mean_rand2diff, 'o', c='g', label='g1, slightly randomized orientations') ax1.errorbar(input2, mean_rand2diff, yerr=error_rand2g1, c='g', fmt='o') ax1.set_xlabel('input shear', fontsize=16) ax1.set_ylabel("\u0394\u03B3", fontsize=16) ax1.set_title( 'Mean difference in measured shapes (random orientation angles, offsets=45 degrees)', fontsize=13) plt.legend(loc=7, fontsize=10) ax1.tick_params(labelsize=10) ax1.axhline(y=0, ls='--') plt.savefig('delta_g_randoffset45.png') plt.show() return None elif sys.argv[1] == 'ngmix': ## ngmix plot """ dirr=[['v2_11_offset_0', 'v2_11_offset_10'], ['v2_11_offset_0', 'v2_11_offset_20'], ['v2_11_offset_0', 'v2_11_offset_35'], ['v2_11_offset_0', 
'v2_11_offset_45']] angles=[10,20,35,45] ind=0 ## g1 difference fig,ax1=plt.subplots(figsize=(10,8)) for d in dirr: g_pos2 = [] g_neg2 = [] g_pos0 = [] g_neg0 = [] for name in d: a=fio.FITS(name+'_ngmix_0.fits')[-1].read() b=None c=None d=None e=None R11, R22, R12, R21, g1_obs, g2_obs = residual_bias([a,b,c,d,e], 'ngmix') g_pos2.append(g1_obs[0:num:2]) g_neg2.append(g1_obs[1:num:2]) g_pos0.append(g2_obs[0:num:2]) g_neg0.append(g2_obs[1:num:2]) del_g1_pos2 = g_pos2[1] - g_pos2[0] del_g1_neg2 = g_neg2[1] - g_neg2[0] del_g2_pos0 = g_pos0[1] - g_pos0[0] del_g2_neg0 = g_neg0[1] - g_neg0[0] mean_g1=[np.mean(del_g1_neg2), np.mean(del_g1_pos2)] error_g1=[np.std(del_g1_neg2)/np.sqrt(len(del_g1_neg2)), np.std(del_g1_pos2)/np.sqrt(len(del_g1_pos2))] l3,=ax1.plot(angles[ind], mean_g1[0], 'o', c='b') ax1.errorbar(angles[ind], mean_g1[0], yerr=error_g1[0], c='b', fmt='o') l4,=ax1.plot(angles[ind], mean_g1[1], 'o', c='g') ax1.errorbar(angles[ind], mean_g1[1], yerr=error_g1[1], c='g', fmt='o') ind+=1 """ ## metacal plot fig, ax1 = plt.subplots(figsize=(10, 8)) dirr = [['v2_7_offset_0', 'v2_7_offset_10'], ['v2_7_offset_0', 'v2_7_offset_20'], ['v2_7_offset_0', 'v2_7_offset_35'], ['v2_7_offset_0', 'v2_7_offset_40'], ['v2_7_offset_0', 'v2_7_offset_45'], ['v2_7_offset_0', 'v2_7_offset_50'], ['v2_7_offset_0', 'v2_7_offset_60']] angles = [10, 20, 35, 40, 45, 50, 60] ind = 0 ## g1 difference for d in dirr: g_pos2 = [] g_neg2 = [] g_pos0 = [] g_neg0 = [] for name in d: a = fio.FITS(name + '_sim_0.fits')[-1].read() b = fio.FITS(name + '_sim_1.fits')[-1].read() c = fio.FITS(name + '_sim_2.fits')[-1].read() d = fio.FITS(name + '_sim_3.fits')[-1].read() e = fio.FITS(name + '_sim_4.fits')[-1].read() R11, R22, R12, R21, g1_obs, g2_obs = residual_bias( [a, b, c, d, e], 'metacal') g_pos2.append(g1_obs[0:num:2]) g_neg2.append(g1_obs[1:num:2]) g_pos0.append(g2_obs[0:num:2]) g_neg0.append(g2_obs[1:num:2]) del_g1_pos2 = g_pos2[1] - g_pos2[0] del_g1_neg2 = g_neg2[1] - g_neg2[0] del_g2_pos0 = g_pos0[1] - g_pos0[0] del_g2_neg0 = g_neg0[1] - g_neg0[0] mean_g1 = [np.mean(del_g1_neg2), np.mean(del_g1_pos2)] boot = [bootstrap(del_g1_neg2, 100), bootstrap(del_g1_pos2, 100)] boot_mean = [ np.mean([np.mean(sample) for sample in boot[0]]), np.mean([np.mean(sample) for sample in boot[1]]) ] sigma = [(np.sum([(np.mean(sample) - boot_mean[0])**2 for sample in boot[0]]) / 99)**(1 / 2), (np.sum([(np.mean(sample) - boot_mean[1])**2 for sample in boot[1]]) / 99)**(1 / 2)] error_g1 = [ np.std(del_g1_neg2) / np.sqrt(len(del_g1_neg2)), np.std(del_g1_pos2) / np.sqrt(len(del_g1_pos2)) ] print(sigma, error_g1) l1, = ax1.plot(angles[ind], mean_g1[0], 'o', c='r') ax1.errorbar(angles[ind], mean_g1[0], yerr=sigma[0], c='r', fmt='o') l2, = ax1.plot(angles[ind], mean_g1[1], 'o', c='m') ax1.errorbar(angles[ind], mean_g1[1], yerr=sigma[1], c='m', fmt='o') ind += 1 ax1.set_xlabel('Angle offsets', fontsize=16) ax1.set_ylabel("\u0394\u03B3", fontsize=16) #ax1.set_title('Mean difference in measured shapes for different shape measurement techniques', fontsize=13) l1.set_label('mcal g=-0.02') l2.set_label('mcal g=+0.02') #l3.set_label('ngmix g=-0.02') #l4.set_label('ngmix g=+0.02') plt.legend(loc=5, fontsize=10) ax1.tick_params(labelsize=10) ax1.axhline(y=0, ls='--') plt.savefig('ngmixmcal_delta_g_booterr.png') plt.show()
for iz, zmin in enumerate(zsbins[:-1]):
    zmax = zsbins[iz + 1]
    zcut = np.logical_and(zspec >= zmin, zspec < zmax)
    print(zcut.sum())

    st = []
    for zset in zsets:
        bs = []
        zphot = zz[zset][zcut]
        zsc = zspec[zcut]
        ix = np.arange(len(zsc)).astype('int')
        samples = bootstrap(ix, bootnum=50)
        for sample in samples:
            bs.append(calcStats(zphot[sample.astype('int')],
                                zsc[sample.astype('int')]))
        st.append(bs)
    stats.append(st)

stats = np.array(stats)
#stats = stats[:, :, :, 1:-1]
smean = stats[:, :, :, 1:-3].mean(2)
sstd = stats[:, :, :, 1:-3].std(2)
      '==========' + cat_name + '===')

# bootstrap resampling
smf_dist_arr = np.zeros(bin_number)
smf_dist_bkg_arr = np.zeros(bin_number)
mass_key_ori = cat_gal['MASS_MED'].copy()
z_key_ori = cat_gal[zkeyname].copy()
mass_centrals_ori = []
isolated_counts_ori = 0
count_bkg_ori = 0
for boot_iter in range(boot_num):
    if boot_iter != 0:
        cat_gal['MASS_MED'] = mass_key_ori
        cat_gal[zkeyname] = z_key_ori
        scatter()
        boot_idx = bootstrap(np.arange(len(cat_gal)), bootnum=1)
        cat_gal_copy = cat_gal[boot_idx[0].astype(int)]
    else:
        cat_gal_copy = cat_gal

    # select massive galaxies
    cat_massive_gal = cat_gal_copy[cat_gal_copy['MASS_MED'] > masscut_host]
    cat_massive_z_slice = cat_massive_gal[abs(cat_massive_gal[zkeyname] - z) < z_bin_size]
    coord_massive_gal = SkyCoord(cat_massive_z_slice['RA'] * u.deg,
                                 cat_massive_z_slice['DEC'] * u.deg)

    # read in random point catalog
    cat_random = Table.read('/home/lejay/random_point_cat/' + cat_name + '_random_point.fits')
    cat_random = cat_random[cat_random['inside'] == 0]