def do_histograms(self, minmag, maxmag):
    """
    Histogram both ellipticity components and the total ellipticity for
    objects with minmag < mag < maxmag.

    The bin size is one tenth of the standard deviation of the total
    ellipticity of the selected objects; it is stored in self.binsize.

    parameters
    ----------
    minmag, maxmag:
        Exclusive bounds on data['mag'] used to select objects.

    returns
    -------
    h1, h2, h:
        Results of histogram(..., more=True) for component 0, component 1
        and the total ellipticity, in that order.
    """
    cat = self.data
    keep, = where((cat['mag'] > minmag) & (cat['mag'] < maxmag))

    ellip = cat[self.ellip_name]
    comp1 = ellip[keep, 0]
    comp2 = ellip[keep, 1]
    total = sqrt(comp1**2 + comp2**2)

    width = 0.1*total.std()
    self.binsize = width

    # both components are binned over the same range; the total starts at 0
    comp_range = dict(min=self.mine_2d, max=self.maxe_2d)
    hist1 = histogram(comp1, binsize=width, more=True, **comp_range)
    hist2 = histogram(comp2, binsize=width, more=True, **comp_range)
    hist_tot = histogram(total, binsize=width, min=0., max=self.maxe,
                         more=True)

    return hist1, hist2, hist_tot
def do_histograms(self, minmag, maxmag, ellip_name):
    """
    Histogram the two components and the magnitude of data[ellip_name]
    for objects with minmag < mag < maxmag.

    The bin size is 0.1 times the standard deviation of the magnitude.
    For ellip_name == "eta" the eta limits are used and the bin size is
    saved in self.binsize_eta; for any other name the g limits are used
    and the bin size is saved in self.binsize_g.

    parameters
    ----------
    minmag, maxmag:
        Exclusive bounds on data["mag"] used to select objects.
    ellip_name:
        Name of the two-column field of self.data to histogram.

    returns
    -------
    h1, h2, h:
        Results of histogram(..., more=True) for component 0, component 1
        and the magnitude, in that order.
    """
    cat = self.data
    keep, = where((cat["mag"] > minmag) & (cat["mag"] < maxmag))

    comp1 = cat[ellip_name][keep, 0]
    comp2 = cat[ellip_name][keep, 1]
    total = sqrt(comp1 ** 2 + comp2 ** 2)

    width = 0.1 * total.std()

    if ellip_name == "eta":
        lo_2d, hi_2d, hi_1d = self.min_eta_2d, self.max_eta_2d, self.max_eta
        self.binsize_eta = width
    else:
        lo_2d, hi_2d, hi_1d = self.min_g_2d, self.max_g_2d, self.max_g
        self.binsize_g = width

    hist1 = histogram(comp1, binsize=width, min=lo_2d, max=hi_2d, more=True)
    hist2 = histogram(comp2, binsize=width, min=lo_2d, max=hi_2d, more=True)
    hist_tot = histogram(total, binsize=width, min=0.0, max=hi_1d, more=True)

    return hist1, hist2, hist_tot
def doplot(self):
    """
    Build a two-panel table comparing the stored histograms with the prior.

    The top panel overlays the histograms of the two components (self.h1,
    self.h2) with a histogram of random draws from the prior.  The bottom
    panel shows the histogram of the magnitude (self.h), the model curve
    from get_prior_vals() and random draws.  The table is displayed when
    self.show is true.

    returns
    -------
    The Table holding both plots.
    """
    table = Table(2, 1)
    table.title = self.title

    xfit, yfit, gprior = self.get_prior_vals()

    nsample = 100000
    width = self.binsize
    hist_tot, hist1, hist2 = self.h, self.h1, self.h2

    # only the first component of the 2-d draws is histogrammed
    comp1_rand, _ = gprior.sample2d(nsample)
    tot_rand = gprior.sample1d(nsample)

    rand_tot = histogram(tot_rand, binsize=width, min=0.0, max=1.0, more=True)
    rand1 = histogram(comp1_rand, binsize=width, min=-1.0, max=1.0, more=True)

    # scale the 1-d draws to the summed model curve, allowing for the
    # spacing of xfit differing from the histogram bin size
    fit_width = xfit[1] - xfit[0]
    rand_tot["hist"] = (
        rand_tot["hist"] * float(yfit.sum()) / rand_tot["hist"].sum()
        * fit_width / width
    )
    # scale the component draws to the number of objects in h1
    rand1["hist"] = (
        rand1["hist"] * float(hist1["hist"].sum()) / rand1["hist"].sum()
    )

    # top panel: the two components plus the random draws
    comp_plot = FramedPlot()
    comp_plot.xlabel = r"$g$"

    curve1 = Histogram(hist1["hist"], x0=hist1["low"][0], binsize=width,
                       color="red")
    curve1.label = r"$g_1$"
    curve2 = Histogram(hist2["hist"], x0=hist2["low"][0], binsize=width,
                       color="blue")
    curve2.label = r"$g_2$"
    curve1_rand = Histogram(rand1["hist"], x0=rand1["low"][0], binsize=width,
                            color="magenta")
    curve1_rand.label = "rand"
    curve_tot_rand = Histogram(rand_tot["hist"], x0=rand_tot["low"][0],
                               binsize=width, color="magenta")
    curve_tot_rand.label = "rand"

    comp_key = PlotKey(0.9, 0.9, [curve1, curve2, curve1_rand],
                       halign="right")
    comp_plot.add(curve1, curve2, curve1_rand, comp_key)
    table[0, 0] = comp_plot

    # bottom panel: the magnitude, the model and the random draws
    tot_plot = FramedPlot()
    tot_plot.xlabel = r"$|g|$"

    curve_tot = Histogram(hist_tot["hist"], x0=hist_tot["low"][0],
                          binsize=width)
    curve_tot.label = "|g|"
    model = Curve(xfit, yfit, color="blue")
    model.label = "model"

    tot_key = PlotKey(0.9, 0.9, [curve_tot, model, curve_tot_rand],
                      halign="right")
    tot_plot.add(model, curve_tot, curve_tot_rand, tot_key)
    table[1, 0] = tot_plot

    if self.show:
        table.show()

    return table
def hist_match(data1, data2, binsize, extra_weights1=None):
    """
    Generate a set of weights for data set 1 such that the distribution of
    observables is matched to data set 2.

    This is the simplest method for histogram matching and just works in
    rectangular bins.

    parameters
    ----------
    data1:
        This data set is to be matched by weighting to data2
    data2:
        The data to be matched against
    binsize:
        The binsize to use for the histogram
    extra_weights1:
        An extra set of weights to apply to data1.  The returned weights
        will include this weight

    returns
    -------
    weights1:
        Array with one weight per element of data1.  Elements that fall
        outside the range of data2 keep a weight of zero.  When no
        populated bin of data1 overlaps data2 all weights are zero.

    raises
    ------
    ValueError
        If the two histograms do not have the same number of bins.
    """
    weights1 = zeros(data1.size)

    # both histograms are built over the range of data2
    min2 = data2.min()
    max2 = data2.max()

    if extra_weights1 is not None:
        bs1 = histogram(data1, binsize=binsize, min=min2, max=max2,
                        rev=True, weights=extra_weights1)
        h1 = bs1['whist']
        rev1 = bs1['rev']
    else:
        h1, rev1 = histogram(data1, binsize=binsize, min=min2, max=max2,
                             rev=True)

    h2 = histogram(data2, min=min2, max=max2, binsize=binsize)

    if h1.size != h2.size:
        raise ValueError("histogram sizes don't match: %d/%d"
                         % (h1.size, h2.size))

    ratio = zeros(h1.size)
    w, = where(h1 > 0)
    ratio[w] = (h2[w]*1.0)/h1[w]

    # this is the weight for each object in the bin, normalized so the
    # largest is one.  Skip the normalization when every ratio is zero:
    # dividing by a zero maximum would turn the weights into NaN.
    ratio_max = ratio.max()
    if ratio_max > 0:
        ratio /= ratio_max

    for i in xrange(h1.size):
        # equal consecutive entries in rev mark an empty bin
        if rev1[i] != rev1[i+1]:
            w1 = rev1[rev1[i]:rev1[i+1]]
            weights1[w1] = ratio[i]

    if extra_weights1 is not None:
        weights1 *= extra_weights1

    return weights1
def do_histogram(self, minmag, maxmag):
    """
    Histogram self.sigma for objects with minmag < mag < maxmag after
    clipping the high tail.

    Three passes keep only values below mean + 4*std, recomputing the mean
    and standard deviation after each pass.  The bin size is the final
    standard deviation times self.binfac and is stored in self.binsize.

    parameters
    ----------
    minmag, maxmag:
        Exclusive bounds on data['mag'] used to select objects.

    returns
    -------
    h, mean, sigma:
        The result of histogram(..., more=True) for the clipped values,
        and the mean and standard deviation of those values.
    """
    cat = self.data
    keep, = where((cat['mag'] > minmag) & (cat['mag'] < maxmag))

    vals = self.sigma[keep]
    mean = vals.mean()
    sigma = vals.std()

    # one-sided clipping: only the high tail is removed
    for _ in xrange(3):
        below, = where(vals < (mean + 4.*sigma))
        vals = vals[below]
        mean = vals.mean()
        sigma = vals.std()

    width = sigma*self.binfac
    self.binsize = width

    hist = histogram(vals, binsize=width, more=True)

    return hist, mean, sigma
def do_histograms(self):
    """
    Histogram the two components of data["g"] and their magnitude using
    bin size self.binsize, storing the results on self.

    The components are binned with min=-1, max=1 and the magnitude with
    min=0, max=1.  The histogram(..., more=True) results are saved as
    self.h1 and self.h2 (components 0 and 1) and self.h (magnitude).
    Nothing is returned.
    """
    shapes = self.data["g"]
    comp1 = shapes[:, 0]
    comp2 = shapes[:, 1]
    width = self.binsize

    hist1 = histogram(comp1, binsize=width, min=-1.0, max=1.0, more=True)
    hist2 = histogram(comp2, binsize=width, min=-1.0, max=1.0, more=True)

    total = sqrt(comp1 ** 2 + comp2 ** 2)
    hist_tot = histogram(total, binsize=width, min=0.0, max=1.0, more=True)

    self.h, self.h1, self.h2 = hist_tot, hist1, hist2
def get_jackknife_sums_weighted(data, weights, jackreg_col=None):
    """
    Weighted sums for jackknifing.

    If a region column is sent, the weighted rows are summed within each
    region; otherwise each row is weighted and returned on its own, i.e.
    jackknife one object at a time.

    parameters
    ----------
    data: array
        An array with fields 'dsum' and 'wsum'.  If shear style is
        lensfit, dsensum is needed rather than wsum.
    weights: array
        Additional weights, one per row of data
    jackreg_col: string, optional
        column name holding the jackknife region ids

    returns
    -------
    jdsum, jwsum:
        The weighted 'dsum' and 'wsum' (or 'dsensum') sums.  With regions
        there is one row per populated region.
    """
    from esutil.stat import histogram

    dcol = "dsum"
    wcol = "dsensum" if get_shear_style(data) == "lensfit" else "wsum"

    if jackreg_col is None:
        # add a trailing axis so each weight broadcasts across its row
        row_weights = weights[:, newaxis]
        return data[dcol] * row_weights, data[wcol] * row_weights

    print("using jackreg_col:", jackreg_col)

    counts, rev = histogram(data[jackreg_col], rev=True)

    nbin = counts.size
    nrad = data[dcol].shape[1]

    jdsum = zeros((nbin, nrad))
    jwsum = zeros((nbin, nrad))

    for ibin in xrange(nbin):
        # equal consecutive entries in rev mark an empty bin
        if rev[ibin] == rev[ibin + 1]:
            continue

        members = rev[rev[ibin]:rev[ibin + 1]]
        member_weights = weights[members][:, newaxis]

        # note leaving off trailing axis in subscripts
        jdsum[ibin] = (data[dcol][members] * member_weights).sum(axis=0)
        jwsum[ibin] = (data[wcol][members] * member_weights).sum(axis=0)

    # drop the rows of regions that had no objects
    populated, = where(counts > 0)
    return jdsum[populated, :], jwsum[populated, :]
def compare_all_other(self, type, show=True):
    """
    Plot the normalized redshift histogram of the recoverable objects
    against the weighted redshift histogram of the weights file.

    The plot is written to fdict['zhistfile'] as eps, passed to
    converter.convert, and optionally shown.

    parameters
    ----------
    type:
        Passed to self.all_other_fdict to get the file names; also
        written on the plot as a label.
    show: bool, optional
        If true (the default) display the plot.
    """
    paths = self.all_other_fdict(type)

    # the original file has the redshifts, "num" holds the outputs and
    # the last one is the weights file
    orig = zphot.weighting.read_training(paths['origfile'])
    num = zphot.weighting.read_num(paths['numfile1'])
    weights = zphot.weighting.read_training(paths['wfile2'])

    # the recoverable set has num > 0; photoid holds the indexes back
    # into the "orig" file
    recoverable = where1(num['num'] > 0)
    orig_index = num['photoid'][recoverable]
    zrec = orig['z'][orig_index]

    binsize = 0.0314

    truth = histogram(zrec, min=0, max=1.1, binsize=binsize, more=True)
    truth_frac = truth['hist']/(float(truth['hist'].sum()))
    truth_curve = biggles.Histogram(truth_frac, x0=truth['low'][0],
                                    binsize=binsize)
    truth_curve.label = 'truth'

    weighted = histogram(weights['z'], min=0, max=1.1, binsize=binsize,
                         weights=weights['weight'], more=True)
    weighted_frac = weighted['whist']/weighted['whist'].sum()
    weighted_curve = biggles.Histogram(weighted_frac, x0=weighted['low'][0],
                                       binsize=binsize, color='red')
    weighted_curve.label = 'weighted train'

    plt = FramedPlot()
    key = PlotKey(0.6, 0.6, [truth_curve, weighted_curve])
    plt.add(truth_curve, weighted_curve, key)
    plt.add(biggles.PlotLabel(.8, .9, type))

    zhistfile = paths['zhistfile']
    plt.write_eps(zhistfile)
    converter.convert(zhistfile, dpi=90, verbose=True)

    if show:
        plt.show()
def hist_match_remove(data1, data2, binsize, extra_weights1=None):
    """
    Similar to hist_match but instead of returning the weights, actually
    remove a random subset from data set 1

    parameters
    ----------
    data1:
        This data set is to be matched to data2 by removing objects
    data2:
        The data to be matched against
    binsize:
        The binsize to use for the histogram
    extra_weights1:
        An extra set of weights for data1, used when histogramming it

    returns
    -------
    keep:
        Indices into data1 of the objects that were kept.  Empty when no
        object is kept from any bin.

    raises
    ------
    ValueError
        If the two histograms do not have the same number of bins.
    """
    import esutil as eu

    # both histograms are built over the range of data2
    min2 = data2.min()
    max2 = data2.max()

    if extra_weights1 is not None:
        bs1 = histogram(data1, binsize=binsize, min=min2, max=max2,
                        rev=True, weights=extra_weights1)
        h1 = bs1['whist']
        rev1 = bs1['rev']
    else:
        h1, rev1 = histogram(data1, binsize=binsize, min=min2, max=max2,
                             rev=True)

    h2 = histogram(data2, min=min2, max=max2, binsize=binsize)

    if h1.size != h2.size:
        raise ValueError("histogram sizes don't match: %d/%d"
                         % (h1.size, h2.size))

    ratio = zeros(h1.size)
    w, = where(h1 > 0)
    ratio[w] = (h2[w]*1.0)/h1[w]

    # this is the fraction to keep in each bin, normalized so the largest
    # is one.  Skip the normalization when every ratio is zero: dividing
    # by a zero maximum gives NaN, which int() below cannot convert.
    ratio_max = ratio.max()
    if ratio_max > 0:
        ratio /= ratio_max

    keep = []
    for i in xrange(h1.size):
        # equal consecutive entries in rev mark an empty bin
        if rev1[i] != rev1[i+1]:
            w1 = rev1[rev1[i]:rev1[i+1]]

            # get a random subsample
            nkeep = int(w1.size*ratio[i])
            if nkeep > 0:
                indices = eu.random.random_indices(w1.size, nkeep)
                keep.append(w1[indices])

    if not keep:
        # nothing survived; return an empty index array with the dtype of
        # the reverse indices.
        # NOTE(review): presumably combine_arrlist needs at least one
        # array in the list -- confirm.
        return rev1[:0]

    return eu.numpy_util.combine_arrlist(keep)
def get_jackknife_sums(data, jackreg_col=None, weights=None):
    """
    The sums for jackknifing.

    If a region column is sent, rows are summed within each region;
    otherwise the per-object columns are returned unchanged, i.e.
    jackknife one object at a time.  When weights are sent the work is
    handed to get_jackknife_sums_weighted.

    parameters
    ----------
    data: array
        An array with fields 'dsum' and 'wsum'.  If shear style is
        lensfit, dsensum is needed rather than wsum.
    jackreg_col: string, optional
        column name holding the jackknife region ids
    weights: array, optional
        Additional weights

    returns
    -------
    jdsum, jwsum:
        The 'dsum' and 'wsum' (or 'dsensum') sums.  With regions there is
        one row per populated region.
    """
    from esutil.stat import histogram

    if weights is not None:
        return get_jackknife_sums_weighted(data, weights,
                                           jackreg_col=jackreg_col)

    dcol = "dsum"
    wcol = "dsensum" if get_shear_style(data) == "lensfit" else "wsum"

    if jackreg_col is None:
        return data[dcol], data[wcol]

    print("using jackreg_col:", jackreg_col)

    counts, rev = histogram(data[jackreg_col], rev=True)

    nbin = counts.size
    nrad = data[dcol].shape[1]

    jdsum = zeros((nbin, nrad))
    jwsum = zeros((nbin, nrad))

    for ibin in xrange(nbin):
        # equal consecutive entries in rev mark an empty bin
        if rev[ibin] == rev[ibin + 1]:
            continue

        members = rev[rev[ibin]:rev[ibin + 1]]
        jdsum[ibin] = data[dcol][members].sum(axis=0)
        jwsum[ibin] = data[wcol][members].sum(axis=0)

    # drop the rows of regions that had no objects
    populated, = where(counts > 0)
    return jdsum[populated, :], jwsum[populated, :]
def bin_shear_data(data, bin_field, **keys):
    """
    Bin the data by data[bin_field] and average each binned field.

    Each field returned by get_binned_dtype(bin_field) is averaged within
    the bins using weights from get_weights(data['gcov']).  Empty bins are
    dropped, and g1, g2 and their errors are divided by the binned
    sensitivities before returning.

    median or wmedian are not an improvement

    parameters
    ----------
    data:
        Array with at least the fields 'g', 'gsens', 'gcov' and bin_field.
    bin_field:
        Name of the field to bin by.
    use_median: bool, optional
        Store the median of each field instead of the weighted mean.
    use_wmedian: bool, optional
        Store the weighted median instead (only if use_median is false).
    **keys:
        All other keywords are passed to histogram(), e.g. binsize or
        nperbin.

    returns
    -------
    bindata:
        Array with one row per populated bin.
    """
    # these two options belong to this function; take them out so they are
    # not forwarded to histogram() along with the binning keywords
    hist_keys = dict(keys)
    use_median = hist_keys.pop('use_median', False)
    use_wmedian = hist_keys.pop('use_wmedian', False)

    # the reverse indices are always needed; set the keyword here so a
    # caller-supplied rev cannot collide with it
    hist_keys['rev'] = True

    h, rev = histogram(data[bin_field], **hist_keys)

    nbin = len(h)
    dt, fields = get_binned_dtype(bin_field)
    bindata = zeros(nbin, dtype=dt)

    # fields that are a single column of a two-column field in data
    components = {
        'g1': ('g', 0),
        'g2': ('g', 1),
        'g1sens': ('gsens', 0),
        'g2sens': ('gsens', 1),
    }

    for i in xrange(nbin):
        # equal consecutive entries in rev mark an empty bin
        if rev[i] == rev[i+1]:
            continue

        w = rev[rev[i]:rev[i+1]]

        bindata['n'][i] = w.size

        wts = get_weights(data['gcov'], ind=w)

        for field in fields:
            if field in components:
                column, index = components[field]
                fdata = data[column][w, index]
            else:
                fdata = data[field][w]

            # the error always comes from the weighted moments, even when
            # a median is stored as the value
            wmean, werr = wmom(fdata, wts, calcerr=True)

            if use_median:
                bindata[field][i] = median(fdata)
            elif use_wmedian:
                bindata[field][i] = wmedian(fdata, wts)
            else:
                bindata[field][i] = wmean

            bindata[field+'_err'][i] = werr

    # for when we use a binsize instead of nperbin
    w, = where(h > 0)
    bindata = bindata[w]

    bindata['g1'] /= bindata['g1sens']
    bindata['g1_err'] /= bindata['g1sens']
    bindata['g2'] /= bindata['g2sens']
    bindata['g2_err'] /= bindata['g2sens']

    return bindata
def do_histograms(self, minmag, maxmag):
    """
    Histogram the two shape components and their magnitude for objects
    with minmag < mag < maxmag.

    When self.evals is true the 'g' field is assumed to hold e values,
    which are converted one object at a time with
    lensing.util.e1e2_to_g1g2.  The bin size is 0.2 times the standard
    deviation of the magnitude and is stored in self.binsize.

    parameters
    ----------
    minmag, maxmag:
        Exclusive bounds on data['mag'] used to select objects.

    returns
    -------
    h1, h2, h:
        Results of histogram(..., more=True) for the two components
        (binned with min=-1, max=1) and for the magnitude (no range
        given).
    """
    cat = self.data
    keep, = where((cat['mag'] > minmag) & (cat['mag'] < maxmag))

    first = cat['g'][keep, 0]
    second = cat['g'][keep, 1]

    if self.evals:
        import lensing

        # assume g is actually e
        g1 = zeros(first.size, dtype='f8')
        g2 = zeros(first.size, dtype='f8')
        for idx in xrange(g1.size):
            g1[idx], g2[idx] = lensing.util.e1e2_to_g1g2(first[idx],
                                                         second[idx])
    else:
        g1, g2 = first, second

    gtot = sqrt(g1**2 + g2**2)

    width = 0.2*gtot.std()
    self.binsize = width

    hist1 = histogram(g1, binsize=width, min=-1., max=1., more=True)
    hist2 = histogram(g2, binsize=width, min=-1., max=1., more=True)
    hist_tot = histogram(gtot, binsize=width, more=True)

    return hist1, hist2, hist_tot
def match_prepare(self, ra, dec, verbose=False):
    """
    Look up the htm ids for a set of points and get the reverse indices
    of their histogram.

    parameters
    ----------
    ra, dec:
        Coordinates passed to self.lookup_id.
    verbose: bool, optional
        If true, write progress messages to stdout.

    returns
    -------
    htmrev, minid, maxid:
        The reverse indices from histogramming (htmid - minid), and the
        smallest and largest htm id.
    """
    if verbose:
        stdout.write("looking up ids\n")

    ids = self.lookup_id(ra, dec)
    lowest = ids.min()
    highest = ids.max()

    if verbose:
        stdout.write("Getting reverse indices\n")
        stdout.flush()

    # only the reverse indices are needed, not the counts
    _, reverse = stat.histogram(ids - lowest, rev=True)

    return reverse, lowest, highest
def plot_multiple(self, hardcopy=False):
    """
    Plot the results from run_multiple, saved in meanvals.

    The trial means are histogrammed with a bin size of 0.2 times their
    standard deviation and overplotted with the expected gaussian, which
    has width true_error/sqrt(npoints) and is scaled to the histogram
    counts.

    parameters
    ----------
    hardcopy: bool, optional
        If true, also write the plot to mcmc-constant-multi.eps.

    raises
    ------
    ValueError
        If self.type is not 'constant'.
    """
    import biggles
    from esutil import stat

    if self.type != "constant":
        raise ValueError("only support type='constant'")

    meanvals = self.meanvals

    xmin = meanvals.min()
    xmax = meanvals.max()
    binsize = meanvals.std() * 0.2

    bindata = stat.histogram(meanvals, binsize=binsize, more=True)

    plt = biggles.FramedPlot()

    d = biggles.Histogram(bindata["hist"], x0=xmin, binsize=binsize)
    d.label = "Trials Means"

    # get the expected gaussian.  The x range comes from the data, so
    # sample it with a fixed number of points: a fixed step would leave
    # the curve with only a few points, or none, when the trial means
    # span a narrow range.
    ncurve = 500
    expected_error = self.true_error / numpy.sqrt(self.npoints)
    xvals = numpy.linspace(xmin, xmax, ncurve)

    gpoints = self.gaussfunc(self.true_pars, expected_error, xvals)

    # scale to counts per bin to match the histogram
    gpoints *= meanvals.size * binsize

    g = biggles.Curve(xvals, gpoints, color="blue")
    g.label = "Expected Distribution"

    k = biggles.PlotKey(0.1, 0.9, [d, g])

    plt.add(d, k, g)

    plt.xlabel = "Trial Means"
    plt.ylabel = "count"

    if hardcopy:
        fname = "mcmc-constant-multi.eps"
        stdout.write("Writing test file hardcopy: %s\n" % fname)
        plt.write_eps(fname)

    plt.show()
def logbin_shear_data(data, bin_field, **keys):
    """
    Bin the data in log10(data[bin_field]) and take the weighted mean of
    each binned field.

    Send nbin.  The keywords min and max, given in linear units, default
    to the range of the data; their log10 is passed to histogram() along
    with all remaining keywords.

    Empty bins are dropped, and g1, g2 and their errors are divided by the
    binned sensitivities before returning.

    returns
    -------
    bindata:
        Array with one row per populated bin.
    """
    values = data[bin_field]

    lo = keys.get('min', values.min())
    hi = keys.get('max', values.max())

    keys['min'] = log10(lo)
    keys['max'] = log10(hi)
    keys['rev'] = True

    counts, rev = histogram(log10(values), **keys)

    nbin = len(counts)
    dt, fields = get_binned_dtype(bin_field)
    bindata = zeros(nbin, dtype=dt)

    # fields that are a single column of a two-column field in data
    components = {
        'g1': ('g', 0),
        'g2': ('g', 1),
        'g1sens': ('gsens', 0),
        'g2sens': ('gsens', 1),
    }

    for ibin in xrange(nbin):
        # equal consecutive entries in rev mark an empty bin
        if rev[ibin] == rev[ibin+1]:
            continue

        members = rev[rev[ibin]:rev[ibin+1]]

        bindata['n'][ibin] = members.size

        wts = get_weights(data['gcov'], ind=members)

        for field in fields:
            if field in components:
                column, index = components[field]
                fdata = data[column][members, index]
            else:
                fdata = data[field][members]

            wmean, werr = wmom(fdata, wts, calcerr=True)
            bindata[field][ibin] = wmean
            bindata[field+'_err'][ibin] = werr

    populated, = where(counts > 0)
    bindata = bindata[populated]

    for comp in ('g1', 'g2'):
        sens = bindata[comp+'sens']
        bindata[comp] /= sens
        bindata[comp+'_err'] /= sens

    return bindata
def set_rev(self):
    """
    Histogram the epoch 'number' column and keep the reverse indices.

    The histogram runs from min=1 to the largest 'number' found in the
    first entry of self.meds_list.  The counts are stored in
    self.h_number and the reverse indices in self.rev_number.
    """
    from esutil.stat import histogram

    print("histogramming epoch 'number'")

    largest = self.meds_list[0]['number'].max()

    counts, reverse = histogram(self.epoch_data['number'],
                                min=1, max=largest, rev=True)

    self.h_number = counts
    self.rev_number = reverse
def doplot(self, fitres, h, minmag, maxmag):
    """
    Plot the data histogram against random draws from the fitted model,
    with the trials plot from the fit underneath.

    The table is shown when self.show is true.  It is then written to an
    eps file in the 'plots' directory under files.get_prior_dir(), and
    the 'converter' command is run on that file.

    parameters
    ----------
    fitres:
        Fit result providing get_model() and plot_trials().
    h:
        Histogram of the data; the 'hist', 'low' and 'high' entries are
        used.
    minmag, maxmag:
        Magnitude limits, used for the title and the file name.

    returns
    -------
    The table holding both plots.
    """
    nsample = 100000
    width = self.binsize

    table = biggles.Table(2, 1)

    top = FramedPlot()
    top.title = '%s %.2f %.2f ' % (self.objtype, minmag, maxmag)
    top.xlabel = r'$\sigma$'

    model = fitres.get_model()

    data_curve = Histogram(h['hist'], x0=h['low'][0], binsize=width)
    data_curve.label = 'data'

    # histogram the random draws over the same range as the data and
    # scale them to the number of objects in the data histogram
    draws = model.sample(nsample)
    rand = histogram(draws, binsize=width, min=h['low'][0],
                     max=h['high'][-1], more=True)
    rand['hist'] = rand['hist']*float(h['hist'].sum())/nsample

    rand_curve = Histogram(rand['hist'], x0=rand['low'][0], binsize=width,
                           color='blue')
    rand_curve.label = 'rand'

    key = PlotKey(0.9, 0.9, [data_curve, rand_curve], halign='right')
    top.add(data_curve, rand_curve, key)

    trials = fitres.plot_trials(show=False, fontsize_min=0.88888888)

    table[0, 0] = top
    table[1, 0] = trials

    if self.show:
        table.show()

    plot_dir = os.path.join(files.get_prior_dir(), 'plots')
    epsfile = os.path.join(
        plot_dir,
        'pofs-%.2f-%.2f-%s.eps' % (minmag, maxmag, self.objtype),
    )

    eu.ostools.makedirs_fromfile(epsfile)
    print(epsfile)
    table.write_eps(epsfile)
    os.system('converter -d 100 %s' % epsfile)

    return table
def plot_results(self, burnin=100, hardcopy=False):
    """
    Plot a histogram of the first parameter of the trials, after burn-in,
    together with the expected gaussian.

    The expected gaussian has width true_error/sqrt(npoints) and is
    scaled to the histogram counts.  The x axis is fixed to [8.5, 11.5].

    parameters
    ----------
    burnin: int, optional
        Number of initial trials to skip.
    hardcopy: bool, optional
        If true, also write the plot to mcmc-constant.eps.

    raises
    ------
    ValueError
        If self.type is not 'constant'.
    """
    import biggles
    from esutil import stat

    if self.type != "constant":
        raise ValueError("only support type='constant'")

    chain = self.trials[burnin:, 0]
    binsize = 0.05

    hist = stat.histogram(chain, binsize=binsize, more=True)

    frame = biggles.FramedPlot()
    x_range = [8.5, 11.5]
    frame.x1.range = x_range

    trials_curve = biggles.Histogram(hist["hist"], x0=chain.min(),
                                     binsize=binsize)
    trials_curve.label = "trials"

    # get the expected gaussian
    expected_error = self.true_error / numpy.sqrt(self.npoints)
    grid = numpy.arange(x_range[0], x_range[1], 0.02, dtype="f8")
    expected = self.gaussfunc(self.true_pars, expected_error, grid)

    # scale to counts per bin to match the histogram
    expected *= chain.size * binsize

    expected_curve = biggles.Curve(grid, expected, color="blue")
    expected_curve.label = "Expected Distribution"

    key = biggles.PlotKey(0.1, 0.9, [trials_curve, expected_curve])

    frame.add(trials_curve, key, expected_curve)
    frame.xlabel = "trial values"
    frame.ylabel = "count"

    if hardcopy:
        fname = "mcmc-constant.eps"
        stdout.write("Writing test file hardcopy: %s\n" % fname)
        frame.write_eps(fname)

    frame.show()
def match_prepare(self, ra, dec, verbose=False):
    """
    deprecated.  Use an htm.Matcher instead

    Look up the htm ids of the points and histogram them, offset by the
    smallest id, to get the reverse indices.  A deprecation notice is
    printed on every call.

    parameters
    ----------
    ra, dec:
        Coordinates passed to self.lookup_id.
    verbose: bool, optional
        If true, write progress messages to stdout.

    returns
    -------
    htmrev, minid, maxid:
        The reverse indices, and the smallest and largest htm id.
    """
    print('deprecated: use a htm.Matcher instead')

    def report(message, flush=False):
        # progress messages are only written in verbose mode
        if verbose:
            stdout.write(message)
            if flush:
                stdout.flush()

    report("looking up ids\n")
    htmid = self.lookup_id(ra, dec)
    minid, maxid = htmid.min(), htmid.max()

    report("Getting reverse indices\n", flush=True)
    htmrev = stat.histogram(htmid-minid, rev=True)[1]

    return htmrev, minid, maxid
def test_lognormal(): import biggles import esutil as eu from esutil.random import LogNormal, srandu from esutil.stat import histogram n=1000 nwalkers=100 burnin=100 nstep=100 mean=8 sigma=3 ln=LogNormal(mean,sigma) vals=ln.sample(n) binsize=0.5 plt=eu.plotting.bhist(vals, binsize=binsize,show=False) h=histogram(vals, binsize=binsize,more=True) herr=sqrt(h['hist']) herr=herr.clip(1.0, herr.max()) guess=[n*(1. + .1*srandu()), mean*(1. + .1*srandu()), sigma*(1. + .1*srandu())] guess=[n*binsize,mean,sigma] print 'guess:',guess nlf=LogNormalFitter(h['center'], h['hist'], guess, nwalkers, burnin, nstep, yerr=herr) print nlf res=nlf.get_result() model=nlf.get_model() yvals=model.scaled(h['center']) plt.add(biggles.Curve(h['center'], yvals, color='blue')) plt.show()
def match_prepare(self, ra, dec, verbose=False):
    """
    deprecated.  Use an htm.Matcher instead

    Compute the htm ids for ra,dec and the reverse indices of their
    histogram, which is taken after subtracting the smallest id.  A
    deprecation notice is printed on every call.

    parameters
    ----------
    ra, dec:
        Coordinates passed to self.lookup_id.
    verbose: bool, optional
        If true, write progress messages to stdout.

    returns
    -------
    htmrev, minid, maxid:
        The reverse indices, and the smallest and largest htm id.
    """
    print('deprecated: use a htm.Matcher instead')

    if verbose:
        stdout.write("looking up ids\n")

    ids = self.lookup_id(ra, dec)
    id_lo = ids.min()
    id_hi = ids.max()

    if verbose:
        stdout.write("Getting reverse indices\n")
        stdout.flush()

    # shift so the histogram starts at zero; the counts are not needed
    shifted = ids - id_lo
    _, id_rev = stat.histogram(shifted, rev=True)

    return id_rev, id_lo, id_hi
def test_line(burnin=1000, nstep=10000, doplot=False):
    """
    Fit a noisy line with the MH sampler, running all steps at once so we
    can plot the burnin phase.

    parameters
    ----------
    burnin: int, optional
        Number of initial steps treated as burn-in.  They are run together
        with the nstep later steps; when plotting they are drawn in red
        and left out of the statistics and histograms.
    nstep: int, optional
        Number of steps to run after the burn-in.
    doplot: bool, optional
        If true, show the chains, the histograms of both parameters and
        the fit over the data.

    returns
    -------
    The trials from the fitter's get_trials().
    """
    pars = [1.0, 1.0]
    xmin = -1.0
    xmax = 1.0
    nx = 10
    yerr = 0.1
    x, y, yerr = noisy_line(pars, xmin, xmax, nx, yerr)

    LF = LinFitter(x, y, yerr)
    fitter = MH(LF.get_loglike, LF.step)

    # bad guess
    parguess = [pars[0] + 0.2, pars[1] - 0.2]

    ntot = nstep + burnin
    fitter.run(parguess, ntot)

    data = fitter.get_trials()

    if doplot:
        import biggles
        from esutil import stat

        # Use the burnin that was passed in.  It also sets the number of
        # steps run above, so overriding it here would make the plots and
        # statistics disagree with the chain that was generated.

        # plot the burnin
        tab = biggles.Table(2, 1)

        steps = numpy.arange(ntot, dtype="i4")

        offset_steps_plot = biggles.FramedPlot()
        offset_steps_plot.ylabel = "offset"

        slope_steps_plot = biggles.FramedPlot()
        slope_steps_plot.ylabel = "slope"
        slope_steps_plot.xlabel = "step number"

        offset_burnin_curve = biggles.Curve(
            steps[0:burnin], data[0:burnin, 0], color="red"
        )
        slope_burnin_curve = biggles.Curve(
            steps[0:burnin], data[0:burnin, 1], color="red"
        )

        offset_rest_curve = biggles.Curve(steps[burnin:], data[burnin:, 0])
        slope_rest_curve = biggles.Curve(steps[burnin:], data[burnin:, 1])

        offset_steps_plot.add(offset_burnin_curve, offset_rest_curve)
        slope_steps_plot.add(slope_burnin_curve, slope_rest_curve)

        tab[0, 0] = offset_steps_plot
        tab[1, 0] = slope_steps_plot

        tab.show()

        # get stats for the chain
        parfit, cov = extract_stats(data[burnin:, :])
        errfit = sqrt(diag(cov))

        # plot the histograms and comparison plot
        tab = biggles.Table(2, 2)

        offsets = data[burnin:, 0]
        slopes = data[burnin:, 1]

        offset_binsize = offsets.std() * 0.2
        slope_binsize = slopes.std() * 0.2

        offset_hist = stat.histogram(offsets, binsize=offset_binsize,
                                     more=True)
        slope_hist = stat.histogram(slopes, binsize=slope_binsize,
                                    more=True)

        offset_phist = biggles.FramedPlot()
        offset_phist.add(
            biggles.Histogram(
                offset_hist["hist"],
                x0=offset_hist["low"][0],
                binsize=offset_binsize,
            )
        )
        offset_phist.xlabel = "Offsets"
        offset_phist.add(
            biggles.PlotLabel(
                0.1, 0.9,
                "offset=%0.2f +/- %0.2f" % (parfit[0], errfit[0]),
                halign="left",
            )
        )
        # leave room above the histogram for the label
        offset_phist.yrange = [0, offset_hist["hist"].max() * 1.2]

        slope_phist = biggles.FramedPlot()
        slope_phist.add(
            biggles.Histogram(
                slope_hist["hist"],
                x0=slope_hist["low"][0],
                binsize=slope_binsize,
            )
        )
        slope_phist.xlabel = "slopes"
        slope_phist.add(
            biggles.PlotLabel(
                0.1, 0.9,
                "slope=%0.2f +/- %0.2f" % (parfit[1], errfit[1]),
                halign="left",
            )
        )
        slope_phist.yrange = [0, slope_hist["hist"].max() * 1.2]

        tab[0, 0] = offset_phist
        tab[0, 1] = slope_phist

        # now plot original data and best fit par
        # NOTE(review): parfit[0] multiplies x here, although the panels
        # above label column 0 as the offset and column 1 as the slope.
        # The true values are both 1.0 so it does not show -- confirm the
        # parameter order against noisy_line / LinFitter.
        yfit = parfit[0] * x + parfit[1]

        fitplt = biggles.FramedPlot()
        data_errbar = biggles.SymmetricErrorBarsY(x, y, yerr)
        data_points = biggles.Points(x, y, type="filled circle")
        data_points.label = "Data"

        yfit_curve = biggles.Curve(x, yfit, color="blue")

        key = biggles.PlotKey(0.1, 0.9, [data_points, yfit_curve])

        fitplt.add(data_errbar, data_points, yfit_curve, key)

        tab[1, 0] = fitplt

        tab.show()

    return data
def match(self, ra1, dec1, ra2, dec2, radius, maxmatch=1,
          htmid2=None, htmrev2=None, minid=None, maxid=None,
          file=None, verbose=False):
    """
    Class:
        HTM

    Method Name:
        match

    Purpose:
        Match two sets of ra/dec points using the Hierarchical Triangular
        Mesh code.  This is very efficient for large search angles and
        large lists.  May seem slow otherwise due to overhead creating
        htm indices.

    Calling Sequence:
        import esutil
        depth = 10
        h=esutil.htm.HTM(depth)
        m1,m2,d12 = h.match(ra1,dec1,ra2,dec2,radius,
                            maxmatch=1,
                            htmid2=None,
                            htmrev2=None,
                            minid=None,
                            maxid=None,
                            file=None)

        To speed up successive calls with the same ra2,dec2, you can use:
            htmrev2,minid,maxid = h.match_prepare(ra2,dec2)
        Then
            m1,m2,e12 = h.match(ra1,dec1,ra2,dec2,radius,
                                htmrev2=htmrev2,minid=minid,maxid=maxid)

    Inputs:
        ra1,dec1,ra2,dec2:
            ra,dec lists in degrees.  Can be scalars or arrays but require
            size(ra) == size(dec) in each set.
        radius:
            The search radius in degrees.  May be a scalar or an array
            same length as ra1,dec1.

    Keyword Parameters:
        maxmatch=1:
            The maximum number of allowed matches per point.  Defaults to
            return the closest match, maxmatch=1.  Use maxmatch<=0 to
            return all matches
        htmid2=None:
            the htm indexes for the second list.  If not sent they are
            generated internally.  You can generate these with
                htmid = h.lookup_id(ra, dec)
        htmrev2=None:
            The result of
                import esutil
                htmid2 = h.lookup_id(ra, dec)
                minid=htmid2.min()
                hist2,htmrev2=\\
                    esutil.stat.histogram(htmid2-minid,rev=True)
            If not sent it is calculated internally for fast lookups.  You
            can save time on successive calls by generating these
            yourself.
        minid=None, maxid=None:
            If htmrev2 is sent along with these, there is no need to
            calculate htmid2.
        file=None:
            A file into which will be written the indices and distances.
            This is useful when the match data will not fit into memory.

            The file is an unformatted binary file.  It can be read with
            the read() method.  The format is a 64-bit signed integer
            representing the number of rows, followed by rows of
                i1 i2 d12
            Where i1,i2 are the match indices as 64-bit signed integers
            and d12 is the distance between them in degrees as a 64-bit
            float.
        verbose=False:
            If true, write progress messages to stdout.

    Outputs:
        m1,m2,d12:
            A tuple of m1,m2,d12.  m1 and m2 are the match indices for
            list1 and list2.  d12 is the distance between them in degrees.
            You can subscript the arrays ra1,dec1 with the m1 array, and
            ra2,dec2 with the m2 array.  If you do so the data "line-up"
            so that points in list one and list two at the same index are
            matches.

        The value returned is whatever cmatch returns.
        NOTE(review): for the case where file= is sent, the earlier
        documentation said both that None,None,None is returned and that
        the returned value is simply the match count -- confirm which
        holds.

    Raises:
        ValueError if size(ra) != size(dec) for either set of inputs.

    Restrictions:
        The C++ wrapper must be compiled.  This will happen automatically
        during installation of esutil.

    EXAMPLE:

        # try the matching two lists of ra/dec points
        # Matching by ra/dec, expect 10 matches ordered by distance....

        # match within two arcseconds
        two = 2.0/3600.

        # offset second list by fraction of 2 arcsec in dec
        # but last one won't match anything
        ra1 = numpy.array(  [200.0, 200.0, 200.0, 175.23, 21.36])
        dec1 = numpy.array( [24.3,  24.3,  24.3,  -28.25, -15.32])
        ra2 = numpy.array(  [200.0, 200.0, 200.0, 175.23, 55.25])
        dec2 = numpy.array( [24.3+0.75*two,
                             24.3 + 0.25*two,
                             24.3 - 0.33*two,
                             -28.25 + 0.58*two,
                             75.22])

        m1,m2,d12 = h.match(ra1,dec1,ra2,dec2,two,maxmatch=0)

        for i in range(m1.size):
            print m1[i],m2[i],d12[i]

        # this produces
        0 1 0.00013888984367
        0 2 0.00018333285694
        0 0 0.000416666032158
        1 1 0.00013888984367
        1 2 0.00018333285694
        1 0 0.000416666032158
        2 1 0.00013888984367
        2 2 0.00018333285694
        2 0 0.000416666032158
        3 3 0.000322221232243

    MODIFICATION HISTORY:
        SWIG Wrapper and matching code working 2010-03-03,
            Erin Sheldon, BNL.
        2010-03-19: Default to maxmatch=1, return the closest match.
        2010-06-16: Fixed bug that disallowed scalar inputs.  -BFG
    """

    sizes_agree = (numpy.size(ra1) == numpy.size(dec1)
                   and numpy.size(ra2) == numpy.size(dec2))
    if not sizes_agree:
        raise ValueError("require size(ra)==size(dec) for "
                         "both sets of inputs")

    have_lookup = (htmrev2 is not None
                   and minid is not None
                   and maxid is not None)

    if not have_lookup:
        if htmid2 is None:
            # nothing was sent for list 2: look up the ids and take the
            # id range from them
            if verbose:
                stdout.write("looking up ids\n")
                stdout.flush()
            htmid2 = self.lookup_id(ra2, dec2)
            minid = htmid2.min()
            maxid = htmid2.max()
        else:
            # ids were sent; only fill in the limits that are missing
            if minid is None:
                minid = htmid2.min()
            if maxid is None:
                maxid = htmid2.max()

    if htmrev2 is None:
        if verbose:
            stdout.write("Getting reverse indices\n")
            stdout.flush()
        # only the reverse indices are used, not the counts
        htmrev2 = stat.histogram(htmid2-minid, rev=True)[1]

    if verbose:
        stdout.write("calling cmatch\n")
        stdout.flush()

    return self.cmatch(radius,
                       ra1, dec1,
                       ra2, dec2,
                       htmrev2, minid, maxid,
                       maxmatch,
                       file)
def doplot(self, gprior, h1, h2, h, minmag, maxmag):
    """
    Plot the data histograms against random draws from the prior.

    The top panel shows the histograms of the two components with the
    first component of the random draws; the bottom panel shows the
    histogram of the magnitude with the magnitude of the random draws.
    The random draws are binned five times finer than the data and
    rescaled to compensate.

    The table is shown when self.show is true.  It is then written to an
    eps file in the 'plots' directory under files.get_prior_dir(), and
    the 'converter' command is run on that file.

    parameters
    ----------
    gprior:
        The prior; sample(n) is used and its result is indexed as an
        array with two columns.
    h1, h2, h:
        Histograms of the two components and of the magnitude.
    minmag, maxmag:
        Magnitude limits, used for the title and the file name.

    returns
    -------
    The Table holding both plots.
    """
    table = Table(2, 1)
    table.title = '%s %.2f %.2f ' % (self.otype, minmag, maxmag)

    nrand = 1000000
    width = self.binsize
    # the random draws get finer bins than the data
    rwidth = width*0.2

    draws = gprior.sample(nrand)
    draw1 = draws[:, 0]
    draw2 = draws[:, 1]
    draw_tot = numpy.sqrt(draw1**2 + draw2**2)

    rand_tot = histogram(draw_tot, binsize=rwidth, min=0, max=self.maxe,
                         more=True)
    rand1 = histogram(draw1, binsize=rwidth, min=self.mine_2d,
                      max=self.maxe_2d, more=True)

    # scale the random counts to the data, allowing for the bin sizes
    bratio = self.binsize/rwidth
    rand_tot['hist'] = rand_tot['hist']*bratio*float(h['hist'].sum())/nrand
    rand1['hist'] = (rand1['hist']*bratio*float(h1['hist'].sum())
                     / rand1['hist'].sum())

    # top panel: the two components
    comp_plot = FramedPlot()
    comp_plot.xlabel = r'$%s$' % self.ellip_name

    curve1 = Histogram(h1['hist'], x0=h1['low'][0], binsize=width,
                       color='darkgreen')
    curve1.label = r'$g_1$'
    curve2 = Histogram(h2['hist'], x0=h2['low'][0], binsize=width,
                       color='blue')
    curve2.label = r'$g_2$'
    curve1_rand = Histogram(rand1['hist'], x0=rand1['low'][0],
                            binsize=rwidth, color='red')
    curve1_rand.label = 'rand'
    curve_tot_rand = Histogram(rand_tot['hist'], x0=rand_tot['low'][0],
                               binsize=rwidth, color='red')
    curve_tot_rand.label = 'rand'

    comp_key = PlotKey(0.9, 0.9, [curve1, curve2, curve1_rand],
                       halign='right')
    comp_plot.add(curve1, curve2, curve1_rand, comp_key)
    table[0, 0] = comp_plot

    # bottom panel: the magnitude
    tot_plot = FramedPlot()
    tot_plot.xlabel = r'$|%s|$' % self.ellip_name

    curve_tot = Histogram(h['hist'], x0=h['low'][0], binsize=width)
    curve_tot.label = '|%s|' % self.ellip_name

    tot_key = PlotKey(0.9, 0.9, [curve_tot, curve_tot_rand], halign='right')
    tot_plot.add(curve_tot, curve_tot_rand, tot_key)
    table[1, 0] = tot_plot

    if self.show:
        table.show()

    plot_dir = os.path.join(files.get_prior_dir(), 'plots')
    epsfile = os.path.join(
        plot_dir,
        'pofe-%.2f-%.2f-%s.eps' % (minmag, maxmag, self.otype),
    )

    eu.ostools.makedirs_fromfile(epsfile)
    print(epsfile)
    table.write_eps(epsfile)
    os.system('converter -d 100 %s' % epsfile)

    return table
def doplot(self, gprior, minmag, maxmag):
    """
    Plot the data histograms in eta and in g against random draws from
    the prior.

    The draws from the prior are used as eta and converted with
    eta1eta2_to_g1g2 for the g plots.  The left column of the 2x2 table
    holds the eta plots and the right column the g plots; in each the top
    panel shows the two components with the first component of the random
    draws, and the bottom panel shows the magnitude.  The random draws are
    binned five times finer than the data and rescaled to compensate.

    The table is shown when self.show is true.  It is then written to an
    eps file in the 'plots' directory under files.get_prior_dir(), and
    the 'converter' command is run on that file.

    parameters
    ----------
    gprior:
        The prior; sample(n) is used and its result is indexed as an
        array with two columns.
    minmag, maxmag:
        Magnitude limits for the objects; also used for the title and the
        file name.

    returns
    -------
    The Table holding the four plots.
    """
    from lensing.util import eta1eta2_to_g1g2

    nrand = 1000000

    def make_panels(h1, h2, h, hrand, h1rand, binsize, rbinsize, text):
        # Scale the random histograms to the data, then build the
        # component panel and the magnitude panel for one quantity.
        bratio = binsize / rbinsize
        hrand["hist"] = (
            hrand["hist"] * bratio * float(h["hist"].sum()) / nrand
        )
        h1rand["hist"] = (
            h1rand["hist"] * bratio * float(h1["hist"].sum())
            / h1rand["hist"].sum()
        )

        comp_plot = FramedPlot()
        comp_plot.xlabel = text["xlabel"]

        curve1 = Histogram(h1["hist"], x0=h1["low"][0], binsize=binsize,
                           color="darkgreen")
        curve1.label = text["comp1"]
        curve2 = Histogram(h2["hist"], x0=h2["low"][0], binsize=binsize,
                           color="blue")
        curve2.label = text["comp2"]
        curve_tot_rand = Histogram(hrand["hist"], x0=hrand["low"][0],
                                   binsize=rbinsize, color="red")
        curve_tot_rand.label = "rand"
        curve1_rand = Histogram(h1rand["hist"], x0=h1rand["low"][0],
                                binsize=rbinsize, color="red")
        curve1_rand.label = "rand"

        comp_key = PlotKey(0.9, 0.9, [curve1, curve2, curve1_rand],
                           halign="right")
        comp_plot.add(curve1, curve2, curve1_rand, comp_key)

        tot_plot = FramedPlot()
        tot_plot.xlabel = text["tot_xlabel"]

        curve_tot = Histogram(h["hist"], x0=h["low"][0], binsize=binsize)
        curve_tot.label = text["tot_label"]

        tot_key = PlotKey(0.9, 0.9, [curve_tot, curve_tot_rand],
                          halign="right")
        tot_plot.add(curve_tot, curve_tot_rand, tot_key)

        return comp_plot, tot_plot

    tab = Table(2, 2)
    tab.title = "%s %.2f %.2f " % (self.otype, minmag, maxmag)

    # these calls also set self.binsize_g and self.binsize_eta
    h1_g, h2_g, h_g = self.do_histograms(minmag, maxmag, "g")
    h1_eta, h2_eta, h_eta = self.do_histograms(minmag, maxmag, "eta")

    binsize_eta = self.binsize_eta
    binsize_g = self.binsize_g
    # the random draws get finer bins than the data
    rbinsize_eta = binsize_eta * 0.2
    rbinsize_g = binsize_g * 0.2

    draws = gprior.sample(nrand)
    eta1_rand = draws[:, 0]
    eta2_rand = draws[:, 1]
    eta_rand = numpy.sqrt(eta1_rand ** 2 + eta2_rand ** 2)

    g1_rand, g2_rand = eta1eta2_to_g1g2(eta1_rand, eta2_rand)
    g_rand = numpy.sqrt(g1_rand ** 2 + g2_rand ** 2)

    hrand_eta = histogram(eta_rand, binsize=rbinsize_eta, min=0,
                          max=self.max_eta, more=True)
    h1rand_eta = histogram(eta1_rand, binsize=rbinsize_eta,
                           min=self.min_eta_2d, max=self.max_eta_2d,
                           more=True)
    hrand_g = histogram(g_rand, binsize=rbinsize_g, min=0,
                        max=self.max_g, more=True)
    h1rand_g = histogram(g1_rand, binsize=rbinsize_g,
                         min=self.min_g_2d, max=self.max_g_2d, more=True)

    # eta plots, left column
    eta_text = {
        "xlabel": r"$\eta$",
        "comp1": r"$\eta_1$",
        "comp2": r"$\eta_2$",
        "tot_xlabel": r"$|\eta|$",
        "tot_label": r"$|\eta|$",
    }
    tab[0, 0], tab[1, 0] = make_panels(
        h1_eta, h2_eta, h_eta, hrand_eta, h1rand_eta,
        binsize_eta, rbinsize_eta, eta_text,
    )

    # g plots, right column
    g_text = {
        "xlabel": r"$g$",
        "comp1": r"$g_1$",
        "comp2": r"$g_2$",
        "tot_xlabel": r"$|g|$",
        "tot_label": "|g|",
    }
    tab[0, 1], tab[1, 1] = make_panels(
        h1_g, h2_g, h_g, hrand_g, h1rand_g,
        binsize_g, rbinsize_g, g_text,
    )

    if self.show:
        tab.show()

    plot_dir = os.path.join(files.get_prior_dir(), "plots")
    epsfile = os.path.join(
        plot_dir,
        "pofe-pofeta-%.2f-%.2f-%s.eps" % (minmag, maxmag, self.otype),
    )

    eu.ostools.makedirs_fromfile(epsfile)
    print(epsfile)
    tab.write_eps(epsfile)
    os.system("converter -d 100 %s" % epsfile)

    return tab
def doplot(self, fitres, h1, h2, h, minmag, maxmag):
    """
    Plot the measured g histograms against draws from the fitted prior
    and save the result.

    A 2x1 table is built.  The top panel overlays the h1 and h2
    histograms with the histogram of the first component of 2-d draws
    from the prior.  The bottom panel overlays the h histogram with the
    histogram of 1-d draws, binned over the same range as h.

    The table is shown if self.show is set, then written as an eps file
    under the "plots" sub-directory of files.get_prior_dir() and the
    external "converter" program is run on it.

    parameters
    ----------
    fitres:
        Fit result, passed to self.get_prior to build the prior.
    h1, h2, h:
        Histogram dicts.  The 'hist' and 'low' entries are read from
        each, and additionally 'high' from h.
    minmag, maxmag:
        Magnitude limits, used in the title and the output file name.

    returns
    -------
    The plot table.
    """
    import subprocess

    tab = Table(2, 1)
    tab.title = '%s %.2f %.2f ' % (self.objtype, minmag, maxmag)

    gprior = self.get_prior(fitres)

    nrand = 100000
    binsize = self.binsize

    # only the first component of the 2-d draws is plotted
    g1rand, _ = gprior.sample2d(nrand)
    grand = gprior.sample1d(nrand)

    hrand = histogram(grand, binsize=binsize,
                      min=h['low'][0], max=h['high'][-1], more=True)
    h1rand = histogram(g1rand, binsize=binsize, min=-1., max=1., more=True)

    # rescale the random histograms to the total counts of the data
    hrand['hist'] = hrand['hist'] * float(h['hist'].sum()) / nrand
    h1rand['hist'] = (
        h1rand['hist'] * float(h1['hist'].sum()) / h1rand['hist'].sum()
    )

    pltboth = FramedPlot()
    pltboth.xlabel = r'$g$'

    hplt1 = Histogram(h1['hist'], x0=h1['low'][0], binsize=binsize,
                      color='red')
    hplt2 = Histogram(h2['hist'], x0=h2['low'][0], binsize=binsize,
                      color='blue')
    hpltrand = Histogram(hrand['hist'], x0=hrand['low'][0], binsize=binsize,
                         color='magenta')
    hplt1rand = Histogram(h1rand['hist'], x0=h1rand['low'][0],
                          binsize=binsize, color='magenta')

    hplt1.label = r'$g_1$'
    hplt2.label = r'$g_2$'
    hplt1rand.label = 'rand'
    hpltrand.label = 'rand'
    keyboth = PlotKey(0.9, 0.9, [hplt1, hplt2, hplt1rand], halign='right')

    pltboth.add(hplt1, hplt2, hplt1rand, keyboth)
    tab[0, 0] = pltboth

    plt = FramedPlot()
    plt.xlabel = r'$|g|$'

    hplt = Histogram(h['hist'], x0=h['low'][0], binsize=binsize)
    hplt.label = '|g|'

    key = PlotKey(0.9, 0.9, [hplt, hpltrand], halign='right')
    plt.add(hplt, hpltrand, key)

    tab[1, 0] = plt

    if self.show:
        tab.show()

    d = files.get_prior_dir()
    d = os.path.join(d, 'plots')
    epsfile = 'pofe-%.2f-%.2f-%s.eps' % (minmag, maxmag, self.objtype)
    epsfile = os.path.join(d, epsfile)
    eu.ostools.makedirs_fromfile(epsfile)

    # print() with a single argument behaves the same on python 2 and 3
    print(epsfile)
    tab.write_eps(epsfile)

    # Pass an argument list rather than a shell string so paths holding
    # spaces or shell metacharacters are handled.  os.system never raised
    # when the program was missing, so keep that failure non-fatal.
    try:
        subprocess.call(['converter', '-d', '100', epsfile])
    except OSError as err:
        print('could not run converter: %s' % err)

    return tab
def doplot(self, fitres, h1, h2, h, minmag, maxmag):
    """
    Overlay the measured g histograms with draws from the fitted prior.

    Builds a 2x1 table: the top panel shows the h1 and h2 histograms with
    the histogram of the first component of 2-d prior draws; the bottom
    panel shows the h histogram with the histogram of 1-d prior draws.
    The table is displayed when self.show is set.

    parameters
    ----------
    fitres:
        Fit result, handed to self.get_prior.
    h1, h2, h:
        Histogram dicts; the 'hist' and 'low' entries are read.
    minmag, maxmag:
        Magnitude limits, used only in the title.

    returns
    -------
    The plot table.
    """
    table = Table(2, 1)
    table.title = '%s %.2f %.2f ' % (self.objtype, minmag, maxmag)

    prior = self.get_prior(fitres)

    nrand = 100000
    width = self.binsize

    # the second component of the 2-d draws is not plotted
    g1_draws, _ = prior.sample2d(nrand)
    gtot_draws = prior.sample1d(nrand)

    rand_tot = histogram(gtot_draws, binsize=width, min=0., max=1.,
                         more=True)
    rand_g1 = histogram(g1_draws, binsize=width, min=-1., max=1.,
                        more=True)

    # bring the random histograms onto the scale of the data counts
    data_tot_count = float(h['hist'].sum())
    data_g1_count = float(h1['hist'].sum())
    rand_tot['hist'] = rand_tot['hist'] * data_tot_count / nrand
    rand_g1['hist'] = (
        rand_g1['hist'] * data_g1_count / rand_g1['hist'].sum()
    )

    # histogram objects for both panels
    curve_g1 = Histogram(h1['hist'], x0=h1['low'][0], binsize=width,
                         color='red')
    curve_g2 = Histogram(h2['hist'], x0=h2['low'][0], binsize=width,
                         color='blue')
    curve_rand_tot = Histogram(rand_tot['hist'], x0=rand_tot['low'][0],
                               binsize=width, color='magenta')
    curve_rand_g1 = Histogram(rand_g1['hist'], x0=rand_g1['low'][0],
                              binsize=width, color='magenta')

    curve_g1.label = r'$g_1$'
    curve_g2.label = r'$g_2$'
    curve_rand_g1.label = 'rand'
    curve_rand_tot.label = 'rand'

    # top panel: components
    top = FramedPlot()
    top.xlabel = r'$g$'
    top_key = PlotKey(0.9, 0.9, [curve_g1, curve_g2, curve_rand_g1],
                      halign='right')
    top.add(curve_g1, curve_g2, curve_rand_g1, top_key)
    table[0, 0] = top

    # bottom panel: magnitude
    bottom = FramedPlot()
    bottom.xlabel = r'$|g|$'

    curve_tot = Histogram(h['hist'], x0=h['low'][0], binsize=width)
    curve_tot.label = '|g|'

    bottom_key = PlotKey(0.9, 0.9, [curve_tot, curve_rand_tot],
                         halign='right')
    bottom.add(curve_tot, curve_rand_tot, bottom_key)
    table[1, 0] = bottom

    if self.show:
        table.show()

    return table
def compare_same_same(self, type, show=True):
    """
    Compare the validation-set redshift histogram with the weighted
    training-set redshift histogram.

    Steps:
        - read the original ("all") file, the validation set, the num
          file and the weights file named in self.same_same_fdict(type)
        - keep the recoverable subset, i.e. entries with num > 0
        - use the "photoid" of those entries as indices back into the
          original file to get their z values
        - plot the normalized histogram of those redshifts and overplot
          the normalized, weighted histogram of the weights-file
          redshifts
        - write the plot to the 'zhistfile' eps, convert it, and
          optionally show it

    NOTE: binning by true validation redshift and plotting ztrue - <z>,
    with <z> = integral( z*p(z) )/integral( p(z) ), is not done by this
    method.
    """
    paths = self.same_same_fdict(type)

    # the original file
    orig_data = zphot.weighting.read_training(paths['origfile'])

    # The validation set, whose "photoid" field is really an index into
    # the original file.  It is read here but not referenced again below.
    valid = zphot.weighting.read_photo(paths['photofile'])

    # version 1 of the num file; num > 0 is demanded below
    num = zphot.weighting.read_num(paths['numfile1'])

    # the weights file
    weights = zphot.weighting.read_training(paths['wfile2'])

    # recoverable objects, then their indices into the original file
    recoverable = where1(num['num'] > 0)
    orig_index = num['photoid'][recoverable]

    zvalid = orig_data['z'][orig_index]

    binsize = 0.0314
    hist_keys = dict(min=0, max=1.1, binsize=binsize, more=True)

    valid_hist = histogram(zvalid, **hist_keys)
    weighted_hist = histogram(weights['z'], weights=weights['weight'],
                              **hist_keys)

    plt = FramedPlot()

    vnorm = valid_hist['hist'] / (float(valid_hist['hist'].sum()))
    valid_curve = biggles.Histogram(vnorm, x0=valid_hist['low'][0],
                                    binsize=binsize)
    valid_curve.label = 'validation'

    wnorm = weighted_hist['whist'] / weighted_hist['whist'].sum()
    weighted_curve = biggles.Histogram(wnorm, x0=weighted_hist['low'][0],
                                       binsize=binsize, color='red')
    weighted_curve.label = 'weighted train'

    legend = PlotKey(0.6, 0.6, [valid_curve, weighted_curve])
    plt.add(valid_curve, weighted_curve, legend)
    plt.add(biggles.PlotLabel(.8, .9, type))

    outfile = paths['zhistfile']
    plt.write_eps(outfile)
    converter.convert(outfile, dpi=90, verbose=True)

    if show:
        plt.show()
def plot_results1d(data1, data2, weights1,
                   binsize=None,
                   xmin=None, xmax=None,
                   xlabel=None, title=None,
                   epsfile=None, pngfile=None, show=True,
                   label1='dataset 1', label2='dataset 2'):
    """
    Compare the histograms of two data sets at the input binsize.

    Three normalized histograms are drawn in the top panel: data1, data1
    weighted by weights1, and data2.  The bottom panel shows the
    difference (data2 - weighted data1) per bin, together with a label
    giving the effective number, sum(weights1/max(weights1)).

    Unless the domains are exactly the same, you should restrict
    xmin,xmax so that the normalizations will match correctly.

    parameters
    ----------
    data1, data2:
        Arrays to compare.
    weights1:
        Weights for data1.
    binsize:
        Bin size.  Default is 0.2 times the standard deviation of the
        data2 values lying strictly between xmin and xmax.
    xmin, xmax:
        Histogram range.  Default is the combined min/max of data1 and
        data2.
    xlabel, title:
        Plot annotations.
    epsfile, pngfile:
        If sent, the table is written to these files.
    show:
        If True, show the table on the screen.
    label1, label2:
        Labels for the two data sets.

    returns
    -------
    The biggles Table holding both panels.
    """
    import biggles
    from esutil.stat import histogram

    if xmin is None:
        xmin = min(data1.min(), data2.min())
    if xmax is None:
        xmax = max(data1.max(), data2.max())

    if binsize is None:
        in_range, = where((data2 < xmax) & (data2 > xmin))
        binsize = 0.2 * data2[in_range].std()

    # effective number of data1 objects after weighting
    scaled_weights = weights1 / weights1.max()
    effnum = scaled_weights.sum()
    effperc = effnum / data1.size * 100
    plabtext = 'effnum: %d/%d = %0.1f%%' % (effnum, data1.size, effperc)

    hist_keys = dict(binsize=binsize, min=xmin, max=xmax, more=True)

    print("    plotting hist match results")
    print("    histogramming data set 1")
    h1dict = histogram(data1, **hist_keys)
    print("    histogramming data set 1 with weights")
    h1wdict = histogram(data1, weights=weights1, **hist_keys)
    print("    histogramming data set 2")
    h2dict = histogram(data2, **hist_keys)

    # normalize each histogram to unit sum
    norm1 = h1dict['hist'] / float(h1dict['hist'].sum())
    norm1w = h1wdict['whist'] / float(h1wdict['whist'].sum())
    norm2 = h2dict['hist'] / float(h2dict['hist'].sum())

    residual = norm2 - norm1w

    table = biggles.Table(2, 1)

    curve1 = biggles.Histogram(norm1, binsize=binsize,
                               x0=h1dict['low'][0], color='blue')
    curve1.label = label1

    curve1w = biggles.Histogram(norm1w, binsize=binsize,
                                x0=h1dict['low'][0], color='red', width=2)
    curve1w.label = label1 + ' weighted'

    curve2 = biggles.Histogram(norm2, binsize=binsize,
                               x0=h2dict['low'][0], width=2)
    curve2.label = label2

    # top panel: the three histograms
    top = biggles.FramedPlot()
    top.title = title
    top.add(curve1, curve2, curve1w)
    top.xlabel = xlabel
    top.add(biggles.PlotKey(0.1, 0.90, [curve1, curve2, curve1w],
                            halign='left'))
    table[0, 0] = top

    # bottom panel: per-bin difference with a zero reference line
    bottom = biggles.FramedPlot()
    diff_points = biggles.Points(h1dict['center'], residual)
    zero_line = biggles.Curve([xmin, xmax], [0, 0])
    eff_label = biggles.PlotLabel(0.05, 0.9, plabtext, halign='left')
    bottom.add(diff_points, zero_line, eff_label)
    bottom.xlabel = xlabel
    bottom.ylabel = '%s-%s weighted' % (label2, label1)
    bottom.title = title
    table[1, 0] = bottom

    if epsfile is not None:
        table.write_eps(epsfile)
    if pngfile is not None:
        table.write_img(800, 800, pngfile)

    if show:
        table.show()

    return table
def bincount(self, rmin, rmax, nbin, ra1, dec1, ra2, dec2,
             scale=None,
             htmid2=None,
             htmrev2=None,
             minid=None,
             maxid=None,
             getbins=True):
    """
    Class:
        HTM

    Method Name:
        bincount

    Purpose:

        Count number of pairs between two ra/dec lists as a function of
        their separation.  The binning is equally spaced in the log10 of
        the separation.  By default the bin sizes are in degrees, unless
        the scale= keyword is sent, in which case the units are
        angle*scale with angle in radians.

        This code can be used to calculate correlation functions by
        calling it on the data as well as random points.

    Calling Sequence:
        import esutil
        depth = 10
        h=esutil.htm.HTM(depth)
        rlower, rupper, counts = h.bincount(
            rmin, rmax, nbin, ra1, dec1, ra2, dec2,
            scale=None,
            htmid2=None,
            htmrev2=None,
            minid=None,
            maxid=None,
            getbins=True)

    Inputs:

        rmin,rmax: Smallest and largest separations to consider.  This
            is in degrees unless the scale= keyword is sent, in which
            case the units are angle*scale with angle in radians.
        nbin:  The number of bins to use.  Bins will be equally spaced
            in the log10 of the separation.
        ra1,dec1,ra2,dec2:
            ra,dec lists in degrees.  Can be scalars or arrays but
            require len(ra) == len(dec) in each set.

    Keyword Parameters:

        scale:
            A scale to apply to the angular separations.  Must be the
            same length as ra1/dec1 or a scalar.  This is useful for
            converting angle to physical distance.  For example, scale
            could be the angular diameter distance to cosmological
            objects in list 1.

            If scale is sent, rmin,rmax must be in units of angle*scale
            where angle is in *radians*, as opposed to degrees when
            scale is not sent.

        htmid2=None:
            the htm indexes for the second list.  If not sent they are
            generated internally when needed.  You can generate these
            with

                htmid = h.lookup_id(ra, dec)

        htmrev2=None:
            The result of

                import esutil
                htmid2 = h.lookup_id(ra, dec)
                minid=htmid2.min()
                hist2,htmrev2=\\
                    esutil.stat.histogram(htmid2-minid,rev=True)

            If not sent it is calculated internally for fast lookups.
            You can save time on successive calls by generating these
            yourself.

        minid, maxid:
            The minimum and maximum of htmid2.  Taken from htmid2 when
            not sent.

            If htmrev2, minid and maxid are all sent, htmid2 is not
            needed and no id lookup is done.  If htmid2 is not sent and
            any of the three is missing, the ids are generated and
            minid, maxid are taken from them.

        getbins:
            If True, return a tuple

                rlower,rupper,counts

            instead of just counts.  rlower,rupper are the lower and
            upper limits of each bin.  getbins=True is the default.

    Outputs:

        if getbins=True:
            rlower,rupper,counts:  rlower,rupper are the lower and
            upper limits of each bin.  getbins=True is the default.
        if getbins=False:
            counts:  The pair counts in equally spaced logarithmic bins
                in separation.

    Restrictions:
        The C++ wrapper must be compiled.  This will happen
        automatically during installation of esutil.

    EXAMPLE:

        import esutil

        # simple angular counts, no scaling
        # cross correlate with second catalog
        h=esutil.htm.HTM()
        rmin=10/3600.    # degrees
        rmax=1000/3600.  # degrees
        nbin=25
        rlower,rupper,counts = h.bincount(rmin,rmax,nbin,
                                          cat1['ra'],cat1['dec'],
                                          cat2['ra'],cat2['dec'])

        # counts using scaling of the angular separations with
        # the angular diameter distance to get projected
        # physical separations.
        c=esutil.cosmology.Cosmo()

        # get angular diameter distance to catalog 1 objects
        DA=c.Da(0.0, cat1['z'])

        # cross correlate with second catalog
        h=esutil.htm.HTM()
        rmin=0.025  # Mpc
        rmax=30.0   # Mpc
        nbin=25
        rlower,rupper,counts = h.bincount(rmin,rmax,nbin,
                                          cat1['ra'],cat1['dec'],
                                          cat2['ra'],cat2['dec'],
                                          scale=DA)

    MODIFICATION HISTORY:
        Created:  2010-03-31, Erin Sheldon, BNL

    """

    # cbincount is passed htmrev2, minid and maxid but never the ids
    # themselves, so when the caller supplies all three there is nothing
    # left to derive from htmid2 and the lookup can be skipped.
    precomputed = (
        htmrev2 is not None and minid is not None and maxid is not None
    )

    if htmid2 is None:
        if not precomputed:
            stdout.write("Generating HTM ids\n")
            htmid2 = self.lookup_id(ra2, dec2)
            minid = htmid2.min()
            maxid = htmid2.max()
    else:
        if minid is None:
            minid = htmid2.min()
        if maxid is None:
            maxid = htmid2.max()

    if htmrev2 is None:
        stdout.write("Generating reverse indices\n")
        _, htmrev2 = stat.histogram(htmid2 - minid, rev=True)

    counts = self.cbincount(rmin, rmax, nbin,
                            ra1, dec1, ra2, dec2,
                            htmrev2, minid, maxid, scale)

    if getbins:
        lower, upper = log_bins(rmin, rmax, nbin)
        return lower, upper, counts

    return counts
def bincount(self, rmin, rmax, nbin, ra1, dec1, ra2, dec2,
             scale=None,
             htmid2=None,
             htmrev2=None,
             minid=None,
             maxid=None,
             getbins=True):
    """
    Class:
        HTM

    Method Name:
        bincount

    Purpose:

        Count number of pairs between two ra/dec lists as a function of
        their separation.  The binning is equally spaced in the log10 of
        the separation.  By default the bin sizes are in degrees, unless
        the scale= keyword is sent, in which case the units are
        angle*scale with angle in radians.

        This code can be used to calculate correlation functions by
        calling it on the data as well as random points.

    Calling Sequence:
        import esutil
        depth = 10
        h=esutil.htm.HTM(depth)
        rlower, rupper, counts = h.bincount(
            rmin, rmax, nbin, ra1, dec1, ra2, dec2,
            scale=None,
            htmid2=None,
            htmrev2=None,
            minid=None,
            maxid=None,
            getbins=True)

    Inputs:

        rmin,rmax: Smallest and largest separations to consider.  This
            is in degrees unless the scale= keyword is sent, in which
            case the units are angle*scale with angle in radians.
        nbin:  The number of bins to use.  Bins will be equally spaced
            in the log10 of the separation.
        ra1,dec1,ra2,dec2:
            ra,dec lists in degrees.  Can be scalars or arrays but
            require len(ra) == len(dec) in each set.

    Keyword Parameters:

        scale:
            A scale to apply to the angular separations.  Must be the
            same length as ra1/dec1 or a scalar.  This is useful for
            converting angle to physical distance.  For example, scale
            could be the angular diameter distance to cosmological
            objects in list 1.

            If scale is sent, rmin,rmax must be in units of angle*scale
            where angle is in *radians*, as opposed to degrees when
            scale is not sent.

        htmid2=None:
            the htm indexes for the second list.  If not sent they are
            generated internally when needed.  You can generate these
            with

                htmid = h.lookup_id(ra, dec)

        htmrev2=None:
            The result of

                import esutil
                htmid2 = h.lookup_id(ra, dec)
                minid=htmid2.min()
                hist2,htmrev2=\\
                    esutil.stat.histogram(htmid2-minid,rev=True)

            If not sent it is calculated internally for fast lookups.
            You can save time on successive calls by generating these
            yourself.

        minid, maxid:
            The minimum and maximum of htmid2.  Taken from htmid2 when
            not sent.

            If htmrev2, minid and maxid are all sent, htmid2 is not
            needed and no id lookup is done.  If htmid2 is not sent and
            any of the three is missing, the ids are generated and
            minid, maxid are taken from them.

        getbins:
            If True, return a tuple

                rlower,rupper,counts

            instead of just counts.  rlower,rupper are the lower and
            upper limits of each bin.  getbins=True is the default.

    Outputs:

        if getbins=True:
            rlower,rupper,counts:  rlower,rupper are the lower and
            upper limits of each bin.  getbins=True is the default.
        if getbins=False:
            counts:  The pair counts in equally spaced logarithmic bins
                in separation.

    Restrictions:
        The C++ wrapper must be compiled.  This will happen
        automatically during installation of esutil.

    EXAMPLE:

        import esutil

        # simple angular counts, no scaling
        # cross correlate with second catalog
        h=esutil.htm.HTM()
        rmin=10/3600.    # degrees
        rmax=1000/3600.  # degrees
        nbin=25
        rlower,rupper,counts = h.bincount(rmin,rmax,nbin,
                                          cat1['ra'],cat1['dec'],
                                          cat2['ra'],cat2['dec'])

        # counts using scaling of the angular separations with
        # the angular diameter distance to get projected
        # physical separations.
        c=esutil.cosmology.Cosmo()

        # get angular diameter distance to catalog 1 objects
        DA=c.Da(0.0, cat1['z'])

        # cross correlate with second catalog
        h=esutil.htm.HTM()
        rmin=0.025  # Mpc
        rmax=30.0   # Mpc
        nbin=25
        rlower,rupper,counts = h.bincount(rmin,rmax,nbin,
                                          cat1['ra'],cat1['dec'],
                                          cat2['ra'],cat2['dec'],
                                          scale=DA)

    MODIFICATION HISTORY:
        Created:  2010-03-31, Erin Sheldon, BNL

    """

    # cbincount is passed htmrev2, minid and maxid but never the ids
    # themselves, so when the caller supplies all three there is nothing
    # left to derive from htmid2 and the lookup can be skipped.
    precomputed = (
        htmrev2 is not None and minid is not None and maxid is not None
    )

    if htmid2 is None:
        if not precomputed:
            stdout.write("Generating HTM ids\n")
            htmid2 = self.lookup_id(ra2, dec2)
            minid = htmid2.min()
            maxid = htmid2.max()
    else:
        if minid is None:
            minid = htmid2.min()
        if maxid is None:
            maxid = htmid2.max()

    if htmrev2 is None:
        stdout.write("Generating reverse indices\n")
        _, htmrev2 = stat.histogram(htmid2 - minid, rev=True)

    counts = self.cbincount(rmin, rmax, nbin,
                            ra1, dec1, ra2, dec2,
                            htmrev2, minid, maxid, scale)

    if getbins:
        lower, upper = log_bins(rmin, rmax, nbin)
        return lower, upper, counts

    return counts