def test_unweighted():
    numpy.random.seed(1234)
    pos = numpy.random.uniform(size=(1000, 3))

    # brute-force pairwise distances, using the minimum-image convention
    # for the periodic unit box
    pos1 = pos[:, None, :]
    pos2 = pos[None, :, :]
    dist = pos1 - pos2
    dist[dist > 0.5] -= 1.0
    dist[dist < -0.5] += 1.0
    dist = numpy.einsum('ijk,ijk->ij', dist, dist) ** 0.5

    dataset = correlate.points(pos, boxsize=1.0)
    # use the Python point-point counting
    binning = correlate.RBinning(numpy.linspace(0, 0.5, 10))
    # use the C node-node counting
    binning1 = correlate.FastRBinning(numpy.linspace(0, 0.5, 10))

    dig = binning.edges.searchsorted(dist.flat, side='left')
    truth = numpy.bincount(dig)

    r = correlate.paircount(dataset, dataset, binning, np=0)
    assert_equal(r.sum1, truth[1:-1])

    r1 = correlate.paircount(dataset, dataset, binning1, np=0)
    assert_equal(r1.sum1, truth[1:-1])
def test_cluster():
    numpy.random.seed(1234)
    dec = numpy.arcsin(numpy.random.uniform(-1, 1, size=100000)) / numpy.pi * 180
    ra = numpy.random.uniform(0, 2 * numpy.pi, size=100000) / numpy.pi * 180

    # testing bootstrap
    for area, rand in sphere.bootstrap(4, (ra, dec), 41252.96 / len(dec)):
        pass

    dataset = sphere.points(ra, dec)
    r = cluster.fof(dataset, 0.00001, np=None)
    assert r.N == len(dataset)

    binning = sphere.FastAngularBinning(numpy.linspace(0, 1.0, 10))
    binning1 = sphere.AngularBinning(numpy.linspace(0, 1.0, 10))
    binningR = correlate.RBinning(binning.edges)

    r = correlate.paircount(dataset, dataset, binning=binning)
    r1 = correlate.paircount(dataset, dataset, binning=binning1, compute_mean_coords=True)
    r2 = correlate.paircount(dataset, dataset, binning=binningR)

    # make sure mean_centers computes angular centers
    for i, val in enumerate(r1.mean_centers):
        assert binning.angular_edges[i] < val < binning.angular_edges[i + 1]

    assert_equal(r1.sum1, r2.sum1)
    assert_equal(r1.sum1, r.sum1)
    assert_allclose(
        r.sum1,
        numpy.diff(2 * numpy.pi * (1 - numpy.cos(numpy.radians(binning.angular_edges))))
        / (4 * numpy.pi) * len(ra) ** 2,
        rtol=0.1)
def test_cluster():
    numpy.random.seed(1234)
    dec = numpy.arcsin(numpy.random.uniform(-1, 1, size=100000)) / numpy.pi * 180
    ra = numpy.random.uniform(0, 2 * numpy.pi, size=100000) / numpy.pi * 180

    # testing bootstrap
    for area, rand in sphere.bootstrap(4, (ra, dec), 41252.96 / len(dec)):
        pass

    dataset = sphere.points(ra, dec)
    r = cluster.fof(dataset, 0.00001, np=None)
    assert r.N == len(dataset)

    binning = sphere.AngularBinning(numpy.linspace(0, 1.0, 10))
    binningR = correlate.RBinning(binning.edges)

    r = correlate.paircount(dataset, dataset, binning=binning, usefast=True)
    r1 = correlate.paircount(dataset, dataset, binning=binning, usefast=False)
    r2 = correlate.paircount(dataset, dataset, binning=binningR, usefast=True)

    assert_equal(r1.sum1, r2.sum1)
    assert_equal(r1.sum1, r.sum1)
    assert_allclose(
        r.sum1,
        numpy.diff(2 * numpy.pi * (1 - numpy.cos(numpy.radians(binning.angular_edges))))
        / (4 * numpy.pi) * len(ra) ** 2,
        rtol=0.1)
def corr():
    datafile = h5py.File(ns.catalogue, 'r')
    randfile = h5py.File(ns.random, 'r')

    datamask = datafile['COMPLETENESS'][:] >= 1
    for vetoname in ns.use_tycho_veto:
        datamask &= ~datafile['TYCHO_VETO'][vetoname][:]
    dataRA = datafile['RA'][:][datamask]
    dataDEC = datafile['DEC'][:][datamask]

    randmask = randfile['COMPLETENESS'][:] >= 1
    for vetoname in ns.use_tycho_veto:
        randmask &= ~randfile['TYCHO_VETO'][vetoname][:]
    randRA = randfile['RA'][:][randmask]
    randDEC = randfile['DEC'][:][randmask]

    data = sphere.points(dataRA, dataDEC)
    rand = sphere.points(randRA, randDEC)

    abin = sphere.AngularBinning(np.logspace(-3, 0, 16, endpoint=True))
    DD = correlate.paircount(data, data, abin, np=ns.np)
    DR = correlate.paircount(data, rand, abin, np=ns.np)
    RR = correlate.paircount(rand, rand, abin, np=ns.np)

    r = 1. * len(data) / len(rand)
    dd = 1.0 * DD.sum1
    dr = 1.0 * DR.sum1 * r
    rr = 1.0 * RR.sum1 * (r * r)
    # Landy-Szalay estimator
    return abin.angular_centers, (dd - 2 * dr + rr) / rr
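The corr() above is tied to a specific HDF5 catalogue layout (COMPLETENESS, TYCHO_VETO) and an external ns namespace. Below is a minimal, self-contained sketch of the same Landy-Szalay angular estimator on synthetic uniform points, assuming only the kdcount API already used in these examples; the helper names and sample sizes are illustrative, not from the original code.

import numpy as np
from kdcount import correlate, sphere

def ls_angular_demo(n=10000, seed=42):
    rng = np.random.RandomState(seed)

    def sky(m):
        # uniform points on the sphere, RA/DEC in degrees
        dec = np.degrees(np.arcsin(rng.uniform(-1, 1, size=m)))
        ra = rng.uniform(0, 360, size=m)
        return sphere.points(ra, dec)

    data = sky(n)
    rand = sky(4 * n)

    abin = sphere.AngularBinning(np.logspace(-2, 0, 11))
    DD = correlate.paircount(data, data, abin, np=0)
    DR = correlate.paircount(data, rand, abin, np=0)
    RR = correlate.paircount(rand, rand, abin, np=0)

    r = 1. * len(data) / len(rand)
    # Landy-Szalay; w(theta) should scatter around 0 for uniform points
    w = (DD.sum1 - 2 * r * DR.sum1 + r ** 2 * RR.sum1) / (r ** 2 * RR.sum1)
    return abin.angular_centers, w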
def test_simple():
    numpy.random.seed(1234)
    pos = numpy.random.uniform(size=(10, 3))
    dataset = correlate.points(pos, boxsize=1.0)
    binning = correlate.RBinning(numpy.linspace(0, 0.5, 10))

    r = correlate.paircount(dataset, dataset, binning, np=0)
    r1 = correlate.paircount(dataset, dataset, binning, usefast=True, np=0)
    assert_equal(r.sum1, r1.sum1)
def test_weighted():
    numpy.random.seed(1234)
    pos = numpy.random.uniform(size=(1000, 3))
    datasetw = correlate.points(pos, boxsize=1.0, weights=numpy.ones(len(pos)))
    dataset = correlate.points(pos, boxsize=1.0)
    binning = correlate.RBinning(numpy.linspace(0, 0.5, 10))

    # unit weights must reproduce the unweighted counts
    r = correlate.paircount(datasetw, datasetw, binning, np=0)
    r1 = correlate.paircount(dataset, dataset, binning, np=0)
    assert_equal(r.sum1, r1.sum1)
def main(A):
    data = correlate.points(getqso(A))
    random = correlate.points(getrandom(A))
    binning = correlate.RmuBinning(160000, Nbins=40, Nmubins=48, observer=0)
    DD = correlate.paircount(data, data, binning)
    DR = correlate.paircount(data, random, binning)
    RR = correlate.paircount(random, random, binning)
    r = 1.0 * len(data) / len(random)
    # Landy-Szalay estimator on the (R, mu) grid
    xi = (DD.sum1 + r ** 2 * RR.sum1 - 2 * r * DR.sum1) / (r ** 2 * RR.sum1)
    func = CorrFunc(DD.centers[0], DD.centers[1], xi)
    numpy.savez(os.path.join(A.datadir, "qsocorr-Rmu.npz"),
                center=DD.centers, xi=xi, corr=func)
def ac_yu(self):
    from kdcount import correlate
    from kdcount import sphere

    abin = sphere.AngularBinning(np.logspace(-4, -2.6, 10))
    D = sphere.points(self.gal_ra, self.gal_dec)
    R = sphere.points(self.ran_ra, self.ran_dec)  # weights=wt_array

    DD = correlate.paircount(D, D, abin, np=self.ncores)
    DR = correlate.paircount(D, R, abin, np=self.ncores)
    RR = correlate.paircount(R, R, abin, np=self.ncores)

    r = D.norm / R.norm
    # Landy-Szalay estimator
    w = (DD.sum1 - 2 * r * DR.sum1 + r ** 2 * RR.sum1) / (r ** 2 * RR.sum1)
    return abin.angular_centers, w
def test_cross():
    numpy.random.seed(1234)
    pos1 = numpy.random.uniform(size=(10000, 2))
    pos2 = numpy.random.uniform(size=(10000, 2)) * 0.3
    dataset1 = correlate.points(pos1, boxsize=None)
    dataset2 = correlate.points(pos2, boxsize=None)
    binning = correlate.RBinning(numpy.linspace(0, 0.1, 10))

    r1 = correlate.paircount(dataset1, dataset2, binning, np=0, usefast=False)
    r2 = correlate.paircount(dataset1, dataset2, binning, np=0, usefast=True)
    assert_equal(r1.sum1, r2.sum1)

    r3 = correlate.paircount(dataset1, dataset2, binning, np=4, usefast=False)
    assert_equal(r1.sum1, r3.sum1)

    r4 = correlate.paircount(dataset1, dataset2, binning, np=4, usefast=True)
    assert_equal(r1.sum1, r4.sum1)
def test_simple():
    numpy.random.seed(1234)
    pos = numpy.random.uniform(size=(10, 3))
    dataset = correlate.points(pos, boxsize=1.0)
    # use the Python point-point counting
    binning = correlate.RBinning(numpy.linspace(0, 0.5, 10))
    # use the C node-node counting
    binning1 = correlate.FastRBinning(numpy.linspace(0, 0.5, 10))

    r = correlate.paircount(dataset, dataset, binning, np=0)
    r1 = correlate.paircount(dataset, dataset, binning1, np=0)
    assert_equal(r.sum1, r1.sum1)
def reference_survey_paircount(pos1, w1, redges, Nmu, pos2=None, w2=None, los=2):
    """Reference pair counting via kdcount"""
    tree1 = correlate.points(pos1, boxsize=None, weights=w1)
    if pos2 is None:
        tree2 = tree1
    else:
        tree2 = correlate.points(pos2, boxsize=None, weights=w2)

    bins = correlate.RmuBinning(redges, Nmu, observer=(0, 0, 0), mu_min=0., absmu=True)
    pc = correlate.paircount(tree1, tree2, bins, np=0, compute_mean_coords=True)
    return numpy.nan_to_num(pc.pair_counts), numpy.nan_to_num(pc.mean_centers[0]), pc.sum1
def correlate_info(data1, data2, NBINS=NBINS, RMIN=1, RMAX=2, BOXSIZE=BOXSIZE, WRAP=WRAP):
    if data1 is not None:
        if RMAX is None:
            RMAX = BOXSIZE
        if WRAP:
            wrap_length = BOXSIZE
        else:
            wrap_length = None
        dataset1 = correlate.points(data1, boxsize=wrap_length)
        dataset2 = correlate.points(data2, boxsize=wrap_length)
        binning = correlate.RBinning(
            np.logspace(np.log10(RMIN), np.log10(RMAX), NBINS + 1))
        DD = correlate.paircount(dataset1, dataset2, binning, np=0)
        DD = DD.sum1
        N = len(dataset1) - 1
        return DD, N
    else:
        return None, None
def reference_sim_paircount(pos1, w1, redges, Nmu, boxsize, pos2=None, w2=None, los=2):
    """Reference pair counting via kdcount"""
    tree1 = correlate.points(pos1, boxsize=boxsize, weights=w1)
    if pos2 is None:
        tree2 = tree1
    else:
        tree2 = correlate.points(pos2, boxsize=boxsize, weights=w2)

    bins = correlate.FlatSkyBinning(redges, Nmu, los=los, mu_min=0., absmu=True)
    pc = correlate.paircount(tree1, tree2, bins, np=0, usefast=False, compute_mean_coords=True)
    return numpy.nan_to_num(pc.pair_counts), numpy.nan_to_num(pc.mean_centers[0]), pc.sum1
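A hedged usage sketch for reference_sim_paircount above, on uniform points in a periodic box; the sizes, bin edges, and Nmu here are illustrative assumptions, not values from the original test suite.

import numpy
from kdcount import correlate

numpy.random.seed(42)
boxsize = 100.0
pos = numpy.random.uniform(size=(5000, 3)) * boxsize
w = numpy.ones(len(pos))
redges = numpy.linspace(0.5, 20.0, 11)

# (R, mu) counts with the z axis (los=2) as the line of sight
counts, mean_r, sum1 = reference_sim_paircount(pos, w, redges, Nmu=5, boxsize=boxsize)
# counts lives on the (R, mu) grid: one row per R bin, one column per mu bin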
def main(A):
    delta, pos, id = getforest(A, Zmin=2.0, Zmax=2.2,
                               RfLamMin=1040, RfLamMax=1185, combine=4)
    print(len(pos))
    print(pos, delta)

    data = correlate.field(pos, value=delta)
    DD = correlate.paircount(data, data, correlate.RBinning(160000, 40))
    r = DD.centers
    xi = DD.sum1 / DD.sum2
    print(r.shape, xi.shape)
    numpy.savez(os.path.join(A.datadir, 'delta-corr1d-both.npz'), r=r, xi=xi)

    figure = Figure(figsize=(4, 5), dpi=200)
    ax = figure.add_subplot(311)
    ax.plot(r / 1000, (r / 1000) ** 2 * xi[0], 'o ', label='$dF$ RSD')
    ax.set_ylim(-0.4, 1.0)
    ax.legend()
    ax = figure.add_subplot(312)
    ax.plot(r / 1000, (r / 1000) ** 2 * xi[1], 'o ', label='$dF$ Real')
    ax.set_ylim(-0.4, 1.0)
    ax.legend()
    ax = figure.add_subplot(313)
    ax.plot(r / 1000, (r / 1000) ** 2 * xi[2], 'o ', label=r'$dF$ Broadband')
    ax.set_ylim(-20, 60)
    ax.legend()
    canvas = FigureCanvasAgg(figure)
    figure.savefig(os.path.join(A.datadir, 'delta-corr-both.svg'))
def test_channels():
    numpy.random.seed(1234)
    pos = numpy.random.uniform(size=(1000, 3))
    datasetw = correlate.points(pos, boxsize=1.0, weights=numpy.ones(len(pos)))
    dataset = correlate.points(pos, boxsize=1.0)

    binning_mc1 = correlate.FlatSkyMultipoleBinning(numpy.linspace(0, 0.5, 10),
                                                    ells=[0, 0, 0], los=0)
    binning_mc2 = correlate.MultipoleBinning(numpy.linspace(0, 0.5, 10),
                                             ells=[0, 0, 0])
    binning = correlate.RBinning(numpy.linspace(0, 0.5, 10))

    r_mc1 = correlate.paircount(datasetw, datasetw, binning_mc1, np=0)
    r_mc2 = correlate.paircount(datasetw, datasetw, binning_mc2, np=0)
    r1 = correlate.paircount(dataset, dataset, binning, np=0)

    # each ell=0 channel must reproduce the plain R-binned counts
    assert_equal(r_mc1.sum1[0], r1.sum1)
    assert_equal(r_mc2.sum1[0], r1.sum1)
def correlate_info(data, NBINS=NBINS, RMIN=RMIN, RMAX=RMAX, BOXSIZE=BOXSIZE, WRAP=WRAP):
    if data is not None:
        if RMAX is None:
            RMAX = BOXSIZE
        if WRAP:
            wrap_length = BOXSIZE
        else:
            wrap_length = None
        dataset = correlate.points(data, boxsize=wrap_length)
        binning = correlate.RBinning(
            np.logspace(np.log10(RMIN), np.log10(RMAX), NBINS + 1))
        DD = correlate.paircount(dataset, dataset, binning, np=16)
        DD = DD.sum1
        r = binning.centers
        return r, DD
    else:
        return None, None
def test_field():
    numpy.random.seed(1234)
    pos = numpy.random.uniform(size=(1000, 3))
    dataset = correlate.field(pos, value=numpy.ones(len(pos)), boxsize=1.0,
                              weights=numpy.ones(len(pos)))
    binning = correlate.RBinning(numpy.linspace(0, 0.5, 10))
    r = correlate.paircount(dataset, dataset, binning, np=0)
    # with unit values, the value-weighted sum1 must equal the weight-only sum2
    assert_allclose(r.sum1, r.sum2)
def test_field():
    numpy.random.seed(1234)
    dec = numpy.arcsin(numpy.random.uniform(-1, 1, size=100000)) / numpy.pi * 180
    ra = numpy.random.uniform(0, 2 * numpy.pi, size=100000) / numpy.pi * 180
    dataset = sphere.field(ra, dec, value=numpy.ones_like(dec) * 0.5)
    binning = sphere.AngularBinning(numpy.linspace(0, 1.0, 10))
    r = correlate.paircount(dataset, dataset, binning=binning)
def main(A):
    delta, pos, id = getforest(A, Zmin=2.0, Zmax=2.2,
                               RfLamMin=1040, RfLamMax=1185, combine=4)
    print(len(pos))
    print(pos.min(), pos.max())
    data = correlate.field(pos, value=delta)
    DD = correlate.paircount(data, data,
                             correlate.RmuBinning(80000, Nbins=20, Nmubins=48, observer=0))
    numpy.savez(os.path.join(A.datadir, 'pixcorr-Rmu.npz'),
                center=DD.centers, sum1=DD.sum1, sum2=DD.sum2)
def test_cross():
    numpy.random.seed(1234)
    pos1 = numpy.random.uniform(size=(10000, 2))
    pos2 = numpy.random.uniform(size=(10000, 2)) * 0.3
    dataset1 = correlate.points(pos1, boxsize=None)
    dataset2 = correlate.points(pos2, boxsize=None)
    # use the Python point-point counting
    binning = correlate.RBinning(numpy.linspace(0, 0.5, 10))
    # use the C node-node counting
    binning1 = correlate.FastRBinning(numpy.linspace(0, 0.5, 10))

    r1 = correlate.paircount(dataset1, dataset2, binning, np=0)
    r2 = correlate.paircount(dataset1, dataset2, binning1, np=0)
    assert_equal(r1.sum1, r2.sum1)

    r3 = correlate.paircount(dataset1, dataset2, binning, np=4)
    assert_equal(r1.sum1, r3.sum1)

    r4 = correlate.paircount(dataset1, dataset2, binning1, np=4)
    assert_equal(r1.sum1, r4.sum1)
def reference_2pcf_s(sedges, position1, weight1, position2=None, weight2=None):
    """Reference pair counting via kdcount"""
    tree1 = correlate.points(position1, boxsize=None, weights=weight1)
    factor = 1.
    if position2 is None:
        tree2 = tree1
        factor = 1. / 2.
    else:
        tree2 = correlate.points(position2, boxsize=None, weights=weight2)
    bins = correlate.RBinning(np.asarray(sedges))
    pc = correlate.paircount(tree1, tree2, bins, np=0, usefast=False, compute_mean_coords=True)
    return factor * pc.sum1
def reference_2pcf_angular(thetaedges, position1, weight1, position2=None, weight2=None):
    """Reference pair counting via kdcount"""
    tree1 = sphere.points(position1[:, 0], position1[:, 1], weights=weight1)
    factor = 1.
    if position2 is None:
        tree2 = tree1
        factor = 1. / 2.
    else:
        tree2 = sphere.points(position2[:, 0], position2[:, 1], weights=weight2)
    bins = sphere.AngularBinning(np.asarray(thetaedges))
    pc = correlate.paircount(tree1, tree2, bins)
    return factor * pc.sum1
def reference_2pcf_smu(sedges, muedges, position1, weight1, position2=None,
                       weight2=None, los='midpoint'):
    """Reference pair counting via kdcount"""
    tree1 = correlate.points(position1, boxsize=None, weights=weight1)
    if position2 is None:
        tree2 = tree1
    else:
        tree2 = correlate.points(position2, boxsize=None, weights=weight2)
    if los == 'midpoint':
        bins = correlate.RmuBinning(np.asarray(sedges), len(muedges) - 1,
                                    observer=(0, 0, 0),
                                    mu_min=muedges[0], mu_max=muedges[-1], absmu=False)
    else:
        bins = correlate.FlatSkyBinning(np.asarray(sedges), len(muedges) - 1,
                                        los='xyz'.index(los),
                                        mu_min=muedges[0], mu_max=muedges[-1], absmu=False)
    pc = correlate.paircount(tree2, tree1, bins, np=0, usefast=False, compute_mean_coords=True)
    return pc.sum1
def main(A):
    data = correlate.points(getqso(A))
    random = correlate.points(getrandom(A))
    binning = correlate.RBinning(160000, 20)
    DD = correlate.paircount(data, data, binning)
    DR = correlate.paircount(data, random, binning)
    RR = correlate.paircount(random, random, binning)
    r = 1.0 * len(data) / len(random)
    # Landy-Szalay estimator
    corr = (DD.sum1 + r ** 2 * RR.sum1 - 2 * r * DR.sum1) / (r ** 2 * RR.sum1)
    numpy.savetxt(stdout, list(zip(DD.centers, corr)), fmt='%g')

    r = DD.centers
    from matplotlib.figure import Figure
    from matplotlib.backends.backend_agg import FigureCanvasAgg
    figure = Figure(figsize=(4, 3), dpi=200)
    ax = figure.add_axes([.1, .1, .85, .85])
    ax.plot(r / 1000, (r / 1000) ** 2 * corr, 'o ', label='LS')
    ax.legend()
    canvas = FigureCanvasAgg(figure)
    figure.savefig(os.path.join(A.datadir, 'quasar-corr.svg'))
def reference_2pcf_multi(sedges, position1, weight1, position2=None, weight2=None,
                         ells=[0, 1, 2, 3, 4], los='midpoint'):
    """Reference pair counting via kdcount"""
    tree1 = correlate.points(position1, boxsize=None, weights=weight1)
    if position2 is None:
        tree2 = tree1
    else:
        tree2 = correlate.points(position2, boxsize=None, weights=weight2)
    if los == 'midpoint':
        bins = correlate.MultipoleBinning(np.asarray(sedges), ells)
    else:
        bins = correlate.FlatSkyMultipoleBinning(np.asarray(sedges), ells,
                                                 los='xyz'.index(los))
    pc = correlate.paircount(tree2, tree1, bins, np=0, usefast=False, compute_mean_coords=True)
    norm = (-1) ** np.asarray(ells) * 1. / (2 * np.asarray(ells) + 1)
    return pc.sum1.T * norm
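A hedged call sketch for reference_2pcf_multi above; the point counts, bin edges, and multipole orders are illustrative assumptions, and the output layout follows from the function's own trailing transpose.

import numpy as np
from kdcount import correlate

np.random.seed(0)
pos = np.random.uniform(size=(2000, 3)) * 100.0
w = np.ones(len(pos))
sedges = np.linspace(1.0, 30.0, 8)

# auto-pairs, monopole/quadrupole/hexadecapole, midpoint line of sight
multi = reference_2pcf_multi(sedges, pos, w, ells=[0, 2, 4])
# one row per s bin, one column per ell
print(multi.shape)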
def work(i):
    with pool.critical:
        print('doing chunk', i, Nchunks)

    Qchunk = correlate.points(qpos[qchunks[i]],
                              extra=Qfull.extra[qchunks[i]])
    Rchunk = correlate.points(rpos[rchunks[i]],
                              extra=Rfull.extra[rchunks[i]])
    Fchunk = correlate.field(fpos[fchunks[i]],
                             value=fdelta[fchunks[i]],
                             extra=objectid[fchunks[i]])

    # Q-Q
    DQDQ[i, ...] = correlate.paircount(Qchunk, Qfull, binning, np=0).fullsum1
    RQDQ[i, ...] = correlate.paircount(Rchunk, Qfull, binning, np=0).fullsum1
    RQRQ[i, ...] = correlate.paircount(Rchunk, Rfull, binning, np=0).fullsum1

    # Q-F
    DQDF = correlate.paircount(Qchunk, Ffull, binning, np=0)
    DQDFsum1[:, i, ...] = DQDF.fullsum1
    DQDFsum2[i, ...] = DQDF.fullsum2
    RQDF = correlate.paircount(Rchunk, Ffull, binning, np=0)
    RQDFsum1[:, i, ...] = RQDF.fullsum1
    RQDFsum2[i, ...] = RQDF.fullsum2

    # F-F
    DFDF = correlate.paircount(Fchunk, Ffull, binning, np=0)
    DFDFsum1[:, i, ...] = DFDF.fullsum1
    DFDFsum2[i, ...] = DFDF.fullsum2

    with pool.critical:
        print('done chunk', i, Nchunks, len(fchunks[i]))
def reference_paircount(pos1, w1, redges, boxsize, pos2=None, w2=None, los=2):
    """Reference pair counting via kdcount"""
    # make the trees
    tree1 = correlate.points(pos1, boxsize=boxsize, weights=w1)
    if pos2 is None:
        tree2 = tree1
    else:
        tree2 = correlate.points(pos2, boxsize=boxsize, weights=w2)

    # do the paircount
    bins = correlate.RBinning(redges)
    pc = correlate.paircount(tree1, tree2, bins, np=0, compute_mean_coords=True)
    return numpy.nan_to_num(pc.pair_counts), numpy.nan_to_num(pc.mean_centers), pc.sum1
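A hedged check of reference_paircount above against the analytic expectation for uniform points in a periodic box; the sizes and edges are illustrative assumptions.

import numpy
from kdcount import correlate

numpy.random.seed(7)
boxsize = 1.0
pos = numpy.random.uniform(size=(2000, 3))
w = numpy.ones(len(pos))
redges = numpy.linspace(0.01, 0.2, 11)

counts, centers, sum1 = reference_paircount(pos, w, redges, boxsize)
# for uniform points, pairs per shell approach N^2/V * 4/3 pi (r_hi^3 - r_lo^3)
expected = len(pos) ** 2 / boxsize ** 3 * 4. / 3 * numpy.pi * numpy.diff(redges ** 3)
print(sum1 / expected)  # ~1 up to shot noise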
def corr():
    data1file = h5py.File(ns.catalogue1, 'r')
    data2file = h5py.File(ns.catalogue2, 'r')
    rand1file = h5py.File(ns.random1, 'r')

    data1mask = data1file['COMPLETENESS'][:] >= 1
    for vetoname in ns.use_tycho_veto:
        data1mask &= ~data1file['TYCHO_VETO'][vetoname][:]
    data1RA = data1file['RA'][:][data1mask]
    data1DEC = data1file['DEC'][:][data1mask]

    rand1mask = rand1file['COMPLETENESS'][:] >= 1
    for vetoname in ns.use_tycho_veto:
        rand1mask &= ~rand1file['TYCHO_VETO'][vetoname][:]
    rand1RA = rand1file['RA'][:][rand1mask]
    rand1DEC = rand1file['DEC'][:][rand1mask]

    # no completeness / veto cut on the second catalogue:
    # data2mask = data2file['COMPLETENESS'][:] >= 1
    # for vetoname in ns.use_tycho_veto:
    #     data2mask &= ~data2file['TYCHO_VETO'][vetoname][:]
    data2mask = Ellipsis
    data2RA = data2file['RA'][:][data2mask]
    data2DEC = data2file['DEC'][:][data2mask]

    data1 = sphere.points(data1RA, data1DEC)
    data2 = sphere.points(data2RA, data2DEC)
    rand1 = sphere.points(rand1RA, rand1DEC)

    abin = sphere.AngularBinning(np.logspace(-3, 0, 16, endpoint=True))
    DD = correlate.paircount(data1, data2, abin, np=ns.np)
    DR = correlate.paircount(rand1, data2, abin, np=ns.np)

    r = 1. * len(data1) / len(rand1)
    dd = 1.0 * DD.sum1
    dr = 1.0 * DR.sum1 * r
    # Davis-Peebles estimator: DD/DR - 1
    return abin.angular_centers, (dd - dr) / dr
def reference_paircount(pos1, w1, edges, pos2=None, w2=None):
    """Reference pair counting via kdcount"""
    # set up the trees
    tree1 = sphere.points(*pos1, boxsize=None, weights=w1)
    if pos2 is None:
        tree2 = tree1
    else:
        tree2 = sphere.points(*pos2, boxsize=None, weights=w2)

    # run the pair count
    bins = sphere.AngularBinning(edges)
    pc = correlate.paircount(tree1, tree2, bins, np=0, compute_mean_coords=True)
    return numpy.nan_to_num(pc.pair_counts), numpy.nan_to_num(pc.mean_centers), pc.sum1
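A hedged usage sketch for the angular reference_paircount above; note that positions are passed as an (ra, dec) tuple, which the function unpacks into sphere.points. Sample sizes and edges are illustrative assumptions.

import numpy
from kdcount import correlate, sphere

numpy.random.seed(3)
n = 5000
dec = numpy.degrees(numpy.arcsin(numpy.random.uniform(-1, 1, size=n)))
ra = numpy.random.uniform(0, 360, size=n)
w = numpy.ones(n)
edges = numpy.linspace(0.1, 2.0, 9)  # degrees

counts, centers, sum1 = reference_paircount((ra, dec), w, edges)
print(counts.sum())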
def compute_brutal_corr(datasources, redges, Nmu=0, comm=None, subsample=1, los='z', poles=[]):
    r"""
    Compute the correlation function by direct pair summation, either as a
    function of separation (`R`) or as a function of separation and
    line-of-sight angle (`R`, `mu`).

    The estimator used to compute the correlation function is:

    .. math::

        \xi(r, \mu) = DD(r, \mu) / RR(r, \mu) - 1.

    where `DD` is the number of data-data pairs, and `RR` is the number of
    random-random pairs, which is determined solely by the binning used,
    assuming a constant number density.

    Parameters
    ----------
    datasources : list of DataSource objects
        the list of data instances from which the 3D correlation will be computed
    redges : array_like
        the bin edges for the `R` variable
    Nmu : int, optional
        the number of desired `mu` bins, where `mu` is the cosine of the angle
        from the line-of-sight. Default is `0`, in which case the correlation
        function is binned as a function of `R` only
    comm : MPI.Communicator, optional
        the communicator to pass to the ``ParticleMesh`` object. If not
        provided, ``MPI.COMM_WORLD`` is used
    subsample : int, optional
        downsample the input datasources by choosing 1 out of every `N` points.
        Default is `1` (no subsampling).
    los : str, {'x', 'y', 'z'}, optional
        the dimension to treat as the line-of-sight; default is 'z'.
    poles : list of int, optional
        integers specifying the multipoles to compute from the 2D correlation function

    Returns
    -------
    pc : :class:`kdcount.correlate.paircount`
        the pair counting instance
    xi : array_like
        the correlation function result; if `poles` supplied, the shape is
        `(len(redges)-1, len(poles))`, otherwise, the shape is either
        `(len(redges)-1, )` or `(len(redges)-1, Nmu)`
    RR : array_like
        the number of random-random pairs (used as normalization of the
        data-data pairs)
    """
    from pmesh.domain import GridND
    from kdcount import correlate

    # some setup
    if los not in "xyz":
        raise ValueError("`los` must be `x`, `y`, or `z`")
    los = "xyz".index(los)
    poles = numpy.array(poles)
    Rmax = redges[-1]
    if comm is None:
        comm = MPI.COMM_WORLD

    # determine processor division for domain decomposition
    for Nx in range(int(comm.size**0.3333) + 1, 0, -1):
        if comm.size % Nx == 0:
            break
    else:
        Nx = 1
    for Ny in range(int(comm.size**0.5) + 1, 0, -1):
        if (comm.size // Nx) % Ny == 0:
            break
    else:
        Ny = 1
    Nz = comm.size // Nx // Ny
    Nproc = [Nx, Ny, Nz]

    # log some info
    if comm.rank == 0:
        logger.info('Nproc = %s' % str(Nproc))
        logger.info('Rmax = %g' % Rmax)

    # domain decomposition
    grid = [numpy.linspace(0, datasources[0].BoxSize[i], Nproc[i] + 1, endpoint=True)
            for i in range(3)]
    domain = GridND(grid, comm=comm)

    # read position for field #1
    with datasources[0].open() as stream:
        [[pos1]] = stream.read(['Position'], full=True)
    pos1 = pos1[comm.rank * subsample // comm.size::subsample]
    N1 = comm.allreduce(len(pos1))

    # read position for field #2
    if len(datasources) > 1:
        with datasources[1].open() as stream:
            [[pos2]] = stream.read(['Position'], full=True)
        pos2 = pos2[comm.rank * subsample // comm.size::subsample]
        N2 = comm.allreduce(len(pos2))
    else:
        pos2 = pos1
        N2 = N1

    # exchange field #1 positions
    layout = domain.decompose(pos1, smoothing=0)
    pos1 = layout.exchange(pos1)
    if comm.rank == 0:
        logger.info('exchange pos1')

    # exchange field #2 positions
    if Rmax > datasources[0].BoxSize[0] * 0.25:
        pos2 = numpy.concatenate(comm.allgather(pos2), axis=0)
    else:
        layout = domain.decompose(pos2, smoothing=Rmax)
        pos2 = layout.exchange(pos2)
    if comm.rank == 0:
        logger.info('exchange pos2')

    # initialize the trees to hold the field points
    tree1 = correlate.points(pos1, boxsize=datasources[0].BoxSize)
    tree2 = correlate.points(pos2, boxsize=datasources[0].BoxSize)

    # log the sizes of the trees
    logger.info('rank %d correlating %d x %d' % (comm.rank, len(tree1), len(tree2)))
    if comm.rank == 0:
        logger.info('all correlating %d x %d' % (N1, N2))

    # use multipole binning
    if len(poles):
        bins = correlate.FlatSkyMultipoleBinning(redges, poles, los, compute_mean_coords=True)
    # use (R, mu) binning
    elif Nmu > 0:
        bins = correlate.FlatSkyBinning(redges, Nmu, los, compute_mean_coords=True)
    # use R binning
    else:
        bins = correlate.RBinning(redges, compute_mean_coords=True)

    # do the pair counting
    # have to set usefast = False to get mean centers, or an exception is thrown
    pc = correlate.paircount(tree2, tree1, bins, np=0, usefast=False)
    pc.sum1[:] = comm.allreduce(pc.sum1)

    # get the mean bin values, reducing from all ranks
    pc.pair_counts[:] = comm.allreduce(pc.pair_counts)
    with numpy.errstate(invalid='ignore'):
        if bins.Ndim > 1:
            for i in range(bins.Ndim):
                pc.mean_centers[i][:] = comm.allreduce(pc.mean_centers_sum[i]) / pc.pair_counts
        else:
            pc.mean_centers[:] = comm.allreduce(pc.mean_centers_sum[0]) / pc.pair_counts

    # compute the random pairs from the fractional volume
    RR = 1. * N1 * N2 / datasources[0].BoxSize.prod()
    if Nmu > 0:
        dr3 = numpy.diff(pc.edges[0]**3)
        dmu = numpy.diff(pc.edges[1])
        RR *= 2. / 3. * numpy.pi * dr3[:, None] * dmu[None, :]
    else:
        RR *= 4. / 3. * numpy.pi * numpy.diff(pc.edges**3)

    # return the correlation and the pair count object
    xi = (1. * pc.sum1 / RR) - 1.0
    if len(poles):
        xi = xi.T  # make ell the second axis
        xi[:, poles != 0] += 1.0  # only the monopole gets the minus one

    return pc, xi, RR
def main():
    comm = MPI.COMM_WORLD
    SNAP, LABEL = None, None
    if comm.rank == 0:
        SNAP = files.Snapshot(ns.snapfilename, files.TPMSnapshotFile)
        LABEL = files.Snapshot(ns.halolabel, files.HaloLabelFile)

    SNAP = comm.bcast(SNAP)
    LABEL = comm.bcast(LABEL)

    Ntot = sum(SNAP.npart)
    assert Ntot == sum(LABEL.npart)

    h = files.HaloFile(ns.halocatalogue)
    N = h.read_mass()
    N0 = Ntot - sum(N[1:])

    # halos are assigned to ranks 0, 1, 2, 3 ...
    halorank = numpy.arange(len(N)) % comm.size
    # but non-halos are special; we will fix it later.
    halorank[0] = -1

    NonhaloStart = comm.rank * int(N0) // comm.size
    NonhaloEnd = (comm.rank + 1) * int(N0) // comm.size

    myNtotal = numpy.sum(N[halorank == comm.rank], dtype='i8') + (NonhaloEnd - NonhaloStart)

    print("Rank %d NonhaloStart %d NonhaloEnd %d myNtotal %d" %
          (comm.rank, NonhaloStart, NonhaloEnd, myNtotal))

    data = numpy.empty(myNtotal, dtype=[
        ('Position', ('f4', 3)),
        ('Label', ('i4')),
        ('Rank', ('i4')),
    ])

    allNtotal = comm.allgather(myNtotal)
    start = sum(allNtotal[:comm.rank])
    end = sum(allNtotal[:comm.rank + 1])
    data['Position'] = SNAP.read("Position", start, end)
    data['Label'] = LABEL.read("Label", start, end)
    data['Rank'] = halorank[data['Label']]

    # now assign ranks to non-halo particles
    nonhalomask = (data['Label'] == 0)
    nonhalocount = comm.allgather(nonhalomask.sum())
    data['Rank'][nonhalomask] = (sum(nonhalocount[:comm.rank])
                                 + numpy.arange(nonhalomask.sum())) % comm.size

    mpsort.sort(data, orderby='Rank')

    arg = data['Label'].argsort()
    data = data[arg]

    ul = numpy.unique(data['Label'])

    bins = correlate.RBinning(40. / ns.boxsize, Nbins=ns.Nmesh)
    sum1 = numpy.zeros(len(bins.centers))

    for l in ul:
        if l == 0:
            continue
        start = data['Label'].searchsorted(l, side='left')
        end = data['Label'].searchsorted(l, side='right')
        pos = data['Position'][start:end]
        dataset = correlate.points(pos, boxsize=1.0)
        result = correlate.paircount(dataset, dataset, bins, np=0)
        sum1 += result.sum1
        if l % 1000 == 0:
            print(l)

    sum1 = comm.allreduce(sum1, MPI.SUM)
    Ntot = sum(SNAP.npart)
    RR = 4. / 3 * numpy.pi * numpy.diff(bins.edges ** 3) * (1.0 * Ntot * Ntot)

    k = numpy.arange(ns.Nmesh // 2) * 2 * numpy.pi / ns.boxsize
    # asymptotically zero at r. The mean doesn't matter as
    # we don't use the zero-k mode anyway.
    k, p = corrfrompower(bins.centers * ns.boxsize, sum1 / RR, R=k)
    # inverse FT factor
    p *= (2 * numpy.pi) ** 3

    if comm.rank == 0:
        if ns.output != '-':
            ff = open(ns.output, 'w')
            ff2 = open(ns.output + '.xi', 'w')
            with ff2:
                numpy.savetxt(ff2, list(zip(bins.centers, sum1 / RR - 1.0)))
        else:
            ff = stdout
        with ff:
            # numpy.savetxt(ff, list(zip(bins.centers, sum1 / RR - 1.0)))
            numpy.savetxt(ff, list(zip(k, p)))
def estimator(x, y):
    # `binning` is taken from the enclosing scope
    r = correlate.paircount(x, y, binning, np=0)
    return r.fullsum1
def estimator(x, y):
    # same as above, but forcing the slower point-point code path
    r = correlate.paircount(x, y, binning, usefast=False, np=0)
    return r.fullsum1
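Both estimator variants assume a binning object defined in an enclosing scope. A minimal sketch of how they might be wired up, with illustrative data; all names and sizes here are assumptions.

import numpy
from kdcount import correlate

binning = correlate.RBinning(numpy.linspace(0, 0.1, 10))

def estimator(x, y):
    r = correlate.paircount(x, y, binning, np=0)
    return r.fullsum1

numpy.random.seed(0)
x = correlate.points(numpy.random.uniform(size=(1000, 3)))
y = correlate.points(numpy.random.uniform(size=(1000, 3)))
print(estimator(x, y))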