def test_distributed_array_bincount(comm):
    """Exercise ``DistributedArray.bincount`` unweighted, weighted, and without shared edges.

    The expected arrays assume the data is scattered over four ranks
    (one chunk per rank, the third chunk being empty).
    """
    from nbodykit.utils import DistributedArray, EmptyRank

    per_rank = [
        numpy.array([0, 1, 2, 3], 'i4'),
        numpy.array([3, 3, 3, 3], 'i4'),
        numpy.array([], 'i4'),
        numpy.array([3, 5], 'i4'),
    ]
    data = numpy.array(comm.scatter(per_rank))
    da = DistributedArray(data, comm)

    def gathered(counts):
        # Stitch every rank's local counts into one flat array.
        return numpy.concatenate(comm.allgather(counts.local))

    # Default call: the total of 6 for value 3 shows up on every rank holding a 3.
    expected_shared = [1, 1, 1, 6, 6, 6, 0, 1]
    assert_array_equal(gathered(da.bincount()), expected_shared)

    # Unit weights must reproduce the plain counts.
    weights = numpy.ones_like(data)
    assert_array_equal(gathered(da.bincount(weights)), expected_shared)

    # Without shared edges the gathered result has one entry per bin.
    unshared = da.bincount(weights, shared_edges=False)
    assert_array_equal(gathered(unshared), [1, 1, 1, 6, 0, 1])
def test_distributed_array_bincount_gaps(comm):
    """Check ``bincount`` when some bin values (2 and 4) never occur in the data.

    With no value straddling two ranks in this data set, the shared-edge and
    non-shared-edge results are expected to be identical.
    """
    from nbodykit.utils import DistributedArray, EmptyRank

    per_rank = [
        numpy.array([0, 1], 'i4'),
        numpy.array([3, 3, 3, 3], 'i4'),
        numpy.array([], 'i4'),
        numpy.array([5, 5], 'i4'),
    ]
    data = numpy.array(comm.scatter(per_rank))
    da = DistributedArray(data, comm)

    expected = [1, 1, 0, 4, 0, 2]
    # Same expectation for both edge modes, checked in the original order.
    for shared in (True, False):
        counts = da.bincount(shared_edges=shared)
        assert_array_equal(numpy.concatenate(comm.allgather(counts.local)), expected)
def test_distributed_array_concat(comm):
    """Check global shape, per-rank offsets and ``DistributedArray.concat``.

    The expected offsets ``[0, 2, 4, 4]`` assume four ranks, the third of
    which holds no data.
    """
    from nbodykit.utils import DistributedArray, EmptyRank

    per_rank = [
        numpy.array([0, 1], 'i4'),
        numpy.array([2, 3], 'i4'),
        numpy.array([], 'i4'),
        numpy.array([4], 'i4'),
    ]
    data = numpy.array(comm.scatter(per_rank))
    da = DistributedArray(data, comm)

    # Five elements overall.
    assert da.cshape[0] == 5

    offsets = comm.allgather(da.coffset)
    assert_array_equal(offsets, [0, 2, 4, 4])

    # Concatenating the array with itself repeats the global sequence twice.
    doubled = DistributedArray.concat(da, da)
    flat = numpy.concatenate(comm.allgather(doubled.local))
    assert_array_equal(flat, [0, 1, 2, 3, 4, 0, 1, 2, 3, 4])
def getinsat(self, mHIsat, satid, totalsize, localsize, comm):
    """Sum ``mHIsat`` per ``satid`` across ranks and zero-pad to ``totalsize`` entries.

    ``satid`` is wrapped in a ``DistributedArray`` and binned with ``mHIsat``
    as weights (``shared_edges=False``). The result is extended with zeros so
    that its global length equals ``totalsize``, then re-laid-out through
    ``DistributedArray.concat`` with the requested ``localsize``.

    Returns the concatenated ``DistributedArray``.
    """
    binned = DistributedArray(satid, comm).bincount(mHIsat, shared_edges=False)

    # Number of trailing entries missing from the binned result.
    n_missing = totalsize - binned.cshape[0]
    padding = DistributedArray.cempty(
        cshape=(n_missing, ), dtype=binned.local.dtype, comm=comm)
    padding.local[...] = 0

    return DistributedArray.concat(binned, padding, localsize=localsize)
def weighted_map(ipix, npix, weights, localsize, comm):
    r"""
    Make a map from particles, for quantities like

        W(t) = \int dx delta(t, x) w

    Parameters
    ----------
    ipix : array_like
        bin (pixel) index of each local particle
    npix : int
        global length of the map; a zero-weight entry at ``npix - 1`` is
        added on every rank so that the distributed bincount reaches it
    weights : array_like
        weight of each local particle, same length as ``ipix``
    localsize : int
        forwarded to ``DistributedArray.concat`` as ``localsize`` for the
        final layout of both maps
    comm : MPI communicator
        this is a collective operation

    Returns
    -------
    Wmap, Nmap : the local portions (``.local``) of the distributed maps.
        Wmap is the weighted map. Nmap is the number of objects
    """
    # Collapse duplicate bins locally first so that only one entry per
    # distinct bin takes part in the distributed sort.
    ipix, labels = numpy.unique(ipix, return_inverse=True)
    N = numpy.bincount(labels)
    weights = numpy.bincount(labels, weights)
    del labels

    pairs = numpy.empty(len(ipix) + 1, dtype=[
        ('ipix', 'i4'),
        ('N', 'i4'),
        ('weights', 'f8'),
    ])
    pairs['ipix'][:-1] = ipix
    pairs['weights'][:-1] = weights
    pairs['N'][:-1] = N

    # trick to make sure the final length is correct.
    pairs['ipix'][-1] = npix - 1
    pairs['weights'][-1] = 0
    pairs['N'][-1] = 0

    disa = DistributedArray(pairs, comm=comm)
    disa.sort('ipix')

    w = disa['ipix'].bincount(weights=disa['weights'].local,
                              local=False, shared_edges=False)
    N = disa['ipix'].bincount(weights=disa['N'].local,
                              local=False, shared_edges=False)

    if npix - w.cshape[0] != 0:
        if comm.rank == 0:
            print('padding -- this shouldnt have occured ', npix, w.cshape)
        # pad with zeros, since the last few bins can be empty.
        ipadding = DistributedArray.cempty((npix - w.cshape[0],), dtype='i4', comm=comm)
        fpadding = DistributedArray.cempty((npix - w.cshape[0],), dtype='f8', comm=comm)
        fpadding.local[:] = 0
        ipadding.local[:] = 0
        w = DistributedArray.concat(w, fpadding)
        N = DistributedArray.concat(N, ipadding)

    # Re-layout both maps to the caller's requested local size.
    w = DistributedArray.concat(w, localsize=localsize)
    N = DistributedArray.concat(N, localsize=localsize)

    return w.local, N.local
def getinsat(self, mHIsat, satid, totalsize, localsize, comm):
    """Accumulate ``mHIsat`` by ``satid`` over all ranks, padded with zeros to ``totalsize``.

    The per-id sums come from a distributed ``bincount`` of ``satid`` weighted
    by ``mHIsat`` (``shared_edges=False``). Zeros are appended to bring the
    global length up to ``totalsize`` and the result is redistributed with
    ``localsize`` entries requested on this rank.

    Returns the resulting ``DistributedArray``.
    """
    ids = DistributedArray(satid, comm)
    summed = ids.bincount(mHIsat, shared_edges=False)

    # Zero block covering the entries the bincount did not produce.
    pad_shape = (totalsize - summed.cshape[0], )
    pad = DistributedArray.cempty(cshape=pad_shape, dtype=summed.local.dtype, comm=comm)
    pad.local[...] = 0

    return DistributedArray.concat(summed, pad, localsize=localsize)
def test_distributed_array_cempty(comm):
    """Check the global and local shapes produced by ``DistributedArray.cempty``.

    The sub-array dtype ``('f4', 3)`` is expected to add a trailing axis of
    length 3. The local shape of 5 rows presumably corresponds to the 20
    global rows being split over four ranks.
    """
    from nbodykit.utils import DistributedArray, EmptyRank

    da = DistributedArray.cempty((20, 3), dtype=('f4', 3), comm=comm)

    # Every rank must agree on the global shape.
    global_shapes = comm.allgather(da.cshape)
    assert_array_equal(global_shapes, [(20, 3, 3)] * comm.size)

    local_shape = da.local.shape
    assert_array_equal(local_shape, [5, 3, 3])
def test_distributed_array_unique_labels(comm):
    """Check that ``unique_labels`` numbers distinct values consistently across ranks.

    Values 3 and 6 each appear on two different chunks and must receive the
    same label on both.
    """
    from nbodykit.utils import DistributedArray, EmptyRank

    per_rank = [
        numpy.array([0, 1, 2, 3], 'i4'),
        numpy.array([3, 4, 5, 6], 'i4'),
        numpy.array([], 'i4'),
        numpy.array([6], 'i4'),
    ]
    data = numpy.array(comm.scatter(per_rank))

    da = DistributedArray(data, comm)
    da.sort()

    labels = da.unique_labels()
    flat = numpy.concatenate(comm.allgather(labels.local))
    assert_array_equal(flat, [0, 1, 2, 3, 3, 4, 5, 6, 6])
def test_distributed_array_bincount_gaps(comm):
    """Verify ``bincount`` on data whose values skip some bins (2 and 4 are absent).

    Both ``shared_edges`` settings are expected to give the same gathered
    counts for this input.
    """
    from nbodykit.utils import DistributedArray, EmptyRank

    chunks = [
        numpy.array([0, 1], 'i4'),
        numpy.array([3, 3, 3, 3], 'i4'),
        numpy.array([], 'i4'),
        numpy.array([5, 5], 'i4'),
    ]
    local_data = numpy.array(comm.scatter(chunks))
    da = DistributedArray(local_data, comm)

    def collect(result):
        # Flatten the per-rank local pieces into one global array.
        return numpy.concatenate(comm.allgather(result.local))

    with_shared = da.bincount(shared_edges=True)
    assert_array_equal(collect(with_shared), [1, 1, 0, 4, 0, 2])

    without_shared = da.bincount(shared_edges=False)
    assert_array_equal(collect(without_shared), [1, 1, 0, 4, 0, 2])
def test_distributed_array_unique_labels(comm):
    """Verify ``unique_labels`` on sorted data with values repeated across chunks."""
    from nbodykit.utils import DistributedArray, EmptyRank

    chunks = [
        numpy.array([0, 1, 2, 3], 'i4'),
        numpy.array([3, 4, 5, 6], 'i4'),
        numpy.array([], 'i4'),
        numpy.array([6], 'i4'),
    ]
    local_data = numpy.array(comm.scatter(chunks))

    da = DistributedArray(local_data, comm)
    # Labels are requested on sorted data, as in the original test.
    da.sort()
    labels = da.unique_labels()

    # Repeated values (3 and 6) keep a single label each.
    expected = [0, 1, 2, 3, 3, 4, 5, 6, 6]
    everything = numpy.concatenate(comm.allgather(labels.local))
    assert_array_equal(everything, expected)
def test_distributed_array_topo(comm):
    """Check ``topology.prev()`` / ``topology.next()`` when one rank is empty.

    Every rank holds ``0..9`` except rank 1, which is emptied. The expected
    values assume four ranks: the first rank has no predecessor and the last
    rank has no successor (``EmptyRank``), and the empty rank is skipped over.
    """
    from nbodykit.utils import DistributedArray, EmptyRank

    data = numpy.arange(10)
    if comm.rank == 1:
        # Keep dtype but drop all elements on this rank.
        data = data[:0]

    da = DistributedArray(data, comm)
    topo = da.topology

    before = topo.prev()
    after = topo.next()

    assert_array_equal(comm.allgather(before), [EmptyRank, 9, 9, 9])
    assert_array_equal(comm.allgather(after), [0, 0, 0, EmptyRank])
def test_distributed_array_bincount(comm):
    """Verify ``DistributedArray.bincount`` for plain, weighted and non-shared-edge calls.

    Expected arrays assume one chunk per rank on four ranks.
    """
    from nbodykit.utils import DistributedArray, EmptyRank

    chunks = [
        numpy.array([0, 1, 2, 3], 'i4'),
        numpy.array([3, 3, 3, 3], 'i4'),
        numpy.array([], 'i4'),
        numpy.array([3, 5], 'i4'),
    ]
    local_data = numpy.array(comm.scatter(chunks))
    da = DistributedArray(local_data, comm)

    # Plain counts.
    plain = da.bincount()
    plain_all = numpy.concatenate(comm.allgather(plain.local))
    assert_array_equal(plain_all, [1, 1, 1, 6, 6, 6, 0, 1])

    # Weighted by ones: identical to the plain counts.
    weights = numpy.ones_like(local_data)
    weighted = da.bincount(weights)
    weighted_all = numpy.concatenate(comm.allgather(weighted.local))
    assert_array_equal(weighted_all, [1, 1, 1, 6, 6, 6, 0, 1])

    # Shared edges disabled: each bin appears once in the gathered result.
    unshared = da.bincount(weights, shared_edges=False)
    unshared_all = numpy.concatenate(comm.allgather(unshared.local))
    assert_array_equal(unshared_all, [1, 1, 1, 6, 0, 1])
def test_distributed_array_concat(comm):
    """Verify ``cshape``, ``coffset`` and self-concatenation of a ``DistributedArray``."""
    from nbodykit.utils import DistributedArray, EmptyRank

    chunks = [
        numpy.array([0, 1], 'i4'),
        numpy.array([2, 3], 'i4'),
        numpy.array([], 'i4'),
        numpy.array([4], 'i4'),
    ]
    local_data = numpy.array(comm.scatter(chunks))
    da = DistributedArray(local_data, comm)

    total = da.cshape[0]
    assert total == 5

    # Offsets of each rank's first element in the global array
    # (expected values assume four ranks).
    assert_array_equal(comm.allgather(da.coffset), [0, 2, 4, 4])

    cc = DistributedArray.concat(da, da)
    expected = [0, 1, 2, 3, 4, 0, 1, 2, 3, 4]
    assert_array_equal(numpy.concatenate(comm.allgather(cc.local)), expected)
def _assign_labels(minid, comm, thresh):
    """
    Convert minid to sequential labels starting from 0.

    This routine is used to assign halo label to particles with
    the same minid.
    Halos with ``thresh`` or fewer particles are reclassified to 0
    (the comparison used is ``N <= thresh``).

    Parameters
    ----------
    minid : array_like, ('i8')
        The minimum particle id of the halo. All particles of a halo
        have the same minid
    comm : py:class:`MPI.Comm`
        communicator. since this is a collective operation
    thresh : int
        halos with at most thresh particles are merged into halo 0

    Returns
    -------
    labels : array_like ('i4', or 'i8' when there are more than 2**31 labels)
        The new labels of particles, in the original particle order.
        Labels 1, 2, ... are ordered by decreasing halo size; label 0
        represents all particles that are in halos that contain at most
        thresh particles.
    """
    from mpi4py import MPI

    dtype = numpy.dtype([
        ('origind', 'u8'),
        ('fofid', 'u8'),
    ])
    data = numpy.empty(len(minid), dtype=dtype)

    # assign origind for recovery of ordering, since
    # we need to work in sorted fofid
    data['fofid'] = minid

    # Global index of this rank's first particle, kept as a plain Python int:
    # adding a numpy int64 scalar to a uint64 array in place fails under
    # NumPy 2 promotion rules, and a 'u4' arange could wrap before being
    # stored into the 'u8' field.
    offset = int(sum(comm.allgather(len(data))[:comm.rank]))
    data['origind'] = numpy.arange(offset, offset + len(data), dtype='u8')

    data = DistributedArray(data, comm)

    # first attempt is to assign fofid for each group
    data.sort('fofid')
    label = data['fofid'].unique_labels()

    N = label.bincount()

    # now eliminate those with at most thresh particles
    small = N.local <= thresh

    Nlocal = label.bincount(local=True)
    # mask == True for particles in small halos
    mask = numpy.repeat(small, Nlocal)

    # globally shift halo id by one, freeing 0 for the small halos
    label.local += 1
    label.local[mask] = 0

    data['fofid'].local[:] = label.local[:]
    del label

    data.sort('fofid')

    data['fofid'].local[:] = data['fofid'].unique_labels().local[:]

    # unique_labels may miss the 0 index representing disconnected
    # particles if there are no such particles.
    # shift the fofid by 1 in that case.
    anysmall = comm.allreduce(small.sum()) != 0
    if not anysmall:
        data['fofid'].local[:] += 1

    data.sort('origind')

    label = data['fofid'].local.view('i8').copy()

    del data

    # A rank holding no particles has no maximum; labels are non-negative,
    # so 0 is a neutral contribution to the global maximum.
    local_max = label.max() if len(label) > 0 else 0
    Nhalo0 = max(comm.allgather(local_max)) + 1

    Nlocal = numpy.bincount(label, minlength=Nhalo0)
    comm.Allreduce(MPI.IN_PLACE, Nlocal, op=MPI.SUM)

    # sort the labels by halo size (label 0 is excluded and stays 0)
    arg = Nlocal[1:].argsort()[::-1] + 1
    if Nhalo0 > 2**31:
        dtype = 'i8'
    else:
        dtype = 'i4'
    P = numpy.arange(Nhalo0, dtype=dtype)
    P[arg] = numpy.arange(len(arg), dtype=dtype) + 1
    label = P[label]

    return label
cen = BigFileCatalog(myscratch + sim + '/fastpm_%0.4f/cencat-%s/' % (aa, suff)) sat = BigFileCatalog(myscratch + sim + '/fastpm_%0.4f/satcat-%s/' % (aa, suff)) ## hmass = halos['Length'].compute() * mp cmass = cen["Mass"].compute() chmass = cen["HaloMass"].compute() smass = sat["Mass"].compute() hpos, cpos, spos = halos['Position'].compute( ), cen['Position'].compute(), sat['Position'].compute() chid, shid = cen['GlobalID'].compute(), sat['GlobalID'].compute() cnsat = cen['Nsat'].compute() da = DistributedArray(shid, comm) N = da.bincount(shared_edges=False) print('rank, shid, N : ', rank, shid[:10], N.local[:10]) print('rank, chid, csat, N : ', rank, chid[:10], cnsat[:10], N.local[:10]) print('rank, cen.csize, N.cshape : ', rank, cen.csize, N.cshape) print('rank, cen.size, Nlocal.size : ', rank, cen.size, N.local.size) print(cen.csize - N.cshape) zerosize = (cen.csize - N.cshape[0]) #start = (zerosize *rank // wsize) #end = (zerosize *(rank+1) // wsize) #zeros = DistributedArray(np.zeros(end-start), comm=comm) print(zerosize, N.local.dtype) zeros = DistributedArray.cempty(cshape=(zerosize, ),
def _assign_labels(minid, comm, thresh):
    """
    Convert minid to sequential labels starting from 0.

    This routine is used to assign halo label to particles with
    the same minid.
    Halos with ``thresh`` or fewer particles are reclassified to 0
    (the comparison used is ``N <= thresh``).

    Parameters
    ----------
    minid : array_like, ('i8')
        The minimum particle id of the halo. All particles of a halo
        have the same minid
    comm : py:class:`MPI.Comm`
        communicator. since this is a collective operation
    thresh : int
        halos with at most thresh particles are merged into halo 0

    Returns
    -------
    labels : array_like ('i4', or 'i8' when there are more than 2**31 labels)
        The new labels of particles, in the original particle order.
        Labels 1, 2, ... are ordered by decreasing halo size; label 0
        represents all particles that are in halos that contain at most
        thresh particles.
    """
    from mpi4py import MPI

    dtype = numpy.dtype([
        ('origind', 'u8'),
        ('fofid', 'u8'),
    ])
    data = numpy.empty(len(minid), dtype=dtype)

    # assign origind for recovery of ordering, since
    # we need to work in sorted fofid
    data['fofid'] = minid

    # Global index of this rank's first particle, kept as a plain Python int:
    # adding a numpy int64 scalar to a uint64 array in place fails under
    # NumPy 2 promotion rules, and a 'u4' arange could wrap before being
    # stored into the 'u8' field.
    offset = int(sum(comm.allgather(len(data))[:comm.rank]))
    data['origind'] = numpy.arange(offset, offset + len(data), dtype='u8')

    data = DistributedArray(data, comm)

    # first attempt is to assign fofid for each group
    data.sort('fofid')
    label = data['fofid'].unique_labels()

    N = label.bincount()

    # now eliminate those with at most thresh particles
    small = N.local <= thresh

    Nlocal = label.bincount(local=True)
    # mask == True for particles in small halos
    mask = numpy.repeat(small, Nlocal)

    # globally shift halo id by one, freeing 0 for the small halos
    label.local += 1
    label.local[mask] = 0

    data['fofid'].local[:] = label.local[:]
    del label

    data.sort('fofid')

    data['fofid'].local[:] = data['fofid'].unique_labels().local[:]

    # When no halo was small, nothing was set to 0 above, so the relabeling
    # hands label 0 to a real halo. Shift by one in that case so that 0 stays
    # reserved for particles in small halos.
    anysmall = comm.allreduce(small.sum()) != 0
    if not anysmall:
        data['fofid'].local[:] += 1

    data.sort('origind')

    label = data['fofid'].local.view('i8').copy()

    del data

    # A rank holding no particles has no maximum; labels are non-negative,
    # so 0 is a neutral contribution to the global maximum.
    local_max = label.max() if len(label) > 0 else 0
    Nhalo0 = max(comm.allgather(local_max)) + 1

    Nlocal = numpy.bincount(label, minlength=Nhalo0)
    comm.Allreduce(MPI.IN_PLACE, Nlocal, op=MPI.SUM)

    # sort the labels by halo size (label 0 is excluded and stays 0)
    arg = Nlocal[1:].argsort()[::-1] + 1
    if Nhalo0 > 2**31:
        dtype = 'i8'
    else:
        dtype = 'i4'
    P = numpy.arange(Nhalo0, dtype=dtype)
    P[arg] = numpy.arange(len(arg), dtype=dtype) + 1
    label = P[label]

    return label