def get_galaxy_catalog_from_source_catalog(
        self, source_cat, rand_seed_for_galaxy_sampling=123):
    """
    Down-sample a halo catalog to a galaxy catalog.

    A copy of ``source_cat`` gets two new columns:

    * ``RAND``: one uniform random number between 0 and 1 per row
    * ``PROB_GAL``: ``0.5 * (1 + erf((log10M - log10Mmin) / sigma_log10M))``
      where ``log10M`` is the column named by ``self.log10M_column``

    Only rows with ``RAND <= PROB_GAL`` are kept.

    Parameters
    ----------
    source_cat : catalog
        input catalog; must contain the column ``self.log10M_column``
    rand_seed_for_galaxy_sampling : int, optional
        seed handed to ``MPIRandomState`` for the ``RAND`` column

    Returns
    -------
    catalog
        the selected rows, including the ``RAND`` and ``PROB_GAL`` columns
    """
    assert self.log10M_column in source_cat.columns
    mass_col = self.log10M_column

    # Work on a copy so the new columns are not attached to the input object.
    halos = copy(source_cat)
    mpi_comm = CurrentMPIComm.get()

    # One random number per halo.
    sampler = MPIRandomState(
        mpi_comm,
        seed=rand_seed_for_galaxy_sampling,
        size=halos.size,
        chunksize=100000)
    halos['RAND'] = sampler.uniform(low=0.0, high=1.0, dtype='f8')

    # Probability for each halo to host a galaxy.
    log10M = halos[mass_col].compute()
    scaled_offset = (log10M - self.log10Mmin) / self.sigma_log10M
    halos['PROB_GAL'] = 0.5 * (1.0 + erf(scaled_offset))

    print('Nhalos:', halos.csize)

    # Keep a halo only if its random number does not exceed its probability.
    keep = halos['RAND'] <= halos['PROB_GAL']
    galaxies = halos[keep]

    print('Ngalaxies:', galaxies.csize)
    print('Galaxy mass: ',
          get_cstats_string(galaxies[mass_col].compute()))
    return galaxies
def test_mpirng_unique(comm):
    """Two successive ``uniform`` draws from one state must not coincide."""
    state = MPIRandomState(comm, seed=1234, size=10, chunksize=3)

    first_draw = state.uniform()
    second_draw = state.uniform()

    # At least one element has to differ between the two draws.
    differs = first_draw != second_draw
    assert differs.any()
def test_mpirng_poisson(comm):
    """
    Poisson draws gathered over ``comm`` must equal a single-rank draw.

    The distributed state is given ``lam`` as a per-row array of 0.5; the
    reference state on ``MPI.COMM_SELF`` is given the scalar 0.5.
    """
    item_shape = (3,)

    distributed = MPIRandomState(comm, seed=1234, size=10, chunksize=3)
    lam_column = numpy.full((distributed.size, 1), 0.5)
    local_draw = distributed.poisson(lam=lam_column, itemshape=item_shape)
    gathered = numpy.concatenate(comm.allgather(local_draw), axis=0)

    reference = MPIRandomState(
        MPI.COMM_SELF,
        seed=1234,
        size=distributed.csize,
        chunksize=distributed.chunksize)
    expected = reference.poisson(lam=0.5, itemshape=item_shape)

    assert_array_equal(gathered, expected)
def test_mpirng_args(comm):
    """
    An array-valued ``low`` must give the same draws as the scalar ``low``.

    The distributed state receives ``low`` as a local array of 0.5; the
    reference state on ``MPI.COMM_SELF`` receives the scalar 0.5.
    """
    distributed = MPIRandomState(comm, seed=1234, size=10, chunksize=3)
    low_per_row = numpy.full(distributed.size, 0.5)
    local_draw = distributed.uniform(low=low_per_row)
    gathered = numpy.concatenate(comm.allgather(local_draw), axis=0)

    reference = MPIRandomState(
        MPI.COMM_SELF,
        seed=1234,
        size=distributed.csize,
        chunksize=distributed.chunksize)
    expected = reference.uniform(low=0.5)

    assert_array_equal(gathered, expected)
def test_mpirng_large_chunk(comm):
    """
    Uniform draws must match the single-rank result when ``chunksize``
    (10) is larger than the local ``size`` (1).
    """
    distributed = MPIRandomState(comm, seed=1234, size=1, chunksize=10)
    local_draw = distributed.uniform()
    gathered = numpy.concatenate(comm.allgather(local_draw), axis=0)

    reference = MPIRandomState(
        MPI.COMM_SELF,
        seed=1234,
        size=distributed.csize,
        chunksize=distributed.chunksize)
    expected = reference.uniform()

    assert_array_equal(gathered, expected)
def test_mpirng_poisson(comm):
    """
    Check that Poisson sampling does not depend on the MPI decomposition.

    Draws made on ``comm`` with an array-valued ``lam`` are gathered and
    compared against draws made on ``MPI.COMM_SELF`` with a scalar ``lam``.
    """
    shape_per_item = (3, )

    mpi_state = MPIRandomState(comm, seed=1234, size=10, chunksize=3)
    rates = 0.5 * numpy.ones((mpi_state.size, 1))
    mine = mpi_state.poisson(lam=rates, itemshape=shape_per_item)
    everyone = numpy.concatenate(comm.allgather(mine), axis=0)

    serial_state = MPIRandomState(
        MPI.COMM_SELF,
        seed=1234,
        size=mpi_state.csize,
        chunksize=mpi_state.chunksize)
    wanted = serial_state.poisson(lam=0.5, itemshape=shape_per_item)

    assert_array_equal(everyone, wanted)
def __init__(self, csize, seed=None, comm=None):
    """
    Set up the per-rank random state for a catalog of ``csize`` objects.

    Parameters
    ----------
    csize : int
        total (collective) number of objects; split across the ranks of
        ``comm`` by integer division
    seed : int, optional
        global seed; when ``None``, rank 0 draws one and broadcasts it
    comm : MPI communicator, optional
        communicator stored as ``self.comm`` and passed to the base class

    Raises
    ------
    ValueError
        if ``csize`` is 0
    """
    self.comm = comm

    if seed is None:
        # Only rank 0 draws; bcast hands the same value to every rank.
        drawn = None
        if self.comm.rank == 0:
            drawn = numpy.random.randint(0, 4294967295)
        seed = self.comm.bcast(drawn)
    self.attrs['seed'] = seed

    if csize == 0:
        raise ValueError("no random particles generated!")

    # This rank owns the half-open slice [first, last) of the csize objects.
    rank, nranks = comm.rank, comm.size
    first = rank * csize // nranks
    last = (rank + 1) * csize // nranks
    self._size = last - first

    self._rng = MPIRandomState(comm, seed=seed, size=self._size)

    # Initialise the base class last.
    CatalogSource.__init__(self, comm=comm)
def poisson_sample_to_points(delta, displacement, pm, nbar, bias=1., seed=None, logger=None):
    """
    Poisson sample the linear delta and displacement fields to points.

    The steps in this function:

    #. Apply a biased, lognormal transformation to the input ``delta`` field
    #. Poisson sample the overdensity field to discrete points
    #. Distribute the positions of particles uniformly within the mesh cells,
       and assign the displacement field at each cell to the particles

    Parameters
    ----------
    delta : RealField
        the linear overdensity field to sample
    displacement : list of RealField (3,)
        the linear displacement fields which is used to move the particles
    pm : object with a ``comm`` attribute
        only ``pm.comm.rank`` is read, to restrict log output to rank 0;
        all mesh operations go through ``delta.pm``
    nbar : float
        the desired number density of the output catalog of objects
    bias : float, optional
        apply a linear bias to the overdensity field (default is 1.)
    seed : int, optional
        the random seed used to Poisson sample the field to points
    logger : logging.Logger, optional
        if given, progress messages are logged on rank 0

    Returns
    -------
    pos : array_like, (N, 3)
        the Cartesian positions of each of the generated particles
    disp : array_like, (N, 3)
        the displacement field sampled for each of the generated particles
        in the same units as the ``pos`` array
    """
    def _log_root(message, *args):
        # Progress output only if a logger was supplied, and only on rank 0.
        if logger and pm.comm.rank == 0:
            logger.info(message, *args)

    comm = delta.pm.comm

    # seed1 is used for Poisson sampling,
    # seed2 for the uniform shift within a cell.
    seed1, seed2 = numpy.random.RandomState(seed).randint(0, 0xfffffff, size=2)

    # apply the lognormal transformation to the initial conditions density
    # this creates a positive-definite delta (necessary for Poisson sampling)
    lagrangian_bias = bias - 1.
    delta = lognormal_transform(delta, bias=lagrangian_bias)
    _log_root("Lognormal transformation done")

    # mean number of objects per cell
    H = delta.BoxSize / delta.Nmesh
    overallmean = H.prod() * nbar

    # number of objects in each cell (per rank, as a RealField)
    cellmean = delta * overallmean

    # create a random state with the input seed
    rng = MPIRandomState(seed=seed1, comm=comm, size=delta.size)

    # generate poissons. Note that we use ravel/unravel to
    # maintain MPI invariance.
    Nravel = rng.poisson(lam=cellmean.ravel())
    # NOTE(review): ``mode=`` is believed to be a deprecated alias of
    # ``type=`` in pmesh's ``ParticleMesh.create`` -- confirm against the
    # installed pmesh before switching the keyword.
    N = delta.pm.create(mode='real')
    N.unravel(Nravel)

    Ntot = N.csum()
    _log_root("Poisson sampling done, total number of objects is %d", Ntot)

    pos_mesh = delta.pm.generate_uniform_particle_grid(shift=0.0)
    disp_mesh = numpy.empty_like(pos_mesh)

    # no need to do decompose because pos_mesh is strictly within the
    # local volume of the RealField.
    N_per_cell = N.readout(pos_mesh, resampler='nnb')
    for axis in range(N.ndim):
        disp_mesh[:, axis] = displacement[axis].readout(pos_mesh, resampler='nnb')

    # fight round off errors, if any
    N_per_cell = numpy.int64(N_per_cell + 0.5)

    # one output row per object: repeat every cell N_per_cell times
    pos = pos_mesh.repeat(N_per_cell, axis=0)
    disp = disp_mesh.repeat(N_per_cell, axis=0)

    del pos_mesh
    del disp_mesh

    _log_root("catalog produced. Assigning in cell shift.")

    # generate linear ordering of the positions.
    # this should have been a method in pmesh, e.g. an argument
    # to generate_uniform_particle_grid(return_id=True);
    # FIXME: after pmesh update, remove this
    orderby = numpy.int64(pos[:, 0] / H[0] + 0.5)
    for axis in range(1, delta.ndim):
        orderby[...] *= delta.Nmesh[axis]
        orderby[...] += numpy.int64(pos[:, axis] / H[axis] + 0.5)

    # sort by ID to maintain MPI invariance.
    pos = mpsort.sort(pos, orderby=orderby, comm=comm)
    disp = mpsort.sort(disp, orderby=orderby, comm=comm)

    _log_root("sorting done")

    rng_shift = MPIRandomState(seed=seed2, comm=comm, size=len(pos))
    # Draw unit uniforms and scale every axis by its own cell size. The
    # upper bound used to be ``H[i]`` with ``i`` left over from the loop
    # above, i.e. the cell size of the last axis was applied to all axes,
    # which misplaces particles whenever the cells are not cubic.
    in_cell_shift = rng_shift.uniform(0, 1, itemshape=(delta.ndim, )) * H

    pos[...] += in_cell_shift
    # wrap back into the periodic box
    pos[...] %= delta.BoxSize

    _log_root("catalog shifted.")

    return pos, disp
def poisson_sample_to_points(delta, displacement, pm, nbar, bias=1., seed=None, logger=None):
    """
    Poisson sample the linear delta and displacement fields to points.

    The steps in this function:

    #. Apply a biased, lognormal transformation to the input ``delta`` field
    #. Poisson sample the overdensity field to discrete points
    #. Distribute the positions of particles uniformly within the mesh cells,
       and assign the displacement field at each cell to the particles

    Parameters
    ----------
    delta : RealField
        the linear overdensity field to sample
    displacement : list of RealField (3,)
        the linear displacement fields which is used to move the particles
    pm : object with a ``comm`` attribute
        only ``pm.comm.rank`` is read, to restrict log output to rank 0;
        all mesh operations go through ``delta.pm``
    nbar : float
        the desired number density of the output catalog of objects
    bias : float, optional
        apply a linear bias to the overdensity field (default is 1.)
    seed : int, optional
        the random seed used to Poisson sample the field to points
    logger : logging.Logger, optional
        if given, progress messages are logged on rank 0

    Returns
    -------
    pos : array_like, (N, 3)
        the Cartesian positions of each of the generated particles
    disp : array_like, (N, 3)
        the displacement field sampled for each of the generated particles
        in the same units as the ``pos`` array
    """
    def _log_root(message, *args):
        # Progress output only if a logger was supplied, and only on rank 0.
        if logger and pm.comm.rank == 0:
            logger.info(message, *args)

    comm = delta.pm.comm

    # seed1 is used for Poisson sampling,
    # seed2 for the uniform shift within a cell.
    seed1, seed2 = numpy.random.RandomState(seed).randint(0, 0xfffffff, size=2)

    # apply the lognormal transformation to the initial conditions density
    # this creates a positive-definite delta (necessary for Poisson sampling)
    lagrangian_bias = bias - 1.
    delta = lognormal_transform(delta, bias=lagrangian_bias)
    _log_root("Lognormal transformation done")

    # mean number of objects per cell
    H = delta.BoxSize / delta.Nmesh
    overallmean = H.prod() * nbar

    # number of objects in each cell (per rank, as a RealField)
    cellmean = delta * overallmean

    # create a random state with the input seed
    rng = MPIRandomState(seed=seed1, comm=comm, size=delta.size)

    # generate poissons. Note that we use ravel/unravel to
    # maintain MPI invariance.
    Nravel = rng.poisson(lam=cellmean.ravel())
    N = delta.pm.create(type='real')
    N.unravel(Nravel)

    Ntot = N.csum()
    _log_root("Poisson sampling done, total number of objects is %d", Ntot)

    pos_mesh = delta.pm.generate_uniform_particle_grid(shift=0.0)
    disp_mesh = numpy.empty_like(pos_mesh)

    # no need to do decompose because pos_mesh is strictly within the
    # local volume of the RealField.
    N_per_cell = N.readout(pos_mesh, resampler='nnb')
    for axis in range(N.ndim):
        disp_mesh[:, axis] = displacement[axis].readout(pos_mesh, resampler='nnb')

    # fight round off errors, if any
    N_per_cell = numpy.int64(N_per_cell + 0.5)

    # one output row per object: repeat every cell N_per_cell times
    pos = pos_mesh.repeat(N_per_cell, axis=0)
    disp = disp_mesh.repeat(N_per_cell, axis=0)

    del pos_mesh
    del disp_mesh

    _log_root("catalog produced. Assigning in cell shift.")

    # generate linear ordering of the positions.
    # this should have been a method in pmesh, e.g. an argument
    # to generate_uniform_particle_grid(return_id=True);
    # FIXME: after pmesh update, remove this
    orderby = numpy.int64(pos[:, 0] / H[0] + 0.5)
    for axis in range(1, delta.ndim):
        orderby[...] *= delta.Nmesh[axis]
        orderby[...] += numpy.int64(pos[:, axis] / H[axis] + 0.5)

    # sort by ID to maintain MPI invariance.
    pos = mpsort.sort(pos, orderby=orderby, comm=comm)
    disp = mpsort.sort(disp, orderby=orderby, comm=comm)

    _log_root("sorting done")

    rng_shift = MPIRandomState(seed=seed2, comm=comm, size=len(pos))
    # Draw unit uniforms and scale every axis by its own cell size. The
    # upper bound used to be ``H[i]`` with ``i`` left over from the loop
    # above, i.e. the cell size of the last axis was applied to all axes,
    # which misplaces particles whenever the cells are not cubic.
    in_cell_shift = rng_shift.uniform(0, 1, itemshape=(delta.ndim,)) * H

    pos[...] += in_cell_shift
    # wrap back into the periodic box
    pos[...] %= delta.BoxSize

    _log_root("catalog shifted.")

    return pos, disp
def PoissonSample(self, delta, parameters_sampling):
    """
    Poisson sample ``delta`` to discrete points and attach displacements.

    The expected count per cell is ``delta * nbar * cell_volume``. Poisson
    counts are drawn per cell, every cell position is repeated that many
    times, the rows are sorted by a linear cell index, and each point is
    then shifted uniformly inside its cell and wrapped into the box.
    Finally column 0 of the positions is offset by
    ``self.comoving_distance - self.width``.

    Parameters
    ----------
    delta : field
        density field multiplied by the mean count per cell to obtain the
        Poisson rate; it is used as-is (no transformation is applied here)
    parameters_sampling : dict
        must provide ``'nbar'`` (number density), ``'seed1'`` (seed for the
        Poisson draw) and ``'seed2'`` (seed for the in-cell shift)

    Returns
    -------
    pos : array_like, (N, ndim)
        positions of the generated points
    disp : array_like, (N, ndim)
        ``self.displacement`` read out at the cell of each point
    """
    nbar = parameters_sampling['nbar']
    seed1 = parameters_sampling['seed1']
    seed2 = parameters_sampling['seed2']

    comm = self.pm.comm

    # mean number of objects per cell
    H = self.BoxSize / self.pm.Nmesh
    overallmean = H.prod() * nbar

    # number of objects in each cell (per rank, as a RealField)
    cellmean = delta * overallmean

    # create a random state with the input seed
    rng = MPIRandomState(seed=seed1, comm=comm, size=delta.size)

    # generate poissons. Note that we use ravel/unravel to
    # maintain MPI invariance.
    Nravel = rng.poisson(lam=cellmean.ravel())
    N = self.pm.create(type='real')
    N.unravel(Nravel)

    Ntot = N.csum()
    if self.log.isEnabledFor(logging.INFO):
        self.log.info('Poisson sampling done, total number of objects is {}'.format(Ntot))

    pos_mesh = self.pm.generate_uniform_particle_grid(shift=0.0)
    disp_mesh = np.empty_like(pos_mesh)

    # no need to do decompose because pos_mesh is strictly within the
    # local volume of the RealField.
    N_per_cell = N.readout(pos_mesh, resampler='nnb')
    for axis in range(N.ndim):
        disp_mesh[:, axis] = self.displacement[axis].readout(pos_mesh, resampler='nnb')

    # fight round off errors, if any
    N_per_cell = np.int64(N_per_cell + 0.5)

    # one output row per object: repeat every cell N_per_cell times
    pos = pos_mesh.repeat(N_per_cell, axis=0)
    disp = disp_mesh.repeat(N_per_cell, axis=0)

    del pos_mesh
    del disp_mesh

    if self.log.isEnabledFor(logging.INFO):
        self.log.info("Catalog produced. Assigning in cell shift.")

    # linear cell index used as sort key
    # FIXME: after pmesh update, remove this
    orderby = np.int64(pos[:, 0] / H[0] + 0.5)
    for axis in range(1, delta.ndim):
        orderby[...] *= self.pm.Nmesh[axis]
        orderby[...] += np.int64(pos[:, axis] / H[axis] + 0.5)

    # sort by ID to maintain MPI invariance.
    pos = mpsort.sort(pos, orderby=orderby, comm=comm)
    disp = mpsort.sort(disp, orderby=orderby, comm=comm)

    if self.log.isEnabledFor(logging.INFO):
        self.log.info("Sorting done")

    rng_shift = MPIRandomState(seed=seed2, comm=comm, size=len(pos))
    # Draw unit uniforms and scale every axis by its own cell size. The
    # upper bound used to be ``H[i]`` with ``i`` left over from the loop
    # above, i.e. the cell size of the last axis was applied to all axes,
    # which misplaces points whenever the cells are not cubic.
    in_cell_shift = rng_shift.uniform(0, 1, itemshape=(delta.ndim,)) * H

    pos[...] += in_cell_shift
    # wrap back into the periodic box
    pos[...] %= self.pm.BoxSize

    if self.log.isEnabledFor(logging.INFO):
        self.log.info("Catalog shifted.")

    # Offset the catalog so that positions line up with the comoving distance.
    # NOTE(review): the original comment calls this the z-coordinate, but the
    # code offsets column 0 -- confirm the axis convention.
    pos[..., 0] += (self.comoving_distance - self.width) * np.ones(pos.shape[0])

    return pos, disp
def avg_value_mass_weighted_paint_cat_to_rho(
        cat=None,
        value_column=None,
        weight_ptcles_by=None,
        Ngrid=None,
        fill_empty_cells='RandNeighb',
        RandNeighbSeed=1234,
        raise_exception_if_too_many_empty_cells=True,
        to_mesh_kwargs=None,
        verbose=False
):
    """
    Helper function that paints cat[value_column] to grid, averaging over
    values of all particles belonging to a cell, and allowing for additional
    particle mass weights. Also has several methods to fill empty cells.

    The painted quantity is ``sum_i m_i chi_i / sum_i m_i`` per cell, where
    ``chi_i = cat[value_column]`` and ``m_i = cat[weight_ptcles_by]`` (or 1
    if ``weight_ptcles_by`` is None). Cells without particles come out as nan
    and are then filled from randomly chosen neighbor cells until no nan is
    left.

    Parameters
    ----------
    cat : catalog
        particle catalog. NOTE: a temporary column named
        ``'TMP weighted <value_column>'`` is added to it.
    value_column : str
        column to average on the grid (called 'chi' in the code, because the
        value is chi in the reconstruction code)
    weight_ptcles_by : str, optional
        column holding particle masses; None means unit mass
    Ngrid : int
        number of grid cells per dimension; the empty-cell bookkeeping
        assumes a cubic ``Ngrid**3`` mesh
    fill_empty_cells : str
        ``'RandNeighb'`` (collective index lookup) or ``'RandNeighbReadout'``
        (lookup via ``readout``). ``None``/``'SetZero'`` and
        ``'AvgAndRandNeighb'`` are recognised but disabled.
    RandNeighbSeed : int
        base seed for the neighbor draws; iteration ``k`` uses
        ``RandNeighbSeed + 100 * k``
    raise_exception_if_too_many_empty_cells : bool
        if True, raise when at least 99.9 percent of the cells are empty;
        otherwise only log a warning
    to_mesh_kwargs : dict, optional
        forwarded to ``weighted_paint_cat_to_delta``; defaults to a
        non-compensated, non-interlaced CIC window
    verbose : bool
        forwarded to ``weighted_paint_cat_to_delta``

    Returns
    -------
    thisChi : field
        the averaged field with empty cells filled
    thisChi_attrs : object
        second return value of the ``weighted_paint_cat_to_delta`` call that
        painted the weighted value column

    Raises
    ------
    Exception
        for ``fill_empty_cells`` in (None, 'SetZero') (disabled because of a
        suspected bug), for an unknown option, when too many cells are empty
        and ``raise_exception_if_too_many_empty_cells`` is True, or when every
        cell is empty (nothing to fill from).
    NotImplementedError
        for ``fill_empty_cells == 'AvgAndRandNeighb'``.
    """
    # Validate the fill strategy first, so that an unusable option fails
    # before the two collective paints are run and before ``cat`` is modified.
    if fill_empty_cells in (None, 'SetZero'):
        # Would set chi=0 in cells without particles. Used until 7 April 2017.
        # Seemed ok for correlation coefficient and BAO, but gives a
        # large-scale bias in the transfer function or broad-band power
        # because it violates mass conservation.
        raise Exception(
            'Possible bug: converting to np array only uses root rank?')
    if fill_empty_cells == 'AvgAndRandNeighb':
        # Intended behavior: first average over the 26 neighbors, then fall
        # back to random neighbors for cells that are still empty.
        raise NotImplementedError
    if fill_empty_cells not in ('RandNeighb', 'RandNeighbReadout'):
        raise Exception("Invalid fill_empty_cells option: %s" %
                        str(fill_empty_cells))

    if to_mesh_kwargs is None:
        to_mesh_kwargs = {
            'window': 'cic',
            'compensated': False,
            'interlaced': False}

    comm = CurrentMPIComm.get()
    logger = logging.getLogger('paint_utils')

    # Mass density rho4chi = sum_i m_i, used to normalize chi below. This
    # gives the average chi if several particles share a cell; weighting by
    # particle mass lets a particle that represents many original massless
    # particles dominate the average accordingly.
    rho4chi, _rho4chi_attrs = weighted_paint_cat_to_delta(
        cat,
        weight=weight_ptcles_by,
        weighted_paint_mode='sum',
        to_mesh_kwargs=to_mesh_kwargs,
        normalize=False,  # want rho not 1+delta
        Nmesh=Ngrid,
        set_mean=None,
        verbose=verbose)

    # chi weighted by particle mass, chi(x) m(x)
    weighted_col = 'TMP weighted %s' % value_column
    if weight_ptcles_by is not None:
        cat[weighted_col] = cat[weight_ptcles_by] * cat[value_column]
    else:
        # weight 1 for each particle
        cat[weighted_col] = cat[value_column]

    thisChi, thisChi_attrs = weighted_paint_cat_to_delta(
        cat,
        weight=weighted_col,  # chi weighted by particle mass
        weighted_paint_mode='sum',
        to_mesh_kwargs=to_mesh_kwargs,
        normalize=False,  # want rho not 1+delta (TODO: check)
        Nmesh=Ngrid,
        set_mean=None,
        verbose=verbose)

    # So far chi grows with the number of particles in a cell because it is
    # a sum. Divide by rho (i.e. by the summed weights) to get the average.
    # Empty cells have rho4chi == 0 and become nan.
    thisChi = thisChi / rho4chi

    # Sanity check: nan must appear exactly where rho4chi vanishes.
    ww1 = np.where(rho4chi == 0)
    ww2 = np.where(np.isnan(thisChi))
    assert np.allclose(ww1, ww2)
    del ww1, ww2

    Ng = Ngrid
    n_cells_total = float(Ng)**3
    logger.info('thisChi.shape: %s' % str(thisChi.shape))

    # indices of empty cells on this rank
    ww = np.where(np.isnan(thisChi))
    # number of empty cells across all ranks
    Nfill = comm.allreduce(ww[0].shape[0], op=MPI.SUM)
    have_empty_cells = (Nfill > 0)

    # Progressively replace nan by the value of a random neighbor cell until
    # no empty cell is left on any rank.
    i_iter = -1
    while have_empty_cells:
        i_iter += 1
        if comm.rank == 0:
            logger.info(
                "Fill %d empty chi cells (%g percent) using random neighbors"
                % (Nfill, Nfill / n_cells_total * 100.))

        # Nfill is identical on all ranks, so all ranks take the same branch.
        if Nfill / n_cells_total >= 0.999:
            if raise_exception_if_too_many_empty_cells:
                raise Exception("Stop because too many empty chi cells")
            if Nfill >= n_cells_total:
                # Every neighbor is nan as well, so the loop could never
                # terminate; fail instead of spinning forever.
                raise Exception(
                    "Stop because all chi cells are empty; "
                    "cannot fill them from neighbors")
            logger.warning("More than 99.9 percent of cells are empty")

        # Draw an offset in each of the 3 directions for each empty cell.
        # NOTE(review): the asserts require offsets in {-1, 0, +1} although
        # the requested bounds are (-2, 2); this presumably relies on how
        # MPIRandomState.uniform handles dtype='int' (e.g. float draws
        # truncated toward zero) -- confirm with the installed nbodykit.
        rng = MPIRandomState(
            comm,
            seed=RandNeighbSeed + i_iter * 100,
            size=ww[0].shape[0],
            chunksize=100000)
        r = rng.uniform(low=-2, high=2, dtype='int', itemshape=(3,))
        assert np.all(r >= -1)
        assert np.all(r <= 1)

        if fill_empty_cells == 'RandNeighbReadout':
            # Use readout to get the field at the neighbor positions
            # [(ww + rank_offset + r) % Ng] * dx.
            BoxSize = cat.attrs['BoxSize']
            dx = BoxSize / (float(Ng))
            pos_wanted = np.full((ww[0].shape[0], 3), np.nan)
            for idir in [0, 1, 2]:
                # local index -> global index -> shifted, wrapped position
                # (ranges from 0 to BoxSize)
                pos_wanted[:, idir] = (
                    (np.array(ww[idir] + thisChi.start[idir]) + r[:, idir])
                    % Ng) * dx[idir]

            # nearest-grid-point readout returns the neighbor cell's value
            readout_window = 'nnb'
            layout = thisChi.pm.decompose(
                pos_wanted, smoothing=readout_window)
            thisChi_neighbors = thisChi.readout(
                pos_wanted, resampler=readout_window, layout=layout)

            thisChi[ww] = thisChi_neighbors

        elif fill_empty_cells == 'RandNeighb':
            # Use collective getitem and only work with indices, see
            # http://rainwoodman.github.io/pmesh/pmesh.pm.html#pmesh.pm.Field.cgetitem.
            # ww are indices into the local slab and must be converted to
            # global (collective) indices first.
            thisChi_neighbors = None
            my_cindex_wanted = None
            for root in range(comm.size):
                # The lookup must be called collectively with the same
                # arguments on every rank, so each rank in turn broadcasts
                # the indices it wants.
                if comm.rank == root:
                    assert len(ww) == 3
                    wwarr = np.array(ww).transpose()
                    cww = ltoc_index_arr(field=thisChi, lindex_arr=wwarr)
                    my_cindex_wanted = (cww + r) % Ng
                cindex_wanted = comm.bcast(my_cindex_wanted, root=root)
                glob_thisChi_neighbors = cgetitem_index_arr(
                    thisChi, cindex_wanted)
                # only the requesting rank keeps the result
                if comm.rank == root:
                    thisChi_neighbors = np.array(glob_thisChi_neighbors)

            thisChi[ww] = thisChi_neighbors

        # Cells whose chosen neighbor was itself empty are still nan;
        # recount them across all ranks.
        ww = np.where(np.isnan(thisChi))
        Nfill = comm.allreduce(ww[0].shape[0], op=MPI.SUM)
        have_empty_cells = (Nfill > 0)
        comm.barrier()

    return thisChi, thisChi_attrs