def __init__(self, filetype, args=(), kwargs=None, comm=None):
    """
    Open a stack of files and give each rank a contiguous slice of it.

    Parameters
    ----------
    filetype :
        passed to ``FileStack`` as its first argument
    args : tuple, optional
        positional arguments forwarded to ``FileStack`` after ``filetype``
    kwargs : dict, optional
        keyword arguments forwarded to ``FileStack``; ``None`` (the
        default) is treated as "no keyword arguments"
    comm :
        the communicator; stored as ``self.comm`` and handed to
        ``CatalogSource.__init__``
    """
    # a ``{}`` default would be one dict object shared by every call,
    # so use a None sentinel and build a fresh dict per call instead
    if kwargs is None:
        kwargs = {}

    self.comm = comm
    self.filetype = filetype

    # only the root rank opens the files; the result is bcast to the rest
    if self.comm.rank == 0:
        self._source = FileStack(filetype, *args, **kwargs)
    else:
        self._source = None
    self._source = self.comm.bcast(self._source)

    # compute the size; start with the full file
    total = self._source.size
    lstart = self.comm.rank * total // self.comm.size
    lend = (self.comm.rank + 1) * total // self.comm.size
    self._size = lend - lstart

    # the range of the file covered by the whole catalog
    self.start = 0
    self.end = total

    # the range of the file covered by this rank
    self._lstart = lstart
    self._lend = lend

    # update the meta-data
    self.attrs.update(self._source.attrs)

    if self.comm.rank == 0:
        self.logger.info("Extra arguments to FileType: %s %s" % (str(args), str(kwargs)))

    CatalogSource.__init__(self, comm=comm)
def query_range(self, start, end):
    """
    Build a copy of this catalog restricted to a sub-range of the file.

    The ``end - start`` selected rows are divided evenly among the ranks
    of ``self.comm``; each rank-local slice is offset by
    ``self.start + start``.

    Parameters
    ----------
    start : int
        first row of the requested range
    end : int
        one past the last row of the requested range

    Returns
    -------
    other :
        a copy of ``self`` whose ``start``, ``end``, ``_lstart``,
        ``_lend`` and ``_size`` describe the requested range

    Raises
    ------
    ValueError
        if ``CatalogSource.hardcolumns`` reports any columns for this
        catalog, i.e. columns have been attached on top of the file
    """
    attached = CatalogSource.hardcolumns.fget(self)
    if len(attached) > 0:
        raise ValueError("cannot seek if columns have been attached to the FileCatalog")

    other = self.copy()

    rank = self.comm.rank
    nranks = self.comm.size
    length = end - start
    base = self.start + start

    other._lstart = base + rank * length // nranks
    other._lend = base + (rank + 1) * length // nranks
    other._size = other._lend - other._lstart

    # NOTE(review): the local offsets above include ``self.start`` but the
    # stored ``start`` does not -- confirm this is intended for chained calls
    other.start = start
    other.end = end

    CatalogSource.__init__(other, comm=self.comm)
    return other
def query_range(self, start, end):
    """
    Return a new catalog that only accesses rows ``[start, end)``.

    Parameters
    ----------
    start : int
        beginning of the requested range; the rank-local offsets are
        computed from ``self.start + start``
    end : int
        end of the requested range (exclusive)

    Returns
    -------
    other :
        a copy of this catalog, re-initialized through
        ``CatalogSource.__init__``, with the requested range split
        evenly over the ranks of ``self.comm``

    Raises
    ------
    ValueError
        when the catalog carries columns reported by
        ``CatalogSource.hardcolumns``; seeking is refused in that case
    """
    if len(CatalogSource.hardcolumns.fget(self)) > 0:
        raise ValueError("cannot seek if columns have been attached to the FileCatalog")

    def edge(index):
        # file offset at which the slice of rank ``index`` begins
        return self.start + start + index * (end - start) // self.comm.size

    other = self.copy()
    other._lstart = edge(self.comm.rank)
    other._lend = edge(self.comm.rank + 1)
    other._size = other._lend - other._lstart

    # NOTE(review): ``edge`` adds ``self.start`` while ``other.start`` is set
    # to the bare ``start`` -- verify against callers that chain queries
    other.start, other.end = start, end

    CatalogSource.__init__(other, comm=self.comm)
    return other
def __init__(self, Plin, nbar, BoxSize, Nmesh, bias=2., seed=None,
             cosmo=None, redshift=None,
             unitary_amplitude=False, inverted_phase=False, comm=None):
    """
    Set up the catalog from a linear power spectrum.

    Parameters
    ----------
    Plin :
        the power spectrum object; its ``cosmo``, ``redshift`` and
        ``attrs`` attributes are used when present
    nbar, bias, unitary_amplitude, inverted_phase :
        stored in ``attrs`` under the same names
    BoxSize, Nmesh :
        forwarded to ``self._makesource``
    seed : int, optional
        the random seed; when ``None`` rank 0 draws one and it is
        broadcast to all ranks
    cosmo, redshift : optional
        taken from ``Plin`` when not given
    comm :
        the communicator

    Raises
    ------
    ValueError
        if ``cosmo`` or ``redshift`` cannot be determined, or if the
        resulting catalog has ``csize == 0``
    """
    self.comm = comm
    self.Plin = Plin

    # fall back to what the power spectrum carries
    if cosmo is None:
        cosmo = getattr(self.Plin, 'cosmo', None)
    if redshift is None:
        redshift = getattr(self.Plin, 'redshift', None)

    if cosmo is None:
        raise ValueError("'cosmo' must be passed if 'Plin' does not have 'cosmo' attribute")
    if redshift is None:
        raise ValueError("'redshift' must be passed if 'Plin' does not have 'redshift' attribute")
    self.cosmo = cosmo

    # inherit the meta-data of Plin, or at least record the cosmology
    if hasattr(Plin, 'attrs'):
        self.attrs.update(Plin.attrs)
    else:
        self.attrs['cosmo'] = dict(cosmo)

    # record the parameters of this catalog
    parameters = (
        ('nbar', nbar),
        ('redshift', redshift),
        ('bias', bias),
        ('unitary_amplitude', unitary_amplitude),
        ('inverted_phase', inverted_phase),
    )
    for key, value in parameters:
        self.attrs[key] = value

    # every rank must end up with the same seed
    if seed is None:
        drawn = None
        if self.comm.rank == 0:
            drawn = numpy.random.randint(0, 4294967295)
        seed = self.comm.bcast(drawn)
    self.attrs['seed'] = seed

    # generate the particles
    made = self._makesource(BoxSize=BoxSize, Nmesh=Nmesh)
    self._source, mesh = made

    self.pm = mesh
    self.attrs['Nmesh'] = mesh.Nmesh.copy()
    self.attrs['BoxSize'] = mesh.BoxSize.copy()

    # local number of particles
    self._size = len(self._source)

    CatalogSource.__init__(self, comm=comm)

    # an empty catalog is treated as an error
    if self.csize == 0:
        raise ValueError("no particles in LogNormal source; try increasing ``nbar`` parameter")
def __init__(self, source, cosmo, redshift, mdef='vir',
             mass='Mass', position='Position', velocity='Velocity'):
    """
    Wrap an existing catalog, recording which columns hold the halo
    mass, position and velocity.

    Parameters
    ----------
    source :
        the input catalog; must be a ``CatalogSourceBase`` instance and
        contain the three named columns
    cosmo :
        stored as ``self.cosmo``; ``dict(cosmo)`` is saved in ``attrs``
    redshift :
        saved in ``attrs``
    mdef : str, optional
        appended to ``'halo_m'`` and ``'halo_r'`` to form the
        ``halo_mass_key`` and ``halo_radius_key`` entries of ``attrs``
    mass, position, velocity : str, optional
        names of the columns in ``source``

    Raises
    ------
    ValueError
        if a column name is ``None`` or missing from ``source``
    TypeError
        if ``source`` is not a ``CatalogSourceBase``
    """
    # the column checks run before the type check
    named = (('mass', mass), ('position', position), ('velocity', velocity))
    for label, column in named:
        if column is None:
            raise ValueError("the %s column cannot be None in HaloCatalog" % label)
        if column not in source:
            raise ValueError("input source is missing the %s column; '%s' does not exist" % (label, column))

    if not isinstance(source, CatalogSourceBase):
        raise TypeError("input source to HalotoolsCatalog should be a CatalogSource")

    self._source = source
    self.cosmo = cosmo

    # start from the meta-data of the input ...
    self.attrs.update(source.attrs)

    # ... then add the parameters of this catalog
    self.attrs['redshift'] = redshift
    self.attrs['cosmo'] = dict(self.cosmo)
    self.attrs['mass'] = mass
    self.attrs['velocity'] = velocity
    self.attrs['position'] = position
    self.attrs['mdef'] = mdef

    # field names derived from the mass definition
    for prefix, key in (('halo_m', 'halo_mass_key'), ('halo_r', 'halo_radius_key')):
        self.attrs[key] = prefix + mdef

    # same local size as the wrapped catalog
    self._size = self._source.size

    CatalogSource.__init__(self, comm=source.comm)
def __init__(self, linear, astart=0.1, aend=1.0, boost=2, Nsteps=5, cosmo=None):
    """
    Evolve a linear density field with the solver and store the
    resulting particle state as catalog columns.

    Parameters
    ----------
    linear :
        the linear density field mesh; its ``comm``, ``pm``, ``attrs``
        and ``to_field`` are used, and ``Plin.cosmo`` when ``cosmo``
        is not given
    astart : float, optional
        value passed as ``a`` to ``solver.lpt`` and first point of the
        stepping schedule
    aend : float, optional
        last point of the stepping schedule
    boost : int, optional
        passed to ``Solver`` as ``B``
    Nsteps : int, optional
        the schedule handed to ``leapfrog`` has ``Nsteps + 1`` points
    cosmo : optional
        defaults to ``linear.Plin.cosmo``
    """
    self.comm = linear.comm

    if cosmo is None:
        cosmo = linear.Plin.cosmo
    self.cosmo = cosmo

    # the linear density field mesh (assigned once; the original set it twice)
    self.linear = linear

    self.attrs.update(linear.attrs)

    self.attrs['astart'] = astart
    self.attrs['aend'] = aend
    self.attrs['Nsteps'] = Nsteps
    # NOTE(review): this records Nsteps points, while the integration below
    # uses Nsteps + 1 points -- confirm which one 'asteps' should describe
    self.attrs['asteps'] = numpy.linspace(astart, aend, Nsteps)
    self.attrs['boost'] = boost

    solver = Solver(self.linear.pm, cosmology=self.cosmo, B=boost)
    Q = self.linear.pm.generate_uniform_particle_grid(shift=0.5)

    dlin = self.linear.to_field(mode='complex')

    # second-order LPT state at astart, then stepped to aend
    state = solver.lpt(dlin, Q, a=astart, order=2)
    schedule = numpy.linspace(astart, aend, Nsteps + 1, endpoint=True)
    state = solver.nbody(state, leapfrog(schedule))

    H0 = 100.
    self.RSD = 1.0 / (H0 * aend * self.cosmo.efunc(1.0 / aend - 1))

    self._size = len(Q)
    CatalogSource.__init__(self, comm=linear.comm, use_cache=False)

    self._csize = self.comm.allreduce(self._size)

    self['Displacement'] = state.S
    self['InitialPosition'] = state.Q
    self['ConjugateMomentum'] = state.P  # a ** 2 / H0 dx / dt
def ConcatenateSources(*sources, **kwargs):
    """
    Concatenate CatalogSource objects together, optionally including only
    certain columns in the returned source.

    .. note::
        The returned catalog object carries the meta-data from only
        the first catalog supplied to this function (in the ``attrs`` dict).

    Parameters
    ----------
    *sources : subclass of :class:`~nbodykit.base.catalog.CatalogSource`
        the catalog source objects to concatenate together; at least one
        is required
    columns : str, list of str, optional
        the columns to include in the concatenated catalog; if ``None``
        or an empty list, the columns of the first source are used

    Returns
    -------
    CatalogSource :
        the concatenated catalog source object

    Raises
    ------
    ValueError
        if no sources are given, if the sources do not share the same
        ``comm``, or if a requested column is missing from any source

    Examples
    --------
    >>> from nbodykit.lab import *
    >>> source1 = UniformCatalog(nbar=100, BoxSize=1.0)
    >>> source2 = UniformCatalog(nbar=100, BoxSize=1.0)
    >>> print(source1.csize, source2.csize)
    >>> combined = transform.ConcatenateSources(source1, source2, columns=['Position', 'Velocity'])
    >>> print(combined.csize)
    """
    # everything below indexes sources[0]; fail clearly instead of with
    # a bare IndexError
    if len(sources) == 0:
        raise ValueError("cannot concatenate sources: no sources provided")

    from nbodykit.base.catalog import CatalogSource

    columns = kwargs.get('columns', None)
    if isinstance(columns, string_types):
        columns = [columns]

    first = sources[0]

    # concatenate all columns, if none provided
    if columns is None or columns == []:
        columns = first.columns

    # check comms
    if not all(src.comm == first.comm for src in sources):
        raise ValueError("cannot concatenate sources: comm mismatch")

    # check all columns are there
    for source in sources:
        if not all(col in source for col in columns):
            raise ValueError(("cannot concatenate sources: columns are missing "
                              "from some sources"))

    # the total (local) size
    size = numpy.sum([src.size for src in sources], dtype='intp')

    data = {}
    for col in columns:
        data[col] = da.concatenate([src[col] for src in sources], axis=0)

    toret = CatalogSource._from_columns(size, first.comm, **data)
    toret.attrs.update(first.attrs)
    return toret
def Pk(pos, mode='1d', Nmesh=None, BoxSize=None, **kwargs):
    '''
    Measure the power spectrum of a set of positions with ``FFTPower``.

    The positions are wrapped in a bare ``CatalogSource`` holding a
    single ``'Position'`` column, which is then handed to ``FFTPower``.
    The original description states that the objects live in a
    **periodic box**.

    Parameters
    ----------
    pos : array of shape (3, N_particles)
        the xyz positions; transposed before being stored
    mode :
        passed to ``FFTPower`` as its second positional argument
    Nmesh, BoxSize : optional
        passed to ``FFTPower`` under the same names
    **kwargs :
        any further keyword arguments for ``FFTPower``

    Returns
    -------
    the ``power`` attribute of the ``FFTPower`` result
    '''
    # number of particles is the length of the second axis
    n_part = pos.shape[1]

    # wrap the positions in a catalog
    catalog = CatalogSource()
    catalog._size = n_part
    catalog._csize = catalog.comm.allreduce(catalog.size)
    catalog['Position'] = pos.T

    # run the measurement
    result = FFTPower(catalog, mode, Nmesh=Nmesh, BoxSize=BoxSize, **kwargs)
    return result.power
def ConcatenateSources(*sources, **kwargs):
    """
    Concatenate CatalogSource objects together, optionally including only
    certain columns in the returned source.

    .. note::
        The returned catalog object carries the meta-data from only
        the first catalog supplied to this function (in the ``attrs`` dict).

    Parameters
    ----------
    *sources : subclass of :class:`~nbodykit.base.catalog.CatalogSource`
        the catalog source objects to concatenate together; at least one
        must be given
    columns : str, list of str, optional
        the columns to include in the concatenated catalog; ``None`` or
        an empty list selects the columns of the first source

    Returns
    -------
    CatalogSource :
        the concatenated catalog source object

    Raises
    ------
    ValueError
        if called without any source, if the ``comm`` of the sources
        differ, or if any source lacks one of the requested columns

    Examples
    --------
    >>> from nbodykit.lab import *
    >>> source1 = UniformCatalog(nbar=100, BoxSize=1.0)
    >>> source2 = UniformCatalog(nbar=100, BoxSize=1.0)
    >>> print(source1.csize, source2.csize)
    >>> combined = transform.ConcatenateSources(source1, source2, columns=['Position', 'Velocity'])
    >>> print(combined.csize)
    """
    # without this guard an empty call dies on sources[0] with IndexError
    if not sources:
        raise ValueError("cannot concatenate sources: no sources provided")

    from nbodykit.base.catalog import CatalogSource

    columns = kwargs.get('columns', None)

    # a single column name is promoted to a one-element list
    if isinstance(columns, string_types):
        columns = [columns]

    # concatenate all columns, if none provided
    if columns is None or columns == []:
        columns = sources[0].columns

    # check comms
    comm = sources[0].comm
    if not all(src.comm == comm for src in sources):
        raise ValueError("cannot concatenate sources: comm mismatch")

    # check all columns are there
    for source in sources:
        if not all(col in source for col in columns):
            raise ValueError(("cannot concatenate sources: columns are missing "
                              "from some sources"))

    # the total (local) size
    size = numpy.sum([src.size for src in sources], dtype='intp')

    data = {
        col: da.concatenate([src[col] for src in sources], axis=0)
        for col in columns
    }

    toret = CatalogSource._from_columns(size, comm, **data)
    toret.attrs.update(sources[0].attrs)
    return toret
def __init__(self, source, domain, position='Position', columns=()):
    """
    Redistribute the rows of ``source`` according to ``domain``.

    Parameters
    ----------
    source :
        the input catalog; its ``comm``, ``attrs`` and columns are used
    domain :
        object whose ``decompose`` method is called with the computed
        ``position`` column and returns the exchange layout
    position : str, optional
        name of the column handed to ``domain.decompose``
    columns : sequence of str, optional
        the columns to exchange and store; the default (empty) stores
        no columns, while ``None`` selects all of ``source.columns``
    """
    # NOTE: the default is an immutable empty tuple rather than a shared
    # list object; iterating it behaves exactly like the empty list did
    self.domain = domain
    self.source = source

    layout = domain.decompose(source[position].compute())

    self._size = layout.newlength

    CatalogSource.__init__(self, comm=source.comm)
    self.attrs.update(source.attrs)

    self._frozen = {}

    if columns is None:
        columns = source.columns

    # each requested column is computed, exchanged and stored
    for column in columns:
        data = source[column].compute()
        self._frozen[column] = self.make_column(layout.exchange(data))
def __init__(self, data, comm=None, **kwargs):
    """
    Build a catalog from in-memory data.

    Parameters
    ----------
    data : astropy ``Table``, structured array, or dict
        the input data; a ``Table`` is converted with ``as_array()``,
        and anything that is not a dict must pass ``is_structured_array``
    comm :
        the communicator; stored as ``self.comm``
    **kwargs :
        additional meta-data, stored in ``attrs``

    Raises
    ------
    ValueError
        if non-dict data is not a structured array, if the data type
        differs between ranks, or if the columns differ in length
    """
    # convert astropy Tables to structured numpy arrays
    if isinstance(data, Table):
        data = data.as_array()

    # check for structured data
    if not isinstance(data, dict):
        if not is_structured_array(data):
            raise ValueError(("input data to ArrayCatalog must have a "
                              "structured data type with fields"))

    self.comm = comm
    self._source = data

    # compute the data type; columns are ordered by name
    if hasattr(data, 'dtype'):
        keys = sorted(data.dtype.names)
    else:
        keys = sorted(data.keys())

    dtype = numpy.dtype([(key, (data[key].dtype, data[key].shape[1:])) for key in keys])
    self._dtype = dtype

    # verify data types are the same on every rank; allgather lets ALL
    # ranks see the mismatch and raise together, instead of only rank 0
    # raising while the others continue into the next collective call
    dtypes = self.comm.allgather(dtype)
    if any(dt != dtypes[0] for dt in dtypes):
        raise ValueError("mismatch between dtypes across ranks in Array")

    # the local size
    self._size = len(self._source[keys[0]])

    for key in keys:
        if len(self._source[key]) != self._size:
            raise ValueError("column `%s` and column `%s` has different size" % (keys[0], key))

    # update the meta-data
    self.attrs.update(kwargs)

    CatalogSource.__init__(self, comm=comm)
def get_hardcolumn(self, col):
    """
    Look up ``col`` in the underlying data array/dict.

    Names listed in ``self._dtype`` are read from ``self._source`` and
    wrapped with ``self.make_column``; any other name is delegated to
    ``CatalogSource.get_hardcolumn``.
    """
    # not one of our own fields: let the base class handle it
    if col not in self._dtype.names:
        return CatalogSource.get_hardcolumn(self, col)

    return self.make_column(self._source[col])
def __init__(self, source, domain=None, position='Position', columns=None):
    """
    Redistribute the rows of ``source`` according to a domain
    decomposition.

    Parameters
    ----------
    source :
        the input catalog; its ``comm``, ``attrs`` and columns are used
    domain : optional
        object providing ``decompose``; when ``None`` a ``GridND`` is
        built from ``source.attrs['BoxSize']`` and the result of
        ``split_size_3d(comm.size)``
    position : str, optional
        name of the column handed to ``domain.decompose``
    columns : list of str, optional
        the columns to exchange and store; ``None`` selects all of
        ``source.columns``
    """
    comm = source.comm

    if domain is None:
        # number of divisions along each of the three axes
        ndiv = split_size_3d(comm.size)

        if comm.rank == 0:
            self.logger.info("using cpu grid decomposition: %s" % str(ndiv))

        # evenly spaced cell edges from 0 to BoxSize on each axis
        grid = []
        for axis in range(3):
            edges = numpy.linspace(0, source.attrs['BoxSize'][axis],
                                   ndiv[axis] + 1, endpoint=True)
            grid.append(edges)

        domain = GridND(grid, comm=comm)

    self.domain = domain
    self.source = source

    positions = source[position].compute()
    layout = domain.decompose(positions)

    self._size = layout.recvlength

    CatalogSource.__init__(self, comm=comm)
    self.attrs.update(source.attrs)

    self._frozen = {}

    if columns is None:
        columns = source.columns

    # compute, exchange and store every requested column
    for name in columns:
        local = source[name].compute()
        self._frozen[name] = self.make_column(layout.exchange(local))
def get_hardcolumn(self, col):
    """
    Look up ``col`` in the underlying file source.

    For a name in ``self._source.dtype`` the result of
    ``self._source.get_dask(col)`` is sliced to this rank's range
    ``[self._lstart, self._lend)``; any other name is delegated to
    ``CatalogSource.get_hardcolumn``.
    """
    # not a field of the file: let the base class handle it
    if col not in self._source.dtype.names:
        return CatalogSource.get_hardcolumn(self, col)

    whole = self._source.get_dask(col)
    return whole[self._lstart:self._lend]
def __init__(self, csize, seed=None, comm=None):
    """
    Set up the random number generator for a catalog of ``csize``
    objects.

    Parameters
    ----------
    csize : int
        passed to ``MPIRandomState`` as its third argument; must not
        be zero
    seed : int, optional
        the global seed; when ``None`` rank 0 draws one and it is
        broadcast to all ranks
    comm :
        the communicator

    Raises
    ------
    ValueError
        if ``csize`` is zero
    """
    self.comm = comm

    # make sure every rank uses the same seed
    if seed is None:
        drawn = None
        if self.comm.rank == 0:
            drawn = numpy.random.randint(0, 4294967295)
        seed = self.comm.bcast(drawn)
    self.attrs['seed'] = seed

    if csize == 0:
        raise ValueError("no random particles generated!")

    # the generator built from the global seed; the local size is
    # whatever ``self.rng`` reports
    self._rng = MPIRandomState(comm, seed, csize)
    self._size = self.rng.size

    CatalogSource.__init__(self, comm=comm)
def __init__(self, ra, dec, collision_radius=62 / 60. / 60., seed=None, degrees=True, comm=None):
    """
    Convert sky coordinates to positions, wrap them in an
    ``ArrayCatalog`` and call ``self.run()``.

    Parameters
    ----------
    ra, dec :
        the sky coordinates; each is passed through
        ``CatalogSource.make_column`` and then to ``SkyToUnitSphere``
    collision_radius : float, optional
        saved in ``attrs`` and logged as degrees; also stored, converted
        with ``numpy.deg2rad``, as ``self._collision_radius_rad``
    seed : int, optional
        the random seed; when ``None`` rank 0 draws one and it is
        broadcast to all ranks
    degrees : bool, optional
        forwarded to ``SkyToUnitSphere`` and saved in ``attrs``
    comm :
        not referenced in this body; ``self.comm`` is taken from the
        ``ArrayCatalog`` built here
    """
    # positions from the sky coordinates
    ra_col = CatalogSource.make_column(ra)
    dec_col = CatalogSource.make_column(dec)
    pos = SkyToUnitSphere(ra_col, dec_col, degrees=degrees).compute()

    # view the positions as a structured array with one 'Position' field
    dt = numpy.dtype([('Position', (pos.dtype.str, 3))])
    structured = numpy.squeeze(pos.view(dtype=dt))

    self.source = ArrayCatalog(structured, BoxSize=numpy.array([2., 2., 2.]))
    self.comm = self.source.comm

    # make sure every rank uses the same seed
    if seed is None:
        drawn = None
        if self.comm.rank == 0:
            drawn = numpy.random.randint(0, 4294967295)
        seed = self.comm.bcast(drawn)

    # save the attrs
    self.attrs = {}
    for key, value in (('collision_radius', collision_radius),
                       ('seed', seed),
                       ('degrees', degrees)):
        self.attrs[key] = value

    # store collision radius in radians
    self._collision_radius_rad = numpy.deg2rad(collision_radius)

    if self.comm.rank == 0:
        self.logger.info("collision radius in degrees = %.4f" % collision_radius)

    # compute
    self.run()
def get_hardcolumn(self, col):
    """
    Look up ``col`` in the underlying file source.

    For a name in ``self._source.dtype`` the result of
    ``self._source.get_dask(col)`` is sliced to this rank's even share
    of the ``self._source.size`` rows; any other name is delegated to
    ``CatalogSource.get_hardcolumn``.
    """
    # not a field of the file: let the base class handle it
    if col not in self._source.dtype.names:
        return CatalogSource.get_hardcolumn(self, col)

    rank, nranks = self.comm.rank, self.comm.size
    total = self._source.size

    # contiguous slice belonging to this rank
    first = rank * total // nranks
    last = (rank + 1) * total // nranks
    return self._source.get_dask(col)[first:last]
def __init__(self, data, comm=None, **kwargs):
    """
    Build a catalog from in-memory data.

    Parameters
    ----------
    data : astropy ``Table``, structured array, or dict
        the input data; a ``Table`` is converted with ``as_array()``,
        and anything that is not a dict must pass ``is_structured_array``
    comm :
        the communicator; stored as ``self.comm``
    **kwargs :
        additional meta-data, stored in ``attrs``

    Raises
    ------
    ValueError
        if non-dict data is not a structured array, if the data type
        differs between ranks, or if the columns differ in length
    """
    # convert astropy Tables to structured numpy arrays
    if isinstance(data, Table):
        data = data.as_array()

    # check for structured data
    if not isinstance(data, dict) and not is_structured_array(data):
        raise ValueError(("input data to ArrayCatalog must have a "
                          "structured data type with fields"))

    self.comm = comm
    self._source = data

    # compute the data type; columns are ordered by name
    names = data.dtype.names if hasattr(data, 'dtype') else data.keys()
    keys = sorted(names)

    dtype = numpy.dtype([(key, (data[key].dtype, data[key].shape[1:])) for key in keys])
    self._dtype = dtype

    # verify data types are the same on every rank; every rank receives
    # the full list so a mismatch raises everywhere, rather than raising
    # on rank 0 only and leaving the other ranks waiting in a later
    # collective call
    dtypes = self.comm.allgather(dtype)
    if any(dt != dtypes[0] for dt in dtypes):
        raise ValueError("mismatch between dtypes across ranks in Array")

    # the local size
    self._size = len(self._source[keys[0]])

    for key in keys:
        if len(self._source[key]) != self._size:
            raise ValueError("column `%s` and column `%s` has different size" % (keys[0], key))

    # update the meta-data
    self.attrs.update(kwargs)

    CatalogSource.__init__(self, comm=comm)
def __init__(self, csize, seed=None, comm=None):
    """
    Set up the random number generator for this rank's share of a
    catalog of ``csize`` objects.

    Parameters
    ----------
    csize : int
        the total number of objects, split evenly over the ranks of
        ``comm``; must not be zero
    seed : int, optional
        the global seed; when ``None`` rank 0 draws one and it is
        broadcast to all ranks
    comm :
        the communicator

    Raises
    ------
    ValueError
        if ``csize`` is zero
    """
    self.comm = comm

    # make sure every rank uses the same seed
    if seed is None:
        drawn = None
        if self.comm.rank == 0:
            drawn = numpy.random.randint(0, 4294967295)
        seed = self.comm.bcast(drawn)
    self.attrs['seed'] = seed

    if csize == 0:
        raise ValueError("no random particles generated!")

    # the contiguous share of the csize objects owned by this rank
    first = comm.rank * csize // comm.size
    last = (comm.rank + 1) * csize // comm.size
    self._size = last - first

    self._rng = MPIRandomState(comm, seed=seed, size=self._size)

    CatalogSource.__init__(self, comm=comm)
def __init__(self, source, cosmo, redshift, mdef='vir',
             mass='Mass', position='Position', velocity='Velocity'):
    """
    Wrap ``source`` as a halo catalog.

    Parameters
    ----------
    source :
        the input catalog; must be a ``CatalogSourceBase`` instance that
        contains the mass, position and velocity columns
    cosmo :
        stored as ``self.cosmo``; ``dict(cosmo)`` is saved in ``attrs``
    redshift :
        saved in ``attrs``
    mdef : str, optional
        suffix used to build the ``halo_mass_key`` (``'halo_m' + mdef``)
        and ``halo_radius_key`` (``'halo_r' + mdef``) entries of ``attrs``
    mass, position, velocity : str, optional
        names of the corresponding columns in ``source``

    Raises
    ------
    ValueError
        if a column name is ``None`` or is not found in ``source``
    TypeError
        if ``source`` is not a ``CatalogSourceBase``
    """
    # every column must be named and present (checked before the type)
    columns = {'mass': mass, 'position': position, 'velocity': velocity}
    for name in ['mass', 'position', 'velocity']:
        col = columns[name]
        if col is None:
            raise ValueError("the %s column cannot be None in HaloCatalog" % name)
        if col not in source:
            raise ValueError("input source is missing the %s column; '%s' does not exist" % (name, col))

    if not isinstance(source, CatalogSourceBase):
        raise TypeError("input source to HalotoolsCatalog should be a CatalogSource")

    comm = source.comm
    self._source = source
    self.cosmo = cosmo

    # inherit the meta-data of the input
    self.attrs.update(source.attrs)

    # record the parameters of this catalog
    self.attrs['redshift'] = redshift
    self.attrs['cosmo'] = dict(self.cosmo)
    self.attrs['mass'] = mass
    self.attrs['velocity'] = velocity
    self.attrs['position'] = position
    self.attrs['mdef'] = mdef

    # names of the mass and radius fields follow from the mass definition
    self.attrs['halo_mass_key'] = 'halo_m' + mdef
    self.attrs['halo_radius_key'] = 'halo_r' + mdef

    # same local size as the wrapped catalog
    self._size = self._source.size

    CatalogSource.__init__(self, comm=comm)
def __init__(self, filetype, args=(), kwargs=None, comm=None):
    """
    Open a stack of files and give each rank an even share of its rows.

    Parameters
    ----------
    filetype :
        passed to ``FileStack`` as its first argument
    args : tuple, optional
        positional arguments forwarded to ``FileStack`` after ``filetype``
    kwargs : dict, optional
        keyword arguments forwarded to ``FileStack``; ``None`` (the
        default) is treated as "no keyword arguments"
    comm :
        the communicator; stored as ``self.comm`` and handed to
        ``CatalogSource.__init__``
    """
    # a ``{}`` default would be one dict object shared by every call,
    # so use a None sentinel and build a fresh dict per call instead
    if kwargs is None:
        kwargs = {}

    self.comm = comm
    self.filetype = filetype

    # only the root rank opens the files; the result is bcast to the rest
    if self.comm.rank == 0:
        self._source = FileStack(filetype, *args, **kwargs)
    else:
        self._source = None
    self._source = self.comm.bcast(self._source)

    # compute the local size from this rank's share of the rows
    total = self._source.size
    start = self.comm.rank * total // self.comm.size
    end = (self.comm.rank + 1) * total // self.comm.size
    self._size = end - start

    # update the meta-data
    self.attrs.update(self._source.attrs)

    if self.comm.rank == 0:
        self.logger.info("Extra arguments to FileType: %s" % str(args))

    CatalogSource.__init__(self, comm=comm)
def to_nbodykit(self, fields=None):
    """
    Convert this object to an nbodykit ``CatalogSource``.

    The object held by rank 0 of the current MPI communicator is
    broadcast to every rank, and each rank keeps a contiguous, evenly
    sized slice of every copied field.

    Parameters
    ----------
    fields : iterable of str, optional
        the fields to copy into the catalog; ``None`` (the default)
        copies everything in ``fields`` of the broadcast object

    Returns
    -------
    CatalogSource :
        a new catalog holding this rank's slice of the selected fields,
        with ``attrs`` updated from the broadcast object
    """
    from nbodykit.base.catalog import CatalogSource
    from nbodykit import CurrentMPIComm

    comm = CurrentMPIComm.get()

    # every rank works from the object held by the root rank
    source = comm.bcast(self if comm.rank == 0 else None)

    # honour the requested subset; previously ``fields`` was ignored
    keys = source.fields if fields is None else fields

    # this rank's share of the rows
    start = comm.rank * source.size // comm.size
    end = (comm.rank + 1) * source.size // comm.size

    # create the instance without running a subclass constructor; the
    # size must be set before CatalogSource.__init__ is called
    new = object.__new__(CatalogSource)
    new._size = end - start
    CatalogSource.__init__(new, comm=comm)

    for key in keys:
        new[key] = new.make_column(source[key])[start:end]

    new.attrs.update(source.attrs)
    return new