Example #1
0
    def query_range(self, start, end):
        """
        Return a copy of this catalog restricted to a sub-range of the file.

        Parameters
        ----------
        start : int
            start of the range, relative to the physical file
        end : int
            end of the range, relative to the physical file

        Returns
        -------
        A new catalog that only accesses the given region of the file; the
        region is split between the ranks of ``self.comm``.

        Raises
        ------
        ValueError
            if the original catalog (self) contains any assigned columns not
            directly obtained from the file, since the operation is not well
            defined in that case
        """
        attached = CatalogSource.hardcolumns.fget(self)
        if len(attached) > 0:
            raise ValueError("cannot seek if columns have been attached to the FileCatalog")

        subset = self.copy()

        rank = self.comm.rank
        nranks = self.comm.size
        span = end - start
        origin = self.start + start

        # this rank's share of the requested range
        subset._lstart = origin + rank * span // nranks
        subset._lend = origin + (rank + 1) * span // nranks
        subset._size = subset._lend - subset._lstart

        subset.start = start
        subset.end = end

        # run the base-class initialisation again on the copy
        CatalogSource.__init__(subset, comm=self.comm)
        return subset
Example #2
0
    def __init__(self, filetype, args=(), kwargs=None, comm=None):
        """
        Open the files through a ``FileStack`` and give each rank its share.

        Parameters
        ----------
        filetype :
            forwarded as the first argument of ``FileStack``
        args : tuple, optional
            extra positional arguments forwarded to ``FileStack``
        kwargs : dict, optional
            extra keyword arguments forwarded to ``FileStack``; ``None``
            (the default) means no keyword arguments
        comm :
            communicator object; its ``rank``, ``size`` and ``bcast`` are
            used here and it is handed on to ``CatalogSource.__init__``
        """
        # a ``{}`` default would be a single dict shared by every call
        if kwargs is None:
            kwargs = {}

        self.comm = comm
        self.filetype = filetype

        # only rank 0 builds the FileStack; the other ranks get it via bcast
        if self.comm.rank == 0:
            self._source = FileStack(filetype, *args, **kwargs)
        else:
            self._source = None
        self._source = self.comm.bcast(self._source)

        # compute the size; start with full file.
        lstart = self.comm.rank * self._source.size // self.comm.size
        lend = (self.comm.rank + 1) * self._source.size // self.comm.size
        self._size = lend - lstart

        # the range currently exposed is the whole file
        self.start = 0
        self.end = self._source.size

        self._lstart = lstart  # offset in the file for this rank
        self._lend = lend  # offset in the file for this rank

        # update the meta-data
        self.attrs.update(self._source.attrs)

        if self.comm.rank == 0:
            self.logger.info("Extra arguments to FileType: %s %s" %
                             (str(args), str(kwargs)))

        CatalogSource.__init__(self, comm=comm)
Example #3
0
    def __init__(self, filetype, args=(), kwargs=None, comm=None):
        """
        Open the files through a ``FileStack`` and give each rank its share.

        Parameters
        ----------
        filetype :
            forwarded as the first argument of ``FileStack``
        args : tuple, optional
            extra positional arguments forwarded to ``FileStack``
        kwargs : dict, optional
            extra keyword arguments forwarded to ``FileStack``; ``None``
            (the default) means no keyword arguments
        comm :
            communicator object; its ``rank``, ``size`` and ``bcast`` are
            used here and it is handed on to ``CatalogSource.__init__``
        """
        # a ``{}`` default would be a single dict shared by every call
        if kwargs is None:
            kwargs = {}

        self.comm = comm
        self.filetype = filetype

        # only rank 0 builds the FileStack; the other ranks get it via bcast
        if self.comm.rank == 0:
            self._source = FileStack(filetype, *args, **kwargs)
        else:
            self._source = None
        self._source = self.comm.bcast(self._source)

        # compute the size; start with full file.
        lstart = self.comm.rank * self._source.size // self.comm.size
        lend = (self.comm.rank  + 1) * self._source.size // self.comm.size
        self._size = lend - lstart

        # the range currently exposed is the whole file
        self.start = 0
        self.end = self._source.size

        self._lstart = lstart # offset in the file for this rank
        self._lend = lend     # offset in the file for this rank

        # update the meta-data
        self.attrs.update(self._source.attrs)

        if self.comm.rank == 0:
            self.logger.info("Extra arguments to FileType: %s %s" % (str(args), str(kwargs)))

        CatalogSource.__init__(self, comm=comm)
Example #4
0
    def query_range(self, start, end):
        """
        Seek to a range in the file catalog.

        Parameters
        ----------
        start : int
            start of the range, relative to the physical file
        end : int
            end of the range, relative to the physical file

        Returns
        -------
        A new catalog that only accesses the given region of the file; the
        region is split between the ranks of ``self.comm``.

        Raises
        ------
        ValueError
            if the original catalog (self) contains any assigned columns not
            directly obtained from the file, since the operation is not well
            defined in that case
        """
        hard = CatalogSource.hardcolumns.fget(self)
        if len(hard) > 0:
            raise ValueError(
                "cannot seek if columns have been attached to the FileCatalog")

        clone = self.copy()

        length = end - start
        base = self.start + start
        rank, nranks = self.comm.rank, self.comm.size

        # boundaries of the slice owned by this rank
        lower = base + rank * length // nranks
        upper = base + (rank + 1) * length // nranks

        clone._lstart = lower
        clone._lend = upper
        clone._size = clone._lend - clone._lstart
        clone.start = start
        clone.end = end

        # run the base-class initialisation again on the copy
        CatalogSource.__init__(clone, comm=self.comm)
        return clone
Example #5
0
    def __init__(self, Plin, nbar, BoxSize, Nmesh, bias=2., seed=None,
                    cosmo=None, redshift=None,
                    unitary_amplitude=False, inverted_phase=False, comm=None):
        """
        Build the source from a linear power spectrum and store its meta-data.

        Parameters
        ----------
        Plin :
            the linear power spectrum object; its ``cosmo``, ``redshift``
            and ``attrs`` attributes are used when they exist
        nbar, bias, unitary_amplitude, inverted_phase :
            stored in ``attrs`` under the same names
        BoxSize, Nmesh :
            forwarded to ``self._makesource``
        seed : optional
            stored in ``attrs``; when ``None``, rank 0 draws one with
            ``numpy.random.randint`` and broadcasts it
        cosmo, redshift : optional
            taken from ``Plin`` when not given
        comm :
            communicator object; its ``rank`` and ``bcast`` are used

        Raises
        ------
        ValueError
            if ``cosmo`` or ``redshift`` is neither passed nor available on
            ``Plin``, or if the resulting ``csize`` is zero
        """
        self.comm = comm
        self.Plin = Plin

        # anything not passed explicitly is looked up on Plin
        if cosmo is None:
            cosmo = getattr(self.Plin, 'cosmo', None)
        if redshift is None:
            redshift = getattr(self.Plin, 'redshift', None)

        # both must be known by now
        if cosmo is None:
            raise ValueError("'cosmo' must be passed if 'Plin' does not have 'cosmo' attribute")
        if redshift is None:
            raise ValueError("'redshift' must be passed if 'Plin' does not have 'redshift' attribute")
        self.cosmo = cosmo

        # start from the attrs of Plin when it has any
        if not hasattr(Plin, 'attrs'):
            self.attrs['cosmo'] = dict(cosmo)
        else:
            self.attrs.update(Plin.attrs)

        # save the meta-data
        for key, value in (('nbar', nbar),
                           ('redshift', redshift),
                           ('bias', bias),
                           ('unitary_amplitude', unitary_amplitude),
                           ('inverted_phase', inverted_phase)):
            self.attrs[key] = value

        # without a seed, rank 0 draws one and every rank receives it
        if seed is None:
            drawn = None
            if self.comm.rank == 0:
                drawn = numpy.random.randint(0, 4294967295)
            seed = self.comm.bcast(drawn)
        self.attrs['seed'] = seed

        # make the actual source
        source, mesh = self._makesource(BoxSize=BoxSize, Nmesh=Nmesh)
        self._source = source
        self.pm = mesh

        self.attrs['Nmesh'] = mesh.Nmesh.copy()
        self.attrs['BoxSize'] = mesh.BoxSize.copy()

        # the local size
        self._size = len(self._source)

        # init the base class
        CatalogSource.__init__(self, comm=comm)

        # an empty catalog is an error
        if self.csize == 0:
            raise ValueError("no particles in LogNormal source; try increasing ``nbar`` parameter")
Example #6
0
    def __init__(self, source, cosmo, redshift, mdef='vir',
                 mass='Mass', position='Position', velocity='Velocity'):
        """
        Wrap ``source`` and record the halo-related meta-data in ``attrs``.

        Parameters
        ----------
        source : CatalogSourceBase
            the input catalog; must contain the three named columns
        cosmo :
            stored as ``self.cosmo`` and, converted with ``dict``, in ``attrs``
        redshift :
            stored in ``attrs``
        mdef : str, optional
            appended to ``'halo_m'`` / ``'halo_r'`` to form the mass and
            radius keys stored in ``attrs``
        mass, position, velocity : str, optional
            names of the corresponding columns in ``source``

        Raises
        ------
        ValueError
            if a column name is ``None`` or is not in ``source``
        TypeError
            if ``source`` is not a ``CatalogSourceBase``
        """
        # make sure all of the columns are there
        named = (('mass', mass), ('position', position), ('velocity', velocity))
        for label, column in named:
            if column is None:
                raise ValueError(
                    "the %s column cannot be None in HaloCatalog" % label)
            if column not in source:
                raise ValueError(
                    "input source is missing the %s column; '%s' does not exist"
                    % (label, column))

        if not isinstance(source, CatalogSourceBase):
            raise TypeError(
                "input source to HalotoolsCatalog should be a CatalogSource")

        comm = source.comm
        self._source = source
        self.cosmo = cosmo

        # get the attrs from the source
        self.attrs.update(source.attrs)

        # and save the parameters, followed by the names of the mass and
        # radius fields derived from the mass definition
        saved = (
            ('redshift', redshift),
            ('cosmo', dict(self.cosmo)),
            ('mass', mass),
            ('velocity', velocity),
            ('position', position),
            ('mdef', mdef),
            ('halo_mass_key', 'halo_m' + mdef),
            ('halo_radius_key', 'halo_r' + mdef),
        )
        for key, value in saved:
            self.attrs[key] = value

        # the size
        self._size = self._source.size

        # init the base class
        CatalogSource.__init__(self, comm=comm)
Example #7
0
    def __init__(self,
                 linear,
                 astart=0.1,
                 aend=1.0,
                 boost=2,
                 Nsteps=5,
                 cosmo=None):
        """
        Evolve particles from a linear density field and store the result.

        Parameters
        ----------
        linear :
            the linear density field mesh; its ``comm``, ``pm``, ``attrs``
            and ``to_field`` are used, and ``Plin.cosmo`` when ``cosmo`` is
            not given
        astart, aend : float, optional
            first and last value of ``a`` handed to the solver
        boost : int, optional
            passed to ``Solver`` as ``B``
        Nsteps : int, optional
            the stepping uses ``Nsteps + 1`` evenly spaced values of ``a``
            between ``astart`` and ``aend``
        cosmo : optional
            cosmology; defaults to ``linear.Plin.cosmo``
        """
        self.comm = linear.comm

        if cosmo is None:
            cosmo = linear.Plin.cosmo

        self.cosmo = cosmo

        # the linear density field mesh (assigned once; the original
        # repeated this assignment further down)
        self.linear = linear

        self.attrs.update(linear.attrs)

        # NOTE(review): 'asteps' holds Nsteps points, while the integration
        # below steps through Nsteps + 1 points -- confirm which is intended
        asteps = numpy.linspace(astart, aend, Nsteps)
        self.attrs['astart'] = astart
        self.attrs['aend'] = aend
        self.attrs['Nsteps'] = Nsteps
        self.attrs['asteps'] = asteps
        self.attrs['boost'] = boost

        solver = Solver(self.linear.pm, cosmology=self.cosmo, B=boost)
        Q = self.linear.pm.generate_uniform_particle_grid(shift=0.5)

        # initial state from the solver's lpt, then the nbody stepping
        dlin = self.linear.to_field(mode='complex')
        state = solver.lpt(dlin, Q, a=astart, order=2)
        state = solver.nbody(
            state,
            leapfrog(numpy.linspace(astart, aend, Nsteps + 1, endpoint=True)))

        H0 = 100.
        self.RSD = 1.0 / (H0 * aend * self.cosmo.efunc(1.0 / aend - 1))

        self._size = len(Q)
        CatalogSource.__init__(self, comm=linear.comm, use_cache=False)

        self._csize = self.comm.allreduce(self._size)

        self['Displacement'] = state.S
        self['InitialPosition'] = state.Q
        self['ConjugateMomentum'] = state.P  # a ** 2  / H0 dx / dt
Example #8
0
    def __init__(self, source, domain, position='Position', columns=()):
        """
        Redistribute ``source`` according to ``domain`` and freeze columns.

        Parameters
        ----------
        source :
            the input catalog; its ``comm``, ``attrs`` and columns are used
        domain :
            object whose ``decompose`` method returns the exchange layout
        position : str, optional
            name of the column handed to ``domain.decompose``
        columns : iterable, optional
            names of the columns to exchange and store; the default is no
            columns, and ``None`` selects all of ``source.columns``
        """
        self.domain = domain
        self.source = source

        layout = domain.decompose(source[position].compute())

        self._size = layout.newlength

        CatalogSource.__init__(self, comm=source.comm)
        self.attrs.update(source.attrs)

        self._frozen = {}
        if columns is None:
            columns = source.columns

        # exchange each requested column and keep the redistributed data
        for column in columns:
            data = source[column].compute()
            self._frozen[column] = self.make_column(layout.exchange(data))
Example #9
0
    def __init__(self, data, comm=None, **kwargs):
        """
        Wrap in-memory column data as a catalog.

        Parameters
        ----------
        data : astropy ``Table``, dict or structured array
            the columns; a ``Table`` is converted with ``as_array()``, and
            anything that is not a dict must be a structured array
        comm :
            communicator object; ``allgather`` is used to compare the
            dtypes of all ranks
        **kwargs :
            stored in ``attrs`` as meta-data

        Raises
        ------
        ValueError
            if ``data`` is neither a dict nor a structured array, if the
            dtypes differ between ranks, or if the columns do not all have
            the same length
        """
        # convert astropy Tables to structured numpy arrays
        if isinstance(data, Table):
            data = data.as_array()

        # check for structured data
        if not isinstance(data, dict) and not is_structured_array(data):
            raise ValueError(("input data to ArrayCatalog must have a "
                              "structured data type with fields"))

        self.comm = comm
        self._source = data

        # compute the data type
        if hasattr(data, 'dtype'):
            keys = sorted(data.dtype.names)
        else:
            keys = sorted(data.keys())
        dtype = numpy.dtype([(key, (data[key].dtype, data[key].shape[1:]))
                             for key in keys])
        self._dtype = dtype

        # verify data types are the same; every rank receives the full list
        # so that a mismatch raises on all ranks, instead of only on the
        # root while the remaining ranks carry on
        dtypes = self.comm.allgather(dtype)
        if any(dt != dtypes[0] for dt in dtypes):
            raise ValueError(
                "mismatch between dtypes across ranks in Array")

        # the local size
        self._size = len(self._source[keys[0]])

        for key in keys:
            if len(self._source[key]) != self._size:
                raise ValueError(
                    "column `%s` and column `%s` has different size" %
                    (keys[0], key))

        # update the meta-data
        self.attrs.update(kwargs)

        CatalogSource.__init__(self, comm=comm)
Example #10
0
    def __init__(self, csize, seed=None, comm=None):
        """
        Set up the random state for a catalog of ``csize`` objects.

        Parameters
        ----------
        csize : int
            passed to ``MPIRandomState``; must not be zero
        seed : optional
            stored in ``attrs``; when ``None``, rank 0 draws one with
            ``numpy.random.randint`` and broadcasts it
        comm :
            communicator object; its ``rank`` and ``bcast`` are used

        Raises
        ------
        ValueError
            if ``csize`` is zero
        """
        self.comm = comm

        # without a seed, rank 0 draws one and every rank receives it
        if seed is None:
            drawn = None
            if self.comm.rank == 0:
                drawn = numpy.random.randint(0, 4294967295)
            seed = self.comm.bcast(drawn)
        self.attrs['seed'] = seed

        if csize == 0:
            raise ValueError("no random particles generated!")

        # generate the seeds from the global seed
        self._rng = MPIRandomState(comm, seed, csize)

        # the local size is whatever the random state reports
        self._size = self.rng.size

        # init the base class
        CatalogSource.__init__(self, comm=comm)
Example #11
0
    def __init__(self, source, domain=None, position='Position', columns=None):
        """
        Redistribute ``source`` according to ``domain`` and freeze columns.

        Parameters
        ----------
        source :
            the input catalog; its ``comm``, ``attrs`` and columns are used
        domain : optional
            object whose ``decompose`` method returns the exchange layout;
            when ``None`` a ``GridND`` is built from ``attrs['BoxSize']``
            of ``source`` and the result of ``split_size_3d(comm.size)``
        position : str, optional
            name of the column handed to ``domain.decompose``
        columns : iterable, optional
            names of the columns to exchange and store; ``None`` selects
            all of ``source.columns``
        """
        comm = source.comm

        if domain is None:
            # determine processor division for domain decomposition
            nproc = split_size_3d(comm.size)

            if comm.rank == 0:
                self.logger.info("using cpu grid decomposition: %s" % str(nproc))

            # per axis: evenly spaced edges from 0 up to the box size
            box = source.attrs['BoxSize']
            edges = [
                numpy.linspace(0, box[axis], nproc[axis] + 1, endpoint=True)
                for axis in range(3)
            ]
            domain = GridND(edges, comm=comm)

        self.domain = domain
        self.source = source

        positions = source[position].compute()
        layout = domain.decompose(positions)
        self._size = layout.recvlength

        CatalogSource.__init__(self, comm=comm)
        self.attrs.update(source.attrs)

        self._frozen = {}
        if columns is None:
            columns = source.columns

        # exchange each requested column and keep the redistributed data
        for name in columns:
            exchanged = layout.exchange(source[name].compute())
            self._frozen[name] = self.make_column(exchanged)
Example #12
0
    def __init__(self, data, comm=None, **kwargs):
        """
        Wrap in-memory column data as a catalog.

        Parameters
        ----------
        data : astropy ``Table``, dict or structured array
            the columns; a ``Table`` is converted with ``as_array()``, and
            anything that is not a dict must be a structured array
        comm :
            communicator object; ``allgather`` is used to compare the
            dtypes of all ranks
        **kwargs :
            stored in ``attrs`` as meta-data

        Raises
        ------
        ValueError
            if ``data`` is neither a dict nor a structured array, if the
            dtypes differ between ranks, or if the columns do not all have
            the same length
        """
        # convert astropy Tables to structured numpy arrays
        if isinstance(data, Table):
            data = data.as_array()

        # check for structured data
        if not isinstance(data, dict) and not is_structured_array(data):
            raise ValueError(("input data to ArrayCatalog must have a "
                               "structured data type with fields"))

        self.comm    = comm
        self._source = data

        # compute the data type
        if hasattr(data, 'dtype'):
            keys = sorted(data.dtype.names)
        else:
            keys = sorted(data.keys())
        dtype = numpy.dtype([(key, (data[key].dtype, data[key].shape[1:])) for key in keys])
        self._dtype = dtype

        # verify data types are the same; every rank receives the full list
        # so that a mismatch raises on all ranks, instead of only on the
        # root while the remaining ranks carry on
        dtypes = self.comm.allgather(dtype)
        if any(dt != dtypes[0] for dt in dtypes):
            raise ValueError("mismatch between dtypes across ranks in Array")

        # the local size
        self._size = len(self._source[keys[0]])

        for key in keys:
            if len(self._source[key]) != self._size:
                raise ValueError("column `%s` and column `%s` has different size" % (keys[0], key))

        # update the meta-data
        self.attrs.update(kwargs)

        CatalogSource.__init__(self, comm=comm)
Example #13
0
    def __init__(self, source, cosmo, redshift, mdef='vir',
                 mass='Mass', position='Position', velocity='Velocity'):
        """
        Wrap ``source`` and record the halo-related meta-data in ``attrs``.

        Parameters
        ----------
        source : CatalogSourceBase
            the input catalog; must contain the three named columns
        cosmo :
            stored as ``self.cosmo`` and, converted with ``dict``, in ``attrs``
        redshift :
            stored in ``attrs``
        mdef : str, optional
            appended to ``'halo_m'`` / ``'halo_r'`` to form the mass and
            radius keys stored in ``attrs``
        mass, position, velocity : str, optional
            names of the corresponding columns in ``source``

        Raises
        ------
        ValueError
            if a column name is ``None`` or is not in ``source``
        TypeError
            if ``source`` is not a ``CatalogSourceBase``
        """
        # every required column needs a name that exists in the source
        for role, colname in (('mass', mass),
                              ('position', position),
                              ('velocity', velocity)):
            if colname is None:
                raise ValueError("the %s column cannot be None in HaloCatalog" %role)
            if colname not in source:
                raise ValueError("input source is missing the %s column; '%s' does not exist" %(role, colname))

        if not isinstance(source, CatalogSourceBase):
            raise TypeError("input source to HalotoolsCatalog should be a CatalogSource")

        comm = source.comm
        self._source = source
        self.cosmo = cosmo

        # inherit the attrs of the source
        self.attrs.update(source.attrs)

        # the parameters, then the names of the mass and radius fields
        # derived from the mass definition
        parameters = (
            ('redshift', redshift),
            ('cosmo', dict(self.cosmo)),
            ('mass', mass),
            ('velocity', velocity),
            ('position', position),
            ('mdef', mdef),
            ('halo_mass_key', 'halo_m' + mdef),
            ('halo_radius_key', 'halo_r' + mdef),
        )
        for key, value in parameters:
            self.attrs[key] = value

        # the size
        self._size = self._source.size

        # init the base class
        CatalogSource.__init__(self, comm=comm)
Example #14
0
    def __init__(self, csize, seed=None, comm=None):
        """
        Set up the random state for this rank's share of ``csize`` objects.

        Parameters
        ----------
        csize : int
            the total number of objects, divided between the ranks of
            ``comm``; must not be zero
        seed : optional
            stored in ``attrs``; when ``None``, rank 0 draws one with
            ``numpy.random.randint`` and broadcasts it
        comm :
            communicator object; its ``rank``, ``size`` and ``bcast`` are
            used

        Raises
        ------
        ValueError
            if ``csize`` is zero
        """
        self.comm = comm

        # without a seed, rank 0 draws one and every rank receives it
        if seed is None:
            drawn = None
            if self.comm.rank == 0:
                drawn = numpy.random.randint(0, 4294967295)
            seed = self.comm.bcast(drawn)
        self.attrs['seed'] = seed

        if csize == 0:
            raise ValueError("no random particles generated!")

        # this rank's share of the csize objects
        first = comm.rank * csize // comm.size
        last = (comm.rank + 1) * csize // comm.size
        self._size = last - first

        # generate the seeds from the global seed
        self._rng = MPIRandomState(comm, seed=seed, size=self._size)

        # init the base class
        CatalogSource.__init__(self, comm=comm)
Example #15
0
    def __init__(self, filetype, args=(), kwargs=None, comm=None):
        """
        Open the files through a ``FileStack`` and give each rank its share.

        Parameters
        ----------
        filetype :
            forwarded as the first argument of ``FileStack``
        args : tuple, optional
            extra positional arguments forwarded to ``FileStack``
        kwargs : dict, optional
            extra keyword arguments forwarded to ``FileStack``; ``None``
            (the default) means no keyword arguments
        comm :
            communicator object; its ``rank``, ``size`` and ``bcast`` are
            used here and it is handed on to ``CatalogSource.__init__``
        """
        # a ``{}`` default would be a single dict shared by every call
        if kwargs is None:
            kwargs = {}

        self.comm = comm
        self.filetype = filetype

        # only rank 0 builds the FileStack; the other ranks get it via bcast
        if self.comm.rank == 0:
            self._source = FileStack(filetype, *args, **kwargs)
        else:
            self._source = None
        self._source = self.comm.bcast(self._source)

        # compute the size: this rank's share of the whole source
        start = self.comm.rank * self._source.size // self.comm.size
        end = (self.comm.rank + 1) * self._source.size // self.comm.size
        self._size = end - start

        # update the meta-data
        self.attrs.update(self._source.attrs)

        if self.comm.rank == 0:
            self.logger.info("Extra arguments to FileType: %s" % str(args))

        CatalogSource.__init__(self, comm=comm)
Example #16
0
    def to_nbodykit(self, fields=None):
        """
        Copy this object's fields into an nbodykit ``CatalogSource``.

        The object held by rank 0 of the current MPI communicator is
        broadcast to all ranks, and each rank keeps a contiguous slice of
        every selected field.

        Parameters
        ----------
        fields : iterable, optional
            names of the fields to copy; the value given on rank 0 is used
            on every rank.  ``None`` (the default) copies everything in
            ``fields`` of the broadcast object.

        Returns
        -------
        CatalogSource
            catalog holding this rank's slice of the selected fields, with
            ``attrs`` updated from the broadcast object
        """
        from nbodykit.base.catalog import CatalogSource
        from nbodykit import CurrentMPIComm

        comm = CurrentMPIComm.get()

        # rank 0 decides both the data and the field selection, so that all
        # ranks build the same set of columns
        payload = (self, fields) if comm.rank == 0 else None
        source, fields = comm.bcast(payload)

        # the argument used to be ignored; None keeps the old behaviour
        if fields is None:
            fields = source.fields

        # compute the size
        start = comm.rank * source.size // comm.size
        end = (comm.rank + 1) * source.size // comm.size

        # create the catalog without calling a subclass constructor, set the
        # local size, then run the base-class initialisation
        new = object.__new__(CatalogSource)
        new._size = end - start
        CatalogSource.__init__(new, comm=comm)
        for key in fields:
            new[key] = new.make_column(source[key])[start:end]
        new.attrs.update(source.attrs)

        return new