Python CatalogSource Examples, nbodykit.base.catalog.CatalogSource Python Examples

Example #1

0

Show file

File: file.py Project: bccp/nbodykit

    def __init__(self, filetype, args=(), kwargs={}, comm=None):
        """
        Open the file(s) on the root rank and share them with all ranks.

        Parameters
        ----------
        filetype :
            forwarded as the first argument of ``FileStack``
        args : tuple, optional
            extra positional arguments forwarded to ``FileStack``
        kwargs : dict, optional
            extra keyword arguments forwarded to ``FileStack``; the
            dict is only read in this method, never modified
        comm :
            the communicator; its ``rank``, ``size`` and ``bcast`` are
            used right away
        """
        self.comm = comm
        self.filetype = filetype

        rank, nranks = self.comm.rank, self.comm.size
        is_root = rank == 0

        # only the root rank builds the FileStack; the others receive it
        stack = FileStack(filetype, *args, **kwargs) if is_root else None
        self._source = self.comm.bcast(stack)

        # the catalog initially spans the whole file ...
        nrows = self._source.size
        self.start, self.end = 0, nrows

        # ... and this rank reads the rows [_lstart, _lend) of it
        self._lstart = rank * nrows // nranks
        self._lend = (rank + 1) * nrows // nranks
        self._size = self._lend - self._lstart

        # copy the meta-data of the file
        self.attrs.update(self._source.attrs)

        if is_root:
            self.logger.info("Extra arguments to FileType: %s %s" % (str(args), str(kwargs)))

        CatalogSource.__init__(self, comm=comm)

Example #2

0

Show file

    def __init__(self, filetype, args=(), kwargs={}, comm=None):
        """
        Build the ``FileStack`` on rank 0, broadcast it, and split its rows.

        Parameters
        ----------
        filetype :
            first argument handed to ``FileStack``
        args : tuple, optional
            positional arguments unpacked into the ``FileStack`` call
        kwargs : dict, optional
            keyword arguments unpacked into the ``FileStack`` call; not
            modified by this method
        comm :
            communicator providing ``rank``, ``size`` and ``bcast``
        """
        self.comm = comm
        self.filetype = filetype

        on_root = (self.comm.rank == 0)

        # construct the FileStack once, on the root, and broadcast it
        self._source = self.comm.bcast(
            FileStack(filetype, *args, **kwargs) if on_root else None)

        # even split of the full file over the ranks
        total = self._source.size
        me, nproc = self.comm.rank, self.comm.size
        first = me * total // nproc
        last = (me + 1) * total // nproc

        self._lstart = first  # first row of the file read by this rank
        self._lend = last  # one past the last row read by this rank
        self._size = last - first

        # the selected range starts out as the complete file
        self.start = 0
        self.end = total

        # bring in the meta-data stored with the file
        self.attrs.update(self._source.attrs)

        if on_root:
            self.logger.info("Extra arguments to FileType: %s %s" %
                             (str(args), str(kwargs)))

        CatalogSource.__init__(self, comm=comm)

Example #3

0

Show file

    def query_range(self, start, end):
        """
        Return a copy of this catalog restricted to a range of rows.

        The ``end - start`` requested rows are divided evenly over the
        ranks of ``self.comm``.

        Parameters
        ----------
        start : int
            first row of the requested range
        end : int
            end of the requested range; ``end - start`` rows are selected

        Returns
        -------
        A copy of this catalog whose per-rank slice covers only the
        requested rows.

        Raises
        ------
        ValueError
            if the base-class ``hardcolumns`` property reports any columns
            for this catalog (the error message calls these columns
            "attached"); seeking is refused in that case
        """
        attached = CatalogSource.hardcolumns.fget(self)
        if len(attached) > 0:
            raise ValueError(
                "cannot seek if columns have been attached to the FileCatalog")

        rank, nranks = self.comm.rank, self.comm.size
        nrows = end - start

        # NOTE(review): the slice is offset by the current ``self.start``,
        # yet the copy records the raw ``start`` below -- confirm this is
        # the intended convention for repeated calls
        origin = self.start + start

        view = self.copy()
        view._lstart = origin + rank * nrows // nranks
        view._lend = origin + (rank + 1) * nrows // nranks
        view._size = view._lend - view._lstart
        view.start, view.end = start, end

        # re-run the base initialisation on the copy
        CatalogSource.__init__(view, comm=self.comm)
        return view

Example #4

0

Show file

File: file.py Project: bccp/nbodykit

    def query_range(self, start, end):
        """
        Create a new catalog that accesses only part of the file.

        Parameters
        ----------
        start : int
            first row of the region to select
        end : int
            end of the region to select; the region holds
            ``end - start`` rows, shared evenly between the ranks

        Returns
        -------
        A copy of ``self`` limited to the given region.

        Raises
        ------
        ValueError
            when ``CatalogSource.hardcolumns`` is non-empty for this
            catalog, i.e. columns have been attached to it
        """
        if len(CatalogSource.hardcolumns.fget(self)) > 0:
            raise ValueError("cannot seek if columns have been attached to the FileCatalog")

        me = self.comm.rank
        nproc = self.comm.size
        length = end - start

        # NOTE(review): offsets are shifted by ``self.start`` while the
        # stored ``start`` attribute is the raw argument -- confirm
        shift = self.start + start

        result = self.copy()
        lo = shift + me * length // nproc
        hi = shift + (me + 1) * length // nproc
        result._lstart = lo
        result._lend = hi
        result._size = hi - lo
        result.start = start
        result.end = end

        # initialise the base class again for the new slice
        CatalogSource.__init__(result, comm=self.comm)
        return result

Example #5

0

Show file

File: lognormal.py Project: twobombs/nbodykit

    def __init__(self, Plin, nbar, BoxSize, Nmesh, bias=2., seed=None,
                    cosmo=None, redshift=None,
                    unitary_amplitude=False, inverted_phase=False, comm=None):
        """
        Set up the catalog from a linear power spectrum object.

        Parameters
        ----------
        Plin :
            the power spectrum object; its ``cosmo``, ``redshift`` and
            ``attrs`` attributes are used when they exist
        nbar, bias, unitary_amplitude, inverted_phase :
            stored in ``attrs`` under the same names
        BoxSize, Nmesh :
            passed on to ``self._makesource``
        seed : int, optional
            if None, rank 0 draws a random seed that is broadcast to all
            ranks
        cosmo, redshift : optional
            taken from ``Plin`` when not given
        comm :
            passed to ``CatalogSource.__init__``

        Raises
        ------
        ValueError
            if ``cosmo`` or ``redshift`` can neither be taken from the
            arguments nor from ``Plin``, or if the collective size of
            the catalog is zero
        """
        self.comm = comm
        self.Plin = Plin

        # fall back on what Plin provides for anything not passed in
        if cosmo is None:
            cosmo = getattr(Plin, 'cosmo', None)
        if redshift is None:
            redshift = getattr(Plin, 'redshift', None)

        # both must be known by now
        if cosmo is None:
            raise ValueError("'cosmo' must be passed if 'Plin' does not have 'cosmo' attribute")
        if redshift is None:
            raise ValueError("'redshift' must be passed if 'Plin' does not have 'redshift' attribute")
        self.cosmo = cosmo

        # meta-data comes from Plin if it has any, else from the cosmology
        if not hasattr(Plin, 'attrs'):
            self.attrs['cosmo'] = dict(cosmo)
        else:
            self.attrs.update(Plin.attrs)

        # record the configuration of this catalog
        settings = (
            ('nbar', nbar),
            ('redshift', redshift),
            ('bias', bias),
            ('unitary_amplitude', unitary_amplitude),
            ('inverted_phase', inverted_phase),
        )
        for key, value in settings:
            self.attrs[key] = value

        # without a seed, rank 0 draws one and every rank adopts it
        if seed is None:
            drawn = None
            if self.comm.rank == 0:
                drawn = numpy.random.randint(0, 4294967295)
            seed = self.comm.bcast(drawn)
        self.attrs['seed'] = seed

        # generate the underlying source and keep the mesh object
        made = self._makesource(BoxSize=BoxSize, Nmesh=Nmesh)
        self._source, pm = made
        self.pm = pm

        # store copies of the mesh geometry
        self.attrs['Nmesh'] = pm.Nmesh.copy()
        self.attrs['BoxSize'] = pm.BoxSize.copy()

        # local number of objects
        self._size = len(self._source)

        # base class set-up
        CatalogSource.__init__(self, comm=comm)

        # an empty catalog is treated as an error
        if self.csize == 0:
            raise ValueError("no particles in LogNormal source; try increasing ``nbar`` parameter")

Example #6

0

Show file

    def __init__(self,
                 source,
                 cosmo,
                 redshift,
                 mdef='vir',
                 mass='Mass',
                 position='Position',
                 velocity='Velocity'):
        """
        Wrap an existing catalog as a halo catalog.

        Parameters
        ----------
        source :
            the input catalog; must be a ``CatalogSourceBase`` that
            contains the three named columns
        cosmo :
            stored as ``self.cosmo``; ``dict(cosmo)`` is saved in ``attrs``
        redshift :
            stored in ``attrs``
        mdef : str, optional
            appended to ``'halo_m'`` and ``'halo_r'`` to form the keys
            saved in ``attrs``
        mass, position, velocity : str, optional
            names of the corresponding columns in ``source``

        Raises
        ------
        ValueError
            if a column name is None or is not found in ``source``
        TypeError
            if ``source`` is not a ``CatalogSourceBase``
        """
        # each role must map onto an existing column of the source
        roles = (('mass', mass), ('position', position),
                 ('velocity', velocity))
        for role, column in roles:
            if column is None:
                raise ValueError(
                    "the %s column cannot be None in HaloCatalog" % role)
            if column not in source:
                raise ValueError(
                    "input source is missing the %s column; '%s' does not exist"
                    % (role, column))

        if not isinstance(source, CatalogSourceBase):
            raise TypeError(
                "input source to HalotoolsCatalog should be a CatalogSource")

        self._source = source
        self.cosmo = cosmo

        # start from the meta-data of the source ...
        self.attrs.update(source.attrs)

        # ... add the parameters of this catalog, and the names of the
        # mass and radius fields that follow from the mass definition
        entries = (
            ('redshift', redshift),
            ('cosmo', dict(self.cosmo)),
            ('mass', mass),
            ('velocity', velocity),
            ('position', position),
            ('mdef', mdef),
            ('halo_mass_key', 'halo_m' + mdef),
            ('halo_radius_key', 'halo_r' + mdef),
        )
        for key, value in entries:
            self.attrs[key] = value

        # same local size as the wrapped source
        self._size = self._source.size

        # base class set-up, on the communicator of the source
        CatalogSource.__init__(self, comm=source.comm)

Example #7

0

Show file

    def __init__(self,
                 linear,
                 astart=0.1,
                 aend=1.0,
                 boost=2,
                 Nsteps=5,
                 cosmo=None):
        """
        Evolve a linear density field and store the resulting particles.

        The constructor runs ``solver.lpt`` followed by ``solver.nbody`` and
        attaches the final state as the ``Displacement``,
        ``InitialPosition`` and ``ConjugateMomentum`` columns.

        Parameters
        ----------
        linear :
            the linear density field mesh; its ``comm``, ``attrs``, ``pm``
            and ``to_field`` are used, and ``Plin.cosmo`` when ``cosmo``
            is not given
        astart, aend : float, optional
            first and last value of the stepping sequence
        boost : optional
            passed to ``Solver`` as ``B``
        Nsteps : int, optional
            the integrator receives ``Nsteps + 1`` values between
            ``astart`` and ``aend``
        cosmo : optional
            defaults to ``linear.Plin.cosmo``
        """
        self.comm = linear.comm

        if cosmo is None:
            cosmo = linear.Plin.cosmo

        self.cosmo = cosmo

        # the linear density field mesh (assigned once; the original
        # repeated this assignment further down without any change)
        self.linear = linear

        self.attrs.update(linear.attrs)

        # NOTE(review): ``asteps`` holds Nsteps values, whereas the
        # integrator below is given Nsteps + 1 values -- confirm which
        # of the two the meta-data is meant to describe
        asteps = numpy.linspace(astart, aend, Nsteps)
        self.attrs['astart'] = astart
        self.attrs['aend'] = aend
        self.attrs['Nsteps'] = Nsteps
        self.attrs['asteps'] = asteps
        self.attrs['boost'] = boost

        solver = Solver(self.linear.pm, cosmology=self.cosmo, B=boost)
        Q = self.linear.pm.generate_uniform_particle_grid(shift=0.5)

        # second-order LPT state at astart, then stepped forward to aend
        dlin = self.linear.to_field(mode='complex')
        state = solver.lpt(dlin, Q, a=astart, order=2)
        state = solver.nbody(
            state,
            leapfrog(numpy.linspace(astart, aend, Nsteps + 1, endpoint=True)))

        # 1 / (H0 * aend * efunc(1 / aend - 1)), with H0 fixed to 100
        H0 = 100.
        self.RSD = 1.0 / (H0 * aend * self.cosmo.efunc(1.0 / aend - 1))

        self._size = len(Q)
        CatalogSource.__init__(self, comm=linear.comm, use_cache=False)

        # collective size: sum of the local sizes over all ranks
        self._csize = self.comm.allreduce(self._size)

        self['Displacement'] = state.S
        self['InitialPosition'] = state.Q
        self['ConjugateMomentum'] = state.P  # a ** 2  / H0 dx / dt

Example #8

0

Show file

File: transform.py Project: mehdirezaie/nbodykit

def ConcatenateSources(*sources, **kwargs):
    """
    Join several CatalogSource objects into a single catalog.

    The inputs are stacked in the order given, and the result can be
    limited to a chosen subset of columns.

    .. note::
        Only the ``attrs`` dict of the first input is copied to the
        result; meta-data of the other inputs is not carried over.

    Parameters
    ----------
    *sources : subclass of :class:`~nbodykit.base.catalog.CatalogSource`
        the catalogs to stack; all must use the same communicator
    columns : str, list of str, optional
        name(s) of the columns to keep; when omitted, or an empty list,
        the columns of the first catalog are used

    Returns
    -------
    CatalogSource :
        a new catalog holding the stacked columns

    Raises
    ------
    ValueError
        if the communicators of the inputs differ, or if a requested
        column is missing from one of the inputs

    Examples
    --------
    >>> from nbodykit.lab import *
    >>> source1 = UniformCatalog(nbar=100, BoxSize=1.0)
    >>> source2 = UniformCatalog(nbar=100, BoxSize=1.0)
    >>> print(source1.csize, source2.csize)
    >>> combined = transform.ConcatenateSources(source1, source2, columns=['Position', 'Velocity'])
    >>> print(combined.csize)
    """
    from nbodykit.base.catalog import CatalogSource

    wanted = kwargs.get('columns', None)

    # a single column name is promoted to a one-element list
    if isinstance(wanted, string_types):
        wanted = [wanted]

    # no selection means every column of the first catalog
    if wanted is None or wanted == []:
        wanted = sources[0].columns

    # every catalog has to use the same communicator
    if any(not (src.comm == sources[0].comm) for src in sources):
        raise ValueError("cannot concatenate sources: comm mismatch")

    # every catalog has to provide every requested column
    for src in sources:
        for name in wanted:
            if name not in src:
                raise ValueError(
                    ("cannot concatenate sources: columns are missing "
                     "from some sources"))

    # size of the result: sum of the ``size`` of every input
    nrows = numpy.sum([src.size for src in sources], dtype='intp')

    # stack each requested column along the first axis
    stacked = {
        name: da.concatenate([src[name] for src in sources], axis=0)
        for name in wanted
    }

    combined = CatalogSource._from_columns(nrows, sources[0].comm, **stacked)
    combined.attrs.update(sources[0].attrs)
    return combined

Example #9

0

Show file

def Pk(pos, mode='1d', Nmesh=None, BoxSize=None, **kwargs):
    ''' Measure the power spectrum of a set of positions by wrapping
    nbodykit.algorithms.fftpower.FFTPower. The objects are expected to
    live in a **periodic box**.

    pos : (3, N_particles)
        positions of the objects; the transpose is stored as the
        ``Position`` column
    mode, Nmesh, BoxSize, **kwargs :
        handed on to ``FFTPower``

    Returns the ``power`` attribute of the ``FFTPower`` result.
    '''
    count = pos.shape[1]  # number of particles

    # fill an empty CatalogSource by hand
    catalog = CatalogSource()
    catalog._size = count
    catalog._csize = catalog.comm.allreduce(catalog.size)
    catalog['Position'] = pos.T

    # run the measurement and hand back the power
    return FFTPower(catalog, mode, Nmesh=Nmesh, BoxSize=BoxSize,
                    **kwargs).power

Example #10

0

Show file

File: transform.py Project: bccp/nbodykit

def ConcatenateSources(*sources, **kwargs):
    """
    Stack CatalogSource objects on top of each other, optionally keeping
    only some of their columns.

    .. note::
        The ``attrs`` dict of the result is filled from the first
        catalog only.

    Parameters
    ----------
    *sources : subclass of :class:`~nbodykit.base.catalog.CatalogSource`
        the catalogs to combine; they must share one communicator
    columns : str, list of str, optional
        the column name(s) to include; defaults to the columns of the
        first catalog when not given or given as an empty list

    Returns
    -------
    CatalogSource :
        the combined catalog

    Raises
    ------
    ValueError
        on a communicator mismatch between the inputs, or when one of
        the inputs lacks a requested column

    Examples
    --------
    >>> from nbodykit.lab import *
    >>> source1 = UniformCatalog(nbar=100, BoxSize=1.0)
    >>> source2 = UniformCatalog(nbar=100, BoxSize=1.0)
    >>> print(source1.csize, source2.csize)
    >>> combined = transform.ConcatenateSources(source1, source2, columns=['Position', 'Velocity'])
    >>> print(combined.csize)
    """
    from nbodykit.base.catalog import CatalogSource

    selection = kwargs.get('columns', None)
    if isinstance(selection, string_types):
        # one name on its own: treat it as a list of one
        selection = [selection]

    if selection is None or selection == []:
        # nothing selected: take all columns of the first catalog
        selection = sources[0].columns

    # the inputs must agree on the communicator
    same_comm = all(cat.comm == sources[0].comm for cat in sources)
    if not same_comm:
        raise ValueError("cannot concatenate sources: comm mismatch")

    # the inputs must all contain the selected columns
    for cat in sources:
        complete = all(name in cat for name in selection)
        if not complete:
            raise ValueError(("cannot concatenate sources: columns are missing "
                              "from some sources"))

    # add up the ``size`` of all inputs
    sizes = [cat.size for cat in sources]
    total = numpy.sum(sizes, dtype='intp')

    # concatenate column by column along the first axis
    columns_out = {}
    for name in selection:
        pieces = [cat[name] for cat in sources]
        columns_out[name] = da.concatenate(pieces, axis=0)

    result = CatalogSource._from_columns(total, sources[0].comm, **columns_out)
    result.attrs.update(sources[0].attrs)
    return result

Example #11

0

Show file

File: decomposed.py Project: hantaoliu/nbodykit

    def __init__(self, source, domain, position='Position', columns=[]):
        """
        Redistribute columns of ``source`` according to ``domain``.

        Parameters
        ----------
        source :
            the catalog to redistribute
        domain :
            object whose ``decompose`` is called with the computed
            position column
        position : str, optional
            name of the position column in ``source``
        columns : list of str or None, optional
            the columns to exchange and store; with the default empty
            list no column is stored, and only an explicit ``None``
            selects every column of ``source``
        """
        self.domain = domain
        self.source = source

        # the layout describes how the objects move between ranks
        positions = source[position].compute()
        layout = domain.decompose(positions)

        self._size = layout.newlength

        CatalogSource.__init__(self, comm=source.comm)
        self.attrs.update(source.attrs)

        frozen = {}
        self._frozen = frozen

        names = source.columns if columns is None else columns

        # evaluate each column, exchange it, and keep the result
        for name in names:
            values = source[name].compute()
            frozen[name] = self.make_column(layout.exchange(values))

Example #12

0

Show file

File: array.py Project: twobombs/nbodykit

    def __init__(self, data, comm=None, **kwargs):
        """
        Build the catalog from in-memory data.

        Parameters
        ----------
        data :
            an astropy ``Table`` (converted with ``as_array``), a dict of
            columns, or a structured array
        comm :
            the communicator used to compare data types between ranks
        **kwargs :
            stored in ``attrs``

        Raises
        ------
        ValueError
            if ``data`` is neither a dict nor a structured array, if
            rank 0 finds differing data types among the ranks, or if
            the columns do not all have the same length
        """
        # astropy Tables become structured numpy arrays
        if isinstance(data, Table):
            data = data.as_array()

        # anything that is not a dict has to be a structured array
        if not isinstance(data, dict) and not is_structured_array(data):
            raise ValueError(("input data to ArrayCatalog must have a "
                              "structured data type with fields"))

        self.comm = comm
        self._source = data

        # sorted column names: from the dtype if present, else dict keys
        names = data.dtype.names if hasattr(data, 'dtype') else data.keys()
        keys = sorted(names)

        # one field per column: (name, (element type, trailing shape))
        fields = []
        for key in keys:
            column = data[key]
            fields.append((key, (column.dtype, column.shape[1:])))
        dtype = numpy.dtype(fields)
        self._dtype = dtype

        # rank 0 gathers the data types of all ranks and compares them
        gathered = self.comm.gather(dtype, root=0)
        if self.comm.rank == 0 and any(dt != gathered[0] for dt in gathered):
            raise ValueError(
                "mismatch between dtypes across ranks in Array")

        # local size, taken from the first column
        first = keys[0]
        self._size = len(self._source[first])

        # all columns must have that same length
        for key in keys:
            if len(self._source[key]) != self._size:
                raise ValueError(
                    "column `%s` and column `%s` has different size" %
                    (first, key))

        # keyword arguments become meta-data
        self.attrs.update(kwargs)

        CatalogSource.__init__(self, comm=comm)

Example #13

0

Show file

File: array.py Project: nickhand/nbodykit

    def get_hardcolumn(self, col):
        """
        Look up ``col`` in the underlying data array/dict.

        A name listed in ``self._dtype`` is wrapped with ``make_column``;
        any other name is delegated to ``CatalogSource.get_hardcolumn``.
        """
        # not one of our own columns: let the base class handle it
        if col not in self._dtype.names:
            return CatalogSource.get_hardcolumn(self, col)

        raw = self._source[col]
        return self.make_column(raw)

Example #14

0

Show file

    def __init__(self, source, domain=None, position='Position', columns=None):
        """
        Redistribute columns of ``source`` according to a domain.

        Parameters
        ----------
        source :
            the catalog to redistribute; ``attrs['BoxSize']`` is read
            when no domain is given
        domain : optional
            object whose ``decompose`` is called with the computed
            position column; by default a ``GridND`` is built from
            ``split_size_3d(comm.size)`` and the box size
        position : str, optional
            name of the position column in ``source``
        columns : list of str, optional
            the columns to exchange and store; all columns of
            ``source`` if None
        """
        comm = source.comm

        if domain is None:
            # number of divisions along each of the three axes
            ndiv = split_size_3d(comm.size)

            if comm.rank == 0:
                self.logger.info("using cpu grid decomposition: %s" % str(ndiv))

            # cell edges from 0 to BoxSize along each axis
            box = source.attrs['BoxSize']
            edges = [
                numpy.linspace(0, box[axis], ndiv[axis] + 1, endpoint=True)
                for axis in range(3)
            ]

            domain = GridND(edges, comm=comm)

        self.domain = domain
        self.source = source

        # the layout describes how the objects move between ranks
        positions = source[position].compute()
        layout = domain.decompose(positions)

        self._size = layout.recvlength

        CatalogSource.__init__(self, comm=comm)
        self.attrs.update(source.attrs)

        frozen = {}
        self._frozen = frozen

        names = source.columns if columns is None else columns

        # evaluate each column, exchange it, and keep the result
        for name in names:
            values = source[name].compute()
            frozen[name] = self.make_column(layout.exchange(values))

Example #15

0

Show file

File: file.py Project: bccp/nbodykit

    def get_hardcolumn(self, col):
        """
        Look up ``col`` in the underlying file source.

        For a name listed in the dtype of the file source, the result of
        ``get_dask`` is sliced to ``[_lstart, _lend)``; any other name is
        delegated to ``CatalogSource.get_hardcolumn``.
        """
        # not a column of the file: let the base class handle it
        if col not in self._source.dtype.names:
            return CatalogSource.get_hardcolumn(self, col)

        whole = self._source.get_dask(col)
        return whole[self._lstart:self._lend]

Example #16

0

Show file

File: array.py Project: twobombs/nbodykit

    def get_hardcolumn(self, col):
        """
        Fetch a column of the underlying data array/dict.

        Names found in ``self._dtype.names`` are passed through
        ``make_column``; everything else goes to the base class.
        """
        own_columns = self._dtype.names
        if col in own_columns:
            values = self._source[col]
            return self.make_column(values)

        # unknown here: defer to CatalogSource
        return CatalogSource.get_hardcolumn(self, col)

Example #17

0

Show file

    def get_hardcolumn(self, col):
        """
        Fetch a column of the underlying file source.

        Names found in the dtype of the file source are read with
        ``get_dask`` and cut down to the slice ``[_lstart, _lend)``;
        everything else goes to the base class.
        """
        file_columns = self._source.dtype.names
        if col in file_columns:
            rows = slice(self._lstart, self._lend)
            return self._source.get_dask(col)[rows]

        # unknown here: defer to CatalogSource
        return CatalogSource.get_hardcolumn(self, col)

Example #18

0

Show file

File: uniform.py Project: cbyrohl/nbodykit

    def __init__(self, csize, seed=None, comm=None):
        """
        Set up the random number generator of the catalog.

        Parameters
        ----------
        csize : int
            passed to ``MPIRandomState``; must not be zero
        seed : int, optional
            if None, rank 0 draws a random seed that is broadcast to all
            ranks
        comm :
            the communicator

        Raises
        ------
        ValueError
            if ``csize`` is zero
        """
        self.comm = comm

        # without a seed, rank 0 draws one and every rank adopts it
        if seed is None:
            drawn = None
            if self.comm.rank == 0:
                drawn = numpy.random.randint(0, 4294967295)
            seed = self.comm.bcast(drawn)
        self.attrs['seed'] = seed

        # an empty catalog is not allowed
        if csize == 0:
            raise ValueError("no random particles generated!")

        # the random state determines the local size
        self._rng = MPIRandomState(comm, seed, csize)
        self._size = self.rng.size

        # base class set-up
        CatalogSource.__init__(self, comm=comm)

Example #19

0

Show file

    def __init__(self,
                 ra,
                 dec,
                 collision_radius=62 / 60. / 60.,
                 seed=None,
                 degrees=True,
                 comm=None):
        """
        Prepare the input positions and run the algorithm.

        Parameters
        ----------
        ra, dec :
            sky coordinates; each is converted with
            ``CatalogSource.make_column`` and passed to ``SkyToUnitSphere``
        collision_radius : float, optional
            the collision radius in degrees; converted to radians
            internally
        seed : int, optional
            if None, rank 0 draws a random seed that is broadcast to all
            ranks
        degrees : bool, optional
            forwarded to ``SkyToUnitSphere``
        comm : optional
            not referenced in this method; the communicator is taken
            from the internal ``ArrayCatalog``
        """
        # positions from the sky coordinates
        ra_column = CatalogSource.make_column(ra)
        dec_column = CatalogSource.make_column(dec)
        xyz = SkyToUnitSphere(ra_column, dec_column, degrees=degrees).compute()

        # view the positions as a structured array with one 'Position'
        # field and wrap them in an ArrayCatalog
        record = numpy.dtype([('Position', (xyz.dtype.str, 3))])
        xyz = numpy.squeeze(xyz.view(dtype=record))
        self.source = ArrayCatalog(xyz, BoxSize=numpy.array([2., 2., 2.]))
        self.comm = self.source.comm

        # without a seed, rank 0 draws one and every rank adopts it
        if seed is None:
            drawn = None
            if self.comm.rank == 0:
                drawn = numpy.random.randint(0, 4294967295)
            seed = self.comm.bcast(drawn)

        # start with fresh meta-data holding the configuration
        self.attrs = {}
        for key, value in (('collision_radius', collision_radius),
                           ('seed', seed),
                           ('degrees', degrees)):
            self.attrs[key] = value

        # the radius is kept in radians for internal use
        self._collision_radius_rad = numpy.deg2rad(collision_radius)
        if self.comm.rank == 0:
            self.logger.info("collision radius in degrees = %.4f" %
                             collision_radius)

        # run the algorithm
        self.run()

Example #20

0

Show file

File: file.py Project: fractional-ray/nbodykit

    def get_hardcolumn(self, col):
        """
        Look up ``col`` in the underlying file source.

        For a name listed in the dtype of the file source, the rows of
        the file are split evenly between the ranks and the share of this
        rank is returned; any other name is delegated to
        ``CatalogSource.get_hardcolumn``.
        """
        # not a column of the file: let the base class handle it
        if col not in self._source.dtype.names:
            return CatalogSource.get_hardcolumn(self, col)

        rank = self.comm.rank
        nranks = self.comm.size

        # this rank's share of the rows: [lo, hi)
        lo = rank * self._source.size // nranks
        hi = (rank + 1) * self._source.size // nranks
        return self._source.get_dask(col)[lo:hi]

Example #21

0

Show file

File: array.py Project: nickhand/nbodykit

    def __init__(self, data, comm=None, **kwargs):
        """
        Create the catalog from in-memory columns.

        Parameters
        ----------
        data :
            a dict of columns, a structured array, or an astropy
            ``Table`` (which is converted with ``as_array``)
        comm :
            communicator; used to gather the data types on rank 0
        **kwargs :
            added to ``attrs``

        Raises
        ------
        ValueError
            for input that is neither a dict nor a structured array,
            for a data type mismatch detected on rank 0, and for
            columns of unequal length
        """
        # Tables are handled through their structured-array form
        if isinstance(data, Table):
            data = data.as_array()

        # reject non-dict input without a structured data type
        needs_fields = not isinstance(data, dict)
        if needs_fields and not is_structured_array(data):
            raise ValueError(("input data to ArrayCatalog must have a "
                               "structured data type with fields"))

        self.comm = comm
        self._source = data

        # the column names, in sorted order
        if hasattr(data, 'dtype'):
            colnames = sorted(data.dtype.names)
        else:
            colnames = sorted(data.keys())

        # describe every column by its element type and trailing shape
        described = [(name, (data[name].dtype, data[name].shape[1:]))
                     for name in colnames]
        self._dtype = numpy.dtype(described)

        # collect the data types on rank 0 and make sure they agree
        all_dtypes = self.comm.gather(self._dtype, root=0)
        if self.comm.rank == 0:
            reference = all_dtypes[0]
            if any(dt != reference for dt in all_dtypes):
                raise ValueError("mismatch between dtypes across ranks in Array")

        # the first column defines the local size ...
        self._size = len(self._source[colnames[0]])

        # ... and every column has to match it
        for name in colnames:
            if len(self._source[name]) != self._size:
                raise ValueError("column `%s` and column `%s` has different size" % (colnames[0], name))

        # remaining keyword arguments are meta-data
        self.attrs.update(kwargs)

        CatalogSource.__init__(self, comm=comm)

Example #22

0

Show file

File: uniform.py Project: bccp/nbodykit

    def __init__(self, csize, seed=None, comm=None):
        """
        Set up the random number generator of the catalog.

        Parameters
        ----------
        csize : int
            the total number of objects, split evenly between the ranks;
            must not be zero
        seed : int, optional
            if None, rank 0 draws a random seed that is broadcast to all
            ranks
        comm :
            the communicator

        Raises
        ------
        ValueError
            if ``csize`` is zero
        """
        self.comm = comm

        # without a seed, rank 0 draws one and every rank adopts it
        if seed is None:
            drawn = None
            if self.comm.rank == 0:
                drawn = numpy.random.randint(0, 4294967295)
            seed = self.comm.bcast(drawn)
        self.attrs['seed'] = seed

        # an empty catalog is not allowed
        if csize == 0:
            raise ValueError("no random particles generated!")

        # this rank's share of the csize objects
        first = comm.rank * csize // comm.size
        last = (comm.rank + 1) * csize // comm.size
        self._size = last - first

        self._rng = MPIRandomState(comm, seed=seed, size=self._size)

        # base class set-up
        CatalogSource.__init__(self, comm=comm)

Example #23

0

Show file

File: halos.py Project: bccp/nbodykit

    def __init__(self, source, cosmo, redshift, mdef='vir',
                 mass='Mass', position='Position', velocity='Velocity'):
        """
        Wrap an existing catalog as a halo catalog.

        Parameters
        ----------
        source :
            the input catalog; has to be a ``CatalogSourceBase`` holding
            the mass, position and velocity columns
        cosmo :
            kept as ``self.cosmo`` and saved in ``attrs`` as a dict
        redshift :
            saved in ``attrs``
        mdef : str, optional
            suffix of the ``halo_m`` / ``halo_r`` keys saved in ``attrs``
        mass, position, velocity : str, optional
            the names of the respective columns in ``source``

        Raises
        ------
        ValueError
            when one of the column names is None or absent from ``source``
        TypeError
            when ``source`` is not a ``CatalogSourceBase``
        """
        # check the three required columns one after the other
        labels = ['mass', 'position', 'velocity']
        chosen = [mass, position, velocity]
        for index, label in enumerate(labels):
            column = chosen[index]
            if column is None:
                raise ValueError("the %s column cannot be None in HaloCatalog" %label)
            if column not in source:
                raise ValueError("input source is missing the %s column; '%s' does not exist" %(label, column))

        if not isinstance(source, CatalogSourceBase):
            raise TypeError("input source to HalotoolsCatalog should be a CatalogSource")

        comm = source.comm
        self._source = source
        self.cosmo = cosmo

        # inherit the meta-data of the source
        self.attrs.update(source.attrs)

        # save the parameters, followed by the names of the mass and
        # radius fields derived from the mass definition
        saved = [
            ('redshift', redshift),
            ('cosmo', dict(self.cosmo)),
            ('mass', mass),
            ('velocity', velocity),
            ('position', position),
            ('mdef', mdef),
            ('halo_mass_key', 'halo_m' + mdef),
            ('halo_radius_key', 'halo_r' + mdef),
        ]
        for key, value in saved:
            self.attrs[key] = value

        # local size equals that of the source
        self._size = self._source.size

        # base class set-up
        CatalogSource.__init__(self, comm=comm)

Example #24

0

Show file

File: file.py Project: fractional-ray/nbodykit

    def __init__(self, filetype, args=(), kwargs={}, comm=None):
        """
        Open the file(s) on the root rank and share them with all ranks.

        Parameters
        ----------
        filetype :
            forwarded as the first argument of ``FileStack``
        args : tuple, optional
            extra positional arguments forwarded to ``FileStack``
        kwargs : dict, optional
            extra keyword arguments forwarded to ``FileStack``; only
            read in this method
        comm :
            the communicator; its ``rank``, ``size`` and ``bcast`` are
            used right away
        """
        self.comm = comm
        self.filetype = filetype

        rank, nranks = self.comm.rank, self.comm.size
        is_root = rank == 0

        # only the root rank builds the FileStack; the others receive it
        stack = FileStack(filetype, *args, **kwargs) if is_root else None
        self._source = self.comm.bcast(stack)

        # local size: this rank's even share of the rows in the file
        nrows = self._source.size
        lo = rank * nrows // nranks
        hi = (rank + 1) * nrows // nranks
        self._size = hi - lo

        # copy the meta-data of the file
        self.attrs.update(self._source.attrs)

        # only the positional arguments are logged here
        if is_root:
            self.logger.info("Extra arguments to FileType: %s" % str(args))

        CatalogSource.__init__(self, comm=comm)

Example #25

0

Show file

File: make_survey.py Project: adematti/pymakesurvey

    def to_nbodykit(self, fields=None):
        """
        Convert this object into an nbodykit ``CatalogSource``.

        The object held by rank 0 is broadcast to all ranks of the
        current MPI communicator; each rank then keeps an even share of
        every column listed in the ``fields`` attribute of that object.

        Parameters
        ----------
        fields : optional
            not used in this method

        Returns
        -------
        CatalogSource
            the new catalog, with ``attrs`` copied from this object
        """
        from nbodykit.base.catalog import CatalogSource
        from nbodykit import CurrentMPIComm

        comm = CurrentMPIComm.get()

        # every rank works with the copy held by rank 0
        source = comm.bcast(self if comm.rank == 0 else None)

        # this rank's share of the rows: [lo, hi)
        lo = comm.rank * source.size // comm.size
        hi = (comm.rank + 1) * source.size // comm.size

        # create the catalog without running a subclass constructor,
        # then initialise the base class by hand
        catalog = object.__new__(CatalogSource)
        catalog._size = hi - lo
        CatalogSource.__init__(catalog, comm=comm)

        for name in source.fields:
            column = catalog.make_column(source[name])
            catalog[name] = column[lo:hi]
        catalog.attrs.update(source.attrs)

        return catalog