Ejemplo n.º 1
0
    def to_real_field(self, out=None, normalize=True):
        r"""
        Paint the density field, by interpolating the position column
        on to the mesh.

        The catalog is painted in chunks. The chunk size starts at
        ``_global_options['paint_chunk_size']``; whenever the domain
        decomposition of a chunk would hand more than twice that many
        objects to a single rank, nothing is painted and the same chunk is
        retried with half the size. After every painted chunk the size
        grows again by a factor of 1.5, capped at the configured value.

        This computes the following meta-data attributes in the process of
        painting, returned in the :attr:`attrs` attributes of the returned
        RealField object:

        - N : int
            the (unweighted) total number of objects painted to the mesh
        - W : float
            the weighted number of total objects, equal to the collective
            sum of the 'weight' column
        - W2 : float
            the collective sum of the squared 'weight' column
        - shotnoise : float
            the shot noise estimate ``V * W2 / W ** 2``, where ``V`` is the
            product of the box side lengths; for unit weights this is the
            volume divided by ``N``
        - num_per_cell : float
            the mean number of weighted objects per cell

        .. note::

            With ``normalize=True``, the density field on the mesh is
            normalized as :math:`1+\delta`, such that the collective mean
            of the field is unity.

        See the :ref:`documentation <painting-mesh>` on painting for more
        details on painting catalogs to a mesh.

        Parameters
        ----------
        out : :class:`pmesh.pm.RealField`, optional
            an existing field to paint into; its ``pm.Nmesh`` must match
            the mesh of this object. Its ``attrs`` are replaced. If not
            given, a new zero-initialized field is created.
        normalize : bool, optional
            if True, divide the painted field by ``num_per_cell``; when
            ``num_per_cell`` is not positive the field is set to 1
            everywhere instead

        Returns
        -------
        real : :class:`pmesh.pm.RealField`
            the painted real field; this has a ``attrs`` dict storing meta-data

        Raises
        ------
        RuntimeError
            if throttling has reduced the chunk size below one object
        """
        import gc

        pm = self.pm
        Nlocal = 0 # (unweighted) number of particles read on local rank
        Wlocal = 0 # (weighted) number of particles read on local rank
        W2local = 0 # sum of weight square. This is used to estimate shotnoise.

        # the paint brush window
        resampler = window.methods[self.resampler]

        # initialize the RealField to return
        if out is not None:
            assert isinstance(out, RealField), "output of to_real_field must be a RealField"
            numpy.testing.assert_array_equal(out.pm.Nmesh, pm.Nmesh)
            toret = out
        else:
            toret = RealField(pm)
            toret[:] = 0

        # for interlacing, we paint to two scratch meshes rather than to
        # "toret", since a user-supplied "out" may have non-zero elements,
        # messing up our interlacing sum
        if self.interlaced:
            real1 = RealField(pm)
            real1[:] = 0

            # the second, shifted mesh
            real2 = RealField(pm)
            real2[:] = 0

        Position = self.Position
        Weight = self.Weight
        Value = self.Value
        Selection = self.Selection

        # ensure the slices are synced, since decomposition is collective:
        # every rank iterates up to the longest local catalog
        Nlocalmax = max(pm.comm.allgather(len(Position)))

        # cell size along each axis
        H = pm.BoxSize / pm.Nmesh

        # paint data in chunks on each rank
        max_chunksize = _global_options['paint_chunk_size']

        # interlacing needs a wider ghost region than plain painting
        if self.interlaced:
            smoothing = 1.0 * resampler.support
        else:
            smoothing = 0.5 * resampler.support

        # use a local scope to avoid having two copies of data in memory
        def dochunk(s):
            """
            Paint the objects in slice ``s``.

            Returns the tuple ``(N, W, W2)`` for the selected objects of
            this chunk, or None (with nothing painted) if the chunk must
            be retried with a smaller size.
            """
            if len(Position) != 0:
                # selection has to be computed many times when data is `large`.
                columns = [Position[s]]
                if Weight is not None:
                    columns.append(Weight[s])
                if Value is not None:
                    columns.append(Value[s])
                if Selection is not None:
                    columns.append(Selection[s])

                # be sure to use the source to compute
                data = self.source.compute(columns)

                # pop in reverse order of the appends above
                sel = Ellipsis if Selection is None else data.pop()
                value = None if Value is None else data.pop()[sel]
                weight = None if Weight is None else data.pop()[sel]
                position = data.pop()[sel]
            else:
                # workaround a potential dask issue on empty dask arrays
                position = numpy.empty((0, 3), dtype=Position.dtype)
                weight = None
                value = None

            if weight is None:
                weight = numpy.ones(len(position))

            if value is None:
                value = numpy.ones(len(position))

            lay = pm.decompose(position, smoothing=smoothing)

            # if we are receiving too many particles, abort and retry with
            # a smaller chunksize; the decision is based on gathered data,
            # so all ranks take the same branch
            newlengths = pm.comm.allgather(lay.newlength)
            if any(newlength > 2 * max_chunksize for newlength in newlengths):
                if pm.comm.rank == 0:
                    self.logger.info(
                        "Throttling chunksize as some ranks will receive too many particles. (%d > %d)",
                        max(newlengths), max_chunksize * 2)
                return None

            p = lay.exchange(position)
            w = lay.exchange(weight)
            v = lay.exchange(value)

            if not self.interlaced:
                pm.paint(p, mass=w * v, resampler=resampler, hold=True, out=toret)
            else:
                # interlacing: use 2 meshes separated by 1/2 cell size
                # (shift is in mesh units)
                shifted = pm.affine.shift(0.5)

                pm.paint(p, mass=w * v, resampler=resampler, hold=True, out=real1)
                pm.paint(p, mass=w * v, resampler=resampler, transform=shifted, hold=True, out=real2)

            # (selected) number, sum of weights, sum of squared weights
            return len(position), weight.sum(), (weight ** 2).sum()

        i = 0
        chunksize = max_chunksize
        while i < Nlocalmax:

            s = slice(i, i + chunksize)

            if pm.comm.rank == 0:
                self.logger.info("Chunk %d ~ %d / %d ", i, i + chunksize, Nlocalmax)

            try:
                painted = dochunk(s)
            finally:
                # collect unfreed items
                gc.collect()

            if painted is None:
                # throttled: retry the same offset with a smaller chunk
                chunksize = chunksize // 2
                if chunksize < 1:
                    raise RuntimeError("Cannot find a chunksize that fits into memory.")
                continue

            Nchunk, Wchunk, W2chunk = painted
            Nlocal += Nchunk
            Wlocal += Wchunk
            W2local += W2chunk

            Nglobal = pm.comm.allreduce(Nlocal)

            if pm.comm.rank == 0:
                self.logger.info("painted %d out of %d objects to mesh",
                                 Nglobal, self.source.csize)

            # advance by the size of the slice that was actually painted,
            # and only afterwards let the chunk size grow again; growing
            # first would skip the objects between the two sizes
            i = i + chunksize
            chunksize = min(max_chunksize, int(chunksize * 1.5))

        # now the loop over particles is done

        if self.interlaced:
            # compose the two interlaced fields into the final result.
            c1 = real1.r2c()
            c2 = real2.r2c()

            # average the two meshes, undoing the half-cell shift of the
            # second one with a phase factor
            for k, s1, s2 in zip(c1.slabs.x, c1.slabs, c2.slabs):
                kH = sum(k[d] * H[d] for d in range(3))
                s1[...] = s1[...] * 0.5 + s2[...] * 0.5 * numpy.exp(0.5 * 1j * kH)

            # FFT back to real-space
            # NOTE: cannot use "toret" here in case user supplied "out"
            c1.c2r(real1)

            # need to add to the returned mesh if user supplied "out"
            toret[:] += real1[:]

        # unweighted number of objects
        N = pm.comm.allreduce(Nlocal)

        # weighted number of objects
        W = pm.comm.allreduce(Wlocal)

        # sum of squared weights
        W2 = pm.comm.allreduce(W2local)

        # weighted number density (objs/cell)
        nbar = 1. * W / numpy.prod(pm.Nmesh)

        # warn if nothing was painted; in that case nbar is not positive
        # and normalization below sets the density to uniform everywhere.
        if N == 0:
            warnings.warn(("trying to paint particle source to mesh, "
                           "but no particles were found!"),
                          RuntimeWarning)

        # shot noise is volume * sum(w^2) / sum(w)^2, which reduces to
        # volume / N for unit weights
        shotnoise = numpy.prod(pm.BoxSize) * W2 / W ** 2

        # save some meta-data
        toret.attrs = {}
        toret.attrs['shotnoise'] = shotnoise
        toret.attrs['N'] = N
        toret.attrs['W'] = W
        toret.attrs['W2'] = W2
        toret.attrs['num_per_cell'] = nbar

        # evaluated on every rank, not only on the logging rank
        csum = toret.csum()
        if pm.comm.rank == 0:
            self.logger.info("painted %d out of %d objects to mesh", N, self.source.csize)
            self.logger.info("mean particles per cell is %g", nbar)
            self.logger.info("sum is %g ", csum)

        if normalize:
            if nbar > 0:
                toret[...] /= nbar
            else:
                toret[...] = 1

            if pm.comm.rank == 0:
                self.logger.info("normalized the convention to 1 + delta")

        return toret
Ejemplo n.º 2
0
    def to_real_field(self, out=None, normalize=True):
        r"""
        Paint the density field, by interpolating the position column
        on to the mesh.

        The catalog is painted in chunks. The chunk size starts at
        ``_global_options['paint_chunk_size']``; whenever the domain
        decomposition of a chunk would hand more than twice that many
        objects to a single rank, nothing is painted and the same chunk is
        retried with half the size. After every painted chunk the size
        grows again by a factor of 1.5, capped at the configured value.

        This computes the following meta-data attributes in the process of
        painting, returned in the :attr:`attrs` attributes of the returned
        RealField object:

        - N : int
            the (unweighted) total number of objects painted to the mesh
        - W : float
            the weighted number of total objects, equal to the collective
            sum of the 'weight' column
        - W2 : float
            the collective sum of the squared 'weight' column
        - shotnoise : float
            the shot noise estimate ``V * W2 / W ** 2``, where ``V`` is the
            product of the box side lengths; for unit weights this is the
            volume divided by ``N``
        - num_per_cell : float
            the mean number of weighted objects per cell

        .. note::

            With ``normalize=True``, the density field on the mesh is
            normalized as :math:`1+\delta`, such that the collective mean
            of the field is unity.

        See the :ref:`documentation <painting-mesh>` on painting for more
        details on painting catalogs to a mesh.

        Parameters
        ----------
        out : :class:`pmesh.pm.RealField`, optional
            an existing field to paint into; its ``pm.Nmesh`` must match
            the mesh of this object. Its ``attrs`` are replaced. If not
            given, a new zero-initialized field is created.
        normalize : bool, optional
            if True, divide the painted field by ``num_per_cell``; when
            ``num_per_cell`` is not positive the field is set to 1
            everywhere instead

        Returns
        -------
        real : :class:`pmesh.pm.RealField`
            the painted real field; this has a ``attrs`` dict storing meta-data

        Raises
        ------
        RuntimeError
            if throttling has reduced the chunk size below one object
        """
        import gc

        pm = self.pm
        Nlocal = 0  # (unweighted) number of particles read on local rank
        Wlocal = 0  # (weighted) number of particles read on local rank
        W2local = 0  # sum of weight square. This is used to estimate shotnoise.

        # the paint brush window
        resampler = window.methods[self.resampler]

        # initialize the RealField to return
        if out is not None:
            assert isinstance(
                out, RealField), "output of to_real_field must be a RealField"
            numpy.testing.assert_array_equal(out.pm.Nmesh, pm.Nmesh)
            toret = out
        else:
            toret = RealField(pm)
            toret[:] = 0

        # for interlacing, we paint to two scratch meshes rather than to
        # "toret", since a user-supplied "out" may have non-zero elements,
        # messing up our interlacing sum
        if self.interlaced:
            real1 = RealField(pm)
            real1[:] = 0

            # the second, shifted mesh
            real2 = RealField(pm)
            real2[:] = 0

        Position = self.Position
        Weight = self.Weight
        Value = self.Value
        Selection = self.Selection

        # ensure the slices are synced, since decomposition is collective:
        # every rank iterates up to the longest local catalog
        Nlocalmax = max(pm.comm.allgather(len(Position)))

        # cell size along each axis
        H = pm.BoxSize / pm.Nmesh

        # paint data in chunks on each rank
        max_chunksize = _global_options['paint_chunk_size']

        # interlacing needs a wider ghost region than plain painting
        if self.interlaced:
            smoothing = 1.0 * resampler.support
        else:
            smoothing = 0.5 * resampler.support

        # use a local scope to avoid having two copies of data in memory
        def dochunk(s):
            """
            Paint the objects in slice ``s``.

            Returns the tuple ``(N, W, W2)`` for the selected objects of
            this chunk, or None (with nothing painted) if the chunk must
            be retried with a smaller size.
            """
            if len(Position) != 0:
                # selection has to be computed many times when data is `large`.
                columns = [Position[s]]
                if Weight is not None:
                    columns.append(Weight[s])
                if Value is not None:
                    columns.append(Value[s])
                if Selection is not None:
                    columns.append(Selection[s])

                # be sure to use the source to compute
                data = self.source.compute(columns)

                # pop in reverse order of the appends above
                sel = Ellipsis if Selection is None else data.pop()
                value = None if Value is None else data.pop()[sel]
                weight = None if Weight is None else data.pop()[sel]
                position = data.pop()[sel]
            else:
                # workaround a potential dask issue on empty dask arrays
                position = numpy.empty((0, 3), dtype=Position.dtype)
                weight = None
                value = None

            if weight is None:
                weight = numpy.ones(len(position))

            if value is None:
                value = numpy.ones(len(position))

            lay = pm.decompose(position, smoothing=smoothing)

            # if we are receiving too many particles, abort and retry with
            # a smaller chunksize; the decision is based on gathered data,
            # so all ranks take the same branch
            recvlengths = pm.comm.allgather(lay.recvlength)
            if any(recvlength > 2 * max_chunksize
                   for recvlength in recvlengths):
                if pm.comm.rank == 0:
                    self.logger.info(
                        "Throttling chunksize as some ranks will receive too many particles. (%d > %d)",
                        max(recvlengths), max_chunksize * 2)
                return None

            p = lay.exchange(position)
            w = lay.exchange(weight)
            v = lay.exchange(value)

            if not self.interlaced:
                pm.paint(p,
                         mass=w * v,
                         resampler=resampler,
                         hold=True,
                         out=toret)
            else:
                # interlacing: use 2 meshes separated by 1/2 cell size
                # (shift is in mesh units)
                shifted = pm.affine.shift(0.5)

                pm.paint(p,
                         mass=w * v,
                         resampler=resampler,
                         hold=True,
                         out=real1)
                pm.paint(p,
                         mass=w * v,
                         resampler=resampler,
                         transform=shifted,
                         hold=True,
                         out=real2)

            # (selected) number, sum of weights, sum of squared weights
            return len(position), weight.sum(), (weight**2).sum()

        i = 0
        chunksize = max_chunksize
        while i < Nlocalmax:

            s = slice(i, i + chunksize)

            if pm.comm.rank == 0:
                self.logger.info("Chunk %d ~ %d / %d ", i, i + chunksize,
                                 Nlocalmax)

            try:
                painted = dochunk(s)
            finally:
                # collect unfreed items
                gc.collect()

            if painted is None:
                # throttled: retry the same offset with a smaller chunk
                chunksize = chunksize // 2
                if chunksize < 1:
                    raise RuntimeError(
                        "Cannot find a chunksize that fits into memory.")
                continue

            Nchunk, Wchunk, W2chunk = painted
            Nlocal += Nchunk
            Wlocal += Wchunk
            W2local += W2chunk

            Nglobal = pm.comm.allreduce(Nlocal)

            if pm.comm.rank == 0:
                self.logger.info("painted %d out of %d objects to mesh",
                                 Nglobal, self.source.csize)

            # advance by the size of the slice that was actually painted,
            # and only afterwards let the chunk size grow again
            i = i + chunksize
            chunksize = min(max_chunksize, int(chunksize * 1.5))

        # now the loop over particles is done

        if self.interlaced:
            # compose the two interlaced fields into the final result.
            c1 = real1.r2c()
            c2 = real2.r2c()

            # average the two meshes, undoing the half-cell shift of the
            # second one with a phase factor
            for k, s1, s2 in zip(c1.slabs.x, c1.slabs, c2.slabs):
                kH = sum(k[d] * H[d] for d in range(3))
                s1[...] = s1[...] * 0.5 + s2[...] * 0.5 * numpy.exp(
                    0.5 * 1j * kH)

            # FFT back to real-space
            # NOTE: cannot use "toret" here in case user supplied "out"
            c1.c2r(real1)

            # need to add to the returned mesh if user supplied "out"
            toret[:] += real1[:]

        # unweighted number of objects
        N = pm.comm.allreduce(Nlocal)

        # weighted number of objects
        W = pm.comm.allreduce(Wlocal)

        # sum of squared weights
        W2 = pm.comm.allreduce(W2local)

        # weighted number density (objs/cell)
        nbar = 1. * W / numpy.prod(pm.Nmesh)

        # warn if nothing was painted; in that case nbar is not positive
        # and normalization below sets the density to uniform everywhere.
        if N == 0:
            warnings.warn(("trying to paint particle source to mesh, "
                           "but no particles were found!"), RuntimeWarning)

        # shot noise is volume * sum(w^2) / sum(w)^2, which reduces to
        # volume / N for unit weights
        shotnoise = numpy.prod(pm.BoxSize) * W2 / W**2

        # save some meta-data
        toret.attrs = {}
        toret.attrs['shotnoise'] = shotnoise
        toret.attrs['N'] = N
        toret.attrs['W'] = W
        toret.attrs['W2'] = W2
        toret.attrs['num_per_cell'] = nbar

        # evaluated on every rank, not only on the logging rank
        csum = toret.csum()
        if pm.comm.rank == 0:
            self.logger.info("painted %d out of %d objects to mesh", N,
                             self.source.csize)
            self.logger.info("mean particles per cell is %g", nbar)
            self.logger.info("sum is %g ", csum)

        if normalize:
            if nbar > 0:
                toret[...] /= nbar
            else:
                toret[...] = 1

            if pm.comm.rank == 0:
                self.logger.info("normalized the convention to 1 + delta")

        return toret
Ejemplo n.º 3
0
    def to_real_field(self, out=None, normalize=True):
        r"""
        Paint the density field, by interpolating the position column
        on to the mesh.

        The catalog is painted in chunks of
        ``_global_options['paint_chunk_size']`` objects per rank.

        This computes the following meta-data attributes in the process of
        painting, returned in the :attr:`attrs` attributes of the returned
        RealField object:

        - N : int
            the (unweighted) total number of objects painted to the mesh
        - W : float
            the weighted number of total objects, equal to the collective
            sum of the 'weight' column
        - shotnoise : float
            the Poisson shot noise, equal to the volume divided by ``N``
        - num_per_cell : float
            the mean number of weighted objects per cell

        .. note::

            With ``normalize=True``, the density field on the mesh is
            normalized as :math:`1+\delta`, such that the collective mean
            of the field is unity.

        See the :ref:`documentation <painting-mesh>` on painting for more
        details on painting catalogs to a mesh.

        Parameters
        ----------
        out : :class:`pmesh.pm.RealField`, optional
            an existing field to paint into; its ``pm.Nmesh`` must match
            the mesh of this object. Its ``attrs`` are replaced. If not
            given, a new zero-initialized field is created.
        normalize : bool, optional
            if True, divide the painted field by ``num_per_cell``; when
            ``num_per_cell`` is not positive the field is set to 1
            everywhere instead

        Returns
        -------
        real : :class:`pmesh.pm.RealField`
            the painted real field; this has a ``attrs`` dict storing meta-data

        Raises
        ------
        ValueError
            if the source has no column named by ``self.position``
        """
        # check for 'Position' column
        if self.position not in self.source:
            msg = "in order to paint a CatalogSource to a RealField, add a "
            msg += "column named '%s', representing the particle positions" % self.position
            raise ValueError(msg)

        pm = self.pm
        Nlocal = 0  # (unweighted) number of particles read on local rank
        Wlocal = 0  # (weighted) number of particles read on local rank

        # the paint brush window
        paintbrush = window.methods[self.window]

        # initialize the RealField to return
        if out is not None:
            assert isinstance(
                out, RealField), "output of to_real_field must be a RealField"
            numpy.testing.assert_array_equal(out.pm.Nmesh, pm.Nmesh)
            toret = out
        else:
            toret = RealField(pm)
            toret[:] = 0

        # for interlacing, we paint to two scratch meshes rather than to
        # "toret", since a user-supplied "out" may have non-zero elements,
        # messing up our interlacing sum
        if self.interlaced:
            real1 = RealField(pm)
            real1[:] = 0

            # the second, shifted mesh
            real2 = RealField(pm)
            real2[:] = 0

        # read the necessary data (as dask arrays)
        columns = [self.position, self.weight, self.value, self.selection]

        Position, Weight, Value, Selection = self.source.read(columns)

        # ensure the slices are synced, since decomposition is collective:
        # every rank iterates up to the longest local catalog
        Nlocalmax = max(pm.comm.allgather(len(Position)))

        # cell size along each axis; defined before the loop because the
        # interlacing combination below needs it even when the loop body
        # never runs (no objects on any rank)
        H = pm.BoxSize / pm.Nmesh

        # interlacing needs a wider ghost region than plain painting
        if self.interlaced:
            smoothing = 1.0 * paintbrush.support
        else:
            smoothing = 0.5 * paintbrush.support

        # paint data in chunks on each rank
        chunksize = _global_options['paint_chunk_size']
        for i in range(0, Nlocalmax, chunksize):
            s = slice(i, i + chunksize)

            if len(Position) != 0:

                # selection has to be computed many times when data is `large`.
                sel = self.source.compute(Selection[s])

                # be sure to use the source to compute
                position, weight, value = \
                    self.source.compute(Position[s], Weight[s], Value[s])

                # FIXME: investigate if move selection before compute
                # speeds up IO.
                position = position[sel]
                weight = weight[sel]
                value = value[sel]
            else:
                # workaround a potential dask issue on empty dask arrays
                position = numpy.empty((0, 3), dtype=Position.dtype)
                weight = None
                value = None

            if weight is None:
                weight = numpy.ones(len(position))

            if value is None:
                value = numpy.ones(len(position))

            # track total (selected) number and sum of weights
            Nlocal += len(position)
            Wlocal += weight.sum()

            lay = pm.decompose(position, smoothing=smoothing)
            p = lay.exchange(position)
            w = lay.exchange(weight)
            v = lay.exchange(value)

            if not self.interlaced:
                pm.paint(p,
                         mass=w * v,
                         resampler=paintbrush,
                         hold=True,
                         out=toret)
            else:
                # interlacing: use 2 meshes separated by 1/2 cell size
                # (shift is in mesh units)
                shifted = pm.affine.shift(0.5)

                pm.paint(p,
                         mass=w * v,
                         resampler=paintbrush,
                         hold=True,
                         out=real1)
                pm.paint(p,
                         mass=w * v,
                         resampler=paintbrush,
                         transform=shifted,
                         hold=True,
                         out=real2)

            Nglobal = pm.comm.allreduce(Nlocal)

            if pm.comm.rank == 0:
                self.logger.info("painted %d out of %d objects to mesh",
                                 Nglobal, self.source.csize)

        # now the loop over particles is done

        if self.interlaced:
            # compose the two interlaced fields into the final result.
            c1 = real1.r2c()
            c2 = real2.r2c()

            # average the two meshes, undoing the half-cell shift of the
            # second one with a phase factor
            for k, s1, s2 in zip(c1.slabs.x, c1.slabs, c2.slabs):
                kH = sum(k[d] * H[d] for d in range(3))
                s1[...] = s1[...] * 0.5 + s2[...] * 0.5 * numpy.exp(
                    0.5 * 1j * kH)

            # FFT back to real-space
            # NOTE: cannot use "toret" here in case user supplied "out"
            c1.c2r(real1)

            # need to add to the returned mesh if user supplied "out"
            toret[:] += real1[:]

        # unweighted number of objects
        N = pm.comm.allreduce(Nlocal)

        # weighted number of objects
        W = pm.comm.allreduce(Wlocal)

        # weighted number density (objs/cell)
        nbar = 1. * W / numpy.prod(pm.Nmesh)

        # warn if nothing was painted; in that case nbar is not positive
        # and normalization below sets the density to uniform everywhere.
        if N == 0:
            warnings.warn(("trying to paint particle source to mesh, "
                           "but no particles were found!"), RuntimeWarning)

        # shot noise is volume / un-weighted number
        shotnoise = numpy.prod(pm.BoxSize) / N

        # save some meta-data
        toret.attrs = {}
        toret.attrs['shotnoise'] = shotnoise
        toret.attrs['N'] = N
        toret.attrs['W'] = W
        toret.attrs['num_per_cell'] = nbar

        # evaluated on every rank, not only on the logging rank
        csum = toret.csum()
        if pm.comm.rank == 0:
            self.logger.info("painted %d out of %d objects to mesh", N,
                             self.source.csize)
            self.logger.info("mean particles per cell is %g", nbar)
            self.logger.info("sum is %g ", csum)

        if normalize:
            if nbar > 0:
                toret[...] /= nbar
            else:
                toret[...] = 1

            # only report normalization when it was actually applied
            if pm.comm.rank == 0:
                self.logger.info("normalized the convention to 1 + delta")

        return toret