def get_galaxy_catalog_from_source_catalog(
            self, source_cat, rand_seed_for_galaxy_sampling=123):
        """
        Stochastically select galaxies out of a source (halo) catalog.

        Each object gets a uniform random number ``RAND`` drawn between 0
        and 1 and a probability
        ``PROB_GAL = 0.5 * (1 + erf((log10M - log10Mmin) / sigma_log10M))``.
        Only objects with ``RAND <= PROB_GAL`` are kept.

        Parameters
        ----------
        source_cat : catalog
            Input catalog; must contain the column ``self.log10M_column``.
        rand_seed_for_galaxy_sampling : int, optional
            Seed passed to ``MPIRandomState`` for the ``RAND`` column.

        Returns
        -------
        catalog
            The selected subset of a ``copy`` of ``source_cat``, carrying
            the additional ``RAND`` and ``PROB_GAL`` columns.
        """
        mass_column = self.log10M_column
        assert mass_column in source_cat.columns

        # Operate on a copy; presumably so that the helper columns do not
        # show up on the caller's catalog (verify: ``copy`` is shallow).
        galaxies = copy(source_cat)
        mpi_comm = CurrentMPIComm.get()

        # One uniform deviate per local object.
        sampler = MPIRandomState(
            mpi_comm,
            seed=rand_seed_for_galaxy_sampling,
            size=galaxies.size,
            chunksize=100000)
        galaxies['RAND'] = sampler.uniform(low=0.0, high=1.0, dtype='f8')

        # Probability of hosting a galaxy: error-function step in log10 mass.
        log10_mass = galaxies[mass_column].compute()
        scaled_offset = (log10_mass - self.log10Mmin) / self.sigma_log10M
        galaxies['PROB_GAL'] = 0.5 * (1.0 + erf(scaled_offset))
        print('Nhalos:', galaxies.csize)

        # Keep the objects whose random number falls below their probability.
        is_galaxy = galaxies['RAND'] <= galaxies['PROB_GAL']
        galaxies = galaxies[is_galaxy]

        print('Ngalaxies:', galaxies.csize)
        mass_stats = get_cstats_string(galaxies[mass_column].compute())
        print('Galaxy mass: ', mass_stats)

        return galaxies
예제 #2
0
def test_mpirng_unique(comm):
    """Two successive ``uniform`` draws from one state must not coincide."""
    state = MPIRandomState(comm, seed=1234, size=10, chunksize=3)

    first_draw = state.uniform()
    second_draw = state.uniform()

    # at least one element has to differ between the two draws
    differs = first_draw != second_draw
    assert differs.any()
예제 #3
0
def test_mpirng_unique(comm):
    """Check that repeated ``uniform`` calls on one state give new numbers."""
    random_state = MPIRandomState(comm, seed=1234, size=10, chunksize=3)

    draws = [random_state.uniform() for _ in range(2)]
    earlier, later = draws

    # the second call must not simply replay the first one
    assert (earlier != later).any()
예제 #4
0
def test_mpirng_poisson(comm):
    """
    Poisson draws with a per-item ``lam`` array, gathered over ``comm``,
    must equal the draws of a ``MPI.COMM_SELF`` state with scalar ``lam``.
    """
    state = MPIRandomState(comm, seed=1234, size=10, chunksize=3)

    # per-item rate with a trailing length-1 axis; every entry is 0.5
    rates = numpy.ones(state.size)[:, None] * 0.5
    local_draws = state.poisson(lam=rates, itemshape=(3,))
    gathered = numpy.concatenate(comm.allgather(local_draws), axis=0)

    # reference state: same seed, collective size and chunk size
    reference = MPIRandomState(
        MPI.COMM_SELF, seed=1234, size=state.csize, chunksize=state.chunksize)
    expected = reference.poisson(lam=0.5, itemshape=(3,))

    assert_array_equal(gathered, expected)
예제 #5
0
def test_mpirng_args(comm):
    """
    ``uniform`` with a per-item ``low`` array, gathered over ``comm``, must
    equal the draws of a ``MPI.COMM_SELF`` state with scalar ``low``.
    """
    state = MPIRandomState(comm, seed=1234, size=10, chunksize=3)

    # one lower bound per local item, all equal to 0.5
    lower_bounds = numpy.ones(state.size) * 0.5
    local_draws = state.uniform(low=lower_bounds)
    gathered = numpy.concatenate(comm.allgather(local_draws), axis=0)

    # reference state: same seed, collective size and chunk size
    reference = MPIRandomState(
        MPI.COMM_SELF, seed=1234, size=state.csize, chunksize=state.chunksize)
    expected = reference.uniform(low=0.5)

    assert_array_equal(gathered, expected)
예제 #6
0
def test_mpirng_large_chunk(comm):
    """
    With ``chunksize`` (10) larger than the per-rank ``size`` (1), the
    gathered draws must still equal those of a ``MPI.COMM_SELF`` state.
    """
    state = MPIRandomState(comm, seed=1234, size=1, chunksize=10)

    local_draws = state.uniform()
    gathered = numpy.concatenate(comm.allgather(local_draws), axis=0)

    # reference state: same seed, collective size and chunk size
    reference = MPIRandomState(
        MPI.COMM_SELF, seed=1234, size=state.csize, chunksize=state.chunksize)
    expected = reference.uniform()

    assert_array_equal(gathered, expected)
예제 #7
0
def test_mpirng_args(comm):
    """
    Array-valued ``low`` passed to ``uniform`` on ``comm`` must reproduce
    the scalar-``low`` result of an equivalent ``MPI.COMM_SELF`` state.
    """
    parallel_state = MPIRandomState(comm, seed=1234, size=10, chunksize=3)

    # one lower bound per local item, all equal to 0.5
    per_item_low = numpy.ones(parallel_state.size) * 0.5
    pieces = comm.allgather(parallel_state.uniform(low=per_item_low))
    combined = numpy.concatenate(pieces, axis=0)

    # reference state configured from the parallel one
    reference_kwargs = dict(seed=1234,
                            size=parallel_state.csize,
                            chunksize=parallel_state.chunksize)
    serial_state = MPIRandomState(MPI.COMM_SELF, **reference_kwargs)
    expected = serial_state.uniform(low=0.5)

    assert_array_equal(combined, expected)
예제 #8
0
def test_mpirng_large_chunk(comm):
    """
    A chunk size of 10 with only one item per rank must still reproduce
    the draws of an equivalent ``MPI.COMM_SELF`` state.
    """
    parallel_state = MPIRandomState(comm, seed=1234, size=1, chunksize=10)

    pieces = comm.allgather(parallel_state.uniform())
    combined = numpy.concatenate(pieces, axis=0)

    # reference state configured from the parallel one
    reference_kwargs = dict(seed=1234,
                            size=parallel_state.csize,
                            chunksize=parallel_state.chunksize)
    serial_state = MPIRandomState(MPI.COMM_SELF, **reference_kwargs)
    expected = serial_state.uniform()

    assert_array_equal(combined, expected)
예제 #9
0
def test_mpirng_poisson(comm):
    """
    Array-valued ``lam`` passed to ``poisson`` on ``comm`` must reproduce
    the scalar-``lam`` result of an equivalent ``MPI.COMM_SELF`` state.
    """
    item_shape = (3, )
    parallel_state = MPIRandomState(comm, seed=1234, size=10, chunksize=3)

    # per-item rate with a trailing length-1 axis; every entry is 0.5
    per_item_lam = numpy.ones(parallel_state.size)[:, None] * 0.5
    pieces = comm.allgather(
        parallel_state.poisson(lam=per_item_lam, itemshape=item_shape))
    combined = numpy.concatenate(pieces, axis=0)

    # reference state configured from the parallel one
    reference_kwargs = dict(seed=1234,
                            size=parallel_state.csize,
                            chunksize=parallel_state.chunksize)
    serial_state = MPIRandomState(MPI.COMM_SELF, **reference_kwargs)
    expected = serial_state.poisson(lam=0.5, itemshape=item_shape)

    assert_array_equal(combined, expected)
예제 #10
0
    def __init__(self, csize, seed=None, comm=None):
        """
        Set up the per-rank random state for ``csize`` objects in total.

        Parameters
        ----------
        csize : int
            Total number of objects over all ranks; must not be zero.
        seed : int, optional
            Global random seed. When ``None``, rank 0 draws one and it is
            broadcast over ``comm``.
        comm : MPI communicator
            Communicator used for rank/size and for the seed broadcast.
            NOTE(review): the ``None`` default is dereferenced below, so
            something outside this block (e.g. a decorator) presumably
            substitutes a communicator -- confirm.

        Raises
        ------
        ValueError
            If ``csize`` is zero.
        """
        self.comm = comm

        # No seed supplied: rank 0 picks one and every rank receives it.
        if seed is None:
            drawn = None
            if self.comm.rank == 0:
                drawn = numpy.random.randint(0, 4294967295)
            seed = self.comm.bcast(drawn)
        self.attrs['seed'] = seed

        if csize == 0:
            raise ValueError("no random particles generated!")

        # Contiguous share of the ``csize`` objects owned by this rank.
        rank, nranks = comm.rank, comm.size
        first = rank * csize // nranks
        last = (rank + 1) * csize // nranks
        self._size = last - first

        self._rng = MPIRandomState(comm, seed=seed, size=self._size)

        # Base-class initialisation comes last, as in the original ordering.
        CatalogSource.__init__(self, comm=comm)
예제 #11
0
def poisson_sample_to_points(delta,
                             displacement,
                             pm,
                             nbar,
                             bias=1.,
                             seed=None,
                             logger=None):
    """
    Poisson sample the linear delta and displacement fields to points.

    The steps in this function:

    #.  Apply a biased, lognormal transformation to the input ``delta`` field
    #.  Poisson sample the overdensity field to discrete points
    #.  Distribute the positions of particles uniformly within the mesh cells,
        and assign the displacement field at each cell to the particles

    Parameters
    ----------
    delta : RealField
        the linear overdensity field to sample
    displacement : list of RealField (3,)
        the linear displacement fields which is used to move the particles
    pm : object with a ``comm`` attribute
        only ``pm.comm.rank`` is read, to restrict log messages to rank 0;
        all mesh operations go through ``delta.pm``
    nbar : float
        the desired number density of the output catalog of objects
    bias : float, optional
        apply a linear bias to the overdensity field (default is 1.)
    seed : int, optional
        the random seed used to Poisson sample the field to points; the two
        derived seeds are taken from rank 0 so that all ranks agree even
        when ``seed`` is ``None``
    logger : logger, optional
        if given, progress messages are emitted with ``logger.info`` on
        rank 0

    Returns
    -------
    pos : array_like, (N, 3)
        the Cartesian positions of each of the generated particles
    displ : array_like, (N, 3)
        the displacement field sampled for each of the generated particles in the
        same units as the ``pos`` array
    """
    comm = delta.pm.comm

    # seed1 used for poisson sampling
    # seed2 used for uniform shift within a cell.
    seeds = numpy.random.RandomState(seed).randint(0, 0xfffffff, size=2)
    # With seed=None each rank would seed from its own entropy and end up
    # with different values; use rank 0's pair everywhere.  For an explicit
    # seed all ranks already agree, so this changes nothing.
    seed1, seed2 = comm.bcast(seeds, root=0)

    # apply the lognormal transformation to the initial conditions density
    # this creates a positive-definite delta (necessary for Poisson sampling)
    lagrangian_bias = bias - 1.
    delta = lognormal_transform(delta, bias=lagrangian_bias)

    if logger and pm.comm.rank == 0:
        logger.info("Lognormal transformation done")

    # mean number of objects per cell
    H = delta.BoxSize / delta.Nmesh
    overallmean = H.prod() * nbar

    # number of objects in each cell (per rank, as a RealField)
    cellmean = delta * overallmean

    # create a random state with the input seed
    rng = MPIRandomState(seed=seed1, comm=comm, size=delta.size)

    # generate poissons. Note that we use ravel/unravel to
    # maintain MPI invariance.
    Nravel = rng.poisson(lam=cellmean.ravel())
    N = delta.pm.create(mode='real')
    N.unravel(Nravel)

    Ntot = N.csum()
    if logger and pm.comm.rank == 0:
        logger.info("Poisson sampling done, total number of objects is %d" %
                    Ntot)

    pos_mesh = delta.pm.generate_uniform_particle_grid(shift=0.0)
    disp_mesh = numpy.empty_like(pos_mesh)

    # no need to do decompose because pos_mesh is strictly within the
    # local volume of the RealField.
    N_per_cell = N.readout(pos_mesh, resampler='nnb')
    for i in range(N.ndim):
        disp_mesh[:, i] = displacement[i].readout(pos_mesh, resampler='nnb')

    # fight round off errors, if any
    N_per_cell = numpy.int64(N_per_cell + 0.5)

    # one copy of the cell position / displacement per object in that cell
    pos = pos_mesh.repeat(N_per_cell, axis=0)
    disp = disp_mesh.repeat(N_per_cell, axis=0)

    del pos_mesh
    del disp_mesh

    if logger and pm.comm.rank == 0:
        logger.info("catalog produced. Assigning in cell shift.")

    # generate linear ordering of the positions.
    # this should have been a method in pmesh, e.g. argument
    # to generate_uniform_particle_grid(return_id=True);

    # FIXME: after pmesh update, remove this
    orderby = numpy.int64(pos[:, 0] / H[0] + 0.5)
    for i in range(1, delta.ndim):
        orderby[...] *= delta.Nmesh[i]
        orderby[...] += numpy.int64(pos[:, i] / H[i] + 0.5)

    # sort by ID to maintain MPI invariance.
    pos = mpsort.sort(pos, orderby=orderby, comm=comm)
    disp = mpsort.sort(disp, orderby=orderby, comm=comm)

    if logger and pm.comm.rank == 0:
        logger.info("sorting done")

    rng_shift = MPIRandomState(seed=seed2, comm=comm, size=len(pos))
    # Draw unit uniforms and scale every axis by its own cell size.  The
    # previous code passed ``H[i]`` with the index left over from the loop
    # above, i.e. the cell size of the last axis for all axes, which is
    # wrong whenever the cells are not cubic.  For equal cell sizes the
    # shifts are the same as before up to floating-point rounding.
    unit_shift = rng_shift.uniform(0., 1., itemshape=(delta.ndim, ))
    in_cell_shift = unit_shift * H

    pos[...] += in_cell_shift
    pos[...] %= delta.BoxSize

    if logger and pm.comm.rank == 0:
        logger.info("catalog shifted.")

    return pos, disp
예제 #12
0
파일: mockmaker.py 프로젝트: bccp/nbodykit
def poisson_sample_to_points(delta, displacement, pm, nbar, bias=1., seed=None, logger=None):
    """
    Poisson sample the linear delta and displacement fields to points.

    The steps in this function:

    #.  Apply a biased, lognormal transformation to the input ``delta`` field
    #.  Poisson sample the overdensity field to discrete points
    #.  Distribute the positions of particles uniformly within the mesh cells,
        and assign the displacement field at each cell to the particles

    Parameters
    ----------
    delta : RealField
        the linear overdensity field to sample
    displacement : list of RealField (3,)
        the linear displacement fields which is used to move the particles
    pm : object with a ``comm`` attribute
        only ``pm.comm.rank`` is read, to restrict log messages to rank 0;
        all mesh operations go through ``delta.pm``
    nbar : float
        the desired number density of the output catalog of objects
    bias : float, optional
        apply a linear bias to the overdensity field (default is 1.)
    seed : int, optional
        the random seed used to Poisson sample the field to points; the two
        derived seeds are taken from rank 0 so that all ranks agree even
        when ``seed`` is ``None``
    logger : logger, optional
        if given, progress messages are emitted with ``logger.info`` on
        rank 0

    Returns
    -------
    pos : array_like, (N, 3)
        the Cartesian positions of each of the generated particles
    displ : array_like, (N, 3)
        the displacement field sampled for each of the generated particles in the
        same units as the ``pos`` array
    """
    comm = delta.pm.comm

    # seed1 used for poisson sampling
    # seed2 used for uniform shift within a cell.
    seeds = numpy.random.RandomState(seed).randint(0, 0xfffffff, size=2)
    # With seed=None each rank would seed from its own entropy and end up
    # with different values; use rank 0's pair everywhere.  For an explicit
    # seed all ranks already agree, so this changes nothing.
    seed1, seed2 = comm.bcast(seeds, root=0)

    # apply the lognormal transformation to the initial conditions density
    # this creates a positive-definite delta (necessary for Poisson sampling)
    lagrangian_bias = bias - 1.
    delta = lognormal_transform(delta, bias=lagrangian_bias)

    if logger and pm.comm.rank == 0:
        logger.info("Lognormal transformation done")

    # mean number of objects per cell
    H = delta.BoxSize / delta.Nmesh
    overallmean = H.prod() * nbar

    # number of objects in each cell (per rank, as a RealField)
    cellmean = delta * overallmean

    # create a random state with the input seed
    rng = MPIRandomState(seed=seed1, comm=comm, size=delta.size)

    # generate poissons. Note that we use ravel/unravel to
    # maintain MPI invariance.
    Nravel = rng.poisson(lam=cellmean.ravel())
    N = delta.pm.create(type='real')
    N.unravel(Nravel)

    Ntot = N.csum()
    if logger and pm.comm.rank == 0:
        logger.info("Poisson sampling done, total number of objects is %d" % Ntot)

    pos_mesh = delta.pm.generate_uniform_particle_grid(shift=0.0)
    disp_mesh = numpy.empty_like(pos_mesh)

    # no need to do decompose because pos_mesh is strictly within the
    # local volume of the RealField.
    N_per_cell = N.readout(pos_mesh, resampler='nnb')
    for i in range(N.ndim):
        disp_mesh[:, i] = displacement[i].readout(pos_mesh, resampler='nnb')

    # fight round off errors, if any
    N_per_cell = numpy.int64(N_per_cell + 0.5)

    # one copy of the cell position / displacement per object in that cell
    pos = pos_mesh.repeat(N_per_cell, axis=0)
    disp = disp_mesh.repeat(N_per_cell, axis=0)

    del pos_mesh
    del disp_mesh

    if logger and pm.comm.rank == 0:
        logger.info("catalog produced. Assigning in cell shift.")

    # generate linear ordering of the positions.
    # this should have been a method in pmesh, e.g. argument
    # to generate_uniform_particle_grid(return_id=True);

    # FIXME: after pmesh update, remove this
    orderby = numpy.int64(pos[:, 0] / H[0] + 0.5)
    for i in range(1, delta.ndim):
        orderby[...] *= delta.Nmesh[i]
        orderby[...] += numpy.int64(pos[:, i] / H[i] + 0.5)

    # sort by ID to maintain MPI invariance.
    pos = mpsort.sort(pos, orderby=orderby, comm=comm)
    disp = mpsort.sort(disp, orderby=orderby, comm=comm)

    if logger and pm.comm.rank == 0:
        logger.info("sorting done")

    rng_shift = MPIRandomState(seed=seed2, comm=comm, size=len(pos))
    # Draw unit uniforms and scale every axis by its own cell size.  The
    # previous code passed ``H[i]`` with the index left over from the loop
    # above, i.e. the cell size of the last axis for all axes, which is
    # wrong whenever the cells are not cubic.  For equal cell sizes the
    # shifts are the same as before up to floating-point rounding.
    unit_shift = rng_shift.uniform(0., 1., itemshape=(delta.ndim,))
    in_cell_shift = unit_shift * H

    pos[...] += in_cell_shift
    pos[...] %= delta.BoxSize

    if logger and pm.comm.rank == 0:
        logger.info("catalog shifted.")

    return pos, disp
예제 #13
0
    def PoissonSample(self, delta, parameters_sampling):
        """
        Poisson sample ``delta`` to points and attach ``self.displacement``.

        Parameters
        ----------
        delta : field
            Density field to sample; it is multiplied by the mean number of
            objects per cell to obtain the Poisson rate of each cell.
        parameters_sampling : dict
            Must provide ``'nbar'`` (target number density), ``'seed1'``
            (seed of the Poisson draw) and ``'seed2'`` (seed of the uniform
            in-cell shift).

        Returns
        -------
        pos : array
            Positions of the sampled objects, one row per object.
        disp : array
            Values of ``self.displacement`` read out at the host cell of
            each object, one row per object.
        """
        nbar = parameters_sampling['nbar']
        seed1 = parameters_sampling['seed1']
        seed2 = parameters_sampling['seed2']

        comm = self.pm.comm
        # mean number of objects per cell
        H = self.BoxSize / self.pm.Nmesh
        overallmean = H.prod() * nbar

        # number of objects in each cell (per rank, as a RealField)
        cellmean = delta * overallmean

        # create a random state with the input seed
        rng = MPIRandomState(seed=seed1, comm=comm, size=delta.size)

        # generate poissons. Note that we use ravel/unravel to
        # maintain MPI invariance.
        Nravel = rng.poisson(lam=cellmean.ravel())
        N = self.pm.create(type='real')
        N.unravel(Nravel)

        Ntot = N.csum()
        if self.log.isEnabledFor(logging.INFO):
            self.log.info('Poisson sampling done, total number of objects is {}'.format(Ntot))

        pos_mesh = self.pm.generate_uniform_particle_grid(shift=0.0)
        disp_mesh = np.empty_like(pos_mesh)

        # no need to do decompose because pos_mesh is strictly within the
        # local volume of the RealField.
        N_per_cell = N.readout(pos_mesh, resampler='nnb')
        for i in range(N.ndim):
            disp_mesh[:, i] = self.displacement[i].readout(pos_mesh, resampler='nnb')

        # fight round off errors, if any
        N_per_cell = np.int64(N_per_cell + 0.5)

        # one copy of the cell position / displacement per object in that cell
        pos = pos_mesh.repeat(N_per_cell, axis=0)
        disp = disp_mesh.repeat(N_per_cell, axis=0)

        del pos_mesh
        del disp_mesh

        if self.log.isEnabledFor(logging.INFO):
            self.log.info("Catalog produced. Assigning in cell shift.")

        # FIXME: after pmesh update, remove this
        orderby = np.int64(pos[:, 0] / H[0] + 0.5)
        for i in range(1, delta.ndim):
            orderby[...] *= self.pm.Nmesh[i]
            orderby[...] += np.int64(pos[:, i] / H[i] + 0.5)

        # sort by ID to maintain MPI invariance.
        pos = mpsort.sort(pos, orderby=orderby, comm=comm)
        disp = mpsort.sort(disp, orderby=orderby, comm=comm)

        if self.log.isEnabledFor(logging.INFO):
            self.log.info("Sorting done")

        rng_shift = MPIRandomState(seed=seed2, comm=comm, size=len(pos))
        # Draw unit uniforms and scale every axis by its own cell size.  The
        # previous code passed ``H[i]`` with the index left over from the
        # loop above, i.e. the cell size of the last axis for all axes,
        # which is wrong whenever the cells are not cubic.  For equal cell
        # sizes the shifts are the same as before up to rounding.
        unit_shift = rng_shift.uniform(0., 1., itemshape=(delta.ndim,))
        in_cell_shift = unit_shift * H

        pos[...] += in_cell_shift
        pos[...] %= self.pm.BoxSize

        if self.log.isEnabledFor(logging.INFO):
            self.log.info("Catalog shifted.")

        # Offset the catalog by (comoving_distance - width) so that pos and
        # the comoving distance match.
        # NOTE(review): the original comment speaks of the z-coordinate but
        # the code offsets axis 0 -- confirm which axis is intended.
        pos[..., 0] += (self.comoving_distance - self.width) * np.ones(pos.shape[0])

        return pos, disp
예제 #14
0
def avg_value_mass_weighted_paint_cat_to_rho(
    cat=None,
    value_column=None,
    weight_ptcles_by=None,
    Ngrid=None,
    fill_empty_cells='RandNeighb',
    RandNeighbSeed=1234,
    raise_exception_if_too_many_empty_cells=True,
    to_mesh_kwargs=None,
    verbose=False
    ):
    """
    Paint ``cat[value_column]`` to a grid as a weighted per-cell average.

    Two fields are painted with ``weighted_paint_cat_to_delta`` in 'sum'
    mode: the summed weights ``sum_i m_i`` and the summed weighted values
    ``sum_i m_i chi_i``.  Their ratio is the cell average.  Cells without
    any contribution come out as nan and are then filled according to
    ``fill_empty_cells``.

    Parameters
    ----------
    cat : catalog
        Catalog to paint.  Side effect: a column named
        ``'TMP weighted <value_column>'`` is added to it and not removed.
    value_column : str
        Name of the column whose values are averaged (called 'chi' below).
    weight_ptcles_by : str or None
        Name of the column holding the particle weights (masses).  With
        ``None`` the unweighted values are summed and ``weight=None`` is
        passed to the density painting.
    Ngrid : int
        Passed as ``Nmesh`` to the painting.  Also used as the number of
        cells per dimension when wrapping neighbor indices and when
        computing the empty-cell fraction as ``Nfill / Ngrid**3``.
    fill_empty_cells : {'RandNeighb', 'RandNeighbReadout', 'SetZero',
                        'AvgAndRandNeighb', None}
        How empty cells are filled:

        * ``'RandNeighb'``: repeatedly copy the value of a randomly chosen
          neighbor cell, fetched with ``cgetitem_index_arr`` on global
          indices, until no nan is left.
        * ``'RandNeighbReadout'``: same idea, but the neighbor values are
          fetched with a nearest-neighbor ``readout`` at the neighbor
          positions; needs ``cat.attrs['BoxSize']``.
        * ``None`` / ``'SetZero'``: currently always raises ``Exception``
          (the implementation below the raise is unreachable).
        * ``'AvgAndRandNeighb'``: raises ``NotImplementedError``.
    RandNeighbSeed : int
        Base seed for the neighbor offsets; iteration ``i_iter`` (starting
        at 0) uses ``RandNeighbSeed + i_iter * 100``.
    raise_exception_if_too_many_empty_cells : bool
        If at least 99.9 percent of the cells are empty, raise an
        ``Exception`` when True, otherwise only log a warning.
    to_mesh_kwargs : dict or None
        Forwarded to ``weighted_paint_cat_to_delta``.  ``None`` selects
        ``{'window': 'cic', 'compensated': False, 'interlaced': False}``.
    verbose : bool
        Forwarded to ``weighted_paint_cat_to_delta``.

    Returns
    -------
    thisChi
        The averaged (and filled) field.
    thisChi_attrs
        The attrs returned by the painting of the weighted values.

    Raises
    ------
    Exception
        For ``fill_empty_cells`` in (None, 'SetZero'), for an unknown
        ``fill_empty_cells`` value, or when too many cells are empty and
        ``raise_exception_if_too_many_empty_cells`` is True.
    NotImplementedError
        For ``fill_empty_cells='AvgAndRandNeighb'``.
    """
    # In the code 'value' is called 'chi', because value is chi in reconstruction
    # code.

    if to_mesh_kwargs is None:
        to_mesh_kwargs = {
            'window': 'cic',
            'compensated': False,
            'interlaced': False}

    comm = CurrentMPIComm.get()
    logger = logging.getLogger('paint_utils')

    ## Get mass density rho so we can normalize chi later. Assume mass=1, or given by
    # weight_ptcles_by.
    # This is to get avg chi if multiple ptcles are in same cell.
    # 1 Sep 2017: Want chi_avg = sum_i m_i chi_i / sum_j m_j where m_i is particle mass,
    # because particle mass says how much the average should be dominated by a single ptcle
    # that can represent many original no-mass particles.

    # Compute rho4chi = sum_i m_i
    rho4chi, rho4chi_attrs = weighted_paint_cat_to_delta(
        cat,
        weight=weight_ptcles_by,
        weighted_paint_mode='sum',
        to_mesh_kwargs=to_mesh_kwargs,
        normalize=False,  # want rho not 1+delta
        Nmesh=Ngrid,
        set_mean=None,
        verbose=verbose)

    # compute chi weighted by ptcle mass chi(x)m(x)
    # NOTE: this adds a temporary column to the caller's catalog.
    weighted_col = 'TMP weighted %s' % value_column
    if weight_ptcles_by is not None:
        cat[weighted_col] = cat[weight_ptcles_by] * cat[value_column]
    else:
        # weight 1 for each ptcle
        cat[weighted_col] = cat[value_column]
    thisChi, thisChi_attrs = weighted_paint_cat_to_delta(
        cat,
        weight=weighted_col,  # chi weighted by ptcle mass
        weighted_paint_mode='sum',
        to_mesh_kwargs=to_mesh_kwargs,
        normalize=False,  # want rho not 1+delta (TODO: check)
        Nmesh=Ngrid,
        set_mean=None,
        verbose=verbose)

    # Normalize Chi by dividing by rho: So far, our chi will get larger if there are
    # more particles, because it sums up displacements over all particles.
    # To normalize, divide by rho (=mass density on grid if all ptcles have mass m=1,
    # or mass given by weight_ptcles_by).
    # (i.e. divide by number of contributions to a cell)
    if fill_empty_cells in [None, 'SetZero']:
        # Set chi=0 if there are not ptcles in grid cell. Used until 7 April 2017.
        # Seems ok for correl coeff and BAO, but gives large-scale bias in transfer
        # function or broad-band power because violates mass conservation.
        # This option is disabled: the raise makes the code below it unreachable.
        raise Exception('Possible bug: converting to np array only uses root rank?')
        thisChi = FieldMesh(
            np.where(
                rho4chi.compute(mode='real') == 0,
                rho4chi.compute(mode='real') * 0,
                thisChi.compute(mode='real') /
                rho4chi.compute(mode='real')))
        #thisChi = np.where(gridx.G['rho4chi']==0, thisChi*0, thisChi/gridx.G['rho4chi'])

    elif fill_empty_cells in [
        'RandNeighb', 'RandNeighbReadout', 'AvgAndRandNeighb']:

        # Set chi in empty cells equal to a random neighbor cell. Do this until all empty
        # cells are filled.
        # First set all empty cells to nan.
        #thisChi = np.where(gridx.G['rho4chi']==0, thisChi*0+np.nan, thisChi/gridx.G['rho4chi'])
        thisChi = thisChi / rho4chi  # get nan when rho4chi=0
        if True:
            # Sanity check: the nan cells must be exactly the cells with rho4chi == 0.
            ww1 = np.where(rho4chi == 0)
            #ww2 = np.where(np.isnan(thisChi.compute(mode='real')))
            ww2 = np.where(np.isnan(thisChi))
            assert np.allclose(ww1, ww2)
            del ww1, ww2

        # Progressively replace nan by random neighbors:
        Ng = Ngrid
        #thisChi = thisChi.reshape((Ng,Ng,Ng))
        logger.info('thisChi.shape: %s' % str(thisChi.shape))
        #assert thisChi.shape == (Ng,Ng,Ng)
        # indices of empty cells on this rank
        ww = np.where(np.isnan(thisChi))
        # number of empty cells across all ranks
        Nfill = comm.allreduce(ww[0].shape[0], op=MPI.SUM)
        have_empty_cells = (Nfill > 0)

        if fill_empty_cells in ['RandNeighb', 'RandNeighbReadout']:
            # The loop condition is based on the global count Nfill, so all
            # ranks run the same number of iterations.
            i_iter = -1
            while have_empty_cells:
                i_iter += 1
                if comm.rank == 0:
                    logger.info(
                        "Fill %d empty chi cells (%g percent) using random neighbors"
                        % (Nfill, Nfill / float(Ng)**3 * 100.))
                if Nfill / float(Ng)**3 >= 0.999:
                    if raise_exception_if_too_many_empty_cells:
                        raise Exception(
                            "Stop because too many empty chi cells")
                    else:
                        logger.warning(
                            "More than 99.9 percent of cells are empty")
                # draw -1,0,+1 for each empty cell, in 3 directions
                # r = np.random.randint(-1,2, size=(ww[0].shape[0],3), dtype='int')
                # NOTE(review): the offsets come from uniform(-2, 2) with an
                # integer dtype; presumably the float draw is truncated, which
                # would make 0 more likely than -1 or +1 -- confirm.
                rng = MPIRandomState(comm,
                                     seed=RandNeighbSeed + i_iter * 100,
                                     size=ww[0].shape[0],
                                     chunksize=100000)
                r = rng.uniform(low=-2, high=2, dtype='int', itemshape=(3,))
                assert np.all(r >= -1)
                assert np.all(r <= 1)

                # Old serial code to replace nan by random neighbors.
                # thisChi[ww[0],ww[1],ww[2]] = thisChi[(ww[0]+r[:,0])%Ng, (ww[1]+r[:,1])%Ng, (ww[2]+r[:,2])%Ng]

                if fill_empty_cells == 'RandNeighbReadout':
                    # New parallel code, 1st implementation.
                    # Use readout to get field at positions [(ww+rank_offset+r)%Ng] dx.
                    BoxSize = cat.attrs['BoxSize']
                    dx = BoxSize / (float(Ng))
                    #pos_wanted = ((np.array(ww).transpose() + r) % Ng) * dx   # ranges from 0 to BoxSize
                    # more carefully:
                    pos_wanted = np.zeros((ww[0].shape[0], 3)) + np.nan
                    for idir in [0, 1, 2]:
                        # local index + thisChi.start -> global index, then
                        # apply the offset, wrap at Ng and convert to a position
                        pos_wanted[:, idir] = (
                            (np.array(ww[idir] + thisChi.start[idir]) +
                             r[:, idir]) %
                            Ng) * dx[idir]  # ranges from 0..BoxSize

                    # use readout to get neighbors
                    readout_window = 'nnb'
                    layout = thisChi.pm.decompose(pos_wanted,
                                                  smoothing=readout_window)
                    # interpolate field to particle positions (use pmesh 'readout' function)
                    thisChi_neighbors = thisChi.readout(
                        pos_wanted, resampler=readout_window, layout=layout)
                    if False:
                        # print dbg info
                        for ii in range(10000, 10004):
                            if comm.rank == 1:
                                logger.info(
                                    'chi manual neighbor: %g' %
                                    thisChi[(ww[0][ii] + r[ii, 0]) % Ng,
                                            (ww[1][ii] + r[ii, 1]) % Ng,
                                            (ww[2][ii] + r[ii, 2]) % Ng])
                                logger.info('chi readout neighbor: %g' %
                                            thisChi_neighbors[ii])
                    thisChi[ww] = thisChi_neighbors

                elif fill_empty_cells == 'RandNeighb':
                    # New parallel code, 2nd implementation.
                    # Use collective getitem and only work with indices.
                    # http://rainwoodman.github.io/pmesh/pmesh.pm.html#pmesh.pm.Field.cgetitem.

                    # Note ww are indices of local slab, need to convert to global indices.
                    thisChi_neighbors = None
                    my_cindex_wanted = None
                    # Each rank in turn acts as root: it broadcasts the global
                    # indices it needs, all ranks take part in the lookup, and
                    # only the root keeps the result.
                    for root in range(comm.size):
                        # bcast to all ranks b/c must call cgetitem collectively with same args on each rank
                        if comm.rank == root:
                            # convert local index to collective index using ltoc which gives 3 tuple
                            assert len(ww) == 3
                            wwarr = np.array(ww).transpose()

                            #cww = np.array([
                            #    ltoc(field=thisChi, index=[ww[0][i],ww[1][i],ww[2][i]])
                            #    for i in range(ww[0].shape[0]) ])
                            cww = ltoc_index_arr(field=thisChi,
                                                 lindex_arr=wwarr)
                            #logger.info('cww: %s' % str(cww))

                            #my_cindex_wanted = [(cww[:,0]+r[:,0])%Ng, (cww[1][:]+r[:,1])%Ng, (cww[2][:]+r[:,2])%Ng]
                            my_cindex_wanted = (cww + r) % Ng
                            #logger.info('my_cindex_wanted: %s' % str(my_cindex_wanted))
                        cindex_wanted = comm.bcast(my_cindex_wanted,
                                                   root=root)
                        glob_thisChi_neighbors = cgetitem_index_arr(
                            thisChi, cindex_wanted)

                        # slower version doing the same
                        # glob_thisChi_neighbors = [
                        #     thisChi.cgetitem([cindex_wanted[i,0], cindex_wanted[i,1], cindex_wanted[i,2]])
                        #     for i in range(cindex_wanted.shape[0]) ]

                        if comm.rank == root:
                            thisChi_neighbors = np.array(
                                glob_thisChi_neighbors)
                        #thisChi_neighbors = thisChi.cgetitem([40,42,52])

                    #print('thisChi_neighbors:', thisChi_neighbors)

                    if False:
                        # print dbg info (rank 0 ok, rank 1 fails to print)
                        for ii in range(11000, 11004):
                            if comm.rank == 1:
                                logger.info(
                                    'ww: %s' %
                                    str([ww[0][ii], ww[1][ii], ww[2][ii]]))
                                logger.info(
                                    'chi[ww]: %g' %
                                    thisChi[ww[0][ii], ww[1][ii], ww[2][ii]]
                                )
                                logger.info(
                                    'chi manual neighbor: %g' %
                                    thisChi[(ww[0][ii] + r[ii, 0]) % Ng,
                                            (ww[1][ii] + r[ii, 1]) % Ng,
                                            (ww[2][ii] + r[ii, 2]) % Ng])
                                logger.info('chi bcast neighbor: %g' %
                                            thisChi_neighbors[ii])
                        raise Exception('just dbg')
                    thisChi[ww] = thisChi_neighbors

                # Recount the cells that are still nan (a chosen neighbor may
                # itself have been empty) and decide whether to iterate again.
                ww = np.where(np.isnan(thisChi))
                Nfill = comm.allreduce(ww[0].shape[0], op=MPI.SUM)
                have_empty_cells = (Nfill > 0)
                comm.barrier()

        elif fill_empty_cells == 'AvgAndRandNeighb':
            # Not ported to the parallel code; the old serial version is kept
            # below for reference only.
            raise NotImplementedError
            # while have_empty_cells:
            #     print("Fill %d empty chi cells (%g percent) using avg and random neighbors" % (
            #         ww[0].shape[0],ww[0].shape[0]/float(Ng)**3*100.))
            #     # first take average (only helps empty cells surrounded by filled cells)
            #     thisChi[ww[0],ww[1],ww[2]] = 0.0
            #     for r0 in range(-1,2):
            #         for r1 in range(-1,2):
            #             for r2 in range(-1,2):
            #                 if (r0==0) and (r1==0) and (r2==0):
            #                     # do not include center point in avg b/c this is nan
            #                     continue
            #                 else:
            #                     # average over 27-1 neighbor points
            #                     thisChi[ww[0],ww[1],ww[2]] += thisChi[(ww[0]+r0)%Ng, (ww[1]+r1)%Ng, (ww[2]+r2)%Ng]/26.0
            #     # get indices of cells that are still empty (happens if a neighbor was nan above)
            #     ww = np.where(np.isnan(thisChi))
            #     have_empty_cells = (ww[0].shape[0] > 0)
            #     if have_empty_cells:
            #         # draw -1,0,+1 for each empty cell, in 3 directions
            #         r = np.random.randint(-1,2, size=(ww[0].shape[0],3), dtype='int')
            #         # replace nan by random neighbors
            #         thisChi[ww[0],ww[1],ww[2]] = thisChi[(ww[0]+r[:,0])%Ng, (ww[1]+r[:,1])%Ng, (ww[2]+r[:,2])%Ng]
            #         # recompute indices of nan cells
            #         ww = np.where(np.isnan(thisChi))
            #         have_empty_cells = (ww[0].shape[0] > 0)

    else:
        raise Exception("Invalid fill_empty_cells option: %s" %
                        str(fill_empty_cells))

    return thisChi, thisChi_attrs