예제 #1
0
    def __init__(self, occ_matrix=cuda_cell.OCCUPANCY_MATRIX):
        """
        Create the bookkeeping arrays used for halo exchange.

        :param occ_matrix: Occupancy matrix this object is attached to,
            stored as ``self._occ_matrix``. Defaults to
            ``cuda_cell.OCCUPANCY_MATRIX``.
        """
        self._occ_matrix = occ_matrix

        # -1: no version recorded yet.
        self._version = -1
        self._init = False

        int_t = ctypes.c_int

        # Cell index lists and their per-direction offsets.
        self._boundary_cell_groups = cuda_base.Array(dtype=int_t)
        self._boundary_groups_start_end_indices = cuda_base.Array(
            ncomp=27, dtype=int_t)
        self._halo_cell_groups = cuda_base.Array(dtype=int_t)
        self._halo_groups_start_end_indices = cuda_base.Array(
            ncomp=27, dtype=int_t)
        self._boundary_groups_contents_array = cuda_base.Array(dtype=int_t)
        self._exchange_sizes = cuda_base.Array(ncomp=26, dtype=int_t)

        # Filled in later; nothing is allocated for these here.
        self._halo_shifts = self._reverse_lookup = None

        # Shift every array's version by -1. The original author's note
        # says this is to ensure the first update happens.
        for arr in (self._boundary_cell_groups,
                    self._boundary_groups_start_end_indices,
                    self._halo_cell_groups,
                    self._halo_groups_start_end_indices,
                    self._boundary_groups_contents_array,
                    self._exchange_sizes):
            arr.inc_version(-1)
예제 #2
0
    def _cell_sort_setup(self):
        """
        Allocate the arrays used by the cell sort, then run ``_build``.

        ``self._n_func()`` sizes the per-particle arrays and, divided by the
        cell count, gives the number of matrix columns.
        ``self._domain.cell_count`` sizes the per-cell array and gives the
        number of matrix rows.
        """
        int_t = ctypes.c_int

        # One entry per slot reported by _n_func().
        self.particle_layers = cuda_base.Array(ncomp=self._n_func(),
                                               dtype=int_t)
        self.cell_reverse_lookup = cuda_base.Array(ncomp=self._n_func(),
                                                   dtype=int_t)

        # One entry per cell.
        self.cell_contents_count = cuda_base.Array(
            ncomp=self._domain.cell_count, dtype=int_t)

        # One row per cell; the column count is the integer quotient of the
        # slot count by the cell count.
        n_rows = self._domain.cell_count
        n_cols = self._n_func() // self._domain.cell_count
        self.matrix = cuda_base.device_buffer_2d(nrow=n_rows,
                                                 ncol=n_cols,
                                                 dtype=int_t)

        # Cache the dimensions as reported back by the buffer itself.
        self._n_layers = self.matrix.ncol
        self._n_cells = self.matrix.nrow

        self._setup = True
        self._build()
예제 #3
0
    def __init__(self):
        """
        Initialise an occupancy matrix with placeholder attributes.

        The only objects constructed here are the timer and the
        one-component scan array; everything else starts as ``None``,
        ``False`` or zero.
        """
        # Lifecycle flags.
        self._init = self._setup = False

        self.cell_contents_count = None
        """Number of particles per cell, determines number of layers per cell."""

        self.cell_reverse_lookup = None
        """Map between particle index and containing cell."""

        self.particle_layers = None
        """Stores which layer each particle is contained in."""

        self.matrix = None
        """The occupancy matrix."""

        # Placeholders for the build stage.
        self._p1_lib = None
        self._boundary = self._cell_edge_lengths = self._cell_array = None
        self.cell_in_halo_flag = None

        # Placeholders for the setup stage.
        self._n_func = self._domain = self._positions = None
        self._n_layers = 0

        # Update control: starts out requiring an update, with no update
        # callables registered.
        self.update_required = True
        self._update_set = False
        self._update_func = None
        self._update_func_pre = self._update_func_post = None

        # Version counters.
        self.version_id = self.version_id_halo = 0

        self._timer = ppmd.opt.Timer()

        # Single-component integer array used by the scan.
        self._ccc_scan = cuda_base.Array(ncomp=1, dtype=ctypes.c_int)
예제 #4
0
    def __init__(self, state_in=None):
        """
        Create the timers and empty library/buffer handles.

        :param state_in: State the boundary condition is applied to; stored
            as ``self.state``.
        """
        self.state = state_in

        def _new_timer():
            # All four timers are built with identical arguments.
            return ppmd.opt.Timer(cuda_runtime.TIMER, 0)

        self.timer_apply = _new_timer()
        self.timer_lib_overhead = _new_timer()
        self.timer_search = _new_timer()
        self.timer_move = _new_timer()

        # Library for the single-process periodic boundary code.
        self._one_process_pbc_lib = None

        # Escape-guard library and the buffers it works with.
        self._escape_guard_lib = None
        self._escape_count = self._escape_dir_count = None
        self._escape_list = self._escape_matrix = None

        # One-component integer flag array.
        self._flag = cuda_base.Array(ncomp=1, dtype=ctypes.c_int)
예제 #5
0
    def _update_cell_in_halo(self):
        """
        Rebuild ``self.cell_in_halo_flag`` when the domain cell array changed.

        Nothing happens if the cached ``self._cell_array`` already matches
        the first three entries of ``self._domain.cell_array``. Otherwise one
        flag per cell is written: 1 on the outermost layer of cells, 0 on the
        layer immediately inside it and -1 everywhere further in.
        """
        cached = self._cell_array
        current = self._domain.cell_array

        unchanged = cached is not None and \
            all(cached[dim] == current[dim] for dim in range(3))
        if unchanged:
            return

        dims = np.array(self._domain.cell_array[:])
        self._cell_array = dims
        nx, ny, nz = dims[0], dims[1], dims[2]

        n_cells = nx * ny * nz

        # Indexed (z, y, x). Start with every cell flagged 1, then overwrite
        # progressively deeper shells.
        flags = np.ones((nz, ny, nx), dtype=ctypes.c_int)
        flags[1:nz - 1, 1:ny - 1, 1:nx - 1] = 0
        flags[2:nz - 2, 2:ny - 2, 2:nx - 2] = -1
        flat_flags = flags.ravel()

        self.cell_in_halo_flag = cuda_base.Array(ncomp=n_cells,
                                                 dtype=ctypes.c_int)
        self.cell_in_halo_flag[:] = flat_flags
예제 #6
0
    def _get_pairs(self):
        """
        Build the boundary/halo cell pairings for the 26 neighbour directions.

        For each direction ``create_halo_pairs`` is called with the domain, a
        ``Slice`` selector and the direction tuple. Element 0 of its result
        is used as the boundary cells, element 1 as the halo cells and
        element 2 as the shift. The concatenated results are stored in
        ``cuda_base.Array`` attributes, and ``self._version`` is set to the
        version of the domain cell array.
        """
        # Selector used along one axis for a direction component:
        # -1 -> index 0, 0 -> the full slice, +1 -> index -1.
        axis_select = {-1: 0, 0: slice(None, None, None), 1: -1}
        steps = (-1, 0, 1)

        # x varies fastest, then y, then z; the null direction is skipped.
        directions = [
            (dx, dy, dz)
            for dz in steps
            for dy in steps
            for dx in steps
            if (dx, dy, dz) != (0, 0, 0)
        ]

        cell_pairs = tuple(
            create_halo_pairs(
                self.occ_matrix.domain,
                Slice[axis_select[d[0]], axis_select[d[1]], axis_select[d[2]]],
                d)
            for d in directions
        )

        int_t = ctypes.c_int

        # Both "start" arrays begin with a single leading zero.
        boundary_starts = np.zeros(1, dtype=int_t)
        boundary_cells = np.zeros(0, dtype=int_t)

        halo_starts = np.zeros(1, dtype=int_t)
        halo_cells = np.zeros(0, dtype=int_t)

        shifts = np.zeros(0, dtype=ctypes.c_double)

        group_of_cell = np.zeros(0, dtype=int_t)

        for group_id, pair in enumerate(cell_pairs):
            group_boundary = pair[0]
            group_halo = pair[1]
            n_boundary = len(group_boundary)

            # NOTE(review): these append the group *lengths*, not running
            # offsets -- confirm that consumers expect lengths here.
            boundary_starts = np.append(boundary_starts,
                                        ctypes.c_int(n_boundary))
            halo_starts = np.append(halo_starts,
                                    ctypes.c_int(len(group_halo)))

            # Actual cell indices.
            boundary_cells = np.append(boundary_cells, group_boundary)
            halo_cells = np.append(halo_cells, group_halo)

            # Offset shifts for the periodic boundary.
            shifts = np.append(shifts, pair[2])

            # Group index repeated once per boundary cell (the reverse
            # lookup required for cuda).
            group_of_cell = np.append(
                group_of_cell, np.full(n_boundary, group_id, dtype=int_t))

        self._boundary_groups_start_end_indices = cuda_base.Array(
            boundary_starts, dtype=int_t)
        self._halo_groups_start_end_indices = cuda_base.Array(
            halo_starts, dtype=int_t)

        self._boundary_cell_groups = cuda_base.Array(boundary_cells,
                                                     dtype=int_t)
        self._halo_cell_groups = cuda_base.Array(halo_cells, dtype=int_t)

        self._halo_shifts = cuda_base.Array(shifts, dtype=ctypes.c_double)

        self._reverse_lookup = cuda_base.Array(group_of_cell, dtype=int_t)

        # Record which cell-array version these pairings were built for.
        self._version = self._occ_matrix.domain.cell_array.version
예제 #7
0
    def __init__(self, domain, cell_width, positions):
        """
        Divide ``domain`` into sub-cells and allocate occupancy storage.

        The interior of the domain is split into a whole number of sub-cells
        per dimension, so the actual sub-cell sizes (``self.cell_sizes``) are
        at least ``cell_width`` whenever the extent allows it. The grid and
        boundary are then padded on every side by enough sub-cells to cover
        ``domain.cell_edge_lengths`` plus one.

        :param domain: Domain providing ``boundary`` (read as low/high pairs
            for each of three dimensions) and ``cell_edge_lengths``.
        :param cell_width: Requested sub-cell width; must be positive. If it
            exceeds the extent of a dimension, that dimension gets exactly
            one sub-cell instead of failing with a division by zero.
        :param positions: Position data; stored as ``self.positions``.
        :raises AssertionError: if ``cell_width`` is not positive or a
            boundary pair is not strictly increasing.
        """
        self.domain = domain
        boundary = domain.boundary

        assert cell_width > 0, "bad cell width"
        assert boundary[1] > boundary[0], "nonsensical boundary"
        assert boundary[3] > boundary[2], "nonsensical boundary"
        assert boundary[5] > boundary[4], "nonsensical boundary"

        self.positions = positions

        self.cell_array = host.Array(ncomp=3, dtype=ctypes.c_int)
        self.cell_sizes = host.Array(ncomp=3, dtype=ctypes.c_double)

        # Interior extent per dimension.
        extents = [float(boundary[2 * dx + 1] - boundary[2 * dx])
                   for dx in range(3)]

        # Sub-cell counts considering only the interior. The count is
        # clamped to one: int() truncates to zero when cell_width is larger
        # than the extent, which previously raised ZeroDivisionError below.
        cell_array = [max(1, int(ext / cell_width)) for ext in extents]

        cell_sizes = [ext / count for ext, count in zip(extents, cell_array)]
        self.cell_sizes[:] = cell_sizes[:]

        # Padding in sub-cells per side, and the same padding as a length.
        pads = [
            int(math.ceil(
                self.domain.cell_edge_lengths[dx] / cell_sizes[dx])) + 1
            for dx in range(3)
        ]
        real_pads = [pads[dx] * cell_sizes[dx] for dx in range(3)]

        for dx in range(3):
            self.cell_array[dx] = cell_array[dx] + 2 * pads[dx]

        self.boundary = host.Array(ncomp=6, dtype=ctypes.c_double)
        for dx in range(3):
            self.boundary[2 * dx] = boundary[2 * dx] - real_pads[dx]
            self.boundary[2 * dx + 1] = boundary[2 * dx + 1] + real_pads[dx]

        # NOTE(review): this counts interior sub-cells only, while
        # self.cell_array and self.boundary include the padding. Confirm the
        # per-cell storage below is not meant to cover the padded grid.
        self.cell_count = cell_array[0] * cell_array[1] * cell_array[2]
        self.particle_layers = cuda_base.Array(ncomp=1, dtype=ctypes.c_int)
        self.cell_reverse_lookup = cuda_base.Array(ncomp=1, dtype=ctypes.c_int)
        self.cell_contents_count = cuda_base.Array(ncomp=self.cell_count,
                                                   dtype=ctypes.c_int)
        self.matrix = cuda_base.Matrix(nrow=self.cell_count,
                                       ncol=1,
                                       dtype=ctypes.c_int)
        self.num_layers = 0

        # Read the library source and header, then build the library.
        with open(
                str(ppmd.cuda.cuda_config.LIB_DIR) +
                '/cudaSubCellOccupancyMatrixSource.cu', 'r') as fh:
            _code = fh.read()
        with open(
                str(ppmd.cuda.cuda_config.LIB_DIR) +
                '/cudaSubCellOccupancyMatrixSource.h', 'r') as fh:
            _header = fh.read()
        _name = 'SubCellOccupancyMatrix'
        lib = cuda_build.simple_lib_creator(_header, _code, _name)
        self._sort_lib = lib['LayerSort']
        self._fill_lib = lib['PopMatrix']

        self.version_id = 0
예제 #8
0
    def apply(self):
        """
        Enforce the boundary conditions on the held state.

        With a communicator of size one the single-process path is used;
        otherwise the multi-process path runs. ``timer_apply`` spans the
        whole call and is always paused on exit, including when an
        exception propagates (it was previously started but never paused).
        """

        comm = self.state.domain.comm

        self.timer_apply.start()
        try:
            if comm.Get_size() == 1:
                self._apply_one_process()
            else:
                self._apply_multi_process()
        finally:
            self.timer_apply.pause()

    def _apply_one_process(self):
        """Run the single-process boundary condition loop over all local particles."""
        # BC code for one proc. Probably removable when restricting to
        # large parallel systems.

        self.timer_lib_overhead.start()

        # Build the particle loop on first use only.
        if self._one_process_pbc_lib is None:
            with open(
                    str(cuda_config.LIB_DIR) + '/cudaOneProcPBCSource.cu',
                    'r') as fh:
                _one_proc_pbc_code = fh.read()

            _one_proc_pbc_kernel = kernel.Kernel('_one_proc_pbc_kernel',
                                                 _one_proc_pbc_code,
                                                 None,
                                                 static_args={
                                                     'E0': ctypes.c_double,
                                                     'E1': ctypes.c_double,
                                                     'E2': ctypes.c_double
                                                 })

            self._one_process_pbc_lib = cuda_loop.ParticleLoop(
                _one_proc_pbc_kernel, {
                    'P': self.state.get_position_dat()(access.RW),
                    'BCFLAG': self._flag(access.INC_ZERO)
                })

        self.timer_lib_overhead.pause()

        _E = self.state.domain.extent

        self.timer_move.start()
        self._one_process_pbc_lib.execute(
            n=self.state.get_position_dat().npart_local,
            static_args={
                'E0': ctypes.c_double(_E[0]),
                'E1': ctypes.c_double(_E[1]),
                'E2': ctypes.c_double(_E[2])
            })

        # Collapse any positive flag value to exactly 1.
        res = self._flag[0]
        if res > 0:
            self._flag[0] = 1

        self.timer_move.pause()

    def _apply_multi_process(self):
        """Find escaped particles, group them by direction and hand them to the state."""
        if self._escape_guard_lib is None:
            # build lib
            self._escape_guard_lib = \
                cuda_build.build_static_libs('cudaNProcPBC')

        # --- init escape count ----
        if self._escape_count is None:
            self._escape_count = cuda_base.Array(ncomp=1,
                                                 dtype=ctypes.c_int32)
        self._escape_count[0] = 0

        # --- init escape dir count ----
        if self._escape_dir_count is None:
            self._escape_dir_count = cuda_base.Array(ncomp=26,
                                                     dtype=ctypes.c_int32)
        self._escape_dir_count[:] = 0

        # --- init escape list ----
        # Sized at three entries per local particle; grown when too small.
        nl = self.state.get_position_dat().npart_local
        nl3 = nl * 3

        if self._escape_list is None:
            self._escape_list = cuda_base.Array(ncomp=nl3,
                                                dtype=ctypes.c_int32)
        elif self._escape_list.ncomp < nl3:
            self._escape_list.realloc(nl3)

        # --- find escapees ---
        if nl > 0:
            cuda_runtime.cuda_err_check(
                self._escape_guard_lib['cudaNProcPBCStageOne'](
                    ctypes.c_int32(nl),
                    self.state.domain.boundary.ctypes_data,
                    self.state.get_position_dat().ctypes_data,
                    self.state.domain.get_shift().ctypes_data,
                    self._escape_count.ctypes_data,
                    self._escape_dir_count.ctypes_data,
                    self._escape_list.ctypes_data))

        # One column more than the largest per-direction count.
        dir_max = np.max(self._escape_dir_count[:]) + 1

        if self._escape_matrix is None:
            self._escape_matrix = cuda_base.Matrix(nrow=26,
                                                   ncol=dir_max,
                                                   dtype=ctypes.c_int32)

        elif self._escape_matrix.ncol < dir_max:
            self._escape_matrix.realloc(nrow=26, ncol=dir_max)

        # --- Populate escape matrix (essentially sort by direction)
        escape_count = self._escape_count[0]
        if (nl > 0) and (escape_count > 0):
            cuda_runtime.cuda_err_check(
                self._escape_guard_lib['cudaNProcPBCStageTwo'](
                    ctypes.c_int32(escape_count),
                    ctypes.c_int32(self._escape_matrix.ncol),
                    self._escape_list.ctypes_data,
                    self._escape_matrix.ctypes_data))

        self.state.move_to_neighbour(directions_matrix=self._escape_matrix,
                                     dir_counts=self._escape_dir_count)

        self.state.filter_on_domain_boundary()
예제 #9
0
    def __init__(self, kernel=None, dat_dict=None, shell_cutoff=None, sub_divide=None):
        """
        Generate and build the pair loop library and set up its cell list.

        :param kernel: Kernel to wrap; its ``name`` is used for the library
            and wrapper names.
        :param dat_dict: Data arguments, wrapped in an
            ``access.DatArgStore``. One entry must hold a
            ``cuda_data.PositionDat``, whose ``group`` is used.
        :param shell_cutoff: Cutoff passed to the domain cell decomposition
            and to the offset list computation.
        :param sub_divide: Sub-cell width; ``5.`` is used when ``None``.
        :raises AssertionError: if no position dat is found in ``dat_dict``.
        """
        self._dat_dict = access.DatArgStore(
            self._get_allowed_types(), dat_dict)
        self._cc = cuda_build.NVCC

        self._kernel = kernel
        self.shell_cutoff = shell_cutoff

        self.sub_divide_size = 5. if sub_divide is None else sub_divide

        self.loop_timer = ppmd.modules.code_timer.LoopTimer()
        self.wrapper_timer = opt.SynchronizedTimer(runtime.TIMER)

        self._components = {
            'LIB_PAIR_INDEX_0': '_i',
            'LIB_PAIR_INDEX_1': '_j',
            'LIB_NAME': str(self._kernel.name) + '_wrapper'
        }
        self._gather_size_limit = 4
        self._generate()

        # Build the library, then pick out the wrapper by name.
        built_lib = cuda_build.simple_lib_creator(
            self._generate_header_source(),
            self._components['LIB_SRC'],
            self._kernel.name,
        )
        self._lib = built_lib[self._components['LIB_NAME']]

        # The group is taken from the first position dat among the args.
        self._group = None
        for item in self._dat_dict.items():
            candidate = item[1][0]
            if issubclass(type(candidate), cuda_data.PositionDat):
                self._group = candidate.group
                break

        assert self._group is not None, "No cell to particle map found"

        new_decomp_flag = self._group.domain.cell_decompose(
            self.shell_cutoff
        )

        # Recreate the cell to particle map when cell_decompose reports a
        # new decomposition.
        if new_decomp_flag:
            self._group.get_cell_to_particle_map().create()

        self._key = (
            self.shell_cutoff,
            self._group.domain,
            self._group.get_position_dat(),
        )

        # Cell lists are shared through a class-level store keyed by
        # (cutoff, domain, position dat). An entry is rebuilt when missing
        # or when the decomposition changed.
        shared_lists = PairLoopCellByCell._cell_lists
        if self._key not in shared_lists.keys() or new_decomp_flag:
            shared_lists[self._key] = cuda_cell.SubCellOccupancyMatrix(
                domain=self._group.domain,
                cell_width=self.sub_divide_size,
                positions=self._group.get_position_dat(),
            )
        self.cell_list = shared_lists[self._key]

        self._cell_list_count = 0
        self._invocations = 0

        # Offset list computed from the cutoff and the sub-cell geometry.
        cell_offsets = cell.radius_cell_decompose(
            shell_cutoff, self.cell_list.cell_sizes)
        oslist = cell.convert_offset_tuples(
            cell_offsets,
            self.cell_list.cell_array,
            remove_zero=True
        )

        self.offset_list = cuda_base.Array(ncomp=len(oslist), dtype=ctypes.c_int)
        self.offset_list[:] = oslist[:]
예제 #10
0
    def move_to_neighbour(self, directions_matrix=None, dir_counts=None):
        """
        Move particles using the passed matrix where rows correspond to
        directions.

        :param directions_matrix: Matrix whose ``ctypes_data`` and ``ncol``
            are handed to the second stage of the move library.
        :param dir_counts: Per-direction send counts; copied into a host
            array and summed to size the send buffer.

        On return ``self.npart_local`` has grown by the total number of
        particles reported in the receive counts.
        """
        if self._move_lib is None:
            self._move_lib = cuda_build.build_static_libs('cudaMoveLib')

        int32 = ctypes.c_int32

        send_ranks, recv_ranks = \
            ppmd.mpi.cartcomm_get_move_send_recv_ranks(self._ccomm)

        self._move_send_ranks = ppmd.host.Array(initial_value=send_ranks,
                                                dtype=int32)
        self._move_recv_ranks = ppmd.host.Array(initial_value=recv_ranks,
                                                dtype=int32)
        self._move_recv_counts = ppmd.host.Array(ncomp=26, dtype=int32)
        self._move_send_counts = ppmd.host.Array(initial_value=dir_counts[:],
                                                 dtype=int32)

        # Per-dat pointer and per-particle byte count, in registration order.
        ndats = len(self.particle_dats)
        dat_handles = [getattr(self, dat_name)
                       for dat_name in self.particle_dats]
        dat_pointers = [handle.ctypes_data for handle in dat_handles]
        dat_bytes = [ctypes.sizeof(handle.dtype) * handle.ncomp
                     for handle in dat_handles]
        total_bytes = sum(dat_bytes)

        # ctypes arrays of length ndats holding the tables above.
        ptrs = (ndats * ctypes.c_void_p)(*dat_pointers)
        byte = (ndats * int32)(*dat_bytes)

        # Stage one takes the ranks and both count arrays.
        # NOTE(review): presumably this fills _move_recv_counts -- confirm.
        cuda_mpi.cuda_mpi_err_check(self._move_lib['cudaMoveStageOne'](
            int32(self._ccomm.py2f()),
            self._move_send_ranks.ctypes_data,
            self._move_recv_ranks.ctypes_data,
            self._move_send_counts.ctypes_data,
            self._move_recv_counts.ctypes_data))

        # Send buffer: total bytes of every outgoing particle.
        send_bytes = np.sum(dir_counts[:]) * total_bytes
        if self._move_send_buffer is None:
            self._move_send_buffer = cuda_base.Array(ncomp=send_bytes,
                                                     dtype=ctypes.c_int8)
        elif self._move_send_buffer.ncomp < send_bytes:
            self._move_send_buffer.realloc_zeros(send_bytes)

        # Receive buffer: total bytes of every incoming particle.
        recv_count = np.sum(self._move_recv_counts[:])
        recv_bytes = recv_count * total_bytes
        if self._move_recv_buffer is None:
            self._move_recv_buffer = cuda_base.Array(ncomp=recv_bytes,
                                                     dtype=ctypes.c_int8)
        elif self._move_recv_buffer.ncomp < recv_bytes:
            self._move_recv_buffer.realloc_zeros(recv_bytes)

        # Per-particle flag array: created, grown, or zeroed in place.
        flag_len = self.get_position_dat().npart_local + recv_count
        if self._empty_per_particle_flag is None:
            self._empty_per_particle_flag = cuda_base.Array(ncomp=flag_len,
                                                            dtype=int32)
        elif self._empty_per_particle_flag.ncomp < flag_len:
            self._empty_per_particle_flag.realloc_zeros(flag_len)
        else:
            self._empty_per_particle_flag.zero()

        # Make room in the dats for the incoming particles.
        self._resize_callback(self.npart_local + recv_count)

        # pack -> S/R unpack
        cuda_mpi.cuda_mpi_err_check(self._move_lib['cudaMoveStageTwo'](
            int32(self._ccomm.py2f()),
            int32(self.npart_local),
            int32(total_bytes),
            int32(ndats),
            self._move_send_counts.ctypes_data,
            self._move_recv_counts.ctypes_data,
            self._move_send_ranks.ctypes_data,
            self._move_recv_ranks.ctypes_data,
            directions_matrix.ctypes_data,
            int32(directions_matrix.ncol),
            self._move_send_buffer.ctypes_data,
            self._move_recv_buffer.ctypes_data,
            ctypes.byref(ptrs),
            ctypes.byref(byte),
            self._empty_per_particle_flag.ctypes_data))

        self.npart_local = self.npart_local + recv_count
예제 #11
0
    def __init__(self):
        """
        Create an empty state.

        No domain or position dat is attached yet and the particle counts
        are zero. The halo arrays are allocated with small fixed sizes, and
        the move buffers are left as ``None``.
        """
        self._domain = None

        self._cell_to_particle_map = cuda_cell.CellOccupancyMatrix()

        # Halo exchange bookkeeping.
        self._halo_manager = None
        self._halo_device_version = -1

        self._halo_sizes = None
        self._halo_cell_max_b = self._halo_cell_max_h = 0

        def _int32_array():
            # Every h/b halo array below starts as one int32 component.
            return cuda_base.Array(ncomp=1, dtype=ctypes.c_int32)

        self._halo_h_scan = _int32_array()
        self._halo_b_scan = _int32_array()

        self._halo_h_groups_se_indices = _int32_array()
        self._halo_b_groups_se_indices = _int32_array()

        self._halo_h_cell_indices = _int32_array()
        self._halo_b_cell_indices = _int32_array()

        self._halo_h_cell_counts = _int32_array()
        self._halo_b_cell_counts = _int32_array()

        self._halo_send_counts = ppmd.host.Array(ncomp=6, dtype=ctypes.c_int32)
        self._halo_tmp_space = cuda_base.Array(ncomp=10, dtype=ctypes.c_double)
        self._halo_position_shifts = cuda_base.Array(ncomp=18,
                                                     dtype=ctypes.c_double)

        self._position_dat = None

        # Names of the registered particle dats.
        self.particle_dats = []

        # Local and global particle counts.
        self._npart_local = 0
        self._npart = 0

        self.compressed = True
        """ Bool to determine if the held :class:`~cuda_data.ParticleDat` members have gaps in them. """

        self.uncompressed_n = False

        # Compression helpers.
        self._filter_method = None
        self._comp_replacement_find_method = _FindCompressionIndices()
        self._compression_lib = None

        # State version id.
        self.version_id = 0

        # Buffers and handles for moving particles between ranks.
        self._move_send_ranks = self._move_recv_ranks = None
        self._move_send_buffer = self._move_recv_buffer = None
        self._move_lib = None
        self._move_send_counts = self._move_recv_counts = None
        self._empty_per_particle_flag = None

        # Update hooks.
        # NOTE(review): a stray string "If true, all cell lists/ neighbour
        # lists should be rebuilt." stood here with no matching attribute.
        self.determine_update_funcs = []
        self.pre_update_funcs = []
        self.post_update_funcs = []
        self._gdm = None

        self._state_modifier_context = StateModifierContext(self)
        self.modifier = StateModifier(self)