def __init__(self, occ_matrix=cuda_cell.OCCUPANCY_MATRIX):
    """
    Create the device-side halo bookkeeping arrays in an empty state.

    :param occ_matrix: occupancy matrix this object is attached to
        (defaults to ``cuda_cell.OCCUPANCY_MATRIX``).
    """
    self._occ_matrix = occ_matrix
    self._version = -1
    self._init = False

    int_t = ctypes.c_int

    # Cell group arrays and their 27-component index arrays.
    self._boundary_cell_groups = cuda_base.Array(dtype=int_t)
    self._boundary_groups_start_end_indices = cuda_base.Array(ncomp=27,
                                                              dtype=int_t)
    self._halo_cell_groups = cuda_base.Array(dtype=int_t)
    self._halo_groups_start_end_indices = cuda_base.Array(ncomp=27,
                                                          dtype=int_t)
    self._boundary_groups_contents_array = cuda_base.Array(dtype=int_t)
    self._exchange_sizes = cuda_base.Array(ncomp=26, dtype=int_t)

    # Populated later, once the cell pairs have been computed.
    self._halo_shifts = None
    self._reverse_lookup = None

    # Shift every array's version by -1 so that the first update is
    # guaranteed to happen.
    for arr in (self._boundary_cell_groups,
                self._boundary_groups_start_end_indices,
                self._halo_cell_groups,
                self._halo_groups_start_end_indices,
                self._boundary_groups_contents_array,
                self._exchange_sizes):
        arr.inc_version(-1)
def _cell_sort_setup(self):
    """
    Allocate the device arrays used by the cell sort and trigger a build.

    Array sizes come from ``self._n_func()`` and from
    ``self._domain.cell_count``; the matrix gets one row per cell and
    ``self._n_func() // cell_count`` columns.
    """
    int_t = ctypes.c_int
    domain = self._domain

    self.particle_layers = cuda_base.Array(ncomp=self._n_func(), dtype=int_t)
    self.cell_reverse_lookup = cuda_base.Array(ncomp=self._n_func(),
                                               dtype=int_t)
    self.cell_contents_count = cuda_base.Array(ncomp=domain.cell_count,
                                               dtype=int_t)

    self.matrix = cuda_base.device_buffer_2d(
        nrow=domain.cell_count,
        ncol=self._n_func() // domain.cell_count,
        dtype=int_t)

    # Cache the matrix shape under the names used elsewhere.
    self._n_layers = self.matrix.ncol
    self._n_cells = self.matrix.nrow

    self._setup = True
    self._build()
def __init__(self):
    """
    Create an unconfigured instance.

    Everything that depends on a domain or on positions starts as ``None``
    (or zero) and is expected to be filled in by later setup calls.
    """
    # Lifecycle flags.
    self._init = False
    self._setup = False

    # Public data containers.
    self.cell_contents_count = None
    """Number of particles per cell, determines number of layers per cell."""

    self.cell_reverse_lookup = None
    """Map between particle index and containing cell."""

    self.particle_layers = None
    """Stores which layer each particle is contained in."""

    self.matrix = None
    """The occupancy matrix."""

    # State filled in by the build step.
    self._p1_lib = None
    self._boundary = None
    self._cell_edge_lengths = None
    self._cell_array = None
    self.cell_in_halo_flag = None

    # State filled in by the setup step.
    self._n_func = None
    self._domain = None
    self._positions = None
    self._n_layers = 0

    # Update hooks and versioning.
    self.update_required = True
    self._update_set = False
    self._update_func = None
    self._update_func_pre = None
    self._update_func_post = None
    self.version_id = 0
    self.version_id_halo = 0

    self._timer = ppmd.opt.Timer()

    # Single-component scratch array used by the scan.
    self._ccc_scan = cuda_base.Array(ncomp=1, dtype=ctypes.c_int)
def __init__(self, state_in=None):
    """
    Create the boundary condition object for a state.

    :param state_in: state whose particles the boundary condition acts on.
    """
    self.state = state_in

    def _new_timer():
        # All timers share the same construction arguments.
        return ppmd.opt.Timer(cuda_runtime.TIMER, 0)

    self.timer_apply = _new_timer()
    self.timer_lib_overhead = _new_timer()
    self.timer_search = _new_timer()
    self.timer_move = _new_timer()

    # Library for the single-process periodic case; built lazily.
    self._one_process_pbc_lib = None

    # Library and buffers for the multi-process case; built lazily.
    self._escape_guard_lib = None
    self._escape_count = None
    self._escape_dir_count = None
    self._escape_list = None
    self._escape_matrix = None

    # One-component integer flag passed to the single-process loop.
    self._flag = cuda_base.Array(ncomp=1, dtype=ctypes.c_int)
def _update_cell_in_halo(self):
    """
    Rebuild ``cell_in_halo_flag`` when the domain's cell array has changed.

    Nothing is done if the cached copy of the cell array matches the first
    three components of ``self._domain.cell_array``. Otherwise one flag per
    cell is written: ``1`` on the outermost layer of cells, ``0`` on the
    layer immediately inside it and ``-1`` for every cell further in.
    NOTE(review): presumably these mean halo / boundary / interior -
    confirm against the consumers of the flag.
    """
    domain_cells = self._domain.cell_array
    cached = self._cell_array

    if cached is not None and \
            all(cached[ax] == domain_cells[ax] for ax in range(3)):
        return

    self._cell_array = np.array(domain_cells[:])
    nx = self._cell_array[0]
    ny = self._cell_array[1]
    nz = self._cell_array[2]
    total_cells = nx * ny * nz

    # Array axes are ordered (z, y, x).
    flags = np.ones((nz, ny, nx), dtype=ctypes.c_int)
    flags[1:nz - 1, 1:ny - 1, 1:nx - 1] = 0
    flags[2:nz - 2, 2:ny - 2, 2:nx - 2] = -1

    self.cell_in_halo_flag = cuda_base.Array(ncomp=total_cells,
                                             dtype=ctypes.c_int)
    self.cell_in_halo_flag[:] = flags.ravel()
def _get_pairs(self):
    """
    Compute the boundary/halo cell pairs for all 26 directions and upload
    them to the device arrays.

    For every direction ``create_halo_pairs`` is called with the domain,
    an index tuple selecting the matching face/edge/corner and the
    direction itself. Element 0 of each result is used as the boundary
    cells, element 1 as the halo cells and element 2 as the shift.

    NOTE(review): the ``*_groups_start_end_indices`` arrays are filled with
    a leading zero followed by the length of each group, not with
    cumulative offsets - confirm this is what the consumers expect.
    """
    domain = self.occ_matrix.domain

    # Direction component -> index selecting the corresponding cells:
    # -1 picks index 0, +1 picks index -1, 0 picks the full range.
    selector = {-1: 0, 0: slice(None, None, None), 1: -1}

    # x varies fastest, then y, then z; the null direction is skipped.
    directions = [
        (dx, dy, dz)
        for dz in (-1, 0, 1)
        for dy in (-1, 0, 1)
        for dx in (-1, 0, 1)
        if (dx, dy, dz) != (0, 0, 0)
    ]

    cell_pairs = tuple(
        create_halo_pairs(
            domain,
            Slice[selector[d[0]], selector[d[1]], selector[d[2]]],
            d)
        for d in directions
    )

    boundary_sizes = np.zeros(1, dtype=ctypes.c_int)
    boundary_cells = np.zeros(0, dtype=ctypes.c_int)
    halo_sizes = np.zeros(1, dtype=ctypes.c_int)
    halo_cells = np.zeros(0, dtype=ctypes.c_int)
    shifts = np.zeros(0, dtype=ctypes.c_double)
    reverse = np.zeros(0, dtype=ctypes.c_int)

    for group, pair in enumerate(cell_pairs):
        group_boundary = pair[0]
        group_halo = pair[1]
        n_boundary = len(group_boundary)

        # Number of cells in this group, boundary side and halo side.
        boundary_sizes = np.append(boundary_sizes, ctypes.c_int(n_boundary))
        halo_sizes = np.append(halo_sizes, ctypes.c_int(len(group_halo)))

        # The cell indices themselves.
        boundary_cells = np.append(boundary_cells, group_boundary)
        halo_cells = np.append(halo_cells, group_halo)

        # Position shift applied across a periodic boundary.
        shifts = np.append(shifts, pair[2])

        # Group index repeated once per boundary cell (needed on the
        # CUDA side to map a cell back to its group).
        reverse = np.append(
            reverse, np.full(n_boundary, group, dtype=ctypes.c_int))

    self._boundary_groups_start_end_indices = cuda_base.Array(
        boundary_sizes, dtype=ctypes.c_int)
    self._halo_groups_start_end_indices = cuda_base.Array(
        halo_sizes, dtype=ctypes.c_int)

    self._boundary_cell_groups = cuda_base.Array(boundary_cells,
                                                 dtype=ctypes.c_int)
    self._halo_cell_groups = cuda_base.Array(halo_cells, dtype=ctypes.c_int)

    self._halo_shifts = cuda_base.Array(shifts, dtype=ctypes.c_double)
    self._reverse_lookup = cuda_base.Array(reverse, dtype=ctypes.c_int)

    # Remember which version of the cell array these pairs belong to.
    self._version = self._occ_matrix.domain.cell_array.version
def __init__(self, domain, cell_width, positions):
    """
    Build a sub-cell grid over ``domain`` and allocate its device arrays.

    The interior of the domain is divided into cells at least
    ``cell_width`` wide. The grid is then padded on every side by enough
    cells to cover ``domain.cell_edge_lengths`` plus one extra cell, and
    ``self.cell_array`` / ``self.boundary`` describe that padded grid.

    :param domain: domain providing ``boundary`` (six components, low/high
        per dimension) and ``cell_edge_lengths``.
    :param cell_width: minimum width of a sub cell, must be positive and
        no larger than the domain extent in any dimension.
    :param positions: position dat the cells are built for.
    """
    self.domain = domain
    boundary = domain.boundary

    assert cell_width > 0, "bad cell width"
    assert boundary[1] > boundary[0], "nonsensical boundary"
    assert boundary[3] > boundary[2], "nonsensical boundary"
    assert boundary[5] > boundary[4], "nonsensical boundary"

    self.positions = positions

    self.cell_array = host.Array(ncomp=3, dtype=ctypes.c_int)
    self.cell_sizes = host.Array(ncomp=3, dtype=ctypes.c_double)

    # Cell counts and sizes considering the interior only.
    extents = [float(boundary[2 * ax + 1] - boundary[2 * ax])
               for ax in range(3)]
    cell_array = [int(extent / cell_width) for extent in extents]

    # A cell width larger than the extent gives zero cells and would
    # otherwise surface as an opaque ZeroDivisionError just below.
    assert min(cell_array) > 0, "cell width exceeds domain extent"

    cell_sizes = [extents[ax] / cell_array[ax] for ax in range(3)]
    self.cell_sizes[:] = cell_sizes[:]

    # Padding, in cells and in length, added on each side per dimension.
    pad_cells = [
        int(math.ceil(self.domain.cell_edge_lengths[ax] / cell_sizes[ax])) + 1
        for ax in range(3)
    ]
    pad_lengths = [pad_cells[ax] * cell_sizes[ax] for ax in range(3)]

    padded_cell_array = [cell_array[ax] + 2 * pad_cells[ax]
                         for ax in range(3)]
    for ax in range(3):
        self.cell_array[ax] = padded_cell_array[ax]

    self.boundary = host.Array(ncomp=6, dtype=ctypes.c_double)
    for ax in range(3):
        self.boundary[2 * ax] = boundary[2 * ax] - pad_lengths[ax]
        self.boundary[2 * ax + 1] = boundary[2 * ax + 1] + pad_lengths[ax]

    # Count the cells of the padded grid. The stored cell array and
    # boundary both include the padding, so the per-cell arrays below must
    # have one entry for every padded cell, not only the interior ones.
    self.cell_count = (padded_cell_array[0] *
                       padded_cell_array[1] *
                       padded_cell_array[2])

    self.particle_layers = cuda_base.Array(ncomp=1, dtype=ctypes.c_int)
    self.cell_reverse_lookup = cuda_base.Array(ncomp=1, dtype=ctypes.c_int)
    self.cell_contents_count = cuda_base.Array(ncomp=self.cell_count,
                                               dtype=ctypes.c_int)
    self.matrix = cuda_base.Matrix(nrow=self.cell_count, ncol=1,
                                   dtype=ctypes.c_int)
    self.num_layers = 0

    # Load the CUDA source and header and build the library from them.
    with open(
            str(ppmd.cuda.cuda_config.LIB_DIR) +
            '/cudaSubCellOccupancyMatrixSource.cu', 'r') as fh:
        _code = fh.read()
    with open(
            str(ppmd.cuda.cuda_config.LIB_DIR) +
            '/cudaSubCellOccupancyMatrixSource.h', 'r') as fh:
        _header = fh.read()

    _name = 'SubCellOccupancyMatrix'
    lib = cuda_build.simple_lib_creator(_header, _code, _name)
    self._sort_lib = lib['LayerSort']
    self._fill_lib = lib['PopMatrix']

    self.version_id = 0
def apply(self):
    """
    Enforce the boundary conditions on the held state.

    With a single process the one-process periodic boundary loop is built
    on first use and executed over the local particles. With several
    processes, escaping particles are located (stage one), sorted by
    direction into the escape matrix (stage two), handed to
    ``state.move_to_neighbour`` and the state is then filtered on the
    domain boundary.

    ``timer_apply`` covers the whole call.
    """
    comm = self.state.domain.comm

    self.timer_apply.start()

    if comm.Get_size() == 1:
        # BC code for one proc. Probably removable when restricting to
        # large parallel systems.
        self.timer_lib_overhead.start()

        if self._one_process_pbc_lib is None:
            with open(
                    str(cuda_config.LIB_DIR) +
                    '/cudaOneProcPBCSource.cu', 'r') as fh:
                _one_proc_pbc_code = fh.read()

            _one_proc_pbc_kernel = kernel.Kernel(
                '_one_proc_pbc_kernel',
                _one_proc_pbc_code,
                None,
                static_args={
                    'E0': ctypes.c_double,
                    'E1': ctypes.c_double,
                    'E2': ctypes.c_double
                })

            self._one_process_pbc_lib = cuda_loop.ParticleLoop(
                _one_proc_pbc_kernel,
                {
                    'P': self.state.get_position_dat()(access.RW),
                    'BCFLAG': self._flag(access.INC_ZERO)
                })

        self.timer_lib_overhead.pause()

        _E = self.state.domain.extent

        self.timer_move.start()
        self._one_process_pbc_lib.execute(
            n=self.state.get_position_dat().npart_local,
            static_args={
                'E0': ctypes.c_double(_E[0]),
                'E1': ctypes.c_double(_E[1]),
                'E2': ctypes.c_double(_E[2])
            })

        # Normalise any positive flag value to exactly 1.
        res = self._flag[0]
        if res > 0:
            self._flag[0] = 1

        self.timer_move.pause()

    else:
        # ----- multi-process path -----
        if self._escape_guard_lib is None:
            self._escape_guard_lib = \
                cuda_build.build_static_libs('cudaNProcPBC')

        # Total escape count, reset before every search.
        if self._escape_count is None:
            self._escape_count = cuda_base.Array(ncomp=1,
                                                 dtype=ctypes.c_int32)
        self._escape_count[0] = 0

        # Per-direction escape counts, reset before every search.
        if self._escape_dir_count is None:
            self._escape_dir_count = cuda_base.Array(ncomp=26,
                                                     dtype=ctypes.c_int32)
        self._escape_dir_count[:] = 0

        # Escape list sized at three entries per local particle; it is
        # only ever grown.
        nl = self.state.get_position_dat().npart_local
        nl3 = nl * 3
        if self._escape_list is None:
            self._escape_list = cuda_base.Array(ncomp=nl3,
                                                dtype=ctypes.c_int32)
        elif self._escape_list.ncomp < nl3:
            self._escape_list.realloc(nl3)

        # Stage one: find the escaping particles.
        if nl > 0:
            cuda_runtime.cuda_err_check(
                self._escape_guard_lib['cudaNProcPBCStageOne'](
                    ctypes.c_int32(nl),
                    self.state.domain.boundary.ctypes_data,
                    self.state.get_position_dat().ctypes_data,
                    self.state.domain.get_shift().ctypes_data,
                    self._escape_count.ctypes_data,
                    self._escape_dir_count.ctypes_data,
                    self._escape_list.ctypes_data))

        # One column more than the largest per-direction count; the
        # matrix is only ever grown.
        dir_max = np.max(self._escape_dir_count[:]) + 1
        if self._escape_matrix is None:
            self._escape_matrix = cuda_base.Matrix(nrow=26,
                                                   ncol=dir_max,
                                                   dtype=ctypes.c_int32)
        elif self._escape_matrix.ncol < dir_max:
            self._escape_matrix.realloc(nrow=26, ncol=dir_max)

        # Stage two: populate the escape matrix (essentially a sort by
        # direction).
        escape_count = self._escape_count[0]
        if (nl > 0) and (escape_count > 0):
            cuda_runtime.cuda_err_check(
                self._escape_guard_lib['cudaNProcPBCStageTwo'](
                    ctypes.c_int32(escape_count),
                    ctypes.c_int32(self._escape_matrix.ncol),
                    self._escape_list.ctypes_data,
                    self._escape_matrix.ctypes_data))

        # Called whether or not this rank has escapees.
        self.state.move_to_neighbour(directions_matrix=self._escape_matrix,
                                     dir_counts=self._escape_dir_count)

        self.state.filter_on_domain_boundary()

    # Match the start() at the top; without this the apply timer is left
    # running after the call returns.
    self.timer_apply.pause()
def __init__(self, kernel=None, dat_dict=None, shell_cutoff=None,
             sub_divide=None):
    """
    Generate and build the pair loop library for ``kernel`` and attach the
    sub-cell occupancy matrix it iterates over.

    :param kernel: kernel to wrap; its ``name`` is used for the library.
    :param dat_dict: dats passed to the kernel; one of them must be a
        ``cuda_data.PositionDat``, whose group supplies the domain.
    :param shell_cutoff: cutoff passed to the domain's cell decomposition
        and used to compute the cell offset list.
    :param sub_divide: sub-cell width; ``5.`` is used when ``None``.
    """
    self._dat_dict = access.DatArgStore(self._get_allowed_types(), dat_dict)

    self._cc = cuda_build.NVCC
    self._kernel = kernel
    self.shell_cutoff = shell_cutoff
    self.sub_divide_size = 5. if sub_divide is None else sub_divide

    self.loop_timer = ppmd.modules.code_timer.LoopTimer()
    self.wrapper_timer = opt.SynchronizedTimer(runtime.TIMER)

    self._components = {
        'LIB_PAIR_INDEX_0': '_i',
        'LIB_PAIR_INDEX_1': '_j',
        'LIB_NAME': str(self._kernel.name) + '_wrapper',
    }
    self._gather_size_limit = 4

    self._generate()

    built = cuda_build.simple_lib_creator(
        self._generate_header_source(),
        self._components['LIB_SRC'],
        self._kernel.name,
    )
    self._lib = built[self._components['LIB_NAME']]

    # The first position dat found determines the group.
    self._group = None
    for entry in self._dat_dict.items():
        candidate = entry[1][0]
        if issubclass(type(candidate), cuda_data.PositionDat):
            self._group = candidate.group
            break

    assert self._group is not None, "No cell to particle map found"

    group = self._group
    new_decomp_flag = group.domain.cell_decompose(self.shell_cutoff)
    if new_decomp_flag:
        group.get_cell_to_particle_map().create()

    # Occupancy matrices are shared between loops through a class-level
    # store keyed on (cutoff, domain, position dat).
    self._key = (self.shell_cutoff, group.domain, group.get_position_dat())
    store = PairLoopCellByCell._cell_lists

    if self._key not in store or new_decomp_flag:
        store[self._key] = cuda_cell.SubCellOccupancyMatrix(
            domain=group.domain,
            cell_width=self.sub_divide_size,
            positions=group.get_position_dat(),
        )

    self.cell_list = store[self._key]
    self._cell_list_count = 0
    self._invocations = 0

    # Cell offsets for this cutoff, with the zero offset removed.
    offset_tuples = cell.radius_cell_decompose(shell_cutoff,
                                               self.cell_list.cell_sizes)
    oslist = cell.convert_offset_tuples(
        offset_tuples,
        self.cell_list.cell_array,
        remove_zero=True
    )

    self.offset_list = cuda_base.Array(ncomp=len(oslist), dtype=ctypes.c_int)
    self.offset_list[:] = oslist[:]
def move_to_neighbour(self, directions_matrix=None, dir_counts=None):
    """
    Move particles using the passed matrix where rows correspond to
    directions.

    :param directions_matrix: matrix whose ``ctypes_data`` and ``ncol`` are
        passed to the second stage of the move library.
    :param dir_counts: per-direction send counts; copied into a host array
        and summed to size the send buffer.
    """
    int32 = ctypes.c_int32

    # One-off construction of the library and of the rank/count arrays.
    if self._move_lib is None:
        self._move_lib = cuda_build.build_static_libs('cudaMoveLib')

        send_ranks, recv_ranks = \
            ppmd.mpi.cartcomm_get_move_send_recv_ranks(self._ccomm)

        self._move_send_ranks = ppmd.host.Array(initial_value=send_ranks,
                                                dtype=int32)
        self._move_recv_ranks = ppmd.host.Array(initial_value=recv_ranks,
                                                dtype=int32)
        self._move_recv_counts = ppmd.host.Array(ncomp=26, dtype=int32)

    self._move_send_counts = ppmd.host.Array(initial_value=dir_counts[:],
                                             dtype=int32)

    # Arrays of length ndats holding each dat's pointer and its byte count
    # per particle.
    ndats = len(self.particle_dats)
    dat_handles = [getattr(self, dat_name) for dat_name in self.particle_dats]
    ptrs_a = [handle.ctypes_data for handle in dat_handles]
    byte_a = [ctypes.sizeof(handle.dtype) * handle.ncomp
              for handle in dat_handles]
    total_bytes = sum(byte_a)

    ptrs = (ndats * ctypes.c_void_p)(*ptrs_a)
    byte = (ndats * int32)(*byte_a)

    # Stage one: fills the receive counts.
    cuda_mpi.cuda_mpi_err_check(self._move_lib['cudaMoveStageOne'](
        int32(self._ccomm.py2f()),
        self._move_send_ranks.ctypes_data,
        self._move_recv_ranks.ctypes_data,
        self._move_send_counts.ctypes_data,
        self._move_recv_counts.ctypes_data))

    # Send buffer, grown on demand.
    total_particles = np.sum(dir_counts[:])
    send_bytes = total_particles * total_bytes
    if self._move_send_buffer is None:
        self._move_send_buffer = cuda_base.Array(ncomp=send_bytes,
                                                 dtype=ctypes.c_int8)
    elif self._move_send_buffer.ncomp < send_bytes:
        self._move_send_buffer.realloc_zeros(send_bytes)

    # Receive buffer, grown on demand.
    recv_count = np.sum(self._move_recv_counts[:])
    recv_bytes = recv_count * total_bytes
    if self._move_recv_buffer is None:
        self._move_recv_buffer = cuda_base.Array(ncomp=recv_bytes,
                                                 dtype=ctypes.c_int8)
    elif self._move_recv_buffer.ncomp < recv_bytes:
        self._move_recv_buffer.realloc_zeros(recv_bytes)

    # Per-particle flag array covering current plus incoming particles.
    new_ncomp = self.get_position_dat().npart_local + recv_count
    if self._empty_per_particle_flag is None:
        self._empty_per_particle_flag = cuda_base.Array(ncomp=new_ncomp,
                                                        dtype=int32)
    elif self._empty_per_particle_flag.ncomp < new_ncomp:
        self._empty_per_particle_flag.realloc_zeros(new_ncomp)
    else:
        self._empty_per_particle_flag.zero()

    # Make room in the dats for the incoming particles.
    self._resize_callback(self.npart_local + recv_count)

    # Stage two: pack, send/receive, unpack.
    cuda_mpi.cuda_mpi_err_check(self._move_lib['cudaMoveStageTwo'](
        int32(self._ccomm.py2f()),
        int32(self.npart_local),
        int32(total_bytes),
        int32(ndats),
        self._move_send_counts.ctypes_data,
        self._move_recv_counts.ctypes_data,
        self._move_send_ranks.ctypes_data,
        self._move_recv_ranks.ctypes_data,
        directions_matrix.ctypes_data,
        int32(directions_matrix.ncol),
        self._move_send_buffer.ctypes_data,
        self._move_recv_buffer.ctypes_data,
        ctypes.byref(ptrs),
        ctypes.byref(byte),
        self._empty_per_particle_flag.ctypes_data))

    self.npart_local = self.npart_local + recv_count
def __init__(self):
    """
    Create an empty state: no domain, no registered particle dats, and
    placeholder device arrays for the halo exchange.
    """
    int32 = ctypes.c_int32
    double = ctypes.c_double

    self._domain = None
    self._cell_to_particle_map = cuda_cell.CellOccupancyMatrix()

    # Halo exchange bookkeeping.
    self._halo_manager = None
    self._halo_device_version = -1
    self._halo_sizes = None
    self._halo_cell_max_b = 0
    self._halo_cell_max_h = 0

    # One-component placeholders for the halo (h) and boundary (b) arrays.
    self._halo_h_scan = cuda_base.Array(ncomp=1, dtype=int32)
    self._halo_b_scan = cuda_base.Array(ncomp=1, dtype=int32)
    self._halo_h_groups_se_indices = cuda_base.Array(ncomp=1, dtype=int32)
    self._halo_b_groups_se_indices = cuda_base.Array(ncomp=1, dtype=int32)
    self._halo_h_cell_indices = cuda_base.Array(ncomp=1, dtype=int32)
    self._halo_b_cell_indices = cuda_base.Array(ncomp=1, dtype=int32)
    self._halo_h_cell_counts = cuda_base.Array(ncomp=1, dtype=int32)
    self._halo_b_cell_counts = cuda_base.Array(ncomp=1, dtype=int32)

    self._halo_send_counts = ppmd.host.Array(ncomp=6, dtype=int32)
    self._halo_tmp_space = cuda_base.Array(ncomp=10, dtype=double)
    self._halo_position_shifts = cuda_base.Array(ncomp=18, dtype=double)

    self._position_dat = None

    # Names of the registered particle dats.
    self.particle_dats = []

    # Particle counts: local to this process, and global.
    self._npart_local = 0
    self._npart = 0

    # Do the ParticleDats have gaps in them?
    self.compressed = True
    """
    Bool to determine if the held :class:`~cuda_data.ParticleDat`
    members have gaps in them.
    """
    self.uncompressed_n = False

    # Compression helpers.
    self._filter_method = None
    self._comp_replacement_find_method = _FindCompressionIndices()
    self._compression_lib = None

    # State version id.
    self.version_id = 0

    # Buffers and library used when moving particles between processes;
    # all created on first use.
    self._move_send_ranks = None
    self._move_recv_ranks = None
    self._move_send_buffer = None
    self._move_recv_buffer = None
    self._move_lib = None
    self._move_send_counts = None
    self._move_recv_counts = None
    self._empty_per_particle_flag = None

    # NOTE(review): a stray docstring stood here without an attribute:
    # "If true, all cell lists/ neighbour lists should be rebuilt."
    # It presumably documented an attribute that has since been removed.

    # Callbacks consulted around list updates.
    self.determine_update_funcs = []
    self.pre_update_funcs = []
    self.post_update_funcs = []

    self._gdm = None

    self._state_modifier_context = StateModifierContext(self)
    self.modifier = StateModifier(self)