def _prepare_tmp_space(self, max_size):
    """
    Make sure ``self._gather_space`` is large enough for ``max_size`` entries.

    The required size is ``max_size`` multiplied by the ``req_bytes`` attribute
    of the ``'PARTICLE_DAT_PARTITION'`` component. When the current space
    (measured by its ``n`` attribute) is smaller than that, it is replaced by a
    new ``host.ThreadSpace`` of ``ctypes.c_uint8`` elements; otherwise nothing
    is changed.

    :arg max_size: multiplier applied to the per-entry byte requirement.
    """
    partition = self._components['PARTICLE_DAT_PARTITION']
    needed = partition.req_bytes * max_size

    # Existing space already holds the request: keep it untouched.
    if self._gather_space.n >= needed:
        return

    # Allocate with 100 spare elements on top of the request.
    self._gather_space = host.ThreadSpace(
        n=needed + 100,
        dtype=ctypes.c_uint8
    )
def __init__(self, width, domain, entry_data, entry_map, free_space, dtype, force_unit, energy_unit):
    """
    Store the configuration, build the library and allocate the work arrays.

    :arg width: kept as ``self.width`` and passed as ``shell_cutoff`` to the
        internal ``StateHandler``.
    :arg domain: kept as ``self.domain``.
    :arg entry_data: object whose ``local_size`` and ``local_offset`` are
        copied into INT64 arrays of length three.
    :arg entry_map: object whose ``cube_side_count`` fills the three entries
        of the global size array.
    :arg free_space: kept as ``self.free_space``.
    :arg dtype: dtype of the single-element array ``self._u``.
    :arg force_unit: forwarded to ``self._build_lib``.
    :arg energy_unit: forwarded to ``self._build_lib``.
    """

    def _int64_triple(values):
        # Copy ``values`` into a freshly allocated INT64 array of length 3.
        triple = np.zeros(3, dtype=INT64)
        triple[:] = values
        return triple

    self.width = width
    self.domain = domain
    self.entry_data = entry_data
    self.entry_map = entry_map
    self.free_space = free_space
    self.dtype = dtype

    self.sh = pairloop.state_handler.StateHandler(
        state=None,
        shell_cutoff=width
    )
    self._build_lib(force_unit, energy_unit)

    self._global_size = _int64_triple(entry_map.cube_side_count)

    # Cell count of the global grid with every dimension enlarged by 6.
    # NOTE(review): presumably three padding cells per side -- confirm.
    side_x, side_y, side_z = self._global_size
    self._ncells = (side_x + 6) * (side_y + 6) * (side_z + 6)

    self._local_size = _int64_triple(self.entry_data.local_size[:])
    self._local_offset = _int64_triple(self.entry_data.local_offset[:])

    self._u = np.zeros(1, dtype=self.dtype)
    self._ll_array = np.zeros(1, dtype=INT64)
    self._ll_ccc_array = np.zeros(self._ncells, dtype=INT64)

    # Small initial scratch buffers, all of the same length.
    # NOTE(review): the __call__ shown next to this constructor reallocates
    # the pi/pj/fi buffers with 3 * bn elements while they start at bn here --
    # confirm the initial size is intentional.
    initial_len = 10
    self._tmp_n = initial_len
    scratch_layout = (
        ('_tmp_int_i', INT64),
        ('_tmp_int_j', INT64),
        ('_tmp_real_pi', REAL),
        ('_tmp_real_pj', REAL),
        ('_tmp_real_qi', REAL),
        ('_tmp_real_qj', REAL),
        ('_tmp_real_fi', REAL),
        ('_tmp_real_ui', REAL),
    )
    for attr_name, elem_type in scratch_layout:
        setattr(
            self,
            attr_name,
            host.ThreadSpace(n=initial_len, dtype=elem_type)
        )

    self.exec_count = 0
def __init__(self, kernel=None, dat_dict=None, shell_cutoff=None):
    """
    Store the arguments, generate and build the library, and create the cell
    list when one of the dats is a ``data.PositionDat``.

    :arg kernel: kernel object; its ``name`` attribute is passed to
        ``build.simple_lib_creator``. NOTE: the default of ``None`` is not
        usable in practice because ``self._kernel.name`` is read
        unconditionally.
    :arg dat_dict: passed to ``access.DatArgStore`` together with the result
        of ``self._get_allowed_types()``.
    :arg shell_cutoff: kept as ``self.shell_cutoff``.
    """
    allowed_types = self._get_allowed_types()
    self._dat_dict = access.DatArgStore(allowed_types, dat_dict)

    self._cc = build.TMPCC
    self._kernel = kernel
    self.shell_cutoff = shell_cutoff

    self.loop_timer = modules.code_timer.LoopTimer()
    self.wrapper_timer = opt.Timer(runtime.TIMER)
    self.list_timer = opt.Timer(runtime.TIMER)

    self._gather_space = host.ThreadSpace(100, ctypes.c_uint8)
    self._generate()
    self._offset_list = host.Array(ncomp=27, dtype=ctypes.c_int)

    header_source = self._generate_header_source()
    library_source = self._components['LIB_SRC']
    kernel_name = self._kernel.name
    self._lib = build.simple_lib_creator(
        header_source,
        library_source,
        kernel_name,
        CC=self._cc
    )

    # Take the group of the first dat whose type derives from PositionDat.
    self._group = None
    position_dats = (
        entry[1][0] for entry in self._dat_dict.items()
        if issubclass(type(entry[1][0]), data.PositionDat)
    )
    first_position_dat = next(position_dats, None)
    if first_position_dat is not None:
        self._group = first_position_dat.group

    # A missing group is tolerated: in that case no cell list is built.
    if self._group is not None:
        self._make_cell_list(self._group)

    self._kernel_execution_count = INT64(0)
    self._invocations = 0

    # One 100-component int array per thread.
    per_thread_store = []
    for _ in range(runtime.NUM_THREADS):
        per_thread_store.append(host.Array(ncomp=100, dtype=ctypes.c_int))
    self._jstore = per_thread_store
def __call__(self, positions, charges, forces, cells, potential=None):
    """
    Execute the compiled library on the given dats and return ``self._u[0]``.

    ``self._u[0]`` is reset to ``0.0`` before the library runs and the array
    is handed to the library, so the returned value is whatever the library
    wrote there (presumably the accumulated energy -- confirm).

    :arg positions: dat accessed with ``READ``.
    :arg charges: dat accessed with ``READ``.
    :arg forces: dat accessed with ``INC``.
    :arg cells: dat accessed with ``READ``.
    :arg potential: optional output for per-particle values. A
        ``ParticleDat`` is registered with ``INC_ZERO`` access; any other
        object must provide a two-dimensional ``shape`` and ``ctypes_data``.
    :returns: ``self._u[0]`` after the library call.
    :raises AssertionError: if ``potential`` has fewer entries than
        ``positions.npart_local``.
    :raises RuntimeError: if ``self.domain.extent.dtype`` is not ``REAL`` or
        if the library returns a negative error code.
    """
    dats = {
        'p': positions(READ),
        'q': charges(READ),
        'f': forces(INC),
        'c': cells(READ),
    }

    if potential is not None:
        if issubclass(type(potential), ParticleDat):
            dats['u'] = potential(INC_ZERO)
            assert potential[:].shape[0] >= positions.npart_local
        else:
            assert potential.shape[0] * potential.shape[1] >= \
                positions.npart_local

    self._u[0] = 0.0

    # Third value is reported as "max_cell" in the original debug output.
    nlocal, nhalo, max_cell = self.sh.pre_execute(dats=dats)
    ntotal = nlocal + nhalo

    # The potential pointer is fetched only after pre_execute has run.
    compute_pot = INT64(0)
    if potential is None:
        # The library still receives a valid address when nothing is stored.
        placeholder = REAL(0)
        pot_ptr = ctypes.byref(placeholder)
    else:
        compute_pot.value = 1
        pot_ptr = potential.ctypes_data

    # Grow the linked-list array to one slot per particle plus one per cell.
    ll_needed = ntotal + self._ncells
    if self._ll_array.shape[0] < ll_needed:
        self._ll_array = np.zeros(ll_needed + 100, dtype=INT64)

    # Grow the scratch buffers; (attribute, components per entry, type).
    tmp_needed = max_cell * 15
    if self._tmp_n < tmp_needed:
        new_len = tmp_needed + 100
        scratch_layout = (
            ('_tmp_int_i', 1, INT64),
            ('_tmp_int_j', 1, INT64),
            ('_tmp_real_pi', 3, REAL),
            ('_tmp_real_pj', 3, REAL),
            ('_tmp_real_qi', 1, REAL),
            ('_tmp_real_qj', 1, REAL),
            ('_tmp_real_fi', 3, REAL),
            ('_tmp_real_ui', 1, REAL),
        )
        for attr_name, ncomp, elem_type in scratch_layout:
            setattr(
                self,
                attr_name,
                host.ThreadSpace(n=ncomp * new_len, dtype=elem_type)
            )
        self._tmp_n = new_len

    if self.domain.extent.dtype is not REAL:
        raise RuntimeError("expected c_double extent")

    # Only a value equal to True selects 1; '27' and everything else give 0.
    # Equality (not truthiness) is kept so other truthy values still give 0.
    if self.free_space == '27':
        free_space_flag = 0
    else:
        free_space_flag = 1 if self.free_space == True else 0

    exec_count = INT64(0)
    lib_args = (
        INT64(free_space_flag),
        self.domain.extent.ctypes_data,
        self._global_size.ctypes.get_as_parameter(),
        self._local_size.ctypes.get_as_parameter(),
        self._local_offset.ctypes.get_as_parameter(),
        INT64(runtime.NUM_THREADS),
        INT64(nlocal),
        INT64(ntotal),
        self.sh.get_pointer(positions(READ)),
        self.sh.get_pointer(charges(READ)),
        self.sh.get_pointer(cells(READ)),
        self.sh.get_pointer(forces(INC)),
        self._u.ctypes.get_as_parameter(),
        self._ll_array.ctypes.get_as_parameter(),
        self._ll_ccc_array.ctypes.get_as_parameter(),
        self._tmp_int_i.ctypes_data,
        self._tmp_int_j.ctypes_data,
        self._tmp_real_pi.ctypes_data,
        self._tmp_real_pj.ctypes_data,
        self._tmp_real_qi.ctypes_data,
        self._tmp_real_qj.ctypes_data,
        self._tmp_real_fi.ctypes_data,
        self._tmp_real_ui.ctypes_data,
        compute_pot,
        pot_ptr,
        ctypes.byref(exec_count),
    )
    err = self._lib(*lib_args)

    self.exec_count += exec_count.value

    # post_execute runs before the error code is inspected.
    self.sh.post_execute(dats=dats)

    if err < 0:
        raise RuntimeError("Negative error code: {}".format(err))

    return self._u[0]