コード例 #1
0
    def _prepare_tmp_space(self, max_size):
        """
        Make sure ``self._gather_space`` is large enough for ``max_size``
        items.

        The required size is the ``req_bytes`` attribute of the
        ``'PARTICLE_DAT_PARTITION'`` component multiplied by ``max_size``.
        If the current space's ``n`` is smaller than that, it is replaced
        by a new ``host.ThreadSpace`` of ``c_uint8`` with 100 elements of
        slack. The old space is discarded, not copied.

        :param max_size: multiplier applied to the per-item byte requirement.
        """
        partition = self._components['PARTICLE_DAT_PARTITION']
        bytes_needed = partition.req_bytes * max_size

        # Nothing to do while the existing space already covers the request.
        if self._gather_space.n >= bytes_needed:
            return

        # Over-allocate by 100 so small increases do not trigger a new
        # allocation on every call.
        self._gather_space = host.ThreadSpace(
            n=bytes_needed + 100,
            dtype=ctypes.c_uint8,
        )
コード例 #2
0
    def __init__(self, width, domain, entry_data, entry_map, free_space, dtype,
                 force_unit, energy_unit):
        """
        Store the configuration, build the compiled library and allocate the
        work arrays used when the instance is called.

        :param width: stored as ``self.width`` and passed to the state
            handler as its ``shell_cutoff``.
        :param domain: stored as ``self.domain``.
        :param entry_data: object providing ``local_size`` and
            ``local_offset`` (each copied into a 3-element INT64 array).
        :param entry_map: object providing ``cube_side_count`` (copied into
            the 3-element INT64 array ``self._global_size``).
        :param free_space: stored as ``self.free_space``.
        :param dtype: stored as ``self.dtype``; also the dtype of ``self._u``.
        :param force_unit: forwarded to ``self._build_lib``.
        :param energy_unit: forwarded to ``self._build_lib``.
        """

        def _int3(values):
            # 3-element INT64 array filled by slice assignment, so scalars
            # and sequences are both accepted exactly as numpy allows.
            out = np.zeros(3, dtype=INT64)
            out[:] = values
            return out

        self.width = width
        self.domain = domain
        self.entry_data = entry_data
        self.entry_map = entry_map
        self.free_space = free_space
        self.dtype = dtype

        self.sh = pairloop.state_handler.StateHandler(
            shell_cutoff=width,
            state=None,
        )

        # The library is built after the attributes above exist and before
        # any of the arrays below are created.
        self._build_lib(force_unit, energy_unit)

        self._global_size = _int3(entry_map.cube_side_count)

        # Cell count of the global grid with 6 extra cells per dimension.
        gx, gy, gz = self._global_size
        self._ncells = (gx + 6) * (gy + 6) * (gz + 6)

        self._local_size = _int3(self.entry_data.local_size[:])
        self._local_offset = _int3(self.entry_data.local_offset[:])
        self._u = np.zeros(1, dtype=self.dtype)

        self._ll_array = np.zeros(1, dtype=INT64)
        self._ll_ccc_array = np.zeros(self._ncells, dtype=INT64)

        # Small initial scratch buffers; ``self._tmp_n`` records their size.
        initial_n = 10
        self._tmp_n = initial_n
        for attr_name, buf_dtype in (
                ('_tmp_int_i', INT64),
                ('_tmp_int_j', INT64),
                ('_tmp_real_pi', REAL),
                ('_tmp_real_pj', REAL),
                ('_tmp_real_qi', REAL),
                ('_tmp_real_qj', REAL),
                ('_tmp_real_fi', REAL),
                ('_tmp_real_ui', REAL),
        ):
            setattr(self, attr_name,
                    host.ThreadSpace(n=initial_n, dtype=buf_dtype))

        self.exec_count = 0
コード例 #3
0
    def __init__(self, kernel=None, dat_dict=None, shell_cutoff=None):
        """
        Generate and build the library for ``kernel`` and set up the
        bookkeeping needed to execute it.

        :param kernel: stored as ``self._kernel``; its ``name`` is passed to
            ``build.simple_lib_creator``.
        :param dat_dict: wrapped in an ``access.DatArgStore`` together with
            the result of ``self._get_allowed_types()``.
        :param shell_cutoff: stored as ``self.shell_cutoff``.
        """
        allowed_types = self._get_allowed_types()
        self._dat_dict = access.DatArgStore(allowed_types, dat_dict)

        self._cc = build.TMPCC
        self._kernel = kernel
        self.shell_cutoff = shell_cutoff

        self.loop_timer = modules.code_timer.LoopTimer()
        self.wrapper_timer = opt.Timer(runtime.TIMER)
        self.list_timer = opt.Timer(runtime.TIMER)

        # Created before ``_generate()`` runs, matching the original order.
        self._gather_space = host.ThreadSpace(100, ctypes.c_uint8)
        self._generate()

        self._offset_list = host.Array(ncomp=27, dtype=ctypes.c_int)

        header_src = self._generate_header_source()
        lib_src = self._components['LIB_SRC']
        self._lib = build.simple_lib_creator(
            header_src,
            lib_src,
            self._kernel.name,
            CC=self._cc,
        )

        # Use the group of the first PositionDat found among the dats;
        # ``None`` when there is none.
        self._group = next(
            (
                item[1][0].group
                for item in self._dat_dict.items()
                if issubclass(type(item[1][0]), data.PositionDat)
            ),
            None,
        )

        # A missing group is tolerated: the cell list is only built when a
        # group was found.
        if self._group is not None:
            self._make_cell_list(self._group)

        self._kernel_execution_count = INT64(0)
        self._invocations = 0

        self._jstore = [
            host.Array(ncomp=100, dtype=ctypes.c_int)
            for _ in range(runtime.NUM_THREADS)
        ]
コード例 #4
0
    def __call__(self, positions, charges, forces, cells, potential=None):
        """
        Run the compiled library on the given particle data and return
        ``self._u[0]``.

        ``self._u[0]`` is reset to 0.0 before the call. The work array
        ``self._ll_array`` and the ``_tmp_*`` scratch buffers are regrown
        when the sizes reported by ``self.sh.pre_execute`` exceed their
        current capacity.

        The library is called with these positional arguments, in order:
        ``INT64(free_space)``, the domain extent pointer, global size,
        local size, local offset, ``INT64(runtime.NUM_THREADS)``,
        ``INT64(nlocal)``, ``INT64(ntotal)``, pointers for positions,
        charges, cells and forces, ``self._u``, ``self._ll_array``,
        ``self._ll_ccc_array``, the eight ``_tmp_*`` buffers
        (int_i, int_j, real_pi, real_pj, real_qi, real_qj, real_fi,
        real_ui), ``compute_pot``, ``pot_ptr`` and a by-reference
        ``exec_count``.

        :param positions: dat accessed with ``READ``; its ``npart_local`` is
            used to validate ``potential``.
        :param charges: dat accessed with ``READ``.
        :param forces: dat accessed with ``INC``.
        :param cells: dat accessed with ``READ``.
        :param potential: optional. A ``ParticleDat`` is additionally
            passed to the state handler with ``INC_ZERO`` access; any other
            object must provide a two-dimensional ``shape`` and
            ``ctypes_data``. When given, its ``ctypes_data`` is passed to
            the library and ``compute_pot`` is set to 1.
        :returns: ``self._u[0]`` after the library call.
        :raises RuntimeError: if ``self.domain.extent.dtype`` is not
            ``REAL`` or the library returns a negative value.
        """
        dats = {
            'p': positions(READ),
            'q': charges(READ),
            'f': forces(INC),
            'c': cells(READ)
        }

        have_potential = potential is not None
        if have_potential:
            if issubclass(type(potential), ParticleDat):
                dats['u'] = potential(INC_ZERO)
                assert potential[:].shape[0] >= positions.npart_local
            else:
                assert potential.shape[0] * potential.shape[1] >= \
                        positions.npart_local

        self._u[0] = 0.0

        nlocal, nhalo, ncell = self.sh.pre_execute(dats=dats)
        ntotal = nlocal + nhalo

        if have_potential:
            compute_pot = INT64(1)
            pot_ptr = potential.ctypes_data
        else:
            # The library still receives a valid pointer when no potential
            # output was requested. ``dummy_real`` stays referenced as a
            # local until this method returns.
            compute_pot = INT64(0)
            dummy_real = REAL(0)
            pot_ptr = ctypes.byref(dummy_real)

        ll_needed = ntotal + self._ncells
        if self._ll_array.shape[0] < ll_needed:
            self._ll_array = np.zeros(ll_needed + 100, dtype=INT64)

        tmp_needed = ncell * 15
        if self._tmp_n < tmp_needed:
            grown_n = tmp_needed + 100
            # (attribute, size multiplier, dtype); the pi/pj/fi buffers get
            # three entries per slot.
            for attr_name, scale, buf_dtype in (
                    ('_tmp_int_i', 1, INT64),
                    ('_tmp_int_j', 1, INT64),
                    ('_tmp_real_pi', 3, REAL),
                    ('_tmp_real_pj', 3, REAL),
                    ('_tmp_real_qi', 1, REAL),
                    ('_tmp_real_qj', 1, REAL),
                    ('_tmp_real_fi', 3, REAL),
                    ('_tmp_real_ui', 1, REAL),
            ):
                setattr(self, attr_name,
                        host.ThreadSpace(n=scale * grown_n, dtype=buf_dtype))
            self._tmp_n = grown_n

        if self.domain.extent.dtype is not REAL:
            raise RuntimeError("expected c_double extent")

        # Only a value equal to True selects 1; '27' and everything else
        # give 0. ``== True`` (not plain truthiness) is kept on purpose so
        # that other truthy values, e.g. non-empty strings, still map to 0.
        free_space = 0
        if not (self.free_space == '27') and self.free_space == True:
            free_space = 1

        exec_count = INT64(0)

        # Positional order is part of the library's calling convention.
        lib_args = (
            INT64(free_space),
            self.domain.extent.ctypes_data,
            self._global_size.ctypes.get_as_parameter(),
            self._local_size.ctypes.get_as_parameter(),
            self._local_offset.ctypes.get_as_parameter(),
            INT64(runtime.NUM_THREADS),
            INT64(nlocal),
            INT64(ntotal),
            self.sh.get_pointer(positions(READ)),
            self.sh.get_pointer(charges(READ)),
            self.sh.get_pointer(cells(READ)),
            self.sh.get_pointer(forces(INC)),
            self._u.ctypes.get_as_parameter(),
            self._ll_array.ctypes.get_as_parameter(),
            self._ll_ccc_array.ctypes.get_as_parameter(),
            self._tmp_int_i.ctypes_data,
            self._tmp_int_j.ctypes_data,
            self._tmp_real_pi.ctypes_data,
            self._tmp_real_pj.ctypes_data,
            self._tmp_real_qi.ctypes_data,
            self._tmp_real_qj.ctypes_data,
            self._tmp_real_fi.ctypes_data,
            self._tmp_real_ui.ctypes_data,
            compute_pot,
            pot_ptr,
            ctypes.byref(exec_count),
        )
        err = self._lib(*lib_args)

        self.exec_count += exec_count.value

        # post_execute runs before the error check, so it happens even when
        # the library reported a failure.
        self.sh.post_execute(dats=dats)
        if err < 0:
            raise RuntimeError("Negative error code: {}".format(err))

        return self._u[0]