Ejemplo n.º 1
0
    def __init__(self, domain, potential=None, dat_dict=None, kernel=None):
        """
        Store the loop inputs and build the compiled library for the kernel.

        The kernel is taken from ``potential.kernel`` when a potential is
        passed, otherwise from ``kernel``.

        :param domain: stored on the instance as ``_domain``.
        :param potential: optional object exposing a ``kernel`` attribute.
        :param dat_dict: stored on the instance as ``_dat_dict``.
        :param kernel: kernel to use when no potential is passed.
        :raises ValueError: if neither ``potential`` nor ``kernel`` is passed.
        """
        self._domain = domain
        self._potential = potential
        self._dat_dict = dat_dict
        self._cc = build.TMPCC

        ##########
        # End of Rapaport initialisations.
        ##########

        self._temp_dir = runtime.BUILD_DIR
        if not os.path.exists(self._temp_dir):
            try:
                os.mkdir(self._temp_dir)
            except OSError:
                # Another process may have created the directory between the
                # existence check and the mkdir call; only a genuine failure
                # (the path still does not exist) is an error.
                if not os.path.exists(self._temp_dir):
                    raise

        if potential is not None:
            self._kernel = self._potential.kernel
        elif kernel is not None:
            self._kernel = kernel
        else:
            # Without a kernel the library build below cannot work; fail here
            # with a clear message instead of a later AttributeError.
            raise ValueError("pairloop error, no kernel passed.")

        self.loop_timer = opt.LoopTimer()

        self._code_init()

        self._lib = build.simple_lib_creator(self._generate_header_source(),
                                             self._generate_impl_source(),
                                             self._kernel.name,
                                             CC=self._cc)
Ejemplo n.º 2
0
    def __init__(self):
        """
        Compile the ``free_space_direct`` routine and keep a handle to it.

        The generated C function loops over all pairs ``ix < jx`` of the
        ``N`` entries, accumulates ``Q[ix] * Q[jx] / r`` and writes the total
        to ``phi[0]``.
        """

        header_code = r"""
        #include <math.h>
        #define INT64 int64_t
        #define REAL double
        """

        # Doubled braces are unescaped by the ``format`` call below.
        src_template = r"""
        
        extern "C" int free_space_direct(
            const INT64 N,
            const REAL * RESTRICT P,
            const REAL * RESTRICT Q,
            REAL * RESTRICT phi
        ){{

            REAL tmp_phi = 0.0;

            #pragma omp parallel for reduction(+:tmp_phi)
            for(INT64 ix=0 ; ix<N ; ix++){{
                REAL tmp_inner_phi = 0.0;
                
                const REAL iq = Q[ix];
                const REAL ip0 = P[3*ix + 0];
                const REAL ip1 = P[3*ix + 1];
                const REAL ip2 = P[3*ix + 2];

                
                #pragma omp simd reduction(+:tmp_inner_phi)
                for(INT64 jx=(ix+1) ; jx<N ; jx++){{
                    
                    const REAL jq = Q[jx];
                    const REAL jp0 = P[3*jx + 0];
                    const REAL jp1 = P[3*jx + 1];
                    const REAL jp2 = P[3*jx + 2];

                    const REAL d0 = ip0 - jp0;
                    const REAL d1 = ip1 - jp1;
                    const REAL d2 = ip2 - jp2;
                    
                    const REAL r2 = d0*d0 + d1*d1 + d2*d2;
                    const REAL r = sqrt(r2);

                    tmp_inner_phi += iq * jq / r;

                }}
                
                tmp_phi += tmp_inner_phi;

            }}
           
            phi[0] = tmp_phi;
            return 0;
        }}
        """
        src_code = src_template.format()

        compiled = build.simple_lib_creator(
            header_code=header_code,
            src_code=src_code,
            name="kmc_fmm_free_space_direct")
        self._lib = compiled['free_space_direct']
Ejemplo n.º 3
0
    def __init__(self, kernel=None, dat_dict=None):
        """
        Generate and compile the loop library for ``kernel``.

        :param kernel: kernel object; its ``name`` is used to name the
            compiled library.
        :param dat_dict: data arguments, wrapped in an ``access.DatArgStore``
            restricted to the types from ``_get_allowed_types()``.
        """

        self._dat_dict = access.DatArgStore(self._get_allowed_types(),
                                            dat_dict)

        self._cc = build.TMPCC

        self._temp_dir = runtime.BUILD_DIR
        if not os.path.exists(self._temp_dir):
            try:
                os.mkdir(self._temp_dir)
            except OSError:
                # Another process may have created the directory between the
                # existence check and the mkdir call; only a genuine failure
                # (the path still does not exist) is an error.
                if not os.path.exists(self._temp_dir):
                    raise

        self._kernel = kernel

        self.loop_timer = ppmd.modules.code_timer.LoopTimer()
        self.wrapper_timer = ppmd.opt.Timer(runtime.TIMER)

        self._components = None

        self._generate()

        self._lib = build.simple_lib_creator(self._generate_header_source(),
                                             self._components['LIB_SRC'],
                                             self._kernel.name,
                                             CC=self._cc)

        # Adopt the group of the first ParticleDat argument that has one.
        self._group = None

        for entry in self._dat_dict.items():
            dat = entry[1][0]
            if isinstance(dat, data.ParticleDat):
                group = dat.group
                if group is not None:
                    self._group = group
                    break
Ejemplo n.º 4
0
    def __init__(self, L, eps, domain, dtype, exclude_tuples=None):
        """
        Store the parameters, build the PBC helper library and set defaults.

        :param L: stored as ``self.L``.
        :param eps: stored as ``self.eps``.
        :param domain: object whose ``extent`` gives the three box lengths.
        :param dtype: stored as ``self.dtype``.
        :param exclude_tuples: offsets to exclude; when ``None`` all 26
            non-zero offsets with components in ``(-1, 0, 1)`` are used.
        """
        self.L = L
        self.eps = eps
        self.domain = domain
        self.dtype = dtype

        source_root = str(_SRC_DIR)
        with open(source_root + '/FMMSource/PBCSource.cpp') as cpp_file:
            cpp = cpp_file.read()
        with open(source_root + '/FMMSource/PBCSource.h') as hpp_file:
            hpp = hpp_file.read()

        self._lib = build.simple_lib_creator(hpp, cpp, 'pbc_setup_lib')

        # Run the 'test1' entry point of the freshly built library.
        self._lib['test1']()

        extent = self.domain.extent
        vol = extent[0] * extent[1] * extent[2]

        self.kappa = math.sqrt(math.pi / (vol ** (2. / 3.)))

        if exclude_tuples is None:
            offsets = (-1, 0, 1)
            exclude_tuples = [
                shift for shift in itertools.product(offsets, repeat=3)
                if shift != (0, 0, 0)
            ]

        self.exclude_tuples = exclude_tuples
Ejemplo n.º 5
0
def test_c_ephemeral_harm_1():
    """
    Compare the code generated by ``SphGenEphemeral`` against ``SphGen``.

    A C function is generated that evaluates both generators and records the
    largest absolute difference over all (l, m) components; the difference
    must stay below 1e-15 for every random angle pair.
    """

    L = 12
    N = 100

    correct_gen = SphGen(L - 1, '_A', 'thetaA', 'phiA')
    to_test_gen = SphGenEphemeral(L - 1, '_B', 'thetaB', 'phiB')

    compare_fmt = 'err = MAX(ABS({} - {}), err);'

    def _compare(lx, mx, part):
        # part 0 / 1 selects the first / second symbol returned by get_y_sym
        return compare_fmt.format(
            correct_gen.get_y_sym(lx, mx)[part],
            to_test_gen.get_y_sym(lx, mx)[part],
        )

    checks = {}
    for lx in range(L):
        for mx in range(-lx, lx + 1):
            checks[(lx, mx)] = (_compare(lx, mx, 0) + _compare(lx, mx, 1), )

    to_test_module = to_test_gen(checks)

    src = """
    #include <math.h>
    #define ABS(x) ((x) > (0) ? (x) : (-(x)))
    #define MAX(x, y) ((x) > (y) ? (x) : (y))

    extern "C"
    int test(
        const double thetaA,
        const double phiA,
        const double thetaB,
        const double phiB,
        double * err_out
    ){{
        double err = 0.0;
        {CORRECT_GEN}
        // ---------------
        {TO_TEST_GEN}

        *err_out = err;

        return 0;
    }}


    """.format(CORRECT_GEN=correct_gen.module, TO_TEST_GEN=to_test_module)

    lib = simple_lib_creator(header_code='', src_code=src)['test']

    rng = np.random.RandomState(149135315)

    for _ in range(N):

        angles = rng.uniform(low=0, high=6, size=2)
        first, second = angles[0], angles[1]
        err = ctypes.c_double(0)

        # Both generators receive the same pair of angles.
        lib(ctypes.c_double(first), ctypes.c_double(second),
            ctypes.c_double(first), ctypes.c_double(second),
            ctypes.byref(err))

        assert abs(err.value) < 10.**-15
Ejemplo n.º 6
0
    def __init__(self):
        """
        Build the Wigner matrix library from the bundled C++ sources and keep
        its ``get_matrix_set`` entry point in ``self._lib``.
        """
        source_root = str(_SRC_DIR)

        with open(source_root + '/FMMSource/WignerSource.cpp') as cpp_file:
            cpp = cpp_file.read()
        with open(source_root + '/FMMSource/WignerSource.h') as hpp_file:
            hpp = hpp_file.read()

        wigner_lib = simple_lib_creator(hpp, cpp, 'wigner_matrix')
        self._lib = wigner_lib['get_matrix_set']
Ejemplo n.º 7
0
    def _generate_host_libs(self):
        """
        Generate and compile the ``multipole_exp`` routine.

        The same template is rendered twice: as ``static inline`` (stored in
        ``create_multipole_src`` together with ``create_multipole_header``)
        and as ``extern "C"``, which is compiled and stored in
        ``_multipole_lib``.
        """

        sph_gen = self.sph_gen

        # One pair of accumulation statements per (l, m), with the radial
        # power advanced after each l.
        lines = [
            'double rhol = 1.0;\n',
            'double rholcharge = rhol * charge;\n',
        ]
        for lx in range(self.L):
            for mx in range(-lx, lx + 1):
                ind = lx * (lx + 1) + mx
                lines.append(
                    'out[{ind}] += {ylmm} * rholcharge;\n'.format(
                        ind=ind,
                        ylmm=str(sph_gen.get_y_sym(lx, -mx)[0])))
                lines.append(
                    'out[IM_OFFSET + {ind}] += {ylmm} * rholcharge;\n'.format(
                        ind=ind,
                        ylmm=str(sph_gen.get_y_sym(lx, -mx)[1])))
            lines.append('rhol *= radius;\n')
            lines.append('rholcharge = rhol * charge;\n')
        assign_gen = ''.join(lines)

        template = """
        #define IM_OFFSET ({IM_OFFSET})

        {DECLARE} int multipole_exp(
            const double charge,
            const double radius,
            const double theta,
            const double phi,
            double * RESTRICT out
        ){{
            {SPH_GEN}
            {ASSIGN_GEN}
            return 0;
        }}
        """
        header = str(sph_gen.header)

        shared_fields = {
            'SPH_GEN': str(sph_gen.module),
            'ASSIGN_GEN': str(assign_gen),
            'IM_OFFSET': (self.L**2),
        }

        inline_src = template.format(DECLARE=r'static inline',
                                     **shared_fields)
        extern_src = template.format(DECLARE=r'extern "C"', **shared_fields)

        self.create_multipole_header = header
        self.create_multipole_src = inline_src

        compiled = simple_lib_creator(header_code=header, src_code=extern_src)
        self._multipole_lib = compiled['multipole_exp']
Ejemplo n.º 8
0
def get_timer_accuracy():
    """
    Return one tick of ``std::chrono::high_resolution_clock`` as a double.

    A small C++ helper is compiled that converts a duration of one clock
    tick to ``std::chrono::duration<double>`` and writes its count out.
    """

    from ppmd.lib import build

    header_code = '''
        #include <chrono>
        extern "C" void get_chrono_tick(double *t);
        '''
    src_code = '''
        void get_chrono_tick(double *t){

            std::chrono::high_resolution_clock::duration t0(1);
            std::chrono::duration<double> t1 = t0;
            *t = (double) t1.count();
        }
        '''

    tick_lib = build.simple_lib_creator(header_code, src_code,
                                        'opt_tick_test')

    tick = ctypes.c_double(0.0)
    tick_lib['get_chrono_tick'](ctypes.byref(tick))

    return tick.value
Ejemplo n.º 9
0
    def __init__(self, name=None, seed=None):
        """
        Create a C++11 ``mt19937`` state held in a NumPy byte buffer.

        :param name: required name, stored as ``str(name)``.
        :param seed: seed for the generator; defaults to the current time in
            whole seconds.
        """
        assert name is not None, "name required"
        self.name = str(name)
        seed = int(time.time()) if seed is None else seed

        header_code = '''
        #include <random>
        #include <memory.h>
        #include <cstdint>
        #include <iostream>
        using namespace std;
        extern "C" int get_size();
        extern "C" int get_mt_instance(uint seed, int size, mt19937 *mt_buffer);
        extern "C" uint64_t get_rand(mt19937 *mt_buffer);
        '''

        src_code = '''
        int get_size(){
            int size = -1;
            size = sizeof(mt19937);
            return size;
        }
        int get_mt_instance(uint seed, int size, mt19937 *mt_buffer){
            mt19937 mt_tmp(seed);
            if (sizeof(mt_tmp) != size){ return -1; }
            memcpy(mt_buffer, &mt_tmp, size);
            return 0;
        }
        // development function
        uint64_t get_rand(mt19937 *mt_buffer){
            #define foo() (mt_buffer[0]())

            const uint64_t rr = foo();
            cout << rr << endl;
            return rr;
        }
        '''

        mt_lib = build.simple_lib_creator(header_code, src_code,
                                          'Cpp11MT19937Lib')

        # Ask the compiled code how many bytes one generator state occupies.
        mt_size = mt_lib['get_size']()

        assert mt_size > 0, "MT state size cannot be negative"
        self._mt_buffer = np.zeros(mt_size, dtype=ctypes.c_int8)

        # Seed a generator on the C++ side and copy its state into the buffer.
        buffer_ptr = self._mt_buffer.ctypes.data_as(
            ctypes.POINTER(ctypes.c_void_p))
        mt_flag = mt_lib['get_mt_instance'](ctypes.c_uint(seed),
                                            ctypes.c_int(mt_size),
                                            buffer_ptr)
        assert mt_flag > -1, "failed to make MT instance"
Ejemplo n.º 10
0
    def _build_lib(self, force_unit, energy_unit):
        """
        Compile the local cell library with the given unit constants.

        The header read from ``LocalCells.h`` is %-formatted with
        ``SUB_FORCE_UNIT`` and ``SUB_ENERGY_UNIT`` before compilation; the
        ``local_cell_by_cell`` entry point is stored in ``self._lib``.

        :param force_unit: value substituted (as ``str``) for
            ``SUB_FORCE_UNIT``.
        :param energy_unit: value substituted (as ``str``) for
            ``SUB_ENERGY_UNIT``.
        """
        source_root = str(_SRC_DIR)

        with open(source_root + '/FMMSource/LocalCells.cpp') as cpp_file:
            cpp = cpp_file.read()
        with open(source_root + '/FMMSource/LocalCells.h') as hpp_file:
            hpp_template = hpp_file.read()

        substitutions = {
            'SUB_FORCE_UNIT': str(force_unit),
            'SUB_ENERGY_UNIT': str(energy_unit),
        }
        hpp = hpp_template % substitutions

        local_lib = build.simple_lib_creator(hpp, cpp, 'fmm_local')
        self._lib = local_lib['local_cell_by_cell']
Ejemplo n.º 11
0
    def __init__(self, n, kernel, dat_dict):
        """
        Store the loop inputs and compile the library for ``kernel``.

        :param n: stored as ``self._N``.
        :param kernel: kernel object; its ``name`` names the compiled library.
        :param dat_dict: stored as ``self._dat_dict``.
        """
        self._N = n
        self._kernel = kernel
        self._dat_dict = dat_dict
        self._cc = build.TMPCC

        self.loop_timer = ppmd.modules.code_timer.LoopTimer()

        self._code_init()

        header_source = self._generate_header_source()
        impl_source = self._generate_impl_source()
        self._lib = build.simple_lib_creator(
            header_source, impl_source, self._kernel.name, CC=self._cc)
Ejemplo n.º 12
0
def test_exp_gen_1():
    """
    Check the code generated by ``SphExpGen`` against ``cmath.exp``.

    For random ``phi`` the generated real and imaginary parts for every
    index in ``[-lmax, lmax]`` must match ``exp(i * lx * phi)`` to 1e-13.
    """
    lmax = 40

    exp_gen = SphExpGen(lmax)

    statements = []
    for lx in range(-lmax, lmax + 1):
        statements.append(
            're_out[{lx}] = {exp};\n'.format(
                lx=lx, exp=exp_gen.get_e_sym(lx)[0]))
        statements.append(
            'im_out[{lx}] = {exp};\n'.format(
                lx=lx, exp=exp_gen.get_e_sym(lx)[1]))
    assign_gen = ''.join(statements)

    src = """
    extern "C" int test_exp_gen(
        const double phi,
        double * RESTRICT re_out,
        double * RESTRICT im_out
    ){{
        {EXP_GEN}
        {ASSIGN_GEN}
        return 0;
    }}
    """.format(
        EXP_GEN=str(exp_gen.module),
        ASSIGN_GEN=str(assign_gen)
    )
    header = str(exp_gen.header)

    lib = simple_lib_creator(header_code=header, src_code=src)['test_exp_gen']

    re_exp_c = np.zeros(2*lmax+1, dtype=c_double)
    im_exp_c = np.zeros_like(re_exp_c)

    tolerance = 10.**-13
    rng = np.random.RandomState(1452)
    for phi in rng.uniform(low=0.0, high=2.*math.pi, size=20):
        # The pointers are offset by lmax because the generated code writes
        # to indices -lmax..lmax.
        re_ptr = re_exp_c[lmax:].view().ctypes.get_as_parameter()
        im_ptr = im_exp_c[lmax:].view().ctypes.get_as_parameter()
        lib(c_double(phi), re_ptr, im_ptr)

        for lx in range(-lmax, lmax + 1):
            correct = cmath.exp(lx*phi*1.j)
            slot = lmax + lx
            re_err = abs(re_exp_c[slot] - correct.real)
            im_err = abs(im_exp_c[slot] - correct.imag)

            assert re_err < tolerance
            assert im_err < tolerance
Ejemplo n.º 13
0
    def build_unpack_lib(state):
        """
        Generate and compile ``move_unpack`` for the dats of ``state``.

        The generated routine copies each received particle from the receive
        buffer into every dat named in ``state.particle_dats``, filling the
        free slots first and appending after the previous particle count
        once they are used up.

        :param state: object exposing ``particle_dats`` and one attribute per
            dat name with ``ctype``, ``ncomp`` and ``dtype``.
        :return: the compiled ``move_unpack`` entry point.
        """

        dats = state.particle_dats

        # One pointer argument per dat.
        arg_decls = []
        for dat_name in dats:
            arg_decls.append(
                '{} * D_{}'.format(getattr(state, dat_name).ctype, dat_name))
        args = ','.join(arg_decls)

        # One memcpy per dat, advancing the receive buffer by its byte width.
        mv_template = '''
               memcpy(&D_{0}[pos * {1}], _R_BUF, {2});
               _R_BUF += {2};
            '''
        copies = []
        for dat_name in dats:
            copies.append(mv_template.format(
                str(dat_name),
                str(getattr(state, dat_name).ncomp),
                str(getattr(state, dat_name).ncomp *
                    ctypes.sizeof(getattr(state, dat_name).dtype))))
        mvs = ''.join(copies)

        hsrc = '''
        #include <string.h>
        #include <stdint.h>
        '''
        src_template = '''
        extern "C"
        int move_unpack(
            const int _recv_count,
            const int _num_free_slots,
            const int _prev_num_particles,
            const int * _free_slots,
            const uint8_t * _R_BUF,
            %(ARGS)s
        ){
        for(int ix = 0; ix < _recv_count; ix++){
            int pos;
            // prioritise filling spaces in dat.
            if (ix < _num_free_slots) {pos = _free_slots[ix];}
            else {pos = _prev_num_particles + ix - _num_free_slots;}
            %(MVS)s
        }
        return 0;}
        '''
        src = src_template % {'ARGS': args, 'MVS': mvs}

        unpack_lib = build.simple_lib_creator(hsrc, src, 'move_unpack')
        return unpack_lib['move_unpack']
Ejemplo n.º 14
0
def test_legendre_gen_1():
    """
    Check the code generated by ``ALegendrePolynomialGen`` against ``lpmv``.

    For random arguments in (-1, 1) every P_l^m with ``0 <= m <= l <= lmax``
    must agree to 1e-12, measured relative to the reference value when its
    magnitude is at least one and absolutely otherwise.
    """

    lmax = 24

    lpmv_gen = ALegendrePolynomialGen(lmax)

    statements = []
    for lx in range(lmax + 1):
        for mx in range(lx + 1):
            statements.append(
                'out[LMAX * {lx} + {mx}] = '.format(lx=lx, mx=mx) +
                str(lpmv_gen.get_p_sym(lx, mx)) + ';\n')
    assign_gen = ''.join(statements)

    src = """
    #define LMAX ({LMAX})
    extern "C" int test_lpmv_gen(
        const double theta,
        double * RESTRICT out
    ){{
        {LPMV_GEN}
        {ASSIGN_GEN}
        return 0;
    }}
    """.format(
        LPMV_GEN=str(lpmv_gen.module),
        ASSIGN_GEN=str(assign_gen),
        LMAX=lmax+1
    )
    header = str(lpmv_gen.header)

    lib = simple_lib_creator(header_code=header, src_code=src)['test_lpmv_gen']

    lpmv_c = np.zeros((lmax+1, lmax+1), dtype=c_double)

    tolerance = 10.**-12
    rng = np.random.RandomState(14523)
    for theta in rng.uniform(low=-0.999999, high=0.999999, size=10):
        lib(c_double(theta), lpmv_c.ctypes.get_as_parameter())
        for lx in range(lmax + 1):
            for mx in range(lx + 1):
                correct = lpmv(mx, lx, theta)
                if abs(correct) < 1.0:
                    rel = 1
                else:
                    rel = abs(correct)
                err = abs(correct - lpmv_c[lx, mx]) / rel
                assert err < tolerance
Ejemplo n.º 15
0
    def __init__(self, kernel=None, dat_dict=None, shell_cutoff=None):
        """
        Generate and compile the loop library and set up its work buffers.

        :param kernel: kernel object; its ``name`` names the compiled library.
        :param dat_dict: data arguments, wrapped in an ``access.DatArgStore``
            restricted to the types from ``_get_allowed_types()``.
        :param shell_cutoff: stored as ``self.shell_cutoff``.
        """

        allowed_types = self._get_allowed_types()
        self._dat_dict = access.DatArgStore(allowed_types, dat_dict)

        self._cc = build.TMPCC
        self._kernel = kernel
        self.shell_cutoff = shell_cutoff

        self.loop_timer = modules.code_timer.LoopTimer()
        self.wrapper_timer = opt.Timer(runtime.TIMER)
        self.list_timer = opt.Timer(runtime.TIMER)

        self._gather_space = host.ThreadSpace(100, ctypes.c_uint8)
        self._generate()

        self._offset_list = host.Array(ncomp=27, dtype=ctypes.c_int)

        header_source = self._generate_header_source()
        self._lib = build.simple_lib_creator(header_source,
                                             self._components['LIB_SRC'],
                                             self._kernel.name,
                                             CC=self._cc)

        # The group comes from the first PositionDat argument, if any.
        self._group = next(
            (entry[1][0].group for entry in self._dat_dict.items()
             if isinstance(entry[1][0], data.PositionDat)),
            None)

        # A missing group is tolerated: the cell list is only built when a
        # group was found.
        if self._group is not None:
            self._make_cell_list(self._group)

        self._kernel_execution_count = INT64(0)
        self._invocations = 0

        # One index array per thread.
        self._jstore = [
            host.Array(ncomp=100, dtype=ctypes.c_int)
            for _ in range(runtime.NUM_THREADS)
        ]
Ejemplo n.º 16
0
    def _transfer_unpack(self):
        """
        Exchange halo data for this dat through a compiled MPI helper.

        On first use (``self._exchange_lib is None``) the C++ routine
        ``HALO_EXCHANGE_PD`` is generated from the templates below,
        specialised on this dat's ``dtype`` and ``ncomp``, compiled with
        ``build.MPI_CC`` and cached on the instance. Every call then invokes
        that routine with the dat's data pointer, the local particle count
        and the halo manager / cell-to-particle map arrays of ``self.group``.

        For each of the six directions the generated code packs the particles
        of the boundary cells into a temporary buffer (adding the position
        shift when ``POS`` is defined), posts a non-blocking send and receive
        and, when the sort flag is positive, rebuilds the halo part of the
        cell list before waiting for the exchange to complete.
        """
        if self._exchange_lib is None:

            # Argument list shared by the declaration and the definition.
            _ex_args = '''
            %(DTYPE)s * RESTRICT DAT,         // DAT pointer
            int DAT_END,                      // end of dat.
            const double * RESTRICT SHIFT,    // position shifts
            const int f_MPI_COMM,             // F90 comm from mpi4py
            const int * RESTRICT SEND_RANKS,  // send directions
            const int * RESTRICT RECV_RANKS,  // recv directions
            const int * RESTRICT h_ind,       // halo indices
            const int * RESTRICT b_ind,       // local b indices
            const int * RESTRICT h_arr,       // h cell indices
            const int * RESTRICT b_arr,       // b cell indices
            const int * RESTRICT dir_counts,  // expected recv counts
            const int cell_offset,            // offset for cell list
            const int sort_flag,              // does the cl require updating
            int * RESTRICT ccc,               // cell contents count
            int * RESTRICT crl,               // cell reverse lookup
            int * RESTRICT cell_linked_list,  // cell list
            %(DTYPE)s * RESTRICT b_tmp        // tmp space for sending
            ''' % {
                'DTYPE': host.ctypes_map[self.dtype]
            }

            # Header template: includes, the RESTRICT keyword, the optional
            # POS define and the extern "C" declaration.
            _ex_header = '''
            #include <generic.h>
            #include <mpi.h>
            #include <iostream>
            using namespace std;
            #define RESTRICT %(RESTRICT)s

            %(POS_ENABLE)s

            extern "C" void HALO_EXCHANGE_PD(%(ARGS)s);
            '''

            # Implementation template; %(NCOMP)s and %(MPI_DTYPE)s are filled
            # in from this dat below.
            _ex_code = '''

            void HALO_EXCHANGE_PD(%(ARGS)s){

                // get mpi comm and rank
                MPI_Comm MPI_COMM = MPI_Comm_f2c(f_MPI_COMM);
                int rank = -1; MPI_Comm_rank( MPI_COMM, &rank );
                MPI_Status MPI_STATUS;
                MPI_Request sr;
                MPI_Request rr;

                //for( int dir=0 ; dir<6 ; dir++ ){
                //    cout << "dir: " << dir << " count: " << dir_counts[dir] << endl;;
                //}


                for( int dir=0 ; dir<6 ; dir++ ){
                    //for( int iy=0 ; iy<%(NCOMP)s ; iy++ ){
                    //    cout << "\tdir: " << dir << " comp " << iy << " shift " << SHIFT[dir*%(NCOMP)s + iy] << endl;
                    //}
                    const int b_s = b_ind[dir];
                    const int b_e = b_ind[dir+1];
                    const int b_c = b_e - b_s;

                    const int h_s = h_ind[dir];
                    const int h_e = h_ind[dir+1];
                    const int h_c = h_e - h_s;

                    //packing index;
                    int p_index = -1;

                    // packing loop
                    for( int cx=0 ; cx<b_c ; cx++ ){

                        // cell index
                        const int ci = b_arr[b_s + cx];

                        // loop over contents of cell.
                        int ix = cell_linked_list[cell_offset + ci];
                        while(ix > -1){

                            p_index ++;
                            for( int iy=0 ; iy<%(NCOMP)s ; iy++ ){

                                b_tmp[p_index * %(NCOMP)s + iy] = DAT[ix*%(NCOMP)s + iy];

                                //cout << "packed: " << b_tmp[p_index * %(NCOMP)s +iy];

                                #ifdef POS
                                    b_tmp[p_index * %(NCOMP)s + iy] += SHIFT[dir*%(NCOMP)s + iy];
                                #endif

                                //cout << " p_shifted: " << b_tmp[p_index * %(NCOMP)s +iy] << endl;
                            }

                        ix = cell_linked_list[ix];}
                    }

                    /*
                    cout << " SEND | ";
                    for( int tx=0 ; tx < (p_index + 1)*3; tx++){
                        cout << b_tmp[tx] << " |";
                    }
                    cout << endl;
                    */

                    // start the sendrecv as non blocking.
                    if (( SEND_RANKS[dir] > -1 ) && ( p_index > -1 ) ){
                    MPI_Isend((void *) b_tmp, (p_index + 1) * %(NCOMP)s, %(MPI_DTYPE)s,
                             SEND_RANKS[dir], rank, MPI_COMM, &sr);
                    }

                    if (( RECV_RANKS[dir] > -1 ) && ( dir_counts[dir] > 0 ) ){
                    MPI_Irecv((void *) &DAT[DAT_END * %(NCOMP)s], %(NCOMP)s * dir_counts[dir],
                              %(MPI_DTYPE)s, RECV_RANKS[dir], RECV_RANKS[dir], MPI_COMM, &rr);
                    }

                    //cout << "DAT_END: " << DAT_END << endl;



                    int DAT_END_T = DAT_END;
                    DAT_END += dir_counts[dir];

                    // build halo part of cell list whilst exchange occuring.

                    //#ifdef POS
                    if (sort_flag > 0){

                        for( int hxi=h_s ; hxi<h_e ; hxi++ ){

                            // index of a halo cell
                            const int hx = h_arr[ hxi ];

                            // number of particles in cell
                            const int hx_count = ccc[ hx ];

                            if (hx_count > 0) {

                            //cout << "\tsorting cell: " << hx << " ccc: " << hx_count << endl;
                                cell_linked_list[cell_offset + hx] = DAT_END_T;

                                for( int iy=0 ; iy<(hx_count-1) ; iy++ ){

                                    cell_linked_list[ DAT_END_T+iy ] = DAT_END_T + iy + 1;
                                    crl[ DAT_END_T+iy ] = hx;

                                }

                                cell_linked_list[ DAT_END_T + hx_count - 1 ] = -1;
                                crl[ DAT_END_T + hx_count -1 ] = hx;

                                DAT_END_T += hx_count;
                            }
                        }

                    }
                    //#endif

                    // after send has completed move to next direction.
                    if (( SEND_RANKS[dir] > -1 ) && ( p_index > -1 ) ){
                        MPI_Wait(&sr, MPI_STATUS_IGNORE);
                    }

                    if (( RECV_RANKS[dir] > -1 ) && ( dir_counts[dir] > 0 ) ){
                        MPI_Wait(&rr, MPI_STATUS_IGNORE);
                    }

                    //MPI_Barrier(MPI_COMM);


                //cout << "dir end " << dir << " -----------" << endl;

                }

                return;
            }
            '''

            # POS (which enables the position shift while packing) is only
            # defined when this object is exactly a PositionDat; subclasses
            # do not match this identity check.
            if type(self) is PositionDat:
                _pos_enable = '#define POS'
            else:
                _pos_enable = ''

            # Values substituted into the header and implementation templates.
            _ex_dict = {
                'ARGS': _ex_args,
                'RESTRICT': build.MPI_CC.restrict_keyword,
                'DTYPE': host.ctypes_map[self.dtype],
                'POS_ENABLE': _pos_enable,
                'NCOMP': self.ncomp,
                'MPI_DTYPE': host.mpi_type_map[self.dtype]
            }

            _ex_header %= _ex_dict
            _ex_code %= _ex_dict

            # Compile with the MPI compiler and cache the entry point so the
            # generation above only happens once per instance.
            self._exchange_lib = build.simple_lib_creator(
                _ex_header, _ex_code, 'HALO_EXCHANGE_PD',
                CC=build.MPI_CC)['HALO_EXCHANGE_PD']

        # End of creation code -----------------------------------------

        comm = self.group.domain.comm
        _h = self.group._halo_manager.get_halo_cell_groups()
        _b = self.group._halo_manager.get_boundary_cell_groups()

        # Request a rebuild of the halo part of the cell list only when the
        # cell-to-particle map is newer than its last halo version.
        if self.group._cell_to_particle_map.version_id > self.group._cell_to_particle_map.halo_version_id:
            _sort_flag = ctypes.c_int(1)
        else:
            _sort_flag = ctypes.c_int(-1)

        # Arguments are passed in the order declared in _ex_args.
        self._exchange_lib(
            self.ctypes_data, ctypes.c_int(self.npart_local),
            self.group._halo_manager.get_position_shifts().ctypes_data,
            ctypes.c_int(comm.py2f()),
            self.group._halo_manager.get_send_ranks().ctypes_data,
            self.group._halo_manager.get_recv_ranks().ctypes_data,
            _h[1].ctypes_data, _b[1].ctypes_data, _h[0].ctypes_data,
            _b[0].ctypes_data,
            self.group._halo_manager.get_dir_counts().ctypes_data,
            self.group._cell_to_particle_map.offset, _sort_flag,
            self.group._cell_to_particle_map.cell_contents_count.ctypes_data,
            self.group._cell_to_particle_map.cell_reverse_lookup.ctypes_data,
            self.group._cell_to_particle_map.cell_list.ctypes_data,
            self._tmp_halo_space.ctypes_data)
Ejemplo n.º 17
0
    def __init__(self, lmax):
        """
        Generate and compile the threaded ``sph_gen`` accumulation routine.

        The compiled function splits the ``N`` input points between OpenMP
        threads, accumulates each thread's contributions in its own stride of
        ``gtmp_out`` and finally adds all strides into ``out``.

        :param lmax: maximum degree; the output holds ``2 * (lmax + 1) ** 2``
            values (real parts followed by imaginary parts).
        """
        self._lmax = lmax
        im_of = (lmax+1) ** 2
        self._ncomp = 2 * im_of
        self.ncomp = self._ncomp

        sph_gen = SphGen(lmax)

        # r{k} holds the k-th power of 1/radius.
        radius_lines = [
            'const double iradius = 1.0/radius;\nconst double r0 = 1.0;\n'
        ]
        assign_lines = []
        for lx in range(lmax + 1):
            radius_lines.append(
                'const double r{lxp1} = r{lx} * iradius;\n'.format(
                    lxp1=lx+1, lx=lx))

            radial_factor = ' * r{lx};\n'.format(lx=lx+1)
            for mx in range(-lx, lx + 1):
                re_ind = lx * (lx + 1) + mx
                im_ind = re_ind + im_of
                assign_lines.append(
                    'tmp_out[{LM_IND}] += '.format(LM_IND=re_ind) +
                    str(sph_gen.get_y_sym(lx, mx)[0]) + radial_factor)
                assign_lines.append(
                    'tmp_out[{LM_IND}] += '.format(LM_IND=im_ind) +
                    str(sph_gen.get_y_sym(lx, mx)[1]) + radial_factor)

        radius_gen = ''.join(radius_lines)
        assign_gen = ''.join(assign_lines)

        src = """
        #include <omp.h>
        #define STRIDE ({STRIDE})

        extern "C" int sph_gen(
            const int num_threads,
            const int N,
            const double * RESTRICT radius_set,
            const double * RESTRICT theta_set,
            const double * RESTRICT phi_set,
            double * RESTRICT gtmp_out,
            double * RESTRICT out
        ){{
            for(int tx=0 ; tx<(num_threads*STRIDE) ; tx++){{
                gtmp_out[tx] = 0;
            }}
            omp_set_num_threads(num_threads);

            #pragma omp parallel default(none) shared(radius_set, theta_set, phi_set, gtmp_out)
            {{

                const int threadid = omp_get_thread_num();
                const int inner_num_threads = omp_get_num_threads();

                const int lower = N*threadid/inner_num_threads;
                const int upper = (threadid == (inner_num_threads - 1)) ? N : N*(threadid+1)/inner_num_threads;
                
                double * RESTRICT tmp_out = gtmp_out + threadid * STRIDE;
                
                for (int ix=lower; ix<upper ; ix++){{
                    const double radius = radius_set[ix];
                    const double theta = theta_set[ix];
                    const double phi = phi_set[ix];
                    {RADIUS_GEN}
                    {SPH_GEN}
                    {ASSIGN_GEN}
                }}

            }}

            for(int tx=0 ; tx<num_threads ; tx++){{
                for(int ix=0 ; ix<STRIDE ; ix++){{
                    out[ix] += gtmp_out[ix + tx*STRIDE];
                }}
            }}

            return 0;
        }}
        """.format(
            RADIUS_GEN=radius_gen,
            SPH_GEN=str(sph_gen.module),
            ASSIGN_GEN=str(assign_gen),
            STRIDE=self._ncomp
        )
        header = str(sph_gen.header)

        compiled = build.simple_lib_creator(header_code=header, src_code=src)
        self._lib = compiled['sph_gen']

        # Scratch space with one stride of ncomp doubles per thread.
        self._nthreads = runtime.NUM_THREADS
        self._gtmp = np.zeros(self._ncomp*self._nthreads, dtype=ctypes.c_double)
Ejemplo n.º 18
0
    def _generate_host_libs(self):

        sph_gen = self.sph_gen

        def cube_ind(L, M):
            return ((L) * ( (L) + 1 ) + (M) )

        assign_gen =  'double rhol = 1.0;\n'
        assign_gen += 'double rholcharge = rhol * charge;\n'
        for lx in range(self.L):
            for mx in range(-lx, lx+1):
                assign_gen += 'out[{ind}] += {ylmm} * rholcharge;\n'.format(
                        ind=cube_ind(lx, mx),
                        ylmm=str(sph_gen.get_y_sym(lx, -mx)[0])
                    )
                assign_gen += 'out[IM_OFFSET + {ind}] += {ylmm} * rholcharge;\n'.format(
                        ind=cube_ind(lx, mx),
                        ylmm=str(sph_gen.get_y_sym(lx, -mx)[1])
                    )
            assign_gen += 'rhol *= radius;\n'
            assign_gen += 'rholcharge = rhol * charge;\n'

        src = """
        #define IM_OFFSET ({IM_OFFSET})

        {DECLARE} int multipole_exp(
            const double charge,
            const double radius,
            const double theta,
            const double phi,
            double * RESTRICT out
        ){{
            {SPH_GEN}
            {ASSIGN_GEN}
            return 0;
        }}
        """
        header = str(sph_gen.header)
        
        src_lib = src.format(
            SPH_GEN=str(sph_gen.module),
            ASSIGN_GEN=str(assign_gen),
            IM_OFFSET=(self.L**2),
            DECLARE=r'static inline'
        )

        src = src.format(
            SPH_GEN=str(sph_gen.module),
            ASSIGN_GEN=str(assign_gen),
            IM_OFFSET=(self.L**2),
            DECLARE=r'extern "C"'
        )

        self.create_multipole_header = header
        self.create_multipole_src = src_lib

        self._multipole_lib = simple_lib_creator(header_code=header, src_code=src)['multipole_exp']


        # --- lib to create vector to dot product with local expansions --- 

        assign_gen =  'double rhol = 1.0;\n'
        assign_gen += 'double rholcharge = rhol * charge;\n'

        for lx in range(self.L):
            for mx in range(-lx, lx+1):
                assign_gen += 'out[{ind}] += {ylmm} * rholcharge;\n'.format(
                        ind=cube_ind(lx, mx),
                        ylmm=str(sph_gen.get_y_sym(lx, mx)[0])
                    )
                assign_gen += 'out[IM_OFFSET + {ind}] += (-1.0) * {ylmm} * rholcharge;\n'.format(
                        ind=cube_ind(lx, mx),
                        ylmm=str(sph_gen.get_y_sym(lx, mx)[1])
                    )
            assign_gen += 'rhol *= radius;\n'
            assign_gen += 'rholcharge = rhol * charge;\n'

        src = """
        #define IM_OFFSET ({IM_OFFSET})

        {DECLARE} int local_dot_vec(
            const double charge,
            const double radius,
            const double theta,
            const double phi,
            double * RESTRICT out
        ){{
            {SPH_GEN}
            {ASSIGN_GEN}
            return 0;
        }}
        """
        header = str(sph_gen.header)
        
        src_lib = src.format(
            SPH_GEN=str(sph_gen.module),
            ASSIGN_GEN=str(assign_gen),
            IM_OFFSET=(self.L**2),
            DECLARE=r'static inline'
        )

        src = src.format(
            SPH_GEN=str(sph_gen.module),
            ASSIGN_GEN=str(assign_gen),
            IM_OFFSET=(self.L**2),
            DECLARE=r'extern "C"'
        )

        self.create_dot_vec_header = header
        self.create_dot_vec_src = src_lib

        self._dot_vec_lib = simple_lib_creator(header_code=header, src_code=src)['local_dot_vec']


        # --- lib to evaluate local expansions --- 

        assign_gen = ''
        for lx in range(self.L):
            for mx in range(-lx, lx+1):
                reL = SphSymbol('moments[{ind}]'.format(ind=cube_ind(lx, mx)))
                imL = SphSymbol('moments[IM_OFFSET + {ind}]'.format(ind=cube_ind(lx, mx)))
                reY, imY = sph_gen.get_y_sym(lx, mx)
                phi_sym = cmplx_mul(reL, imL, reY, imY)[0]
                assign_gen += 'tmp_energy += rhol * ({phi_sym});\n'.format(phi_sym=str(phi_sym))

            assign_gen += 'rhol *= radius;\n'

        src = """
        #define IM_OFFSET ({IM_OFFSET})

        {DECLARE} int local_eval(
            const double radius,
            const double theta,
            const double phi,
            const double * RESTRICT moments,
            double * RESTRICT out
        ){{
            {SPH_GEN}
            double rhol = 1.0;
            double tmp_energy = 0.0;
            {ASSIGN_GEN}

            out[0] = tmp_energy;
            return 0;
        }}
        """
        
        src_lib = src.format(
            SPH_GEN=str(sph_gen.module),
            ASSIGN_GEN=str(assign_gen),
            IM_OFFSET=(self.L**2),
            DECLARE=r'static inline'
        )

        src = src.format(
            SPH_GEN=str(sph_gen.module),
            ASSIGN_GEN=str(assign_gen),
            IM_OFFSET=(self.L**2),
            DECLARE=r'extern "C"'
        )
        header = str(sph_gen.header)


        self.create_local_eval_header = header
        self.create_local_eval_src = src_lib

        self._local_eval_lib = simple_lib_creator(header_code=header, src_code=src)['local_eval']

        # lib to create local expansions
        
        tflops = common.new_flop_dict()
        tflops = common.add_flop_dict(tflops, sph_gen.flops)

        assign_gen = 'const double iradius = 1.0/radius;\n'
        assign_gen += 'double rhol = iradius;\n'
        for lx in range(self.L):
            for mx in range(-lx, lx+1):
                assign_gen += 'out[{ind}] += {ylmm} * rhol * charge;\n'.format(
                        ind=cube_ind(lx, mx),
                        ylmm=str(sph_gen.get_y_sym(lx, -mx)[0])
                    )
                assign_gen += 'out[IM_OFFSET + {ind}] += {ylmm} * rhol * charge;\n'.format(
                        ind=cube_ind(lx, mx),
                        ylmm=str(sph_gen.get_y_sym(lx, -mx)[1])
                    )
                tflops['+'] += 2
                tflops['*'] += 4
            assign_gen += 'rhol *= iradius;\n'
            tflops['*'] += 1
        
        self.flop_count_create_local_exp = tflops

        src = """
        #define IM_OFFSET ({IM_OFFSET})

        extern "C" int create_local_exp(
            const double charge,
            const double radius,
            const double theta,
            const double phi,
            double * RESTRICT out
        ){{
            {SPH_GEN}
            {ASSIGN_GEN}
            return 0;
        }}
        """.format(
            SPH_GEN=str(sph_gen.module),
            ASSIGN_GEN=str(assign_gen),
            IM_OFFSET=(self.L**2),
        )
        header = str(sph_gen.header)


        self.create_local_exp_header = header
        self.create_local_exp_src = """
        #define IM_OFFSET ({IM_OFFSET})

        static inline void inline_local_exp(
            const double charge,
            const double radius,
            const double theta,
            const double phi,
            double * RESTRICT out
        ){{
            {SPH_GEN}
            {ASSIGN_GEN}
            return;
        }}
        """.format(
            SPH_GEN=str(sph_gen.module),
            ASSIGN_GEN=str(assign_gen),
            IM_OFFSET=(self.L**2),
        )

        self._local_create_lib = simple_lib_creator(header_code=header, src_code=src)['create_local_exp']


        # --- lib to create vector to dot product and mutlipole expansions --- 

        assign_gen =  'double rhol = 1.0;\n'
        assign_gen += 'double rholcharge = rhol * charge;\n'
        flops = {'+': 0, '-': 0, '*': 0, '/': 0}

        for lx in range(self.L):
            for mx in range(-lx, lx+1):

                assign_gen += 'out_mul[{ind}] += {ylmm} * rholcharge;\n'.format(
                        ind=cube_ind(lx, mx),
                        ylmm=str(sph_gen.get_y_sym(lx, -mx)[0])
                    )
                assign_gen += 'out_mul[IM_OFFSET + {ind}] += {ylmm} * rholcharge;\n'.format(
                        ind=cube_ind(lx, mx),
                        ylmm=str(sph_gen.get_y_sym(lx, -mx)[1])
                    )
                assign_gen += 'out_vec[{ind}] += {ylmm} * rholcharge;\n'.format(
                        ind=cube_ind(lx, mx),
                        ylmm=str(sph_gen.get_y_sym(lx, mx)[0])
                    )
                assign_gen += 'out_vec[IM_OFFSET + {ind}] += (-1.0) * {ylmm} * rholcharge;\n'.format(
                        ind=cube_ind(lx, mx),
                        ylmm=str(sph_gen.get_y_sym(lx, mx)[1])
                    )

                flops['+'] += 4
                flops['*'] += 5

            assign_gen += 'rhol *= radius;\n'
            assign_gen += 'rholcharge = rhol * charge;\n'
            flops['*'] += 2

        flops['+'] += sph_gen.flops['*']
        flops['-'] += sph_gen.flops['*']
        flops['*'] += sph_gen.flops['*']
        flops['/'] += sph_gen.flops['*']

        src = """
        #define IM_OFFSET ({IM_OFFSET})

        {DECLARE} int local_dot_vec_multipole(
            const double charge,
            const double radius,
            const double theta,
            const double phi,
            double * RESTRICT out_vec,
            double * RESTRICT out_mul
        ){{
            {SPH_GEN}
            {ASSIGN_GEN}
            return 0;
        }}
        """
        header = str(sph_gen.header)
        
        src_lib = src.format(
            SPH_GEN=str(sph_gen.module),
            ASSIGN_GEN=str(assign_gen),
            IM_OFFSET=(self.L**2),
            DECLARE='static inline'
        )

        src = src.format(
            SPH_GEN=str(sph_gen.module),
            ASSIGN_GEN=str(assign_gen),
            IM_OFFSET=(self.L**2),
            DECLARE=r'extern "C"'
        )

        self.create_dot_vec_multipole_header = header
        self.create_dot_vec_multipole_src = src_lib
        self.create_dot_vec_multipole_flops = flops

        self._dot_vec_multipole_lib = simple_lib_creator(header_code=header, src_code=src)['local_dot_vec_multipole']
Ejemplo n.º 19
0
    def _generate_host_libs(self):
        """
        Generate and compile the host C libraries for local expansions.

        The C code is assembled from ``self.sph_gen`` (its ``header``,
        ``module`` and the symbols returned by ``get_y_sym``) for ``self.L``
        expansion orders. Three libraries are compiled with
        ``simple_lib_creator``:

        * ``self._local_eval_lib``: ``local_eval`` looping over ``n`` points,
          with one moments pointer per point.
        * ``self._local_create_lib``: ``create_local_exp`` looping over ``n``
          charges, with one output pointer per charge.
        * ``self._single_local_eval_lib``: ``local_eval`` for a single point.

        The header and a ``static inline`` flavour of the source are also
        stored on the instance as ``create_local_eval_*``,
        ``create_local_exp_*`` and ``create_single_local_eval_*``.
        """

        sph_gen = self.sph_gen

        def cube_ind(L, M):
            # Linear index of the (L, M) coefficient within one half of the
            # moments array.
            return ((L) * ((L) + 1) + (M))

        def local_eval_statements():
            # C statements that add to tmp_energy, for every (l, m), element 0
            # of cmplx_mul(moment, Y_lm) (presumably the real part of the
            # complex product) scaled by rhol; rhol is multiplied by radius
            # after each order l.
            pieces = []
            for lx in range(self.L):
                for mx in range(-lx, lx + 1):
                    ind = cube_ind(lx, mx)
                    re_moment = SphSymbol('moments[{ind}]'.format(ind=ind))
                    im_moment = SphSymbol(
                        'moments[IM_OFFSET + {ind}]'.format(ind=ind))
                    re_y, im_y = sph_gen.get_y_sym(lx, mx)
                    real_part = cmplx_mul(re_moment, im_moment, re_y, im_y)[0]
                    pieces.append('tmp_energy += rhol * ({phi_sym});\n'.format(
                        phi_sym=str(real_part)))
                pieces.append('rhol *= radius;\n')
            return ''.join(pieces)

        # --- lib to evaluate local expansions (many points per call) ---
        eval_statements = local_eval_statements()

        batched_eval_template = """
        #define IM_OFFSET ({IM_OFFSET})
        #define REAL double
        #define INT64 int64_t

        {DECLARE} int local_eval(
            const INT64 n,
            const REAL * RESTRICT hradius,
            const REAL * RESTRICT htheta,
            const REAL * RESTRICT hphi,
            const REAL * RESTRICT * RESTRICT hmoments,
            REAL * RESTRICT out
        ){{

            
            #pragma omp parallel for
            for(INT64 ix=0 ; ix<n ; ix++){{
                const REAL radius = hradius[ix];
                const REAL theta = htheta[ix];
                const REAL phi = hphi[ix];
                const REAL * RESTRICT moments = hmoments[ix];

                {SPH_GEN}
                REAL rhol = 1.0;
                REAL tmp_energy = 0.0;
                {ASSIGN_GEN}
                out[ix] = tmp_energy;
            }}
            return 0;
        }}
        """

        # The same template is rendered twice: with C linkage for the
        # compiled library and as a static inline function for the stored
        # source.
        batched_eval_extern = batched_eval_template.format(
            DECLARE=r'extern "C"',
            SPH_GEN=str(sph_gen.module),
            ASSIGN_GEN=eval_statements,
            IM_OFFSET=(self.L**2))
        batched_eval_inline = batched_eval_template.format(
            DECLARE=r'static inline',
            SPH_GEN=str(sph_gen.module),
            ASSIGN_GEN=eval_statements,
            IM_OFFSET=(self.L**2))

        eval_header = str(sph_gen.header)

        self.create_local_eval_header = eval_header
        self.create_local_eval_src = batched_eval_inline

        self._local_eval_lib = simple_lib_creator(
            header_code=eval_header,
            src_code=batched_eval_extern)['local_eval']

        # --- lib to create local expansions ---

        # rhol starts at 1/radius and is multiplied by 1/radius after each
        # order l.
        create_pieces = [
            'const REAL iradius = 1.0/radius;\n',
            'REAL rhol = iradius;\n',
        ]
        for lx in range(self.L):
            for mx in range(-lx, lx + 1):
                ind = cube_ind(lx, mx)
                y_sym = sph_gen.get_y_sym(lx, -mx)
                create_pieces.append(
                    'out[{ind}] += {ylmm} * rhol * charge;\n'.format(
                        ind=ind, ylmm=str(y_sym[0])))
                create_pieces.append(
                    'out[IM_OFFSET + {ind}] += {ylmm} * rhol * charge;\n'.format(
                        ind=ind, ylmm=str(y_sym[1])))
            create_pieces.append('rhol *= iradius;\n')
        create_statements = ''.join(create_pieces)

        create_src = """


        extern "C" int create_local_exp(
            const INT64 n,
            const REAL * RESTRICT hcharge,
            const REAL * RESTRICT hradius,
            const REAL * RESTRICT htheta,
            const REAL * RESTRICT hphi,
            REAL * RESTRICT * RESTRICT hout
        ){{

            #pragma omp parallel for
            for(INT64 ix=0 ; ix<n ; ix++){{
                const INT64 OFFSET = ix * IM_OFFSET * 2;
                const REAL charge = hcharge[ix];
                const REAL radius = hradius[ix];
                const REAL theta = htheta[ix];
                const REAL phi = hphi[ix];
                REAL * RESTRICT out = hout[ix];
                {SPH_GEN}
                {ASSIGN_GEN}
            }}
            return 0;
        }}
        """.format(
            SPH_GEN=str(sph_gen.module),
            ASSIGN_GEN=create_statements,
        )

        # For this library the macros live in the header rather than in the
        # source; only the appended literal is formatted, not the generator's
        # own header text.
        create_header = str(sph_gen.header) + '''
        #define IM_OFFSET ({IM_OFFSET})
        #define REAL double
        #define INT64 int64_t
        '''.format(IM_OFFSET=(self.L**2))

        self.create_local_exp_header = create_header
        self.create_local_exp_src = """
        #define IM_OFFSET ({IM_OFFSET})

        static inline void inline_local_exp(
            const double charge,
            const double radius,
            const double theta,
            const double phi,
            double * RESTRICT out
        ){{
            {SPH_GEN}
            {ASSIGN_GEN}
            return;
        }}
        """.format(
            SPH_GEN=str(sph_gen.module),
            ASSIGN_GEN=create_statements,
            IM_OFFSET=(self.L**2),
        )

        self._local_create_lib = simple_lib_creator(
            header_code=create_header, src_code=create_src)['create_local_exp']

        # --- lib to evaluate a single local expansion ---

        single_statements = local_eval_statements()

        single_eval_template = """
        #define IM_OFFSET ({IM_OFFSET})

        {DECLARE} int local_eval(
            const double radius,
            const double theta,
            const double phi,
            const double * RESTRICT moments,
            double * RESTRICT out
        ){{
            {SPH_GEN}
            double rhol = 1.0;
            double tmp_energy = 0.0;
            {ASSIGN_GEN}

            out[0] = tmp_energy;
            return 0;
        }}
        """

        single_eval_inline = single_eval_template.format(
            DECLARE=r'static inline',
            SPH_GEN=str(sph_gen.module),
            ASSIGN_GEN=single_statements,
            IM_OFFSET=(self.L**2))

        single_eval_extern = single_eval_template.format(
            DECLARE=r'extern "C"',
            SPH_GEN=str(sph_gen.module),
            ASSIGN_GEN=single_statements,
            IM_OFFSET=(self.L**2))
        single_header = str(sph_gen.header)

        self.create_single_local_eval_header = single_header
        self.create_single_local_eval_src = single_eval_inline

        self._single_local_eval_lib = simple_lib_creator(
            header_code=single_header,
            src_code=single_eval_extern)['local_eval']
Ejemplo n.º 20
0
    def _init_host_old_lib(self):
        """
        Build and return the compiled ``old_long_range_energy`` function.

        The generated C function loops over ``num_particles`` particles,
        converts each position from ``old_positions`` to spherical
        coordinates, calls ``local_eval`` (taken from
        ``self._lee.create_local_eval_header`` / ``create_local_eval_src``)
        on ``existing_multipole`` and writes the result multiplied by the
        particle charge to ``out[px]``.

        :return: the ``'old_long_range_energy'`` entry of the library created
            by ``simple_lib_creator``.
        """
        half_ncomp = self.L**2
        ncomp = (self.L**2)*2

        def _re_lm(l, m):
            # Index of the (l, m) coefficient within the first half of the
            # expansion array.
            return l**2 + l + m

        kernel_src = r'''
        
        {LOCAL_EVAL_HEADER}
        {LOCAL_EVAL_SRC}


        static inline void spherical(
            const REAL dx, const REAL dy, const REAL dz,
            REAL *radius, REAL *theta, REAL *phi
        ){{
            const REAL dx2 = dx*dx;
            const REAL dx2_p_dy2 = dx2 + dy*dy;
            const REAL d2 = dx2_p_dy2 + dz*dz;
            *radius = sqrt(d2);
            *theta = atan2(sqrt(dx2_p_dy2), dz);
            *phi = atan2(dy, dx);       
            return;
        }}


        extern "C" int old_long_range_energy(
            const INT64 num_particles,
            const REAL * RESTRICT old_positions,
            const REAL * RESTRICT old_charges,
            const REAL * RESTRICT existing_multipole,
            REAL * RESTRICT out
        )
        {{

            #pragma omp parallel for schedule(dynamic)
            for(INT64 px=0 ; px<num_particles ; px++){{

                const REAL old_charge = old_charges[px];
                const REAL opx = old_positions[3*px + 0];
                const REAL opy = old_positions[3*px + 1];
                const REAL opz = old_positions[3*px + 2];

 
                REAL oradius, otheta, ophi;
                spherical(opx, opy, opz, &oradius, &otheta, &ophi);

                double tmp_energy = 0.0;
                local_eval(
                    oradius,
                    otheta,
                    ophi,
                    existing_multipole,
                    &tmp_energy
                );

                out[px] = tmp_energy * old_charge;

            }}

            return 0;
        }}
        '''.format(
            LOCAL_EVAL_HEADER=self._lee.create_local_eval_header,
            LOCAL_EVAL_SRC=self._lee.create_local_eval_src
        )

        # Assemble the header entries in order: includes, basic types and
        # sizes, dipole correction components, then the l=1 coefficient
        # indices (real part first, imaginary part offset by half_ncomp).
        header_nodes = [
            Include('omp.h'),
            Include('stdio.h'),
            Include('math.h'),
            Define('INT64', 'int64_t'),
            Define('REAL', 'double'),
            Define('NCOMP', str(ncomp)),
            Define('HALF_NCOMP', str(half_ncomp)),
        ]

        dipole = self.lrc.dipole_correction
        for axis_index, axis in enumerate('XYZ'):
            header_nodes.append(
                Define('DIPOLE_S' + axis, str(dipole[axis_index])))

        l1_terms = (('1P1', 1), ('1_0', 0), ('1N1', -1))
        for suffix, m in l1_terms:
            header_nodes.append(Define('RE_' + suffix, str(_re_lm(1, m))))
        for suffix, m in l1_terms:
            header_nodes.append(
                Define('IM_' + suffix, str(_re_lm(1, m) + half_ncomp)))

        header_code = str(Module(tuple(header_nodes)))

        compiled = simple_lib_creator(header_code=header_code, src_code=kernel_src)
        return compiled['old_long_range_energy']
Ejemplo n.º 21
0
    def __init__(self,
                 fmm,
                 domain,
                 boundary_condition,
                 local_exp_eval,
                 mirror_direction=None):
        """
        Generate and compile the ``self_interaction`` C library.

        The C source is specialised at construction time: the domain extent
        and the periodic image offsets are baked into the generated code, and
        the compiled function is stored as ``self.lib``.

        :arg fmm: stored as ``self.fmm``.
        :arg domain: object whose ``extent`` provides the three box lengths.
        :arg boundary_condition: one of ``BCType.PBC``, ``BCType.FREE_SPACE``,
            ``BCType.NEAREST`` or ``BCType.FF_ONLY``.
        :arg local_exp_eval: stored as ``self._lee``.
        :arg mirror_direction: optional indexable of three flags; a truthy
            flag gives a coefficient of -1.0 for that axis (1.0 otherwise)
            when forming mirror charge positions. ``None`` disables the
            mirror code.
        """

        allowed_bcs = (BCType.PBC, BCType.FREE_SPACE, BCType.NEAREST,
                       BCType.FF_ONLY)
        assert boundary_condition in allowed_bcs

        self.fmm = fmm
        self.domain = domain
        self._bc = boundary_condition
        self._lee = local_exp_eval

        # Accumulate minus the inverse distance to every non-zero image
        # offset of the domain; emitted as DOMAIN_27_ENERGY in the header.
        extent = self.domain.extent
        image_energy = 0.0
        for offset in cell_offsets:
            if offset != (0, 0, 0):
                image_vec = np.array((extent[0] * offset[0],
                                      extent[1] * offset[1],
                                      extent[2] * offset[2]))
                image_energy -= 1.0 / np.linalg.norm(image_vec)
        self._new27direct = image_energy

        free_space = self._bc == BCType.FREE_SPACE

        # Free space only considers the primary image.
        co = ((0, 0, 0), ) if free_space else cell_offsets

        mirror_preloop = ''
        mirror_block = ''

        if mirror_direction is not None:
            # convert mirror directions to coefficients
            mcoeff = {
                'mcoeff' + axis: (-1.0 if mirror_direction[axis_index] else 1.0)
                for axis_index, axis in enumerate('xyz')
            }

            # position of the old mirror charge (once per particle)
            mirror_preloop = '''
            const REAL mopx = opx * {mcoeffx};
            const REAL mopy = opy * {mcoeffy};
            const REAL mopz = opz * {mcoeffz};
            '''.format(**mcoeff)

            # position of the new mirror charge (once per proposed move)
            mirror_parts = [
                '''
            const REAL mnpx = npx * {mcoeffx};
            const REAL mnpy = npy * {mcoeffy};
            const REAL mnpz = npz * {mcoeffz};
            '''.format(**mcoeff)
            ]

            for oxi, ox in enumerate(co):

                at_origin = ox[0] == 0 and ox[1] == 0 and ox[2] == 0
                oxi_zero = 0 if at_origin else 1

                mirror_parts.append('''

                // offset of the old charge
                const REAL mdpx{oxi} = dox{oxi} + mopx;
                const REAL mdpy{oxi} = doy{oxi} + mopy;
                const REAL mdpz{oxi} = doz{oxi} + mopz;
                
                // diff to the old mirror in offset
                const REAL mddx{oxi} = mdpx{oxi} - npx;
                const REAL mddy{oxi} = mdpy{oxi} - npy;
                const REAL mddz{oxi} = mdpz{oxi} - npz;
                
                // remove old energy
                energy27 -= 2.0 / sqrt(mddx{oxi}*mddx{oxi} + mddy{oxi}*mddy{oxi} + mddz{oxi}*mddz{oxi});
                
                
                // offset of the new charge
                const REAL mnpx{oxi} = dox{oxi} + mnpx;
                const REAL mnpy{oxi} = doy{oxi} + mnpy;
                const REAL mnpz{oxi} = doz{oxi} + mnpz;

                // diff to the new mirror in the offset
                const REAL mnddx{oxi} = mnpx{oxi} - npx;
                const REAL mnddy{oxi} = mnpy{oxi} - npy;
                const REAL mnddz{oxi} = mnpz{oxi} - npz;

                // add on the new contrib
                energy27 += 1.0 / sqrt(mnddx{oxi}*mnddx{oxi} + mnddy{oxi}*mnddy{oxi} + mnddz{oxi}*mnddz{oxi});

                // the factor 2 required for b_bp with the non-mirrors
                energy27 += o_bbp{oxi};


                // compute b_b, first with non-mirror
                const REAL do_opx{oxi} = opx - dpx{oxi};
                const REAL do_opy{oxi} = opy - dpy{oxi};
                const REAL do_opz{oxi} = opz - dpz{oxi};
                energy27 -= ({oxi_zero} == 0) ? 0.0 : 1.0 / sqrt(do_opx{oxi}*do_opx{oxi} + do_opy{oxi}*do_opy{oxi} + do_opz{oxi}*do_opz{oxi});
                
                // with the mirror
                const REAL do_mopx{oxi} = opx - mdpx{oxi};
                const REAL do_mopy{oxi} = opy - mdpy{oxi};
                const REAL do_mopz{oxi} = opz - mdpz{oxi};
                energy27 += 1.0 / sqrt(do_mopx{oxi}*do_mopx{oxi} + do_mopy{oxi}*do_mopy{oxi} + do_mopz{oxi}*do_mopz{oxi});
                

                '''.format(oxi=str(oxi), oxi_zero=str(oxi_zero)))

            mirror_block = ''.join(mirror_parts)

        # Per-offset displacement constants, declared before the particle
        # loop.
        preloop = ''.join(
            '''
            const REAL dox{oxi} = EX * {OX};
            const REAL doy{oxi} = EY * {OY};
            const REAL doz{oxi} = EZ * {OZ};
            '''.format(
                oxi=str(oxi),
                OX=str(ox[0]),
                OY=str(ox[1]),
                OZ=str(ox[2]),
            ) for oxi, ox in enumerate(co))

        # Unless in free space, restart energy27 from the precomputed domain
        # term and add the inverse distance between the new position and
        # every image of the old position.
        bc27 = ''
        if not free_space:
            bc27_parts = ['energy27 = (DOMAIN_27_ENERGY);\n']
            for oxi, _ in enumerate(cell_offsets):
                bc27_parts.append('''
                const REAL dpx{oxi} = dox{oxi} + opx;
                const REAL dpy{oxi} = doy{oxi} + opy;
                const REAL dpz{oxi} = doz{oxi} + opz;

                const REAL ddx{oxi} = dpx{oxi} - npx;
                const REAL ddy{oxi} = dpy{oxi} - npy;
                const REAL ddz{oxi} = dpz{oxi} - npz;
                
                const REAL o_bbp{oxi} = 1.0 / sqrt(ddx{oxi}*ddx{oxi} + ddy{oxi}*ddy{oxi} + ddz{oxi}*ddz{oxi});
                energy27 += o_bbp{oxi};
                '''.format(oxi=str(oxi)))
            bc27 = ''.join(bc27_parts)

        # FF_ONLY zeroes the direct contribution after it has been computed.
        ff_only_block = 'energy27 = 0.0;' if self._bc == BCType.FF_ONLY else ''

        kernel_src = r'''
        extern "C" int self_interaction(
            const INT64 store_stride,
            const INT64 num_particles,
            const INT64 * RESTRICT exclusive_sum,
            const REAL * RESTRICT old_positions,
            const REAL * RESTRICT old_charges,
            const REAL * RESTRICT new_positions,
            REAL * RESTRICT out
        )
        {{
            
            {preloop}

            #pragma omp parallel for schedule(dynamic)
            for(INT64 px=0 ; px<num_particles ; px++){{
                
                const REAL coeff = old_charges[px] * old_charges[px];
                const REAL opx = old_positions[3*px + 0];
                const REAL opy = old_positions[3*px + 1];
                const REAL opz = old_positions[3*px + 2];

                {mirror_preloop}

                const INT64 nprop = exclusive_sum[px+1] - exclusive_sum[px];

                #pragma omp simd
                for(INT64 movii=0 ; movii<nprop ; movii++){{
                    const INT64 movi = movii + exclusive_sum[px];
                    const REAL npx = new_positions[3*movi + 0];
                    const REAL npy = new_positions[3*movi + 1];
                    const REAL npz = new_positions[3*movi + 2];
                    
                    const REAL dx = opx - npx;
                    const REAL dy = opy - npy;
                    const REAL dz = opz - npz;
                    
                    REAL energy27 = (1.0 / sqrt(dx*dx + dy*dy + dz*dz));

                    {bc27}

                    {mirror_block}

                    {ff_only_block}
                    
                    REAL tmp_energy = energy27;
                    out[store_stride * px + movii] = coeff * tmp_energy;
                }}

            }}

            return 0;
        }}
        '''.format(preloop=preloop,
                   mirror_preloop=mirror_preloop,
                   bc27=bc27,
                   mirror_block=mirror_block,
                   ff_only_block=ff_only_block)

        header_nodes = (
            Include('stdio.h'),
            Include('math.h'),
            Define('DOMAIN_27_ENERGY', str(self._new27direct)),
            Define('INT64', 'int64_t'),
            Define('REAL', 'double'),
            Define('EX', str(self.domain.extent[0])),
            Define('EY', str(self.domain.extent[1])),
            Define('EZ', str(self.domain.extent[2])),
            # debugging helpers available to the generated code
            Define(
                'PRINTF(A,B,C)',
                r'printf("%s:\t%f,\t%s:\t%f,\t%s:\t%f\n", #A, A, #B, B, #C, C);'
            ),
            Define('PRINTF1(A)', r'printf("%s:\t%f\n", #A, A);'),
        )
        header_code = str(Module(header_nodes))

        compiled = simple_lib_creator(header_code=header_code,
                                      src_code=kernel_src)
        self.lib = compiled['self_interaction']
Ejemplo n.º 22
0
    def exchange_cell_counts(self):
        """
        Exchange cell occupancy counts between processes.

        Lives on the halo object so that the counts are exchanged once even
        when several ParticleDat objects are communicated. The C/MPI exchange
        library is compiled on first use and cached in
        ``self._exchange_sizes_lib``.

        :return: tuple ``(h_count, t_count)``; per the generated C code these
            are the number of halo particles and the amount of temporary
            space needed.
        """
        self._update_domain()

        if self._exchange_sizes_lib is None:

            arg_list = '''
            const int f_MPI_COMM,             // F90 comm from mpi4py
            const int * RESTRICT SEND_RANKS,  // send directions
            const int * RESTRICT RECV_RANKS,  // recv directions
            const int * RESTRICT h_ind,       // halo indices
            const int * RESTRICT b_ind,       // local b indices
            const int * RESTRICT h_arr,       // h cell indices
            const int * RESTRICT b_arr,       // b cell indices
            int * RESTRICT ccc,               // cell contents count
            int * RESTRICT h_count,           // number of halo particles
            int * RESTRICT t_count,           // amount of tmp space needed
            int * RESTRICT h_tmp,             // tmp space for recving
            int * RESTRICT b_tmp,             // tmp space for sending
            int * RESTRICT dir_counts         // expected recv counts
            '''

            header_template = '''
            #include <generic.h>
            #include <mpi.h>
            #include <iostream>
            using namespace std;
            #define RESTRICT %(RESTRICT)s

            extern "C" void HALO_ES_LIB(%(ARGS)s);
            '''

            code_template = '''

            void HALO_ES_LIB(%(ARGS)s){
                *h_count = 0;
                *t_count = 0;

                // get mpi comm and rank
                MPI_Comm MPI_COMM = MPI_Comm_f2c(f_MPI_COMM);
                int rank = -1; MPI_Comm_rank( MPI_COMM, &rank );
                MPI_Status MPI_STATUS;

                // [W E] [N S] [O I]
                for( int dir=0 ; dir<6 ; dir++ ){

                    //cout << "dir " << dir << "-------" << endl;

                    const int dir_s = b_ind[dir];             // start index
                    const int dir_c = b_ind[dir+1] - dir_s;   // cell count

                    const int dir_s_r = h_ind[dir];             // start index
                    const int dir_c_r = h_ind[dir+1] - dir_s_r; // cell count

                    int tmp_count = 0;
                    for( int ix=0 ; ix<dir_c ; ix++ ){
                        b_tmp[ix] = ccc[b_arr[dir_s + ix]];    // copy into
                                                               // send buffer

                        tmp_count += ccc[b_arr[dir_s + ix]];
                    }

                    *t_count = MAX(*t_count, tmp_count);


                    if(rank == RECV_RANKS[dir]){

                        for( int tx=0 ; tx < dir_c ; tx++ ){
                            h_tmp[tx] = b_tmp[tx];
                        }

                    } else {
                    MPI_Sendrecv ((void *) b_tmp, dir_c, MPI_INT,
                                  SEND_RANKS[dir], rank,
                                  (void *) h_tmp, dir_c_r, MPI_INT,
                                  RECV_RANKS[dir], RECV_RANKS[dir],
                                  MPI_COMM, &MPI_STATUS);
                    }

                    tmp_count=0;
                    for( int ix=0 ; ix<dir_c_r ; ix++ ){
                        ccc[h_arr[dir_s_r + ix]] = h_tmp[ix];
                        *h_count += h_tmp[ix];
                        tmp_count += h_tmp[ix];
                    }
                    dir_counts[dir] = tmp_count;
                    *t_count = MAX(*t_count, tmp_count);

                }

                return;
            }
            '''

            substitutions = {
                'ARGS': arg_list,
                'RESTRICT': build.MPI_CC.restrict_keyword
            }

            header_code = header_template % substitutions
            source_code = code_template % substitutions

            compiled = build.simple_lib_creator(
                header_code, source_code, 'HALO_ES_LIB', CC=build.MPI_CC)
            self._exchange_sizes_lib = compiled['HALO_ES_LIB']

        # End of creation code -----------------------------------------------

        # rebuild internal arrays if the cell array has a newer version
        if self._version < self._domain.cell_array.version:
            self._get_pairs()

        ccc = self._cell_to_particle_map.cell_contents_count

        # The exchange library works on host memory. Counts held in any other
        # array type (e.g. for the gpu) are staged through a host copy.
        ccc_on_host = type(ccc) is host.Array

        if not ccc_on_host:
            if self._cell_contents_count_tmp is None:
                self._cell_contents_count_tmp = host.Array(ncomp=ccc.ncomp,
                                                           dtype=ctypes.c_int)
            elif self._cell_contents_count_tmp.ncomp < ccc.ncomp:
                self._cell_contents_count_tmp.realloc(ccc.ncomp)

            # make a local copy of the cell contents counts
            self._cell_contents_count_tmp[:] = ccc[:]

        host_counts = ccc if ccc_on_host else self._cell_contents_count_tmp
        ccc_ptr = host_counts.ctypes_data

        assert ccc_ptr is not None, "No valid Cell Contents Count pointer found."

        comm_handle = ctypes.c_int(self._domain.comm.py2f())
        self._exchange_sizes_lib(
            comm_handle,
            self._send_ranks.ctypes_data,
            self._recv_ranks.ctypes_data,
            self._halo_groups_start_end_indices.ctypes_data,
            self._boundary_groups_start_end_indices.ctypes_data,
            self._halo_cell_groups.ctypes_data,
            self._boundary_cell_groups.ctypes_data,
            ccc_ptr,
            ctypes.byref(self._h_count),
            ctypes.byref(self._t_count),
            self._h_tmp.ctypes_data,
            self._b_tmp.ctypes_data,
            self.dir_counts.ctypes_data)

        # copy new sizes back to original array (eg for gpu)
        if not ccc_on_host:
            ccc[:] = self._cell_contents_count_tmp[:ccc.ncomp:]

        return self._h_count.value, self._t_count.value
Ejemplo n.º 23
0
    def __init__(self, domain, boundary_condition, L, max_num_groups, mirror_mode=False, energy_unit=1.0):
        """Generate and compile the ``direct_from_dats`` C kernel.

        The kernel loops over particles whose flag is positive and, for each
        of their groups, builds a small set of charges (``bb_positions`` /
        ``bb_charges``) and stores ``compute_energy(...) * energy_unit`` in
        ``group_energy``.  The compiled entry point is kept in ``self._lib``.

        Args:
            domain: object exposing ``extent``; the three extents must agree
                to within 1e-14 (checked with ``assert``).
            boundary_condition: compared against ``BCType`` members.  For
                ``NEAREST`` and ``PBC`` the 26 neighbouring-image terms are
                generated; for ``PBC`` a call to ``pbc_direct`` is also added.
            L: passed to ``LocalExpEval`` and ``LongRangeMTL``; the generated
                code uses ``HALF_NCOMP = L**2`` and ``NCOMP = 2 * L**2``.
            max_num_groups: value of the ``MAX_NUM_GROUPS`` macro.
            mirror_mode: when true each group is evaluated with four charges:
                the particle and group positions shifted in z by
                ``-0.25 * extent[2]`` plus copies with negated z and negated
                charge.  Otherwise only the two original charges are used.
            energy_unit: factor (converted with ``float``) multiplying every
                computed energy.
        """

        self.domain = domain
        self.bc = boundary_condition
        self.L = L

        # The expansion code generator is created before the long-range
        # operator, as in the original construction order.
        exp_eval = LocalExpEval(L)
        self.lrc = LongRangeMTL(L, domain)

        half_ncomp = self.L**2
        ncomp = half_ncomp * 2

        def real_offset(l, m):
            # Index of the (l, m) real coefficient; the imaginary one sits
            # half_ncomp entries later (see the IM_* macros below).
            return l**2 + l + m

        box = self.domain.extent
        E = box[0]
        # The image shifts below use a single side length for all axes.
        for side in (box[0], box[1], box[2]):
            assert abs(E - side) < 10.**-14

        z_shift = -0.25 * box[2]

        # C snippet declaring the charges that make up one group evaluation.
        if mirror_mode:
            group_decl = r"""
            const REAL bb_positions[12] = {{
                positions[ix*3 + 0],
                positions[ix*3 + 1],
                positions[ix*3 + 2] + {Z_SHIFT},
                group_positions[ix * MAX_NUM_GROUPS * 3 + gx * 3 + 0],
                group_positions[ix * MAX_NUM_GROUPS * 3 + gx * 3 + 1],
                group_positions[ix * MAX_NUM_GROUPS * 3 + gx * 3 + 2] + {Z_SHIFT},
                positions[ix*3 + 0],
                positions[ix*3 + 1],
                -1.0 * (positions[ix*3 + 2] + {Z_SHIFT}),
                group_positions[ix * MAX_NUM_GROUPS * 3 + gx * 3 + 0],
                group_positions[ix * MAX_NUM_GROUPS * 3 + gx * 3 + 1],
                -1.0 * (group_positions[ix * MAX_NUM_GROUPS * 3 + gx * 3 + 2] + {Z_SHIFT})
            }};
            const REAL bb_charges[4] = {{
                charges[ix],
                group_charges[ix*MAX_NUM_GROUPS+gx],
                -1.0 * charges[ix],
                -1.0 * group_charges[ix*MAX_NUM_GROUPS+gx]                
            }};
            const INT64 NG = 4;
            """.format(Z_SHIFT=z_shift)
        else:
            group_decl = r"""
            const REAL bb_positions[6] = {
                positions[ix*3 + 0],
                positions[ix*3 + 1],
                positions[ix*3 + 2],
                group_positions[ix * MAX_NUM_GROUPS * 3 + gx * 3 + 0],
                group_positions[ix * MAX_NUM_GROUPS * 3 + gx * 3 + 1],
                group_positions[ix * MAX_NUM_GROUPS * 3 + gx * 3 + 2]
            };
            const REAL bb_charges[2] = {
                charges[ix],
                group_charges[ix*MAX_NUM_GROUPS+gx]
            };
            const INT64 NG = 2;
            //printf("MAX_NUM_GROUPS %d, zpos %f gx %d\n", MAX_NUM_GROUPS, group_positions[ix * MAX_NUM_GROUPS * 3 + gx * 3 + 2], gx);
            //for(int ix=0 ; ix<NG ; ix++){
            //    printf("%d | P %f %f %f Q %f\n", ix, bb_positions[ix*3], bb_positions[ix*3+1], bb_positions[ix*3+2], bb_charges[ix]);
            //}



            """

        # One copy of this snippet is emitted per non-zero image shift.
        image_term = """
                                d0 = jp0 - ip0 + {OX};
                                d1 = jp1 - ip1 + {OY};
                                d2 = jp2 - ip2 + {OZ};
                                r2 = d0*d0 + d1*d1 + d2*d2;
                                r = sqrt(r2);
                                tmp_inner_phi += 0.5 * iq * jq / r;

                        """

        inner_direct = ''
        if boundary_condition in (BCType.NEAREST, BCType.PBC):
            steps = (-1, 0, 1)
            # Every shift in {-1, 0, 1}^3 except the primary image.
            inner_direct = ''.join(
                image_term.format(OX=sx * E, OY=sy * E, OZ=sz * E)
                for sx, sy, sz in product(steps, repeat=3)
                if not (sx == 0 and sy == 0 and sz == 0)
            )

        # The long-range contribution is only added for fully periodic runs.
        if boundary_condition == BCType.PBC:
            pbc_call = r"""
            const REAL tmp_energy_lr = pbc_direct(N, positions, charges, linop_data, linop_indptr, linop_indices);
            tmp_energy += tmp_energy_lr;
            """
        else:
            pbc_call = ''

        kernel_template = r"""

        static inline REAL nearest_direct(
            const INT64 N,
            const REAL * RESTRICT P,
            const REAL * RESTRICT Q
        ){{

            REAL tmp_phi = 0.0;

            for(INT64 ix=0 ; ix<N ; ix++){{
                REAL tmp_inner_phi = 0.0;
                
                const REAL iq = Q[ix];
                const REAL ip0 = P[3*ix + 0];
                const REAL ip1 = P[3*ix + 1];
                const REAL ip2 = P[3*ix + 2];

                for(INT64 jx=(ix+1) ; jx<N ; jx++){{
                    
                    const REAL jq = Q[jx];
                    const REAL jp0 = P[3*jx + 0];
                    const REAL jp1 = P[3*jx + 1];
                    const REAL jp2 = P[3*jx + 2];

                    REAL d0 = ip0 - jp0;
                    REAL d1 = ip1 - jp1;
                    REAL d2 = ip2 - jp2;
                    
                    REAL r2 = d0*d0 + d1*d1 + d2*d2;
                    REAL r = sqrt(r2);

                    tmp_inner_phi += iq * jq / r;

                }}

                for(INT64 jx=0 ; jx<N ; jx++){{
                    
                    const REAL jq = Q[jx];
                    const REAL jp0 = P[3*jx + 0];
                    const REAL jp1 = P[3*jx + 1];
                    const REAL jp2 = P[3*jx + 2];

                    REAL d0;
                    REAL d1;
                    REAL d2;
                    
                    REAL r2;
                    REAL r;

                    {INNER_DIRECT}

                }}
                
                tmp_phi += tmp_inner_phi;

            }}
           
            return tmp_phi;
        }}


        static inline void spherical(
            const REAL dx, const REAL dy, const REAL dz,
            REAL *radius, REAL *theta, REAL *phi
        ){{
            const REAL dx2 = dx*dx;
            const REAL dx2_p_dy2 = dx2 + dy*dy;
            const REAL d2 = dx2_p_dy2 + dz*dz;
            *radius = sqrt(d2);
            *theta = atan2(sqrt(dx2_p_dy2), dz);
            *phi = atan2(dy, dx);       
            return;
        }}
        
        {MULTIPOLE_HEADER}
        {MULTIPOLE_SRC}

        {EVEC_HEADER}
        {EVEC_SRC}


        static inline REAL linop_csr_both(
            const REAL * RESTRICT linop_data,
            const INT64 * RESTRICT linop_indptr,
            const INT64 * RESTRICT linop_indices,
            const REAL * RESTRICT x1,
            const REAL * RESTRICT E
        ){{
            
            INT64 data_ind = 0;
            REAL dot_tmp = 0.0;

            for(INT64 row=0 ; row<HALF_NCOMP ; row++){{

                REAL row_tmp_1 = 0.0;
                REAL row_tmp_2 = 0.0;

                for(INT64 col_ind=linop_indptr[row] ; col_ind<linop_indptr[row+1] ; col_ind++){{
                    const INT64 col = linop_indices[data_ind];
                    const REAL data = linop_data[data_ind];
                    data_ind++;
                    row_tmp_1 += data * x1[col];
                    row_tmp_2 += data * x1[col  + HALF_NCOMP];
                }}

                dot_tmp += row_tmp_1 * E[row] + row_tmp_2 * E[row + HALF_NCOMP];
            }}

            return dot_tmp;
        }}


        static inline REAL apply_dipole_correction_split(
            const REAL * RESTRICT M,
            const REAL * RESTRICT E
        ){{
            
            REAL tmp = 0.0;

            tmp += (DIPOLE_SX * M[RE_1P1]) * E[RE_1P1];
            tmp += (DIPOLE_SX * M[RE_1P1]) * E[RE_1N1];
        
            tmp -= (DIPOLE_SY * M[IM_1P1]) * E[IM_1P1];
            tmp += (DIPOLE_SY * M[IM_1P1]) * E[IM_1N1];

            tmp += (DIPOLE_SZ * M[RE_1_0]) * E[RE_1_0];

            return tmp;
        }}


        static inline REAL pbc_direct(
            const INT64            N,
            const REAL  * RESTRICT positions,
            const REAL  * RESTRICT charges,
            const REAL  * RESTRICT linop_data,
            const INT64 * RESTRICT linop_indptr,
            const INT64 * RESTRICT linop_indices
        ){{

            REAL new_moments[NCOMP];
            REAL new_evector[NCOMP];
            
            for(int cx=0 ; cx<NCOMP ; cx++){{
                new_moments[cx] = 0.0;
                new_evector[cx] = 0.0;
            }}

            for(int ix=0 ; ix<N ; ix++){{
                REAL radius, theta, phi;
                const REAL px = positions[ix*3 + 0];
                const REAL py = positions[ix*3 + 1];
                const REAL pz = positions[ix*3 + 2];
                const REAL ch = charges[ix];

                spherical(px, py, pz, &radius, &theta, &phi);

                local_dot_vec(ch, radius, theta, phi, new_evector);
                multipole_exp(ch, radius, theta, phi, new_moments);

            }}


            REAL new_energy = 0.5 * linop_csr_both(
                linop_data, linop_indptr, linop_indices,
                new_moments,
                new_evector
            );

            new_energy += 0.5 * apply_dipole_correction_split(
                new_moments,
                new_evector
            );


            return new_energy;
        }}





        static inline REAL compute_energy(
            const INT64            N,
            const REAL  * RESTRICT positions,
            const REAL  * RESTRICT charges,
            const REAL  * RESTRICT linop_data,
            const INT64 * RESTRICT linop_indptr,
            const INT64 * RESTRICT linop_indices
        ){{
            REAL tmp_energy = nearest_direct(N, positions, charges);
                   
            {PBC_CALL}

            return tmp_energy;
        }}
        
        extern "C" int direct_from_dats(
            const INT64            N,
            const INT64 * RESTRICT flags,
            const REAL  * RESTRICT positions,
            const REAL  * RESTRICT charges,
            const INT64 * RESTRICT group_counts,
            const REAL  * RESTRICT group_positions,
            const REAL  * RESTRICT group_charges,
            const REAL  * RESTRICT linop_data,
            const INT64 * RESTRICT linop_indptr,
            const INT64 * RESTRICT linop_indices,
                  REAL  * RESTRICT group_energy
        ){{
            
            #pragma omp parallel for
            for(INT64 ix=0 ; ix<N ; ix++ ){{
                if ( flags[ix] > 0 ){{
                    for(INT64 gx=0 ; gx<group_counts[ix] ; gx++ ){{

                        {GROUP_DECL}
                        
                        group_energy[ix*MAX_NUM_GROUPS+gx] = compute_energy(
                            NG,
                            bb_positions,
                            bb_charges,
                            linop_data,
                            linop_indptr,
                            linop_indices
                        ) * {ENERGY_UNIT};

                    }}
                }}
            }}
            return 0;
        }}
        """

        src = kernel_template.format(
            GROUP_DECL=group_decl,
            INNER_DIRECT=inner_direct,
            PBC_CALL=pbc_call,
            MULTIPOLE_HEADER=exp_eval.create_multipole_header,
            MULTIPOLE_SRC=exp_eval.create_multipole_src,
            EVEC_HEADER=exp_eval.create_dot_vec_header,
            EVEC_SRC=exp_eval.create_dot_vec_src,
            ENERGY_UNIT=float(energy_unit)
        )

        # Header: system includes followed by the macros the kernel relies on.
        re_1p1 = real_offset(1, 1)
        re_1_0 = real_offset(1, 0)
        re_1n1 = real_offset(1, -1)
        dipole = self.lrc.dipole_correction
        macros = (
            ('INT64', 'int64_t'),
            ('REAL', 'double'),
            ('NCOMP', str(ncomp)),
            ('HALF_NCOMP', str(half_ncomp)),
            ('DIPOLE_SX', str(dipole[0])),
            ('DIPOLE_SY', str(dipole[1])),
            ('DIPOLE_SZ', str(dipole[2])),
            ('RE_1P1', str(re_1p1)),
            ('RE_1_0', str(re_1_0)),
            ('RE_1N1', str(re_1n1)),
            ('IM_1P1', str(re_1p1 + half_ncomp)),
            ('IM_1_0', str(re_1_0 + half_ncomp)),
            ('IM_1N1', str(re_1n1 + half_ncomp)),
            ('MAX_NUM_GROUPS', str(max_num_groups)),
        )
        includes = tuple(Include(name) for name in ('omp.h', 'stdio.h', 'math.h'))
        defines = tuple(Define(symbol, value) for symbol, value in macros)
        header = str(Module(includes + defines))

        compiled = build.simple_lib_creator(header, src)
        self._lib = compiled['direct_from_dats']
Ejemplo n.º 24
0
    def __init__(self, E, tuples=None):
        """Generate and compile the ``nearest_direct`` C kernel.

        The kernel sums ``q_i * q_j / r`` over distinct pairs in the primary
        image and adds ``0.5 * q_i * q_j / r`` for every pair (including
        ``i == j``) under each non-zero image shift.  The compiled entry
        point is stored in ``self._lib``.

        Args:
            E: extent of the domain.  A non-iterable value is used for all
                three axes; otherwise ``E[0]``, ``E[1]`` and ``E[2]`` are
                the per-axis extents.
            tuples: iterable of 3-component image shifts, each multiplied
                component-wise by the extents.  Shifts whose first three
                components are all zero are skipped.  Defaults to every
                combination of -1, 0 and 1.
        """

        # Normalise the extent to one value per axis.
        E = E if isinstance(E, Iterable) else (E, E, E)

        if tuples is None:
            steps = (-1, 0, 1)
            tuples = product(steps, repeat=3)

        # One copy of this snippet is emitted per non-zero image shift.
        image_term = """
                            d0 = jp0 - ip0 + {OX};
                            d1 = jp1 - ip1 + {OY};
                            d2 = jp2 - ip2 + {OZ};
                            r2 = d0*d0 + d1*d1 + d2*d2;
                            r = sqrt(r2);
                            tmp_inner_phi += 0.5 * iq * jq / r;

                    """

        inner = ''.join(
            image_term.format(
                OX=shift[0] * E[0],
                OY=shift[1] * E[1],
                OZ=shift[2] * E[2],
            )
            for shift in tuples
            if not (shift[0] == 0 and shift[1] == 0 and shift[2] == 0)
        )

        header = r"""
        #include <math.h>
        #define INT64 int64_t
        #define REAL double
        """

        kernel_template = r"""
        
        extern "C" int nearest_direct(
            const INT64 N,
            const REAL * RESTRICT P,
            const REAL * RESTRICT Q,
            REAL * RESTRICT phi
        ){{

            REAL tmp_phi = 0.0;

            #pragma omp parallel for reduction(+:tmp_phi)
            for(INT64 ix=0 ; ix<N ; ix++){{
                REAL tmp_inner_phi = 0.0;
                
                const REAL iq = Q[ix];
                const REAL ip0 = P[3*ix + 0];
                const REAL ip1 = P[3*ix + 1];
                const REAL ip2 = P[3*ix + 2];

                for(INT64 jx=(ix+1) ; jx<N ; jx++){{
                    
                    const REAL jq = Q[jx];
                    const REAL jp0 = P[3*jx + 0];
                    const REAL jp1 = P[3*jx + 1];
                    const REAL jp2 = P[3*jx + 2];

                    REAL d0 = ip0 - jp0;
                    REAL d1 = ip1 - jp1;
                    REAL d2 = ip2 - jp2;
                    
                    REAL r2 = d0*d0 + d1*d1 + d2*d2;
                    REAL r = sqrt(r2);

                    tmp_inner_phi += iq * jq / r;

                }}

                for(INT64 jx=0 ; jx<N ; jx++){{
                    
                    const REAL jq = Q[jx];
                    const REAL jp0 = P[3*jx + 0];
                    const REAL jp1 = P[3*jx + 1];
                    const REAL jp2 = P[3*jx + 2];

                    REAL d0;
                    REAL d1;
                    REAL d2;
                    
                    REAL r2;
                    REAL r;

                    {INNER}

                }}
                
                tmp_phi += tmp_inner_phi;

            }}
           
            phi[0] = tmp_phi;
            return 0;
        }}
        """

        src = kernel_template.format(INNER=inner)

        compiled = build.simple_lib_creator(
            header_code=header,
            src_code=src,
            name="kmc_fmm_nearest_direct",
        )
        self._lib = compiled['nearest_direct']
Ejemplo n.º 25
0
    def _create_dat_lib(self):
        """Generate and compile the C helpers that stage proposed moves.

        Two entry points are compiled from one source and stored on the
        instance: ``setup_move`` as ``self._dat_lib`` and ``get_fmm_lcell``
        as ``self._get_fmm_lcell_lib``.

        The generated code depends on ``self.boundary_condition`` (free space
        leaves positions unshifted; the other accepted conditions shift cells
        and positions by whole extents and wrap the move separation) and on
        ``self.max_move``, whose square bounds each component of a move.

        Raises:
            AssertionError: if the boundary condition is not ``FREE_SPACE``,
                ``PBC``, ``NEAREST`` or ``FF_ONLY``.
        """

        # Snippets substituted into get_cell() / check_move() below.  They
        # are inserted as format values, so their braces are not doubled.
        free_space_cell_gen = r"""
            shifted_position[0] = position[0];
            shifted_position[1] = position[1];
            shifted_position[2] = position[2];
            """

        shifting_cell_gen = r"""
            REAL offsets[3];

            offsets[0] = ((cell[0] - local_offset[2]) < lower_allowed[0]) ? 1.0 : 0.0;
            offsets[1] = ((cell[1] - local_offset[1]) < lower_allowed[1]) ? 1.0 : 0.0;
            offsets[2] = ((cell[2] - local_offset[0]) < lower_allowed[2]) ? 1.0 : 0.0;

            if ((cell[0] - local_offset[2]) > upper_allowed[0]) { offsets[0] = -1.0; };
            if ((cell[1] - local_offset[1]) > upper_allowed[1]) { offsets[1] = -1.0; };
            if ((cell[2] - local_offset[0]) > upper_allowed[2]) { offsets[2] = -1.0; };

            cell[0] += offsets[0] * fmm_cells_per_side[2];
            cell[1] += offsets[1] * fmm_cells_per_side[1];
            cell[2] += offsets[2] * fmm_cells_per_side[0];

            shifted_position[0] = position[0] + offsets[0] * extent[0];
            shifted_position[1] = position[1] + offsets[1] * extent[1];
            shifted_position[2] = position[2] + offsets[2] * extent[2];
            """

        # NOTE(review): the first three tests compare against +0.5*extent, so
        # most separations are shifted up by one extent and then shifted back
        # by the last three tests.  The net result still lies within half an
        # extent of zero; confirm this is intended rather than a missing
        # minus sign.
        wrapping_check_mod = r"""
            if (d0 < (0.5*extent[0])) { d0 += extent[0]; }
            if (d1 < (0.5*extent[1])) { d1 += extent[1]; }
            if (d2 < (0.5*extent[2])) { d2 += extent[2]; }
            if (d0 > (0.5*extent[0])) { d0 -= extent[0]; }
            if (d1 > (0.5*extent[1])) { d1 -= extent[1]; }
            if (d2 > (0.5*extent[2])) { d2 -= extent[2]; }
            """

        if self.boundary_condition is BCType.FREE_SPACE:
            cell_gen, check_mod = free_space_cell_gen, r""
        else:
            assert self.boundary_condition in (BCType.PBC, BCType.NEAREST,
                                               BCType.FF_ONLY)
            cell_gen, check_mod = shifting_cell_gen, wrapping_check_mod

        kernel_template = r"""
        #define REAL double
        #define INT64 int64_t

        static inline void get_cell(
            const REAL * RESTRICT position,
            const REAL * RESTRICT extent,
            const INT64 * fmm_cells_per_side,
            const INT64 * RESTRICT upper_allowed,
            const INT64 * RESTRICT lower_allowed,
            const INT64 * RESTRICT local_offset,
            INT64 * cell,
            REAL * shifted_position
        ){{
            shifted_position[0] = position[0] + 0.5 * extent[0];
            shifted_position[1] = position[1] + 0.5 * extent[1];
            shifted_position[2] = position[2] + 0.5 * extent[2];

            const REAL w0 = fmm_cells_per_side[0] / extent[0];
            const REAL w1 = fmm_cells_per_side[1] / extent[1];
            const REAL w2 = fmm_cells_per_side[2] / extent[2];

            cell[0] = (INT64) (shifted_position[0] * w0);
            cell[1] = (INT64) (shifted_position[1] * w1);
            cell[2] = (INT64) (shifted_position[2] * w2);

            if (cell[0] >= fmm_cells_per_side[2]) {{ cell[0] = fmm_cells_per_side[2] - 1; }}
            if (cell[1] >= fmm_cells_per_side[1]) {{ cell[1] = fmm_cells_per_side[1] - 1; }}
            if (cell[2] >= fmm_cells_per_side[0]) {{ cell[2] = fmm_cells_per_side[0] - 1; }}

            {CELL_GEN}


            return;

        }}

        static inline void get_fmm_cell(
            const INT64 cc,
            const INT64 * fmm_cells_per_side,
            INT64 * cell
        ){{ 
            const INT64 fx = fmm_cells_per_side[0];
            const INT64 fy = fmm_cells_per_side[1];
            const INT64 cx = cc % fx;
            const INT64 cycz = (cc - cx) / fx;
            const INT64 cy = cycz % fy;
            const INT64 cz = (cycz - cy) / fy;
            cell[0] = cx;
            cell[1] = cy;
            cell[2] = cz;
            return;
        }}

        static inline INT64 gcell_to_lcell(
            const INT64 * RESTRICT cell_data_offset,
            const INT64 * RESTRICT local_store_dims,
            const INT64 * cell
        ){{
            const INT64 c0 = cell[0] + cell_data_offset[2];
            const INT64 c1 = cell[1] + cell_data_offset[1];
            const INT64 c2 = cell[2] + cell_data_offset[0];
            return c0 + local_store_dims[2] * ( c1 + local_store_dims[1] * c2 );
        }}
        
        extern "C"
        int get_fmm_lcell(
            const INT64 N,
            const INT64 * RESTRICT FMM_CELLS_PER_SIDE,
            const INT64 * RESTRICT CELL_DATA_OFFSET,
            const INT64 * RESTRICT LOCAL_STORE_DIMS,
            const INT64 * RESTRICT IDS_ARRAY,
            const INT64 * RESTRICT FMM_CELLS,
            INT64 * RESTRICT OUT_BUFF
        ){{
            
            for(INT64 ix=0 ; ix<N ; ix++){{
                INT64 tmp_cell[3];
                get_fmm_cell(FMM_CELLS[IDS_ARRAY[ix]], FMM_CELLS_PER_SIDE, tmp_cell);
                OUT_BUFF[ix] = gcell_to_lcell(CELL_DATA_OFFSET, LOCAL_STORE_DIMS, tmp_cell);
            }}
            
            return 0;
        }}
        
        static inline void check_move(
            const REAL * RESTRICT extent,
            const REAL * RESTRICT p,
            const REAL * RESTRICT pp,
            int * RESTRICT err,
            const INT64 px,
            const INT64 movx
        ){{
            
            if (pp[0] < -0.5 * extent[0]) {{ err[0]++; printf("ERROR: Proposed position is outside domain (%ld, %ld). %f %f %f -> %f %f %f\n", px, movx, p[0], p[1], p[2], pp[0], pp[1], pp[2]);}}
            if (pp[1] < -0.5 * extent[1]) {{ err[0]++; printf("ERROR: Proposed position is outside domain (%ld, %ld). %f %f %f -> %f %f %f\n", px, movx, p[0], p[1], p[2], pp[0], pp[1], pp[2]);}}
            if (pp[2] < -0.5 * extent[2]) {{ err[0]++; printf("ERROR: Proposed position is outside domain (%ld, %ld). %f %f %f -> %f %f %f\n", px, movx, p[0], p[1], p[2], pp[0], pp[1], pp[2]);}}
            if (pp[0] >  0.5 * extent[0]) {{ err[0]++; printf("ERROR: Proposed position is outside domain (%ld, %ld). %f %f %f -> %f %f %f\n", px, movx, p[0], p[1], p[2], pp[0], pp[1], pp[2]);}}
            if (pp[1] >  0.5 * extent[1]) {{ err[0]++; printf("ERROR: Proposed position is outside domain (%ld, %ld). %f %f %f -> %f %f %f\n", px, movx, p[0], p[1], p[2], pp[0], pp[1], pp[2]);}}
            if (pp[2] >  0.5 * extent[2]) {{ err[0]++; printf("ERROR: Proposed position is outside domain (%ld, %ld). %f %f %f -> %f %f %f\n", px, movx, p[0], p[1], p[2], pp[0], pp[1], pp[2]);}}
            
            REAL d0 = p[0] - pp[0];
            REAL d1 = p[1] - pp[1];
            REAL d2 = p[2] - pp[2];

            {CHECK_MOD}

            if ( (d0*d0) > ({MAX_MOVE_2}) ){{ err[0]++; printf("ERROR: Proposed move violates max_move (%ld, %ld). %f %f %f -> %f %f %f\n", px, movx, p[0], p[1], p[2], pp[0], pp[1], pp[2]);}}
            if ( (d1*d1) > ({MAX_MOVE_2}) ){{ err[0]++; printf("ERROR: Proposed move violates max_move (%ld, %ld). %f %f %f -> %f %f %f\n", px, movx, p[0], p[1], p[2], pp[0], pp[1], pp[2]);}}
            if ( (d2*d2) > ({MAX_MOVE_2}) ){{ err[0]++; printf("ERROR: Proposed move violates max_move (%ld, %ld). %f %f %f -> %f %f %f\n", px, movx, p[0], p[1], p[2], pp[0], pp[1], pp[2]);}}
        }}


        extern "C" int setup_move(
            const INT64 npart_local,
            const INT64 max_prop,
            const INT64 * RESTRICT site_max_counts,
            const INT64 * RESTRICT current_sites,
            const REAL  * RESTRICT current_positions,
            const REAL  * RESTRICT current_charges,
            const INT64 * RESTRICT current_ids,
            const INT64 * RESTRICT current_fmm_cells,
            const REAL  * RESTRICT prop_positions,
            const INT64 * RESTRICT prop_masks,
            const INT64            prop_charge_flag,
            const REAL  * RESTRICT prop_charges,
                  INT64 * RESTRICT rate_location,
                  REAL  * RESTRICT new_positions,
                  REAL  * RESTRICT new_charges,
                  INT64 * RESTRICT new_ids,
                  INT64 * RESTRICT new_fmm_cells,
                  REAL  * RESTRICT new_shifted_positions,
                  REAL  * RESTRICT old_positions,
                  REAL  * RESTRICT old_charges,
                  INT64 * RESTRICT old_ids,
                  INT64 * RESTRICT old_fmm_cells,
                  INT64 * RESTRICT exclusive_sum,
                  INT64 * RESTRICT num_particles,
                  INT64 * RESTRICT total_movs,
            const REAL  * RESTRICT extent,
            const INT64 * RESTRICT fmm_cells_per_side,
            const INT64 * RESTRICT cell_data_offset,
            const INT64 * RESTRICT local_store_dims,
            const INT64 * RESTRICT upper_allowed,
            const INT64 * RESTRICT lower_allowed,
            const INT64 * RESTRICT local_offset
        ){{
            
            int err = 0;
            INT64 es_tmp = 0;
            INT64 old_ind = 0;
            for(INT64 px=0 ; px<npart_local ; px++){{

                // Compute the exclusive sum
                INT64 es_inner = 0;
                INT64 prop_found = 0;
                INT64 max_prop_count = site_max_counts[current_sites[px]];
                for(INT64 movx=0 ; (movx<max_prop) && (prop_found<max_prop_count) ; movx++){{
                    const INT64 mask = prop_masks[px*max_prop + movx];
                    if (mask > 0){{
                        es_inner++;
                        prop_found++;
                    }}
                }}
                // if moves involve this particle we need the data
                if (es_inner > 0){{
                    exclusive_sum[old_ind] = es_tmp;
                    es_tmp += es_inner;
                    old_ids[old_ind] = px;
                    old_ind++;
                }}
            }}
            exclusive_sum[old_ind] = es_tmp;

            *num_particles = old_ind;
            *total_movs = es_tmp;

            // now move the data
            #pragma omp parallel for schedule(dynamic) reduction(+:err)
            for(INT64 oind=0 ; oind<old_ind; oind++ ){{
                const INT64 px = old_ids[oind];
                old_positions[oind*3 + 0] = current_positions[px*3 + 0];
                old_positions[oind*3 + 1] = current_positions[px*3 + 1];
                old_positions[oind*3 + 2] = current_positions[px*3 + 2];
                old_charges[oind] = current_charges[px];
                old_ids[oind] = current_ids[px];

                INT64 tmp_cell[3] = {{0,0,0}};
                get_fmm_cell(current_fmm_cells[px], fmm_cells_per_side, tmp_cell);
                old_fmm_cells[oind] = gcell_to_lcell(cell_data_offset, local_store_dims, &tmp_cell[0]);

                const INT64 prop_count = exclusive_sum[oind+1] - exclusive_sum[oind];
                const INT64 nstart = exclusive_sum[oind];
                INT64 prop_found = 0;
                for(INT64 movx=0 ; ((movx<max_prop) && (prop_found < prop_count)) ; movx++){{
                    const INT64 mask = prop_masks[px*max_prop + movx];
                    if(mask > 0){{
                        const INT64 nind = nstart + prop_found;
                        const INT64 prop_ind = px*max_prop*3 + (movx*3); 
                        new_positions[nind*3 + 0] = prop_positions[prop_ind + 0];
                        new_positions[nind*3 + 1] = prop_positions[prop_ind + 1];
                        new_positions[nind*3 + 2] = prop_positions[prop_ind + 2];

                        check_move(extent, &current_positions[px*3], &prop_positions[prop_ind], &err, px, movx);

                        INT64 tmp_cell[3] = {{0,0,0}};
                        REAL tmp_pos[3] = {{0.0, 0.0, 0.0}};
                        
                        get_cell( &prop_positions[prop_ind], extent, fmm_cells_per_side,
                            upper_allowed, lower_allowed, local_offset, &tmp_cell[0], &tmp_pos[0]);

                        new_fmm_cells[nind] = gcell_to_lcell(cell_data_offset, local_store_dims, &tmp_cell[0]);

                        new_shifted_positions[nind*3 + 0] = tmp_pos[0];
                        new_shifted_positions[nind*3 + 1] = tmp_pos[1];
                        new_shifted_positions[nind*3 + 2] = tmp_pos[2];
                        
                        const INT64 charge_prop_ind = px * max_prop + movx;
                        new_charges[nind] = (prop_charge_flag > 0) ? prop_charges[charge_prop_ind] : current_charges[px];

                        new_ids[nind] = current_ids[px];
                        rate_location[nind] = px*max_prop + movx;
                        prop_found++;
                    }}
                }}
            }}

            return err;
        }}

        """

        # Each component of a move is compared against max_move squared.
        max_move_squared = str(self.max_move * self.max_move)
        src = kernel_template.format(
            CELL_GEN=cell_gen,
            CHECK_MOD=check_mod,
            MAX_MOVE_2=max_move_squared,
        )

        header = r"""
        #include <stdint.h>
        #include <stdio.h>
        """

        compiled = simple_lib_creator(header, src)
        self._dat_lib = compiled["setup_move"]
        self._get_fmm_lcell_lib = compiled["get_fmm_lcell"]
Ejemplo n.º 26
0
def test_sph_gen_1():
    """Check SphGen-generated C code against a reference built from ``lpmv``.

    The generated library evaluates every (l, m) term up to ``lmax`` for a
    fixed set of random angles. Each real and imaginary part must agree with
    ``sqrt((l-|m|)!/(l+|m|)!) * lpmv(|m|, l, cos(theta)) * exp(i*m*phi)``
    to within 1e-13. ``lpmv`` is presumably ``scipy.special.lpmv``.
    """
    lmax = 26
    N = 10
    M = (lmax+1) * (2*lmax+1)

    sph_gen = SphGen(lmax)

    # One pair of C assignments (real part, imaginary part) per (l, m) term.
    assignments = []
    for lx in range(lmax+1):
        for mx in range(-lx, lx+1):
            target = '_out[NSTRIDE * ix + LMAX * {lx} + LOFFSET + {mx}] = '.format(lx=lx, mx=mx)
            assignments.append('re' + target + str(sph_gen.get_y_sym(lx, mx)[0]) + ';\n')
            assignments.append('im' + target + str(sph_gen.get_y_sym(lx, mx)[1]) + ';\n')
    assign_gen = ''.join(assignments)

    src_template = """
    #define LMAX ({LMAX})
    #define LOFFSET ({LOFFSET})
    #define N ({N})
    #define NSTRIDE ({NSTRIDE})

    extern "C" int test_sph_gen(
        const double * RESTRICT theta_set,
        const double * RESTRICT phi_set,
        double * RESTRICT re_out,
        double * RESTRICT im_out
    ){{
        #pragma omp parallel for
        for (int ix=0; ix<N ; ix++){{
            const double theta = theta_set[ix];
            const double phi = phi_set[ix];
        {SPH_GEN}
        {ASSIGN_GEN}
        }}
        return 0;
    }}
    """
    src = src_template.format(
        SPH_GEN=str(sph_gen.module),
        ASSIGN_GEN=str(assign_gen),
        LMAX=2*lmax+1,
        LOFFSET=lmax,
        N=N,
        NSTRIDE=M
    )
    header = str(sph_gen.header)

    compiled = simple_lib_creator(header_code=header, src_code=src)
    lib = compiled['test_sph_gen']

    # Output layout matches the generated indexing: [point, l, lmax + m].
    re_out = np.zeros((N, lmax+1, 2*lmax+1), dtype=c_double)
    im_out = np.zeros_like(re_out)

    # Fixed seed keeps the sampled angles reproducible.
    rng = np.random.RandomState(1234)
    theta_set = np.array(rng.uniform(low=0.0, high=math.pi, size=N), dtype=c_double)
    phi_set = np.array(rng.uniform(low=0.0, high=2.*math.pi, size=N), dtype=c_double)

    lib(
        theta_set.ctypes.get_as_parameter(),
        phi_set.ctypes.get_as_parameter(),
        re_out.ctypes.get_as_parameter(),
        im_out.ctypes.get_as_parameter()
    )

    tol = 10.**-13

    for ix, (theta, phi) in enumerate(zip(theta_set, phi_set)):
        for lx in range(lmax + 1):
            # Orders m = -l..l and, in the same sequence, their magnitudes |m|.
            abs_orders = list(range(lx, -1, -1)) + list(range(1, lx+1))
            orders = list(range(-lx, lx+1))
            legendre = lpmv(abs_orders, lx, np.cos(theta))

            for p_val, mx in zip(legendre, orders):
                norm = math.sqrt(math.factorial(
                    lx - abs(mx))/math.factorial(lx + abs(mx)))
                magnitude = norm * p_val

                expected_re = math.cos(mx * phi) * magnitude
                expected_im = math.sin(mx * phi) * magnitude

                re_err = abs(expected_re - re_out[ix, lx, lmax + mx])
                im_err = abs(expected_im - im_out[ix, lx, lmax + mx])

                assert re_err < tol
                assert im_err < tol
Ejemplo n.º 27
0
    def build_compress_lib(state):
        """Generate and build the C ``compress`` routine for ``state``.

        The C argument list and the per-dat copy loops are generated from
        ``state.particle_dats``: each name listed there is looked up as an
        attribute of ``state`` and must provide ``ctype`` and ``ncomp``.

        :arg state: object exposing ``particle_dats`` and the dats it names.
        :return: the ``'compress'`` entry of the library returned by
            ``build.simple_lib_creator``.
        """
        dat_names = state.particle_dats

        # One pointer argument per particle dat, named D_<dat name>.
        args = ','.join(
            '{} * D_{}'.format(getattr(state, name).ctype, name)
            for name in dat_names
        )

        # Per-dat loop copying all components of row ``src`` into row ``dest``.
        copy_template = '''
             for(int ix=0 ; ix<{0} ; ix++){{
             {1}[dest*{0}+ix] = {1}[src*{0}+ix];}}
             '''
        dyn = '\n'.join(
            copy_template.format(str(getattr(state, name).ncomp),
                                 'D_{}'.format(name))
            for name in dat_names
        )

        hsrc = ''

        # The generated routine walks the listed empty slots from the front,
        # fills each one with the last row that is not itself an empty slot,
        # and reports the resulting row count through n_new_out[0].
        src_template = '''
        extern "C"
        int compress(
            const int slots_to_fill_in,
            const int n_new_in,
            const int * slots,
            int * n_new_out,
            %(ARGS)s
        ){
        int slots_to_fill = slots_to_fill_in;
        int n_new = n_new_in;
        int last_slot;
        int last_slot_lookup_index = slots_to_fill - 1;
        int dest_index = 0;
        int dest = -1;
        // Whilst there are slots to fill and the current slot is not past the
        // end of the array.
        if (n_new > 0) {
            while ( (dest_index <= last_slot_lookup_index) &&
             (slots[dest_index] < n_new) ){
                // get first empty slot in particle dats.
                dest = slots[dest_index];
                int src = -1;
                //loop from end to empty slot
                for (int iy = n_new - 1; iy > dest; iy--){
                    if (iy == slots[last_slot_lookup_index]){
                        n_new = iy;
                        last_slot_lookup_index--;
                        //printf("n_new=%%d \\n", n_new);
                    } else {
                        src = iy;
                        break;
                    }
                }
                if (src > 0){
                    \n%(DYN_DAT_CODE)s
                    n_new = src;
                } else {
                    n_new = slots[last_slot_lookup_index];
                    break;
                }
                dest_index++;
            }
        }
        n_new_out[0] = n_new;
        return 0;}
        '''
        src = src_template % {
            'DYN_DAT_CODE': dyn,
            'ARGS': args
        }

        lib = build.simple_lib_creator(hsrc, src, 'compress')
        return lib['compress']
Ejemplo n.º 28
0
    def build_pack_lib(state):
        """Generate and build the C ``move_pack`` routine for ``state``.

        For every name in ``state.particle_dats`` a packing snippet is
        generated that copies one row of that dat into the send buffer. Dats
        whose exact type is ``data.PositionDat`` have the ``SHIFT`` entry for
        the current direction added before being copied.

        :arg state: object exposing ``particle_dats`` and the dats it names;
            each dat must provide ``ctype``, ``dtype`` and ``ncomp``.
        :return: the ``'move_pack'`` entry of the library returned by
            ``build.simple_lib_creator``.
        """
        dat_names = state.particle_dats

        # One read-only pointer argument per particle dat, named D_<dat name>.
        args = ','.join(
            'const {} * D_{}'.format(getattr(state, name).ctype, name)
            for name in dat_names
        )

        # Positions: add the per-direction shift, then copy three components.
        position_template = '''
                %(DTYPE)s _pos_tmp[3];
                _pos_tmp[0]=D_%(NAME)s[_ix* %(NCOMP)s    ]+SHIFT[(_dir*3)    ];
                _pos_tmp[1]=D_%(NAME)s[_ix* %(NCOMP)s + 1]+SHIFT[(_dir*3) + 1];
                _pos_tmp[2]=D_%(NAME)s[_ix* %(NCOMP)s + 2]+SHIFT[(_dir*3) + 2];
                memcpy(_S_BUF, _pos_tmp, 3*%(DBYTE)s);
                _S_BUF += %(TBYTE)s;
                '''
        # Everything else: copy the row unchanged.
        plain_template = '''
                memcpy(_S_BUF, &D_%(NAME)s[_ix * %(NCOMP)s], %(TBYTE)s);
                _S_BUF += %(TBYTE)s;
                '''

        pack_snippets = []
        for name in state.particle_dats:
            dat = getattr(state, name)
            sub_dict = {
                'DTYPE': dat.ctype,
                'DBYTE': str(ctypes.sizeof(dat.dtype)),
                'TBYTE': str(dat.ncomp * ctypes.sizeof(dat.dtype)),
                'NCOMP': str(dat.ncomp),
                'NAME': str(name)
            }

            if type(dat) is data.PositionDat:
                assert dat.ncomp == 3, "move only defined in 3D"
                chosen_template = position_template
            else:
                assert dat.ncomp > 0, "move not defined for 0 component dats"
                chosen_template = plain_template

            pack_snippets.append(chosen_template % sub_dict)

        _dynamic_dats_shift = ''.join(pack_snippets)

        hsrc = '''
        #include<string.h>
        #include<stdint.h>
        '''

        # The generated routine follows, for each of the 26 directions, a
        # linked list stored inside _direction_id_list, packs each listed
        # particle and records its index in _empty_slot_store.
        src_template = '''
        extern "C"
        int move_pack(
        uint8_t * _S_BUF,
        const double * SHIFT,
        const int * _direction_id_list,
        int * _empty_slot_store,
        %(ARGS)s
        ){
            // Next free space in send buffer.
            int _slot_index = 0;
            //loop over the send directions.
            for(int _dir = 0; _dir < 26; _dir++){
                //traverse linked list.
                int _ixd = _direction_id_list[_dir];
                while(_ixd > -1){
                    //Generate code based on ParticleDats
                    int _ix = _direction_id_list[_ixd];
                    \n%(DYNAMIC_DATS)s
                    _empty_slot_store[_slot_index] = _ix;
                    _slot_index += 1;
                    _ixd = _direction_id_list[_ixd+1];
                }
            }
        return 0;}'''
        src = src_template % {
            'DYNAMIC_DATS': _dynamic_dats_shift,
            'ARGS': args
        }

        lib = build.simple_lib_creator(hsrc, src, 'move_pack')
        return lib['move_pack']
Ejemplo n.º 29
0
    def _init_host_point_eval(self):
        """Build the host ``direct_point_eval`` routine and store it on self.

        The generated C function, for each of ``N`` points, determines the
        cell containing the point, visits the 27 cells reached through
        ``offsets`` and adds the sum of ``charge / distance`` over the
        particles stored in those cells to ``d_potential[idx]``.

        The compiled entry point is stored as ``self._host_point_eval_lib``.
        """
        header_parts = (
            Include('stdint.h'), Include('stdio.h'), Include('math.h'),
            Include('omp.h'), Define('REAL', 'double'),
            Define('INT64', 'int64_t'),
        )
        header = str(Module(header_parts))

        # NOTE(review): get_cell scales cell[0] with fmm_cells_per_side[0] but
        # clamps it against fmm_cells_per_side[2] (and the reverse for
        # cell[2]). This is harmless when all three entries are equal;
        # confirm the intended index ordering before using unequal sides.
        src_template = r"""
        static inline INT64 gcell_to_lcell(
            const INT64 * RESTRICT cell_data_offset,
            const INT64 * RESTRICT local_store_dims,
            const INT64 * cell
        ){{
            const INT64 c0 = cell[0] + cell_data_offset[2];
            const INT64 c1 = cell[1] + cell_data_offset[1];
            const INT64 c2 = cell[2] + cell_data_offset[0];
            return c0 + local_store_dims[2] * ( c1 + local_store_dims[1] * c2 );
        }}

        static inline void get_cell(
            const REAL * RESTRICT position,
            const REAL * RESTRICT extent,
            const INT64 * fmm_cells_per_side,
            INT64 * cell
        ){{

            REAL shifted_position[3];
            shifted_position[0] = position[0] + 0.5 * extent[0];
            shifted_position[1] = position[1] + 0.5 * extent[1];
            shifted_position[2] = position[2] + 0.5 * extent[2];

            const REAL w0 = fmm_cells_per_side[0] / extent[0];
            const REAL w1 = fmm_cells_per_side[1] / extent[1];
            const REAL w2 = fmm_cells_per_side[2] / extent[2];

            cell[0] = (INT64) (shifted_position[0] * w0);
            cell[1] = (INT64) (shifted_position[1] * w1);
            cell[2] = (INT64) (shifted_position[2] * w2);

            if (cell[0] >= fmm_cells_per_side[2]) {{ cell[0] = fmm_cells_per_side[2] - 1; }}
            if (cell[1] >= fmm_cells_per_side[1]) {{ cell[1] = fmm_cells_per_side[1] - 1; }}
            if (cell[2] >= fmm_cells_per_side[0]) {{ cell[2] = fmm_cells_per_side[0] - 1; }}

            return;
        }}


        extern "C" int direct_point_eval(
            const INT64 N,
            const REAL  * RESTRICT d_positions,
            const REAL  * RESTRICT d_pdata,
            const INT64 * RESTRICT d_cell_occ,
            const INT64            d_cell_stride,
            const INT64 * RESTRICT offsets,
            const INT64 * RESTRICT cell_data_offset,
            const INT64 * RESTRICT local_store_dims,
            const INT64 * RESTRICT fmm_cells_per_side,
            const REAL  * RESTRICT extent,
                  REAL  * RESTRICT d_potential
        ){{

            int err = 0;
            
            INT64 max_cell = fmm_cells_per_side[0] * fmm_cells_per_side[1] * fmm_cells_per_side[2];


            #pragma omp parallel for schedule(dynamic)
            for( INT64 idx=0 ; idx < N ; idx++ ) {{

                INT64 ict[3];
                get_cell(&d_positions[idx*3], extent, fmm_cells_per_side, ict);
                const INT64 ic = gcell_to_lcell(cell_data_offset, local_store_dims, ict);
                

                const REAL ipx = d_positions[idx*3];
                const REAL ipy = d_positions[idx*3+1];
                const REAL ipz = d_positions[idx*3+2];

                REAL energy_red = 0.0;

                // loop over the jcells
                for(INT64 jcx=0 ; jcx<27 ; jcx++){{
                    const INT64 jc = ic + offsets[jcx];

                    // compute the offset into the cell data
                    const INT64 offset = jc * d_cell_stride;
                    const INT64 offset5 = 5 * jc * d_cell_stride;

                    // loop over the particles in the j cell
                    for(INT64 jx=0 ; jx<d_cell_occ[jc] ; jx++){{            
                        const REAL jpx = d_pdata[offset5 + jx*5+0];
                        const REAL jpy = d_pdata[offset5 + jx*5+1];
                        const REAL jpz = d_pdata[offset5 + jx*5+2];
                        const REAL jch = d_pdata[offset5 + jx*5+3];
                        const REAL dx = ipx - jpx;
                        const REAL dy = ipy - jpy;
                        const REAL dz = ipz - jpz;
                        const REAL r2 = dx*dx + dy*dy + dz*dz;
                        const REAL contrib = jch / sqrt(r2);    
                        energy_red += contrib;
                    }}
                }}

                d_potential[idx] += energy_red;

            }}
            return err;
        }}
        """
        # format() with no arguments only collapses the doubled braces.
        src = src_template.format()

        compiled = build.simple_lib_creator(
            header_code=header, src_code=src,
            name='kmc_fmm_direct_point_eval')
        self._host_point_eval_lib = compiled['direct_point_eval']
Ejemplo n.º 30
0
    def _init_host_kernels(self):
        """Build the host ``direct_new`` / ``direct_old`` energy kernels.

        Both generated C functions share one body. For each of ``num_movs``
        entries they visit the 27 cells reached through ``offsets`` from
        ``d_fmm_cells[idx]``, accumulate ``charge / distance`` over the
        particles stored in those cells, scale the sum by ``d_charges[idx]``
        and write it to ``d_energy[idx]``. ``direct_old`` additionally skips
        stored particles whose id equals ``d_ids[idx]``. The sum of
        ``d_cell_occ`` over all visited cells is returned through
        ``div_count``.

        The library is stored as ``self._host_lib`` and the two entry points
        as ``self._host_direct_new`` and ``self._host_direct_old``.
        """

        header = str(
            Module((Include('stdint.h'), Include('stdio.h'), Include('math.h'),
                    Include('omp.h'), Define('REAL', 'double'),
                    Define('INT64', 'int64_t'))))

        LIB_PARAMETERS = """
                const INT64 num_movs,
                const INT64 * RESTRICT lsd,
                const INT64 * RESTRICT offsets,
                const REAL  * RESTRICT d_positions,
                const REAL  * RESTRICT d_charges,
                const INT64 * RESTRICT d_ids,
                const INT64 * RESTRICT d_fmm_cells,
                const REAL  * RESTRICT d_pdata,
                const INT64 * RESTRICT d_pdata_ids,
                const INT64 * RESTRICT d_cell_occ,
                const INT64 d_cell_stride,
                REAL * RESTRICT d_energy,
                INT64 * RESTRICT div_count"""

        # Shared opening of both kernels, up to the point where the pair
        # contribution has been computed.
        common_1 = r"""
                INT64 tmp_div_count = 0;
                #pragma omp parallel for schedule(dynamic) reduction(+:tmp_div_count)
                for( INT64 idx=0 ; idx< num_movs ; idx++ ) {{

                    const INT64 ic = d_fmm_cells[idx];
                    const REAL ipx = d_positions[idx*3];
                    const REAL ipy = d_positions[idx*3+1];
                    const REAL ipz = d_positions[idx*3+2];

                    REAL energy_red = 0.0;

                    // loop over the jcells
                    for(INT64 jcx=0 ; jcx<27 ; jcx++){{
                        const INT64 jc = ic + offsets[jcx];

                        // compute the offset into the cell data
                        const INT64 offset = jc * d_cell_stride;
                        const INT64 offset5 = 5 * jc * d_cell_stride;
                        tmp_div_count += d_cell_occ[jc];

                        // loop over the particles in the j cell
                        for(INT64 jx=0 ; jx<d_cell_occ[jc] ; jx++){{            
                            const REAL jpx = d_pdata[offset5 + jx*5+0];
                            const REAL jpy = d_pdata[offset5 + jx*5+1];
                            const REAL jpz = d_pdata[offset5 + jx*5+2];
                            const REAL jch = d_pdata[offset5 + jx*5+3];
                            const REAL dx = ipx - jpx;
                            const REAL dy = ipy - jpy;
                            const REAL dz = ipz - jpz;
                            const REAL r2 = dx*dx + dy*dy + dz*dz;
                            const REAL contrib = jch / sqrt(r2);

        """.format()
        # new/old part goes here ->
        # Shared closing of both kernels. The store to *div_count must come
        # after the parallel loop: inside the loop tmp_div_count is each
        # thread's private partial sum, so writing it to the shared output
        # there is a data race and yields a partial count.
        common_2 = r"""

                        }}
                    }}
                    energy_red *= d_charges[idx];
                    d_energy[idx] = energy_red;
                }}
                // the reduced total is only available once the loop has ended
                *div_count = tmp_div_count;
            return 0;
        """.format()

        src = r"""
            {HEADER}

            extern "C" int direct_new(
                {LIB_PARAMETERS}
            ) {{
                {COMMON_1}
                            energy_red += contrib;
                {COMMON_2}
            }}

            extern "C" int direct_old(
                {LIB_PARAMETERS}
            ) {{
                {COMMON_1}
                            energy_red += (d_pdata_ids[offset + jx] != d_ids[idx]) ? contrib : 0.0;
                {COMMON_2}
            }}


            """.format(HEADER=header,
                       COMMON_1=common_1,
                       COMMON_2=common_2,
                       LIB_PARAMETERS=LIB_PARAMETERS)

        # The header text is already embedded at the top of ``src`` through
        # {HEADER}, so only a placeholder is passed as header_code.
        self._host_lib = build.simple_lib_creator(header_code=' ',
                                                  src_code=src,
                                                  name='kmc_fmm_direct_host')
        self._host_direct_new = self._host_lib["direct_new"]
        self._host_direct_old = self._host_lib["direct_old"]