Ejemplo n.º 1
0
    def _init_self_interaction_lib(self):
        """
        Create the particle loops for the self interaction contribution.

        Two loops are built from the C sources in ``EwaldOrthSource`` below
        ``_SRC_DIR``. Each header template is filled in with
        ``self._subvars`` using ``%`` formatting.

        * ``self._self_interaction_lib`` is built from
          ``SelfInteraction.h`` and ``SelfInteraction.cpp``.
        * ``self._self_interaction_pot_lib`` is built from
          ``SelfInteractionPot.h`` and ``SelfInteractionPot.cpp`` and takes
          an additional ``UPP`` dat with ``INC`` access.
        """
        # The OpenMP loop is used for the 'thread' and 'omp' settings.
        if self.shared_memory not in ('thread', 'omp'):
            loop_cls = loop.ParticleLoop
        else:
            loop_cls = loop.ParticleLoopOMP

        def read_source(filename):
            # Return the text of one file in the EwaldOrthSource directory.
            path = str(_SRC_DIR) + '/EwaldOrthSource/' + filename
            with open(path, 'r') as handle:
                return handle.read()

        # Energy only variant.
        energy_header_src = read_source('SelfInteraction.h')
        energy_headers = (kernel.Header(block=energy_header_src %
                                        self._subvars), )
        energy_code = read_source('SelfInteraction.cpp')
        energy_kernel = kernel.Kernel(name='self_interaction_part',
                                      code=energy_code,
                                      headers=energy_headers)

        energy_dats = {
            'Q': data.ParticleDat(ncomp=1,
                                  dtype=ctypes.c_double)(access.READ),
            'u': self._vars['self_interaction_energy'](access.INC_ZERO)
        }
        self._self_interaction_lib = loop_cls(kernel=energy_kernel,
                                              dat_dict=energy_dats)

        # Variant that also writes to the ``UPP`` dat.
        pot_header_src = read_source('SelfInteractionPot.h')
        pot_headers = (kernel.Header(block=pot_header_src %
                                     self._subvars), )
        pot_code = read_source('SelfInteractionPot.cpp')
        pot_kernel = kernel.Kernel(name='self_interaction_part_pot',
                                   code=pot_code,
                                   headers=pot_headers)

        pot_dats = {
            'Q': data.ParticleDat(ncomp=1,
                                  dtype=ctypes.c_double)(access.READ),
            'UPP': data.ParticleDat(ncomp=1,
                                    dtype=ctypes.c_double)(access.INC),
            'u': self._vars['self_interaction_energy'](access.INC_ZERO)
        }
        self._self_interaction_pot_lib = loop_cls(kernel=pot_kernel,
                                                  dat_dict=pot_dats)
Ejemplo n.º 2
0
    def kernel(self):
        """
        Build the ``TestPotential1`` kernel.

        For a pair whose squared separation ``r2`` is below ``rc2`` the
        generated C adds ``1/r2`` to all three components of ``A`` for both
        particles of the pair.

        :return: ``kernel.Kernel`` with ``rc2`` set to ``self._rc**2``.
        """

        c_source = '''

        const double R[3] = {P[1][0] - P[0][0], P[1][1] - P[0][1], P[1][2] - P[0][2]};

        double r2 = R[0]*R[0] + R[1]*R[1] + R[2]*R[2];

        if (r2 < rc2){

            r2=1./r2;

            A[0][0]+=r2;
            A[0][1]+=r2;
            A[0][2]+=r2;

            A[1][0]+=r2;
            A[1][1]+=r2;
            A[1][2]+=r2;

        }
        '''
        # Squared cutoff is baked into the kernel as a constant.
        cutoff_constant = kernel.Constant('rc2', self._rc**2)
        kernel_constants = (cutoff_constant, )
        header_names = ['stdio.h']

        return kernel.Kernel('TestPotential1', c_source, kernel_constants,
                             header_names, None)
Ejemplo n.º 3
0
    def kernel(self):
        """
        Build the ``LJ_accel_U`` kernel in its branch free form.

        The generated C always evaluates the energy and force expressions
        and uses ternaries on ``r2 < rc2`` so that only pairs inside the
        cutoff contribute. Only particle ``0`` of the pair is updated, and
        the energy contribution carries a factor of ``0.5``.

        :return: ``kernel.Kernel`` with the constants ``sigma2``, ``rc2``,
            ``internalshift``, ``CF`` and ``CV``.
        """

        c_source = '''

        const double R0 = P(1, 0) - P(0, 0);
        const double R1 = P(1, 1) - P(0, 1);
        const double R2 = P(1, 2) - P(0, 2);

        const double r2 = R0*R0 + R1*R1 + R2*R2;

        const double r_m2 = sigma2/r2;
        const double r_m4 = r_m2*r_m2;
        const double r_m6 = r_m4*r_m2;

        u(0)+= (r2 < rc2) ? 0.5*CV*((r_m6-1.0)*r_m6 + internalshift) : 0.0;

        const double r_m8 = r_m4*r_m4;
        const double f_tmp = CF*(r_m6 - 0.5)*r_m8;

        A(0, 0)+= (r2 < rc2) ? f_tmp*R0 : 0.0;
        A(0, 1)+= (r2 < rc2) ? f_tmp*R1 : 0.0;
        A(0, 2)+= (r2 < rc2) ? f_tmp*R2 : 0.0;

        '''
        sigma_squared = kernel.Constant('sigma2', self._sigma**2)
        cutoff_squared = kernel.Constant('rc2', self._rc**2)
        energy_shift = kernel.Constant('internalshift', self._shift_internal)
        force_prefactor = kernel.Constant('CF', self._C_F)
        energy_prefactor = kernel.Constant('CV', self._C_V)

        kernel_constants = (sigma_squared, cutoff_squared, energy_shift,
                            force_prefactor, energy_prefactor)
        kernel_headers = [kernel.Header('stdio.h')]

        return kernel.Kernel('LJ_accel_U', c_source, kernel_constants,
                             kernel_headers)
Ejemplo n.º 4
0
    def _generate_pairloop(self):
        """
        Build the pair loop that sums ``POINT_EVAL`` over particle pairs.

        The header is produced by ``lib.build.write_header`` from
        ``self.interaction_func``. The kernel passes both positions and
        both types (cast to ``double``) to ``POINT_EVAL`` and adds the
        result to ``ENERGY[0]``.

        :return: ``PairLoop`` using ``self.cutoff`` as its shell cutoff.
        """
        interaction_header = lib.build.write_header(self.interaction_func)

        c_source = r'''

        const double u0 = POINT_EVAL(
            P.i[0],
            P.i[1],
            P.i[2],
            P.j[0],
            P.j[1],
            P.j[2],
            (double) T.i[0],
            (double) T.j[0]
        );

        ENERGY[0] += u0;
        '''

        pair_kernel = kernel.Kernel('mc_short_range',
                                    c_source,
                                    headers=(interaction_header, ))

        # ENERGY uses INC_ZERO access; positions and types are read only.
        kernel_dats = {
            'P': self.positions(access.READ),
            'T': self.types(access.READ),
            'ENERGY': self._ga_energy(access.INC_ZERO)
        }

        return PairLoop(pair_kernel,
                        dat_dict=kernel_dats,
                        shell_cutoff=self.cutoff)
Ejemplo n.º 5
0
    def __init__(self, state, size=0, v0=None):
        """
        Set up the velocity autocorrelation particle loop.

        :arg state: State providing ``npart_local``, ``velocities`` and
            ``as_func``.
        :arg size: Not used by this constructor.
        :arg v0: Optional initial velocities passed to ``set_v0``. When
            ``None``, ``set_v0`` is called with ``state=`` instead.
        """
        self._state = state
        self._V0 = data.ParticleDat(self._state.npart_local, 3, name='v0')
        self._VT = state.velocities

        self._VO_SET = False
        if v0 is None:
            self.set_v0(state=self._state)
        else:
            self.set_v0(v0)

        # Accumulator for the reduction and per-sample storage lists.
        self._VAF = data.ScalarArray(ncomp=1)
        self._V = []
        self._T = []

        c_source = '''

        VAF(0) += (v0(0)*VT(0) + v0(1)*VT(1) + v0(2)*VT(2))*Ni;

        '''
        vaf_reduction = (kernel.Reduction('VAF', 'VAF[I]', '+'), )

        # ``Ni`` is supplied at call time as a static argument.
        vaf_kernel = kernel.Kernel('VelocityAutocorrelation', c_source, None,
                                   ['stdio.h'], vaf_reduction,
                                   {'Ni': ctypes.c_double})

        self._datdict = {'VAF': self._VAF, 'v0': self._V0, 'VT': self._VT}

        particle_count = self._state.as_func('npart_local')
        self._loop = loop.ParticleLoop(particle_count,
                                       None,
                                       kernel=vaf_kernel,
                                       dat_dict=self._datdict)
Ejemplo n.º 6
0
    def __init__(self,
                 velocities=None,
                 masses=None,
                 kinetic_energy_dat=None,
                 looping_method=None):
        """
        Set up the particle loop that accumulates the kinetic energy.

        The kernel adds ``0.5 * M(0) * (V(0)^2 + V(1)^2 + V(2)^2)`` to
        ``k(0)`` for each particle.

        :arg velocities: Velocity dat bound to the kernel as ``V`` with
            ``access.R``. Although it defaults to ``None`` it is called
            unconditionally, so it must be provided.
        :arg masses: Mass dat bound as ``M`` with ``access.R``. It is also
            called unconditionally, so it must be provided.
        :arg kinetic_energy_dat: Optional accumulator bound as ``k`` with
            ``access.INC``. When ``None`` a one component
            ``ctypes.c_double`` ``data.ScalarArray`` is created.
        :arg looping_method: Callable used to construct the loop. Defaults
            to ``loop.ParticleLoop``.
        """

        # The default is resolved once; the original repeated this check a
        # second time further down, where it could never be true.
        if looping_method is None:
            looping_method = loop.ParticleLoop
        if kinetic_energy_dat is None:
            self.k = data.ScalarArray(ncomp=1, dtype=ctypes.c_double)
        else:
            self.k = kinetic_energy_dat

        self._v = velocities

        _K_kernel_code = '''
        k(0) += (V(0)*V(0) + V(1)*V(1) + V(2)*V(2))*0.5*M(0);
        '''
        _constants_K = []
        _K_kernel = kernel.Kernel('K_kernel', _K_kernel_code, _constants_K)
        self._kinetic_energy_lib = looping_method(kernel=_K_kernel,
                                                  dat_dict={
                                                      'V':
                                                      velocities(access.R),
                                                      'k': self.k(access.INC),
                                                      'M': masses(access.R)
                                                  })

        # Storage list, initially empty.
        self._ke_store = []
Ejemplo n.º 7
0
    def kernel(self):
        """
        Build the force only ``LJ_accel`` kernel.

        The generated C updates ``A.i`` only and uses ternaries on
        ``r2 < rc2`` so pairs outside the cutoff add zero. The constants
        ``internalshift`` and ``CV`` are still passed to the kernel even
        though this kernel source does not reference them.

        :return: ``kernel.Kernel`` named ``LJ_accel``.
        """

        c_source = '''

        const double R0 = P.j[0] - P.i[0];
        const double R1 = P.j[1] - P.i[1];
        const double R2 = P.j[2] - P.i[2];

        const double r2 = R0*R0 + R1*R1 + R2*R2;

        const double r_m2 = sigma2/r2;
        const double r_m4 = r_m2*r_m2;

        const double f_tmp = CF*(r_m4*r_m2 - 0.5)*r_m4*r_m4;

        A.i[0]+= (r2 < rc2) ? f_tmp*R0 : 0.0;
        A.i[1]+= (r2 < rc2) ? f_tmp*R1 : 0.0;
        A.i[2]+= (r2 < rc2) ? f_tmp*R2 : 0.0;

        '''
        sigma_squared = kernel.Constant('sigma2', self._sigma**2)
        cutoff_squared = kernel.Constant('rc2', self._rc**2)
        energy_shift = kernel.Constant('internalshift', self._shift_internal)
        force_prefactor = kernel.Constant('CF', self._C_F)
        energy_prefactor = kernel.Constant('CV', self._C_V)

        kernel_constants = (sigma_squared, cutoff_squared, energy_shift,
                            force_prefactor, energy_prefactor)
        kernel_headers = [kernel.Header('stdio.h')]

        return kernel.Kernel('LJ_accel', c_source, kernel_constants,
                             kernel_headers)
Ejemplo n.º 8
0
    def integrate(self, dt=None, t=None):
        """
        Rebuild the two integration loops and run the integrator.

        The stored step size and end time are overwritten only when the
        corresponding argument is given. The iteration count is
        ``ceil(T / dt)``. Boundary conditions are executed and forces are
        updated once before the timed integration call.

        :arg double dt: Time step size.
        :arg double t: End time.
        """
        print("starting integration")
        if dt is not None:
            self._dt = dt
        if t is not None:
            self._T = t

        step_count = math.ceil(self._T / self._dt)
        self._max_it = int(step_count)

        # Full and half step sizes are compiled into both kernels.
        full_step = kernel.Constant('dt', self._dt)
        half_step = kernel.Constant('dht', 0.5 * self._dt)
        self._constants = [full_step, half_step]

        # First kernel takes positions, velocities, accelerations, masses.
        self._kernel1 = kernel.Kernel('vv1', self._kernel1_code,
                                      self._constants)
        first_stage_dats = {
            'P': self._P(access.W),
            'V': self._V(access.W),
            'A': self._A(access.R),
            'M': self._M(access.R)
        }
        self._p1 = loop.ParticleLoop(self._kernel1, first_stage_dats)

        # Second kernel takes the same dats except positions.
        self._kernel2 = kernel.Kernel('vv2', self._kernel2_code,
                                      self._constants)
        second_stage_dats = {
            'V': self._V(access.W),
            'A': self._A(access.R),
            'M': self._M(access.R)
        }
        self._p2 = loop.ParticleLoop(self._kernel2, second_stage_dats)

        self._update_controller.execute_boundary_conditions()
        self._sim.forces_update()

        # Only the integration itself is timed.
        self.timer.start()
        self._velocity_verlet_integration()
        self.timer.pause()
Ejemplo n.º 9
0
    def kernel(self):
        """
        Build the symmetric ``BuckinghamV`` kernel.

        Inside the cutoff the generated C adds
        ``_A*exp(_MB*r) - _C/r^6 + internalshift`` to ``u(0)`` and applies
        equal and opposite updates to ``A`` for both particles of the
        pair.

        :return: ``kernel.Kernel`` with ``stdio.h`` and ``math.h`` headers.
        """

        c_source = '''

        const double R0 = P(1,0) - P(0,0);
        const double R1 = P(1,1) - P(0,1);
        const double R2 = P(1,2) - P(0,2);

        const double r2 = R0*R0 + R1*R1 + R2*R2;

        if (r2 < rc2) {
            const double r = sqrt(r2);
            // \\exp{-B*r}
            const double exp_mbr = exp(_MB*r);

            // r^{-2, -4, -6}
            const double r_m1 = 1.0/r;
            const double r_m2 = r_m1*r_m1;
            const double r_m4 = r_m2*r_m2;
            const double r_m6 = r_m4*r_m2;

            // \\frac{C}{r^6}
            const double crm6 = _C*r_m6;

            // A \\exp{-Br} - \\frac{C}{r^6}
            u(0)+= _A*exp_mbr - crm6 + internalshift;

            // AB \\exp{-Br} - \\frac{C}{r^6}*\\frac{6}{r}
            const double term2 = crm6*(-6.0)*r_m1;
            const double f_tmp = _AB * exp_mbr + term2;

            A(0,0)+=f_tmp*R0;
            A(0,1)+=f_tmp*R1;
            A(0,2)+=f_tmp*R2;

            A(1,0)-=f_tmp*R0;
            A(1,1)-=f_tmp*R1;
            A(1,2)-=f_tmp*R2;
        }
        '''
        # One constant per line, in the same order as before.
        kernel_constants = (
            kernel.Constant('_A', self.a),
            kernel.Constant('_AB', self.ab),
            kernel.Constant('_B', self.b),
            kernel.Constant('_MB', self.mb),
            kernel.Constant('_C', self.c),
            kernel.Constant('rc2', self.rc**2),
            kernel.Constant('internalshift', self._shift_internal),
        )
        kernel_headers = [kernel.Header('stdio.h'), kernel.Header('math.h')]

        return kernel.Kernel('BuckinghamV', c_source, kernel_constants,
                             kernel_headers)
Ejemplo n.º 10
0
    def kernel(self):
        """
        Build the symmetric ``LJ_accel_U`` kernel with pair counters.

        The generated C increments ``OUTCOUNT(0)`` for every pair it is
        called on and ``COUNT(0)`` for pairs inside the cutoff. Inside the
        cutoff it adds the energy to ``u(0)`` and applies equal and
        opposite updates to ``A`` for both particles. A ``+`` reduction on
        ``u`` is attached to the kernel.

        :return: ``kernel.Kernel`` named ``LJ_accel_U``.
        """

        c_source = '''

        OUTCOUNT(0)++;

        const double R0 = P(1, 0) - P(0, 0);
        const double R1 = P(1, 1) - P(0, 1);
        const double R2 = P(1, 2) - P(0, 2);


        //printf("Positions P(0) = %f, P(1) = %f |", P(0, 1), P(1, 1));


        const double r2 = R0*R0 + R1*R1 + R2*R2;

        if (r2 < rc2){

            COUNT(0)++;

            const double r_m2 = sigma2/r2;
            const double r_m4 = r_m2*r_m2;
            const double r_m6 = r_m4*r_m2;

            u(0)+= CV*((r_m6-1.0)*r_m6 + internalshift);

            const double r_m8 = r_m4*r_m4;
            const double f_tmp = CF*(r_m6 - 0.5)*r_m8;


            A(0, 0)+=f_tmp*R0;
            A(0, 1)+=f_tmp*R1;
            A(0, 2)+=f_tmp*R2;

            A(1, 0)-=f_tmp*R0;
            A(1, 1)-=f_tmp*R1;
            A(1, 2)-=f_tmp*R2;

        }

        '''
        sigma_squared = kernel.Constant('sigma2', self._sigma**2)
        cutoff_squared = kernel.Constant('rc2', self._rc**2)
        energy_shift = kernel.Constant('internalshift', self._shift_internal)
        force_prefactor = kernel.Constant('CF', self._C_F)
        energy_prefactor = kernel.Constant('CV', self._C_V)

        kernel_constants = (sigma_squared, cutoff_squared, energy_shift,
                            force_prefactor, energy_prefactor)

        energy_reduction = (kernel.Reduction('u', 'u[0]', '+'), )
        kernel_headers = [kernel.Header('stdio.h')]

        return kernel.Kernel('LJ_accel_U', c_source, kernel_constants,
                             kernel_headers, energy_reduction)
Ejemplo n.º 11
0
    def _build_libs(self, dt):
        """
        Build the two integration loops for a given step size.

        ``self._p1`` updates velocities by a half step and positions by a
        full step. ``self._p2`` updates velocities by a half step only.
        Both are created with ``self._looping_method``.

        :arg dt: Step size. ``dt`` and ``0.5 * dt`` are compiled into the
            kernels as the constants ``dt`` and ``dht``.
        """
        step_constants = [
            kernel.Constant('dt', dt),
            kernel.Constant('dht', 0.5 * dt),
        ]

        first_stage_source = '''
        const double M_tmp = 1.0/M(0);
        V(0) += dht*F(0)*M_tmp;
        V(1) += dht*F(1)*M_tmp;
        V(2) += dht*F(2)*M_tmp;
        P(0) += dt*V(0);
        P(1) += dt*V(1);
        P(2) += dt*V(2);
        '''
        first_stage_kernel = kernel.Kernel('vv1', first_stage_source,
                                           step_constants)
        first_stage_dats = {
            'P': self._p(access.W),
            'V': self._v(access.W),
            'F': self._f(access.R),
            'M': self._m(access.R)
        }
        self._p1 = self._looping_method(kernel=first_stage_kernel,
                                        dat_dict=first_stage_dats)

        second_stage_source = '''
        const double M_tmp = 1.0/M(0);
        V(0) += dht*F(0)*M_tmp;
        V(1) += dht*F(1)*M_tmp;
        V(2) += dht*F(2)*M_tmp;
        '''
        second_stage_kernel = kernel.Kernel('vv2', second_stage_source,
                                            step_constants)
        second_stage_dats = {
            'V': self._v(access.W),
            'F': self._f(access.R),
            'M': self._m(access.R)
        }
        self._p2 = self._looping_method(kernel=second_stage_kernel,
                                        dat_dict=second_stage_dats)
Ejemplo n.º 12
0
    def kernel(self):
        """
        Build the one sided, branch free ``BuckinghamV`` kernel.

        The generated C always evaluates the energy and force expressions
        and uses ternaries on ``r2 < rc2`` so that only pairs inside the
        cutoff contribute. Only ``A.i`` is updated and the energy
        contribution carries a factor of ``0.5``.

        :return: ``kernel.Kernel`` with ``stdio.h`` and ``math.h`` headers.
        """

        c_source = '''

        const double R0 = P.j[0] - P.i[0];
        const double R1 = P.j[1] - P.i[1];
        const double R2 = P.j[2] - P.i[2];

        const double r2 = R0*R0 + R1*R1 + R2*R2;

        const double r = sqrt(r2);
        // \\exp{-B*r}
        const double exp_mbr = exp(_MB*r);

        // r^{-2, -4, -6}
        const double r_m1 = 1.0/r;
        const double r_m2 = r_m1*r_m1;
        const double r_m4 = r_m2*r_m2;
        const double r_m6 = r_m4*r_m2;

        // \\frac{C}{r^6}
        const double crm6 = _C*r_m6;

        // A \\exp{-Br} - \\frac{C}{r^6}
        u[0]+= (r2 < rc2) ? 0.5*(_A*exp_mbr - crm6 + internalshift) : 0.0;

        // = AB \\exp{-Br} - \\frac{C}{r^6}*\\frac{6}{r}
        const double term2 = crm6*(-6.0)*r_m1;
        const double f_tmp = _AB * exp_mbr + term2;

        A.i[0]+= (r2 < rc2) ? f_tmp*R0 : 0.0;
        A.i[1]+= (r2 < rc2) ? f_tmp*R1 : 0.0;
        A.i[2]+= (r2 < rc2) ? f_tmp*R2 : 0.0;

        '''
        # One constant per line, in the same order as before.
        kernel_constants = (
            kernel.Constant('_A', self.a),
            kernel.Constant('_AB', self.ab),
            kernel.Constant('_B', self.b),
            kernel.Constant('_MB', self.mb),
            kernel.Constant('_C', self.c),
            kernel.Constant('rc2', self.rc**2),
            kernel.Constant('internalshift', self._shift_internal),
        )
        kernel_headers = [kernel.Header('stdio.h'), kernel.Header('math.h')]

        return kernel.Kernel('BuckinghamV', c_source, kernel_constants,
                             kernel_headers)
Ejemplo n.º 13
0
    def kernel(self):
        """
        Build the ``NULL_Potential`` kernel.

        The generated C computes the pair separation components but does
        not use them. It sets every component of ``A`` to zero for both
        particles of the pair.

        :return: ``kernel.Kernel`` created with ``None`` for its last three
            positional arguments.
        """

        c_source = '''
        
        const double R0 = P(1, 0) - P(0, 0);
        const double R1 = P(1, 1) - P(0, 1);
        const double R2 = P(1, 2) - P(0, 2);
        
        A(0, 0)=0;
        A(0, 1)=0;
        A(0, 2)=0;
        
        A(1, 0)=0;
        A(1, 1)=0;
        A(1, 2)=0;
        
        '''

        null_kernel = kernel.Kernel('NULL_Potential', c_source, None, None,
                                    None)
        return null_kernel
Ejemplo n.º 14
0
    def _init_near_potential_lib(self):
        """
        Create the pair loop stored as ``self._near_potential_field``.

        The kernel is built from ``EvaluateNearPotentialField.h`` and
        ``EvaluateNearPotentialField.cpp`` in ``EwaldOrthSource`` below
        ``_SRC_DIR``. The header template is filled in with
        ``self._subvars`` using ``%`` formatting. The loop class is always
        ``pairloop.CellByCellOMP``.
        """
        source_stem = (str(_SRC_DIR) +
                       '/EwaldOrthSource/EvaluateNearPotentialField')

        with open(source_stem + '.h', 'r') as handle:
            header_template = handle.read()
        field_headers = (kernel.Header(block=header_template %
                                       self._subvars), )

        with open(source_stem + '.cpp', 'r') as handle:
            field_code = handle.read()

        field_kernel = kernel.Kernel(name='real_space_part',
                                     code=field_code,
                                     headers=field_headers)

        # Without an explicit shell width the cutoff is extended by 5%.
        if self.shell_width is not None:
            shell_cutoff = self.real_cutoff + self.shell_width
        else:
            shell_cutoff = self.real_cutoff * 1.05

        pair_loop_cls = pairloop.CellByCellOMP

        field_dats = {
            'P': data.ParticleDat(ncomp=3,
                                  dtype=ctypes.c_double)(access.READ),
            'Q': data.ParticleDat(ncomp=1,
                                  dtype=ctypes.c_double)(access.READ),
            'M': data.ParticleDat(ncomp=1,
                                  dtype=ctypes.c_int)(access.READ),
            'u': data.ParticleDat(ncomp=1,
                                  dtype=ctypes.c_double)(access.INC),
        }

        self._near_potential_field = pair_loop_cls(kernel=field_kernel,
                                                   dat_dict=field_dats,
                                                   shell_cutoff=shell_cutoff)
Ejemplo n.º 15
0
    def kernel(self):
        """
        Build the one sided ``LJ_accel_U`` kernel.

        Inside the cutoff the generated C adds half of the pair energy to
        ``u[0]`` and updates ``A.i`` only.

        :return: ``kernel.Kernel`` with the constants ``sigma2``, ``rc2``,
            ``internalshift``, ``CF`` and ``CV``.
        """

        c_source = '''
        //N_f = 27
        const double R0 = P.j[0] - P.i[0];
        const double R1 = P.j[1] - P.i[1];
        const double R2 = P.j[2] - P.i[2];

        const double r2 = R0*R0 + R1*R1 + R2*R2;

        if (r2 < rc2){

            const double r_m2 = sigma2/r2;
            const double r_m4 = r_m2*r_m2;
            const double r_m6 = r_m4*r_m2;

            u[0] += 0.5*CV*((r_m6-1.0)*r_m6 + internalshift);

            const double r_m8 = r_m4*r_m4;
            const double f_tmp = CF*(r_m6 - 0.5)*r_m8;

            A.i[0] +=  f_tmp*R0;
            A.i[1] +=  f_tmp*R1;
            A.i[2] +=  f_tmp*R2;

        }
        '''
        sigma_squared = kernel.Constant('sigma2', self._sigma**2)
        cutoff_squared = kernel.Constant('rc2', self._rc**2)
        energy_shift = kernel.Constant('internalshift', self._shift_internal)
        force_prefactor = kernel.Constant('CF', self._C_F)
        energy_prefactor = kernel.Constant('CV', self._C_V)

        kernel_constants = (sigma_squared, cutoff_squared, energy_shift,
                            force_prefactor, energy_prefactor)
        kernel_headers = [kernel.Header('stdio.h')]

        return kernel.Kernel('LJ_accel_U', c_source, kernel_constants,
                             kernel_headers)
Ejemplo n.º 16
0
    def _init_extract_loop(self):
        """
        Build the particle loop stored as ``self._extract_loop``.

        The C kernel ``lm_extract_loop`` is generated in two parts. A
        Python loop over ``lx`` in ``range(self.L)`` and ``mx`` in
        ``-lx..lx`` emits code that loads one real and one imaginary
        coefficient from ``TREE`` and adds the real part of its complex
        product with the symbols from ``sph_gen.get_y_sym(lx, mx)``,
        scaled by ``rhol``, to ``tmp_energy``. That code is spliced into
        a kernel template which loops over ``R`` levels per particle and
        finally adds ``particle_energy * 0.5 * Q.i[0]`` to
        ``OUT_ENERGY[0]``.

        The per-level width, cell-count and offset tables are written into
        a generated header from the precomputed ``self.*_str`` attributes.
        """

        # NOTE(review): ``g`` is not used again in this method.
        g = self.group
        extent = self.domain.extent
        # Per dimension: sx**(R - 1) / ex, written as 1 / (ex / sx**(R - 1)).
        # Presumably the inverse cell width on the finest level - confirm.
        cell_widths = [
            1.0 / (ex / (sx**(self.R - 1)))
            for ex, sx in zip(extent, self.subdivision)
        ]

        L = self.L
        sph_gen = self.sph_gen

        # Linear index of the (L, M) coefficient: L * (L + 1) + M.
        def cube_ind(L, M):
            return ((L) * ((L) + 1) + (M))

        # EC accumulates the generated C for the energy evaluation.
        EC = ''
        for lx in range(L):

            for mx in range(-lx, lx + 1):
                # Encode the sign of mx in the symbol name: 'n' for
                # negative, 'p' otherwise, followed by |mx|.
                smx = 'n' if mx < 0 else 'p'
                smx += str(abs(mx))

                re_lnm = SphSymbol('reln{lx}m{mx}'.format(lx=lx, mx=smx))
                im_lnm = SphSymbol('imln{lx}m{mx}'.format(lx=lx, mx=smx))

                # Imaginary parts are stored IM_OFFSET entries after the
                # real parts within a cell's block of TREE.
                EC += '''
                const double {re_lnm} = TREE[OFFSET + {cx}];
                const double {im_lnm} = TREE[OFFSET + IM_OFFSET + {cx}];
                '''.format(re_lnm=str(re_lnm),
                           im_lnm=str(im_lnm),
                           cx=str(cube_ind(lx, mx)))
                # Only the real part of the complex product is used.
                cm_re, cm_im = cmplx_mul(re_lnm, im_lnm,
                                         sph_gen.get_y_sym(lx, mx)[0],
                                         sph_gen.get_y_sym(lx, mx)[1])
                EC += 'tmp_energy += ({cm_re}) * rhol;\n'.format(cm_re=cm_re)

            # After each lx the running power of the radius is advanced.
            EC += 'rhol *= radius;\n'

        # Doubled braces in the template are literal C braces; single
        # braces are filled in by ``.format``.
        k = kernel.Kernel(
            'lm_extract_loop',
            r'''
            
            const double rx = P.i[0];
            const double ry = P.i[1];
            const double rz = P.i[2];

            double particle_energy = 0.0;


            for( int level=0 ; level<R ; level++ ){{

                const int64_t cellx = MM_CELLS.i[level * 3 + 0];
                const int64_t celly = MM_CELLS.i[level * 3 + 1];
                const int64_t cellz = MM_CELLS.i[level * 3 + 2];

                const double dx = rx - ((-HEX) + (0.5  + cellx) * WIDTHS_X[level]);
                const double dy = ry - ((-HEY) + (0.5  + celly) * WIDTHS_Y[level]);
                const double dz = rz - ((-HEZ) + (0.5  + cellz) * WIDTHS_Z[level]);

                const double xy2 = dx * dx + dy * dy;
                const double radius = sqrt(xy2 + dz * dz);
                const double theta = atan2(sqrt(xy2), dz);
                const double phi = atan2(dy, dx);
                
                const int64_t lin_ind = cellx + NCELLS_X[level] * (celly + NCELLS_Y[level] * cellz);
                const int64_t OFFSET = LEVEL_OFFSETS[level] + NCOMP * lin_ind;
                 
                {SPH_GEN}

                double tmp_energy = 0.0;
                double rhol = 1.0;

                {ENERGY_COMP}

                particle_energy += tmp_energy;

            }}


            OUT_ENERGY[0] += particle_energy * 0.5 * Q.i[0];

            '''.format(SPH_GEN=str(sph_gen.module), ENERGY_COMP=str(EC)),
            (Constant('R', self.R), Constant('EX', extent[0]),
             Constant('EY', extent[1]), Constant('EZ', extent[2]),
             Constant('HEX', 0.5 * extent[0]), Constant(
                 'HEY', 0.5 * extent[1]), Constant('HEZ', 0.5 * extent[2]),
             Constant('CWX', cell_widths[0]), Constant(
                 'CWY', cell_widths[1]), Constant('CWZ', cell_widths[2]),
             Constant('LCX', self.subdivision[0]**(self.R - 1)),
             Constant('LCY', self.subdivision[1]**(self.R - 1)),
             Constant('LCZ', self.subdivision[2]
                      **(self.R - 1)), Constant('SDX', self.subdivision[0]),
             Constant('SDY', self.subdivision[1]),
             Constant('SDZ', self.subdivision[2]),
             Constant('IL_NO', self.il_array.shape[1]),
             Constant('IL_STRIDE_OUTER',
                      self.il_array.shape[1] * self.il_array.shape[2]),
             Constant('NCOMP', self.ncomp), Constant('IM_OFFSET', self.L**2)),
            headers=(lib.build.write_header("""
                    #define R {R}
                    const double WIDTHS_X[R] = {{ {WIDTHS_X} }};
                    const double WIDTHS_Y[R] = {{ {WIDTHS_Y} }};
                    const double WIDTHS_Z[R] = {{ {WIDTHS_Z} }};

                    const int64_t NCELLS_X[R] = {{ {NCELLS_X} }};
                    const int64_t NCELLS_Y[R] = {{ {NCELLS_Y} }};
                    const int64_t NCELLS_Z[R] = {{ {NCELLS_Z} }};

                    const int64_t LEVEL_OFFSETS[R] = {{ {LEVEL_OFFSETS} }};

                    """.format(R=self.R,
                               WIDTHS_X=self.widths_x_str,
                               WIDTHS_Y=self.widths_y_str,
                               WIDTHS_Z=self.widths_z_str,
                               NCELLS_X=self.ncells_x_str,
                               NCELLS_Y=self.ncells_y_str,
                               NCELLS_Z=self.ncells_z_str,
                               LEVEL_OFFSETS=self.level_offsets_str)), ))

        # All inputs are read only; OUT_ENERGY uses INC_ZERO access.
        # MM_CHILD_INDEX is passed to the loop although the kernel source
        # above does not reference it.
        dat_dict = {
            'P': self.positions(access.READ),
            'Q': self.charges(access.READ),
            'MM_CELLS': self._dat_cells(access.READ),
            'MM_CHILD_INDEX': self._dat_child_index(access.READ),
            'TREE': self.tree(access.READ),
            'OUT_ENERGY': self._extract_energy(access.INC_ZERO),
        }

        self._extract_loop = loop.ParticleLoopOMP(kernel=k, dat_dict=dat_dict)
Ejemplo n.º 17
0
    def _init_real_space_lib(self):
        """
        Create the two real space pair loops.

        Both kernels are read from ``EwaldOrthSource`` below ``_SRC_DIR``.
        Each header template is filled in with ``self._subvars`` using
        ``%`` formatting.

        * ``self._real_space_pairloop`` is built from
          ``RealSpaceForceEnergy.h`` and ``RealSpaceForceEnergy.cpp``.
        * ``self._real_space_pairloop_pot`` is built from
          ``RealSpaceForceEnergyPot.h`` and ``RealSpaceForceEnergyPot.cpp``
          and takes an additional ``UPP`` dat with ``INC`` access.

        Both loops share the same shell cutoff and loop class.
        """

        def read_source(filename):
            # Return the text of one file in the EwaldOrthSource directory.
            path = str(_SRC_DIR) + '/EwaldOrthSource/' + filename
            with open(path, 'r') as handle:
                return handle.read()

        # Force and energy kernel.
        force_header_src = read_source('RealSpaceForceEnergy.h')
        force_headers = (kernel.Header(block=force_header_src %
                                       self._subvars), )
        force_code = read_source('RealSpaceForceEnergy.cpp')
        force_kernel = kernel.Kernel(name='real_space_part',
                                     code=force_code,
                                     headers=force_headers)

        # Without an explicit shell width the cutoff is extended by 5%.
        if self.shell_width is not None:
            shell_cutoff = self.real_cutoff + self.shell_width
        else:
            shell_cutoff = self.real_cutoff * 1.05

        # The OpenMP loop is used for the 'thread' and 'omp' settings.
        if self.shared_memory not in ('thread', 'omp'):
            pair_loop_cls = pairloop.PairLoopNeighbourListNS
        else:
            pair_loop_cls = pairloop.PairLoopNeighbourListNSOMP

        force_dats = {
            'P': data.ParticleDat(ncomp=3,
                                  dtype=ctypes.c_double)(access.READ),
            'Q': data.ParticleDat(ncomp=1,
                                  dtype=ctypes.c_double)(access.READ),
            'F': data.ParticleDat(ncomp=3,
                                  dtype=ctypes.c_double)(access.INC),
            'u': self._vars['real_space_energy'](access.INC_ZERO)
        }
        self._real_space_pairloop = pair_loop_cls(kernel=force_kernel,
                                                  dat_dict=force_dats,
                                                  shell_cutoff=shell_cutoff)

        # Force, energy and per particle potential kernel.
        pot_header_src = read_source('RealSpaceForceEnergyPot.h')
        pot_headers = (kernel.Header(block=pot_header_src %
                                     self._subvars), )
        pot_code = read_source('RealSpaceForceEnergyPot.cpp')
        pot_kernel = kernel.Kernel(name='real_space_part_pot',
                                   code=pot_code,
                                   headers=pot_headers)

        pot_dats = {
            'P': data.ParticleDat(ncomp=3,
                                  dtype=ctypes.c_double)(access.READ),
            'Q': data.ParticleDat(ncomp=1,
                                  dtype=ctypes.c_double)(access.READ),
            'UPP': data.ParticleDat(ncomp=1,
                                    dtype=ctypes.c_double)(access.INC),
            'F': data.ParticleDat(ncomp=3,
                                  dtype=ctypes.c_double)(access.INC),
            'u': self._vars['real_space_energy'](access.INC_ZERO)
        }
        self._real_space_pairloop_pot = pair_loop_cls(
            kernel=pot_kernel,
            dat_dict=pot_dats,
            shell_cutoff=shell_cutoff)
Ejemplo n.º 18
0
    def _init_libs(self):
        """
        Create the reciprocal space particle loops.

        Both kernels are read from ``EwaldOrthSource`` below ``_SRC_DIR``.
        Each header template is filled in with ``self._subvars`` using
        ``%`` formatting.

        * ``self._cont_lib`` is built from ``AccumulateRecip.h`` and
          ``AccumulateRecip.cpp``.
        * ``self._extract_force_energy_lib`` is built from
          ``ExtractForceEnergy.h`` and ``ExtractForceEnergy.cpp``.

        ``self._extract_force_energy_pot_lib`` is set to ``None`` here.
        """

        def read_source(filename):
            # Return the text of one file in the EwaldOrthSource directory.
            path = str(_SRC_DIR) + '/EwaldOrthSource/' + filename
            with open(path, 'r') as handle:
                return handle.read()

        # Reciprocal contribution calculation. The header is passed as a
        # single Header object, not wrapped in a tuple.
        accumulate_header_src = read_source('AccumulateRecip.h')
        accumulate_header = kernel.Header(block=accumulate_header_src %
                                          self._subvars)
        accumulate_code = read_source('AccumulateRecip.cpp')
        accumulate_kernel = kernel.Kernel(name='reciprocal_contributions',
                                          code=accumulate_code,
                                          headers=accumulate_header)

        # The OpenMP loop is used for the 'thread' and 'omp' settings.
        if self.shared_memory not in ('thread', 'omp'):
            loop_cls = loop.ParticleLoop
        else:
            loop_cls = loop.ParticleLoopOMP

        accumulate_dats = {
            'Positions':
            data.ParticleDat(ncomp=3, dtype=ctypes.c_double)(access.READ),
            'Charges':
            data.ParticleDat(ncomp=1, dtype=ctypes.c_double)(access.READ),
            'RecipSpace':
            self._vars['recip_space_kernel'](access.INC_ZERO)
        }
        self._cont_lib = loop_cls(kernel=accumulate_kernel,
                                  dat_dict=accumulate_dats)

        # Reciprocal extraction of forces plus energy.
        extract_header_src = read_source('ExtractForceEnergy.h')
        extract_header = kernel.Header(block=extract_header_src %
                                       self._subvars)
        extract_code = read_source('ExtractForceEnergy.cpp')
        extract_kernel = kernel.Kernel(name='reciprocal_force_energy',
                                       code=extract_code,
                                       headers=extract_header)

        extract_dats = {
            'Positions':
            data.ParticleDat(ncomp=3, dtype=ctypes.c_double)(access.READ),
            'Forces':
            data.ParticleDat(ncomp=3, dtype=ctypes.c_double)(access.INC),
            'Energy':
            self._vars['recip_space_energy'](access.INC_ZERO),
            'Charges':
            data.ParticleDat(ncomp=1, dtype=ctypes.c_double)(access.READ),
            'RecipSpace':
            self._vars['recip_space_kernel'](access.READ),
            'CoeffSpace':
            self._vars['coeff_space_kernel'](access.READ)
        }
        self._extract_force_energy_lib = loop_cls(kernel=extract_kernel,
                                                  dat_dict=extract_dats)

        self._extract_force_energy_pot_lib = None
Ejemplo n.º 19
0
    def __init__(self, state, rmax=None, rsteps=100):
        """
        Set up the all-pairs loop that bins pair separations into ``GR``.

        For each pair the generated C adjusts every separation component
        whose magnitude exceeds half the domain extent by
        ``isign(R) * extent``. Pairs with squared separation below
        ``rmax2`` then increment the bin
        ``(int) abs_md(r * rstepsoverrmax)`` under ``#pragma omp atomic``.

        :arg state: State providing ``domain.extent``, ``positions`` and
            ``npart_local``.
        :arg rmax: Maximum separation to bin. When ``None`` it is half of
            the smallest entry of the extent data.
        :arg rsteps: Number of bins.
        """

        self._count = 0
        self._state = state
        self._extent = self._state.domain.extent
        self._P = self._state.positions
        self._N = self._state.npart_local

        self._rmax = rmax
        if rmax is None:
            self._rmax = 0.5 * np.min(self._extent.data)

        self._rsteps = rsteps

        # Integer histogram with one component per bin, scaled by zero.
        self._gr = data.ScalarArray(ncomp=self._rsteps, dtype=ctypes.c_int)
        self._gr.scale(0.0)

        header_names = ['math.h', 'stdio.h']
        c_source = '''
        
        
        double R0 = P(1, 0) - P(0, 0);
        double R1 = P(1, 1) - P(0, 1);
        double R2 = P(1, 2) - P(0, 2);
        
        if (abs_md(R0) > exto20 ) { R0 += isign(R0) * extent0 ; }
        if (abs_md(R1) > exto21 ) { R1 += isign(R1) * extent1 ; }
        if (abs_md(R2) > exto22 ) { R2 += isign(R2) * extent2 ; }
        
        const double r2 = R0*R0 + R1*R1 + R2*R2;
        
        if (r2 < rmax2){
            
            double r20=0.0, r21 = r2;
            
            r21 = sqrt(r2);
            #pragma omp atomic
            GR[(int) (abs_md(r21* rstepsoverrmax))]++;
            
        }
        '''

        # Bin scaling, squared cutoff, full extents and half extents.
        kernel_constants = (
            kernel.Constant('rmaxoverrsteps',
                            0.2 * self._rmax / self._rsteps),
            kernel.Constant('rstepsoverrmax', self._rsteps / self._rmax),
            kernel.Constant('rmax2', self._rmax**2),
            kernel.Constant('extent0', self._extent[0]),
            kernel.Constant('extent1', self._extent[1]),
            kernel.Constant('extent2', self._extent[2]),
            kernel.Constant('exto20', 0.5 * self._extent[0]),
            kernel.Constant('exto21', 0.5 * self._extent[1]),
            kernel.Constant('exto22', 0.5 * self._extent[2]),
        )

        histogram_kernel = kernel.Kernel('radial_distro_periodic_static',
                                         c_source,
                                         kernel_constants,
                                         headers=header_names)
        histogram_dats = {'P': self._P, 'GR': self._gr}

        self._p = pairloop.DoubleAllParticleLoop(self._N,
                                                 kernel=histogram_kernel,
                                                 dat_dict=histogram_dats)

        self.timer = ppmd.opt.Timer(runtime.TIMER, 0)
Ejemplo n.º 20
0
    def apply(self):
        """
        Enforce the boundary conditions on the held state.

        With a single process in the domain communicator a particle loop
        built from ``cudaOneProcPBCSource.cu`` is run over the position dat
        and ``self._flag[0]`` is collapsed to 1 if the loop incremented it.

        With more than one process the ``cudaNProcPBC`` static library is
        used in two stages (find escapees, then sort them by direction into
        ``self._escape_matrix``) before handing the result to
        ``state.move_to_neighbour`` and ``state.filter_on_domain_boundary``.

        ``timer_apply`` brackets the whole call.
        """

        comm = self.state.domain.comm

        self.timer_apply.start()

        if comm.Get_size() == 1:
            # BC code for one proc. Probably removable when restricting to
            # large parallel systems.

            self.timer_lib_overhead.start()

            # Build the loop lazily on first use.
            if self._one_process_pbc_lib is None:
                with open(
                        str(cuda_config.LIB_DIR) + '/cudaOneProcPBCSource.cu',
                        'r') as fh:
                    _one_proc_pbc_code = fh.read()

                _one_proc_pbc_kernel = kernel.Kernel('_one_proc_pbc_kernel',
                                                     _one_proc_pbc_code,
                                                     None,
                                                     static_args={
                                                         'E0': ctypes.c_double,
                                                         'E1': ctypes.c_double,
                                                         'E2': ctypes.c_double
                                                     })

                self._one_process_pbc_lib = cuda_loop.ParticleLoop(
                    _one_proc_pbc_kernel, {
                        'P': self.state.get_position_dat()(access.RW),
                        'BCFLAG': self._flag(access.INC_ZERO)
                    })

            self.timer_lib_overhead.pause()

            _E = self.state.domain.extent

            self.timer_move.start()
            self._one_process_pbc_lib.execute(
                n=self.state.get_position_dat().npart_local,
                static_args={
                    'E0': ctypes.c_double(_E[0]),
                    'E1': ctypes.c_double(_E[1]),
                    'E2': ctypes.c_double(_E[2])
                })

            # Any positive count is reduced to a plain "something moved" flag.
            res = self._flag[0]
            if res > 0:
                self._flag[0] = 1

            self.timer_move.pause()

        ############ ----- MULTIPROC -------
        else:

            if self._escape_guard_lib is None:
                # build lib
                self._escape_guard_lib = \
                    cuda_build.build_static_libs('cudaNProcPBC')

            # --- init escape count ----
            if self._escape_count is None:
                self._escape_count = cuda_base.Array(ncomp=1,
                                                     dtype=ctypes.c_int32)
            self._escape_count[0] = 0

            # --- init escape dir count ----
            if self._escape_dir_count is None:
                self._escape_dir_count = cuda_base.Array(ncomp=26,
                                                         dtype=ctypes.c_int32)
            self._escape_dir_count[:] = 0

            # --- init escape list ----
            # Local particle count is read once and reused below.
            nl = self.state.get_position_dat().npart_local
            nl3 = nl * 3

            if self._escape_list is None:
                self._escape_list = cuda_base.Array(ncomp=nl3,
                                                    dtype=ctypes.c_int32)
            elif self._escape_list.ncomp < nl3:
                self._escape_list.realloc(nl3)

            # --- find escapees ---

            if nl > 0:
                cuda_runtime.cuda_err_check(
                    self._escape_guard_lib['cudaNProcPBCStageOne'](
                        ctypes.c_int32(nl),
                        self.state.domain.boundary.ctypes_data,
                        self.state.get_position_dat().ctypes_data,
                        self.state.domain.get_shift().ctypes_data,
                        self._escape_count.ctypes_data,
                        self._escape_dir_count.ctypes_data,
                        self._escape_list.ctypes_data))

            # One spare column beyond the busiest direction.
            dir_max = np.max(self._escape_dir_count[:]) + 1

            if self._escape_matrix is None:
                self._escape_matrix = cuda_base.Matrix(nrow=26,
                                                       ncol=dir_max,
                                                       dtype=ctypes.c_int32)

            elif self._escape_matrix.ncol < dir_max:
                self._escape_matrix.realloc(nrow=26, ncol=dir_max)

            # --- Populate escape matrix (essentially sort by direction)

            escape_count = self._escape_count[0]
            if (nl > 0) and (escape_count > 0):
                cuda_runtime.cuda_err_check(
                    self._escape_guard_lib['cudaNProcPBCStageTwo'](
                        ctypes.c_int32(escape_count),
                        ctypes.c_int32(self._escape_matrix.ncol),
                        self._escape_list.ctypes_data,
                        self._escape_matrix.ctypes_data))

            self.state.move_to_neighbour(directions_matrix=self._escape_matrix,
                                         dir_counts=self._escape_dir_count)

            self.state.filter_on_domain_boundary()

        # Close the interval opened at the top of the method; the other two
        # timers used here are start/pause paired in the same way.
        self.timer_apply.pause()
Ejemplo n.º 21
0
    def kernel(self):
        """
        Assemble and return the ``kernel.Kernel`` object for this potential.

        The embedded C source adds the pair energy to ``u[0]`` and applies
        equal and opposite contributions to ``A`` for the two particles.
        ``1/r2`` is not obtained by division: it is approximated with two
        passes of ``xn = xn*(2.0 - r2*xn)`` starting from ``0.01``.
        """

        lj_src = '''

        const double R0 = P[1][0] - P[0][0];
        const double R1 = P[1][1] - P[0][1];
        const double R2 = P[1][2] - P[0][2];

        const double r2 = R0*R0 + R1*R1 + R2*R2;


            double xn = 0.01;
            for(int ix = 0; ix < 2; ix++){
                xn = xn*(2.0 - r2*xn);
            }


            const double r_m2 = sigma2*xn;
            const double r_m4 = r_m2*r_m2;
            const double r_m6 = r_m4*r_m2;

            const double _ex = r_m6;
            double _et = 1.0, _ep = 1.0, _ef = 1.0, _epx = 1.0;

            /*
            #pragma novector
            for(int _etx = 1; _etx < 21; _etx++){
                _epx *= _ex;
                _ef *= _ep;
                _ep++;

                xn = 0.01;

            #pragma novector
                for(int ix = 0; ix < 10; ix++){
                    xn = xn*(2.0 - _ef*xn);
                }


                _et += _epx*xn;
            }
            */

            u[0]+=CV*((r_m6-1.0)*r_m6 + internalshift);

            const double r_m8 = r_m4*r_m4;
            const double f_tmp = CF*(r_m6 - 0.5)*r_m8;

            A[0][0]+=f_tmp*R0;
            A[0][1]+=f_tmp*R1;
            A[0][2]+=f_tmp*R2;

            A[1][0]-=f_tmp*R0;
            A[1][1]-=f_tmp*R1;
            A[1][2]-=f_tmp*R2;


        '''

        # Name/value pairs substituted into the C source. 'rc2' is passed
        # through even though the source above never references it.
        lj_constants = tuple(
            kernel.Constant(label, value) for label, value in (
                ('sigma2', self._sigma**2),
                ('rc2', self._rc**2),
                ('internalshift', self._shift_internal),
                ('CF', self._C_F),
                ('CV', self._C_V),
            ))

        # The energy accumulator is declared as a '+' reduction over u[0].
        energy_reduction = kernel.Reduction('u', 'u[0]', '+')

        return kernel.Kernel('LJ_accel_U', lj_src, lj_constants,
                             [kernel.Header('stdio.h')],
                             (energy_reduction, ))
Ejemplo n.º 22
0
    def _init_contrib_loop(self):
        """
        Build ``self._contrib_loop``, the per-particle OMP loop that bins a
        particle into a cell on every level and accumulates its multipole
        expansion terms into ``TREE``.

        The kernel:

        * bins the position into the finest-level cell, clamping each axis to
          the last cell of *that* axis,
        * increments the occupancy counter for that cell,
        * walks from the finest level to level 0 recording the cell index and
          the child index on each level,
        * for each level, converts the offset from the cell centre into
          ``radius``/``theta``/``phi`` and adds the generated spherical
          harmonic terms (scaled by ``charge * radius**l``) into ``TREE``.
        """

        extent = self.domain.extent
        # Despite the name these are reciprocal widths: number of
        # finest-level cells per unit length along each axis.
        cell_widths = [1.0 / (ex / (sx**(self.R - 1))) for ex, sx in zip(extent, self.subdivision)]


        sph_gen = self.sph_gen

        def cube_ind(L, M):
            # Linear index of the (L, M) coefficient within one expansion.
            return ((L) * ( (L) + 1 ) + (M) )

        # Generated C: real parts at OFFSET + ind, imaginary parts shifted by
        # IM_OFFSET; rhol carries radius**l between degrees.
        assign_gen =  'double rhol = 1.0;\n'
        assign_gen += 'double rholcharge = rhol * charge;\n'
        for lx in range(self.L):
            for mx in range(-lx, lx+1):
                assign_gen += 'TREE[OFFSET + {ind}] += {ylmm} * rholcharge;\n'.format(
                        ind=cube_ind(lx, mx),
                        ylmm=str(sph_gen.get_y_sym(lx, -mx)[0])
                    )
                assign_gen += 'TREE[OFFSET + IM_OFFSET + {ind}] += {ylmm} * rholcharge;\n'.format(
                        ind=cube_ind(lx, mx),
                        ylmm=str(sph_gen.get_y_sym(lx, -mx)[1])
                    )
            assign_gen += 'rhol *= radius;\n'
            assign_gen += 'rholcharge = rhol * charge;\n'


        k = kernel.Kernel(
            'mm_contrib_loop',
            r'''
            
            const double rx = P.i[0];
            const double ry = P.i[1];
            const double rz = P.i[2];

            // bin into finest level cell
            const double srx = rx + HEX;
            const double sry = ry + HEY;
            const double srz = rz + HEZ;
            
            int64_t cfx = srx * CWX;
            int64_t cfy = sry * CWY;
            int64_t cfz = srz * CWZ;

            // clamp each axis against its own cell count
            cfx = (cfx < LCX) ? cfx : (LCX - 1);
            cfy = (cfy < LCY) ? cfy : (LCY - 1);
            cfz = (cfz < LCZ) ? cfz : (LCZ - 1);

            // number of cells in each direction
            int64_t ncx = LCX;
            int64_t ncy = LCY;
            int64_t ncz = LCZ;
            
            // increment the occupancy for this cell
            OCC_GA[cfx + LCX * (cfy + LCY * cfz)]++;

            MM_FINE_CELLS.i[0] = cfx;
            MM_FINE_CELLS.i[1] = cfy;
            MM_FINE_CELLS.i[2] = cfz;

            for( int level=R-1 ; level>=0 ; level-- ){{
                
                // child on this level

                const int64_t cix = cfx % SDX;
                const int64_t ciy = cfy % SDY;
                const int64_t ciz = cfz % SDZ;

                // record the cell indices
                MM_CELLS.i[level * 3 + 0] = cfx;
                MM_CELLS.i[level * 3 + 1] = cfy;
                MM_CELLS.i[level * 3 + 2] = cfz;

                // record the child cell indices
                MM_CHILD_INDEX.i[level * 3 + 0] = cix;
                MM_CHILD_INDEX.i[level * 3 + 1] = ciy;
                MM_CHILD_INDEX.i[level * 3 + 2] = ciz;

                // compute the cells for the next level
                cfx /= SDX;
                cfy /= SDY;
                cfz /= SDZ;

            }}



            // compute the multipole expansions
            for( int level=0 ; level<R ; level++) {{

                const int64_t cellx = MM_CELLS.i[level * 3 + 0];
                const int64_t celly = MM_CELLS.i[level * 3 + 1];
                const int64_t cellz = MM_CELLS.i[level * 3 + 2];

                const double dx = rx - ((-HEX) + (0.5  + cellx) * WIDTHS_X[level]);
                const double dy = ry - ((-HEY) + (0.5  + celly) * WIDTHS_Y[level]);
                const double dz = rz - ((-HEZ) + (0.5  + cellz) * WIDTHS_Z[level]);

                const double xy2 = dx * dx + dy * dy;
                const double radius = sqrt(xy2 + dz * dz);
                const double theta = atan2(sqrt(xy2), dz);
                const double phi = atan2(dy, dx);
                const double charge = Q.i[0];
                
                const int64_t lin_ind = cellx + NCELLS_X[level] * (celly + NCELLS_Y[level] * cellz);
                const int64_t OFFSET = LEVEL_OFFSETS[level] + NCOMP * lin_ind;

                {SPH_GEN}
                {ASSIGN_GEN}
            }}


            '''.format(
                SPH_GEN=str(sph_gen.module),
                ASSIGN_GEN=str(assign_gen)
            ),
            (
                Constant('R', self.R),
                Constant('EX', extent[0]),
                Constant('EY', extent[1]),
                Constant('EZ', extent[2]),
                Constant('HEX', 0.5 * extent[0]),
                Constant('HEY', 0.5 * extent[1]),
                Constant('HEZ', 0.5 * extent[2]),                
                Constant('CWX', cell_widths[0]),
                Constant('CWY', cell_widths[1]),
                Constant('CWZ', cell_widths[2]),
                Constant('LCX', self.subdivision[0] ** (self.R - 1)),
                Constant('LCY', self.subdivision[1] ** (self.R - 1)),
                Constant('LCZ', self.subdivision[2] ** (self.R - 1)),
                Constant('SDX', self.subdivision[0]),
                Constant('SDY', self.subdivision[1]),
                Constant('SDZ', self.subdivision[2]),
                Constant('IL_NO', self.il_array.shape[1]),
                Constant('IL_STRIDE_OUTER', self.il_array.shape[1] * self.il_array.shape[2]),
                Constant('NCOMP', self.ncomp),
                Constant('IM_OFFSET', self.L**2)
            ),
            headers=(
                lib.build.write_header(
                    """
                    #define R {R}
                    const double WIDTHS_X[R] = {{ {WIDTHS_X} }};
                    const double WIDTHS_Y[R] = {{ {WIDTHS_Y} }};
                    const double WIDTHS_Z[R] = {{ {WIDTHS_Z} }};

                    const int64_t NCELLS_X[R] = {{ {NCELLS_X} }};
                    const int64_t NCELLS_Y[R] = {{ {NCELLS_Y} }};
                    const int64_t NCELLS_Z[R] = {{ {NCELLS_Z} }};

                    const int64_t LEVEL_OFFSETS[R] = {{ {LEVEL_OFFSETS} }};

                    """.format(
                        R=self.R,
                        WIDTHS_X=self.widths_x_str,
                        WIDTHS_Y=self.widths_y_str,
                        WIDTHS_Z=self.widths_z_str,
                        NCELLS_X=self.ncells_x_str,
                        NCELLS_Y=self.ncells_y_str,
                        NCELLS_Z=self.ncells_z_str,
                        LEVEL_OFFSETS=self.level_offsets_str
                    )
                ),
            )
        )

        # Names on the left are the symbols used by the C source above.
        dat_dict = {
            'P': self.positions(access.READ),
            'Q': self.charges(access.READ),
            'MM_FINE_CELLS': self._dat_fine_cells(access.WRITE),
            'MM_CELLS': self._dat_cells(access.WRITE),
            'MM_CHILD_INDEX': self._dat_child_index(access.WRITE),
            'OCC_GA': self.cell_occupation_ga(access.INC_ZERO),
            'TREE': self.tree(access.INC_ZERO),
        }

        self._contrib_loop = loop.ParticleLoopOMP(kernel=k, dat_dict=dat_dict)
Ejemplo n.º 23
0
    def integrate_thermostat(self, dt=None, t=None, temp=273.15, nu=1.0):
        """
        Integrate state forward in time with a stochastic thermostat applied
        in the second velocity half-step.

        In the second half-step each particle is, with probability
        ``dt * nu`` per step, given a fresh velocity whose components are
        drawn with the Box-Muller method and scaled by
        ``sqrt(temperature / mass)``; otherwise the usual half-step velocity
        update is applied.

        :arg double dt: Time step size. When ``None`` the previously stored
            value is kept.
        :arg double t: End time. When ``None`` the previously stored value is
            kept.
        :arg double temp: Temperature of heat bath.
        :arg double nu: Rate multiplied by ``dt`` to give the per-step
            probability of resampling a particle's velocity.
        """

        self._Temp = temp
        self._nu = nu

        if dt is not None:
            self._dt = dt
        if t is not None:
            self._T = t

        self._max_it = int(math.ceil(self._T / self._dt))

        self._constants1 = [
            kernel.Constant('dt', self._dt),
            kernel.Constant('dht', 0.5 * self._dt),
        ]
        self._kernel1 = kernel.Kernel('vv1', self._kernel1_code,
                                      self._constants1)
        self._p1 = loop.ParticleLoop(self._kernel1, {
            'P': self._P,
            'V': self._V,
            'A': self._A,
            'M': self._M
        })

        # The log() arguments use (rand() + 1) / (RAND_MAX + 1), which lies
        # in (0, 1]; rand() / RAND_MAX can be exactly 0 and would make
        # log() return -inf and the new velocity infinite.
        self._kernel2_thermostat_code = '''

        //Anderson thermostat here.
        //probably horrific random code.

        const double tmp_rand_max = 1.0/RAND_MAX;
        //maps rand() into (0, 1] so log() below never sees zero.
        const double tmp_rand_log = 1.0/(RAND_MAX + 1.0);

        if (rand()*tmp_rand_max < rate) {

            //Box-Muller method.


            const double scale = sqrt(temperature/M(0));
            const double stmp = scale*sqrt(-2.0*log((rand() + 1.0)*tmp_rand_log));

            const double V0 = 2.0*M_PI*rand()*tmp_rand_max;
            V(0) = stmp*cos(V0);
            V(1) = stmp*sin(V0);
            V(2) = scale*sqrt(-2.0*log((rand() + 1.0)*tmp_rand_log))*cos(2.0*M_PI*rand()*tmp_rand_max);

        }
        else {
            const double M_tmp = 1./M(0);
            V(0) += dht*A(0)*M_tmp;
            V(1) += dht*A(1)*M_tmp;
            V(2) += dht*A(2)*M_tmp;
        }

        '''

        self._constants2_thermostat = [
            kernel.Constant('rate', self._dt * self._nu),
            kernel.Constant('dt', self._dt),
            kernel.Constant('dht', 0.5 * self._dt),
            kernel.Constant('temperature', self._Temp),
        ]

        self._kernel2_thermostat = kernel.Kernel(
            'vv2_thermostat',
            self._kernel2_thermostat_code,
            self._constants2_thermostat,
            headers=['math.h', 'stdlib.h', 'time.h', 'stdio.h'])
        self._p2_thermostat = loop.ParticleLoop(self._kernel2_thermostat, {
            'V': self._V,
            'A': self._A,
            'M': self._M
        })

        # Time the whole integration run under a fixed label.
        _t = ppmd.opt.Timer(runtime.TIMER, 0, start=True)
        self._velocity_verlet_integration_thermostat()
        _t.stop("VelocityVerletAnderson")
Ejemplo n.º 24
0
    def _init_bin_loop(self):
        """
        Build ``self._cell_bin_loop``, the per-particle OMP loop that bins a
        particle into the finest-level cell and evaluates its contribution to
        the local expansions of the interaction-list cells on every level.

        The kernel:

        * bins the position into the finest-level cell, clamping each axis to
          the last cell of *that* axis, and records it in ``MC_FC``,
        * increments the occupancy counter for that cell,
        * for each level from finest to coarsest, visits the ``IL_NO``
          interaction-list offsets of the particle's child index, skips cells
          outside ``[0, nc)`` on any axis, and stores the target cell, level
          and Cartesian displacement to that cell's centre,
        * writes the number of stored entries to ``MC_NEXP``,
        * converts each stored displacement to radius/theta/phi in place and
          calls ``inline_local_exp`` on the matching slice of ``TL``.
        """

        g = self.group
        extent = self.domain.extent
        # Despite the name these are reciprocal widths: number of
        # finest-level cells per unit length along each axis.
        cell_widths = [
            1.0 / (ex / (sx**(self.R - 1)))
            for ex, sx in zip(extent, self.subdivision)
        ]

        # .format() is called with no arguments only to turn the doubled
        # braces of the template into single braces.
        k = kernel.Kernel(
            'mc_bin_loop',
            r'''
            
            const double rx = P.i[0];
            const double ry = P.i[1];
            const double rz = P.i[2];

            // bin into finest level cell
            const double srx = rx + HEX;
            const double sry = ry + HEY;
            const double srz = rz + HEZ;
            
            int64_t cfx = srx * CWX;
            int64_t cfy = sry * CWY;
            int64_t cfz = srz * CWZ;

            // clamp each axis against its own cell count
            cfx = (cfx < LCX) ? cfx : (LCX - 1);
            cfy = (cfy < LCY) ? cfy : (LCY - 1);
            cfz = (cfz < LCZ) ? cfz : (LCZ - 1);
            
            // record the finest level cells
            MC_FC.i[0] = cfx;
            MC_FC.i[1] = cfy;
            MC_FC.i[2] = cfz;
            
            // number of cells in each direction
            int64_t ncx = LCX;
            int64_t ncy = LCY;
            int64_t ncz = LCZ;
            
            // increment the occupancy for this cell
            OCC_GA[cfx + LCX * (cfy + LCY * cfz)]++;

            int64_t n = 0;
            for( int level=R-1 ; level>=0 ; level-- ){{
                
                // cell widths for cell centre computation
                const double wx = EX / ncx;
                const double wy = EY / ncy;
                const double wz = EZ / ncz;

                // child on this level

                const int64_t cix = cfx % SDX;
                const int64_t ciy = cfy % SDY;
                const int64_t ciz = cfz % SDZ;


                const int64_t ci = cix + SDX * (ciy + SDY * ciz);
                
                // loop over IL for this child cell

                for( int ox=0 ; ox<IL_NO ; ox++){{
                    
                    const int64_t ocx = cfx + IL[ci * IL_STRIDE_OUTER + ox * 3 + 0];
                    const int64_t ocy = cfy + IL[ci * IL_STRIDE_OUTER + ox * 3 + 1];
                    const int64_t ocz = cfz + IL[ci * IL_STRIDE_OUTER + ox * 3 + 2];

                    // free space for now
                    if (ocx < 0) {{continue;}}
                    if (ocy < 0) {{continue;}}
                    if (ocz < 0) {{continue;}}
                    if (ocx >= ncx) {{continue;}}
                    if (ocy >= ncy) {{continue;}}
                    if (ocz >= ncz) {{continue;}}

                    MC_CX.i[n] = ocx;
                    MC_CY.i[n] = ocy;
                    MC_CZ.i[n] = ocz;
                    MC_CL.i[n] = ocx + ncx * (ocy + ncy * ocz);
                    MC_LEVEL.i[n] = level;

                    MC_DX.i[n] = rx - ((-HEX) + (0.5 * wx) + (ocx * wx));
                    MC_DY.i[n] = ry - ((-HEY) + (0.5 * wy) + (ocy * wy));
                    MC_DZ.i[n] = rz - ((-HEZ) + (0.5 * wz) + (ocz * wz));

                    n++;
                }}

                // compute the cells for the next level
                cfx /= SDX;
                cfy /= SDY;
                cfz /= SDZ;

                // number of cells in each dim for the next level
                ncx /= SDX;
                ncy /= SDY;
                ncz /= SDZ;

            }}

            MC_NEXP.i[0] = n;

            // compute offsets as spherical coordinates in a vcectorisable loop
            for( int ox=0 ; ox<n ; ox++){{
                const double dx = MC_DX.i[ox];
                const double dy = MC_DY.i[ox];
                const double dz = MC_DZ.i[ox];

                const double xy2 = dx * dx + dy * dy;
                MC_DX.i[ox] = sqrt(xy2 + dz * dz);
                MC_DY.i[ox] = atan2(sqrt(xy2), dz);
                MC_DZ.i[ox] = atan2(dy, dx);
                MC_CHR.i[ox] = Q.i[0];


                inline_local_exp(
                    Q.i[0], 
                    sqrt(xy2 + dz * dz),
                    atan2(sqrt(xy2), dz),
                    atan2(dy, dx),
                    &TL[TL_OFFSETS[MC_LEVEL.i[ox]] + NCOMP * MC_CL.i[ox]]
                );


            }}

            '''.format(
            ),
            (
                Constant('R', self.R),
                Constant('NCOMP', self.ncomp),
                Constant('EX', extent[0]),
                Constant('EY', extent[1]),
                Constant('EZ', extent[2]),
                Constant('HEX', 0.5 * extent[0]),
                Constant('HEY', 0.5 * extent[1]),
                Constant('HEZ', 0.5 * extent[2]),
                Constant('CWX', cell_widths[0]),
                Constant('CWY', cell_widths[1]),
                Constant('CWZ', cell_widths[2]),
                Constant('LCX', self.subdivision[0] ** (self.R - 1)),
                Constant('LCY', self.subdivision[1] ** (self.R - 1)),
                Constant('LCZ', self.subdivision[2] ** (self.R - 1)),
                Constant('SDX', self.subdivision[0]),
                Constant('SDY', self.subdivision[1]),
                Constant('SDZ', self.subdivision[2]),
                Constant('IL_NO', self.il_array.shape[1]),
                Constant('IL_STRIDE_OUTER', self.il_array.shape[1] * self.il_array.shape[2]),
            ),
            headers=(
                lib.build.write_header(
                    self.mc_lee.create_local_exp_header + \
                    self.mc_lee.create_local_exp_src
                ),
            )
        )

        # Names on the left are the symbols used by the C source above.
        # MC_DX/MC_DY/MC_DZ are bound to the radius/theta/phi dats: they hold
        # Cartesian displacements first and are overwritten in place.
        dat_dict = {
            'P': self.positions(access.READ),
            'Q': self.charges(access.READ),
            'IL': self.il_scalararray(access.READ),
            'MC_FC': g._mc_fmm_cells(access.WRITE),
            'MC_NEXP': g._mc_nexp(access.WRITE),
            'MC_CHR': g._mc_charge(access.WRITE),
            'MC_DX': g._mc_radius(access.WRITE),
            'MC_DY': g._mc_theta(access.WRITE),
            'MC_DZ': g._mc_phi(access.WRITE),
            'MC_LEVEL': g._mc_level(access.WRITE),
            'MC_CX': g._mc_cx(access.WRITE),
            'MC_CY': g._mc_cy(access.WRITE),
            'MC_CZ': g._mc_cz(access.WRITE),
            'MC_CL': g._mc_cl(access.WRITE),
            'OCC_GA': self.cell_occupation_ga(access.INC_ZERO),
            'TL_OFFSETS': self.tree_local_ga_offsets(access.READ),
            'TL': self.tree_local_ga(access.INC_ZERO),
        }

        self._cell_bin_loop = loop.ParticleLoopOMP(kernel=k, dat_dict=dat_dict)
Ejemplo n.º 25
0
    def _init_pbc(self):
        """
        Allocate the top-level accumulators and build ``self._lr_loop``.

        Two ``GlobalArray`` objects of ``self.ncomp`` components are created
        (``top_multipole_expansion_ga`` and ``top_dot_vector_ga``) together
        with a ``LongRangeMTL`` instance stored as ``self.lrc``. The particle
        loop converts each position to spherical coordinates and increments
        ``MULTIPOLE`` and ``DOT_VEC`` with generated per-coefficient
        statements; real parts occupy the first ``L**2`` slots and imaginary
        parts the following ``L**2``.
        """

        self.top_multipole_expansion_ga = data.GlobalArray(ncomp=self.ncomp, dtype=REAL)
        self.top_dot_vector_ga = data.GlobalArray(ncomp=self.ncomp, dtype=REAL)
        self.lrc = LongRangeMTL(self.L, self.domain, exclude_tuples=self.il[1])

        sph_gen = self.sph_gen
        imag_shift = self.L**2

        # Generated C statements, collected as a list and joined once.
        statements = ['double rhol = charge;\n']
        for lx in range(self.L):
            for mx in range(-lx, lx + 1):
                real_slot = lx * (lx + 1) + mx
                imag_slot = real_slot + imag_shift

                # MULTIPOLE uses the (lx, -mx) symbols.
                re_sym, im_sym = sph_gen.get_y_sym(lx, -mx)
                statements.append(
                    'MULTIPOLE[{}] += {} * rhol;\n'.format(real_slot, str(re_sym)))
                statements.append(
                    'MULTIPOLE[{}] += {} * rhol;\n'.format(imag_slot, str(im_sym)))

                # DOT_VEC uses the (lx, mx) symbols with the imaginary part
                # negated.
                re_sym, im_sym = sph_gen.get_y_sym(lx, mx)
                statements.append(
                    'DOT_VEC[{}] += {} * rhol;\n'.format(real_slot, str(re_sym)))
                statements.append(
                    'DOT_VEC[{}] += {} * rhol;\n'.format(imag_slot, '-1.0 * ' + str(im_sym)))

            # rhol carries one more power of radius into the next degree.
            statements.append('rhol *= radius;\n')

        assign_gen = ''.join(statements)

        kernel_src = r'''
            const double dx = P.i[0];
            const double dy = P.i[1];
            const double dz = P.i[2];

            const double xy2 = dx * dx + dy * dy;
            const double radius = sqrt(xy2 + dz * dz);
            const double theta = atan2(sqrt(xy2), dz);
            const double phi = atan2(dy, dx);
            const double charge = Q.i[0];

            {SPH_GEN}
            {ASSIGN_GEN}


            '''.format(
            SPH_GEN=str(sph_gen.module),
            ASSIGN_GEN=assign_gen
        )

        math_header = lib.build.write_header(
                    r'''
                    #include <math.h>
                    '''
        )

        lr_kernel = kernel.Kernel(
            'mm_lm_lr_kernel',
            kernel_src,
            headers=(math_header, )
        )

        loop_dats = {
            'P': self.positions(access.READ),
            'Q': self.charges(access.READ),
            'MULTIPOLE': self.top_multipole_expansion_ga(access.INC_ZERO),
            'DOT_VEC': self.top_dot_vector_ga(access.INC_ZERO),
        }
        self._lr_loop = loop.ParticleLoopOMP(lr_kernel, dat_dict=loop_dats)
Ejemplo n.º 26
0
    def kernel(self):
        """
        Assemble and return the ``kernel.Kernel`` object for this potential.

        The embedded C source approximates ``1/r2`` with ten passes of
        ``xn = xn*(2.0 - r2*xn)`` starting from ``0.01``. It then runs a
        20-term loop that accumulates ``_et`` from powers of ``r_m6`` and the
        same iterative reciprocal applied to a running product ``_ef``.
        ``_et`` is added to the energy contribution written to ``u(0)``, and
        equal and opposite contributions are applied to ``A`` for the two
        particles.
        """

        lj_src = '''

        const double R0 = P(1, 0) - P(0, 0);
        const double R1 = P(1, 1) - P(0, 1);
        const double R2 = P(1, 2) - P(0, 2);

        const double r2 = R0*R0 + R1*R1 + R2*R2;

            double xn = 0.01;
            for(int ix = 0; ix < 10; ix++){
                xn = xn*(2.0 - r2*xn);
            }



            const double r_m2 = sigma2*xn;
            const double r_m4 = r_m2*r_m2;
            const double r_m6 = r_m4*r_m2;

            const double _ex = r_m6;
            double _et = 1.0, _ep = 1.0, _ef = 1.0, _epx = 1.0;
            for(int _etx = 1; _etx < 21; _etx++){
                _epx *= _ex;
                _ef *= _ep;
                _ep++;

                xn = 0.01;
                for(int ix = 0; ix < 10; ix++){
                    xn = xn*(2.0 - _ef*xn);
                }


                _et += _epx*xn;
            }

            u(0)+=CV*((r_m6-1.0)*r_m6 + internalshift) + _et;

            const double r_m8 = r_m4*r_m4;
            const double f_tmp = CF*(r_m6 - 0.5)*r_m8;

            A(0, 0)+=f_tmp*R0;
            A(0, 1)+=f_tmp*R1;
            A(0, 2)+=f_tmp*R2;

            A(1, 0)-=f_tmp*R0;
            A(1, 1)-=f_tmp*R1;
            A(1, 2)-=f_tmp*R2;


        '''

        # Name/value pairs substituted into the C source. 'rc2' is passed
        # through even though the source above never references it.
        lj_constants = tuple(
            kernel.Constant(label, value) for label, value in (
                ('sigma2', self._sigma**2),
                ('rc2', self._rc**2),
                ('internalshift', self._shift_internal),
                ('CF', self._C_F),
                ('CV', self._C_V),
            ))

        # The energy accumulator is declared as a '+' reduction over u[0].
        energy_reduction = kernel.Reduction('u', 'u[0]', '+')

        return kernel.Kernel('LJ_accel_U', lj_src, lj_constants, ['stdio.h'],
                             (energy_reduction, ))
Ejemplo n.º 27
0
    def _init_contrib_loop(self):

        bc = self.boundary_condition
        if bc == BCType.FREE_SPACE:
            bc_block = r'''
                if (ocx < 0) {{continue;}}
                if (ocy < 0) {{continue;}}
                if (ocz < 0) {{continue;}}
                if (ocx >= ncx) {{continue;}}
                if (ocy >= ncy) {{continue;}}
                if (ocz >= ncz) {{continue;}}
            '''
        elif bc in (BCType.NEAREST, BCType.PBC):
            bc_block = r'''
                ocx = (ocx + ({O})*ncx) % ncx;
                ocy = (ocy + ({O})*ncy) % ncy;
                ocz = (ocz + ({O})*ncz) % ncz;
            '''.format(O=self.max_il_offset * 2)
        else:
            raise RuntimeError('Unkown boundary condition.')

        g = self.group
        extent = self.domain.extent
        cell_widths = [
            1.0 / (ex / (sx**(self.R - 1)))
            for ex, sx in zip(extent, self.subdivision)
        ]

        L = self.L

        sph_gen = self.sph_gen

        def cube_ind(L, M):
            return ((L) * ((L) + 1) + (M))

        assign_gen = 'const double iradius = 1.0 / radius;\n'
        assign_gen += 'double rholcharge = iradius * charge;\n'
        for lx in range(self.L):
            for mx in range(-lx, lx + 1):
                assign_gen += 'TREE[OFFSET + {ind}] += {ylmm} * rholcharge;\n'.format(
                    ind=cube_ind(lx, mx),
                    ylmm=str(sph_gen.get_y_sym(lx, -mx)[0]))
                assign_gen += 'TREE[OFFSET + IM_OFFSET + {ind}] += {ylmm} * rholcharge;\n'.format(
                    ind=cube_ind(lx, mx),
                    ylmm=str(sph_gen.get_y_sym(lx, -mx)[1]))
            assign_gen += 'rholcharge *= iradius;\n'

        k = kernel.Kernel(
            'lm_contrib_loop',
            r'''
            
            const double rx = P.i[0];
            const double ry = P.i[1];
            const double rz = P.i[2];
            const double charge = Q.i[0];

            // bin into finest level cell
            const double srx = rx + HEX;
            const double sry = ry + HEY;
            const double srz = rz + HEZ;
            
            int64_t cfx = srx * CWX;
            int64_t cfy = sry * CWY;
            int64_t cfz = srz * CWZ;

            cfx = (cfx < LCX) ? cfx : (LCX - 1);
            cfy = (cfy < LCX) ? cfy : (LCY - 1);
            cfz = (cfz < LCX) ? cfz : (LCZ - 1);
            
            // increment the occupancy for this cell
            OCC_GA[cfx + LCX * (cfy + LCY * cfz)]++;

            MM_FINE_CELLS.i[0] = cfx;
            MM_FINE_CELLS.i[1] = cfy;
            MM_FINE_CELLS.i[2] = cfz;

            for( int level=R-1 ; level>=0 ; level-- ){{

                // child on this level

                const int64_t cix = cfx % SDX;
                const int64_t ciy = cfy % SDY;
                const int64_t ciz = cfz % SDZ;

                // record the cell indices
                MM_CELLS.i[level * 3 + 0] = cfx;
                MM_CELLS.i[level * 3 + 1] = cfy;
                MM_CELLS.i[level * 3 + 2] = cfz;

                // record the child cell indices
                MM_CHILD_INDEX.i[level * 3 + 0] = cix;
                MM_CHILD_INDEX.i[level * 3 + 1] = ciy;
                MM_CHILD_INDEX.i[level * 3 + 2] = ciz;

                // compute the cells for the next level
                cfx /= SDX;
                cfy /= SDY;
                cfz /= SDZ;

            }}


            // compute the local expansions
            for( int level=1 ; level<R ; level++) {{
                
                // cell on this level
                const int64_t cfx = MM_CELLS.i[level * 3 + 0];
                const int64_t cfy = MM_CELLS.i[level * 3 + 1];
                const int64_t cfz = MM_CELLS.i[level * 3 + 2];

                // child on this level
                const int64_t cix = cfx % SDX;
                const int64_t ciy = cfy % SDY;
                const int64_t ciz = cfz % SDZ;
                const int64_t ci = cix + SDX * (ciy + SDY * ciz);

                // cell widths on this level
                const double wx = WIDTHS_X[level];
                const double wy = WIDTHS_Y[level];
                const double wz = WIDTHS_Z[level];

                // number of cells on this level
                const int64_t ncx = NCELLS_X[level];
                const int64_t ncy = NCELLS_Y[level];
                const int64_t ncz = NCELLS_Z[level];


                // loop over IL for this child cell
                for( int ox=0 ; ox<IL_NO ; ox++){{

                    int64_t ocx = cfx + IL[ci * IL_STRIDE_OUTER + ox * 3 + 0];
                    int64_t ocy = cfy + IL[ci * IL_STRIDE_OUTER + ox * 3 + 1];
                    int64_t ocz = cfz + IL[ci * IL_STRIDE_OUTER + ox * 3 + 2];

                    const double dx = rx - ((-HEX) + (0.5 * wx) + (ocx * wx));
                    const double dy = ry - ((-HEY) + (0.5 * wy) + (ocy * wy));
                    const double dz = rz - ((-HEZ) + (0.5 * wz) + (ocz * wz));

                    {BC_BLOCK}

                    const int64_t lin_ind = ocx + NCELLS_X[level] * (ocy + NCELLS_Y[level] * ocz);



                    const double xy2 = dx * dx + dy * dy;
                    const double radius = sqrt(xy2 + dz * dz);
                    const double theta = atan2(sqrt(xy2), dz);
                    const double phi = atan2(dy, dx);
                    
                    const int64_t OFFSET = LEVEL_OFFSETS[level] + NCOMP * lin_ind;

                    {SPH_GEN}
                    {ASSIGN_GEN}

                }}
            }}


            '''.format(BC_BLOCK=bc_block,
                       SPH_GEN=str(sph_gen.module),
                       ASSIGN_GEN=str(assign_gen)),
            (Constant('EX', extent[0]), Constant('EY', extent[1]),
             Constant('EZ', extent[2]), Constant('HEX', 0.5 * extent[0]),
             Constant('HEY', 0.5 * extent[1]), Constant(
                 'HEZ', 0.5 * extent[2]), Constant('CWX', cell_widths[0]),
             Constant('CWY', cell_widths[1]), Constant('CWZ', cell_widths[2]),
             Constant('LCX', self.subdivision[0]**(self.R - 1)),
             Constant('LCY', self.subdivision[1]**(self.R - 1)),
             Constant('LCZ', self.subdivision[2]
                      **(self.R - 1)), Constant('SDX', self.subdivision[0]),
             Constant('SDY', self.subdivision[1]),
             Constant('SDZ', self.subdivision[2]),
             Constant('IL_NO', self.il_array.shape[1]),
             Constant('IL_STRIDE_OUTER',
                      self.il_array.shape[1] * self.il_array.shape[2]),
             Constant('NCOMP', self.ncomp), Constant('IM_OFFSET', self.L**2)),
            headers=(lib.build.write_header("""
                    #define R {R}
                    const double WIDTHS_X[R] = {{ {WIDTHS_X} }};
                    const double WIDTHS_Y[R] = {{ {WIDTHS_Y} }};
                    const double WIDTHS_Z[R] = {{ {WIDTHS_Z} }};

                    const int64_t NCELLS_X[R] = {{ {NCELLS_X} }};
                    const int64_t NCELLS_Y[R] = {{ {NCELLS_Y} }};
                    const int64_t NCELLS_Z[R] = {{ {NCELLS_Z} }};

                    const int64_t LEVEL_OFFSETS[R] = {{ {LEVEL_OFFSETS} }};

                    """.format(R=self.R,
                               WIDTHS_X=self.widths_x_str,
                               WIDTHS_Y=self.widths_y_str,
                               WIDTHS_Z=self.widths_z_str,
                               NCELLS_X=self.ncells_x_str,
                               NCELLS_Y=self.ncells_y_str,
                               NCELLS_Z=self.ncells_z_str,
                               LEVEL_OFFSETS=self.level_offsets_str)), ))

        dat_dict = {
            'IL': self.il_scalararray(access.READ),
            'P': self.positions(access.READ),
            'Q': self.charges(access.READ),
            'MM_FINE_CELLS': self._dat_fine_cells(access.WRITE),
            'MM_CELLS': self._dat_cells(access.WRITE),
            'MM_CHILD_INDEX': self._dat_child_index(access.WRITE),
            'OCC_GA': self.cell_occupation_ga(access.INC_ZERO),
            'TREE': self.tree(access.INC_ZERO)
        }
        self._contrib_loop = loop.ParticleLoopOMP(kernel=k, dat_dict=dat_dict)
Ejemplo n.º 28
0
    def _init_extract_loop(self):
        """Build the ``mm_extract_loop`` kernel and the particle loop that runs it.

        The generated C kernel is executed once per particle. For every level
        from 1 to ``R - 1`` it walks the interaction-list offsets (``IL``) of
        the particle's child cell, forms the displacement from the centre of
        each offset cell to the particle, converts that displacement to
        spherical coordinates and accumulates a sum over ``(l, m)`` of
        ``radius**-(l + 1)`` times a product of the coefficients read from
        ``TREE`` with the symbols provided by ``self.sph_gen``. The
        per-particle sum, scaled by ``0.5 * Q``, is added to ``OUT_ENERGY[0]``.

        The resulting ``loop.ParticleLoopOMP`` is stored on
        ``self._extract_loop``; nothing is returned.

        Raises:
            RuntimeError: if ``self.boundary_condition`` is not one of
                ``BCType.FREE_SPACE``, ``BCType.NEAREST`` or ``BCType.PBC``.
        """

        extent = self.domain.extent
        # Reciprocal of the cell width on the finest level (R - 1) per dimension.
        cell_widths = [
            1.0 / (ex / (sx**(self.R - 1)))
            for ex, sx in zip(extent, self.subdivision)
        ]

        sph_gen = self.sph_gen

        def cube_ind(L, M):
            """Return the flat index ``L * (L + 1) + M`` of coefficient (L, M)."""
            return ((L) * ((L) + 1) + (M))

        # Generate one C statement per (l, m) pair. The pieces are collected in
        # a list and joined once instead of repeatedly extending a string.
        assign_lines = []
        for lx in range(self.L):
            for mx in range(-lx, lx + 1):
                ind = cube_ind(lx, mx)
                reL = SphSymbol('TREE[OFFSET + {ind}]'.format(ind=ind))
                imL = SphSymbol(
                    'TREE[OFFSET + IM_OFFSET + {ind}]'.format(ind=ind))
                reY, imY = sph_gen.get_y_sym(lx, mx)
                # Only element 0 of the product is used
                # (presumably the real part -- confirm against cmplx_mul).
                phi_sym = cmplx_mul(reL, imL, reY, imY)[0]
                assign_lines.append(
                    'tmp_energy += rhol * ({phi_sym});\n'.format(
                        phi_sym=str(phi_sym)))

            # After all m for this l: rhol goes from 1/r^(l+1) to 1/r^(l+2).
            assign_lines.append('rhol *= iradius;\n')
        assign_gen = ''.join(assign_lines)

        # NOTE: in the kernel below BC_BLOCK is placed *after* dx/dy/dz are
        # computed, so the displacement always uses the unwrapped cell offset
        # while the TREE lookup uses the (possibly wrapped) cell index.
        bc = self.boundary_condition
        if bc == BCType.FREE_SPACE:
            # This string is not passed through str.format itself, so the
            # doubled braces reach the C source unchanged (a nested block).
            bc_block = r'''
                if (ocx < 0) {{continue;}}
                if (ocy < 0) {{continue;}}
                if (ocz < 0) {{continue;}}
                if (ocx >= ncx) {{continue;}}
                if (ocy >= ncy) {{continue;}}
                if (ocz >= ncz) {{continue;}}
            '''
        elif bc in (BCType.NEAREST, BCType.PBC):
            # A multiple of the cell count is added before the modulus
            # (presumably to keep the left operand of % non-negative).
            bc_block = r'''
                ocx = (ocx + ({O})*ncx) % ncx;
                ocy = (ocy + ({O})*ncy) % ncy;
                ocz = (ocz + ({O})*ncz) % ncz;
            '''.format(O=self.max_il_offset*2)
        else:
            raise RuntimeError('Unkown boundary condition.')

        # Compile-time constants handed to the kernel.
        kernel_constants = (
            Constant('R', self.R),
            Constant('EX', extent[0]),
            Constant('EY', extent[1]),
            Constant('EZ', extent[2]),
            Constant('HEX', 0.5 * extent[0]),
            Constant('HEY', 0.5 * extent[1]),
            Constant('HEZ', 0.5 * extent[2]),
            Constant('CWX', cell_widths[0]),
            Constant('CWY', cell_widths[1]),
            Constant('CWZ', cell_widths[2]),
            Constant('LCX', self.subdivision[0] ** (self.R - 1)),
            Constant('LCY', self.subdivision[1] ** (self.R - 1)),
            Constant('LCZ', self.subdivision[2] ** (self.R - 1)),
            Constant('SDX', self.subdivision[0]),
            Constant('SDY', self.subdivision[1]),
            Constant('SDZ', self.subdivision[2]),
            Constant('IL_NO', self.il_array.shape[1]),
            Constant('IL_STRIDE_OUTER',
                     self.il_array.shape[1] * self.il_array.shape[2]),
            Constant('NCOMP', self.ncomp),
            Constant('IM_OFFSET', self.L**2),
        )

        # Header with the per-level lookup tables (cell widths, cell counts
        # and offsets into TREE) indexed by level in the kernel.
        level_tables_header = lib.build.write_header(
                    """
                    #include <csignal>
                    #define R {R}
                    const double WIDTHS_X[R] = {{ {WIDTHS_X} }};
                    const double WIDTHS_Y[R] = {{ {WIDTHS_Y} }};
                    const double WIDTHS_Z[R] = {{ {WIDTHS_Z} }};

                    const int64_t NCELLS_X[R] = {{ {NCELLS_X} }};
                    const int64_t NCELLS_Y[R] = {{ {NCELLS_Y} }};
                    const int64_t NCELLS_Z[R] = {{ {NCELLS_Z} }};

                    const int64_t LEVEL_OFFSETS[R] = {{ {LEVEL_OFFSETS} }};

                    """.format(
                        R=self.R,
                        WIDTHS_X=self.widths_x_str,
                        WIDTHS_Y=self.widths_y_str,
                        WIDTHS_Z=self.widths_z_str,
                        NCELLS_X=self.ncells_x_str,
                        NCELLS_Y=self.ncells_y_str,
                        NCELLS_Z=self.ncells_z_str,
                        LEVEL_OFFSETS=self.level_offsets_str
                    )
        )

        k = kernel.Kernel(
            'mm_extract_loop',
            r'''
            
            const double rx = P.i[0];
            const double ry = P.i[1];
            const double rz = P.i[2];

            double particle_energy = 0.0;


            for( int level=1 ; level<R ; level++ ){{

                // cell on this level
                const int64_t cfx = MM_CELLS.i[level*3 + 0];
                const int64_t cfy = MM_CELLS.i[level*3 + 1];
                const int64_t cfz = MM_CELLS.i[level*3 + 2];

                // number of cells on this level
                const int64_t ncx = NCELLS_X[level];
                const int64_t ncy = NCELLS_Y[level];
                const int64_t ncz = NCELLS_Z[level];


                // child on this level
                const int64_t cix = MM_CHILD_INDEX.i[level * 3 + 0];
                const int64_t ciy = MM_CHILD_INDEX.i[level * 3 + 1];
                const int64_t ciz = MM_CHILD_INDEX.i[level * 3 + 2];
                const int64_t ci = cix + SDX * (ciy + SDY * ciz);

                const double wx = WIDTHS_X[level];
                const double wy = WIDTHS_Y[level];
                const double wz = WIDTHS_Z[level];

                
                // loop over IL for this child cell
                for( int ox=0 ; ox<IL_NO ; ox++){{
                    
                    
                    int64_t ocx = cfx + IL[ci * IL_STRIDE_OUTER + ox * 3 + 0];
                    int64_t ocy = cfy + IL[ci * IL_STRIDE_OUTER + ox * 3 + 1];
                    int64_t ocz = cfz + IL[ci * IL_STRIDE_OUTER + ox * 3 + 2];

                    const double dx = rx - ((-HEX) + (0.5 * wx) + (ocx * wx));
                    const double dy = ry - ((-HEY) + (0.5 * wy) + (ocy * wy));
                    const double dz = rz - ((-HEZ) + (0.5 * wz) + (ocz * wz));

                    {BC_BLOCK}

                    const int64_t lin_ind = ocx + NCELLS_X[level] * (ocy + NCELLS_Y[level] * ocz);

                    const double xy2 = dx * dx + dy * dy;
                    const double radius = sqrt(xy2 + dz * dz);
                    const double theta = atan2(sqrt(xy2), dz);
                    const double phi = atan2(dy, dx);
                    
                    const int64_t OFFSET = LEVEL_OFFSETS[level] + NCOMP * lin_ind;

                    {SPH_GEN}
                    const double iradius = 1.0 / radius;
                    double rhol = iradius;
                    double tmp_energy = 0.0;
                    {ASSIGN_GEN}
                    
                    //if (isnan(tmp_energy)){{
                    //    printf(
                    //        "radius %f theta %f phi %f\n",
                    //        radius, theta, phi
                    //    );
                    //    std::raise(SIGINT);
                    //}}
                    particle_energy += tmp_energy;
                    

                }}


            }}


            OUT_ENERGY[0] += particle_energy * 0.5 * Q.i[0];

            '''.format(
                SPH_GEN=str(sph_gen.module),
                ASSIGN_GEN=assign_gen,
                BC_BLOCK=bc_block
            ),
            kernel_constants,
            headers=(level_tables_header, )
        )

        # Everything is read-only except OUT_ENERGY, which is passed with
        # INC_ZERO access.
        dat_dict = {
            'P': self.positions(access.READ),
            'Q': self.charges(access.READ),
            'IL': self.il_scalararray(access.READ),
            'MM_CELLS': self._dat_cells(access.READ),
            'MM_CHILD_INDEX': self._dat_child_index(access.READ),
            'TREE': self.tree(access.READ),
            'OUT_ENERGY': self._extract_energy(access.INC_ZERO),
        }

        self._extract_loop = loop.ParticleLoopOMP(kernel=k, dat_dict=dat_dict)