def _init_self_interaction_lib(self):
    """
    Build the two self-interaction particle loops.

    The ``SelfInteraction`` and ``SelfInteractionPot`` header/source pairs
    are read from ``_SRC_DIR/EwaldOrthSource``. ``self._subvars`` is
    substituted into each header with the ``%`` operator. The resulting
    loops are stored on ``self._self_interaction_lib`` and
    ``self._self_interaction_pot_lib``.
    """
    threaded = self.shared_memory in ('thread', 'omp')
    loop_type = loop.ParticleLoopOMP if threaded else loop.ParticleLoop

    # --- energy only variant ---
    header_path = str(_SRC_DIR) + '/EwaldOrthSource/SelfInteraction.h'
    with open(header_path, 'r') as header_file:
        header_template = header_file.read()
    energy_headers = (kernel.Header(block=header_template % self._subvars), )

    source_path = str(_SRC_DIR) + '/EwaldOrthSource/SelfInteraction.cpp'
    with open(source_path, 'r') as source_file:
        energy_source = source_file.read()

    energy_kernel = kernel.Kernel(
        name='self_interaction_part',
        code=energy_source,
        headers=energy_headers
    )
    energy_dats = {
        'Q': data.ParticleDat(ncomp=1, dtype=ctypes.c_double)(access.READ),
        'u': self._vars['self_interaction_energy'](access.INC_ZERO)
    }
    self._self_interaction_lib = loop_type(kernel=energy_kernel,
                                           dat_dict=energy_dats)

    # --- variant that also takes a one component 'UPP' dat (INC access) ---
    header_path = str(_SRC_DIR) + '/EwaldOrthSource/SelfInteractionPot.h'
    with open(header_path, 'r') as header_file:
        header_template = header_file.read()
    pot_headers = (kernel.Header(block=header_template % self._subvars), )

    source_path = str(_SRC_DIR) + '/EwaldOrthSource/SelfInteractionPot.cpp'
    with open(source_path, 'r') as source_file:
        pot_source = source_file.read()

    pot_kernel = kernel.Kernel(
        name='self_interaction_part_pot',
        code=pot_source,
        headers=pot_headers
    )
    pot_dats = {
        'Q': data.ParticleDat(ncomp=1, dtype=ctypes.c_double)(access.READ),
        'UPP': data.ParticleDat(ncomp=1, dtype=ctypes.c_double)(access.INC),
        'u': self._vars['self_interaction_energy'](access.INC_ZERO)
    }
    self._self_interaction_pot_lib = loop_type(kernel=pot_kernel,
                                               dat_dict=pot_dats)
def kernel(self):
    """
    Create the 'TestPotential1' kernel.

    For a pair whose squared separation is below ``rc2`` the generated C
    code adds ``1/r2`` to every component of ``A`` for both particles.

    :return: ``kernel.Kernel`` with the single constant ``rc2``.
    """
    cutoff_constants = (kernel.Constant('rc2', self._rc**2), )

    source = '''
    const double R[3] = {P[1][0] - P[0][0], P[1][1] - P[0][1], P[1][2] - P[0][2]};

    double r2 = R[0]*R[0] + R[1]*R[1] + R[2]*R[2];

    if (r2 < rc2){

        r2=1./r2;

        A[0][0]+=r2;
        A[0][1]+=r2;
        A[0][2]+=r2;

        A[1][0]+=r2;
        A[1][1]+=r2;
        A[1][2]+=r2;

    }
    '''

    return kernel.Kernel('TestPotential1', source, cutoff_constants,
                         ['stdio.h'], None)
def kernel(self):
    """
    Create the 'LJ_accel_U' kernel in its one-sided ``P(i, j)`` form.

    The generated C code updates ``u(0)`` and ``A(0, 0..2)`` only; every
    contribution is selected with a ternary on ``r2 < rc2`` rather than a
    branch.

    :return: ``kernel.Kernel`` with constants ``sigma2``, ``rc2``,
        ``internalshift``, ``CF`` and ``CV``.
    """
    source = '''
    const double R0 = P(1, 0) - P(0, 0);
    const double R1 = P(1, 1) - P(0, 1);
    const double R2 = P(1, 2) - P(0, 2);

    const double r2 = R0*R0 + R1*R1 + R2*R2;

    const double r_m2 = sigma2/r2;
    const double r_m4 = r_m2*r_m2;
    const double r_m6 = r_m4*r_m2;

    u(0)+= (r2 < rc2) ? 0.5*CV*((r_m6-1.0)*r_m6 + internalshift) : 0.0;

    const double r_m8 = r_m4*r_m4;
    const double f_tmp = CF*(r_m6 - 0.5)*r_m8;

    A(0, 0)+= (r2 < rc2) ? f_tmp*R0 : 0.0;
    A(0, 1)+= (r2 < rc2) ? f_tmp*R1 : 0.0;
    A(0, 2)+= (r2 < rc2) ? f_tmp*R2 : 0.0;
    '''

    # name/value pairs exposed to the C code as compile time constants
    named_values = (
        ('sigma2', self._sigma**2),
        ('rc2', self._rc**2),
        ('internalshift', self._shift_internal),
        ('CF', self._C_F),
        ('CV', self._C_V),
    )
    lj_constants = tuple(
        kernel.Constant(symbol, value) for symbol, value in named_values
    )

    return kernel.Kernel('LJ_accel_U', source, lj_constants,
                         [kernel.Header('stdio.h')])
def _generate_pairloop(self):
    """
    Construct the short-range pair loop.

    ``self.interaction_func`` is written to a header with
    ``lib.build.write_header``; the kernel calls ``POINT_EVAL`` on the
    positions and types of each pair and adds the result to ``ENERGY[0]``.

    :return: the constructed ``PairLoop`` (cutoff ``self.cutoff``).
    """
    interaction_header = lib.build.write_header(self.interaction_func)

    source = r'''
    const double u0 = POINT_EVAL(
        P.i[0],
        P.i[1],
        P.i[2],
        P.j[0],
        P.j[1],
        P.j[2],
        (double) T.i[0],
        (double) T.j[0]
    );
    ENERGY[0] += u0;
    '''

    pair_kernel = kernel.Kernel(
        'mc_short_range',
        source,
        headers=(interaction_header,)
    )

    return PairLoop(
        pair_kernel,
        dat_dict={
            'P': self.positions(access.READ),
            'T': self.types(access.READ),
            'ENERGY': self._ga_energy(access.INC_ZERO)
        },
        shell_cutoff=self.cutoff
    )
def __init__(self, state, size=0, v0=None):
    """
    Set up the velocity autocorrelation particle loop.

    :arg state: State providing ``npart_local``, ``velocities`` and
        ``as_func``.
    :arg size: Not used in this constructor.
    :arg v0: Reference velocities passed to ``set_v0``; when ``None``,
        ``set_v0`` is called with ``state=`` instead.
    """
    self._state = state
    self._V0 = data.ParticleDat(self._state.npart_local, 3, name='v0')
    self._VT = state.velocities
    self._VO_SET = False

    if v0 is None:
        self.set_v0(state=self._state)
    else:
        self.set_v0(v0)

    self._VAF = data.ScalarArray(ncomp=1)
    self._V = []
    self._T = []

    source = '''
    VAF(0) += (v0(0)*VT(0) + v0(1)*VT(1) + v0(2)*VT(2))*Ni;
    '''
    vaf_reduction = (kernel.Reduction('VAF', 'VAF[I]', '+'), )
    # 'Ni' is supplied at execution time as a double
    runtime_args = {'Ni': ctypes.c_double}

    vaf_kernel = kernel.Kernel(
        'VelocityAutocorrelation',
        source,
        None,
        ['stdio.h'],
        vaf_reduction,
        runtime_args
    )

    self._datdict = {'VAF': self._VAF, 'v0': self._V0, 'VT': self._VT}
    self._loop = loop.ParticleLoop(
        self._state.as_func('npart_local'),
        None,
        kernel=vaf_kernel,
        dat_dict=self._datdict
    )
def __init__(self, velocities=None, masses=None, kinetic_energy_dat=None,
             looping_method=None):
    """
    Set up the kinetic energy particle loop.

    :arg velocities: Dat called with ``access.R`` and bound to ``V``.
    :arg masses: Dat called with ``access.R`` and bound to ``M``.
    :arg kinetic_energy_dat: Dat the kernel increments as ``k``; a one
        component ``c_double`` ``data.ScalarArray`` is created when ``None``.
    :arg looping_method: Callable used to build the loop; defaults to
        ``loop.ParticleLoop``.
    :raises TypeError: if ``velocities`` or ``masses`` is ``None``.
    """
    # Both dats are called below; a missing one previously surfaced as
    # "'NoneType' object is not callable". Same exception type, clearer text.
    if velocities is None or masses is None:
        raise TypeError('velocities and masses must both be provided')

    if looping_method is None:
        looping_method = loop.ParticleLoop

    if kinetic_energy_dat is None:
        self.k = data.ScalarArray(ncomp=1, dtype=ctypes.c_double)
    else:
        self.k = kinetic_energy_dat

    self._v = velocities

    _K_kernel_code = '''
    k(0) += (V(0)*V(0) + V(1)*V(1) + V(2)*V(2))*0.5*M(0);
    '''
    _constants_K = []
    _K_kernel = kernel.Kernel('K_kernel', _K_kernel_code, _constants_K)

    self._kinetic_energy_lib = looping_method(
        kernel=_K_kernel,
        dat_dict={
            'V': velocities(access.R),
            'k': self.k(access.INC),
            'M': masses(access.R)
        }
    )

    self._ke_store = []
def kernel(self):
    """
    Create the 'LJ_accel' kernel (``.i``/``.j`` form).

    The generated C code only updates ``A.i[0..2]``; each update is
    selected with a ternary on ``r2 < rc2``. ``internalshift`` and ``CV``
    are passed as constants although this kernel source does not use them.

    :return: ``kernel.Kernel`` for the potential.
    """
    source = '''
    const double R0 = P.j[0] - P.i[0];
    const double R1 = P.j[1] - P.i[1];
    const double R2 = P.j[2] - P.i[2];

    const double r2 = R0*R0 + R1*R1 + R2*R2;

    const double r_m2 = sigma2/r2;
    const double r_m4 = r_m2*r_m2;

    const double f_tmp = CF*(r_m4*r_m2 - 0.5)*r_m4*r_m4;

    A.i[0]+= (r2 < rc2) ? f_tmp*R0 : 0.0;
    A.i[1]+= (r2 < rc2) ? f_tmp*R1 : 0.0;
    A.i[2]+= (r2 < rc2) ? f_tmp*R2 : 0.0;
    '''

    named_values = (
        ('sigma2', self._sigma**2),
        ('rc2', self._rc**2),
        ('internalshift', self._shift_internal),
        ('CF', self._C_F),
        ('CV', self._C_V),
    )
    lj_constants = tuple(
        kernel.Constant(symbol, value) for symbol, value in named_values
    )

    return kernel.Kernel('LJ_accel', source, lj_constants,
                         [kernel.Header('stdio.h')])
def integrate(self, dt=None, t=None):
    """
    Integrate state forward in time.

    Rebuilds the 'vv1' and 'vv2' particle loops with the current step
    size, applies boundary conditions, updates forces and then runs
    ``_velocity_verlet_integration`` under ``self.timer``.

    :arg double dt: Time step size (keeps the stored value when ``None``).
    :arg double t: End time (keeps the stored value when ``None``).
    """
    print("starting integration")

    if dt is not None:
        self._dt = dt
    if t is not None:
        self._T = t

    # number of steps, rounded up
    self._max_it = int(math.ceil(self._T / self._dt))

    step_constants = [
        kernel.Constant('dt', self._dt),
        kernel.Constant('dht', 0.5 * self._dt),
    ]
    self._constants = step_constants

    self._kernel1 = kernel.Kernel('vv1', self._kernel1_code, self._constants)
    first_stage_dats = {
        'P': self._P(access.W),
        'V': self._V(access.W),
        'A': self._A(access.R),
        'M': self._M(access.R)
    }
    self._p1 = loop.ParticleLoop(self._kernel1, first_stage_dats)

    self._kernel2 = kernel.Kernel('vv2', self._kernel2_code, self._constants)
    second_stage_dats = {
        'V': self._V(access.W),
        'A': self._A(access.R),
        'M': self._M(access.R)
    }
    self._p2 = loop.ParticleLoop(self._kernel2, second_stage_dats)

    self._update_controller.execute_boundary_conditions()
    self._sim.forces_update()

    self.timer.start()
    self._velocity_verlet_integration()
    self.timer.pause()
def kernel(self):
    """
    Create the 'BuckinghamV' kernel in its symmetric ``P(i, j)`` form.

    Inside the cutoff (``r2 < rc2``) the generated C code adds to ``u(0)``
    and applies ``f_tmp*R`` with opposite signs to ``A(0, .)`` and
    ``A(1, .)``.

    :return: ``kernel.Kernel`` with constants ``_A``, ``_AB``, ``_B``,
        ``_MB``, ``_C``, ``rc2`` and ``internalshift``.
    """
    source = '''
    const double R0 = P(1,0) - P(0,0);
    const double R1 = P(1,1) - P(0,1);
    const double R2 = P(1,2) - P(0,2);

    const double r2 = R0*R0 + R1*R1 + R2*R2;

    if (r2 < rc2) {

        const double r = sqrt(r2);
        // \\exp{-B*r}
        const double exp_mbr = exp(_MB*r);

        // r^{-2, -4, -6}
        const double r_m1 = 1.0/r;
        const double r_m2 = r_m1*r_m1;
        const double r_m4 = r_m2*r_m2;
        const double r_m6 = r_m4*r_m2;

        // \\frac{C}{r^6}
        const double crm6 = _C*r_m6;

        // A \\exp{-Br} - \\frac{C}{r^6}
        u(0)+= _A*exp_mbr - crm6 + internalshift;

        // AB \\exp{-Br} - \\frac{C}{r^6}*\\frac{6}{r}
        const double term2 = crm6*(-6.0)*r_m1;
        const double f_tmp = _AB * exp_mbr + term2;

        A(0,0)+=f_tmp*R0;
        A(0,1)+=f_tmp*R1;
        A(0,2)+=f_tmp*R2;

        A(1,0)-=f_tmp*R0;
        A(1,1)-=f_tmp*R1;
        A(1,2)-=f_tmp*R2;

    }
    '''

    named_values = (
        ('_A', self.a),
        ('_AB', self.ab),
        ('_B', self.b),
        ('_MB', self.mb),
        ('_C', self.c),
        ('rc2', self.rc**2),
        ('internalshift', self._shift_internal),
    )
    buckingham_constants = tuple(
        kernel.Constant(symbol, value) for symbol, value in named_values
    )

    required_headers = [kernel.Header('stdio.h'), kernel.Header('math.h')]
    return kernel.Kernel('BuckinghamV', source, buckingham_constants,
                         required_headers)
def kernel(self):
    """
    Create the counting variant of the 'LJ_accel_U' kernel.

    The generated C code increments ``OUTCOUNT(0)`` for every pair and
    ``COUNT(0)`` for pairs inside the cutoff, in addition to updating
    ``u(0)`` and both particles' ``A``. A ``+`` reduction on ``u[0]`` is
    declared.

    :return: ``kernel.Kernel`` for the potential.
    """
    source = '''

    OUTCOUNT(0)++;

    const double R0 = P(1, 0) - P(0, 0);
    const double R1 = P(1, 1) - P(0, 1);
    const double R2 = P(1, 2) - P(0, 2);

    //printf("Positions P(0) = %f, P(1) = %f |", P(0, 1), P(1, 1));

    const double r2 = R0*R0 + R1*R1 + R2*R2;

    if (r2 < rc2){

        COUNT(0)++;

        const double r_m2 = sigma2/r2;
        const double r_m4 = r_m2*r_m2;
        const double r_m6 = r_m4*r_m2;

        u(0)+= CV*((r_m6-1.0)*r_m6 + internalshift);

        const double r_m8 = r_m4*r_m4;
        const double f_tmp = CF*(r_m6 - 0.5)*r_m8;

        A(0, 0)+=f_tmp*R0;
        A(0, 1)+=f_tmp*R1;
        A(0, 2)+=f_tmp*R2;

        A(1, 0)-=f_tmp*R0;
        A(1, 1)-=f_tmp*R1;
        A(1, 2)-=f_tmp*R2;

    }
    '''

    named_values = (
        ('sigma2', self._sigma**2),
        ('rc2', self._rc**2),
        ('internalshift', self._shift_internal),
        ('CF', self._C_F),
        ('CV', self._C_V),
    )
    lj_constants = tuple(
        kernel.Constant(symbol, value) for symbol, value in named_values
    )
    energy_reduction = (kernel.Reduction('u', 'u[0]', '+'), )

    return kernel.Kernel('LJ_accel_U', source, lj_constants,
                         [kernel.Header('stdio.h')], energy_reduction)
def _build_libs(self, dt):
    """
    Build the two velocity Verlet particle loops for step size ``dt``.

    'vv1' updates ``V`` by ``dht*F/M`` and then ``P`` by ``dt*V``; 'vv2'
    only performs the ``V`` update. The loops are stored on ``self._p1``
    and ``self._p2``.

    :arg dt: Time step; exposed to the C code as ``dt`` and, halved, as
        ``dht``.
    """
    step_constants = [
        kernel.Constant(symbol, value)
        for symbol, value in (('dt', dt), ('dht', 0.5 * dt))
    ]

    first_stage_source = '''
    const double M_tmp = 1.0/M(0);
    V(0) += dht*F(0)*M_tmp;
    V(1) += dht*F(1)*M_tmp;
    V(2) += dht*F(2)*M_tmp;
    P(0) += dt*V(0);
    P(1) += dt*V(1);
    P(2) += dt*V(2);
    '''
    first_stage = kernel.Kernel('vv1', first_stage_source, step_constants)
    self._p1 = self._looping_method(
        kernel=first_stage,
        dat_dict={
            'P': self._p(access.W),
            'V': self._v(access.W),
            'F': self._f(access.R),
            'M': self._m(access.R)
        }
    )

    second_stage_source = '''
    const double M_tmp = 1.0/M(0);
    V(0) += dht*F(0)*M_tmp;
    V(1) += dht*F(1)*M_tmp;
    V(2) += dht*F(2)*M_tmp;
    '''
    second_stage = kernel.Kernel('vv2', second_stage_source, step_constants)
    self._p2 = self._looping_method(
        kernel=second_stage,
        dat_dict={
            'V': self._v(access.W),
            'F': self._f(access.R),
            'M': self._m(access.R)
        }
    )
def kernel(self):
    """
    Create the 'BuckinghamV' kernel in its one-sided ``.i``/``.j`` form.

    All intermediate quantities are computed unconditionally; the updates
    of ``u[0]`` (with a factor 0.5) and ``A.i[0..2]`` are selected with a
    ternary on ``r2 < rc2``.

    :return: ``kernel.Kernel`` with constants ``_A``, ``_AB``, ``_B``,
        ``_MB``, ``_C``, ``rc2`` and ``internalshift``.
    """
    source = '''
    const double R0 = P.j[0] - P.i[0];
    const double R1 = P.j[1] - P.i[1];
    const double R2 = P.j[2] - P.i[2];

    const double r2 = R0*R0 + R1*R1 + R2*R2;

    const double r = sqrt(r2);
    // \\exp{-B*r}
    const double exp_mbr = exp(_MB*r);

    // r^{-2, -4, -6}
    const double r_m1 = 1.0/r;
    const double r_m2 = r_m1*r_m1;
    const double r_m4 = r_m2*r_m2;
    const double r_m6 = r_m4*r_m2;

    // \\frac{C}{r^6}
    const double crm6 = _C*r_m6;

    // A \\exp{-Br} - \\frac{C}{r^6}
    u[0]+= (r2 < rc2) ? 0.5*(_A*exp_mbr - crm6 + internalshift) : 0.0;

    // = AB \\exp{-Br} - \\frac{C}{r^6}*\\frac{6}{r}
    const double term2 = crm6*(-6.0)*r_m1;
    const double f_tmp = _AB * exp_mbr + term2;

    A.i[0]+= (r2 < rc2) ? f_tmp*R0 : 0.0;
    A.i[1]+= (r2 < rc2) ? f_tmp*R1 : 0.0;
    A.i[2]+= (r2 < rc2) ? f_tmp*R2 : 0.0;
    '''

    named_values = (
        ('_A', self.a),
        ('_AB', self.ab),
        ('_B', self.b),
        ('_MB', self.mb),
        ('_C', self.c),
        ('rc2', self.rc**2),
        ('internalshift', self._shift_internal),
    )
    buckingham_constants = tuple(
        kernel.Constant(symbol, value) for symbol, value in named_values
    )

    required_headers = [kernel.Header('stdio.h'), kernel.Header('math.h')]
    return kernel.Kernel('BuckinghamV', source, buckingham_constants,
                         required_headers)
def kernel(self):
    """
    Create the 'NULL_Potential' kernel.

    The generated C code computes the pair separation and then sets every
    component of ``A`` for both particles to zero. No constants, headers
    or reductions are passed.

    :return: ``kernel.Kernel`` for the potential.
    """
    source = '''
    const double R0 = P(1, 0) - P(0, 0);
    const double R1 = P(1, 1) - P(0, 1);
    const double R2 = P(1, 2) - P(0, 2);

    A(0, 0)=0;
    A(0, 1)=0;
    A(0, 2)=0;

    A(1, 0)=0;
    A(1, 1)=0;
    A(1, 2)=0;
    '''

    no_constants = no_headers = no_reductions = None
    return kernel.Kernel('NULL_Potential', source, no_constants,
                         no_headers, no_reductions)
def _init_near_potential_lib(self):
    """
    Build the near-field potential pair loop.

    Reads ``EvaluateNearPotentialField.h/.cpp`` from
    ``_SRC_DIR/EwaldOrthSource``, substitutes ``self._subvars`` into the
    header with ``%`` and stores a ``pairloop.CellByCellOMP`` loop on
    ``self._near_potential_field``. The shell cutoff is the real space
    cutoff plus ``self.shell_width``, or 1.05 times the cutoff when no
    shell width is set.
    """
    source_dir = str(_SRC_DIR) + '/EwaldOrthSource/'

    with open(source_dir + 'EvaluateNearPotentialField.h', 'r') as header_file:
        header_template = header_file.read()
    near_headers = (kernel.Header(block=header_template % self._subvars), )

    with open(source_dir + 'EvaluateNearPotentialField.cpp', 'r') as source_file:
        near_source = source_file.read()

    near_kernel = kernel.Kernel(
        name='real_space_part',
        code=near_source,
        headers=near_headers
    )

    if self.shell_width is not None:
        shell_cutoff = self.real_cutoff + self.shell_width
    else:
        shell_cutoff = self.real_cutoff * 1.05

    pair_loop_type = pairloop.CellByCellOMP
    near_dats = {
        'P': data.ParticleDat(ncomp=3, dtype=ctypes.c_double)(access.READ),
        'Q': data.ParticleDat(ncomp=1, dtype=ctypes.c_double)(access.READ),
        'M': data.ParticleDat(ncomp=1, dtype=ctypes.c_int)(access.READ),
        'u': data.ParticleDat(ncomp=1, dtype=ctypes.c_double)(access.INC),
    }
    self._near_potential_field = pair_loop_type(
        kernel=near_kernel,
        dat_dict=near_dats,
        shell_cutoff=shell_cutoff
    )
def kernel(self):
    """
    Create the 'LJ_accel_U' kernel (``.i``/``.j`` form with a branch).

    Inside the cutoff (``r2 < rc2``) the generated C code adds half of the
    pair energy to ``u[0]`` and ``f_tmp*R`` to ``A.i[0..2]``.

    :return: ``kernel.Kernel`` with constants ``sigma2``, ``rc2``,
        ``internalshift``, ``CF`` and ``CV``.
    """
    source = '''
    //N_f = 27
    const double R0 = P.j[0] - P.i[0];
    const double R1 = P.j[1] - P.i[1];
    const double R2 = P.j[2] - P.i[2];

    const double r2 = R0*R0 + R1*R1 + R2*R2;

    if (r2 < rc2){

        const double r_m2 = sigma2/r2;
        const double r_m4 = r_m2*r_m2;
        const double r_m6 = r_m4*r_m2;

        u[0] += 0.5*CV*((r_m6-1.0)*r_m6 + internalshift);

        const double r_m8 = r_m4*r_m4;
        const double f_tmp = CF*(r_m6 - 0.5)*r_m8;

        A.i[0] += f_tmp*R0;
        A.i[1] += f_tmp*R1;
        A.i[2] += f_tmp*R2;

    }
    '''

    named_values = (
        ('sigma2', self._sigma**2),
        ('rc2', self._rc**2),
        ('internalshift', self._shift_internal),
        ('CF', self._C_F),
        ('CV', self._C_V),
    )
    lj_constants = tuple(
        kernel.Constant(symbol, value) for symbol, value in named_values
    )

    return kernel.Kernel('LJ_accel_U', source, lj_constants,
                         [kernel.Header('stdio.h')])
def _init_extract_loop(self):
    """
    Build the particle loop stored on ``self._extract_loop``.

    The C kernel is assembled from three generated pieces: the module
    source of ``self.sph_gen``, a block of per-(lx, mx) statements built
    below in ``EC``, and a header defining per-level lookup arrays. For
    every particle the kernel loops over the ``R`` levels, accumulates
    ``tmp_energy`` per level and finally adds
    ``particle_energy * 0.5 * Q.i[0]`` to ``OUT_ENERGY[0]``.
    """
    # NOTE(review): `g` is not used again in this method.
    g = self.group
    extent = self.domain.extent
    # reciprocal of (extent / number of cells) for subdivision**(R - 1)
    # cells per dimension; passed to the kernel as CWX/CWY/CWZ
    cell_widths = [
        1.0 / (ex / (sx**(self.R - 1)))
        for ex, sx in zip(extent, self.subdivision)
    ]
    L = self.L
    sph_gen = self.sph_gen

    def cube_ind(L, M):
        # linear index of the (L, M) coefficient: L*(L+1) + M
        return ((L) * ((L) + 1) + (M))

    # EC collects the generated C statements substituted for {ENERGY_COMP}
    EC = ''
    for lx in range(L):
        for mx in range(-lx, lx + 1):
            # symbol suffix: 'n<|mx|>' for negative mx, 'p<mx>' otherwise
            smx = 'n' if mx < 0 else 'p'
            smx += str(abs(mx))
            re_lnm = SphSymbol('reln{lx}m{mx}'.format(lx=lx, mx=smx))
            im_lnm = SphSymbol('imln{lx}m{mx}'.format(lx=lx, mx=smx))
            # load the two TREE entries for this (lx, mx); the second one
            # lives IM_OFFSET (= L**2, see constants below) further on
            EC += '''
            const double {re_lnm} = TREE[OFFSET + {cx}];
            const double {im_lnm} = TREE[OFFSET + IM_OFFSET + {cx}];
            '''.format(re_lnm=str(re_lnm),
                       im_lnm=str(im_lnm),
                       cx=str(cube_ind(lx, mx)))
            # presumably a symbolic complex product of the TREE entry with
            # the pair returned by get_y_sym -- confirm against cmplx_mul;
            # only the first returned value (cm_re) is used below
            cm_re, cm_im = cmplx_mul(re_lnm, im_lnm,
                                     sph_gen.get_y_sym(lx, mx)[0],
                                     sph_gen.get_y_sym(lx, mx)[1])
            EC += 'tmp_energy += ({cm_re}) * rhol;\n'.format(cm_re=cm_re)
        # after all mx for this lx: rhol becomes radius**(lx + 1)
        EC += 'rhol *= radius;\n'

    # The kernel source goes through str.format, hence the doubled braces
    # around C blocks.
    k = kernel.Kernel(
        'lm_extract_loop',
        r'''
        const double rx = P.i[0];
        const double ry = P.i[1];
        const double rz = P.i[2];

        double particle_energy = 0.0;

        for( int level=0 ; level<R ; level++ ){{

            const int64_t cellx = MM_CELLS.i[level * 3 + 0];
            const int64_t celly = MM_CELLS.i[level * 3 + 1];
            const int64_t cellz = MM_CELLS.i[level * 3 + 2];

            const double dx = rx - ((-HEX) + (0.5 + cellx) * WIDTHS_X[level]);
            const double dy = ry - ((-HEY) + (0.5 + celly) * WIDTHS_Y[level]);
            const double dz = rz - ((-HEZ) + (0.5 + cellz) * WIDTHS_Z[level]);

            const double xy2 = dx * dx + dy * dy;
            const double radius = sqrt(xy2 + dz * dz);
            const double theta = atan2(sqrt(xy2), dz);
            const double phi = atan2(dy, dx);

            const int64_t lin_ind = cellx + NCELLS_X[level] * (celly + NCELLS_Y[level] * cellz);
            const int64_t OFFSET = LEVEL_OFFSETS[level] + NCOMP * lin_ind;

            {SPH_GEN}

            double tmp_energy = 0.0;
            double rhol = 1.0;
            {ENERGY_COMP}

            particle_energy += tmp_energy;

        }}

        OUT_ENERGY[0] += particle_energy * 0.5 * Q.i[0];

        '''.format(SPH_GEN=str(sph_gen.module), ENERGY_COMP=str(EC)),
        (Constant('R', self.R), Constant('EX', extent[0]),
         Constant('EY', extent[1]), Constant('EZ', extent[2]),
         Constant('HEX', 0.5 * extent[0]), Constant(
             'HEY', 0.5 * extent[1]), Constant('HEZ', 0.5 * extent[2]),
         Constant('CWX', cell_widths[0]), Constant(
             'CWY', cell_widths[1]), Constant('CWZ', cell_widths[2]),
         Constant('LCX', self.subdivision[0]**(self.R - 1)),
         Constant('LCY', self.subdivision[1]**(self.R - 1)),
         Constant('LCZ', self.subdivision[2]
                  **(self.R - 1)), Constant('SDX', self.subdivision[0]),
         Constant('SDY', self.subdivision[1]),
         Constant('SDZ', self.subdivision[2]),
         Constant('IL_NO', self.il_array.shape[1]),
         Constant('IL_STRIDE_OUTER',
                  self.il_array.shape[1] * self.il_array.shape[2]),
         Constant('NCOMP', self.ncomp), Constant('IM_OFFSET', self.L**2)),
        # header with one array entry per level, filled from the
        # pre-formatted strings held on self
        headers=(lib.build.write_header("""
            #define R {R}
            const double WIDTHS_X[R] = {{ {WIDTHS_X} }};
            const double WIDTHS_Y[R] = {{ {WIDTHS_Y} }};
            const double WIDTHS_Z[R] = {{ {WIDTHS_Z} }};

            const int64_t NCELLS_X[R] = {{ {NCELLS_X} }};
            const int64_t NCELLS_Y[R] = {{ {NCELLS_Y} }};
            const int64_t NCELLS_Z[R] = {{ {NCELLS_Z} }};

            const int64_t LEVEL_OFFSETS[R] = {{ {LEVEL_OFFSETS} }};
            """.format(R=self.R,
                       WIDTHS_X=self.widths_x_str,
                       WIDTHS_Y=self.widths_y_str,
                       WIDTHS_Z=self.widths_z_str,
                       NCELLS_X=self.ncells_x_str,
                       NCELLS_Y=self.ncells_y_str,
                       NCELLS_Z=self.ncells_z_str,
                       LEVEL_OFFSETS=self.level_offsets_str)), ))

    # MM_CHILD_INDEX is bound here although the kernel source above does
    # not reference it.
    dat_dict = {
        'P': self.positions(access.READ),
        'Q': self.charges(access.READ),
        'MM_CELLS': self._dat_cells(access.READ),
        'MM_CHILD_INDEX': self._dat_child_index(access.READ),
        'TREE': self.tree(access.READ),
        'OUT_ENERGY': self._extract_energy(access.INC_ZERO),
    }

    self._extract_loop = loop.ParticleLoopOMP(kernel=k, dat_dict=dat_dict)
def _init_real_space_lib(self):
    """
    Build the two real-space pair loops.

    Both kernels are read from ``_SRC_DIR/EwaldOrthSource`` with
    ``self._subvars`` substituted into the header via ``%``. The plain
    variant is stored on ``self._real_space_pairloop``; the variant that
    also takes a one component ``UPP`` dat is stored on
    ``self._real_space_pairloop_pot``. Both share the same shell cutoff:
    the real space cutoff plus ``self.shell_width``, or 1.05 times the
    cutoff when no shell width is set.
    """
    source_dir = str(_SRC_DIR) + '/EwaldOrthSource/'

    # --- real space energy and force kernel ---
    with open(source_dir + 'RealSpaceForceEnergy.h', 'r') as header_file:
        header_template = header_file.read()
    force_headers = (kernel.Header(block=header_template % self._subvars), )

    with open(source_dir + 'RealSpaceForceEnergy.cpp', 'r') as source_file:
        force_source = source_file.read()

    force_kernel = kernel.Kernel(
        name='real_space_part',
        code=force_source,
        headers=force_headers
    )

    if self.shell_width is not None:
        shell_cutoff = self.real_cutoff + self.shell_width
    else:
        shell_cutoff = self.real_cutoff * 1.05

    threaded = self.shared_memory in ('thread', 'omp')
    pair_loop_type = (pairloop.PairLoopNeighbourListNSOMP if threaded
                      else pairloop.PairLoopNeighbourListNS)

    force_dats = {
        'P': data.ParticleDat(ncomp=3, dtype=ctypes.c_double)(access.READ),
        'Q': data.ParticleDat(ncomp=1, dtype=ctypes.c_double)(access.READ),
        'F': data.ParticleDat(ncomp=3, dtype=ctypes.c_double)(access.INC),
        'u': self._vars['real_space_energy'](access.INC_ZERO)
    }
    self._real_space_pairloop = pair_loop_type(
        kernel=force_kernel,
        dat_dict=force_dats,
        shell_cutoff=shell_cutoff
    )

    # --- same, plus the per particle 'UPP' dat ---
    with open(source_dir + 'RealSpaceForceEnergyPot.h', 'r') as header_file:
        header_template = header_file.read()
    pot_headers = (kernel.Header(block=header_template % self._subvars), )

    with open(source_dir + 'RealSpaceForceEnergyPot.cpp', 'r') as source_file:
        pot_source = source_file.read()

    pot_kernel = kernel.Kernel(
        name='real_space_part_pot',
        code=pot_source,
        headers=pot_headers
    )

    pot_dats = {
        'P': data.ParticleDat(ncomp=3, dtype=ctypes.c_double)(access.READ),
        'Q': data.ParticleDat(ncomp=1, dtype=ctypes.c_double)(access.READ),
        'UPP': data.ParticleDat(ncomp=1, dtype=ctypes.c_double)(access.INC),
        'F': data.ParticleDat(ncomp=3, dtype=ctypes.c_double)(access.INC),
        'u': self._vars['real_space_energy'](access.INC_ZERO)
    }
    self._real_space_pairloop_pot = pair_loop_type(
        kernel=pot_kernel,
        dat_dict=pot_dats,
        shell_cutoff=shell_cutoff
    )
def _init_libs(self):
    """
    Build the reciprocal-space particle loops.

    ``AccumulateRecip`` becomes ``self._cont_lib`` and
    ``ExtractForceEnergy`` becomes ``self._extract_force_energy_lib``;
    ``self._extract_force_energy_pot_lib`` is initialised to ``None``.
    Each header has ``self._subvars`` substituted with ``%`` and is passed
    to the kernel as a single ``kernel.Header`` (not wrapped in a tuple).
    """
    source_dir = str(_SRC_DIR) + '/EwaldOrthSource/'

    # --- reciprocal contribution calculation ---
    with open(source_dir + 'AccumulateRecip.h', 'r') as header_file:
        header_template = header_file.read()
    accumulate_header = kernel.Header(block=header_template % self._subvars)

    with open(source_dir + 'AccumulateRecip.cpp', 'r') as source_file:
        accumulate_source = source_file.read()

    accumulate_kernel = kernel.Kernel(
        name='reciprocal_contributions',
        code=accumulate_source,
        headers=accumulate_header
    )

    threaded = self.shared_memory in ('thread', 'omp')
    loop_type = loop.ParticleLoopOMP if threaded else loop.ParticleLoop

    accumulate_dats = {
        'Positions': data.ParticleDat(ncomp=3, dtype=ctypes.c_double)(access.READ),
        'Charges': data.ParticleDat(ncomp=1, dtype=ctypes.c_double)(access.READ),
        'RecipSpace': self._vars['recip_space_kernel'](access.INC_ZERO)
    }
    self._cont_lib = loop_type(kernel=accumulate_kernel,
                               dat_dict=accumulate_dats)

    # --- reciprocal extract forces plus energy ---
    with open(source_dir + 'ExtractForceEnergy.h', 'r') as header_file:
        header_template = header_file.read()
    extract_header = kernel.Header(block=header_template % self._subvars)

    with open(source_dir + 'ExtractForceEnergy.cpp', 'r') as source_file:
        extract_source = source_file.read()

    extract_kernel = kernel.Kernel(
        name='reciprocal_force_energy',
        code=extract_source,
        headers=extract_header
    )

    extract_dats = {
        'Positions': data.ParticleDat(ncomp=3, dtype=ctypes.c_double)(access.READ),
        'Forces': data.ParticleDat(ncomp=3, dtype=ctypes.c_double)(access.INC),
        'Energy': self._vars['recip_space_energy'](access.INC_ZERO),
        'Charges': data.ParticleDat(ncomp=1, dtype=ctypes.c_double)(access.READ),
        'RecipSpace': self._vars['recip_space_kernel'](access.READ),
        'CoeffSpace': self._vars['coeff_space_kernel'](access.READ)
    }
    self._extract_force_energy_lib = loop_type(kernel=extract_kernel,
                                               dat_dict=extract_dats)

    self._extract_force_energy_pot_lib = None
def __init__(self, state, rmax=None, rsteps=100):
    """
    Set up the radial distribution pair loop.

    :arg state: State providing ``domain.extent``, ``positions`` and
        ``npart_local``.
    :arg rmax: Largest separation that is binned; defaults to half of the
        minimum of ``extent.data``.
    :arg rsteps: Number of bins in the integer histogram ``self._gr``.
    """
    self._count = 0
    self._state = state
    self._extent = self._state.domain.extent
    self._P = self._state.positions
    self._N = self._state.npart_local

    if rmax is None:
        rmax = 0.5 * np.min(self._extent.data)
    self._rmax = rmax

    self._rsteps = rsteps

    # integer histogram, zeroed via scale
    self._gr = data.ScalarArray(ncomp=self._rsteps, dtype=ctypes.c_int)
    self._gr.scale(0.0)

    source = '''
    double R0 = P(1, 0) - P(0, 0);
    double R1 = P(1, 1) - P(0, 1);
    double R2 = P(1, 2) - P(0, 2);

    if (abs_md(R0) > exto20 ) { R0 += isign(R0) * extent0 ; }
    if (abs_md(R1) > exto21 ) { R1 += isign(R1) * extent1 ; }
    if (abs_md(R2) > exto22 ) { R2 += isign(R2) * extent2 ; }

    const double r2 = R0*R0 + R1*R1 + R2*R2;

    if (r2 < rmax2){

        double r20=0.0, r21 = r2;

        r21 = sqrt(r2);
        #pragma omp atomic
        GR[(int) (abs_md(r21* rstepsoverrmax))]++;

    }
    '''

    named_values = (
        ('rmaxoverrsteps', 0.2 * self._rmax / self._rsteps),
        ('rstepsoverrmax', self._rsteps / self._rmax),
        ('rmax2', self._rmax**2),
        ('extent0', self._extent[0]),
        ('extent1', self._extent[1]),
        ('extent2', self._extent[2]),
        ('exto20', 0.5 * self._extent[0]),
        ('exto21', 0.5 * self._extent[1]),
        ('exto22', 0.5 * self._extent[2]),
    )
    rdf_constants = tuple(
        kernel.Constant(symbol, value) for symbol, value in named_values
    )

    rdf_kernel = kernel.Kernel(
        'radial_distro_periodic_static',
        source,
        rdf_constants,
        headers=['math.h', 'stdio.h']
    )

    rdf_dats = {'P': self._P, 'GR': self._gr}
    self._p = pairloop.DoubleAllParticleLoop(self._N,
                                             kernel=rdf_kernel,
                                             dat_dict=rdf_dats)

    self.timer = ppmd.opt.Timer(runtime.TIMER, 0)
def apply(self):
    """
    Enforce the boundary conditions on the held state.

    With a single rank a particle loop built from
    ``cudaOneProcPBCSource.cu`` is executed on the position dat with the
    domain extent passed as static arguments. With more than one rank the
    two stages of the ``cudaNProcPBC`` library are run to collect escaping
    particles, which are then handed to ``state.move_to_neighbour``
    followed by ``state.filter_on_domain_boundary``.
    """
    comm = self.state.domain.comm

    # NOTE(review): timer_apply is started here but no matching pause()
    # is visible in this method -- confirm whether that is intended.
    self.timer_apply.start()

    if comm.Get_size() == 1:
        """
        BC code for one proc. porbably removable when restricting to large parallel systems.
        """

        self.timer_lib_overhead.start()

        # build the single-rank loop on first use only
        if self._one_process_pbc_lib is None:
            with open(
                    str(cuda_config.LIB_DIR) + '/cudaOneProcPBCSource.cu',
                    'r') as fh:
                _one_proc_pbc_code = fh.read()

            _one_proc_pbc_kernel = kernel.Kernel('_one_proc_pbc_kernel',
                                                 _one_proc_pbc_code,
                                                 None,
                                                 static_args={
                                                     'E0': ctypes.c_double,
                                                     'E1': ctypes.c_double,
                                                     'E2': ctypes.c_double
                                                 })

            self._one_process_pbc_lib = cuda_loop.ParticleLoop(
                _one_proc_pbc_kernel, {
                    'P': self.state.get_position_dat()(access.RW),
                    'BCFLAG': self._flag(access.INC_ZERO)
                })

        self.timer_lib_overhead.pause()

        _E = self.state.domain.extent

        self.timer_move.start()
        self._one_process_pbc_lib.execute(
            n=self.state.get_position_dat().npart_local,
            static_args={
                'E0': ctypes.c_double(_E[0]),
                'E1': ctypes.c_double(_E[1]),
                'E2': ctypes.c_double(_E[2])
            })

        # collapse any positive flag count to exactly 1
        res = self._flag[0]
        if res > 0:
            self._flag[0] = 1

        self.timer_move.pause()

    ############ ----- MULTIPROC -------
    else:

        if self._escape_guard_lib is None:
            # build lib
            self._escape_guard_lib = \
                cuda_build.build_static_libs('cudaNProcPBC')

        # --- init escape count ----
        if self._escape_count is None:
            self._escape_count = cuda_base.Array(ncomp=1,
                                                 dtype=ctypes.c_int32)
        self._escape_count[0] = 0

        # --- init escape dir count ----
        if self._escape_dir_count is None:
            self._escape_dir_count = cuda_base.Array(ncomp=26,
                                                     dtype=ctypes.c_int32)
        self._escape_dir_count[:] = 0

        # --- init escape list ----
        # sized at three entries per local particle, grown when too small
        nl3 = self.state.get_position_dat().npart_local * 3

        if self._escape_list is None:
            self._escape_list = cuda_base.Array(ncomp=nl3,
                                                dtype=ctypes.c_int32)
        elif self._escape_list.ncomp < nl3:
            self._escape_list.realloc(nl3)

        # --- find escapees ---
        nl = self.state.get_position_dat().npart_local

        if nl > 0:
            cuda_runtime.cuda_err_check(
                self._escape_guard_lib['cudaNProcPBCStageOne'](
                    ctypes.c_int32(nl),
                    self.state.domain.boundary.ctypes_data,
                    self.state.get_position_dat().ctypes_data,
                    self.state.domain.get_shift().ctypes_data,
                    self._escape_count.ctypes_data,
                    self._escape_dir_count.ctypes_data,
                    self._escape_list.ctypes_data))

        # one more column than the largest per-direction count
        dir_max = np.max(self._escape_dir_count[:]) + 1

        if self._escape_matrix is None:
            self._escape_matrix = cuda_base.Matrix(nrow=26,
                                                   ncol=dir_max,
                                                   dtype=ctypes.c_int32)
        elif self._escape_matrix.ncol < dir_max:
            self._escape_matrix.realloc(nrow=26, ncol=dir_max)

        # --- Populate escape matrix (essentially sort by direction)
        escape_count = self._escape_count[0]
        if (nl > 0) and (escape_count > 0):
            cuda_runtime.cuda_err_check(
                self._escape_guard_lib['cudaNProcPBCStageTwo'](
                    ctypes.c_int32(escape_count),
                    ctypes.c_int32(self._escape_matrix.ncol),
                    self._escape_list.ctypes_data,
                    self._escape_matrix.ctypes_data))

        self.state.move_to_neighbour(directions_matrix=self._escape_matrix,
                                     dir_counts=self._escape_dir_count)

        self.state.filter_on_domain_boundary()
def kernel(self):
    """
    Create the 'LJ_accel_U' kernel that avoids a division.

    The generated C code approximates ``1/r2`` with two iterations of
    ``xn = xn*(2.0 - r2*xn)`` starting from ``0.01`` and applies no cutoff
    test. The block inside ``/* ... */`` is disabled C code. A ``+``
    reduction on ``u[0]`` is declared.

    :return: ``kernel.Kernel`` for the potential.
    """
    source = '''
    const double R0 = P[1][0] - P[0][0];
    const double R1 = P[1][1] - P[0][1];
    const double R2 = P[1][2] - P[0][2];

    const double r2 = R0*R0 + R1*R1 + R2*R2;

    double xn = 0.01;
    for(int ix = 0; ix < 2; ix++){
        xn = xn*(2.0 - r2*xn);
    }

    const double r_m2 = sigma2*xn;
    const double r_m4 = r_m2*r_m2;
    const double r_m6 = r_m4*r_m2;

    const double _ex = r_m6;
    double _et = 1.0, _ep = 1.0, _ef = 1.0, _epx = 1.0;

    /*
    #pragma novector
    for(int _etx = 1; _etx < 21; _etx++){
        _epx *= _ex;
        _ef *= _ep;
        _ep++;
        xn = 0.01;
        #pragma novector
        for(int ix = 0; ix < 10; ix++){
            xn = xn*(2.0 - _ef*xn);
        }
        _et += _epx*xn;
    }
    */

    u[0]+=CV*((r_m6-1.0)*r_m6 + internalshift);

    const double r_m8 = r_m4*r_m4;
    const double f_tmp = CF*(r_m6 - 0.5)*r_m8;

    A[0][0]+=f_tmp*R0;
    A[0][1]+=f_tmp*R1;
    A[0][2]+=f_tmp*R2;

    A[1][0]-=f_tmp*R0;
    A[1][1]-=f_tmp*R1;
    A[1][2]-=f_tmp*R2;
    '''

    named_values = (
        ('sigma2', self._sigma**2),
        ('rc2', self._rc**2),
        ('internalshift', self._shift_internal),
        ('CF', self._C_F),
        ('CV', self._C_V),
    )
    lj_constants = tuple(
        kernel.Constant(symbol, value) for symbol, value in named_values
    )
    energy_reduction = (kernel.Reduction('u', 'u[0]', '+'), )

    return kernel.Kernel('LJ_accel_U', source, lj_constants,
                         [kernel.Header('stdio.h')], energy_reduction)
def _init_contrib_loop(self):
    """
    Build the particle loop stored on ``self._contrib_loop``.

    For every particle the generated C kernel

    * bins the particle into a cell on the finest level and increments
      that cell's entry in ``OCC_GA``,
    * walks from level ``R - 1`` down to level 0 recording the cell and
      child indices in ``MM_CELLS`` / ``MM_CHILD_INDEX``,
    * on every level adds the generated per-(lx, mx) terms, scaled by the
      particle charge and powers of the distance to the cell centre, to
      ``TREE``.

    Fix relative to the previous version: the y and z cell indices are now
    clamped against ``LCY`` and ``LCZ``. They were compared against
    ``LCX``, which is only correct when all three directions have the same
    number of finest-level cells.
    """
    # attribute access kept from the original; the value is unused here
    g = self.group
    extent = self.domain.extent
    # reciprocal of the finest-level cell width in each direction
    cell_widths = [1.0 / (ex / (sx**(self.R - 1)))
                   for ex, sx in zip(extent, self.subdivision)]

    sph_gen = self.sph_gen

    def cube_ind(L, M):
        # linear index of the (L, M) coefficient: L*(L+1) + M
        return ((L) * ((L) + 1) + (M))

    # Generated C statements substituted for {ASSIGN_GEN}. rhol holds
    # radius**lx while the terms for a given lx are emitted.
    assign_lines = [
        'double rhol = 1.0;\n',
        'double rholcharge = rhol * charge;\n',
    ]
    for lx in range(self.L):
        for mx in range(-lx, lx + 1):
            # the symbol for -mx is stored at the (lx, mx) slot
            ylm = sph_gen.get_y_sym(lx, -mx)
            assign_lines.append(
                'TREE[OFFSET + {ind}] += {ylmm} * rholcharge;\n'.format(
                    ind=cube_ind(lx, mx),
                    ylmm=str(ylm[0])
                )
            )
            assign_lines.append(
                'TREE[OFFSET + IM_OFFSET + {ind}] += {ylmm} * rholcharge;\n'.format(
                    ind=cube_ind(lx, mx),
                    ylmm=str(ylm[1])
                )
            )
        assign_lines.append('rhol *= radius;\n')
        assign_lines.append('rholcharge = rhol * charge;\n')
    assign_gen = ''.join(assign_lines)

    # The kernel source goes through str.format, hence the doubled braces
    # around C blocks.
    k = kernel.Kernel(
        'mm_contrib_loop',
        r'''

        const double rx = P.i[0];
        const double ry = P.i[1];
        const double rz = P.i[2];

        // bin into finest level cell
        const double srx = rx + HEX;
        const double sry = ry + HEY;
        const double srz = rz + HEZ;

        int64_t cfx = srx * CWX;
        int64_t cfy = sry * CWY;
        int64_t cfz = srz * CWZ;

        // clamp each index to the last cell of its own direction
        cfx = (cfx < LCX) ? cfx : (LCX - 1);
        cfy = (cfy < LCY) ? cfy : (LCY - 1);
        cfz = (cfz < LCZ) ? cfz : (LCZ - 1);

        // number of cells in each direction
        int64_t ncx = LCX;
        int64_t ncy = LCY;
        int64_t ncz = LCZ;

        // increment the occupancy for this cell
        OCC_GA[cfx + LCX * (cfy + LCY * cfz)]++;

        MM_FINE_CELLS.i[0] = cfx;
        MM_FINE_CELLS.i[1] = cfy;
        MM_FINE_CELLS.i[2] = cfz;

        for( int level=R-1 ; level>=0 ; level-- ){{

            // child on this level
            const int64_t cix = cfx % SDX;
            const int64_t ciy = cfy % SDY;
            const int64_t ciz = cfz % SDZ;

            // record the cell indices
            MM_CELLS.i[level * 3 + 0] = cfx;
            MM_CELLS.i[level * 3 + 1] = cfy;
            MM_CELLS.i[level * 3 + 2] = cfz;

            // record the child cell indices
            MM_CHILD_INDEX.i[level * 3 + 0] = cix;
            MM_CHILD_INDEX.i[level * 3 + 1] = ciy;
            MM_CHILD_INDEX.i[level * 3 + 2] = ciz;

            // compute the cells for the next level
            cfx /= SDX;
            cfy /= SDY;
            cfz /= SDZ;

        }}

        // compute the multipole expansions
        for( int level=0 ; level<R ; level++) {{

            const int64_t cellx = MM_CELLS.i[level * 3 + 0];
            const int64_t celly = MM_CELLS.i[level * 3 + 1];
            const int64_t cellz = MM_CELLS.i[level * 3 + 2];

            const double dx = rx - ((-HEX) + (0.5 + cellx) * WIDTHS_X[level]);
            const double dy = ry - ((-HEY) + (0.5 + celly) * WIDTHS_Y[level]);
            const double dz = rz - ((-HEZ) + (0.5 + cellz) * WIDTHS_Z[level]);

            const double xy2 = dx * dx + dy * dy;
            const double radius = sqrt(xy2 + dz * dz);
            const double theta = atan2(sqrt(xy2), dz);
            const double phi = atan2(dy, dx);
            const double charge = Q.i[0];

            const int64_t lin_ind = cellx + NCELLS_X[level] * (celly + NCELLS_Y[level] * cellz);
            const int64_t OFFSET = LEVEL_OFFSETS[level] + NCOMP * lin_ind;

            {SPH_GEN}
            {ASSIGN_GEN}

        }}

        '''.format(
            SPH_GEN=str(sph_gen.module),
            ASSIGN_GEN=str(assign_gen)
        ),
        (
            Constant('R', self.R),
            Constant('EX', extent[0]),
            Constant('EY', extent[1]),
            Constant('EZ', extent[2]),
            Constant('HEX', 0.5 * extent[0]),
            Constant('HEY', 0.5 * extent[1]),
            Constant('HEZ', 0.5 * extent[2]),
            Constant('CWX', cell_widths[0]),
            Constant('CWY', cell_widths[1]),
            Constant('CWZ', cell_widths[2]),
            Constant('LCX', self.subdivision[0] ** (self.R - 1)),
            Constant('LCY', self.subdivision[1] ** (self.R - 1)),
            Constant('LCZ', self.subdivision[2] ** (self.R - 1)),
            Constant('SDX', self.subdivision[0]),
            Constant('SDY', self.subdivision[1]),
            Constant('SDZ', self.subdivision[2]),
            Constant('IL_NO', self.il_array.shape[1]),
            Constant('IL_STRIDE_OUTER',
                     self.il_array.shape[1] * self.il_array.shape[2]),
            Constant('NCOMP', self.ncomp),
            Constant('IM_OFFSET', self.L**2)
        ),
        # header with one array entry per level, filled from the
        # pre-formatted strings held on self
        headers=(
            lib.build.write_header(
                """
                #define R {R}
                const double WIDTHS_X[R] = {{ {WIDTHS_X} }};
                const double WIDTHS_Y[R] = {{ {WIDTHS_Y} }};
                const double WIDTHS_Z[R] = {{ {WIDTHS_Z} }};

                const int64_t NCELLS_X[R] = {{ {NCELLS_X} }};
                const int64_t NCELLS_Y[R] = {{ {NCELLS_Y} }};
                const int64_t NCELLS_Z[R] = {{ {NCELLS_Z} }};

                const int64_t LEVEL_OFFSETS[R] = {{ {LEVEL_OFFSETS} }};
                """.format(
                    R=self.R,
                    WIDTHS_X=self.widths_x_str,
                    WIDTHS_Y=self.widths_y_str,
                    WIDTHS_Z=self.widths_z_str,
                    NCELLS_X=self.ncells_x_str,
                    NCELLS_Y=self.ncells_y_str,
                    NCELLS_Z=self.ncells_z_str,
                    LEVEL_OFFSETS=self.level_offsets_str
                )
            ),
        )
    )

    dat_dict = {
        'P': self.positions(access.READ),
        'Q': self.charges(access.READ),
        'MM_FINE_CELLS': self._dat_fine_cells(access.WRITE),
        'MM_CELLS': self._dat_cells(access.WRITE),
        'MM_CHILD_INDEX': self._dat_child_index(access.WRITE),
        'OCC_GA': self.cell_occupation_ga(access.INC_ZERO),
        'TREE': self.tree(access.INC_ZERO),
    }

    self._contrib_loop = loop.ParticleLoopOMP(kernel=k, dat_dict=dat_dict)
def integrate_thermostat(self, dt=None, t=None, temp=273.15, nu=1.0):
    """
    Integrate state forward in time with the thermostat kernel.

    Rebuilds the 'vv1' loop and a 'vv2_thermostat' loop, then runs
    ``_velocity_verlet_integration_thermostat`` inside a ``Timer``. In the
    second-stage kernel each particle's velocity is either redrawn (when a
    ``rand()`` sample falls below ``rate``) or advanced by ``dht*A/M``.

    :arg double dt: Time step size (keeps the stored value when ``None``).
    :arg double t: End time (keeps the stored value when ``None``).
    :arg double temp: Temperature of heat bath.
    :arg double nu: Multiplied by the time step to give the kernel
        constant ``rate``.
    """
    self._Temp = temp
    self._nu = nu

    if dt is not None:
        self._dt = dt
    if t is not None:
        self._T = t

    # number of steps, rounded up
    self._max_it = int(math.ceil(self._T / self._dt))

    self._constants1 = [
        kernel.Constant(symbol, value)
        for symbol, value in (('dt', self._dt), ('dht', 0.5 * self._dt))
    ]
    self._kernel1 = kernel.Kernel('vv1', self._kernel1_code,
                                  self._constants1)
    first_stage_dats = {
        'P': self._P,
        'V': self._V,
        'A': self._A,
        'M': self._M
    }
    self._p1 = loop.ParticleLoop(self._kernel1, first_stage_dats)

    self._kernel2_thermostat_code = '''

    //Anderson thermostat here.
    //probably horrific random code.

    const double tmp_rand_max = 1.0/RAND_MAX;

    if (rand()*tmp_rand_max < rate) {

        //Box-Muller method.

        const double scale = sqrt(temperature/M(0));
        const double stmp = scale*sqrt(-2.0*log(rand()*tmp_rand_max));

        const double V0 = 2.0*M_PI*rand()*tmp_rand_max;
        V(0) = stmp*cos(V0);
        V(1) = stmp*sin(V0);
        V(2) = scale*sqrt(-2.0*log(rand()*tmp_rand_max))*cos(2.0*M_PI*rand()*tmp_rand_max);

    }
    else {
        const double M_tmp = 1./M(0);
        V(0) += dht*A(0)*M_tmp;
        V(1) += dht*A(1)*M_tmp;
        V(2) += dht*A(2)*M_tmp;
    }

    '''

    thermostat_values = (
        ('rate', self._dt * self._nu),
        ('dt', self._dt),
        ('dht', 0.5 * self._dt),
        ('temperature', self._Temp),
    )
    self._constants2_thermostat = [
        kernel.Constant(symbol, value) for symbol, value in thermostat_values
    ]

    self._kernel2_thermostat = kernel.Kernel(
        'vv2_thermostat',
        self._kernel2_thermostat_code,
        self._constants2_thermostat,
        headers=['math.h', 'stdlib.h', 'time.h', 'stdio.h']
    )
    second_stage_dats = {
        'V': self._V,
        'A': self._A,
        'M': self._M
    }
    self._p2_thermostat = loop.ParticleLoop(self._kernel2_thermostat,
                                            second_stage_dats)

    run_timer = ppmd.opt.Timer(runtime.TIMER, 0, start=True)
    self._velocity_verlet_integration_thermostat()
    run_timer.stop("VelocityVerletAnderson")
def _init_bin_loop(self):
    """
    Build the particle loop that bins particles into the cell hierarchy.

    For each particle the generated kernel computes the finest-level cell
    from the position, stores it in ``MC_FC`` and increments ``OCC_GA``
    for that cell. It then walks the levels from ``R - 1`` down to ``0``;
    on each level it loops over the interaction-list offsets (``IL``) of
    the particle's child index, skips offsets that fall outside the grid,
    and records the offset cell, its linear index, the level and the
    displacement from that cell's centre. A final loop converts the
    recorded displacements to spherical coordinates and calls
    ``inline_local_exp`` on the matching slice of ``TL``.

    The finished loop is stored on ``self._cell_bin_loop``.
    """
    g = self.group
    extent = self.domain.extent
    # Reciprocal of the finest-level cell width along each axis.
    cell_widths = [
        1.0 / (ex / (sx**(self.R - 1)))
        for ex, sx in zip(extent, self.subdivision)
    ]

    # The source passes through str.format() (with no arguments) so every
    # literal C brace is written doubled.
    k = kernel.Kernel(
        'mc_bin_loop',
        r'''
        const double rx = P.i[0];
        const double ry = P.i[1];
        const double rz = P.i[2];

        // bin into finest level cell
        const double srx = rx + HEX;
        const double sry = ry + HEY;
        const double srz = rz + HEZ;

        int64_t cfx = srx * CWX;
        int64_t cfy = sry * CWY;
        int64_t cfz = srz * CWZ;

        // clamp each axis against its own cell count
        cfx = (cfx < LCX) ? cfx : (LCX - 1);
        cfy = (cfy < LCY) ? cfy : (LCY - 1);
        cfz = (cfz < LCZ) ? cfz : (LCZ - 1);

        // record the finest level cells
        MC_FC.i[0] = cfx;
        MC_FC.i[1] = cfy;
        MC_FC.i[2] = cfz;

        // number of cells in each direction
        int64_t ncx = LCX;
        int64_t ncy = LCY;
        int64_t ncz = LCZ;

        // increment the occupancy for this cell
        OCC_GA[cfx + LCX * (cfy + LCY * cfz)]++;

        int64_t n = 0;
        for( int level=R-1 ; level>=0 ; level-- ){{

            // cell widths for cell centre computation
            const double wx = EX / ncx;
            const double wy = EY / ncy;
            const double wz = EZ / ncz;

            // child on this level
            const int64_t cix = cfx % SDX;
            const int64_t ciy = cfy % SDY;
            const int64_t ciz = cfz % SDZ;
            const int64_t ci = cix + SDX * (ciy + SDY * ciz);

            // loop over IL for this child cell
            for( int ox=0 ; ox<IL_NO ; ox++){{

                const int64_t ocx = cfx + IL[ci * IL_STRIDE_OUTER + ox * 3 + 0];
                const int64_t ocy = cfy + IL[ci * IL_STRIDE_OUTER + ox * 3 + 1];
                const int64_t ocz = cfz + IL[ci * IL_STRIDE_OUTER + ox * 3 + 2];

                // free space for now
                if (ocx < 0) {{continue;}}
                if (ocy < 0) {{continue;}}
                if (ocz < 0) {{continue;}}
                if (ocx >= ncx) {{continue;}}
                if (ocy >= ncy) {{continue;}}
                if (ocz >= ncz) {{continue;}}

                MC_CX.i[n] = ocx;
                MC_CY.i[n] = ocy;
                MC_CZ.i[n] = ocz;
                MC_CL.i[n] = ocx + ncx * (ocy + ncy * ocz);
                MC_LEVEL.i[n] = level;

                MC_DX.i[n] = rx - ((-HEX) + (0.5 * wx) + (ocx * wx));
                MC_DY.i[n] = ry - ((-HEY) + (0.5 * wy) + (ocy * wy));
                MC_DZ.i[n] = rz - ((-HEZ) + (0.5 * wz) + (ocz * wz));

                n++;
            }}

            // compute the cells for the next level
            cfx /= SDX;
            cfy /= SDY;
            cfz /= SDZ;

            // number of cells in each dim for the next level
            ncx /= SDX;
            ncy /= SDY;
            ncz /= SDZ;

        }}

        MC_NEXP.i[0] = n;

        // compute offsets as spherical coordinates in a vcectorisable loop
        for( int ox=0 ; ox<n ; ox++){{
            const double dx = MC_DX.i[ox];
            const double dy = MC_DY.i[ox];
            const double dz = MC_DZ.i[ox];

            const double xy2 = dx * dx + dy * dy;

            MC_DX.i[ox] = sqrt(xy2 + dz * dz);
            MC_DY.i[ox] = atan2(sqrt(xy2), dz);
            MC_DZ.i[ox] = atan2(dy, dx);
            MC_CHR.i[ox] = Q.i[0];

            inline_local_exp(
                Q.i[0],
                sqrt(xy2 + dz * dz),
                atan2(sqrt(xy2), dz),
                atan2(dy, dx),
                &TL[TL_OFFSETS[MC_LEVEL.i[ox]] + NCOMP * MC_CL.i[ox]]
            );
        }}

        '''.format(),
        (
            Constant('R', self.R),
            Constant('NCOMP', self.ncomp),
            Constant('EX', extent[0]),
            Constant('EY', extent[1]),
            Constant('EZ', extent[2]),
            Constant('HEX', 0.5 * extent[0]),
            Constant('HEY', 0.5 * extent[1]),
            Constant('HEZ', 0.5 * extent[2]),
            Constant('CWX', cell_widths[0]),
            Constant('CWY', cell_widths[1]),
            Constant('CWZ', cell_widths[2]),
            Constant('LCX', self.subdivision[0] ** (self.R - 1)),
            Constant('LCY', self.subdivision[1] ** (self.R - 1)),
            Constant('LCZ', self.subdivision[2] ** (self.R - 1)),
            Constant('SDX', self.subdivision[0]),
            Constant('SDY', self.subdivision[1]),
            Constant('SDZ', self.subdivision[2]),
            Constant('IL_NO', self.il_array.shape[1]),
            Constant('IL_STRIDE_OUTER', self.il_array.shape[1] * self.il_array.shape[2]),
        ),
        headers=(
            lib.build.write_header(
                self.mc_lee.create_local_exp_header +
                self.mc_lee.create_local_exp_src
            ),
        )
    )

    dat_dict = {
        'P': self.positions(access.READ),
        'Q': self.charges(access.READ),
        'IL': self.il_scalararray(access.READ),
        'MC_FC': g._mc_fmm_cells(access.WRITE),
        'MC_NEXP': g._mc_nexp(access.WRITE),
        'MC_CHR': g._mc_charge(access.WRITE),
        'MC_DX': g._mc_radius(access.WRITE),
        'MC_DY': g._mc_theta(access.WRITE),
        'MC_DZ': g._mc_phi(access.WRITE),
        'MC_LEVEL': g._mc_level(access.WRITE),
        'MC_CX': g._mc_cx(access.WRITE),
        'MC_CY': g._mc_cy(access.WRITE),
        'MC_CZ': g._mc_cz(access.WRITE),
        'MC_CL': g._mc_cl(access.WRITE),
        'OCC_GA': self.cell_occupation_ga(access.INC_ZERO),
        'TL_OFFSETS': self.tree_local_ga_offsets(access.READ),
        'TL': self.tree_local_ga(access.INC_ZERO),
    }

    self._cell_bin_loop = loop.ParticleLoopOMP(kernel=k, dat_dict=dat_dict)
def _init_pbc(self):
    """
    Create the global arrays, long-range object and particle loop used
    for the top-level expansion.

    Allocates ``top_multipole_expansion_ga`` and ``top_dot_vector_ga``,
    constructs ``self.lrc`` and generates a kernel that, per particle,
    converts the position to spherical coordinates and accumulates
    ``charge * radius**l`` times the generated harmonic expressions into
    ``MULTIPOLE`` and ``DOT_VEC``. Real parts are written at index
    ``l * (l + 1) + m`` and imaginary parts ``L**2`` entries later.
    The loop is stored on ``self._lr_loop``.
    """
    self.top_multipole_expansion_ga = data.GlobalArray(ncomp=self.ncomp, dtype=REAL)
    self.top_dot_vector_ga = data.GlobalArray(ncomp=self.ncomp, dtype=REAL)
    self.lrc = LongRangeMTL(self.L, self.domain, exclude_tuples=self.il[1])

    sph_gen = self.sph_gen

    # Generated C statements, one list entry per statement.
    statements = ['double rhol = charge;\n']
    for lx in range(self.L):
        for mx in range(-lx, lx + 1):
            re_index = lx * (lx + 1) + mx
            im_index = re_index + self.L**2

            re_part, im_part = sph_gen.get_y_sym(lx, -mx)
            statements.append(
                'MULTIPOLE[{}] += {} * rhol;\n'.format(re_index, str(re_part)))
            statements.append(
                'MULTIPOLE[{}] += {} * rhol;\n'.format(im_index, str(im_part)))

            re_part, im_part = sph_gen.get_y_sym(lx, mx)
            statements.append(
                'DOT_VEC[{}] += {} * rhol;\n'.format(re_index, str(re_part)))
            statements.append(
                'DOT_VEC[{}] += {} * rhol;\n'.format(im_index, '-1.0 * ' + str(im_part)))

        # one further power of the radius for the next degree
        statements.append('rhol *= radius;\n')
    assign_gen = ''.join(statements)

    kernel_source = r'''
        const double dx = P.i[0];
        const double dy = P.i[1];
        const double dz = P.i[2];

        const double xy2 = dx * dx + dy * dy;
        const double radius = sqrt(xy2 + dz * dz);
        const double theta = atan2(sqrt(xy2), dz);
        const double phi = atan2(dy, dx);
        const double charge = Q.i[0];

        {SPH_GEN}
        {ASSIGN_GEN}

        '''.format(
        SPH_GEN=str(sph_gen.module),
        ASSIGN_GEN=str(assign_gen)
    )

    math_header = lib.build.write_header(
        r'''
        #include <math.h>
        '''
    )

    lr_kernel = kernel.Kernel(
        'mm_lm_lr_kernel',
        kernel_source,
        headers=(math_header, )
    )

    loop_dats = {
        'P': self.positions(access.READ),
        'Q': self.charges(access.READ),
        'MULTIPOLE': self.top_multipole_expansion_ga(access.INC_ZERO),
        'DOT_VEC': self.top_dot_vector_ga(access.INC_ZERO),
    }
    self._lr_loop = loop.ParticleLoopOMP(lr_kernel, dat_dict=loop_dats)
def kernel(self):
    """
    Build and return the kernel object for this potential.

    The kernel source works on a pair of particles: it forms the squared
    separation, replaces the division by ten iterations of
    ``xn = xn*(2.0 - r2*xn)`` started from ``0.01``, adds an energy
    contribution to ``u(0)`` and applies equal and opposite increments to
    ``A`` for both particles. ``sigma2``, ``rc2``, ``internalshift``,
    ``CF`` and ``CV`` are passed as kernel constants and ``u`` is declared
    as a ``+`` reduction.
    """
    source = '''

    const double R0 = P(1, 0) - P(0, 0);
    const double R1 = P(1, 1) - P(0, 1);
    const double R2 = P(1, 2) - P(0, 2);

    const double r2 = R0*R0 + R1*R1 + R2*R2;

    double xn = 0.01;
    for(int ix = 0; ix < 10; ix++){
        xn = xn*(2.0 - r2*xn);
    }

    const double r_m2 = sigma2*xn;
    const double r_m4 = r_m2*r_m2;
    const double r_m6 = r_m4*r_m2;

    const double _ex = r_m6;
    double _et = 1.0, _ep = 1.0, _ef = 1.0, _epx = 1.0;

    for(int _etx = 1; _etx < 21; _etx++){
        _epx *= _ex;
        _ef *= _ep;
        _ep++;
        xn = 0.01;
        for(int ix = 0; ix < 10; ix++){
            xn = xn*(2.0 - _ef*xn);
        }
        _et += _epx*xn;
    }

    u(0)+=CV*((r_m6-1.0)*r_m6 + internalshift) + _et;

    const double r_m8 = r_m4*r_m4;
    const double f_tmp = CF*(r_m6 - 0.5)*r_m8;

    A(0, 0)+=f_tmp*R0;
    A(0, 1)+=f_tmp*R1;
    A(0, 2)+=f_tmp*R2;

    A(1, 0)-=f_tmp*R0;
    A(1, 1)-=f_tmp*R1;
    A(1, 2)-=f_tmp*R2;

    '''

    # (name, value) pairs in the order they are handed to the kernel.
    named_values = (
        ('sigma2', self._sigma**2),
        ('rc2', self._rc**2),
        ('internalshift', self._shift_internal),
        ('CF', self._C_F),
        ('CV', self._C_V),
    )
    constants = tuple(
        kernel.Constant(label, value) for label, value in named_values
    )

    reductions = (kernel.Reduction('u', 'u[0]', '+'), )

    return kernel.Kernel('LJ_accel_U', source, constants, ['stdio.h'], reductions)
def _init_contrib_loop(self):
    """
    Build the particle loop that adds each particle's contribution to the
    expansions held in ``TREE``.

    The generated kernel bins the particle into its finest-level cell
    (stored in ``MM_FINE_CELLS`` and counted in ``OCC_GA``), records the
    cell and child indices on every level in ``MM_CELLS`` and
    ``MM_CHILD_INDEX``, and then, for levels ``1 .. R-1``, loops over the
    interaction-list offsets (``IL``) of the particle's child index. For
    each offset cell it computes the displacement from the cell centre in
    spherical coordinates and accumulates ``charge / radius**(l + 1)``
    times the generated harmonic expressions into ``TREE``.

    Offset cells outside the grid are skipped for
    ``BCType.FREE_SPACE`` and wrapped back into the grid for
    ``BCType.NEAREST`` / ``BCType.PBC``; the displacement is computed
    before the wrap is applied.

    The finished loop is stored on ``self._contrib_loop``.

    :raises RuntimeError: if ``self.boundary_condition`` is not one of
        the handled types.
    """
    bc = self.boundary_condition

    if bc == BCType.FREE_SPACE:
        # Not passed through format() itself; the doubled braces reach
        # the C source unchanged and form a nested compound statement.
        bc_block = r'''
            if (ocx < 0) {{continue;}}
            if (ocy < 0) {{continue;}}
            if (ocz < 0) {{continue;}}
            if (ocx >= ncx) {{continue;}}
            if (ocy >= ncy) {{continue;}}
            if (ocz >= ncz) {{continue;}}
        '''
    elif bc in (BCType.NEAREST, BCType.PBC):
        # Adding a multiple of the cell count keeps the left operand of
        # % non-negative.
        bc_block = r'''
            ocx = (ocx + ({O})*ncx) % ncx;
            ocy = (ocy + ({O})*ncy) % ncy;
            ocz = (ocz + ({O})*ncz) % ncz;
        '''.format(O=self.max_il_offset * 2)
    else:
        raise RuntimeError('Unkown boundary condition.')

    extent = self.domain.extent
    # Reciprocal of the finest-level cell width along each axis.
    cell_widths = [
        1.0 / (ex / (sx**(self.R - 1)))
        for ex, sx in zip(extent, self.subdivision)
    ]

    sph_gen = self.sph_gen

    def cube_ind(L, M):
        return ((L) * ((L) + 1) + (M))

    assign_gen = 'const double iradius = 1.0 / radius;\n'
    assign_gen += 'double rholcharge = iradius * charge;\n'
    for lx in range(self.L):
        for mx in range(-lx, lx + 1):
            re_ylm, im_ylm = sph_gen.get_y_sym(lx, -mx)
            assign_gen += 'TREE[OFFSET + {ind}] += {ylmm} * rholcharge;\n'.format(
                ind=cube_ind(lx, mx), ylmm=str(re_ylm))
            assign_gen += 'TREE[OFFSET + IM_OFFSET + {ind}] += {ylmm} * rholcharge;\n'.format(
                ind=cube_ind(lx, mx), ylmm=str(im_ylm))
        # one further inverse power of the radius for the next degree
        assign_gen += 'rholcharge *= iradius;\n'

    k = kernel.Kernel(
        'lm_contrib_loop',
        r'''
        const double rx = P.i[0];
        const double ry = P.i[1];
        const double rz = P.i[2];
        const double charge = Q.i[0];

        // bin into finest level cell
        const double srx = rx + HEX;
        const double sry = ry + HEY;
        const double srz = rz + HEZ;

        int64_t cfx = srx * CWX;
        int64_t cfy = sry * CWY;
        int64_t cfz = srz * CWZ;

        // clamp each axis against its own cell count
        cfx = (cfx < LCX) ? cfx : (LCX - 1);
        cfy = (cfy < LCY) ? cfy : (LCY - 1);
        cfz = (cfz < LCZ) ? cfz : (LCZ - 1);

        // increment the occupancy for this cell
        OCC_GA[cfx + LCX * (cfy + LCY * cfz)]++;

        MM_FINE_CELLS.i[0] = cfx;
        MM_FINE_CELLS.i[1] = cfy;
        MM_FINE_CELLS.i[2] = cfz;

        for( int level=R-1 ; level>=0 ; level-- ){{

            // child on this level
            const int64_t cix = cfx % SDX;
            const int64_t ciy = cfy % SDY;
            const int64_t ciz = cfz % SDZ;

            // record the cell indices
            MM_CELLS.i[level * 3 + 0] = cfx;
            MM_CELLS.i[level * 3 + 1] = cfy;
            MM_CELLS.i[level * 3 + 2] = cfz;

            // record the child cell indices
            MM_CHILD_INDEX.i[level * 3 + 0] = cix;
            MM_CHILD_INDEX.i[level * 3 + 1] = ciy;
            MM_CHILD_INDEX.i[level * 3 + 2] = ciz;

            // compute the cells for the next level
            cfx /= SDX;
            cfy /= SDY;
            cfz /= SDZ;
        }}

        // compute the local expansions
        for( int level=1 ; level<R ; level++) {{

            // cell on this level
            const int64_t cfx = MM_CELLS.i[level * 3 + 0];
            const int64_t cfy = MM_CELLS.i[level * 3 + 1];
            const int64_t cfz = MM_CELLS.i[level * 3 + 2];

            // child on this level
            const int64_t cix = cfx % SDX;
            const int64_t ciy = cfy % SDY;
            const int64_t ciz = cfz % SDZ;
            const int64_t ci = cix + SDX * (ciy + SDY * ciz);

            // cell widths on this level
            const double wx = WIDTHS_X[level];
            const double wy = WIDTHS_Y[level];
            const double wz = WIDTHS_Z[level];

            // number of cells on this level
            const int64_t ncx = NCELLS_X[level];
            const int64_t ncy = NCELLS_Y[level];
            const int64_t ncz = NCELLS_Z[level];

            // loop over IL for this child cell
            for( int ox=0 ; ox<IL_NO ; ox++){{

                int64_t ocx = cfx + IL[ci * IL_STRIDE_OUTER + ox * 3 + 0];
                int64_t ocy = cfy + IL[ci * IL_STRIDE_OUTER + ox * 3 + 1];
                int64_t ocz = cfz + IL[ci * IL_STRIDE_OUTER + ox * 3 + 2];

                const double dx = rx - ((-HEX) + (0.5 * wx) + (ocx * wx));
                const double dy = ry - ((-HEY) + (0.5 * wy) + (ocy * wy));
                const double dz = rz - ((-HEZ) + (0.5 * wz) + (ocz * wz));

                {BC_BLOCK}

                const int64_t lin_ind = ocx + NCELLS_X[level] * (ocy + NCELLS_Y[level] * ocz);

                const double xy2 = dx * dx + dy * dy;
                const double radius = sqrt(xy2 + dz * dz);
                const double theta = atan2(sqrt(xy2), dz);
                const double phi = atan2(dy, dx);

                const int64_t OFFSET = LEVEL_OFFSETS[level] + NCOMP * lin_ind;

                {SPH_GEN}
                {ASSIGN_GEN}
            }}
        }}
        '''.format(
            BC_BLOCK=bc_block,
            SPH_GEN=str(sph_gen.module),
            ASSIGN_GEN=str(assign_gen)
        ),
        (
            Constant('EX', extent[0]),
            Constant('EY', extent[1]),
            Constant('EZ', extent[2]),
            Constant('HEX', 0.5 * extent[0]),
            Constant('HEY', 0.5 * extent[1]),
            Constant('HEZ', 0.5 * extent[2]),
            Constant('CWX', cell_widths[0]),
            Constant('CWY', cell_widths[1]),
            Constant('CWZ', cell_widths[2]),
            Constant('LCX', self.subdivision[0]**(self.R - 1)),
            Constant('LCY', self.subdivision[1]**(self.R - 1)),
            Constant('LCZ', self.subdivision[2]**(self.R - 1)),
            Constant('SDX', self.subdivision[0]),
            Constant('SDY', self.subdivision[1]),
            Constant('SDZ', self.subdivision[2]),
            Constant('IL_NO', self.il_array.shape[1]),
            Constant('IL_STRIDE_OUTER', self.il_array.shape[1] * self.il_array.shape[2]),
            Constant('NCOMP', self.ncomp),
            Constant('IM_OFFSET', self.L**2)
        ),
        headers=(
            lib.build.write_header(
                """
                #define R {R}
                const double WIDTHS_X[R] = {{ {WIDTHS_X} }};
                const double WIDTHS_Y[R] = {{ {WIDTHS_Y} }};
                const double WIDTHS_Z[R] = {{ {WIDTHS_Z} }};
                const int64_t NCELLS_X[R] = {{ {NCELLS_X} }};
                const int64_t NCELLS_Y[R] = {{ {NCELLS_Y} }};
                const int64_t NCELLS_Z[R] = {{ {NCELLS_Z} }};
                const int64_t LEVEL_OFFSETS[R] = {{ {LEVEL_OFFSETS} }};
                """.format(
                    R=self.R,
                    WIDTHS_X=self.widths_x_str,
                    WIDTHS_Y=self.widths_y_str,
                    WIDTHS_Z=self.widths_z_str,
                    NCELLS_X=self.ncells_x_str,
                    NCELLS_Y=self.ncells_y_str,
                    NCELLS_Z=self.ncells_z_str,
                    LEVEL_OFFSETS=self.level_offsets_str
                )
            ),
        )
    )

    dat_dict = {
        'IL': self.il_scalararray(access.READ),
        'P': self.positions(access.READ),
        'Q': self.charges(access.READ),
        'MM_FINE_CELLS': self._dat_fine_cells(access.WRITE),
        'MM_CELLS': self._dat_cells(access.WRITE),
        'MM_CHILD_INDEX': self._dat_child_index(access.WRITE),
        'OCC_GA': self.cell_occupation_ga(access.INC_ZERO),
        'TREE': self.tree(access.INC_ZERO)
    }

    self._contrib_loop = loop.ParticleLoopOMP(kernel=k, dat_dict=dat_dict)
def _init_extract_loop(self):
    """
    Build the particle loop that reads an energy per particle back out of
    the expansions held in ``TREE``.

    For levels ``1 .. R-1`` the generated kernel takes the particle's
    cell and child index from ``MM_CELLS`` / ``MM_CHILD_INDEX``, loops
    over the interaction-list offsets (``IL``) for that child, computes
    the displacement from each offset cell centre in spherical
    coordinates and sums the real part of the product of the stored
    coefficients with the generated harmonic expressions, weighted by
    ``1 / radius**(l + 1)``. Half of the sum times the particle charge is
    added to ``OUT_ENERGY[0]``.

    Offset cells outside the grid are skipped for
    ``BCType.FREE_SPACE`` and wrapped back into the grid for
    ``BCType.NEAREST`` / ``BCType.PBC``.

    The finished loop is stored on ``self._extract_loop``.

    :raises RuntimeError: if ``self.boundary_condition`` is not one of
        the handled types.
    """
    g = self.group
    extent = self.domain.extent
    # Reciprocal of the finest-level cell width along each axis.
    inv_widths = [
        1.0 / (ex / (sx**(self.R - 1)))
        for ex, sx in zip(extent, self.subdivision)
    ]

    sph_gen = self.sph_gen

    # Generated C statements, one list entry per statement.
    energy_lines = []
    for lx in range(self.L):
        for mx in range(-lx, lx + 1):
            index = lx * (lx + 1) + mx
            reL = SphSymbol('TREE[OFFSET + {ind}]'.format(ind=index))
            imL = SphSymbol('TREE[OFFSET + IM_OFFSET + {ind}]'.format(ind=index))
            reY, imY = sph_gen.get_y_sym(lx, mx)
            # only the real part of the complex product is accumulated
            real_product = cmplx_mul(reL, imL, reY, imY)[0]
            energy_lines.append(
                'tmp_energy += rhol * ({phi_sym});\n'.format(phi_sym=str(real_product)))
        energy_lines.append('rhol *= iradius;\n')
    assign_gen = ''.join(energy_lines)

    bc = self.boundary_condition
    if bc == BCType.FREE_SPACE:
        bc_block = r'''
            if (ocx < 0) {{continue;}}
            if (ocy < 0) {{continue;}}
            if (ocz < 0) {{continue;}}
            if (ocx >= ncx) {{continue;}}
            if (ocy >= ncy) {{continue;}}
            if (ocz >= ncz) {{continue;}}
        '''
    elif bc in (BCType.NEAREST, BCType.PBC):
        bc_block = r'''
            ocx = (ocx + ({O})*ncx) % ncx;
            ocy = (ocy + ({O})*ncy) % ncy;
            ocz = (ocz + ({O})*ncz) % ncz;
        '''.format(O=self.max_il_offset*2)
    else:
        raise RuntimeError('Unkown boundary condition.')

    kernel_source = r'''
        const double rx = P.i[0];
        const double ry = P.i[1];
        const double rz = P.i[2];

        double particle_energy = 0.0;

        for( int level=1 ; level<R ; level++ ){{

            // cell on this level
            const int64_t cfx = MM_CELLS.i[level*3 + 0];
            const int64_t cfy = MM_CELLS.i[level*3 + 1];
            const int64_t cfz = MM_CELLS.i[level*3 + 2];

            // number of cells on this level
            const int64_t ncx = NCELLS_X[level];
            const int64_t ncy = NCELLS_Y[level];
            const int64_t ncz = NCELLS_Z[level];

            // child on this level
            const int64_t cix = MM_CHILD_INDEX.i[level * 3 + 0];
            const int64_t ciy = MM_CHILD_INDEX.i[level * 3 + 1];
            const int64_t ciz = MM_CHILD_INDEX.i[level * 3 + 2];
            const int64_t ci = cix + SDX * (ciy + SDY * ciz);

            const double wx = WIDTHS_X[level];
            const double wy = WIDTHS_Y[level];
            const double wz = WIDTHS_Z[level];

            // loop over IL for this child cell
            for( int ox=0 ; ox<IL_NO ; ox++){{

                int64_t ocx = cfx + IL[ci * IL_STRIDE_OUTER + ox * 3 + 0];
                int64_t ocy = cfy + IL[ci * IL_STRIDE_OUTER + ox * 3 + 1];
                int64_t ocz = cfz + IL[ci * IL_STRIDE_OUTER + ox * 3 + 2];

                const double dx = rx - ((-HEX) + (0.5 * wx) + (ocx * wx));
                const double dy = ry - ((-HEY) + (0.5 * wy) + (ocy * wy));
                const double dz = rz - ((-HEZ) + (0.5 * wz) + (ocz * wz));

                {BC_BLOCK}

                const int64_t lin_ind = ocx + NCELLS_X[level] * (ocy + NCELLS_Y[level] * ocz);

                const double xy2 = dx * dx + dy * dy;
                const double radius = sqrt(xy2 + dz * dz);
                const double theta = atan2(sqrt(xy2), dz);
                const double phi = atan2(dy, dx);

                const int64_t OFFSET = LEVEL_OFFSETS[level] + NCOMP * lin_ind;

                {SPH_GEN}
                const double iradius = 1.0 / radius;
                double rhol = iradius;
                double tmp_energy = 0.0;
                {ASSIGN_GEN}

                //if (isnan(tmp_energy)){{
                //    printf(
                //        "radius %f theta %f phi %f\n",
                //        radius, theta, phi
                //    );
                //    std::raise(SIGINT);
                //}}

                particle_energy += tmp_energy;

            }}
        }}

        OUT_ENERGY[0] += particle_energy * 0.5 * Q.i[0];

        '''.format(
        SPH_GEN=str(sph_gen.module),
        ASSIGN_GEN=str(assign_gen),
        BC_BLOCK=bc_block
    )

    # (name, value) pairs in the order they are handed to the kernel.
    named_values = (
        ('R', self.R),
        ('EX', extent[0]),
        ('EY', extent[1]),
        ('EZ', extent[2]),
        ('HEX', 0.5 * extent[0]),
        ('HEY', 0.5 * extent[1]),
        ('HEZ', 0.5 * extent[2]),
        ('CWX', inv_widths[0]),
        ('CWY', inv_widths[1]),
        ('CWZ', inv_widths[2]),
        ('LCX', self.subdivision[0] ** (self.R - 1)),
        ('LCY', self.subdivision[1] ** (self.R - 1)),
        ('LCZ', self.subdivision[2] ** (self.R - 1)),
        ('SDX', self.subdivision[0]),
        ('SDY', self.subdivision[1]),
        ('SDZ', self.subdivision[2]),
        ('IL_NO', self.il_array.shape[1]),
        ('IL_STRIDE_OUTER', self.il_array.shape[1] * self.il_array.shape[2]),
        ('NCOMP', self.ncomp),
        ('IM_OFFSET', self.L**2),
    )
    kernel_constants = tuple(
        Constant(label, value) for label, value in named_values
    )

    level_tables_header = lib.build.write_header(
        """
        #include <csignal>
        #define R {R}
        const double WIDTHS_X[R] = {{ {WIDTHS_X} }};
        const double WIDTHS_Y[R] = {{ {WIDTHS_Y} }};
        const double WIDTHS_Z[R] = {{ {WIDTHS_Z} }};
        const int64_t NCELLS_X[R] = {{ {NCELLS_X} }};
        const int64_t NCELLS_Y[R] = {{ {NCELLS_Y} }};
        const int64_t NCELLS_Z[R] = {{ {NCELLS_Z} }};
        const int64_t LEVEL_OFFSETS[R] = {{ {LEVEL_OFFSETS} }};
        """.format(
            R=self.R,
            WIDTHS_X=self.widths_x_str,
            WIDTHS_Y=self.widths_y_str,
            WIDTHS_Z=self.widths_z_str,
            NCELLS_X=self.ncells_x_str,
            NCELLS_Y=self.ncells_y_str,
            NCELLS_Z=self.ncells_z_str,
            LEVEL_OFFSETS=self.level_offsets_str
        )
    )

    k = kernel.Kernel(
        'mm_extract_loop',
        kernel_source,
        kernel_constants,
        headers=(level_tables_header, )
    )

    dat_dict = {
        'P': self.positions(access.READ),
        'Q': self.charges(access.READ),
        'IL': self.il_scalararray(access.READ),
        'MM_CELLS': self._dat_cells(access.READ),
        'MM_CHILD_INDEX': self._dat_child_index(access.READ),
        'TREE': self.tree(access.READ),
        'OUT_ENERGY': self._extract_energy(access.INC_ZERO),
    }

    self._extract_loop = loop.ParticleLoopOMP(kernel=k, dat_dict=dat_dict)