def load_fields(self):
    """Return C declarations that expose the 'grid' struct members as arrays.

    One declaration is produced per entry of ``self.fields``; each casts
    ``grid-><label>`` to a pointer-to-array whose extents are the values of
    ``self.dim``.  The declarations are joined with newlines.
    """
    extents = ''.join('[%d]' % dim.value for dim in self.dim)
    lines = []
    for fld in self.fields:
        label = ccode(fld.label)
        lines.append('%s (*%s)%s = (%s (*)%s) grid->%s;'
                     % (self.real_t, label, extents,
                        self.real_t, extents, label))
    return '\n'.join(lines)
def time_stepping(self):
    """Generate the assignments of the cyclic time-index variables.

    The first variable in ``self.time`` is set to ``_ti % self.tp``; every
    following variable is set to ``(previous + 1) % self.tp``.  These
    variables address the time dimension of the field arrays (e.g. in a
    2nd order scheme U[t1] is updated from U[t0]).

    Returns a ``cgen.Module`` holding an ``omp single`` pragma followed by
    the block of assignments.
    """
    step = Symbol('_ti')
    assignments = []
    for position, time_var in enumerate(self.time):
        if position == 0:
            expr = step % self.tp
        else:
            expr = (self.time[position - 1] + 1) % self.tp
        assignments.append(cgen.Assign(time_var.name, ccode(expr)))
    guarded = [cgen.Pragma('omp single'), cgen.Block(assignments)]
    return cgen.Module(guarded)
def initialise(self):
    """Generate the loop nests that initialise every field.

    For each field the expression ``field.sol`` is evaluated at t = 0, the
    index symbols are replaced by underscore-prefixed loop variables, and
    the result is assigned to time level 0 inside a loop nest that runs
    from 0 to the full extent of each dimension.  The statements returned
    by ``generate_second_initialisation`` are appended afterwards.

    Returns a ``cgen.Module``.
    """
    loop_syms = [Symbol('_' + ix.name) for ix in self.index]
    innermost = self.dimension - 1
    statements = []
    for field in self.fields:
        nest = []
        if self.omp:
            statements.append(cgen.Pragma('omp for schedule(static,1)'))
        # wrap from the innermost dimension outwards
        for d in range(innermost, -1, -1):
            var = loop_syms[d]
            upper = ccode(self.dim[d])
            if d == innermost:
                # analytical solution at the first time step
                sol = field.sol.subs(self.t, 0)
                for ix in self.index:
                    sol = sol.subs(ix, '_' + ix.name)
                nest = [cgen.Assign(ccode(field[[0] + loop_syms]),
                                    ccode(sol))]
            nest = [cgen.For(
                cgen.InlineInitializer(cgen.Value('int', var), 0),
                cgen.Line('%s<%s' % (var, upper)),
                cgen.Line('++%s' % var),
                cgen.Block(nest))]
        statements.append(nest[0])
    statements += self.generate_second_initialisation()
    return cgen.Module(statements)
def time_stepping(self):
    """Generate the assignments of the cyclic time-index variables.

    The first variable in ``self.time`` is set to ``_ti % self.tp``; every
    following variable is set to ``(previous + 1) % self.tp``.  The
    assignments are rendered into the 'time_stepping.txt' template under
    the key ``body``.

    Returns the rendered code as a string.
    """
    tmpl = self.lookup.get_template('time_stepping.txt')
    step = Symbol('_ti')
    lines = []
    for position, time_var in enumerate(self.time):
        if position == 0:
            expr = step % self.tp
        else:
            expr = (self.time[position - 1] + 1) % self.tp
        lines.append(time_var.name + ' = ' + ccode(expr) + ';\n')
    return render(tmpl, {'body': ''.join(lines)})
def generate_second_initialisation(self): loop = [Symbol('_'+x.name) for x in self.index] # symbols for loop m = self.margin.value statements = [] v = symbols("v") for field in self.fields: body = [] if self.omp: statements.append(cgen.Pragma('omp for schedule(static,1)')) # populate xvalue, yvalue zvalue code for d in range(self.dimension-1, -1, -1): i = loop[d] i0 = m i1 = ccode(self.dim[d]-m) pre = [] post = [] if d == self.dimension-1: # inner loop # first time step kernel = self.transform_kernel(field) for arg in kernel.args: if str(arg).startswith("-") and str(self.t - 1) in str(arg): kernel = kernel.subs({arg: 0}, simultaneous=True) arg = 2*v*self.dt kernel = 0.5*(kernel + arg) kernel = kernel.subs({self.t: self.time[0]}) for idx in self.index: kernel = kernel.subs(idx, '_'+idx.name) body = [cgen.Assign(ccode(field[[self.time[1]]+loop]), ccode(kernel))] body = pre + body + post body = [cgen.For(cgen.InlineInitializer(cgen.Value('int', i), i0), cgen.Line('%s<%s' % (i, i1)), cgen.Line('++%s' % i), cgen.Block(body))] statements.append(body[0]) return statements
def declare_fields(self):
    """Generate C code that declares and allocates the field arrays.

    - for every stress and velocity field an aligned buffer is allocated
      (``_aligned_malloc`` under ``_MSC_VER``, ``posix_memalign`` otherwise)
      and then cast to a multidimensional array
    - the buffer holds the product of all dimension values times
      ``self.order[0]*2`` elements
    - when ``self.read`` is set, the output of ``read_data`` is appended
    - return the generated code as string
    """
    arr = ''.join('[' + d.name + ']' for d in self.dim)  # [dim1][dim2]...
    vsize = 1
    for d in self.dim:
        vsize *= d.value
    vsize *= self.order[0]*2

    real = self.real_t
    align = str(self.alignment)
    nbytes = str(vsize) + '*sizeof(' + real + ')'
    pieces = []
    for field in self.sfields + self.vfields:
        label = ccode(field.label)
        vec = '_' + label + '_vec'
        # aligned allocation, Windows and POSIX variants
        pieces.append(real + ' *' + vec + ';\n')
        pieces.append('#ifdef _MSC_VER\n')
        pieces.append(vec + ' = (' + real + '*) _aligned_malloc('
                      + nbytes + ', ' + align + ');\n')
        pieces.append('#else\n')
        pieces.append('posix_memalign((void **)(&' + vec + '), '
                      + align + ', ' + nbytes + ');\n')
        pieces.append('#endif\n')
        # view the flat buffer as a multidimensional array
        pieces.append(real + ' (*' + label + ')' + arr
                      + '= (' + real + ' (*)' + arr + ') ' + vec + ';\n')
    if self.read:
        pieces.append(self.read_data())
    return ''.join(pieces)
def declare_fields(self):
    """Generate cgen nodes that declare and allocate the field arrays.

    - for every field a pointer ``_<label>_vec`` is declared, allocated with
      aligned memory (``_aligned_malloc`` under ``_MSC_VER``,
      ``posix_memalign`` otherwise) and cast to a multidimensional array
    - the buffer holds the product of all dimension values times
      ``len(self.time)`` elements
    - a pointer ``_m_vec`` is declared last
    - return the generated code as a ``cgen.Module``
    """
    arr = ''.join('[' + d.name + ']' for d in self.dim)  # [dim1][dim2]...
    vsize = 1
    for d in self.dim:
        vsize *= d.value
    vsize *= len(self.time)

    real = self.real_t
    decls = []
    for field in self.fields:
        label = ccode(field.label)
        vec = "_%s_vec" % label
        decls.append(cgen.Pointer(cgen.Value(real, vec)))
        # aligned allocation, Windows and POSIX variants
        win_alloc = cgen.Assign(
            vec,
            '(%s*) _aligned_malloc(%s*sizeof(%s), %s)'
            % (real, str(vsize), real, str(self.alignment)))
        posix_alloc = cgen.Statement(
            'posix_memalign((void **)(&%s), %d, %d*sizeof(%s))'
            % (vec, self.alignment, vsize, real))
        decls.append(cgen.IfDef('_MSC_VER', [win_alloc], [posix_alloc]))
        # view the flat buffer as a multidimensional array
        decls.append(cgen.Initializer(
            cgen.Value(real, "(*%s)%s" % (label, arr)),
            '(%s (*)%s) %s' % (real, arr, vec)))
    decls.append(cgen.Pointer(cgen.Value(real, "_%s_vec" % ccode("m"))))
    return cgen.Module(decls)
def output_step(self):
    """Generate the per-time-step output code.

    When ``self.output_vts`` is set, the first field is handed to
    ``save_field_block`` with the file name prefix ``<label>_``; otherwise
    nothing is generated and ``None`` is returned.
    """
    if not self.output_vts:
        return None
    first = self.fields[0]
    return self.save_field_block(ccode(first.label) + "_",
                                 ccode(first.label))
def vtk_save_field(self):
    """Generate code that writes this field out with vtk.

    Renders the Mako template 'save_field.txt' with ``filename`` set to
    ``<label>_`` and ``field`` set to ``<label>``.
    Returns the generated code as string.
    """
    label = ccode(self.label)
    tmpl = self.lookup.get_template('save_field.txt')
    return render(tmpl, {'filename': label + '_', 'field': label})
def converge_test(self):
    """Generate code for the convergence test.

    - the L2 norm of each field against its analytical solution
      ``field.sol`` is accumulated over the domain (excluding
      ``self.margin.value`` cells on every side), scaled by the cell volume
      and stored in ``conv-><label>_l2``
    - a ``printf`` of each grid spacing is emitted first
    - an empty module is returned when ``self.converge`` is not set

    Returns the generated code as a ``cgen.Module``.
    """
    result = []
    if not self.converge:
        return cgen.Module(result)

    m = self.margin.value
    ti = self.ntsteps.value % 2  # last updated grid
    loop = [Symbol('_'+x.name) for x in self.index]  # symbols for loop

    for spacing in self.spacing:
        # Format with %s: '%d' would truncate a fractional spacing such as
        # 0.1 to 0 when the value is substituted on the Python side.
        result.append(cgen.Statement('printf("%s\\n")' % spacing.value))

    for field in self.fields:
        body = []
        l2 = ccode(field.label)+'_l2'
        idx = [ti] + loop
        result.append(cgen.Initializer(cgen.Value(self.real_t, l2), 0.0))
        # build the nest from the innermost dimension outwards
        for d in range(self.dimension-1, -1, -1):
            i = loop[d]
            i0 = m
            i1 = ccode(self.dim[d]-m)
            # physical coordinate of the current cell along dimension d
            expr = self.spacing[d]*(loop[d] - self.margin.value)
            pre = [cgen.Initializer(cgen.Value(self.real_t, self.index[d].name), ccode(expr))]
            if d == self.dimension-1:
                # inner loop: fields staggered in time sit half a step later
                tn = self.dt.value*self.ntsteps.value
                if field.staggered[0]:
                    tn += self.dt.value/2.0
                body = [cgen.Statement('%s += %s' % (l2, ccode((field[idx] - (field.sol.subs(self.t, tn)))**2.0)))]
            body = pre+body
            body = [cgen.For(cgen.InlineInitializer(cgen.Value('int', i), i0),
                             cgen.Line('%s<%s' % (i, i1)),
                             cgen.Line('++%s' % i),
                             cgen.Block(body))]
        result += body
        volume = 1.0
        for spacing in self.spacing:
            volume *= spacing.value
        l2_value = 'pow(' + l2 + '*' + ccode(volume) + ', 0.5)'
        result.append(cgen.Statement('conv->%s = %s' % (l2, l2_value)))
    return cgen.Module(result)
def initialise(self):
    """Generate code that initialises the stress and velocity fields.

    - the analytical function ``field.sol`` is evaluated at the starting
      time (half a time step for fields staggered in time, 0 otherwise)
    - coordinates are computed from the loop variables, with a half-cell
      offset along staggered dimensions
    - the innermost assignment is wrapped dimension by dimension through
      the 'generic_loop.txt' Mako template

    Returns the generated code as string.
    """
    tmpl = self.lookup.get_template('generic_loop.txt')
    margin = self.margin.value
    loop_syms = [Symbol('_' + ix.name) for ix in self.index]
    innermost = self.dimension - 1
    chunks = []
    for field in self.sfields + self.vfields:
        code = ''
        if self.omp:
            chunks.append('#pragma omp for\n')
        for d in range(innermost, -1, -1):
            var = loop_syms[d]
            if field.staggered[d + 1]:
                upper = ccode(self.dim[d] - margin - 1)
                coord = self.spacing[d]*(loop_syms[d] - self.margin.value + 0.5)
            else:
                upper = ccode(self.dim[d] - margin)
                coord = self.spacing[d]*(loop_syms[d] - self.margin.value)
            declare = self.real_t + ' ' + self.index[d].name + '= ' \
                + ccode(coord) + ';\n'
            if d == innermost:
                # first time step
                start = self.dt.value/2 if field.staggered[0] else 0
                sol = field.sol.subs(self.t, start)
                if self.read:
                    sol = sol.subs(field.media_param)
                for ix in self.index:
                    sol = sol.subs(ix, '_' + ix.name)
                code = ccode(field[[0] + loop_syms]) + '=' \
                    + ccode(sol) + ';\n'
            code = render(tmpl, {'i': var, 'i0': margin, 'i1': upper,
                                 'body': declare + code})
        chunks.append(code)
    return ''.join(chunks)
def vtk_save_field(self):
    """Generate the vtk output code for this field.

    The 'save_field.txt' Mako template receives the C name of the field
    as ``field`` and the same name followed by an underscore as
    ``filename``.  Returns the rendered code as string.
    """
    template = self.lookup.get_template('save_field.txt')
    context = {}
    context['filename'] = ccode(self.label) + '_'
    context['field'] = ccode(self.label)
    return render(template, context)
def generate_loop(self, fields):
    """Generate the update loop nest for the given fields.

    Shared by the stress and velocity loops, which differ only in the
    fields they are given.  The innermost loop comes from
    ``fission_kernel`` or ``simple_kernel`` depending on ``self.fission``;
    each outer dimension wraps it in another ``for`` loop that skips
    ``self.margin.value`` cells on both sides.  An OpenMP pragma is put in
    front unless ``self.pluto`` is set or ``self.omp`` is not.

    Returns a ``cgen.Module``.
    """
    if self.eval_const:
        self.create_const_dict()
    margin = self.margin.value
    innermost = self.dimension - 1
    nest = []
    for d in range(innermost, -1, -1):
        var = self.index[d]
        upper = ccode(self.dim[d] - margin)
        if d == innermost:
            bounds = [d, var, margin, upper]
            if self.fission:
                nest = self.fission_kernel(fields, bounds)
            else:
                nest = self.simple_kernel(fields, bounds)
        else:
            nest = [cgen.For(
                cgen.InlineInitializer(cgen.Value('int', var), margin),
                cgen.Line('%s<%s' % (var, upper)),
                cgen.Line('++%s' % var),
                cgen.Block(nest))]
    if not (self.pluto or not self.omp):
        nest.insert(0, cgen.Pragma('omp for schedule(static,1)'))
    return cgen.Module(nest)
def initialise(self):
    """Generate code that initialises the fields.

    - the analytical function ``field.sol`` is evaluated at the starting
      time (half a time step for fields staggered in time, 0 otherwise)
    - coordinates are computed from the loop variables, with a half-cell
      offset along staggered dimensions
    - when ``self.read`` is set, media parameters are substituted through
      ``self.media_dict`` and ``resolve_media_params``
    - the innermost assignment is wrapped in one ``for`` loop per dimension

    Returns the generated code as a ``cgen.Module``.
    """
    margin = self.margin.value
    loop_syms = [Symbol('_' + ix.name) for ix in self.index]
    innermost = self.dimension - 1
    statements = []
    for field in self.fields:
        nest = []
        if self.omp:
            statements.append(cgen.Pragma('omp for schedule(static,1)'))
        for d in range(innermost, -1, -1):
            var = loop_syms[d]
            if field.staggered[d + 1]:
                upper = ccode(self.dim[d] - margin - 1)
                coord = self.spacing[d]*(loop_syms[d] - self.margin.value + 0.5)
            else:
                upper = ccode(self.dim[d] - margin)
                coord = self.spacing[d]*(loop_syms[d] - self.margin.value)
            declare = [cgen.Initializer(
                cgen.Value(self.real_t, self.index[d].name), ccode(coord))]
            if d == innermost:
                # first time step
                start = self.dt.value/2 if field.staggered[0] else 0
                sol = field.sol.subs(self.t, start)
                if self.read:
                    sol = sol.subs(self.media_dict)
                    sol = self.resolve_media_params(sol)
                for ix in self.index:
                    sol = sol.subs(ix, '_' + ix.name)
                nest = [cgen.Assign(ccode(field[[0] + loop_syms]),
                                    ccode(sol))]
            nest = [cgen.For(
                cgen.InlineInitializer(cgen.Value('int', var), margin),
                cgen.Line('%s<%s' % (var, upper)),
                cgen.Line('++%s' % var),
                cgen.Block(declare + nest))]
        statements.append(nest[0])
    return cgen.Module(statements)
def define_fields(self):
    """Code fragment that defines one ``real_t`` pointer per field"""
    pointers = [cgen.Pointer(cgen.Value(self.real_t, ccode(fld.label)))
                for fld in self.fields]
    return cgen.Module(pointers)
def store_fields(self):
    """Code fragment that stores field arrays to 'grid' struct"""
    def store(fld):
        # the right-hand side is assembled as text because it carries a cast
        label = ccode(fld.label)
        return cgen.Assign('grid->%s' % label,
                           '(%s*) %s' % (self.real_t, label))

    return cgen.Module([store(fld) for fld in self.fields])
def load_fields(self):
    """Code fragment that loads field arrays from 'grid' struct

    Each ``grid-><label>`` member is cast to a pointer-to-array whose
    extents are the values of ``self.dim``.  Returns a ``cgen.Module``.
    """
    extents = ''.join('[%d]' % dim.value for dim in self.dim)
    loads = []
    for fld in self.fields:
        label = ccode(fld.label)
        # declarator and cast are assembled as text
        declared = cgen.Value(self.real_t, "(*%s)%s" % (label, extents))
        source = '(%s (*)%s) grid->%s' % (self.real_t, extents, label)
        loads.append(cgen.Initializer(declared, source))
    return cgen.Module(loads)
def simple_kernel(self, grid_field, indexes):
    """
    Generate the inner loop with all fields from stress or velocity
    :param grid_field: stress or velocity field array
    :param indexes: array with dimension, dimension var, initial margin, final margin
    - every field is assigned ``kernel_sympy(field)`` at the last time
      variable of ``self.time``
    - ivdep / simd pragmas are put in front of the loop when enabled,
      pluto is off and the loop is the innermost dimension
    - return the loop (and pragmas) as a list of cgen nodes
    """
    var, lower, upper = indexes[1], indexes[2], indexes[3]
    newest = self.time[len(self.time) - 1]
    target = [newest] + self.index
    # one assignment per field, all sharing the same innermost loop
    updates = [cgen.Assign(ccode(fld[target]), ccode(self.kernel_sympy(fld)))
               for fld in grid_field]
    code = [cgen.For(cgen.InlineInitializer(cgen.Value('int', var), lower),
                     cgen.Line('%s<%s' % (var, upper)),
                     cgen.Line('++%s' % var),
                     cgen.Block(updates))]
    if not self.pluto and self.ivdep and indexes[0] == self.dimension - 1:
        code.insert(0, self.compiler._ivdep)
    if not self.pluto and self.simd and indexes[0] == self.dimension - 1:
        code.insert(0, cgen.Pragma('simd'))
    return code
def set_free_surface(self, indices, d, b, side, read=False): """ - set free surface boundary condition to boundary d, at index b :param indices: list of indices, e.g. [t,x,y,z] for 3D :param d: direction of the boundary surface normal :param b: location of the boundary (index) :param side: lower boundary (0) or upper boundary (1) - e.g. set_free_surface([t,x,y,z],1,2,0) set y-z plane at x=2 to be lower free surface - ghost cells are calculated using reflection of stress fields - store the code to populate ghost cells to self.bc """ # use this stress field to solve for ghost cell expression field = self.sfields[d] idx = list(indices) if not field.staggered[d]: # if not staggered, solve ghost cell using T'[b]=0 eq = Eq(field.dt) shift = hf t = b - hf else: # if staggered, solve ghost cell using T'[b-1/2]=T[b+1/2] eq = Eq(field.dt.subs(indices[d], indices[d]-hf), field.dt.subs(indices[d], indices[d]+hf)) shift = 1 t = b idx[d] -= ((-1)**side)*shift lhs = self[idx] rhs = solve(eq, lhs)[0] lhs = lhs.subs(indices[d], t) rhs = self.align(rhs.subs(indices[d], t)) # if read data from file, replace media parameters with array # replace particular index with boundary if read: rhs = rhs.subs(self.media_param) rhs = rhs.subs(indices[d], b) self.bc[d][side] = ccode(lhs) + ' = ' + ccode(rhs) + ';\n'
def free_memory(self):
    """
    - generate code that frees the buffer of every field held in 'grid'
    - ``_aligned_free`` is used under ``_MSC_VER``, ``free`` otherwise
    - return the generated code as a ``cgen.Module``
    """
    def release(fld):
        # matching deallocation for the aligned allocation of the field
        label = ccode(fld.label)
        windows = [cgen.Statement('_aligned_free(grid->%s)' % (label))]
        posix = [cgen.Statement('free(grid->%s)' % (label))]
        return cgen.IfDef('_MSC_VER', windows, posix)

    return cgen.Module([release(fld) for fld in self.fields])
def converge_test(self):
    """
    - generate code for convergence test
    - the L2 norm of each stress and velocity field against its analytical
      solution is accumulated over the domain, scaled by the cell volume
      and stored in ``conv-><label>_l2``
    - a ``printf`` of each grid spacing is emitted first
    - an empty string is returned when ``self.converge`` is not set
    - return generated code as string
    """
    if not self.converge:
        return ''
    tmpl = self.lookup.get_template('generic_loop.txt')
    margin = self.margin.value
    ti = self.ntsteps.value % 2  # last updated grid
    loop_syms = [Symbol('_' + ix.name) for ix in self.index]
    innermost = self.dimension - 1

    chunks = ['printf("' + str(sp.value) + '\\n");\n' for sp in self.spacing]
    for field in self.sfields + self.vfields:
        code = ''
        l2 = ccode(field.label) + '_l2'
        at = [ti] + loop_syms
        chunks.append(self.real_t + ' ' + l2 + ' = 0.0;\n')
        for d in range(innermost, -1, -1):
            var = loop_syms[d]
            if field.staggered[d + 1]:
                upper = ccode(self.dim[d] - margin - 1)
                coord = self.spacing[d]*(loop_syms[d] - self.margin.value + 0.5)
            else:
                upper = ccode(self.dim[d] - margin)
                coord = self.spacing[d]*(loop_syms[d] - self.margin.value)
            declare = self.real_t + ' ' + self.index[d].name + '= ' \
                + ccode(coord) + ';\n'
            if d == innermost:
                # fields staggered in time sit half a step later
                if not field.staggered[0]:
                    tn = self.dt.value*self.ntsteps.value
                else:
                    tn = self.dt.value*self.ntsteps.value + self.dt.value/2.0
                code = l2 + '+=' \
                    + ccode((field[at] - (field.sol.subs(self.t, tn)))**2.0) \
                    + ';\n'
            code = render(tmpl, {'i': var, 'i0': margin, 'i1': upper,
                                 'body': declare + code})
        chunks.append(code)
        volume = 1.0
        for sp in self.spacing:
            volume *= sp.value
        l2_value = 'pow(' + l2 + '*' + ccode(volume) + ', 0.5)'
        chunks.append('conv->%s = %s;\n' % (l2, l2_value))
    return ''.join(chunks)
def velocity_loop(self):
    """
    generate code for velocity field update loop
    - the innermost body assigns ``fd_align`` of every velocity field, with
      ``t+1`` and ``t`` replaced by the time variables, to time level
      ``self.time[1]``
    - the body is wrapped dimension by dimension through the
      'generic_loop.txt' Mako template
    - ivdep / simd pragmas go in front of the innermost loop, the OpenMP
      pragma in front of the whole nest
    return generated code as string
    """
    tmpl = self.lookup.get_template('generic_loop.txt')
    margin = self.margin.value
    innermost = self.dimension - 1
    code = ''
    for d in range(innermost, -1, -1):
        is_inner = (d == innermost)
        upper = ccode(self.dim[d] - margin)
        if is_inner:
            target = [self.time[1]] + self.index
            swap = {self.t+1: self.time[1], self.t: self.time[0]}
            for field in self.vfields:
                code += ccode(field[target]) + '=' \
                    + ccode(field.fd_align.xreplace(swap)) + ';\n'
        code = render(tmpl, {'i': self.index[d], 'i0': margin,
                             'i1': upper, 'body': code})
        if self.ivdep and is_inner:
            code = '%s\n' % self.compiler._ivdep + code
        if self.simd and is_inner:
            code = '#pragma simd\n' + code
    if self.omp:
        code = '#pragma omp for\n' + code
    return code
def simple_loop(self, kernel):
    """
    - helper that wraps ``kernel`` (a C statement without the trailing
      semicolon) in a nested loop over the entire domain
      (not including ghost cells)
    - variables defined in self.index are used as loop variables
    - loops are rendered through the 'generic_loop.txt' Mako template
    """
    tmpl = self.lookup.get_template('generic_loop.txt')
    margin = self.margin.value
    innermost = self.dimension - 1
    code = ''
    for d in reversed(range(self.dimension)):
        if d == innermost:
            code += kernel + ';\n'
        context = {'i': self.index[d], 'i0': margin,
                   'i1': ccode(self.dim[d] - margin), 'body': code}
        code = render(tmpl, context)
    return code
class OpesciConvergence(Structure):
    # One C float member named '<label>_l2' per field.  `self` is not
    # defined in this fragment; it must come from the enclosing scope.
    _fields_ = [
        ('%s_l2' % ccode(fld.label), c_float)
        for fld in self.fields
    ]
class OpesciGrid(Structure):
    # One pointer-to-float member per field, named after the field label.
    # `self` is not defined in this fragment; it must come from the
    # enclosing scope.
    _fields_ = [
        (ccode(fld.label), POINTER(c_float))
        for fld in self.fields
    ]
def read_data(self):
    """
    - generate code for reading data (rho, Vp, Vs) from input files
    - calculate effective media parameters beta, lambda, mu from the data
    - an empty string is returned when ``self.read`` is not set
    - return generated code as string
    """
    if not self.read:
        return ''

    real = self.real_t
    arr = ''.join('[' + d.name + ']' for d in self.dim[1:])  # [dim2][dim3]...
    vsize = 1
    for d in self.dim:
        vsize *= d.value
    count = str(vsize)
    nbytes = count + '*sizeof(' + real + ')'
    align = str(self.alignment)

    out = []
    # declare fields to read physical parameters from file
    for field in [self.rho, self.vp, self.vs] + self.beta + [self.lam] + self.mu:
        label = ccode(field.label)
        vec = '_' + label + '_vec'
        # aligned allocation, Windows and POSIX variants
        out.append(real + ' *' + vec + ';\n')
        out.append('#ifdef _MSC_VER\n')
        out.append(vec + ' = (' + real + '*) _aligned_malloc('
                   + nbytes + ', ' + align + ');\n')
        out.append('#else\n')
        out.append('posix_memalign((void **)(&' + vec + '), '
                   + align + ', ' + nbytes + ');\n')
        out.append('#endif\n')
        # view the flat buffer as a multidimensional array
        out.append(real + ' (*' + label + ')' + arr
                   + '= (' + real + ' (*)' + arr + ') ' + vec + ';\n')

    # read from file
    for path, field in ((self.rho_file, self.rho),
                        (self.vp_file, self.vp),
                        (self.vs_file, self.vs)):
        out.append('opesci_read_simple_binary_ptr("' + path + '",_'
                   + ccode(field.label) + '_vec, ' + count + ');\n')

    # calculated effective media parameter
    idx = self.index

    def shifted(*axes):
        # copy of idx moved by one cell along the given axes
        moved = list(idx)
        for axis in axes:
            moved[axis] += 1
        return moved

    idx100, idx010, idx001 = shifted(0), shifted(1), shifted(2)
    idx110, idx101, idx011 = shifted(0, 1), shifted(0, 2), shifted(1, 2)

    rho, vp, vs = self.rho, self.vp, self.vs
    beta, mu = self.beta, self.mu

    def harmonic(a, b, c):
        # harmonic mean of mu over idx and three neighbouring cells
        return 1.0/(0.25*(1.0/mu[0][idx]+1.0/mu[0][a]
                          + 1.0/mu[0][b]+1.0/mu[0][c]))

    kernels = [
        # beta
        (beta[0][idx], 1.0/rho[idx]),
        # beta1..beta3 (effective buoyancy in x, y, z direction)
        (beta[1][idx], (beta[0][idx] + beta[0][idx100])/2.0),
        (beta[2][idx], (beta[0][idx] + beta[0][idx010])/2.0),
        (beta[3][idx], (beta[0][idx] + beta[0][idx001])/2.0),
        # lambda
        (self.lam[idx], rho[idx]*(vp[idx]**2-2*vs[idx]**2)),
        # mu
        (mu[0][idx], rho[idx]*(vs[idx]**2)),
        # mu12, mu13, mu23 (effective shear moduli for sigma_xy/xz/yz)
        (mu[1][idx], harmonic(idx100, idx010, idx110)),
        (mu[2][idx], harmonic(idx100, idx001, idx101)),
        (mu[3][idx], harmonic(idx010, idx001, idx011)),
    ]
    for lhs, rhs in kernels:
        out.append(self.simple_loop(ccode(lhs) + '=' + ccode(rhs)))
    return ''.join(out)
def save_field_block(self, filename, field):
    """Generate the code that dumps one field to a .vts file.

    :param filename: prefix of the output file name; the time step counter
    ``_ti`` is appended in the generated code
    :param field: C name of the field array to dump

    The generated block declares ``dims`` and ``spacing``, builds the file
    name and calls ``opesci_dump_field_vts_3d`` on the last time variable
    of ``self.time``.  It is guarded by an ``omp single`` pragma.

    Returns a ``cgen.Module``.
    """
    last_time = ccode(self.time[len(self.time)-1])
    statements = [
        # One extent per dimension, matching the dx1/dx2/dx3 spacings below.
        # The previous initializer repeated dim1 three times, which is only
        # right for cubic grids.
        cgen.Initializer(cgen.Value("int", "dims[]"), "{dim1, dim2, dim3}"),
        cgen.Initializer(cgen.Value("float", "spacing[]"), "{dx1, dx2, dx3}"),
        cgen.Assign("std::string vtkfile",
                    "\""+filename+"\" + std::to_string(_ti)"),
        cgen.Statement("opesci_dump_field_vts_3d(vtkfile, dims, spacing, 2, &"
                       + field+"["+last_time+"][0][0][0])"),
    ]
    return cgen.Module([cgen.Pragma("omp single"), cgen.Block(statements)])
def print_convergence(self):
    """Code fragment that prints convergence norms

    One ``printf`` per field, showing the field label followed by
    ``conv.<label>_l2`` with ten decimals.  Returns a ``cgen.Module``.
    """
    statements = []
    for fld in self.fields:
        label = ccode(fld.label)
        call = 'printf("' + label + ' \t%.10f\\n", conv.' + label + '_l2)'
        statements.append(cgen.Statement(call))
    return cgen.Module(statements)
def define_convergence(self):
    """Code fragment that defines one ``<label>_l2`` norm per field"""
    norms = [cgen.Value(self.real_t, '%s_l2' % ccode(fld.label))
             for fld in self.fields]
    return cgen.Module(norms)
def read_data(self):
    """
    - generate code for reading data (rho, Vp, Vs) from input files
    - calculate effective media parameters beta, lambda, mu from the data
    - an empty list is returned when ``self.read`` is not set
    - return generated code as a list of cgen nodes
    """
    if not self.read:
        return []

    real = self.real_t
    arr = ''.join('[' + d.name + ']' for d in self.dim[1:])  # [dim2][dim3]...
    vsize = 1
    for d in self.dim:
        vsize *= d.value

    statements = []
    # declare fields to read physical parameters from file
    for field in [self.rho, self.vp, self.vs] + self.beta + [self.lam] + self.mu:
        label = ccode(field.label)
        vec = "_%s_vec" % label
        statements.append(cgen.Pointer(cgen.Value(real, vec)))
        # aligned allocation, Windows and POSIX variants
        win_alloc = cgen.Assign(
            vec,
            '(%s*) _aligned_malloc(%d * sizeof(%s), %d)'
            % (real, vsize, real, self.alignment))
        posix_alloc = cgen.Statement(
            'posix_memalign((void **)(&%s), %d, %d*sizeof(%s))'
            % (vec, self.alignment, vsize, real))
        statements.append(cgen.IfDef('_MSC_VER', [win_alloc], [posix_alloc]))
        # view the flat buffer as a multidimensional array
        statements.append(cgen.Initializer(
            cgen.Value(real, "(*%s)%s" % (label, arr)),
            '(%s (*)%s) %s' % (real, arr, vec)))

    # read from file
    for path, field in ((self.rho_file, self.rho),
                        (self.vp_file, self.vp),
                        (self.vs_file, self.vs)):
        statements.append(cgen.Statement(
            'opesci_read_simple_binary_ptr("%s", _%s_vec, %d)'
            % (path, field.label, vsize)))

    # calculated effective media parameter
    idx = self.index

    def shifted(*axes):
        # copy of idx moved by one cell along the given axes
        moved = list(idx)
        for axis in axes:
            moved[axis] += 1
        return moved

    idx100, idx010, idx001 = shifted(0), shifted(1), shifted(2)
    idx110, idx101, idx011 = shifted(0, 1), shifted(0, 2), shifted(1, 2)

    rho, vp, vs = self.rho, self.vp, self.vs
    beta, mu = self.beta, self.mu

    def harmonic(a, b, c):
        # harmonic mean of mu over idx and three neighbouring cells
        return 1.0/(0.25*(1.0/mu[0][idx]+1.0/mu[0][a]
                          + 1.0/mu[0][b]+1.0/mu[0][c]))

    kernels = [
        # beta
        (beta[0][idx], 1.0/rho[idx]),
        # beta1..beta3 (effective buoyancy in x, y, z direction)
        (beta[1][idx], (beta[0][idx] + beta[0][idx100])/2.0),
        (beta[2][idx], (beta[0][idx] + beta[0][idx010])/2.0),
        (beta[3][idx], (beta[0][idx] + beta[0][idx001])/2.0),
        # lambda
        (self.lam[idx], rho[idx]*(vp[idx]**2-2*vs[idx]**2)),
        # mu
        (mu[0][idx], rho[idx]*(vs[idx]**2)),
        # mu12, mu13, mu23 (effective shear moduli for sigma_xy/xz/yz)
        (mu[1][idx], harmonic(idx100, idx010, idx110)),
        (mu[2][idx], harmonic(idx100, idx001, idx101)),
        (mu[3][idx], harmonic(idx010, idx001, idx011)),
    ]
    for lhs, rhs in kernels:
        assignment = cgen.Assign(ccode(lhs), ccode(rhs))
        statements.append(self.simple_loop(assignment))
    return statements
def simple_loop(self, kernel):
    """
    - helper that wraps ``kernel`` (a cgen node) in a nested loop over the
      entire domain (not including ghost cells)
    - variables defined in self.index are used as loop variables
    - returns the outermost ``cgen.For``
    """
    margin = self.margin.value
    nest = kernel
    for axis in reversed(range(self.dimension)):
        var = self.index[axis]
        upper = ccode(self.dim[axis] - margin)
        nest = cgen.For(
            cgen.InlineInitializer(cgen.Value('int', var), margin),
            cgen.Line('%s<%s' % (var, upper)),
            cgen.Line('++%s' % var),
            nest)
    return nest
def fission_kernel(self, grid_field, indexes):
    """
    Generate the inner loop with all fields from stress or velocity
    :param grid_field: stress or velocity field array
    :param indexes: array with dimension, dimension var, initial margin, final margin
    - iterate through fields and, for each dimension, put the terms with
      negative strides and the terms with positive strides in loops of
      their own; the remaining terms go into a final loop per field
    - return the generated loops (and pragmas) as a list of cgen nodes

    NOTE(review): block nesting was reconstructed from a collapsed source
    line -- confirm against the original layout.
    """
    body = []
    body_tmp = []
    # the first loop emitted for a field assigns, later ones accumulate
    operator = ['=', '+=']
    idx = [self.time[1]] + self.index
    for field in grid_field:
        remainder_kernel = ()
        operator_idx = 0
        kernel = self.transform_kernel(field)
        if self.read:
            kernel = self.resolve_media_params(kernel)
        kernel_args = kernel.args
        for dim in range(self.dimension-1, -1, -1):
            dimension = self.index[dim]
            kernel_stmt_pos = kernel
            kernel_stmt_neg = kernel
            # For each dimension in each field iterate through its expressions to separate
            # positive and negative strides
            for arg in kernel_args:
                # strides are detected on the printed form of the term:
                # "<index> -" or "<index> +"
                if not (str(dimension) + " -" in str(arg)):
                    kernel_stmt_neg = kernel_stmt_neg.subs({arg: 0}, simultaneous=True)
                if not (str(dimension) + " +" in str(arg)):
                    kernel_stmt_pos = kernel_stmt_pos.subs({arg: 0}, simultaneous=True)
            # remember what has been handled so the final loop can drop it
            remainder_kernel += kernel_stmt_pos.args
            remainder_kernel += kernel_stmt_neg.args
            # Create the inner loop with negative strides expressions
            if not (len(kernel_stmt_neg.args) == 0):
                body_tmp = [cgen.Statement(ccode(field[idx]) + operator[operator_idx] + ccode(kernel_stmt_neg.xreplace({self.t+1: self.time[1], self.t: self.time[0]})))]
                body_tmp = [cgen.For(cgen.InlineInitializer(cgen.Value('int', indexes[1]), indexes[2]), cgen.Line('%s<%s' % (indexes[1], indexes[3])), cgen.Line('++%s' % indexes[1]), cgen.Block(body_tmp))]
                if not self.pluto and self.ivdep and indexes[0] == self.dimension-1:
                    body_tmp.insert(0, self.compiler._ivdep)
                if not self.pluto and self.simd and indexes[0] == self.dimension-1:
                    body_tmp.insert(0, cgen.Pragma('simd'))
                body = body + body_tmp
                operator_idx = 1
            # Create the inner loop with positive strides expressions
            if not (len(kernel_stmt_pos.args) == 0):
                body_tmp = [cgen.Statement(ccode(field[idx]) + operator[operator_idx] + ccode(kernel_stmt_pos.xreplace({self.t+1: self.time[1], self.t: self.time[0]})))]
                body_tmp = [cgen.For(cgen.InlineInitializer(cgen.Value('int', indexes[1]), indexes[2]), cgen.Line('%s<%s' % (indexes[1], indexes[3])), cgen.Line('++%s' % indexes[1]), cgen.Block(body_tmp))]
                if not self.pluto and self.ivdep and indexes[0] == self.dimension-1:
                    body_tmp.insert(0, self.compiler._ivdep)
                if not self.pluto and self.simd and indexes[0] == self.dimension-1:
                    body_tmp.insert(0, cgen.Pragma('simd'))
                body = body + body_tmp
                operator_idx = 1
        # Create the inner loop for unit strided array access
        kernel_stmt = kernel
        for arg in remainder_kernel:
            kernel_stmt = kernel_stmt.subs({arg: 0}, simultaneous=True)
        # this loop always uses '+=', whatever operator_idx is
        body_tmp = [cgen.Statement(ccode(field[idx]) + '+=' + ccode(kernel_stmt.xreplace({self.t+1: self.time[1], self.t: self.time[0]})))]
        body_tmp = [cgen.For(cgen.InlineInitializer(cgen.Value('int', indexes[1]), indexes[2]), cgen.Line('%s<%s' % (indexes[1], indexes[3])), cgen.Line('++%s' % indexes[1]), cgen.Block(body_tmp))]
        if not self.pluto and self.ivdep and indexes[0] == self.dimension-1:
            body_tmp.insert(0, self.compiler._ivdep)
        if not self.pluto and self.simd and indexes[0] == self.dimension-1:
            body_tmp.insert(0, cgen.Pragma('simd'))
        body = body + body_tmp
    return body
def copy_memory(self):
    """Return a one-element list assigning ``data`` to the ``_m_vec`` pointer."""
    # data, rowcount, colcount
    target = "_%s_vec" % ccode("m")
    return [cgen.Assign(target, 'data')]