def to_reference_coordinates(ufl_coordinate_element, parameters=None):
    """Generate C code for a kernel mapping physical to reference coordinates.

    The emitted ``to_reference_coords_kernel`` iterates: each step computes
    an update ``dX``, checks whether its squared norm is below the squared
    tolerance, and subtracts it from ``X``.  A single iteration is used when
    ``is_affine`` reports the element as affine, otherwise at most 16.

    :arg ufl_coordinate_element: UFL element of the coordinates
    :arg parameters: optional form compiler parameters; they are merged
        over ``tsfc.default_parameters()``
    :returns: C code as string
    """
    merged_parameters = tsfc.default_parameters()
    if parameters is not None:
        merged_parameters.update(parameters)
    parameters = merged_parameters

    # Create FInAT element
    element = tsfc.finatinterface.create_element(ufl_coordinate_element)

    cell = ufl_coordinate_element.cell()

    code = {
        "geometric_dimension": cell.geometric_dimension(),
        "topological_dimension": cell.topological_dimension(),
        "to_reference_coords": to_reference_coordinates_body(ufl_coordinate_element, parameters),
        "init_X": init_X(element.cell, parameters),
        "max_iteration_count": 1 if is_affine(ufl_coordinate_element) else 16,
        "convergence_epsilon": 1e-12,
        "dX_norm_square": dX_norm_square(cell.topological_dimension()),
        "X_isub_dX": X_isub_dX(cell.topological_dimension()),
        "IntType": as_cstr(IntType),
    }

    # The preprocessor directives must each sit on their own line: anything
    # following an ``#include`` on the same line is not compiled as code.
    evaluate_template_c = """#include <math.h>
#include <stdio.h>

static inline void to_reference_coords_kernel(double *X, const double *x0, const double *C)
{
    const int space_dim = %(geometric_dimension)d;

    /*
     * Mapping coordinates from physical to reference space
     */

%(init_X)s
    double x[space_dim];

    int converged = 0;
    for (int it = 0; !converged && it < %(max_iteration_count)d; it++) {
        double dX[%(topological_dimension)d] = { 0.0 };
%(to_reference_coords)s

        if (%(dX_norm_square)s < %(convergence_epsilon)g * %(convergence_epsilon)g) {
            converged = 1;
        }

%(X_isub_dX)s
    }
}"""

    return evaluate_template_c % code
def to_reference_coordinates(ufl_coordinate_element, parameters=None):
    """Generate C code for a kernel mapping physical to reference coordinates.

    The returned source defines ``to_reference_coords_kernel``.  Its loop
    computes an update ``dX`` per step, flags convergence once the squared
    norm of ``dX`` is below the squared tolerance, and subtracts ``dX`` from
    ``X``.  The loop runs once when ``is_affine`` holds for the element and
    up to 16 times otherwise.

    :arg ufl_coordinate_element: UFL element of the coordinates
    :arg parameters: optional form compiler parameters; they are merged
        over ``tsfc.default_parameters()``
    :returns: C code as string
    """
    merged_parameters = tsfc.default_parameters()
    if parameters is not None:
        merged_parameters.update(parameters)
    parameters = merged_parameters

    # Create FInAT element
    element = tsfc.finatinterface.create_element(ufl_coordinate_element)

    cell = ufl_coordinate_element.cell()

    code = {
        "geometric_dimension": cell.geometric_dimension(),
        "topological_dimension": cell.topological_dimension(),
        "to_reference_coords": to_reference_coordinates_body(ufl_coordinate_element, parameters),
        "init_X": init_X(element.cell, parameters),
        "max_iteration_count": 1 if is_affine(ufl_coordinate_element) else 16,
        "convergence_epsilon": 1e-12,
        "dX_norm_square": dX_norm_square(cell.topological_dimension()),
        "X_isub_dX": X_isub_dX(cell.topological_dimension()),
        "IntType": as_cstr(IntType),
    }

    # ``#include`` has to be alone on its line, otherwise the function
    # definition that follows it is lost to the preprocessor.
    evaluate_template_c = """#include <math.h>

static inline void to_reference_coords_kernel(double *X, const double *x0, const double *C)
{
    const int space_dim = %(geometric_dimension)d;

    /*
     * Mapping coordinates from physical to reference space
     */

%(init_X)s
    double x[space_dim];

    int converged = 0;
    for (int it = 0; !converged && it < %(max_iteration_count)d; it++) {
        double dX[%(topological_dimension)d] = { 0.0 };
%(to_reference_coords)s

        if (%(dX_norm_square)s < %(convergence_epsilon)g * %(convergence_epsilon)g) {
            converged = 1;
        }

%(X_isub_dX)s
    }
}"""

    return evaluate_template_c % code
def c_wrapper_arg(self):
    """Return the C parameter declarations this argument adds to the wrapper.

    A matrix argument contributes one ``Mat`` parameter; any other argument
    contributes one pointer parameter per entry of ``self.data``.  Indirect
    and matrix arguments additionally get one ``IntType`` pointer parameter
    for every component of every map that is not ``None``.

    :returns: the comma-separated declarations as a single string
    """
    if self._is_mat:
        decl = "Mat %s_" % self.c_arg_name()
    else:
        data_params = []
        for idx in range(len(self.data)):
            data_params.append("%s *%s" % (self.ctype, self.c_arg_name(idx)))
        decl = ', '.join(data_params)

    # Direct, non-matrix arguments carry no map parameters.
    if not (self._is_indirect or self._is_mat):
        return decl

    map_params = []
    for map_idx, arg_map in enumerate(as_tuple(self.map, Map)):
        if arg_map is None:
            continue
        for comp_idx, _ in enumerate(arg_map):
            map_params.append(
                ", %s *%s" % (as_cstr(IntType), self.c_map_name(map_idx, comp_idx)))
    return decl + ''.join(map_params)
def c_map_decl(self, is_facet=False):
    """Return C declarations of the ``xtr_``-prefixed map arrays.

    One ``IntType`` array is declared per map component, sized by that
    component's arity (doubled when ``is_facet`` is true).

    :arg is_facet: double the size of every declared array
    :returns: the declarations, one per line, followed by a newline
    """
    datasets = self.data.sparsity.dsets if self._is_mat else (self.data.dataset, )
    declarations = [
        "%(IntType)s xtr_%(name)s[%(dim)s];" % {
            'name': self.c_map_name(map_idx, comp_idx),
            'dim': component.arity * 2 if is_facet else component.arity,
            'IntType': as_cstr(IntType),
        }
        for map_idx, (arg_map, dset) in enumerate(zip(as_tuple(self.map, Map), datasets))
        for comp_idx, (component, _) in enumerate(zip(arg_map, dset))
    ]
    return '\n'.join(declarations) + '\n'
def compile_expression(slate_expr, tsfc_parameters=None):
    """Takes a Slate expression `slate_expr` and returns the appropriate
    :class:`firedrake.op2.Kernel` object representing the Slate expression.

    The result is cached on ``slate_expr._metakernel_cache`` and returned
    directly on subsequent calls.

    :arg slate_expr: a :class:`TensorBase` expression.
    :arg tsfc_parameters: an optional `dict` of form compiler parameters to
        be passed onto TSFC during the compilation of ufl forms.

    :raises ValueError: if `slate_expr` is not a :class:`TensorBase`, or a
        context kernel has an unrecognized integral type.
    :raises NotImplementedError: for mixed expressions or unsupported
        integral types.

    Returns: A `tuple` containing a `SplitKernel(idx, kinfo)`
    """
    if not isinstance(slate_expr, TensorBase):
        raise ValueError("Expecting a `TensorBase` expression, not %s" % type(slate_expr))

    # TODO: Get PyOP2 to write into mixed dats
    if any(len(a.function_space()) > 1 for a in slate_expr.arguments()):
        raise NotImplementedError("Compiling mixed slate expressions")

    # If the expression has already been symbolically compiled, then
    # simply reuse the produced kernel.
    if slate_expr._metakernel_cache is not None:
        return slate_expr._metakernel_cache

    # Initialize coefficients, shape and statements list
    expr_coeffs = slate_expr.coefficients()

    # We treat scalars as 1x1 MatrixBase objects, so we give
    # the right shape to do so and everything just falls out.
    # This bit here ensures the return result has the right
    # shape
    if slate_expr.rank == 0:
        shape = (1, )
    else:
        shape = slate_expr.shape

    statements = []

    # Create a builder for the Slate expression
    builder = KernelBuilder(expression=slate_expr,
                            tsfc_parameters=tsfc_parameters)

    # Initialize coordinate, cell orientations and facet/layer
    # symbols
    coordsym = ast.Symbol("coords")
    coords = None
    cell_orientations = ast.Symbol("cell_orientations")
    cellfacetsym = ast.Symbol("cell_facets")
    mesh_layer_sym = ast.Symbol("layer")
    inc = []

    # We keep track of temporaries that have been declared
    declared_temps = {}
    for cxt_kernel in builder.context_kernels:
        exp = cxt_kernel.tensor
        t = builder.temps[exp]

        if exp not in declared_temps:
            # Declare and initialize the temporary
            statements.append(ast.Decl(eigen_matrixbase_type(exp.shape), t))
            statements.append(ast.FlatBlock("%s.setZero();\n" % t))
            declared_temps[exp] = t

        it_type = cxt_kernel.original_integral_type

        if it_type not in supported_integral_types:
            raise NotImplementedError("Type %s not supported." % it_type)

        # Explicit checking of coordinates
        coordinates = exp.ufl_domain().coordinates
        if coords is not None:
            assert coordinates == coords
        else:
            coords = coordinates

        if it_type == "cell":
            # Nothing difficult about cellwise integrals. Just need
            # to get coefficient info, include_dirs and append
            # function calls to the appropriate subkernels.

            # If tensor is mixed, there will be more than one SplitKernel
            incl = []
            for splitkernel in cxt_kernel.tsfc_kernels:
                index = splitkernel.indices
                kinfo = splitkernel.kinfo

                # Generate an iterable of coefficients to pass to the subkernel
                # if any are required
                clist = [
                    c for ci in kinfo.coefficient_map
                    for c in builder.coefficient(exp.coefficients()[ci])
                ]

                if kinfo.oriented:
                    clist.insert(0, cell_orientations)

                incl.extend(kinfo.kernel._include_dirs)
                tensor = eigen_tensor(exp, t, index)
                statements.append(
                    ast.FunCall(kinfo.kernel.name, tensor, coordsym, *clist))

        elif it_type in [
                "interior_facet", "exterior_facet", "interior_facet_vert",
                "exterior_facet_vert"
        ]:
            # These integral types will require accessing local facet
            # information and looping over facet indices.
            builder.require_cell_facets()
            loop_stmt, incl = facet_integral_loop(cxt_kernel, builder,
                                                  coordsym, cellfacetsym,
                                                  cell_orientations)
            statements.append(loop_stmt)

        elif it_type == "interior_facet_horiz":
            # The infamous interior horizontal facet
            # will have two SplitKernels: one top,
            # one bottom. The mesh layer will determine
            # which kernels we call.
            builder.require_mesh_layers()
            top_sks = [
                k for k in cxt_kernel.tsfc_kernels
                if k.kinfo.integral_type == "exterior_facet_top"
            ]
            bottom_sks = [
                k for k in cxt_kernel.tsfc_kernels
                if k.kinfo.integral_type == "exterior_facet_bottom"
            ]
            assert len(top_sks) == len(bottom_sks), (
                "Number of top and bottom kernels should be equal")
            # Top and bottom kernels need to be sorted by kinfo.indices
            # if the space is mixed to ensure indices match.
            top_sks = sorted(top_sks, key=lambda x: x.indices)
            bottom_sks = sorted(bottom_sks, key=lambda x: x.indices)
            stmt, incl = extruded_int_horiz_facet(exp, builder, top_sks,
                                                  bottom_sks, coordsym,
                                                  mesh_layer_sym,
                                                  cell_orientations)
            statements.append(stmt)

        elif it_type in ["exterior_facet_bottom", "exterior_facet_top"]:
            # These kernels will only be called if we are on
            # the top or bottom layers of the extruded mesh.
            builder.require_mesh_layers()
            stmt, incl = extruded_top_bottom_facet(cxt_kernel, builder,
                                                   coordsym, mesh_layer_sym,
                                                   cell_orientations)
            statements.append(stmt)

        else:
            raise ValueError("Kernel type not recognized: %s" % it_type)

        # Don't duplicate include lines.  Append in first-seen order rather
        # than via a set difference, so that the resulting include path
        # order does not depend on string hashing.
        for include_dir in incl:
            if include_dir not in inc:
                inc.append(include_dir)

    # Now we handle any terms that require auxiliary temporaries,
    # such as inverses, transposes and actions of a tensor on a
    # coefficient
    if builder.aux_exprs:
        # The declared temps will be updated within this method
        aux_statements = auxiliary_temporaries(builder, declared_temps)
        statements.extend(aux_statements)

    # Now we create the result statement by declaring its eigen type and
    # using Eigen::Map to move between Eigen and C data structs.
    result_sym = ast.Symbol("T%d" % len(builder.temps))
    result_data_sym = ast.Symbol("A%d" % len(builder.temps))
    result_type = "Eigen::Map<%s >" % eigen_matrixbase_type(shape)
    result = ast.Decl(SCALAR_TYPE, ast.Symbol(result_data_sym, shape))
    result_statement = ast.FlatBlock(
        "%s %s((%s *)%s);\n" %
        (result_type, result_sym, SCALAR_TYPE, result_data_sym))
    statements.append(result_statement)

    # Generate the complete c++ string performing the linear algebra operations
    # on Eigen matrices/vectors
    cpp_string = ast.FlatBlock(
        metaphrase_slate_to_cpp(slate_expr, declared_temps))
    statements.append(ast.Incr(result_sym, cpp_string))

    # Finalize AST for macro kernel construction
    builder._finalize_kernels_and_update()

    # Generate arguments for the macro kernel
    args = [result, ast.Decl("%s **" % SCALAR_TYPE, coordsym)]

    # Orientation information
    if builder.oriented:
        args.append(ast.Decl("int **", cell_orientations))

    # Coefficient information
    for c in expr_coeffs:
        if isinstance(c, Constant):
            ctype = "%s *" % SCALAR_TYPE
        else:
            ctype = "%s **" % SCALAR_TYPE
        args.extend([ast.Decl(ctype, csym) for csym in builder.coefficient(c)])

    # Facet information
    if builder.needs_cell_facets:
        args.append(
            ast.Decl("%s *" % as_cstr(cell_to_facets_dtype), cellfacetsym))

    # NOTE: We need to be careful about the ordering here. Mesh layers are
    # added as the final argument to the kernel.
    if builder.needs_mesh_layers:
        args.append(ast.Decl("int", mesh_layer_sym))

    # NOTE: In the future we may want to have more than one "macro_kernel"
    macro_kernel_name = "compile_slate"
    stmt = ast.Block(statements)
    macro_kernel = builder.construct_macro_kernel(name=macro_kernel_name,
                                                  args=args,
                                                  statements=stmt)

    # Tell the builder to construct the final ast
    kernel_ast = builder.construct_ast([macro_kernel])

    # Now we wrap up the kernel ast as a PyOP2 kernel.
    # Include the Eigen header files
    inc.extend(["%s/include/eigen3/" % d for d in PETSC_DIR])
    op2kernel = op2.Kernel(
        kernel_ast,
        macro_kernel_name,
        cpp=True,
        include_dirs=inc,
        headers=['#include <Eigen/Dense>', '#define restrict __restrict'])

    assert len(slate_expr.ufl_domains()) == 1, (
        "No support for multiple domains yet!")

    # Send back a "TSFC-like" SplitKernel object with an
    # index and KernelInfo
    kinfo = KernelInfo(kernel=op2kernel,
                       integral_type=builder.integral_type,
                       oriented=builder.oriented,
                       subdomain_id="otherwise",
                       domain_number=0,
                       coefficient_map=tuple(range(len(expr_coeffs))),
                       needs_cell_facets=builder.needs_cell_facets,
                       pass_layer_arg=builder.needs_mesh_layers)

    idx = tuple([0] * slate_expr.rank)

    kernels = (SplitKernel(idx, kinfo), )

    # Store the resulting kernel for reuse
    slate_expr._metakernel_cache = kernels

    return kernels
def wrapper_snippets(itspace, args, kernel_name=None, wrapper_name=None, user_code=None, iteration_region=ALL, pass_layer_arg=False):
    """Generates code snippets for the wrapper, ready to be substituted
    into a template.

    :param itspace: :class:`IterationSpace` object of the :class:`ParLoop`,
                    This is built from the iteration :class:`Set`.
    :param args: :class:`Arg`s of the :class:`ParLoop`
    :param kernel_name: Kernel function name (forwarded); must not be ``None``
    :param user_code: Code to insert into the wrapper (forwarded)
    :param wrapper_name: Wrapper function name (forwarded); defaults to
                         ``"wrap_" + kernel_name``
    :param iteration_region: Iteration region, this is specified when
                             creating a :class:`ParLoop`.
    :param pass_layer_arg: When true, ``j_0`` is appended to the kernel
                           call arguments.

    :return: dict containing the code snippets
    """

    assert kernel_name is not None
    if wrapper_name is None:
        wrapper_name = "wrap_" + kernel_name
    if user_code is None:
        user_code = ""

    # The loop is direct when no argument is accessed through a map.
    direct = all(a.map is None for a in args)

    def itspace_loop(i, d):
        # Opening line of a C loop over local iteration index i_<i> in [0, d).
        return "for (int i_%d=0; i_%d<%d; ++i_%d) {" % (i, i, d, i)

    def extrusion_loop():
        # Direct loops get a plain scope instead of a loop over layers.
        if direct:
            return "{"
        return "for (int j_0 = start_layer; j_0 < end_layer; ++j_0){"

    _ssinds_arg = ""
    _index_expr = "(%s)n" % as_cstr(IntType)
    is_top = (iteration_region == ON_TOP)
    is_facet = (iteration_region == ON_INTERIOR_FACETS)

    # Iterating over a Subset goes through the extra ssinds index array.
    if isinstance(itspace._iterset, Subset):
        _ssinds_arg = "%s* ssinds," % as_cstr(IntType)
        _index_expr = "ssinds[n]"

    _wrapper_args = ', '.join([arg.c_wrapper_arg() for arg in args])

    # Pass in the is_facet flag to mark the case when it's an interior horizontal facet in
    # an extruded mesh.
    _wrapper_decs = ';\n'.join([arg.c_wrapper_dec() for arg in args])

    _vec_decs = ';\n'.join(
        [arg.c_vec_dec(is_facet=is_facet) for arg in args if arg._is_vec_map])

    _intermediate_globals_decl = ';\n'.join([
        arg.c_intermediate_globals_decl(count)
        for count, arg in enumerate(args) if arg._is_global_reduction
    ])
    _intermediate_globals_init = ';\n'.join([
        arg.c_intermediate_globals_init(count)
        for count, arg in enumerate(args) if arg._is_global_reduction
    ])
    _intermediate_globals_writeback = ';\n'.join([
        arg.c_intermediate_globals_writeback(count)
        for count, arg in enumerate(args) if arg._is_global_reduction
    ])

    _vec_inits = ';\n'.join([
        arg.c_vec_init(is_top, is_facet=is_facet) for arg in args
        if not arg._is_mat and arg._is_vec_map
    ])

    # Re-indent a multi-line snippet: every line after the first is
    # prefixed with i copies of the indent unit.
    indent = lambda t, i: ('\n' + ' ' * i).join(t.split('\n'))

    _map_decl = ""
    _apply_offset = ""
    _map_init = ""
    _extr_loop = ""
    _extr_loop_close = ""
    _map_bcs_m = ""
    _map_bcs_p = ""
    _layer_arg = ""
    # The extrusion-specific snippets stay empty for non-extruded
    # iteration spaces.
    if itspace._extruded:
        _layer_arg = ", int start_layer, int end_layer, int top_layer"
        _map_decl += ';\n'.join([
            arg.c_map_decl(is_facet=is_facet) for arg in args
            if arg._uses_itspace
        ])
        _map_init += ';\n'.join([
            arg.c_map_init(is_top=is_top, is_facet=is_facet) for arg in args
            if arg._uses_itspace
        ])
        _map_bcs_m += ';\n'.join(
            [arg.c_map_bcs("-", is_facet) for arg in args if arg._is_mat])
        _map_bcs_p += ';\n'.join(
            [arg.c_map_bcs("+", is_facet) for arg in args if arg._is_mat])
        # NOTE(review): the two joins below are concatenated without a
        # separator between them — confirm this is intended.
        _apply_offset += ';\n'.join([
            arg.c_add_offset_map(is_facet=is_facet) for arg in args
            if arg._uses_itspace
        ])
        _apply_offset += ';\n'.join([
            arg.c_add_offset(is_facet=is_facet) for arg in args
            if arg._is_vec_map
        ])
        _extr_loop = '\n' + extrusion_loop()
        _extr_loop_close = '}\n'

    # Build kernel invocation. Let X be a parameter of the kernel representing a
    # tensor accessed in an iteration space. Let BUFFER be an array of the same
    # size as X. BUFFER is declared and initialized in the wrapper function.
    # In particular, if:
    # - X is written or incremented, then BUFFER is initialized to 0
    # - X is read, then BUFFER gathers data expected by X
    _buf_name, _tmp_decl, _tmp_name = {}, {}, {}
    _buf_decl, _buf_gather = OrderedDict(), OrderedDict( )  # Deterministic code generation
    for count, arg in enumerate(args):
        if not arg._uses_itspace:
            continue
        _buf_name[arg] = "buffer_%s" % arg.c_arg_name(count)
        _tmp_name[arg] = "tmp_%s" % _buf_name[arg]
        _buf_size = list(itspace._extents)
        if not arg._is_mat:
            # Readjust size to take into account the size of a vector space
            _dat_size = (arg.data.cdim, )
            _buf_size = [sum([e * d for e, d in zip(_buf_size, _dat_size)])]
            _loop_size = [
                _buf_size[i] // _dat_size[i] for i in range(len(_buf_size))
            ]
        else:
            _dat_size = arg.data.dims[0][0]  # TODO: [0][0] ?
            _buf_size = [e * d for e, d in zip(_buf_size, _dat_size)]
        _buf_decl[arg] = arg.c_buffer_decl(_buf_size, count, _buf_name[arg], is_facet=is_facet)
        _tmp_decl[arg] = arg.c_buffer_decl(_buf_size, count, _tmp_name[arg], is_facet=is_facet, init=False)
        # NOTE(review): _loop_size is only assigned in the non-matrix
        # branch above, so a matrix argument reaching this branch would
        # use a value left over from an earlier argument (or fail if
        # there is none) — confirm matrices are never read here.
        if arg.access not in [WRITE, INC]:
            _itspace_loops = '\n'.join([
                ' ' * n + itspace_loop(n, e)
                for n, e in enumerate(_loop_size)
            ])
            _buf_gather[arg] = arg.c_buffer_gather(_buf_size, count, _buf_name[arg])
            _itspace_loop_close = '\n'.join(
                ' ' * n + '}' for n in range(len(_loop_size) - 1, -1, -1))
            _buf_gather[arg] = "\n".join(
                [_itspace_loops, _buf_gather[arg], _itspace_loop_close])
    # Arguments that use the iteration space are passed to the kernel
    # through their staging buffer.
    _kernel_args = ', '.join([
        arg.c_kernel_arg(count) if not arg._uses_itspace else _buf_name[arg]
        for count, arg in enumerate(args)
    ])

    if pass_layer_arg:
        _kernel_args += ", j_0"

    _buf_gather = ";\n".join(_buf_gather.values())
    _buf_decl = ";\n".join(_buf_decl.values())

    def itset_loop_body(i, j, shape, offsets, is_facet=False):
        # Build the scatter/addto code for one (i, j, shape, offsets)
        # entry of the iteration space.
        template_scatter = """ %(offset_decl)s; %(ofs_itspace_loops)s %(ind)s%(offset)s %(ofs_itspace_loop_close)s %(itspace_loops)s %(ind)s%(buffer_scatter)s; %(itspace_loop_close)s """
        nloops = len(shape)
        mult = 1 if not is_facet else 2
        _buf_scatter = OrderedDict()  # Deterministic code generation
        for count, arg in enumerate(args):
            # Only written/incremented Dats that use the iteration space
            # get a scatter here; matrices are skipped (they are handled
            # through the addto calls below).
            if not (arg._uses_itspace and arg.access in [WRITE, INC]):
                continue
            elif (arg._is_mat and arg._is_mixed) or (arg._is_dat and nloops > 1):
                raise NotImplementedError
            elif arg._is_mat:
                continue
            elif arg._is_dat:
                loop_size = shape[0] * mult
                _itspace_loops, _itspace_loop_close = itspace_loop(
                    0, loop_size), '}'
                _scatter_stmts = arg.c_buffer_scatter_vec(
                    count, i, j, offsets, _buf_name[arg])
                _buf_offset, _buf_offset_decl = '', ''
            else:
                raise NotImplementedError
            _buf_scatter[arg] = template_scatter % {
                'ind': ' ' * nloops,
                'offset_decl': _buf_offset_decl,
                'offset': _buf_offset,
                'buffer_scatter': _scatter_stmts,
                'itspace_loops': indent(_itspace_loops, 2),
                'itspace_loop_close': indent(_itspace_loop_close, 2),
                'ofs_itspace_loops': indent(_itspace_loops, 2) if _buf_offset else '',
                'ofs_itspace_loop_close': indent(_itspace_loop_close, 2) if _buf_offset else ''
            }
        scatter = ";\n".join(_buf_scatter.values())

        # Extruded iteration spaces use the "xtr_"-prefixed maps.
        if itspace._extruded:
            _addtos_extruded = ';\n'.join([
                arg.c_addto(i, j, _buf_name[arg], _tmp_name[arg], _tmp_decl[arg], "xtr_", is_facet=is_facet)
                for arg in args if arg._is_mat
            ])
            _addtos = ""
        else:
            _addtos_extruded = ""
            _addtos = ';\n'.join([
                arg.c_addto(i, j, _buf_name[arg], _tmp_name[arg], _tmp_decl[arg])
                for count, arg in enumerate(args) if arg._is_mat
            ])

        # These two names are not read again after this point.
        if not _buf_scatter:
            _itspace_loops = ''
            _itspace_loop_close = ''

        template = """ %(scatter)s %(ind)s%(addtos_extruded)s; %(addtos)s; """
        return template % {
            'ind': ' ' * nloops,
            'scatter': scatter,
            'addtos_extruded': indent(_addtos_extruded, 2 + nloops),
            'addtos': indent(_addtos, 2),
        }

    return {
        'kernel_name': kernel_name,
        'wrapper_name': wrapper_name,
        'ssinds_arg': _ssinds_arg,
        'index_expr': _index_expr,
        'wrapper_args': _wrapper_args,
        'user_code': user_code,
        'wrapper_decs': indent(_wrapper_decs, 1),
        'vec_inits': indent(_vec_inits, 2),
        'layer_arg': _layer_arg,
        'map_decl': indent(_map_decl, 2),
        'vec_decs': indent(_vec_decs, 2),
        'map_init': indent(_map_init, 5),
        'apply_offset': indent(_apply_offset, 3),
        'extr_loop': indent(_extr_loop, 5),
        'map_bcs_m': indent(_map_bcs_m, 5),
        'map_bcs_p': indent(_map_bcs_p, 5),
        'extr_loop_close': indent(_extr_loop_close, 2),
        'interm_globals_decl': indent(_intermediate_globals_decl, 3),
        'interm_globals_init': indent(_intermediate_globals_init, 3),
        'interm_globals_writeback': indent(_intermediate_globals_writeback, 3),
        'buffer_decl': _buf_decl,
        'buffer_gather': _buf_gather,
        'kernel_args': _kernel_args,
        'IntType': as_cstr(IntType),
        'itset_loop_body': '\n'.join([
            itset_loop_body(i, j, shape, offsets, is_facet=(iteration_region == ON_INTERIOR_FACETS))
            for i, j, shape, offsets in itspace
        ])
    }
def c_addto(self, i, j, buf_name, tmp_name, tmp_decl, extruded=None, is_facet=False):
    """Return a braced C block that adds the values in a buffer to a matrix.

    The block calls ``MatSetValuesLocal`` (or ``MatSetValuesBlockedLocal``
    for vector-field data) with ``INSERT_VALUES`` when the access is
    ``WRITE`` and ``ADD_VALUES`` otherwise.  When either map of a
    vector-field matrix has a ``vector_index``, explicit ``rowmap`` and
    ``colmap`` index arrays are generated first and the unblocked call is
    used.

    :arg i: row block index (selects ``maps[0].split[i]``)
    :arg j: column block index (selects ``maps[1].split[j]``)
    :arg buf_name: name of the C buffer holding the values
    :arg tmp_name: not used in this method
    :arg tmp_decl: not used in this method
    :arg extruded: optional string; when given, the row and column index
        expressions become this string followed by the map name
    :arg is_facet: double the number of rows and columns
    :returns: the generated C code as a string
    """
    maps = as_tuple(self.map, Map)
    nrows = maps[0].split[i].arity
    ncols = maps[1].split[j].arity
    rows_str = "%s + i * %s" % (self.c_map_name(0, i), nrows)
    cols_str = "%s + i * %s" % (self.c_map_name(1, j), ncols)

    if extruded is not None:
        rows_str = extruded + self.c_map_name(0, i)
        cols_str = extruded + self.c_map_name(1, j)

    if is_facet:
        nrows *= 2
        ncols *= 2

    ret = []
    # NOTE(review): rbs/cbs and this rdim are not read again before rdim
    # is reassigned in the vector-field branch below.
    rbs, cbs = self.data.sparsity[i, j].dims[0][0]
    rdim = rbs * nrows
    addto_name = buf_name
    addto = 'MatSetValuesLocal'
    if self.data._is_vector_field:
        addto = 'MatSetValuesBlockedLocal'
        rmap, cmap = maps
        rdim, cdim = self.data.dims[i][j]
        if rmap.vector_index is not None or cmap.vector_index is not None:
            # Index through the generated rowmap/colmap arrays, which hold
            # one entry per component, so the unblocked call is needed.
            rows_str = "rowmap"
            cols_str = "colmap"
            addto = "MatSetValuesLocal"
            nbits = IntType.itemsize * 8 - 2
            fdict = {
                'nrows': nrows,
                'ncols': ncols,
                'rdim': rdim,
                'cdim': cdim,
                'rowmap': self.c_map_name(0, i),
                'colmap': self.c_map_name(1, j),
                'drop_full_row': 0 if rmap.vector_index is not None else 1,
                'drop_full_col': 0 if cmap.vector_index is not None else 1,
                'IntType': as_cstr(IntType),
                'NBIT': nbits,
                # UGH, need to make sure literals have
                # correct type ("long int" if using 64 bit
                # ints).
                'ONE': {
                    62: "1L",
                    30: "1"
                }[nbits],
                'MASK': "0x%x%s" % (sum(2**(nbits - i) for i in range(3)), {
                    62: "L",
                    30: ""
                }[nbits])
            }
            # Horrible hack alert
            # To apply BCs to a component of a Dat with cdim > 1
            # we encode which components to apply things to in the
            # high bits of the map value
            # The value that comes in is:
            # NBIT = (sizeof(IntType)*8 - 2)
            # -(row + 1 + sum_i 2 ** (NBIT - i))
            # where i are the components to zero
            #
            # So, the actual row (if it's negative) is:
            # MASK = sum_i 2**(NBIT - i)
            # (~input) & ~MASK
            # And we can determine which components to zero by
            # inspecting the high bits (1 << NBIT - i)
            ret.append(""" %(IntType)s rowmap[%(nrows)d*%(rdim)d]; %(IntType)s colmap[%(ncols)d*%(cdim)d]; %(IntType)s block_row, block_col, tmp; int discard; for ( int j = 0; j < %(nrows)d; j++ ) { block_row = %(rowmap)s[i*%(nrows)d + j]; discard = 0; tmp = -(block_row + 1); if ( block_row < 0 ) { discard = 1; block_row = tmp & ~%(MASK)s; } for ( int k = 0; k < %(rdim)d; k++ ) { if ( discard && (!(tmp & %(MASK)s) || %(drop_full_row)d || ((tmp & (%(ONE)s << (%(NBIT)s - k))) != 0)) ) { rowmap[j*%(rdim)d + k] = -1; } else { rowmap[j*%(rdim)d + k] = (block_row)*%(rdim)d + k; } } } for ( int j = 0; j < %(ncols)d; j++ ) { discard = 0; block_col = %(colmap)s[i*%(ncols)d + j]; tmp = -(block_col + 1); if ( block_col < 0 ) { discard = 1; block_col = tmp & ~%(MASK)s; } for ( int k = 0; k < %(cdim)d; k++ ) { if ( discard && (!(tmp & %(MASK)s) || %(drop_full_col)d || ((tmp & (%(ONE)s << (%(NBIT)s- k))) != 0)) ) { colmap[j*%(cdim)d + k] = -1; } else { colmap[j*%(cdim)d + k] = (block_col)*%(cdim)d + k; } } } """ % fdict)
            # The index arrays hold one entry per component, so the row
            # and column counts passed to the call scale accordingly.
            nrows *= rdim
            ncols *= cdim
    ret.append(
        """%(addto)s(%(mat)s, %(nrows)s, %(rows)s, %(ncols)s, %(cols)s, (const PetscScalar *)%(vals)s, %(insert)s);""" % {
            'mat': self.c_arg_name(i, j),
            'vals': addto_name,
            'addto': addto,
            'nrows': nrows,
            'ncols': ncols,
            'rows': rows_str,
            'cols': cols_str,
            'IntType': as_cstr(IntType),
            'insert': "INSERT_VALUES" if self.access == WRITE else "ADD_VALUES"
        })
    # Wrap everything in its own C scope.
    ret = " " * 16 + "{\n" + "\n".join(ret) + "\n" + " " * 16 + "}"
    return ret
def exterior_facet_boundary_node_map(self, V, method):
    """Return the :class:`pyop2.Map` from exterior facets to nodes on
    the boundary.

    The map is built once per ``method`` and stored in
    ``self.map_caches["boundary_node"]``.

    :arg V: The function space.
    :arg method: The method for determining boundary nodes, either
        ``"topological"`` or ``"geometric"``.  See
        :class:`~.DirichletBC` for details.
    :raises ValueError: if ``method`` is not one of the two supported
        values.
    """
    try:
        return self.map_caches["boundary_node"][method]
    except KeyError:
        pass

    el = V.finat_element

    dim = self.mesh.facet_dimension()

    if method == "topological":
        boundary_dofs = el.entity_closure_dofs()[dim]
    elif method == "geometric":
        # This function is only called on extruded meshes when
        # asking for the nodes that live on the "vertical"
        # exterior facets.
        boundary_dofs = entity_support_dofs(el, dim)
    else:
        # Fail with a clear message instead of an unbound-local error on
        # boundary_dofs further down.
        raise ValueError("Unknown boundary node method %r" % (method, ))

    nodes_per_facet = len(boundary_dofs[0])

    # HACK ALERT
    # The facet set does not have a halo associated with it, since
    # we only construct halos for DoF sets.  Fortunately, this
    # loop is direct and we already have all the correct
    # information available locally.  So we fake a set of the
    # correct size and carry out a direct loop
    facet_set = op2.Set(self.mesh.exterior_facets.set.total_size,
                        comm=self.mesh.comm)

    fs_dat = op2.Dat(
        facet_set**el.space_dimension(),
        data=V.exterior_facet_node_map().values_with_halo.view())

    facet_dat = op2.Dat(facet_set**nodes_per_facet, dtype=IntType)

    # Ensure these come out in sorted order.
    local_facet_nodes = numpy.array(
        [boundary_dofs[e] for e in sorted(boundary_dofs.keys())])

    # Helper function to turn the inner index of an array into c
    # array literals.
    def c_array(xs):
        return "{" + ", ".join(map(str, xs)) + "}"

    # AST for: l_nodes[facet[0]][n]
    rank_ast = ast.Symbol("l_nodes",
                          rank=(ast.Symbol("facet", rank=(0, )), "n"))

    body = ast.Block([
        ast.Decl("int",
                 ast.Symbol("l_nodes", (len(el.cell.topology[dim]),
                                        nodes_per_facet)),
                 init=ast.ArrayInit(
                     c_array(map(c_array, local_facet_nodes))),
                 qualifiers=["const"]),
        ast.For(
            ast.Decl("int", "n", 0), ast.Less("n", nodes_per_facet),
            ast.Incr("n", 1),
            ast.Assign(ast.Symbol("facet_nodes", ("n", )),
                       ast.Symbol("cell_nodes", (rank_ast, ))))
    ])

    kernel = op2.Kernel(
        ast.FunDecl("void", "create_bc_node_map", [
            ast.Decl("%s*" % as_cstr(fs_dat.dtype), "cell_nodes"),
            ast.Decl("%s*" % as_cstr(facet_dat.dtype), "facet_nodes"),
            ast.Decl("unsigned int*", "facet")
        ], body), "create_bc_node_map")

    local_facet_dat = op2.Dat(
        facet_set**self.mesh.exterior_facets._rank,
        self.mesh.exterior_facets.local_facet_dat.data_ro_with_halos,
        dtype=numpy.uintc)
    op2.par_loop(kernel, facet_set, fs_dat(op2.READ), facet_dat(op2.WRITE),
                 local_facet_dat(op2.READ))

    if self.extruded:
        offset = self.offset[boundary_dofs[0]]
    else:
        offset = None
    val = op2.Map(facet_set,
                  self.node_set,
                  nodes_per_facet,
                  facet_dat.data_ro_with_halos,
                  name="exterior_facet_boundary_node",
                  offset=offset)
    self.map_caches["boundary_node"][method] = val
    return val
def compile_coordinate_element(ufl_coordinate_element, contains_eps, parameters=None):
    """Generates C code for changing to reference coordinates.

    The generated source defines ``to_reference_coords_kernel``, declares
    ``wrap_to_reference_coords`` and defines ``to_reference_coords``, which
    calls the wrapper and returns the result of the inside-the-cell test.

    :arg ufl_coordinate_element: UFL element of the coordinates
    :arg contains_eps: passed as ``epsilon`` to the reference cell's
        ``contains_point`` when building the inside-the-cell predicate
    :arg parameters: optional form compiler parameters; they are merged
        over ``tsfc.default_parameters()``
    :returns: C code as string
    """
    merged_parameters = tsfc.default_parameters()
    if parameters is not None:
        merged_parameters.update(parameters)
    parameters = merged_parameters

    def dX_norm_square(topological_dimension):
        # C expression for the squared norm of dX.
        return " + ".join("dX[{0}]*dX[{0}]".format(i)
                          for i in range(topological_dimension))

    def X_isub_dX(topological_dimension):
        # C statements performing X -= dX component by component.
        return "\n".join("\tX[{0}] -= dX[{0}];".format(i)
                         for i in range(topological_dimension))

    def is_affine(ufl_element):
        return ufl_element.cell().is_simplex() and ufl_element.degree() <= 1 and ufl_element.family() in [
            "Discontinuous Lagrange", "Lagrange"
        ]

    def inside_check(fiat_cell):
        # C boolean expression testing whether X lies in the reference cell.
        dim = fiat_cell.get_spatial_dimension()
        point = tuple(sympy.Symbol("X[%d]" % i) for i in range(dim))

        return " && ".join(
            "(%s)" % arg
            for arg in fiat_cell.contains_point(point, epsilon=contains_eps).args)

    def init_X(fiat_cell):
        # C statements setting X to the average of the cell's vertices.
        vertices = numpy.array(fiat_cell.get_vertices())
        X = numpy.average(vertices, axis=0)

        formatter = ArrayInit(X, precision=parameters["precision"])._formatter
        return "\n".join("%s = %s;" % ("X[%d]" % i, formatter(v))
                         for i, v in enumerate(X))

    def to_reference_coordinates(ufl_coordinate_element):
        # Set up UFL form
        cell = ufl_coordinate_element.cell()
        domain = ufl.Mesh(ufl_coordinate_element)
        K = ufl.JacobianInverse(domain)
        x = ufl.SpatialCoordinate(domain)
        x0_element = ufl.VectorElement("Real", cell, 0)
        x0 = ufl.Coefficient(ufl.FunctionSpace(domain, x0_element))
        expr = ufl.dot(K, x - x0)

        # Translation to GEM
        C = ufl_utils.coordinate_coefficient(domain)
        expr = ufl_utils.preprocess_expression(expr)
        expr = ufl_utils.replace_coordinates(expr, C)
        expr = ufl_utils.simplify_abs(expr)

        builder = firedrake_interface.KernelBuilderBase()
        builder._coefficient(C, "C")
        builder._coefficient(x0, "x0")

        dim = cell.topological_dimension()
        point = gem.Variable('X', (dim, ))
        context = tsfc.fem.GemPointContext(
            interface=builder,
            ufl_cell=cell,
            precision=parameters["precision"],
            point_indices=(),
            point_expr=point,
        )
        translator = tsfc.fem.Translator(context)
        ir = map_expr_dag(translator, expr)

        # Unroll result
        ir = [gem.Indexed(ir, alpha) for alpha in numpy.ndindex(ir.shape)]

        # Unroll IndexSums
        max_extent = parameters["unroll_indexsum"]
        if max_extent:
            def predicate(index):
                return index.extent <= max_extent
            ir = gem.optimise.unroll_indexsum(ir, predicate=predicate)

        # Translate to COFFEE
        ir = impero_utils.preprocess_gem(ir)
        return_variable = gem.Variable('dX', (dim, ))
        assignments = [(gem.Indexed(return_variable, (i, )), e)
                       for i, e in enumerate(ir)]
        impero_c = impero_utils.compile_gem(assignments, ())
        body = tsfc.coffee.generate(impero_c, {}, parameters["precision"])
        body.open_scope = False

        return body

    # Create FInAT element
    element = tsfc.finatinterface.create_element(ufl_coordinate_element)

    cell = ufl_coordinate_element.cell()
    extruded = isinstance(cell, ufl.TensorProductCell)

    code = {
        "geometric_dimension": cell.geometric_dimension(),
        "topological_dimension": cell.topological_dimension(),
        "inside_predicate": inside_check(element.cell),
        "to_reference_coords": to_reference_coordinates(ufl_coordinate_element),
        "init_X": init_X(element.cell),
        "max_iteration_count": 1 if is_affine(ufl_coordinate_element) else 16,
        "convergence_epsilon": 1e-12,
        "dX_norm_square": dX_norm_square(cell.topological_dimension()),
        "X_isub_dX": X_isub_dX(cell.topological_dimension()),
        "extruded_arg": ", %s nlayers" % as_cstr(IntType) if extruded else "",
        "nlayers": ", f->n_layers" if extruded else "",
        "IntType": as_cstr(IntType),
    }

    # Line structure matters in this template: the ``#include`` must be
    # alone on its line and the ``//`` comment must end before the
    # assignment to *return_value.
    evaluate_template_c = """#include <math.h>

struct ReferenceCoords {
    double X[%(geometric_dimension)d];
};

static inline void to_reference_coords_kernel(void *result_, double *x0, int *return_value, double **C)
{
    struct ReferenceCoords *result = (struct ReferenceCoords *) result_;

    const int space_dim = %(geometric_dimension)d;

    /*
     * Mapping coordinates from physical to reference space
     */

    double *X = result->X;
%(init_X)s
    double x[space_dim];

    int converged = 0;
    for (int it = 0; !converged && it < %(max_iteration_count)d; it++) {
        double dX[%(topological_dimension)d] = { 0.0 };
%(to_reference_coords)s

        if (%(dX_norm_square)s < %(convergence_epsilon)g * %(convergence_epsilon)g) {
            converged = 1;
        }

%(X_isub_dX)s
    }

    // Are we inside the reference element?
    *return_value = %(inside_predicate)s;
}

static inline void wrap_to_reference_coords(void *result_, double *x, int *return_value, double *coords, %(IntType)s *coords_map%(extruded_arg)s, %(IntType)s cell);

int to_reference_coords(void *result_, struct Function *f, int cell, double *x)
{
    int return_value;
    wrap_to_reference_coords(result_, x, &return_value, f->coords, f->coords_map%(nlayers)s, cell);
    return return_value;
}
"""

    return evaluate_template_c % code
# Some generic python utilities not really specific to our work. from decorator import decorator from pyop2.utils import cached_property # noqa: F401 from pyop2.datatypes import ScalarType, as_cstr from pyop2.datatypes import RealType # noqa: F401 from pyop2.datatypes import IntType # noqa: F401 from pyop2.datatypes import as_ctypes # noqa: F401 from firedrake_configuration import get_config _current_uid = 0 ScalarType_c = as_cstr(ScalarType) IntType_c = as_cstr(IntType) complex_mode = get_config()["options"].get("complex", False) # Remove this (and update test suite) when Slate supports complex mode. SLATE_SUPPORTS_COMPLEX = False def _new_uid(): global _current_uid _current_uid += 1 return _current_uid def _init(): """Cause :func:`pyop2.init` to be called in case the user has not done it for themselves. The result of this is that the user need only call :func:`pyop2.init` if she wants to set a non-default option, for example to switch the debug or log level."""
def compile_element(expression, coordinates, parameters=None):
    """Generates C code for point evaluations.

    The returned source is the filled-in ``evaluate`` C template followed by
    the generated ``evaluate_kernel`` code.

    :arg expression: UFL expression; it must contain no Arguments and
        exactly one (non-mixed) coefficient
    :arg coordinates: coordinate field
    :arg parameters: form compiler parameters, applied on top of the values
        returned by ``default_parameters()``
    :returns: C code as string
    :raises ValueError: if the expression contains Arguments
    :raises NotImplementedError: if the coefficient lives on a mixed element
    """
    merged_parameters = default_parameters()
    if parameters is not None:
        merged_parameters.update(parameters)
    parameters = merged_parameters

    # No arguments, please!
    # This must be raised: returning the exception object would hand the
    # caller a ValueError instance where a C source string is expected.
    if extract_arguments(expression):
        raise ValueError("Cannot interpolate UFL expression with Arguments!")

    # Apply UFL preprocessing
    expression = tsfc.ufl_utils.preprocess_expression(
        expression, complex_mode=utils.complex_mode)

    # Collect required coefficients (unpacking enforces exactly one)
    coefficient, = extract_coefficients(expression)

    # Point evaluation of mixed coefficients not supported here.
    # The exact type comparison is deliberate: it is kept as in the original
    # so that subclasses of MixedElement are not rejected.
    if type(coefficient.ufl_element()) == MixedElement:
        raise NotImplementedError("Cannot point evaluate mixed elements yet!")

    # Replace coordinates (if any)
    domain = expression.ufl_domain()
    assert coordinates.ufl_domain() == domain

    # Initialise kernel builder
    builder = firedrake_interface.KernelBuilderBase(utils.ScalarType_c)
    builder.domain_coordinate[domain] = coordinates
    x_arg = builder._coefficient(coordinates, "x")
    f_arg = builder._coefficient(coefficient, "f")

    # TODO: restore this for expression evaluation!
    # expression = ufl_utils.split_coefficients(expression, builder.coefficient_split)

    # Translate to GEM
    cell = domain.ufl_cell()
    dim = cell.topological_dimension()
    point = gem.Variable('X', (dim,))
    point_arg = ast.Decl(utils.ScalarType_c, ast.Symbol('X', rank=(dim,)))

    config = dict(interface=builder,
                  ufl_cell=coordinates.ufl_domain().ufl_cell(),
                  precision=parameters["precision"],
                  point_indices=(),
                  point_expr=point,
                  complex_mode=utils.complex_mode)
    # TODO: restore this for expression evaluation!
    # config["cellvolume"] = cellvolume_generator(coordinates.ufl_domain(), coordinates, config)
    context = tsfc.fem.GemPointContext(**config)

    # Abs-simplification
    expression = tsfc.ufl_utils.simplify_abs(expression, utils.complex_mode)

    # Translate UFL -> GEM
    translator = tsfc.fem.Translator(context)
    result, = map_expr_dags(translator, [expression])

    tensor_indices = ()
    if expression.ufl_shape:
        # One free index per tensor axis of the expression
        tensor_indices = tuple(gem.Index() for _ in expression.ufl_shape)
        return_variable = gem.Indexed(gem.Variable('R', expression.ufl_shape), tensor_indices)
        result_arg = ast.Decl(utils.ScalarType_c, ast.Symbol('R', rank=expression.ufl_shape))
        result = gem.Indexed(result, tensor_indices)
    else:
        # Scalar results are written to a length-1 array
        return_variable = gem.Indexed(gem.Variable('R', (1,)), (0,))
        result_arg = ast.Decl(utils.ScalarType_c, ast.Symbol('R', rank=(1,)))

    # Unroll
    max_extent = parameters["unroll_indexsum"]
    if max_extent:
        def predicate(index):
            return index.extent <= max_extent
        result, = gem.optimise.unroll_indexsum([result], predicate=predicate)

    # Translate GEM -> COFFEE
    result, = gem.impero_utils.preprocess_gem([result])
    impero_c = gem.impero_utils.compile_gem([(return_variable, result)], tensor_indices)
    body = generate_coffee(impero_c, {}, parameters["precision"], utils.ScalarType_c)

    # Build kernel tuple
    kernel_code = builder.construct_kernel(
        "evaluate_kernel", [result_arg, point_arg, x_arg, f_arg], body)

    # Fill the code template
    extruded = isinstance(cell, TensorProductCell)

    code = {
        "geometric_dimension": cell.geometric_dimension(),
        "layers_arg": ", int const *__restrict__ layers" if extruded else "",
        "layers": ", layers" if extruded else "",
        "IntType": as_cstr(IntType),
        "scalar_type": utils.ScalarType_c,
    }
    # if maps are the same, only need to pass one of them
    if coordinates.cell_node_map() == coefficient.cell_node_map():
        code["wrapper_map_args"] = "%(IntType)s const *__restrict__ coords_map" % code
        code["map_args"] = "f->coords_map"
    else:
        code["wrapper_map_args"] = "%(IntType)s const *__restrict__ coords_map, %(IntType)s const *__restrict__ f_map" % code
        code["map_args"] = "f->coords_map, f->f_map"

    evaluate_template_c = """
static inline void wrap_evaluate(%(scalar_type)s* const result, %(scalar_type)s* const X, int const start, int const end%(layers_arg)s,
    %(scalar_type)s const *__restrict__ coords, %(scalar_type)s const *__restrict__ f, %(wrapper_map_args)s);

int evaluate(struct Function *f, %(scalar_type)s *x, %(scalar_type)s *result)
{
    struct ReferenceCoords reference_coords;
    %(IntType)s cell = locate_cell(f, x, %(geometric_dimension)d, &to_reference_coords, &to_reference_coords_xtr, &reference_coords);
    if (cell == -1) {
        return -1;
    }

    if (!result) {
        return 0;
    }
    int layers[2] = {0, 0};
    if (f->extruded != 0) {
        int nlayers = f->n_layers;
        layers[1] = cell %% nlayers + 2;
        cell = cell / nlayers;
    }

    wrap_evaluate(result, reference_coords.X, cell, cell+1%(layers)s, f->coords, f->f, %(map_args)s);
    return 0;
}
"""

    return (evaluate_template_c % code) + kernel_code.gencode()
def compile_coordinate_element(ufl_coordinate_element, contains_eps, parameters=None):
    """Generates C code for changing to reference coordinates.

    :arg ufl_coordinate_element: UFL element of the coordinates
    :arg contains_eps: tolerance handed to ``inside_check`` as ``eps``
    :arg parameters: form compiler parameters, applied on top of
        ``tsfc.default_parameters()``
    :returns: C code as string
    """
    # Start from the TSFC defaults and overlay any user-supplied values.
    effective_parameters = tsfc.default_parameters()
    if parameters is not None:
        effective_parameters.update(parameters)
    parameters = effective_parameters

    # Create FInAT element
    finat_element = tsfc.finatinterface.create_element(ufl_coordinate_element)

    ufl_cell = ufl_coordinate_element.cell()

    # Exactly one of the two C entry points must call the wrapper; the other
    # has its call commented out through these prefixes.
    if isinstance(ufl_cell, ufl.TensorProductCell):
        layers_decl = ", int const *__restrict__ layers"
        disable_flat_call = "//"
        disable_extruded_call = ""
    else:
        layers_decl = ""
        disable_flat_call = ""
        disable_extruded_call = "//"

    substitutions = dict(
        geometric_dimension=ufl_cell.geometric_dimension(),
        topological_dimension=ufl_cell.topological_dimension(),
        inside_predicate=inside_check(finat_element.cell, eps=contains_eps),
        to_reference_coords=to_reference_coordinates(ufl_coordinate_element, parameters),
        init_X=init_X(finat_element.cell, parameters),
        max_iteration_count=1 if is_affine(ufl_coordinate_element) else 16,
        convergence_epsilon=1e-12,
        dX_norm_square=dX_norm_square(ufl_cell.topological_dimension()),
        X_isub_dX=X_isub_dX(ufl_cell.topological_dimension()),
        extruded_arg=layers_decl,
        extr_comment_out=disable_flat_call,
        non_extr_comment_out=disable_extruded_call,
        IntType=as_cstr(IntType),
    )

    c_template = """#include <math.h>
struct ReferenceCoords {
    double X[%(geometric_dimension)d];
};

static inline void to_reference_coords_kernel(void *result_, double *x0, int *return_value, double *C)
{
    struct ReferenceCoords *result = (struct ReferenceCoords *) result_;

    const int space_dim = %(geometric_dimension)d;

    /*
     * Mapping coordinates from physical to reference space
     */

    double *X = result->X;
%(init_X)s
    double x[space_dim];

    int converged = 0;
    for (int it = 0; !converged && it < %(max_iteration_count)d; it++) {
        double dX[%(topological_dimension)d] = { 0.0 };
%(to_reference_coords)s

         if (%(dX_norm_square)s < %(convergence_epsilon)g * %(convergence_epsilon)g) {
             converged = 1;
         }

%(X_isub_dX)s
    }

    // Are we inside the reference element?
    *return_value = %(inside_predicate)s;
}

static inline void wrap_to_reference_coords(
    void* const result_, double* const x, int* const return_value, %(IntType)s const start, %(IntType)s const end%(extruded_arg)s,
    double const *__restrict__ coords, %(IntType)s const *__restrict__ coords_map);

int to_reference_coords(void *result_, struct Function *f, int cell, double *x)
{
    int return_value;
    %(extr_comment_out)swrap_to_reference_coords(result_, x, &return_value, cell, cell+1, f->coords, f->coords_map);
    return return_value;
}

int to_reference_coords_xtr(void *result_, struct Function *f, int cell, int layer, double *x)
{
    int return_value;
    int layers[2] = {0, layer+2};  // +2 because the layer loop goes to layers[1]-1, which is nlayers-1
    %(non_extr_comment_out)swrap_to_reference_coords(result_, x, &return_value, cell, cell+1, layers, f->coords, f->coords_map);
    return return_value;
}

"""

    return c_template % substitutions
def compile_coordinate_element(ufl_coordinate_element):
    """Generates C code for changing to reference coordinates.

    The reference-space tabulation is done symbolically (sympy symbols fed to
    the FIAT element) and the resulting expressions are printed into a C
    template.

    :arg ufl_coordinate_element: UFL element of the coordinates
    :returns: C code as string
    """
    from tsfc import default_parameters
    from tsfc.fiatinterface import create_element
    from firedrake.pointeval_utils import ssa_arrays, c_print
    from FIAT.reference_element import TensorProductCell as two_product_cell
    import sympy as sp
    import numpy as np

    # Set code generation parameters
    set_float_formatting(default_parameters()["precision"])

    def dX_norm_square(topological_dimension):
        # C expression for the squared Euclidean norm of dX
        return " + ".join("dX[{0}]*dX[{0}]".format(i)
                          for i in range(topological_dimension))

    def X_isub_dX(topological_dimension):
        # C statements performing X -= dX component by component
        return "\n".join("\tX[{0}] -= dX[{0}];".format(i)
                         for i in range(topological_dimension))

    def is_affine(ufl_element):
        return (ufl_element.cell().is_simplex()
                and ufl_element.degree() <= 1
                and ufl_element.family() in ["Discontinuous Lagrange", "Lagrange"])

    def inside_check(ufl_cell, fiat_cell):
        # C boolean expression: conjunction of the terms of the symbolic
        # point-containment condition
        dim = ufl_cell.topological_dimension()
        point = tuple(sp.Symbol("X[%d]" % i) for i in range(dim))

        return " && ".join("(%s)" % arg
                           for arg in fiat_cell.contains_point(point, epsilon=1e-14).args)

    def init_X(fiat_element):
        # C assignments setting X to the average of the reference vertices
        f_float = format["floating point"]
        f_assign = format["assign"]

        fiat_cell = fiat_element.get_reference_element()
        vertices = np.array(fiat_cell.get_vertices())
        X = np.average(vertices, axis=0)
        return "\n".join(f_assign("X[%d]" % i, f_float(v))
                         for i, v in enumerate(X))

    def to_reference_coordinates(ufl_cell, fiat_element):
        f_decl = format["declaration"]
        f_float_decl = format["float declaration"]

        # Get the element cell name and geometric dimension.
        cell = ufl_cell
        gdim = cell.geometric_dimension()
        tdim = cell.topological_dimension()

        code = []

        # Symbolic tabulation
        tabs = fiat_element.tabulate(
            1, np.array([[sp.Symbol("X[%d]" % i) for i in range(tdim)]]))
        # ``items()`` rather than ``iteritems()``: the latter does not exist
        # on Python 3 dictionaries.
        tabs = sorted((d, value.reshape(value.shape[:-1]))
                      for d, value in tabs.items())

        # Generate code for intermediate values
        s_code, d_phis = ssa_arrays([v for k, v in tabs], prefix="t")
        phi = d_phis.pop(0)

        for name, value in s_code:
            code += [f_decl(f_float_decl, name, c_print(value))]

        # Cell coordinate data
        C = np.array([[sp.Symbol("C[%d][%d]" % (i, j)) for j in range(gdim)]
                      for i in range(fiat_element.space_dimension())])

        # Generate physical coordinates
        x = phi.dot(C)
        for i, e in enumerate(x):
            code += ["\tx[%d] = %s;" % (i, e)]

        # Generate Jacobian
        # np.vstack needs a real sequence; a bare ``reversed`` iterator is
        # not accepted by current NumPy.
        grad_phi = np.vstack(list(reversed(d_phis)))
        J = np.transpose(grad_phi.dot(C))
        for i, row in enumerate(J):
            for j, e in enumerate(row):
                code += ["\tJ[%d * %d + %d] = %s;" % (i, tdim, j, e)]

        # Get code snippets for Jacobian, inverse of Jacobian and mapping of
        # coordinates from physical element to the FIAT reference element.
        code += ["compute_jacobian_inverse_%s(K, detJ, J);" % cellname[cell]]
        # FIXME: use cell orientations!
        # if needs_orientation:
        #     code_ += [format["orientation"]["ufc"](tdim, gdim)]

        x = np.array([sp.Symbol("x[%d]" % i) for i in range(gdim)])
        x0 = np.array([sp.Symbol("x0[%d]" % i) for i in range(gdim)])
        K = np.array([[sp.Symbol("K[%d]" % (i * gdim + j)) for j in range(gdim)]
                      for i in range(tdim)])

        # Newton-style update: dX = K (x - x0)
        dX = K.dot(x - x0)
        for i, e in enumerate(dX):
            code += ["\tdX[%d] = %s;" % (i, e)]

        return "\n".join(code)

    # Create FIAT element
    element = create_element(ufl_coordinate_element, vector_is_mixed=False)
    cell = ufl_coordinate_element.cell()

    # calculate_basisvalues, vdim = calculate_basisvalues(cell, element)
    extruded = isinstance(element.get_reference_element(), two_product_cell)

    code = {
        "geometric_dimension": cell.geometric_dimension(),
        "topological_dimension": cell.topological_dimension(),
        "inside_predicate": inside_check(cell, element.get_reference_element()),
        "to_reference_coords": to_reference_coordinates(cell, element),
        "init_X": init_X(element),
        "max_iteration_count": 1 if is_affine(ufl_coordinate_element) else 16,
        "convergence_epsilon": 1e-12,
        "dX_norm_square": dX_norm_square(cell.topological_dimension()),
        "X_isub_dX": X_isub_dX(cell.topological_dimension()),
        "extruded_arg": ", %s nlayers" % as_cstr(IntType) if extruded else "",
        "nlayers": ", f->n_layers" if extruded else "",
        "IntType": as_cstr(IntType),
    }

    evaluate_template_c = """#include <math.h>
#include <firedrake_geometry.h>

struct ReferenceCoords {
    double X[%(geometric_dimension)d];
    double J[%(geometric_dimension)d * %(topological_dimension)d];
    double K[%(topological_dimension)d * %(geometric_dimension)d];
    double detJ;
};

static inline void to_reference_coords_kernel(void *result_, double *x0, int *return_value, double **C)
{
    struct ReferenceCoords *result = (struct ReferenceCoords *) result_;

    const int space_dim = %(geometric_dimension)d;

    /*
     * Mapping coordinates from physical to reference space
     */

    double *X = result->X;
%(init_X)s
    double x[space_dim];
    double *J = result->J;
    double *K = result->K;
    double detJ;

    double dX[%(topological_dimension)d];
    int converged = 0;

    for (int it = 0; !converged && it < %(max_iteration_count)d; it++) {
%(to_reference_coords)s

         if (%(dX_norm_square)s < %(convergence_epsilon)g * %(convergence_epsilon)g) {
             converged = 1;
         }

%(X_isub_dX)s
    }

    result->detJ = detJ;

    // Are we inside the reference element?
    *return_value = %(inside_predicate)s;
}

static inline void wrap_to_reference_coords(void *result_, double *x, int *return_value,
                                            double *coords, %(IntType)s *coords_map%(extruded_arg)s, %(IntType)s cell);

int to_reference_coords(void *result_, struct Function *f, int cell, double *x)
{
    int return_value;
    wrap_to_reference_coords(result_, x, &return_value, f->coords, f->coords_map%(nlayers)s, cell);
    return return_value;
}

"""

    return evaluate_template_c % code
def compile_element(ufl_element, cdim):
    """Generates C code for point evaluations.

    Basis functions are tabulated symbolically at ``reference_coords.X`` and
    printed into a C template together with a small dense contraction kernel.

    :arg ufl_element: UFL element of the function space
    :arg cdim: ``cdim`` of the function space
    :returns: C code as string
    :raises ValueError: if the element uses a mapping other than affine,
        contravariant Piola or covariant Piola
    :raises NotImplementedError: if the tabulated basis has rank greater
        than two
    """
    from tsfc import default_parameters
    from firedrake.pointquery_utils import set_float_formatting, format
    from tsfc.fiatinterface import create_element
    from FIAT.reference_element import TensorProductCell as two_product_cell
    import sympy as sp
    import numpy as np

    # Set code generation parameters
    set_float_formatting(default_parameters()["precision"])

    def calculate_basisvalues(ufl_cell, fiat_element):
        # Returns (C code declaring the ``phi`` table, value dimension).
        f_component = format["component"]
        f_decl = format["declaration"]
        f_float_decl = format["float declaration"]
        f_tensor = format["tabulate tensor"]
        f_new_line = format["new line"]

        tdim = ufl_cell.topological_dimension()
        gdim = ufl_cell.geometric_dimension()

        code = []

        # Symbolic tabulation
        tabs = fiat_element.tabulate(
            0, np.array([[sp.Symbol("reference_coords.X[%d]" % i)
                          for i in range(tdim)]]))
        tabs = tabs[(0,) * tdim]
        tabs = tabs.reshape(tabs.shape[:-1])

        # Generate code for intermediate values
        s_code, (theta,) = ssa_arrays([tabs])
        for name, value in s_code:
            code += [f_decl(f_float_decl, name, c_print(value))]

        # Prepare Jacobian, Jacobian inverse and determinant
        s_detJ = sp.Symbol('detJ')
        s_J = np.array([[sp.Symbol("J[{i}*{tdim} + {j}]".format(i=i, j=j, tdim=tdim))
                         for j in range(tdim)]
                        for i in range(gdim)])
        s_Jinv = np.array([[sp.Symbol("K[{i}*{gdim} + {j}]".format(i=i, j=j, gdim=gdim))
                            for j in range(gdim)]
                           for i in range(tdim)])

        # Apply transformations
        # The per-basis-function mapping list does not change inside the
        # loop, so fetch it once.
        mappings = fiat_element.mapping()
        phi = []
        for i, val in enumerate(theta):
            mapping = mappings[i]
            if mapping == "affine":
                phi.append(val)
            elif mapping == "contravariant piola":
                phi.append(s_J.dot(val) / s_detJ)
            elif mapping == "covariant piola":
                phi.append(s_Jinv.transpose().dot(val))
            else:
                raise ValueError("Unknown mapping: %s" % mapping)
        phi = np.asarray(phi, dtype=object)

        # Dump tables of basis values
        code += ["", "\t// Values of basis functions"]
        code += [f_decl("double", f_component("phi", phi.shape),
                        f_new_line + f_tensor(phi))]

        shape = phi.shape
        if len(shape) <= 1:
            vdim = 1
        elif len(shape) == 2:
            vdim = shape[1]
        else:
            # The C kernel views phi as a 2-D (ndofs x vdim) array; fail
            # clearly instead of hitting an unbound ``vdim`` below.
            raise NotImplementedError(
                "Cannot point evaluate basis functions of shape %s" % (shape,))
        return "\n".join(code), vdim

    # Create FIAT element
    element = create_element(ufl_element, vector_is_mixed=False)
    cell = ufl_element.cell()

    basisvalues_code, vdim = calculate_basisvalues(cell, element)
    extruded = isinstance(element.get_reference_element(), two_product_cell)

    code = {
        "cdim": cdim,
        "vdim": vdim,
        "geometric_dimension": cell.geometric_dimension(),
        "ndofs": element.space_dimension(),
        "calculate_basisvalues": basisvalues_code,
        "extruded_arg": ", %s nlayers" % as_cstr(IntType) if extruded else "",
        "nlayers": ", f->n_layers" if extruded else "",
        "IntType": as_cstr(IntType),
    }

    evaluate_template_c = """static inline void evaluate_kernel(double *result, double *phi_, double **F)
{
    const int ndofs = %(ndofs)d;
    const int cdim = %(cdim)d;
    const int vdim = %(vdim)d;

    double (*phi)[vdim] = (double (*)[vdim]) phi_;

    // F: ndofs x cdim
    // phi: ndofs x vdim
    // result = F' * phi: cdim x vdim
    //
    // Usually cdim == 1 or vdim == 1.

    for (int q = 0; q < cdim * vdim; q++) {
        result[q] = 0.0;
    }
    for (int i = 0; i < ndofs; i++) {
        for (int c = 0; c < cdim; c++) {
            for (int v = 0; v < vdim; v++) {
                result[c*vdim + v] += F[i][c] * phi[i][v];
            }
        }
    }
}

static inline void wrap_evaluate(double *result, double *phi, double *data, %(IntType)s *map%(extruded_arg)s, %(IntType)s cell);

int evaluate(struct Function *f, double *x, double *result)
{
    struct ReferenceCoords reference_coords;
    int cell = locate_cell(f, x, %(geometric_dimension)d, &to_reference_coords, &reference_coords);
    if (cell == -1) {
        return -1;
    }

    if (!result) {
        return 0;
    }

    double *J = reference_coords.J;
    double *K = reference_coords.K;
    double detJ = reference_coords.detJ;

%(calculate_basisvalues)s

    wrap_evaluate(result, (double *)phi, f->f, f->f_map%(nlayers)s, cell);
    return 0;
}
"""

    return evaluate_template_c % code
# Some generic python utilities not really specific to our work. from decorator import decorator from pyop2.utils import cached_property # noqa: F401 from pyop2.datatypes import ScalarType, as_cstr _current_uid = 0 ScalarType_c = as_cstr(ScalarType) def _new_uid(): global _current_uid _current_uid += 1 return _current_uid def _init(): """Cause :func:`pyop2.init` to be called in case the user has not done it for themselves. The result of this is that the user need only call :func:`pyop2.init` if she wants to set a non-default option, for example to switch the debug or log level.""" from pyop2 import op2 from firedrake.parameters import parameters if not op2.initialised(): op2.init(**parameters["pyop2_options"]) def unique_name(name, nameset): """Return name if name is not in nameset, or a deterministic uniquified name if name is in nameset. The new name is inserted into nameset to prevent further name clashes."""
def generate_kernel_ast(builder, statements, declared_temps):
    """Assemble the full kernel AST for the Slate expression held by a
    :class:`LocalKernelBuilder` and wrap it as a PyOP2 kernel.

    :arg builder: The :class:`LocalKernelBuilder` containing
        all relevant expression information.
    :arg statements: A list of COFFEE objects containing all
        assembly calls and temporary declarations. It is extended in place.
    :arg declared_temps: A `dict` containing all previously
        declared temporaries.

    Return: A `KernelInfo` object describing the complete AST.
    """
    def const_restrict(ctype, symbol):
        # Declaration of a ``const``-qualified ``restrict`` pointer argument.
        return ast.Decl(ctype, symbol,
                        pointers=[("restrict",)],
                        qualifiers=["const"])

    expression = builder.expression
    # Scalars are treated as 1x1 MatrixBase objects
    shape = (1,) if expression.rank == 0 else expression.shape

    # Declare the output as an Eigen::Map over the raw C buffer so results
    # move between Eigen and C data structs.
    statements.append(ast.FlatBlock("/* Map eigen tensor into C struct */\n"))
    n_temps = len(declared_temps)
    tensor_sym = ast.Symbol("T%d" % n_temps)
    buffer_sym = ast.Symbol("A%d" % n_temps)
    map_type = "Eigen::Map<%s >" % eigen_matrixbase_type(shape)
    output_decl = ast.Decl(SCALAR_TYPE, ast.Symbol(buffer_sym, shape))
    statements.append(
        ast.FlatBlock("%s %s((%s *)%s);\n" % (map_type, tensor_sym,
                                               SCALAR_TYPE, buffer_sym)))

    # The linear algebra itself: one C++ string operating on Eigen
    # matrices/vectors, accumulated into the mapped output.
    statements.append(ast.FlatBlock("/* Linear algebra expression */\n"))
    linalg = ast.FlatBlock(slate_to_cpp(expression, declared_temps))
    statements.append(ast.Incr(tensor_sym, linalg))

    # Arguments for the macro kernel: output first, then coordinates
    kernel_args = [output_decl, const_restrict(SCALAR_TYPE, builder.coord_sym)]

    # Orientation information
    if builder.oriented:
        kernel_args.append(const_restrict("int", builder.cell_orientations_sym))

    # Coefficient information
    coefficients = expression.coefficients()
    kernel_args.extend(const_restrict(SCALAR_TYPE, sym)
                       for coeff in coefficients
                       for sym in builder.coefficient(coeff))

    # Facet information
    if builder.needs_cell_facets:
        facet_sym = builder.cell_facet_sym
        facet_arg = ast.Symbol("arg_cell_facets")
        facet_ctype = as_cstr(cell_to_facets_dtype)

        # cell_facets is locally a flattened 2-D array. We typecast here so we
        # can access its entries using standard array notation.
        typecast = "%s (*%s)[2] = (%s (*)[2])%s;\n" % (facet_ctype, facet_sym,
                                                       facet_ctype, facet_arg)
        statements.insert(0, ast.FlatBlock(typecast))
        kernel_args.append(const_restrict(facet_ctype, facet_arg))

    # NOTE: We need to be careful about the ordering here. Mesh layers are
    # added as the final argument to the kernel.
    if builder.needs_mesh_layers:
        kernel_args.append(ast.Decl("int", builder.mesh_layer_sym))

    # Macro kernel
    kernel_name = "compile_slate"
    kernel_decl = ast.FunDecl("void", kernel_name, kernel_args,
                              ast.Block(statements),
                              pred=["static", "inline"])

    # Construct the final ast
    full_ast = ast.Node(builder.templated_subkernels + [kernel_decl])

    # Wrap the ast as a PyOP2 kernel and include the Eigen header files.
    # The builder's own include list is extended in place.
    include_dirs = builder.include_dirs
    include_dirs.extend("%s/include/eigen3/" % petsc_dir
                        for petsc_dir in PETSC_DIR)
    op2kernel = op2.Kernel(full_ast,
                           kernel_name,
                           cpp=True,
                           include_dirs=include_dirs,
                           headers=['#include <Eigen/Dense>',
                                    '#define restrict __restrict'])

    op2kernel.num_flops = builder.expression_flops + builder.terminal_flops

    # Send back a "TSFC-like" SplitKernel object with an
    # index and KernelInfo
    return KernelInfo(kernel=op2kernel,
                      integral_type=builder.integral_type,
                      oriented=builder.oriented,
                      subdomain_id="otherwise",
                      domain_number=0,
                      coefficient_map=tuple(range(len(coefficients))),
                      needs_cell_facets=builder.needs_cell_facets,
                      pass_layer_arg=builder.needs_mesh_layers,
                      needs_cell_sizes=builder.needs_cell_sizes)
def generate_cell_wrapper(itspace, args, forward_args=(), kernel_name=None, wrapper_name=None):
    """Build the C source of a wrapper that runs the kernel on one cell.

    There is no iteration loop; the cell index is an argument of the wrapper
    and the cellwise data is gathered for it. For extruded, the numbering of
    the cells is columnwise continuous, bottom to top.

    :param itspace: :class:`IterationSpace` object. Can be built from
                    iteration :class:`Set` using pyop2.base.build_itspace
    :param args: :class:`Arg`s
    :param forward_args: To forward unprocessed arguments to the kernel via the wrapper,
                         give an iterable of strings describing their C types.
    :param kernel_name: Kernel function name
    :param wrapper_name: Wrapper function name

    :return: string containing the C code for the single-cell wrapper
    """
    # A wrapper is "direct" when none of its arguments goes through a map.
    is_direct = all(arg.map is None for arg in args)
    snippets = wrapper_snippets(itspace, args, kernel_name=kernel_name, wrapper_name=wrapper_name)

    int_ctype = as_cstr(IntType)

    if not itspace._extruded:
        index_exprs = "{0} i = cell;".format(int_ctype)
        nlayers_arg = ""
        extr_pos_loop = ""
    else:
        # Split the flat cell number into column (i) and layer (j).
        index_exprs = """{0} i = cell / nlayers;
    {0} j = cell % nlayers;""".format(int_ctype)
        nlayers_arg = ", {0} nlayers".format(int_ctype)
        if is_direct:
            extr_pos_loop = "{"
        else:
            extr_pos_loop = "for ({0} j_0 = 0; j_0 < j; ++j_0) {{".format(int_ctype)

    # Forwarded arguments appear as farg0, farg1, ... in both signatures.
    wrapper_fargs = []
    for position, ctype in enumerate(forward_args):
        wrapper_fargs.append("{ctype} farg{position}, ".format(position=position, ctype=ctype))
    kernel_fargs = ["farg{0}, ".format(position) for position in range(len(forward_args))]

    snippets['index_exprs'] = index_exprs
    snippets['nlayers_arg'] = nlayers_arg
    snippets['extr_pos_loop'] = extr_pos_loop
    snippets['wrapper_fargs'] = "".join(wrapper_fargs)
    snippets['kernel_fargs'] = "".join(kernel_fargs)
    snippets['IntType'] = int_ctype

    c_template = """
#include <inttypes.h>

static inline void %(wrapper_name)s(%(wrapper_fargs)s%(wrapper_args)s%(nlayers_arg)s, %(IntType)s cell)
{
    %(user_code)s
    %(wrapper_decs)s;
    %(map_decl)s
    %(vec_decs)s;
    %(index_exprs)s
    %(vec_inits)s;
    %(map_init)s;
    %(extr_pos_loop)s
        %(apply_offset)s;
    %(extr_loop_close)s
    %(map_bcs_m)s;
    %(buffer_decl)s;
    %(buffer_gather)s
    %(kernel_name)s(%(kernel_fargs)s%(kernel_args)s);
    %(itset_loop_body)s
    %(map_bcs_p)s;
}
"""
    return c_template % snippets
def compile_element(expression, coordinates, parameters=None):
    """Generates C code for point evaluations.

    The returned source is the filled-in ``evaluate`` C template followed by
    the generated ``evaluate_kernel`` code.

    :arg expression: UFL expression; it must contain no Arguments and
        exactly one (non-mixed) coefficient
    :arg coordinates: coordinate field
    :arg parameters: form compiler parameters, applied on top of the values
        returned by ``default_parameters()``
    :returns: C code as string
    :raises ValueError: if the expression contains Arguments
    :raises NotImplementedError: if the coefficient lives on a mixed element
    """
    merged_parameters = default_parameters()
    if parameters is not None:
        merged_parameters.update(parameters)
    parameters = merged_parameters

    # No arguments, please!
    # This must be raised: returning the exception object would hand the
    # caller a ValueError instance where a C source string is expected.
    if extract_arguments(expression):
        raise ValueError("Cannot interpolate UFL expression with Arguments!")

    # Apply UFL preprocessing
    expression = tsfc.ufl_utils.preprocess_expression(expression)

    # Collect required coefficients (unpacking enforces exactly one)
    coefficient, = extract_coefficients(expression)

    # Point evaluation of mixed coefficients not supported here.
    # The exact type comparison is deliberate: it is kept as in the original
    # so that subclasses of MixedElement are not rejected.
    if type(coefficient.ufl_element()) == MixedElement:
        raise NotImplementedError("Cannot point evaluate mixed elements yet!")

    # Replace coordinates (if any)
    domain = expression.ufl_domain()
    assert coordinates.ufl_domain() == domain

    # Initialise kernel builder
    builder = firedrake_interface.KernelBuilderBase()
    builder.domain_coordinate[domain] = coordinates
    x_arg = builder._coefficient(coordinates, "x")
    f_arg = builder._coefficient(coefficient, "f")

    # TODO: restore this for expression evaluation!
    # expression = ufl_utils.split_coefficients(expression, builder.coefficient_split)

    # Translate to GEM
    cell = domain.ufl_cell()
    dim = cell.topological_dimension()
    point = gem.Variable('X', (dim,))
    point_arg = ast.Decl(SCALAR_TYPE, ast.Symbol('X', rank=(dim,)))

    config = dict(interface=builder,
                  ufl_cell=coordinates.ufl_domain().ufl_cell(),
                  precision=parameters["precision"],
                  point_indices=(),
                  point_expr=point)
    # TODO: restore this for expression evaluation!
    # config["cellvolume"] = cellvolume_generator(coordinates.ufl_domain(), coordinates, config)
    context = tsfc.fem.GemPointContext(**config)

    # Abs-simplification
    expression = tsfc.ufl_utils.simplify_abs(expression)

    # Translate UFL -> GEM
    translator = tsfc.fem.Translator(context)
    result, = map_expr_dags(translator, [expression])

    tensor_indices = ()
    if expression.ufl_shape:
        # One free index per tensor axis of the expression
        tensor_indices = tuple(gem.Index() for _ in expression.ufl_shape)
        return_variable = gem.Indexed(gem.Variable('R', expression.ufl_shape), tensor_indices)
        result_arg = ast.Decl(SCALAR_TYPE, ast.Symbol('R', rank=expression.ufl_shape))
        result = gem.Indexed(result, tensor_indices)
    else:
        # Scalar results are written to a length-1 array
        return_variable = gem.Indexed(gem.Variable('R', (1,)), (0,))
        result_arg = ast.Decl(SCALAR_TYPE, ast.Symbol('R', rank=(1,)))

    # Unroll
    max_extent = parameters["unroll_indexsum"]
    if max_extent:
        def predicate(index):
            return index.extent <= max_extent
        result, = gem.optimise.unroll_indexsum([result], predicate=predicate)

    # Translate GEM -> COFFEE
    result, = gem.impero_utils.preprocess_gem([result])
    impero_c = gem.impero_utils.compile_gem([(return_variable, result)], tensor_indices)
    body = generate_coffee(impero_c, {}, parameters["precision"])

    # Build kernel tuple
    kernel_code = builder.construct_kernel(
        "evaluate_kernel", [result_arg, point_arg, x_arg, f_arg], body)

    # Fill the code template
    extruded = isinstance(cell, TensorProductCell)

    code = {
        "geometric_dimension": cell.geometric_dimension(),
        "layers_arg": ", int const *__restrict__ layers" if extruded else "",
        "layers": ", layers" if extruded else "",
        "IntType": as_cstr(IntType),
    }
    # if maps are the same, only need to pass one of them
    if coordinates.cell_node_map() == coefficient.cell_node_map():
        code["wrapper_map_args"] = "%(IntType)s const *__restrict__ coords_map" % code
        code["map_args"] = "f->coords_map"
    else:
        code["wrapper_map_args"] = "%(IntType)s const *__restrict__ coords_map, %(IntType)s const *__restrict__ f_map" % code
        code["map_args"] = "f->coords_map, f->f_map"

    evaluate_template_c = """
static inline void wrap_evaluate(double* const result, double* const X, int const start, int const end%(layers_arg)s,
    double const *__restrict__ coords, double const *__restrict__ f, %(wrapper_map_args)s);

int evaluate(struct Function *f, double *x, double *result)
{
    struct ReferenceCoords reference_coords;
    %(IntType)s cell = locate_cell(f, x, %(geometric_dimension)d, &to_reference_coords, &to_reference_coords_xtr, &reference_coords);
    if (cell == -1) {
        return -1;
    }

    if (!result) {
        return 0;
    }
    int layers[2] = {0, 0};
    if (f->extruded != 0) {
        int nlayers = f->n_layers;
        layers[1] = cell %% nlayers + 2;
        cell = cell / nlayers;
    }

    wrap_evaluate(result, reference_coords.X, cell, cell+1%(layers)s, f->coords, f->f, %(map_args)s);
    return 0;
}
"""

    return (evaluate_template_c % code) + kernel_code.gencode()