def terminal_temporaries(builder, declared_temps):
    """Emit declaration and zero-initialisation statements for the
    temporaries registered on the builder.

    For every expression stored in ``builder.temps`` an Eigen temporary
    of matching shape is declared and then reset with ``setZero()``.

    :arg builder: The :class:`LocalKernelBuilder` containing all relevant
        expression information.
    :arg declared_temps: A `dict` keeping track of all declared
        temporaries. It is updated in place: every expression handled
        here is mapped to its temporary.

    Returns: a list of COFFEE statements, starting with a header comment
        block.
    """
    statements = [ast.FlatBlock("/* Declare and initialize */\n")]

    for tensor in builder.temps:
        temporary = builder.temps[tensor]
        declaration = ast.Decl(eigen_matrixbase_type(tensor.shape), temporary)
        zero_fill = ast.FlatBlock("%s.setZero();\n" % temporary)
        statements.extend((declaration, zero_fill))
        # Record the temporary so later stages can refer to it.
        declared_temps[tensor] = temporary

    return statements
def expression_kernel(expr, args):
    """Build a :class:`pyop2.Kernel` named ``"expression"`` from the
    processed UFL expression `expr` and the corresponding `args`.

    The generated function declares an ``int dim`` counter and wraps the
    translated expression in a loop running from 0 up to the ``cdim`` of
    the DoF data set of the first argument's function space.

    :arg expr: the processed UFL expression.
    :arg args: the kernel arguments; each must expose an ``arg``
        attribute, and the first one a ``function`` attribute.

    Returns: the kernel, or `None` when `expr` is exactly a `Zero`.
    """
    # Empty slot indicating assignment to indexed LHS, so don't do anything
    if type(expr) is Zero:
        return None

    function_space = args[0].function.function_space()
    counter = ast.Symbol("dim")
    translated = _ast(expr)

    dim_loop = ast.For(
        ast.Assign(counter, ast.Symbol(0)),
        ast.Less(counter, ast.Symbol(function_space.dof_dset.cdim)),
        ast.Incr(counter, ast.Symbol(1)),
        translated,
    )
    body = ast.Block((ast.Decl("int", counter), dim_loop))

    signature = [argument.arg for argument in args]
    fundecl = ast.FunDecl("void", "expression", signature, body)
    return op2.Kernel(fundecl, "expression")
def construct_kernel(self, return_arg, impero_c, precision, index_names):
    """Constructs an :class:`ExpressionKernel`.

    The argument list is assembled in this order: the return value, the
    cell orientations (only when ``self.oriented``), the cell sizes
    (only when ``self.cell_sizes``), ``self.kernel_args`` and finally one
    ``const`` declaration per entry of ``self.tabulations``.

    :arg return_arg: COFFEE argument for the return value
    :arg impero_c: first argument forwarded to ``generate_coffee`` to
        produce the function body
    :arg precision: forwarded to ``generate_coffee``
    :arg index_names: forwarded to ``generate_coffee``
    :returns: :class:`ExpressionKernel` object
    """
    kernel_arguments = [return_arg]
    if self.oriented:
        kernel_arguments.append(cell_orientations_coffee_arg)
    if self.cell_sizes:
        kernel_arguments.append(self.cell_sizes_arg)
    kernel_arguments.extend(self.kernel_args)

    function_body = generate_coffee(impero_c, index_names, precision)

    # Tabulations are passed as trailing read-only array arguments.
    kernel_arguments.extend(
        coffee.Decl(self.scalar_type,
                    coffee.Symbol(table_name, rank=table_shape),
                    qualifiers=["const"])
        for table_name, table_shape in self.tabulations
    )

    parent = super(ExpressionKernelBuilder, self)
    kernel_code = parent.construct_kernel("expression_kernel",
                                          kernel_arguments,
                                          function_body)
    return ExpressionKernel(kernel_code,
                            self.oriented,
                            self.cell_sizes,
                            self.coefficients,
                            self.tabulations)
def set_coefficients(self, integral_data, form_data):
    """Prepare the coefficients of the form.

    Sets ``self.coefficient_args`` to a single declaration of the
    coefficient argument ``w`` and fills ``self.coefficient_map`` with
    one prepared expression per enabled coefficient.

    :arg integral_data: UFL integral data
    :arg form_data: UFL form data
    """
    name = "w"
    w_declaration = coffee.Decl(SCALAR_TYPE,
                                coffee.Symbol(name),
                                pointers=[("const",), ()],
                                qualifiers=["const"])
    self.coefficient_args = [w_declaration]

    # enabled_coefficients is a boolean array that indicates which
    # of reduced_coefficients the integral requires.
    for n, enabled in enumerate(integral_data.enabled_coefficients):
        if enabled:
            reduced = form_data.reduced_coefficients[n]
            coefficient = form_data.function_replace_map[reduced]
            self.coefficient_map[coefficient] = prepare_coefficient(
                coefficient, n, name, self.interior_facet)
def _arglist(ir):
    """Generate argument list for tensor tabulation function (only for pyop2).

    The list is ordered as: local tensor ``A``, ``coordinate_dofs``, the
    optional ``cell_orientation_`` argument, one argument per coefficient
    and, for facet integrals, the ``facet_p`` argument.

    :arg ir: intermediate representation `dict`; the keys read here are
        ``prim_idims``, ``integral_type``, ``coefficient_names``,
        ``coefficient_elements``, ``needs_oriented`` and ``cell``.

    Returns: a list of ``pyop2.Decl`` nodes.
    """
    # Named so that the ``float`` / ``int`` builtins are not shadowed.
    float_type = format['float declaration']
    int_type = format['int declaration']

    prim_idims = ir["prim_idims"]
    integral_type = ir["integral_type"]

    # Every interior facet flavour gets a local tensor with doubled extents.
    if integral_type in ("interior_facet", "interior_facet_horiz", "interior_facet_vert"):
        prim_idims = [d*2 for d in prim_idims]
    localtensor = pyop2.Decl(float_type, pyop2.Symbol("A", tuple(prim_idims) or (1,)))

    coordinates = pyop2.Decl("%s**" % float_type, pyop2.Symbol("coordinate_dofs", ()))

    coeffs = []
    for n, e in zip(ir['coefficient_names'], ir['coefficient_elements']):
        if e.family() == 'Real':
            # 'Real' coefficients are passed with a single level of
            # indirection and renamed: first character replaced by "c".
            typ = "%s*" % float_type
            symbol_name = "%s%s" % ("c", n[1:])
        else:
            typ = "%s**" % float_type
            symbol_name = "%s%s" % ("", n)
        coeffs.append(pyop2.Decl(typ, pyop2.Symbol(symbol_name, ())))

    arglist = [localtensor, coordinates]

    # embedded manifold, passing in cell_orientation
    if ir['needs_oriented'] and \
            ir['cell'].topological_dimension() != ir['cell'].geometric_dimension():
        cell_orientation = pyop2.Decl("%s**" % int_type,
                                      pyop2.Symbol("cell_orientation_", ()))
        arglist.append(cell_orientation)

    arglist += coeffs

    if integral_type in ("exterior_facet", "exterior_facet_vert"):
        arglist.append(pyop2.Decl("%s*" % int_type, pyop2.Symbol("facet_p", ()),
                                  qualifiers=["unsigned"]))
    if integral_type in ("interior_facet", "interior_facet_vert"):
        arglist.append(pyop2.Decl(int_type, pyop2.Symbol("facet_p", (2,)),
                                  qualifiers=["unsigned"]))

    return arglist
def compile_element(expression, dual_space=None, parameters=None, name="evaluate"):
    """Generate code for point evaluations.

    :arg expression: A UFL expression (may contain up to one coefficient,
        or one argument)
    :arg dual_space: if the expression has an argument, should we also
        distribute residual data?
    :arg parameters: optional `dict` of overrides, merged on top of
        ``default_parameters()``.
    :arg name: suffix of the generated kernel name; the kernel is called
        ``"pyop2_kernel_" + name``.
    :returns: Some coffee AST
    """
    # User-supplied parameters take precedence over the defaults.
    merged_parameters = default_parameters()
    if parameters is not None:
        merged_parameters.update(parameters)
    parameters = merged_parameters

    expression = tsfc.ufl_utils.preprocess_expression(expression)

    # Collect required coefficients. If the expression does not hold
    # exactly one coefficient the unpacking raises ValueError and the
    # expression is treated as holding exactly one argument instead.
    try:
        arg, = extract_coefficients(expression)
        argument_multiindices = ()
        coefficient = True
        # One free index per value dimension (empty for scalars).
        tensor_indices = tuple(gem.Index() for _ in expression.ufl_shape)
    except ValueError:
        arg, = extract_arguments(expression)
        finat_elem = create_element(arg.ufl_element())
        argument_multiindices = (finat_elem.get_indices(), )
        argument_multiindex, = argument_multiindices
        value_shape = finat_elem.value_shape
        # The trailing indices of the argument multiindex address the
        # value components.
        tensor_indices = (argument_multiindex[-len(value_shape):]
                          if value_shape else ())
        coefficient = False

    # Replace coordinates (if any)
    builder = firedrake_interface.KernelBuilderBase(scalar_type=ScalarType_c)
    domain = expression.ufl_domain()

    # Translate to GEM
    cell = domain.ufl_cell()
    dim = cell.topological_dimension()
    point = gem.Variable('X', (dim, ))
    point_arg = ast.Decl(ScalarType_c, ast.Symbol('X', rank=(dim, )))

    context = tsfc.fem.GemPointContext(
        interface=builder,
        ufl_cell=cell,
        precision=parameters["precision"],
        point_indices=(),
        point_expr=point,
        argument_multiindices=argument_multiindices,
    )

    # Abs-simplification
    expression = tsfc.ufl_utils.simplify_abs(expression)

    # Translate UFL -> GEM
    if coefficient:
        assert dual_space is None
        f_arg = [builder._coefficient(arg, "f")]
    else:
        f_arg = []
    translator = tsfc.fem.Translator(context)
    result, = map_expr_dags(translator, [expression])

    b_arg = []
    if not coefficient:
        return_variable = gem.Indexed(
            gem.Variable('R', finat_elem.index_shape), argument_multiindex)
        result = gem.Indexed(result, tensor_indices)
        if dual_space:
            # Multiply by the residual data "b".
            elem = create_element(dual_space.ufl_element())
            if elem.value_shape:
                b_shape = elem.value_shape
                var = gem.Indexed(gem.Variable("b", b_shape), tensor_indices)
                b_arg = [ast.Decl(ScalarType_c, ast.Symbol("b", rank=b_shape))]
            else:
                var = gem.Indexed(gem.Variable("b", (1, )), (0, ))
                b_arg = [ast.Decl(ScalarType_c, ast.Symbol("b", rank=(1, )))]
            result = gem.Product(result, var)
        result_arg = ast.Decl(ScalarType_c,
                              ast.Symbol('R', rank=finat_elem.index_shape))
    elif expression.ufl_shape:
        return_variable = gem.Indexed(
            gem.Variable('R', expression.ufl_shape), tensor_indices)
        result_arg = ast.Decl(ScalarType_c,
                              ast.Symbol('R', rank=expression.ufl_shape))
        result = gem.Indexed(result, tensor_indices)
    else:
        # Scalar coefficient: the result is stored in a length-1 array.
        return_variable = gem.Indexed(gem.Variable('R', (1, )), (0, ))
        result_arg = ast.Decl(ScalarType_c, ast.Symbol('R', rank=(1, )))

    # Unroll
    max_extent = parameters["unroll_indexsum"]
    if max_extent:
        def predicate(index):
            return index.extent <= max_extent
        result, = gem.optimise.unroll_indexsum([result], predicate=predicate)

    # Translate GEM -> COFFEE
    result, = gem.impero_utils.preprocess_gem([result])
    impero_c = gem.impero_utils.compile_gem([(return_variable, result)],
                                            tensor_indices)
    body = generate_coffee(impero_c, {}, parameters["precision"], ScalarType_c)

    # Build kernel tuple
    kernel_arguments = [result_arg] + b_arg + f_arg + [point_arg]
    return builder.construct_kernel("pyop2_kernel_" + name,
                                    kernel_arguments, body)
def tensor_assembly_calls(builder):
    """Generates a block of statements for assembling the local
    finite element tensors.

    :arg builder: The :class:`LocalKernelBuilder` containing all relevant
        expression information and assembly calls.

    Returns: a list of COFFEE statements.
    """
    calls_by_type = builder.assembly_calls
    statements = [ast.FlatBlock("/* Assemble local tensors */\n")]

    # Cell integrals are straightforward. Just splat them out.
    statements.extend(calls_by_type["cell"])

    if builder.needs_cell_facets:
        # A single loop over the cell facets is generated:
        #
        #    FOR (facet=0; facet<num_facets; facet++):
        #        IF (facet is exterior):
        #            *exterior calls
        #        IF (facet is interior):
        #            *interior calls
        #
        # An IF-block is only emitted for a facet kind that actually has
        # calls. Column 0 of the cell facet array is compared against
        # `0` for exterior and `1` for interior facets.
        statements.append(ast.FlatBlock("/* Loop over cell facets */\n"))

        interior_calls = list(chain.from_iterable(
            calls_by_type[key]
            for key in ("interior_facet", "interior_facet_vert")))
        exterior_calls = list(chain.from_iterable(
            calls_by_type[key]
            for key in ("exterior_facet", "exterior_facet_vert")))

        # Generate logical statements for handling exterior/interior facet
        # integrals on subdomains: consecutive entries sharing the same
        # first item are grouped under one IF comparing column 1 of the
        # cell facet array against that item.
        # Currently only facet integrals are supported.
        subdomain_targets = (
            ("subdomains_exterior_facet", exterior_calls),
            ("subdomains_interior_facet", interior_calls),
        )
        for sd_type, target in subdomain_targets:
            guarded = []
            for sd, sd_calls in groupby(calls_by_type[sd_type], lambda x: x[0]):
                _, grouped = zip(*sd_calls)
                on_subdomain = ast.Eq(
                    ast.Symbol(builder.cell_facet_sym,
                               rank=(builder.it_sym, 1)),
                    sd)
                guarded.append(
                    ast.If(on_subdomain,
                           (ast.Block(grouped, open_scope=True), )))
            target.extend(guarded)

        # Compute the number of facets to loop over
        domain = builder.expression.ufl_domain()
        if domain.cell_set._extruded:
            num_facets = domain.ufl_cell()._cells[0].num_facets()
        else:
            num_facets = domain.ufl_cell().num_facets()

        loop_body = []
        for facet_calls, marker in ((exterior_calls, 0), (interior_calls, 1)):
            if facet_calls:
                has_marker = ast.Eq(
                    ast.Symbol(builder.cell_facet_sym,
                               rank=(builder.it_sym, 0)),
                    marker)
                loop_body.append(
                    ast.If(has_marker,
                           (ast.Block(facet_calls, open_scope=True), )))

        statements.append(
            ast.For(ast.Decl("unsigned int", builder.it_sym, init=0),
                    ast.Less(builder.it_sym, num_facets),
                    ast.Incr(builder.it_sym, 1),
                    loop_body))

    if builder.needs_mesh_layers:
        # Four independent IF-blocks are generated, one per horizontal
        # facet integral type, each guarded by a condition on the layer:
        #
        #    IF (layer < num_layers):   *interior_facet_horiz_top calls
        #    IF (layer > 0):            *interior_facet_horiz_bottom calls
        #    IF (layer == num_layers):  *exterior_facet_top calls
        #    IF (layer == 0):           *exterior_facet_bottom calls
        #
        # FIXME: No variable layers assumption
        statements.append(ast.FlatBlock("/* Mesh levels: */\n"))

        num_layers = ast.Symbol(builder.mesh_layer_count_sym, rank=(0, ))
        layer = builder.mesh_layer_sym
        guarded_types = (
            ("interior_facet_horiz_top", ast.Less(layer, num_layers)),
            ("interior_facet_horiz_bottom", ast.Greater(layer, 0)),
            ("exterior_facet_top", ast.Eq(layer, num_layers)),
            ("exterior_facet_bottom", ast.Eq(layer, 0)),
        )
        for integral_type, condition in guarded_types:
            layer_block = ast.Block(calls_by_type[integral_type],
                                    open_scope=True)
            statements.append(ast.If(condition, (layer_block, )))

    return statements
def generate_kernel_ast(builder, statements, declared_temps):
    """Glues together the complete AST for the Slate expression
    contained in the :class:`LocalKernelBuilder`.

    :arg builder: The :class:`LocalKernelBuilder` containing all relevant
        expression information.
    :arg statements: A list of COFFEE objects containing all assembly
        calls and temporary declarations. It is extended in place.
    :arg declared_temps: A `dict` containing all previously declared
        temporaries.

    Return: A `KernelInfo` object describing the complete AST.
    """
    def const_restrict_arg(ctype, symbol):
        # Read-only, restrict-qualified pointer argument.
        return ast.Decl(ctype, symbol,
                        pointers=[("restrict", )],
                        qualifiers=["const"])

    slate_expr = builder.expression
    # Scalars are treated as 1x1 MatrixBase objects
    shape = (1, ) if slate_expr.rank == 0 else slate_expr.shape

    # Now we create the result statement by declaring its eigen type and
    # using Eigen::Map to move between Eigen and C data structs.
    statements.append(ast.FlatBlock("/* Map eigen tensor into C struct */\n"))
    result_sym = ast.Symbol("T%d" % len(declared_temps))
    result_data_sym = ast.Symbol("A%d" % len(declared_temps))
    result_type = "Eigen::Map<%s >" % eigen_matrixbase_type(shape)
    result = ast.Decl(ScalarType_c,
                      ast.Symbol(result_data_sym),
                      pointers=[("restrict", )])
    map_line = "%s %s((%s *)%s);\n" % (result_type, result_sym,
                                       ScalarType_c, result_data_sym)
    statements.append(ast.FlatBlock(map_line))

    # Generate the complete c++ string performing the linear algebra
    # operations on Eigen matrices/vectors
    statements.append(ast.FlatBlock("/* Linear algebra expression */\n"))
    linear_algebra = ast.FlatBlock(slate_to_cpp(slate_expr, declared_temps))
    statements.append(ast.Incr(result_sym, linear_algebra))

    # Generate arguments for the macro kernel: output first, then the
    # coordinates.
    args = [result, const_restrict_arg(ScalarType_c, builder.coord_sym)]

    # Orientation information
    if builder.oriented:
        args.append(const_restrict_arg("int", builder.cell_orientations_sym))

    # Coefficient information
    for coefficient in slate_expr.coefficients():
        args.extend([const_restrict_arg(ScalarType_c, csym)
                     for csym in builder.coefficient(coefficient)])

    # Facet information
    if builder.needs_cell_facets:
        facet_sym = builder.cell_facet_sym
        facet_arg = ast.Symbol("arg_cell_facets")
        facet_dtype = as_cstr(cell_to_facets_dtype)

        # cell_facets is locally a flattened 2-D array. We typecast here so
        # we can access its entries using standard array notation. The cast
        # goes first so that it precedes every statement using the symbol.
        cast = "%s (*%s)[2] = (%s (*)[2])%s;\n" % (facet_dtype, facet_sym,
                                                   facet_dtype, facet_arg)
        statements.insert(0, ast.FlatBlock(cast))
        args.append(const_restrict_arg(facet_dtype, facet_arg))

    # NOTE: We need to be careful about the ordering here. The number of
    # layers is appended first, immediately followed by the mesh layer.
    if builder.needs_mesh_layers:
        args.append(const_restrict_arg("int", builder.mesh_layer_count_sym))
        args.append(ast.Decl("int", builder.mesh_layer_sym))

    # Cell size information
    if builder.needs_cell_sizes:
        args.append(const_restrict_arg(ScalarType_c, builder.cell_size_sym))

    # Macro kernel
    macro_kernel_name = "pyop2_kernel_compile_slate"
    macro_kernel = ast.FunDecl("void", macro_kernel_name, args,
                               ast.Block(statements),
                               pred=["static", "inline"])

    # Construct the final ast
    kernel_ast = ast.Node(builder.templated_subkernels + [macro_kernel])

    # Now we wrap up the kernel ast as a PyOP2 kernel and include the
    # Eigen header files
    include_dirs = list(builder.include_dirs) + [EIGEN_INCLUDE_DIR]
    op2kernel = op2.Kernel(
        kernel_ast,
        macro_kernel_name,
        cpp=True,
        include_dirs=include_dirs,
        headers=['#include <Eigen/Dense>', '#define restrict __restrict'])
    op2kernel.num_flops = builder.expression_flops + builder.terminal_flops

    # Send back a "TSFC-like" SplitKernel object with an
    # index and KernelInfo
    return KernelInfo(kernel=op2kernel,
                      integral_type=builder.integral_type,
                      oriented=builder.oriented,
                      subdomain_id="otherwise",
                      domain_number=0,
                      coefficient_map=slate_expr.coeff_map,
                      needs_cell_facets=builder.needs_cell_facets,
                      pass_layer_arg=builder.needs_mesh_layers,
                      needs_cell_sizes=builder.needs_cell_sizes)
def _generate_integral_ir(points, terms, sets, optimise_parameters, parameters):
    """Generate code to evaluate the element tensor.

    For every loop in `terms` that has entry values, one COFFEE node is
    built: an open-scope block for an empty loop, or a 1- or 2-deep
    ``for`` nest whose innermost body increments the element tensor
    entries.

    :arg points: unused here; kept for interface compatibility.
    :arg terms: `dict` mapping a loop (a sequence of per-level loop
        descriptions, read as ``(index, _, end)``) to a pair
        ``(data, entry_vals)``.
    :arg sets: 4-tuple ``(used_weights, used_psi_tables, used_nzcs,
        trans_set)`` of sets. They are updated in place with the data of
        every term that produced code.
    :arg optimise_parameters: unused here; kept for interface
        compatibility.
    :arg parameters: `dict`; only ``parameters["format"]`` is read, to
        select the element tensor formatter.

    Returns: a tuple ``(nests, num_ops)``. ``num_ops`` is always 0 as no
        operation count is accumulated in this function.

    Raises: :class:`ValueError` if a loop is nested deeper than two levels.
    """
    f_A = format["element tensor"][parameters["format"]]

    # Operations are not counted here; the value is returned only to keep
    # the return shape stable.
    num_ops = 0

    # Extract sets.
    used_weights, used_psi_tables, used_nzcs, trans_set = sets

    nests = []

    # Loop terms and create code.
    for loop, (data, entry_vals) in terms.items():

        # If we don't have any entry values, there's no need to generate
        # the loop.
        if not entry_vals:
            continue

        # Get data.
        t_set, u_weights, u_psi_tables, u_nzcs, _basis_consts = data

        # If we have a value, then we also need to update the sets of
        # used variables.
        trans_set.update(t_set)
        used_weights.update(u_weights)
        used_psi_tables.update(u_psi_tables)
        used_nzcs.update(u_nzcs)

        depth = len(loop)

        # @@@: A[0][0] += FE0[ip][j]*FE0[ip][k]*W24[ip]*det;
        entry_ir = []
        for entry, value, _ops in entry_vals:
            # Left hand side
            it_vars = entry if depth > 0 else (0,)
            rank = tuple(i.loop_index if hasattr(i, 'loop_index') else i
                         for i in it_vars)
            offset = tuple((1, int(i.offset)) if hasattr(i, 'offset') else (1, 0)
                           for i in it_vars)
            local_tensor = pyop2.Symbol(f_A(''), rank, offset)
            # Right hand side
            pyop2_rhs = visit_rhs(value)
            entry_ir.append(pyop2.Incr(local_tensor, pyop2_rhs,
                                       "#pragma coffee expression"))

        # Disgusting hack, ensure resulting IR comes out in same order
        # on all processes.
        entry_ir = sorted(entry_ir, key=lambda x: x.gencode())

        if depth == 0:
            nest = pyop2.Block(entry_ir, open_scope=True)
        elif depth in (1, 2):
            it_var = c_sym(loop[0][0])
            end = c_sym(loop[0][2])
            nest = pyop2.For(pyop2.Decl("int", it_var, c_sym(0)),
                             pyop2.Less(it_var, end),
                             pyop2.Incr(it_var, c_sym(1)),
                             pyop2.Block(entry_ir, open_scope=True),
                             "#pragma coffee itspace")
            if depth == 2:
                # Wrap the entries in an inner loop and make it the body
                # of the outer one.
                it_var = c_sym(loop[1][0])
                end = c_sym(loop[1][2])
                nest_k = pyop2.For(pyop2.Decl("int", it_var, c_sym(0)),
                                   pyop2.Less(it_var, end),
                                   pyop2.Incr(it_var, c_sym(1)),
                                   pyop2.Block(entry_ir, open_scope=True),
                                   "#pragma coffee itspace")
                nest.children[0] = pyop2.Block([nest_k], open_scope=True)
        else:
            # Without this guard the nest built for the previous term
            # would be silently appended a second time (or the name would
            # be unbound on the first term).
            raise ValueError(
                "Cannot generate a loop nest of depth %d; "
                "only depths 0, 1 and 2 are supported." % depth)
        nests.append(nest)

    return nests, num_ops
def get_restriction_kernel(fiat_element, unique_indices, dim=1, no_weights=False):
    """Build the ``restriction`` :class:`op2.Kernel`.

    The generated C function accumulates, for every coarse DoF ``i``,
    fine DoF ``j`` and component ``k``::

        coarse[k + i*dim] += fine[j][k] (* weights[i][j]) (* count_weights[j][0])

    The ``weights`` factor is dropped when all restriction weights are
    (numerically) one, and the ``count_weights`` factor and argument are
    dropped when `no_weights` is set.

    :arg fiat_element: element passed to ``restriction_weights``.
    :arg unique_indices: index selecting rows of the restriction weights
        before they are transposed.
    :arg dim: number of components per DoF.
    :arg no_weights: if true, the kernel takes no ``count_weights``
        argument.

    Returns: the :class:`op2.Kernel`.
    """
    def counting_loop(index, extent, inner):
        # for (int index = 0; index < extent; index++) { inner }
        return ast.For(ast.Decl("int", index, ast.c_sym(0)),
                       ast.Less(index, ast.c_sym(extent)),
                       ast.Incr(index, ast.c_sym(1)),
                       ast.Block([inner], open_scope=True))

    weights = restriction_weights(fiat_element)[unique_indices].T
    ncdof = weights.shape[0]
    nfdof = weights.shape[1]

    arglist = [
        ast.Decl("double", ast.Symbol("coarse", (ncdof * dim, ))),
        ast.Decl("double *restrict *restrict ", ast.Symbol("fine", ()),
                 qualifiers=["const"]),
    ]
    if not no_weights:
        arglist.append(
            ast.Decl("double *restrict *restrict",
                     ast.Symbol("count_weights", ()),
                     qualifiers=["const"]))

    all_ones = np.allclose(weights, 1.0)

    # The weights table is only emitted when it is not trivially one.
    prologue = []
    if not all_ones:
        table_sym = ast.Symbol("weights", (ncdof, nfdof))
        table_init = ast.ArrayInit(format_array_literal(weights))
        prologue.append(
            ast.Decl("double", table_sym, table_init, qualifiers=["const"]))

    i = ast.Symbol("i", ())
    j = ast.Symbol("j", ())
    k = ast.Symbol("k", ())

    fine = ast.Symbol("fine", (j, k))

    # Collect the scaling applied to the fine value, if any.
    scale = None
    if not all_ones:
        scale = ast.Symbol("weights", (i, j))
    if not no_weights:
        count = ast.Symbol("count_weights", (j, 0))
        scale = count if scale is None else ast.Prod(scale, count)
    contribution = fine if scale is None else ast.Prod(fine, scale)

    coarse_entry = ast.Symbol(
        "coarse", (ast.Sum(k, ast.Prod(i, ast.c_sym(dim))), ))
    accumulate = ast.Incr(coarse_entry, contribution)

    k_loop = counting_loop(k, dim, accumulate)
    j_loop = counting_loop(j, nfdof, k_loop)
    i_loop = counting_loop(i, ncdof, j_loop)

    fundecl = ast.FunDecl("void", "restriction", arglist,
                          ast.Block(prologue + [i_loop]),
                          pred=["static", "inline"])

    return op2.Kernel(fundecl, "restriction", opts=parameters["coffee"])
def auxiliary_temporaries(builder, declared_temps):
    """Generate the statements needed by expressions that require
    additional (auxiliary) temporaries.

    Two kinds of auxiliary expression are handled:

    * :class:`Inverse` nodes referenced more than once are evaluated
      into their own temporary;
    * :class:`Action` nodes get a flat vector temporary holding the
      entries of the actee coefficient, unless one was already declared.

    :arg builder: a :class:`KernelBuilder` object that contains all the
        necessary temporary and expression information.
    :arg declared_temps: a `dict` of temporaries that have already been
        declared and assigned values. This will be updated in this method
        and referenced later in the compiler.

    Returns: a list of auxiliary statements containing temporary
        declarations and any code-blocks needed to evaluate the
        expression.

    Raises: :class:`NotImplementedError` for any other expression type.
    """
    aux_statements = []

    for exp in builder.aux_exprs:
        if isinstance(exp, Inverse):
            # Inverses that are only used once do not get a temporary.
            if builder._ref_counts[exp] <= 1:
                continue

            # Generate the expression first, then assign it to a freshly
            # numbered temporary.
            result = metaphrase_slate_to_cpp(exp, declared_temps)
            temp = ast.Symbol("auxT%d" % len(declared_temps))
            aux_statements.extend([
                ast.Decl(eigen_matrixbase_type(exp.shape), temp),
                ast.FlatBlock("%s.setZero();\n" % temp),
                ast.Assign(temp, result),
            ])

            # Update declared temps
            declared_temps[exp] = temp
            continue

        if not isinstance(exp, Action):
            raise NotImplementedError(
                "Auxiliary expr type %s not currently implemented."
                % type(exp))

        # Action computations are relatively inexpensive, so
        # we don't waste memory space on creating temps for
        # these expressions. However, we must create a temporary
        # for the actee coefficient (if we haven't already).
        actee, = exp.actee
        if actee in declared_temps:
            continue

        # One (node extent, dof extent) pair per component space.
        V = actee.function_space()
        extents = [(Vi.finat_element.space_dimension(), np.prod(Vi.shape))
                   for Vi in V.split()]

        # Declare a flat temporary big enough for all components.
        ctemp = ast.Symbol("auxT%d" % len(declared_temps))
        total_size = sum(n * d for (n, d) in extents)
        aux_statements.append(
            ast.Decl(eigen_matrixbase_type(shape=(total_size, )), ctemp))
        aux_statements.append(ast.FlatBlock("%s.setZero();\n" % ctemp))

        # Now we populate the temporary with the coefficient
        # information and insert in the right place.
        offset = 0
        for i, (node_extent, dof_extent) in enumerate(extents):
            isym = ast.Symbol("i1")
            jsym = ast.Symbol("j1")

            # Entry (isym, jsym) of component i lands at
            # offset + dof_extent*isym + jsym in the 1D temporary.
            flat_index = ast.Sum(
                offset, ast.Sum(ast.Prod(dof_extent, isym), jsym))
            source = ast.Symbol(builder.coefficient(actee)[i],
                                rank=(isym, jsym))
            target = ast.Symbol(ctemp, rank=(flat_index, ))

            # Inner-loop running over dof_extent
            dof_loop = ast.For(ast.Decl("unsigned int", jsym, init=0),
                               ast.Less(jsym, dof_extent),
                               ast.Incr(jsym, 1),
                               ast.Assign(target, source))
            # Outer-loop running over node_extent
            node_loop = ast.For(ast.Decl("unsigned int", isym, init=0),
                                ast.Less(isym, node_extent),
                                ast.Incr(isym, 1),
                                dof_loop)
            aux_statements.append(node_loop)

            offset += node_extent * dof_extent

        # Update declared temporaries with the coefficient
        declared_temps[actee] = ctemp

    return aux_statements
def compile_expression(slate_expr, tsfc_parameters=None):
    """Takes a Slate expression `slate_expr` and returns the appropriate
    :class:`firedrake.op2.Kernel` object representing the Slate expression.

    The result is cached on ``slate_expr._metakernel_cache``; a second
    call with the same expression returns the cached tuple.

    :arg slate_expr: a :class:`TensorBase` expression.
    :arg tsfc_parameters: an optional `dict` of form compiler parameters to
                          be passed onto TSFC during the compilation of
                          ufl forms.

    Returns: A `tuple` containing a `SplitKernel(idx, kinfo)`

    Raises: :class:`ValueError` if `slate_expr` is not a `TensorBase` or
            a context kernel has an unrecognised integral type;
            :class:`NotImplementedError` for expressions with arguments
            on mixed function spaces and for unsupported integral types.
    """
    if not isinstance(slate_expr, TensorBase):
        raise ValueError("Expecting a `TensorBase` expression, not %s" % type(slate_expr))

    # TODO: Get PyOP2 to write into mixed dats
    if any(len(a.function_space()) > 1 for a in slate_expr.arguments()):
        raise NotImplementedError("Compiling mixed slate expressions")

    # If the expression has already been symbolically compiled, then
    # simply reuse the produced kernel.
    if slate_expr._metakernel_cache is not None:
        return slate_expr._metakernel_cache

    # Initialize coefficients, shape and statements list
    expr_coeffs = slate_expr.coefficients()

    # We treat scalars as 1x1 MatrixBase objects, so we give
    # the right shape to do so and everything just falls out.
    # This bit here ensures the return result has the right
    # shape
    if slate_expr.rank == 0:
        shape = (1, )
    else:
        shape = slate_expr.shape

    statements = []

    # Create a builder for the Slate expression
    builder = KernelBuilder(expression=slate_expr,
                            tsfc_parameters=tsfc_parameters)

    # Initialize coordinate, cell orientations and facet/layer
    # symbols
    coordsym = ast.Symbol("coords")
    coords = None
    cell_orientations = ast.Symbol("cell_orientations")
    cellfacetsym = ast.Symbol("cell_facets")
    mesh_layer_sym = ast.Symbol("layer")
    # Accumulated include directories of all subkernels (deduplicated
    # at the end of every loop iteration below).
    inc = []

    # We keep track of temporaries that have been declared
    declared_temps = {}
    for cxt_kernel in builder.context_kernels:
        exp = cxt_kernel.tensor
        t = builder.temps[exp]

        if exp not in declared_temps:
            # Declare and initialize the temporary
            statements.append(ast.Decl(eigen_matrixbase_type(exp.shape), t))
            statements.append(ast.FlatBlock("%s.setZero();\n" % t))
            declared_temps[exp] = t

        it_type = cxt_kernel.original_integral_type

        if it_type not in supported_integral_types:
            raise NotImplementedError("Type %s not supported." % it_type)

        # Explicit checking of coordinates: every tensor must live on the
        # same coordinate field as the first one encountered.
        coordinates = exp.ufl_domain().coordinates
        if coords is not None:
            assert coordinates == coords
        else:
            coords = coordinates

        if it_type == "cell":
            # Nothing difficult about cellwise integrals. Just need
            # to get coefficient info, include_dirs and append
            # function calls to the appropriate subkernels.

            # If tensor is mixed, there will be more than one SplitKernel
            incl = []
            for splitkernel in cxt_kernel.tsfc_kernels:
                index = splitkernel.indices
                kinfo = splitkernel.kinfo

                # Generate an iterable of coefficients to pass to the subkernel
                # if any are required
                clist = [
                    c for ci in kinfo.coefficient_map
                    for c in builder.coefficient(exp.coefficients()[ci])
                ]

                # Cell orientations, when needed, precede the coefficients.
                if kinfo.oriented:
                    clist.insert(0, cell_orientations)

                incl.extend(kinfo.kernel._include_dirs)
                tensor = eigen_tensor(exp, t, index)
                statements.append(
                    ast.FunCall(kinfo.kernel.name, tensor, coordsym, *clist))

        elif it_type in [
                "interior_facet", "exterior_facet", "interior_facet_vert",
                "exterior_facet_vert"
        ]:
            # These integral types will require accessing local facet
            # information and looping over facet indices.
            builder.require_cell_facets()
            loop_stmt, incl = facet_integral_loop(cxt_kernel, builder,
                                                  coordsym, cellfacetsym,
                                                  cell_orientations)
            statements.append(loop_stmt)

        elif it_type == "interior_facet_horiz":
            # The infamous interior horizontal facet
            # will have two SplitKernels: one top,
            # one bottom. The mesh layer will determine
            # which kernels we call.
            builder.require_mesh_layers()
            top_sks = [
                k for k in cxt_kernel.tsfc_kernels
                if k.kinfo.integral_type == "exterior_facet_top"
            ]
            bottom_sks = [
                k for k in cxt_kernel.tsfc_kernels
                if k.kinfo.integral_type == "exterior_facet_bottom"
            ]
            assert len(top_sks) == len(bottom_sks), (
                "Number of top and bottom kernels should be equal")

            # Top and bottom kernels need to be sorted by kinfo.indices
            # if the space is mixed to ensure indices match.
            top_sks = sorted(top_sks, key=lambda x: x.indices)
            bottom_sks = sorted(bottom_sks, key=lambda x: x.indices)
            stmt, incl = extruded_int_horiz_facet(exp, builder, top_sks,
                                                  bottom_sks, coordsym,
                                                  mesh_layer_sym,
                                                  cell_orientations)
            statements.append(stmt)

        elif it_type in ["exterior_facet_bottom", "exterior_facet_top"]:
            # These kernels will only be called if we are on
            # the top or bottom layers of the extruded mesh.
            builder.require_mesh_layers()
            stmt, incl = extruded_top_bottom_facet(cxt_kernel, builder,
                                                   coordsym, mesh_layer_sym,
                                                   cell_orientations)
            statements.append(stmt)

        else:
            raise ValueError("Kernel type not recognized: %s" % it_type)

        # Don't duplicate include lines
        inc_dir = list(set(incl) - set(inc))
        inc.extend(inc_dir)

    # Now we handle any terms that require auxiliary temporaries,
    # such as inverses, transposes and actions of a tensor on a
    # coefficient
    if builder.aux_exprs:
        # The declared temps will be updated within this method
        aux_statements = auxiliary_temporaries(builder, declared_temps)
        statements.extend(aux_statements)

    # Now we create the result statement by declaring its eigen type and
    # using Eigen::Map to move between Eigen and C data structs.
    # The result symbols are numbered after the builder's temporaries.
    result_sym = ast.Symbol("T%d" % len(builder.temps))
    result_data_sym = ast.Symbol("A%d" % len(builder.temps))
    result_type = "Eigen::Map<%s >" % eigen_matrixbase_type(shape)
    result = ast.Decl(SCALAR_TYPE, ast.Symbol(result_data_sym, shape))
    result_statement = ast.FlatBlock(
        "%s %s((%s *)%s);\n" %
        (result_type, result_sym, SCALAR_TYPE, result_data_sym))
    statements.append(result_statement)

    # Generate the complete c++ string performing the linear algebra operations
    # on Eigen matrices/vectors
    cpp_string = ast.FlatBlock(
        metaphrase_slate_to_cpp(slate_expr, declared_temps))
    statements.append(ast.Incr(result_sym, cpp_string))

    # Finalize AST for macro kernel construction
    builder._finalize_kernels_and_update()

    # Generate arguments for the macro kernel
    args = [result, ast.Decl("%s **" % SCALAR_TYPE, coordsym)]

    # Orientation information
    if builder.oriented:
        args.append(ast.Decl("int **", cell_orientations))

    # Coefficient information: Constants are passed with one level of
    # indirection, every other coefficient with two.
    for c in expr_coeffs:
        if isinstance(c, Constant):
            ctype = "%s *" % SCALAR_TYPE
        else:
            ctype = "%s **" % SCALAR_TYPE
        args.extend([ast.Decl(ctype, csym) for csym in builder.coefficient(c)])

    # Facet information
    if builder.needs_cell_facets:
        args.append(
            ast.Decl("%s *" % as_cstr(cell_to_facets_dtype),
                     cellfacetsym))

    # NOTE: We need to be careful about the ordering here. Mesh layers are
    # added as the final argument to the kernel.
    if builder.needs_mesh_layers:
        args.append(ast.Decl("int", mesh_layer_sym))

    # NOTE: In the future we may want to have more than one "macro_kernel"
    macro_kernel_name = "compile_slate"
    stmt = ast.Block(statements)
    macro_kernel = builder.construct_macro_kernel(name=macro_kernel_name,
                                                  args=args,
                                                  statements=stmt)

    # Tell the builder to construct the final ast
    kernel_ast = builder.construct_ast([macro_kernel])

    # Now we wrap up the kernel ast as a PyOP2 kernel.
    # Include the Eigen header files
    inc.extend(["%s/include/eigen3/" % d for d in PETSC_DIR])
    op2kernel = op2.Kernel(
        kernel_ast,
        macro_kernel_name,
        cpp=True,
        include_dirs=inc,
        headers=['#include <Eigen/Dense>', '#define restrict __restrict'])

    assert len(slate_expr.ufl_domains()) == 1, (
        "No support for multiple domains yet!")

    # Send back a "TSFC-like" SplitKernel object with an
    # index and KernelInfo
    kinfo = KernelInfo(kernel=op2kernel,
                       integral_type=builder.integral_type,
                       oriented=builder.oriented,
                       subdomain_id="otherwise",
                       domain_number=0,
                       coefficient_map=tuple(range(len(expr_coeffs))),
                       needs_cell_facets=builder.needs_cell_facets,
                       pass_layer_arg=builder.needs_mesh_layers)

    # A single kernel, indexed by a tuple of zeros of length `rank`.
    idx = tuple([0] * slate_expr.rank)

    kernels = (SplitKernel(idx, kinfo), )

    # Store the resulting kernel for reuse
    slate_expr._metakernel_cache = kernels

    return kernels
def facet_integral_loop(cxt_kernel, builder, coordsym, cellfacetsym,
                        cell_orientations):
    """Build the loop statement evaluating exterior/interior facet integrals.

    The generated loop runs an index ``i0`` over the local facets of a
    cell. For each facet whose entry in `cellfacetsym` equals the marker
    of the requested measure (1 for interior facet types, 0 for exterior
    facet types), every TSFC subkernel of the context kernel is called.

    :arg cxt_kernel: A :namedtuple:`ContextKernel` containing all relevant
                     integral types and TSFC kernels associated with the
                     form nested in the expression.
    :arg builder: A :class:`KernelBuilder` containing the expression context.
    :arg coordsym: An `ast.Symbol` object representing coordinate arguments
                   for the kernel.
    :arg cellfacetsym: An `ast.Symbol` representing the cell facets.
    :arg cell_orientations: An `ast.Symbol` representing cell orientation
                            information.

    Returns: A COFFEE code statement and updated include_dirs

    Raises ``ValueError`` when the integral type is not one of the four
    supported facet measures.
    """
    tensor_expr = cxt_kernel.tensor
    temporary = builder.temps[tensor_expr]
    measure = cxt_kernel.original_integral_type
    facet_index = ast.Symbol("i0")

    # Compute the correct number of facets for a particular facet measure
    if measure in ("interior_facet", "exterior_facet"):
        # Non-extruded case
        facet_count = tensor_expr.ufl_domain().ufl_cell().num_facets()
    elif measure in ("interior_facet_vert", "exterior_facet_vert"):
        # Extrusion case: count the facets of the base cell
        facet_count = tensor_expr.ufl_domain().ufl_cell()._cells[0].num_facets()
    else:
        raise ValueError("Integral type %s not supported." % measure)

    # Value the cell-facet array must hold for a facet to be integrated
    if measure in ("interior_facet", "interior_facet_vert"):
        marker = 1
    else:
        marker = 0

    include_dirs = []
    calls = []
    for split in cxt_kernel.tsfc_kernels:
        info = split.kinfo

        # Gather the coefficient symbols the subkernel needs (if any)
        call_args = []
        for position in info.coefficient_map:
            coefficient = tensor_expr.coefficients()[position]
            call_args.extend(builder.coefficient(coefficient))

        include_dirs.extend(info.kernel._include_dirs)

        block = eigen_tensor(tensor_expr, temporary, split.indices)

        # Orientations, when required, precede the coefficients
        if info.oriented:
            call_args.insert(0, cell_orientations)

        # The facet index is passed by address as the last argument
        call_args.append(ast.FlatBlock("&%s" % facet_index))

        calls.append(ast.FunCall(info.kernel.name, block, coordsym,
                                 *call_args))

    facet_test = ast.Eq(ast.Symbol(cellfacetsym, rank=(facet_index, )),
                        marker)
    guarded_calls = ast.If(facet_test, [ast.Block(calls, open_scope=True)])

    facet_loop = ast.For(ast.Decl("unsigned int", facet_index, init=0),
                         ast.Less(facet_index, facet_count),
                         ast.Incr(facet_index, 1),
                         guarded_calls)

    return facet_loop, include_dirs
def statement_initialise(leaf, parameters):
    """Return the COFFEE statement that zero-initialises an index sum.

    When ``parameters.declare[leaf]`` is truthy the accumulator is
    declared with initial value 0.0; otherwise 0.0 is assigned to the
    already existing symbol.
    """
    if not parameters.declare[leaf]:
        target = _ref_symbol(leaf.indexsum, parameters)
        return coffee.Assign(target, 0.0)

    scalar_type = parameters.scalar_type
    declared = _decl_symbol(leaf.indexsum, parameters)
    return coffee.Decl(scalar_type, declared, 0.0)
def statement_block(tree, parameters):
    """Return an open-scope COFFEE block for a block node.

    The block holds one declaration per expression listed in
    ``parameters.declare[tree]``, followed by the statements generated
    for each child of `tree`.
    """
    # Children are translated first, exactly as before, in case their
    # translation influences what has to be declared here.
    child_statements = []
    for child in tree.children:
        child_statements.append(statement(child, parameters))

    declarations = [
        coffee.Decl(parameters.scalar_type, _decl_symbol(expr, parameters))
        for expr in parameters.declare[tree]
    ]

    return coffee.Block(declarations + child_statements, open_scope=True)
def compile_ufl_kernel(expression, to_pts, to_element, fs):
    """Compile a UFL expression into a kernel evaluating it at `to_pts`.

    :arg expression: the UFL expression to evaluate.
    :arg to_pts: the points at which the expression is evaluated; its
        length gives the leading extent of the output array ``A``.
    :arg to_element: not used by this function.
    :arg fs: the target function space; supplies the mesh, the cell and
        the value shape of the output.
    :returns: a tuple ``(kernel, oriented, coefficients)`` where `kernel`
        is an :class:`op2.Kernel`, `oriented` tells whether cell
        orientations are passed as an extra argument, and `coefficients`
        are the coefficients extracted from the processed expression.
    :raises ValueError: if the expression contains UFL Arguments.
    """
    import collections

    from ufl.algorithms.apply_function_pullbacks import apply_function_pullbacks
    from ufl.algorithms.apply_algebra_lowering import apply_algebra_lowering
    from ufl.algorithms.apply_derivatives import apply_derivatives
    from ufl.algorithms.apply_geometry_lowering import apply_geometry_lowering
    from ufl.algorithms import extract_arguments, extract_coefficients
    from gem import gem, impero_utils
    from tsfc import fem, ufl_utils
    from tsfc.coffee import generate as generate_coffee
    from tsfc.kernel_interface import (KernelBuilderBase,
                                       needs_cell_orientations,
                                       cell_orientations_coffee_arg)

    # Imitate the compute_form_data processing pipeline
    #
    # Unfortunately, we cannot call compute_form_data here, since
    # we only have an expression, not a form
    expression = apply_algebra_lowering(expression)
    expression = apply_derivatives(expression)
    expression = apply_function_pullbacks(expression)
    expression = apply_geometry_lowering(expression)
    expression = apply_derivatives(expression)
    expression = apply_geometry_lowering(expression)
    expression = apply_derivatives(expression)

    # Replace coordinates (if any)
    if expression.ufl_domain():
        assert fs.mesh() == expression.ufl_domain()
        expression = ufl_utils.replace_coordinates(expression,
                                                   fs.mesh().coordinates)

    # This used to *return* the exception object, which callers would then
    # try to unpack as the (kernel, oriented, coefficients) triple.
    if extract_arguments(expression):
        raise ValueError("Cannot interpolate UFL expression with Arguments!")

    builder = KernelBuilderBase()
    args = []

    coefficients = extract_coefficients(expression)
    for i, coefficient in enumerate(coefficients):
        args.append(builder.coefficient(coefficient, "w_%d" % i))

    point_index = gem.Index(name='p')
    ir = fem.process('cell', fs.mesh().ufl_cell(), to_pts, None,
                     point_index, (), expression,
                     builder.coefficient_mapper,
                     collections.defaultdict(gem.Index))
    assert len(ir) == 1

    # Deal with non-scalar expressions
    tensor_indices = ()
    if fs.shape:
        tensor_indices = tuple(gem.Index() for s in fs.shape)
        ir = [gem.Indexed(ir[0], tensor_indices)]

    # Build kernel body
    return_var = gem.Variable('A', (len(to_pts),) + fs.shape)
    return_expr = gem.Indexed(return_var, (point_index,) + tensor_indices)
    impero_c = impero_utils.compile_gem([return_expr], ir, [point_index])
    body = generate_coffee(impero_c, index_names={point_index: 'p'})

    # Cell orientations, when needed, come right after the output array
    oriented = needs_cell_orientations(ir)
    if oriented:
        args.insert(0, cell_orientations_coffee_arg)

    # Build kernel: the output array is always the first argument
    args.insert(0, ast.Decl("double",
                            ast.Symbol('A', rank=(len(to_pts),) + fs.shape)))
    kernel_code = builder.construct_kernel("expression_kernel", args, body)

    return op2.Kernel(kernel_code, kernel_code.name), oriented, coefficients
def compile_c_kernel(expression, to_pts, to_element, fs, coords):
    """Produce a :class:`PyOP2.Kernel` from the c expression provided.

    :arg expression: the C expression; its ``code`` entries are assigned
        to the output and its ``_user_args`` become extra kernel arguments.
    :arg to_pts: the points at which the coordinate element is tabulated.
    :arg to_element: the target element; supplies ``space_dimension()``.
    :arg fs: the target function space; supplies ``dim``.
    :arg coords: the coordinate field, always the first coefficient.
    :returns: a tuple ``(kernel, False, coefficients)`` where
        `coefficients` is `coords` followed by one ``GlobalWrapper`` per
        user argument.
    :raises ValueError: if a user argument is named ``x``.
    """
    coords_space = coords.function_space()
    coords_element = coords_space.fiat_element

    names = {v[0] for v in expression._user_args}

    # Take the first tabulated table. Wrapping in list() keeps this working
    # when values() returns a view (Python 3) rather than a list.
    X = list(coords_element.tabulate(0, to_pts).values())[0]

    # Produce C array notation of X.
    X_str = "{{"+"},\n{".join([",".join(map(str, x)) for x in X.T])+"}}"

    # From here on X is the C identifier of the table, not the table itself
    A = utils.unique_name("A", names)
    X = utils.unique_name("X", names)
    x_ = utils.unique_name("x_", names)
    k = utils.unique_name("k", names)
    d = utils.unique_name("d", names)
    i_ = utils.unique_name("i", names)
    # x is a reserved name.
    x = "x"
    if "x" in names:
        raise ValueError("cannot use 'x' as a user-defined Expression variable")
    ass_exp = [ast.Assign(ast.Symbol(A, (k,), ((len(expression.code), i),)),
                          ast.FlatBlock("%s" % code))
               for i, code in enumerate(expression.code)]
    vals = {
        "X": X,
        "x": x,
        "x_": x_,
        "k": k,
        "d": d,
        "i": i_,
        "x_array": X_str,
        "dim": coords_space.dim,
        "xndof": coords_element.space_dimension(),
        # FS will always either be a functionspace or
        # vectorfunctionspace, so just accessing dim here is safe
        # (we don't need to go through ufl_element.value_shape())
        "nfdof": to_element.space_dimension() * numpy.prod(fs.dim, dtype=int),
        "ndof": to_element.space_dimension(),
        "assign_dim": numpy.prod(expression.value_shape(), dtype=int)
    }
    # Table of basis values, the physical point x and the constant pi
    init = ast.FlatBlock(""" const double %(X)s[%(ndof)d][%(xndof)d] = %(x_array)s; double %(x)s[%(dim)d]; const double pi = 3.141592653589793; """ % vals)
    # Compute the physical coordinate x of node k from the coordinate dofs
    block = ast.FlatBlock(""" for (unsigned int %(d)s=0; %(d)s < %(dim)d; %(d)s++) { %(x)s[%(d)s] = 0; for (unsigned int %(i)s=0; %(i)s < %(xndof)d; %(i)s++) { %(x)s[%(d)s] += %(X)s[%(k)s][%(i)s] * %(x_)s[%(i)s][%(d)s]; }; }; """ % vals)

    loop = ast.c_for(k, "%(ndof)d" % vals,
                     ast.Block([block] + ass_exp, open_scope=True))

    user_args = []
    user_init = []
    for _, arg in expression._user_args:
        if arg.shape == (1, ):
            # Scalars arrive as a pointer named "<name>_" and are
            # dereferenced into a const local carrying the user's name.
            user_args.append(ast.Decl("double *", "%s_" % arg.name))
            user_init.append(ast.FlatBlock("const double %s = *%s_;" %
                                           (arg.name, arg.name)))
        else:
            user_args.append(ast.Decl("double *", arg.name))

    kernel_code = ast.FunDecl(
        "void", "expression_kernel",
        [ast.Decl("double", ast.Symbol(A, (int("%(nfdof)d" % vals),))),
         ast.Decl("double**", x_)] + user_args,
        ast.Block(user_init + [init, loop], open_scope=False))

    coefficients = [coords]
    for _, arg in expression._user_args:
        coefficients.append(GlobalWrapper(arg))

    return op2.Kernel(kernel_code, kernel_code.name), False, tuple(coefficients)
def arg(self):
    """Return the kernel argument declaration for this function.

    The declared type is the ctype of the function's dat followed by
    ``*``; the symbol is named ``fn_`` plus the repr of ``self.argnum``.
    """
    pointer_type = self.function.dat.ctype + "*"
    symbol = ast.Symbol("fn_%r" % self.argnum)
    return ast.Decl(pointer_type, symbol)
def _generate_element_tensor(integrals, sets, optimise_parameters, parameters):
    """Construct quadrature code for element tensors.

    :arg integrals: a sequence holding exactly one tuple ``(points, terms,
        functions, ip_consts, coordinate, conditionals)``.
    :arg sets: the collections of used names, forwarded to the helpers
        that generate function and integral code.
    :arg optimise_parameters: forwarded to ``_generate_integral_ir``.
    :arg parameters: forwarded to ``_generate_integral_ir``.
    :returns: a tuple ``(nest_ir, tensor_ops_count)``: the list of AST
        nodes for the integral and the operation count, which is the
        per-point count multiplied by the number of points.
    :raises RuntimeError: if physical quadrature-point coordinates are
        requested, or if integration-point constant code is generated;
        neither is supported.
    """
    # Prefetch formats to speed up code generation.
    f_ip = format["integration points"]
    f_I = format["ip constant"]
    f_double = format["float declaration"]
    f_C = format["conditional"]

    # Initialise return values.
    tensor_ops_count = 0

    ffc_assert(1 == len(integrals),
               "This function is not capable of handling multiple integrals.")

    # Loop over the (single) integral description.
    for points, terms, functions, ip_consts, coordinate, conditionals in integrals:

        nest_ir = []
        ip_ir = []
        num_ops = 0

        # Computing physical coordinates of quadrature points is not
        # supported. The string-based generation that used to follow this
        # check could never run and referred to undefined names, so it
        # has been removed.
        if coordinate:
            raise RuntimeError("Don't know how to compute coordinates")

        # Generate code to compute function values.
        if functions:
            const_func_code, func_code, ops = _generate_functions(functions, sets)
            nest_ir += const_func_code
            ip_ir += func_code
            num_ops += ops

        # Generate code to compute conditionals (might depend on coordinates
        # and function values so put here).
        # TODO: Some conditionals might only depend on geometry so they
        # should be moved outside if possible.
        if conditionals:
            ip_ir.append(pyop2.Decl(f_double, c_sym(f_C(len(conditionals)))))
            # Sort conditionals (need to in case of nested conditionals):
            # map each conditional's number to its (ops, expression) pair.
            reversed_conds = dict((n, (o, e))
                                  for e, (_, o, n) in conditionals.items())
            for num in range(len(conditionals)):
                name = f_C(num)
                ops, expr = reversed_conds[num]
                ip_ir.append(pyop2.Assign(c_sym(name), visit_rhs(expr)))
                num_ops += ops

        # Generate code for ip constant declarations.
        # TODO: this code should be removable as only executed when ffc's optimisations are on
        ip_const_ops, ip_const_code = generate_aux_constants(
            ip_consts, f_I, format["assign"], True)
        if len(ip_const_code) > 0:
            raise RuntimeError("IP Const code not supported")
        num_ops += ip_const_ops

        # Generate code to evaluate the element tensor.
        code, ops = _generate_integral_ir(points, terms, sets,
                                          optimise_parameters, parameters)
        num_ops += ops
        tensor_ops_count += num_ops*points
        ip_ir += code

        # Loop code over all IPs.
        # @@@: for (ip ...) { A[0][0] += ... }
        if points > 1:
            it_var = pyop2.Symbol(f_ip, ())
            nest_ir += [pyop2.For(pyop2.Decl("int", it_var, c_sym(0)),
                                  pyop2.Less(it_var, c_sym(points)),
                                  pyop2.Incr(it_var, c_sym(1)),
                                  pyop2.Block(ip_ir, open_scope=True))]
        else:
            # A single point needs no loop
            nest_ir += ip_ir

    return (nest_ir, tensor_ops_count)
def prepare_arguments(arguments, multiindices, interior_facet=False):
    """Bridges the kernel interface and the GEM abstraction for
    Arguments.  Vector Arguments are rearranged here for interior
    facet integrals.

    :arg arguments: UFL Arguments
    :arg multiindices: Argument multiindices
    :arg interior_facet: interior facet integral?
    :returns: (funarg, prepare, expressions)
         funarg      - :class:`coffee.Decl` function argument
         prepare     - list of COFFEE nodes to be prepended to the
                       kernel body
         expressions - GEM expressions referring to the argument
                       tensor
    """
    funarg = coffee.Decl(SCALAR_TYPE, coffee.Symbol("A"), pointers=[()])
    varexp = gem.Variable("A", (None,))

    if len(arguments) == 0:
        # No arguments: the output is a single value
        zero = coffee.FlatBlock(
            "memset({name}, 0, sizeof(*{name}));\n".format(
                name=funarg.sym.gencode())
        )
        return funarg, [zero], [gem.reshape(varexp, ())]

    elements = tuple(create_element(arg.ufl_element()) for arg in arguments)
    transposed_shapes = []
    transposed_indices = []
    for element, multiindex in zip(elements, multiindices):
        if isinstance(element, TensorFiniteElement):
            scalar_shape = element.base_element.index_shape
            tensor_shape = element.index_shape[len(scalar_shape):]
        else:
            scalar_shape = element.index_shape
            tensor_shape = ()

        # Tensor indices are moved in front of the scalar indices
        transposed_shapes.append(tensor_shape + scalar_shape)
        scalar_rank = len(scalar_shape)
        transposed_indices.extend(multiindex[scalar_rank:] +
                                  multiindex[:scalar_rank])
    transposed_indices = tuple(transposed_indices)

    def expression(restricted):
        return gem.Indexed(gem.reshape(restricted, *transposed_shapes),
                           transposed_indices)

    # Flattened size of each argument
    u_shape = numpy.array([numpy.prod(element.index_shape, dtype=int)
                           for element in elements])
    if interior_facet:
        # Each argument is doubled; one slice tuple per combination of
        # restrictions (0, 1) across the arguments.
        c_shape = tuple(2 * u_shape)
        slicez = [[slice(r * s, (r + 1) * s)
                   for r, s in zip(restrictions, u_shape)]
                  for restrictions in product((0, 1), repeat=len(arguments))]
    else:
        c_shape = tuple(u_shape)
        slicez = [[slice(s) for s in u_shape]]

    expressions = [expression(gem.view(gem.reshape(varexp, c_shape), *slices))
                   for slices in slicez]

    # numpy.prod, not the numpy.product alias: the alias is deprecated and
    # no longer exists in NumPy 2.0.
    zero = coffee.FlatBlock(
        "memset({name}, 0, {size} * sizeof(*{name}));\n".format(
            name=funarg.sym.gencode(),
            size=numpy.prod(c_shape, dtype=int))
    )
    return funarg, [zero], prune(expressions)
def _generate_functions(functions, sets):
    """Generate declarations for functions and code to compute values.

    :arg functions: mapping from a function expression to its data object;
        the attributes ``id``, ``cellwise_constant``, ``loop_range``,
        ``psi_name``, ``used_nzcs`` and ``ops`` are read.
    :arg sets: the collections of used names; ``sets[1]`` (psi tables) and
        ``sets[2]`` (non-zero columns) are updated in place.
    :returns: a tuple ``(const_ast_items, ast_items, total_ops)``: the AST
        nodes for cellwise-constant functions, the AST nodes for the other
        functions, and the total operation count.
    :raises RuntimeError: if a tuple loop range holds differing sizes.
    """
    f_double = format["float declaration"]
    f_F = format["function value"]
    f_float = format["floating point"]
    f_r = format["free indices"]

    # Create the function declarations -- only the (unique) variables we need
    const_vardecls = set(fd.id for fd in functions.values()
                         if fd.cellwise_constant)
    const_ast_items = [pyop2.Decl(f_double, c_sym(f_F(n)), c_sym(f_float(0)))
                       for n in const_vardecls]

    vardecls = set(fd.id for fd in functions.values()
                   if not fd.cellwise_constant)
    ast_items = [pyop2.Decl(f_double, c_sym(f_F(n)), c_sym(f_float(0)))
                 for n in vardecls]

    # Get sets.
    used_psi_tables = sets[1]
    used_nzcs = sets[2]

    # Sort functions after being cellwise constant and loop ranges.
    function_groups = collections.defaultdict(list)
    for f, fd in functions.items():
        function_groups[(fd.cellwise_constant, fd.loop_range)].append(f)

    total_ops = 0
    # Loop ranges and get list of functions.
    for (cellwise_constant, loop_range), function_list in function_groups.items():
        function_expr = []
        # Loop functions.
        func_ops = 0
        for function in function_list:
            data = functions[function]
            range_i = data.loop_range
            if not isinstance(range_i, tuple):
                range_i = tuple([range_i])

            # Add name to used psi names and non zeros name to used_nzcs.
            used_psi_tables.add(data.psi_name)
            used_nzcs.update(data.used_nzcs)

            # The same id may legitimately appear more than once here: we
            # might need to increment into the same number more than once
            # for mixed element + interior facets.

            # Convert function to COFFEE ast node, save string
            # representation for sorting (such that we're reproducible
            # in parallel).
            rhs = visit_rhs(function)
            function_expr.append((data.id, rhs, str(rhs)))

            # Get number of operations to compute entry and add to function
            # operations count.
            func_ops += (data.ops + 1)*sum(range_i)

        # Add this group's operations to the total. Without this the
        # function always reported zero operations.
        total_ops += func_ops

        # Gather, sorted by string rep of function.
        lines = [pyop2.Incr(c_sym(f_F(n)), fn)
                 for n, fn, _ in sorted(function_expr, key=lambda x: x[2])]

        if isinstance(loop_range, tuple):
            if any(size != loop_range[0] for size in loop_range):
                raise RuntimeError("General mixed elements not yet supported in PyOP2")
            loop_vars = [(f_r[0], 0, loop_range[0]),
                         (f_r[1], 0, len(loop_range))]
        else:
            loop_vars = [(f_r[0], 0, loop_range)]
        # TODO: If loop_range == 1, this loop may be unneccessary. Not sure if it's safe to just skip it.
        # Only the first loop variable is used to build the loop.
        it_var = c_sym(loop_vars[0][0])
        loop_size = c_sym(loop_vars[0][2])
        ast_item = pyop2.For(pyop2.Decl("int", it_var, c_sym(0)),
                             pyop2.Less(it_var, loop_size),
                             pyop2.Incr(it_var, c_sym(1)),
                             pyop2.Block(lines, open_scope=True))
        if cellwise_constant:
            const_ast_items.append(ast_item)
        else:
            ast_items.append(ast_item)

    return const_ast_items, ast_items, total_ops
def compile_expression(slate_expr, tsfc_parameters=None):
    """Compile a SLATE expression into a tuple holding one `SplitKernel`.

    For every temporary of the builder, the generated macro kernel
    declares and zeroes an Eigen matrix and calls the TSFC kernels that
    assemble into it (inside a facet loop for facet integrals). It then
    evaluates the linear algebra expression into the output array.

    :arg slate_expr: a :class:'TensorBase' expression.
    :arg tsfc_parameters: an optional `dict` of form compiler parameters to
                          be passed onto TSFC during the compilation of
                          ufl forms.

    Raises ``ValueError`` if `slate_expr` is not a `TensorBase` and
    ``NotImplementedError`` for mixed arguments or integral types other
    than cell, interior facet and exterior facet.
    """
    if not isinstance(slate_expr, TensorBase):
        raise ValueError(
            "Expecting a `slate.TensorBase` expression, not a %r" % slate_expr)

    # TODO: Get PyOP2 to write into mixed dats
    for argument in slate_expr.arguments():
        if len(argument.function_space()) > 1:
            raise NotImplementedError("Compiling mixed slate expressions")

    # Shape of the result and the statements of the macro kernel
    result_shape = slate_expr.shape
    body = []

    # Create a builder for the SLATE expression
    builder = KernelBuilder(expression=slate_expr,
                            tsfc_parameters=tsfc_parameters)

    # Coordinate and facet symbols
    coord_symbol = ast.Symbol("coords")
    mesh_coordinates = None
    facet_symbol = ast.Symbol("cell_facets")
    include_dirs = []

    # One declaration, zeroing and set of assembly calls per temporary
    temporaries = builder.temps.copy()
    for tensor_expr, temp in temporaries.items():
        body.append(ast.Decl(eigen_matrixbase_type(tensor_expr.shape), temp))
        body.append(ast.FlatBlock("%s.setZero();\n" % temp))

        for split in builder.kernel_exprs[tensor_expr]:
            info = split.kinfo
            measure = info.integral_type

            if measure not in ("cell", "interior_facet", "exterior_facet"):
                raise NotImplementedError(
                    "Integral type %s not currently supported." % measure)

            # All tensors must share the same coordinates
            coordinates = tensor_expr.ufl_domain().coordinates
            if mesh_coordinates is None:
                mesh_coordinates = coordinates
            else:
                assert coordinates == mesh_coordinates

            call_args = []
            for position in info.coefficient_map:
                coefficient = tensor_expr.coefficients()[position]
                # Handles both mixed and non-mixed coefficient cases
                call_args.extend(builder.extract_coefficient(coefficient))

            include_dirs.extend(info.kernel._include_dirs)

            block = eigen_tensor(tensor_expr, temp, split.indices)

            if measure == "cell":
                body.append(ast.FunCall(info.kernel.name, block,
                                        coord_symbol, *call_args))
                continue

            # Facet integrals: loop over the local facets and only call
            # the kernel where the cell-facet entry matches the marker.
            builder.require_cell_facets()
            facet_index = ast.Symbol("i0")
            call_args.append(ast.FlatBlock("&%s" % facet_index))
            facet_count = tensor_expr.ufl_domain().ufl_cell().num_facets()
            marker = 1 if measure == "exterior_facet" else 0

            kernel_call = ast.FunCall(info.kernel.name, block,
                                      coord_symbol, *call_args)
            guarded_call = ast.If(
                ast.Eq(ast.Symbol(facet_symbol, rank=(facet_index, )),
                       marker),
                [ast.Block([kernel_call], open_scope=True)])

            body.append(ast.For(ast.Decl("unsigned int", facet_index, init=0),
                                ast.Less(facet_index, facet_count),
                                ast.Incr(facet_index, 1),
                                [guarded_call]))

    # Now we handle any terms that require auxiliary data (if any)
    if builder.aux_exprs:
        aux_temps, aux_statements = auxiliary_information(builder)
        temporaries.update(aux_temps)
        body.extend(aux_statements)

    # The result is an Eigen::Map over the output array
    temp_count = len(builder.temps)
    result_sym = ast.Symbol("T%d" % temp_count)
    result_data_sym = ast.Symbol("A%d" % temp_count)
    map_type = "Eigen::Map<%s >" % eigen_matrixbase_type(result_shape)
    result = ast.Decl(SCALAR_TYPE, ast.Symbol(result_data_sym, result_shape))
    body.append(ast.FlatBlock(
        "%s %s((%s *)%s);\n" % (map_type, result_sym, SCALAR_TYPE,
                                result_data_sym)))

    linear_algebra = ast.FlatBlock(
        metaphrase_slate_to_cpp(slate_expr, temporaries))
    body.append(ast.Assign(result_sym, linear_algebra))

    # Generate arguments for the macro kernel
    kernel_args = [result, ast.Decl("%s **" % SCALAR_TYPE, coord_symbol)]
    for coefficient in slate_expr.coefficients():
        pointer = "%s *" if isinstance(coefficient, Constant) else "%s **"
        ctype = pointer % SCALAR_TYPE
        for sym_c in builder.extract_coefficient(coefficient):
            kernel_args.append(ast.Decl(ctype, sym_c))

    if builder.needs_cell_facets:
        kernel_args.append(ast.Decl("char *", facet_symbol))

    macro_kernel_name = "compile_slate"
    kernel_ast, oriented = builder.construct_ast(
        name=macro_kernel_name,
        args=kernel_args,
        statements=ast.Block(body))

    # Include the Eigen header files
    include_dirs.extend(["%s/include/eigen3/" % d for d in PETSC_DIR])
    op2kernel = op2.Kernel(
        kernel_ast,
        macro_kernel_name,
        cpp=True,
        include_dirs=include_dirs,
        headers=['#include <Eigen/Dense>', '#define restrict __restrict'])

    assert len(slate_expr.ufl_domains()) == 1

    kernel_info = KernelInfo(
        kernel=op2kernel,
        integral_type="cell",
        oriented=oriented,
        subdomain_id="otherwise",
        domain_number=0,
        coefficient_map=range(len(slate_expr.coefficients())),
        needs_cell_facets=builder.needs_cell_facets)

    return (SplitKernel((0,) * slate_expr.rank, kernel_info), )
def _tabulate_tensor(ir, parameters):
    """Generate code for a single integral (tabulate_tensor()).

    The function dispatches on ``ir["integral_type"]``. For each supported
    type it generates the element-tensor AST via
    ``_generate_element_tensor`` and assembles a string of geometry
    snippets (Jacobian, inverse, orientation, facet quantities, ...).
    Unused parts of that string are stripped with ``remove_unused``, and
    the result is combined with declarations of the used quadrature
    weights and basis-function tables.

    :arg ir: the intermediate representation of the integral; the keys
        read here are ``optimise_parameters``, ``integral_type``, ``cell``,
        ``num_facets``, ``num_vertices``, ``trans_integrals``,
        ``geo_consts``, ``needs_oriented``, ``quadrature_weights``,
        ``coefficient_names``, ``coefficient_elements``, ``name_map`` and
        ``unique_tables`` (the latter is updated in place with the, always
        empty, ``affine_tables``).
    :arg parameters: code generation parameters; ``format`` and
        ``precision`` are read here, and the whole object is forwarded to
        the helper generators.
    :returns: a ``pyop2.Root`` node holding the geometry block, the table
        declarations and the integral AST, in that order.
    :raises RuntimeError: if a facet branch meets an integral type it does
        not handle. An integral type matching no branch is reported through
        ``error``.
    """
    p_format = parameters["format"]
    precision = parameters["precision"]

    # NOTE(review): several of the lookups below (f_comment, f_G,
    # f_const_double, f_float, f_assign, f_A, f_r, f_j, f_k, f_loop, f_int)
    # are not used anywhere in this function.
    f_comment = format["comment"]
    f_G = format["geometry constant"]
    f_const_double = format["assign"]
    f_float = format["float"]
    f_assign = format["assign"]
    f_A = format["element tensor"][p_format]
    f_r = format["free indices"][0]
    f_j = format["first free index"]
    f_k = format["second free index"]
    f_loop = format["generate loop"]
    f_int = format["int"]
    f_weight = format["weight"]

    # Get data.
    opt_par = ir["optimise_parameters"]
    integral_type = ir["integral_type"]
    cell = ir["cell"]
    gdim = cell.geometric_dimension()
    tdim = cell.topological_dimension()
    num_facets = ir["num_facets"]
    num_vertices= ir["num_vertices"]
    integrals = ir["trans_integrals"]
    geo_consts = ir["geo_consts"]
    oriented = ir["needs_oriented"]

    # Create sets of used variables. They are filled in by the helper
    # generators and read back below.
    used_weights = set()
    used_psi_tables = set()
    used_nzcs = set()
    trans_set = set()
    sets = [used_weights, used_psi_tables, used_nzcs, trans_set]

    affine_tables = {}  # TODO: This is not populated anywhere, remove?
    quadrature_weights = ir["quadrature_weights"]

    # The pyop2 format requires dereferencing constant coefficients since
    # these are passed in as double *
    # `common` collects C source lines emitted ahead of the integral code.
    common = []
    if p_format == "pyop2":
        for n, c in zip(ir["coefficient_names"], ir["coefficient_elements"]):
            if c.family() == 'Real':
                # Second index is always? 0, so we cast to (double (*)[1]).
                common += ['double (*w%(n)s)[1] = (double (*)[1])c%(n)s;\n' % {'n': n[1:]}]

    # NOTE(review): `operations` is appended to in every branch but is not
    # read again in this function.
    operations = []
    if integral_type == "cell":
        # Update transformer with facets and generate code + set of used geometry terms.
        nest_ir, num_ops = _generate_element_tensor(integrals, sets, \
                                                    opt_par, parameters)

        # Set operations equal to num_ops (for printing info on operations).
        operations.append([num_ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code = ""
        jacobi_code += format["compute_jacobian"](cell)
        jacobi_code += "\n"
        jacobi_code += format["compute_jacobian_inverse"](cell)
        if oriented and tdim != gdim:
            # NEED TO THINK ABOUT THIS FOR EXTRUSION
            jacobi_code += format["orientation"][p_format](tdim, gdim)
        jacobi_code += "\n"
        jacobi_code += format["scale factor snippet"][p_format]

        # Generate code for cell volume and circumradius -- note that the
        # former will be incorrect on extruded meshes by a constant factor.
        jacobi_code += "\n\n" + format["generate cell volume"][p_format](tdim, gdim, integral_type)
        jacobi_code += "\n\n" + format["generate circumradius"][p_format](tdim, gdim, integral_type)

    elif integral_type in ("exterior_facet", "exterior_facet_vert"):
        # In the pyop2 format the local facet number arrives by pointer
        if p_format == 'pyop2':
            common += ["unsigned int facet = *facet_p;\n"]

        # Generate tensor code for facets + set of used geometry terms.
        nest_ir, ops = _generate_element_tensor(integrals, sets, opt_par, parameters)

        # Save number of operations (for printing info on operations).
        operations.append([ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code = ""
        jacobi_code += format["compute_jacobian"](cell)
        jacobi_code += "\n"
        jacobi_code += format["compute_jacobian_inverse"](cell)
        if oriented and tdim != gdim:
            # NEED TO THINK ABOUT THIS FOR EXTRUSION
            jacobi_code += format["orientation"][p_format](tdim, gdim)
        jacobi_code += "\n"
        if integral_type == "exterior_facet":
            jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type)
            jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)
            jacobi_code += "\n\n" + format["generate facet area"](tdim, gdim)
            # Facet edge lengths are only generated for tdim == 3
            if tdim == 3:
                jacobi_code += "\n\n" + format["generate min facet edge length"](tdim, gdim)
                jacobi_code += "\n\n" + format["generate max facet edge length"](tdim, gdim)

            # Generate code for cell volume and circumradius
            jacobi_code += "\n\n" + format["generate cell volume"][p_format](tdim, gdim, integral_type)
            jacobi_code += "\n\n" + format["generate circumradius"][p_format](tdim, gdim, integral_type)
        elif integral_type == "exterior_facet_vert":
            jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type)
            jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)
            # OTHER THINGS NOT IMPLEMENTED YET
        else:
            raise RuntimeError("Invalid integral_type")

    elif integral_type in ("exterior_facet_top", "exterior_facet_bottom"):
        nest_ir, ops = _generate_element_tensor(integrals, sets, opt_par, parameters)
        operations.append([ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code = ""
        jacobi_code += format["compute_jacobian"](cell)
        jacobi_code += "\n"
        jacobi_code += format["compute_jacobian_inverse"](cell)
        # NOTE(review): unlike the branches above, orientation code is
        # generated here without also checking tdim != gdim.
        if oriented:
            # NEED TO THINK ABOUT THIS FOR EXTRUSION
            jacobi_code += format["orientation"][p_format](tdim, gdim)
        jacobi_code += "\n"
        jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type)
        jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)
        # THE REST IS NOT IMPLEMENTED YET

    elif integral_type in ("interior_facet", "interior_facet_vert"):
        # Two facet numbers and two sets of coordinate dofs, one per
        # adjacent cell; the second set starts num_vertices entries in.
        if p_format == 'pyop2':
            common += ["unsigned int facet_0 = facet_p[0];"]
            common += ["unsigned int facet_1 = facet_p[1];"]
            common += ["double **coordinate_dofs_0 = coordinate_dofs;"]
            # Note that the following line is unsafe for isoparametric elements.
            common += ["double **coordinate_dofs_1 = coordinate_dofs + %d;" % num_vertices]

        # Generate tensor code for facets + set of used geometry terms.
        nest_ir, ops = _generate_element_tensor(integrals, sets, opt_par, parameters)

        # Save number of operations (for printing info on operations).
        operations.append([ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code = ""
        # Geometry is generated once per restriction ("+" and "-")
        for _r in ["+", "-"]:
            if p_format == "pyop2":
                jacobi_code += format["compute_jacobian_interior"](cell, r=_r)
            else:
                jacobi_code += format["compute_jacobian"](cell, r=_r)

            jacobi_code += "\n"
            jacobi_code += format["compute_jacobian_inverse"](cell, r=_r)
            if oriented and tdim != gdim:
                # NEED TO THINK ABOUT THIS FOR EXTRUSION
                jacobi_code += format["orientation"][p_format](tdim, gdim, r=_r)
            jacobi_code += "\n"

        if integral_type == "interior_facet":
            # Facet quantities are generated for the "+" restriction
            jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type, r="+")
            jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)
            jacobi_code += "\n\n" + format["generate facet area"](tdim, gdim)
            if tdim == 3:
                jacobi_code += "\n\n" + format["generate min facet edge length"](tdim, gdim, r="+")
                jacobi_code += "\n\n" + format["generate max facet edge length"](tdim, gdim, r="+")

            # Generate code for cell volume and circumradius
            jacobi_code += "\n\n" + format["generate cell volume"][p_format](tdim, gdim, integral_type)
            jacobi_code += "\n\n" + format["generate circumradius interior"](tdim, gdim, integral_type)
        elif integral_type == "interior_facet_vert":
            # THE REST IS NOT IMPLEMENTED YET
            jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type, r="+")
            jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)
        else:
            raise RuntimeError("Invalid integral_type")

    elif integral_type == "interior_facet_horiz":
        # NOTE(review): these lines are added regardless of p_format
        common += ["double **coordinate_dofs_0 = coordinate_dofs;"]
        # Note that the following line is unsafe for isoparametric elements.
        common += ["double **coordinate_dofs_1 = coordinate_dofs + %d;" % num_vertices]

        nest_ir, ops = _generate_element_tensor(integrals, sets, opt_par, parameters)

        # Save number of operations (for printing info on operations).
        operations.append([ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code = ""
        for _r in ["+", "-"]:
            jacobi_code += format["compute_jacobian_interior"](cell, r=_r)
            jacobi_code += "\n"
            jacobi_code += format["compute_jacobian_inverse"](cell, r=_r)
            if oriented:
                # NEED TO THINK ABOUT THIS FOR EXTRUSION
                jacobi_code += format["orientation"][p_format](tdim, gdim, r=_r)
            jacobi_code += "\n"

        # TODO: verify that this is correct (we think it is)
        jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type, r="+")
        jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)
        # THE REST IS NOT IMPLEMENTED YET

    elif integral_type == "point":
        # Update transformer with vertices and generate code + set of used geometry terms.
        nest_ir, ops = _generate_element_tensor(integrals, sets, opt_par, parameters)

        # Save number of operations (for printing info on operations).
        operations.append([ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code = ""
        jacobi_code += format["compute_jacobian"](cell)
        jacobi_code += "\n"
        jacobi_code += format["compute_jacobian_inverse"](cell)
        if oriented and tdim != gdim:
            jacobi_code += format["orientation"][p_format](tdim, gdim)
        jacobi_code += "\n"

    else:
        error("Unhandled integral type: " + str(integral_type))

    # Embedded manifold, need to pass in cell orientations
    if oriented and tdim != gdim and p_format == 'pyop2':
        if integral_type in ("interior_facet", "interior_facet_vert", "interior_facet_horiz"):
            # One orientation per adjacent cell
            common += ["const int cell_orientation%s = cell_orientation_[0][0];" % _choose_map('+'),
                       "const int cell_orientation%s = cell_orientation_[1][0];" % _choose_map('-')]
        else:
            common += ["const int cell_orientation = cell_orientation_[0][0];"]
    # After we have generated the element code for all facets we can remove
    # the unused transformations and tabulate the used psi tables and weights.
    common += [remove_unused(jacobi_code, trans_set)]
    jacobi_ir = pyop2.FlatBlock("\n".join(common))

    # Declare the quadrature weights that were actually used.
    # @@@: const double W3[3] = {{...}}
    pyop2_weights = []
    for weights, points in [quadrature_weights[p] for p in used_weights]:
        n_points = len(points)
        # A single weight is declared as a scalar, several as an array
        w_sym = pyop2.Symbol(f_weight(n_points), () if n_points == 1 else (n_points,))
        pyop2_weights.append(pyop2.Decl("double", w_sym,
                                        pyop2.ArrayInit(weights, precision),
                                        qualifiers=["static", "const"]))

    name_map = ir["name_map"]
    tables = ir["unique_tables"]
    tables.update(affine_tables)  # TODO: This is not populated anywhere, remove?

    # Declare the basis function tables that were actually used.
    # @@@: const double FE0[] = {{...}}
    code, decl = _tabulate_psis(tables, used_psi_tables, name_map, used_nzcs, opt_par, parameters)
    pyop2_basis = []
    for name, data in decl.items():
        rank, _, values = data
        zeroflags = values.get_zeros()
        feo_sym = pyop2.Symbol(name, rank)
        init = pyop2.ArrayInit(values, precision)
        # When some but not all entries are flagged as zero, use a sparse
        # initialiser that records the range of non-zero columns.
        if zeroflags is not None and not zeroflags.all():
            nz_indices = numpy.logical_not(zeroflags).nonzero()
            # Note: in the following, we take the last entry of /nz_indices/ since we /know/
            # we have been tracking only zero-valued columns
            nz_indices = nz_indices[-1]
            # Leading dimensions are kept whole: (size, offset 0)
            nz_bounds = tuple([(i, 0)] for i in rank[:-1])
            # Last dimension: (extent of the non-zero range, its first index)
            nz_bounds += ([(max(nz_indices) - min(nz_indices) + 1, min(nz_indices))],)
            init = pyop2.SparseArrayInit(values, precision, nz_bounds)
        pyop2_basis.append(pyop2.Decl("double", feo_sym, init, ["static", "const"]))

    # Build the root of the PyOP2' ast
    pyop2_tables = pyop2_weights + [tab for tab in pyop2_basis]
    root = pyop2.Root([jacobi_ir] + pyop2_tables + nest_ir)

    return root
def generate_kernel_ast(builder, statements, declared_temps):
    """Glue together the complete AST for the Slate expression contained
    in the :class:`LocalKernelBuilder`.

    :arg builder: The :class:`LocalKernelBuilder` containing all relevant
        expression information.
    :arg statements: A list of COFFEE objects containing all assembly calls
        and temporary declarations. The list is extended in place with the
        result mapping and the linear algebra statement.
    :arg declared_temps: A `dict` containing all previously declared
        temporaries.

    Return: A `KernelInfo` object describing the complete AST.
    """
    slate_expr = builder.expression
    if slate_expr.rank == 0:
        # Scalars are treated as 1x1 MatrixBase objects
        shape = (1, )
    else:
        shape = slate_expr.shape

    # Now we create the result statement by declaring its eigen type and
    # using Eigen::Map to move between Eigen and C data structs.
    statements.append(ast.FlatBlock("/* Map eigen tensor into C struct */\n"))
    result_sym = ast.Symbol("T%d" % len(declared_temps))
    result_data_sym = ast.Symbol("A%d" % len(declared_temps))
    result_type = "Eigen::Map<%s >" % eigen_matrixbase_type(shape)
    result = ast.Decl(SCALAR_TYPE, ast.Symbol(result_data_sym, shape))
    result_statement = ast.FlatBlock(
        "%s %s((%s *)%s);\n" % (result_type, result_sym,
                                SCALAR_TYPE, result_data_sym))
    statements.append(result_statement)

    # Generate the complete c++ string performing the linear algebra
    # operations on Eigen matrices/vectors
    statements.append(ast.FlatBlock("/* Linear algebra expression */\n"))
    cpp_string = ast.FlatBlock(
        metaphrase_slate_to_cpp(slate_expr, declared_temps))
    statements.append(ast.Incr(result_sym, cpp_string))

    # Generate arguments for the macro kernel
    args = [result, ast.Decl("%s **" % SCALAR_TYPE, builder.coord_sym)]

    # Orientation information
    if builder.oriented:
        args.append(ast.Decl("int **", builder.cell_orientations_sym))

    # Coefficient information: constants are passed as a single pointer,
    # everything else as a pointer to pointer.
    expr_coeffs = slate_expr.coefficients()
    for c in expr_coeffs:
        if isinstance(c, Constant):
            ctype = "%s *" % SCALAR_TYPE
        else:
            ctype = "%s **" % SCALAR_TYPE
        args.extend([ast.Decl(ctype, csym)
                     for csym in builder.coefficient(c)])

    # Facet information
    if builder.needs_cell_facets:
        args.append(
            ast.Decl("%s *" % as_cstr(cell_to_facets_dtype),
                     builder.cell_facet_sym))

    # NOTE: We need to be careful about the ordering here. Mesh layers are
    # added as the final argument to the kernel.
    if builder.needs_mesh_layers:
        args.append(ast.Decl("int", builder.mesh_layer_sym))

    # Macro kernel
    macro_kernel_name = "compile_slate"
    stmts = ast.Block(statements)
    macro_kernel = ast.FunDecl("void", macro_kernel_name, args, stmts,
                               pred=["static", "inline"])

    # Construct the final ast
    kernel_ast = ast.Node(builder.templated_subkernels + [macro_kernel])

    # Now we wrap up the kernel ast as a PyOP2 kernel and include the
    # Eigen header files. Work on a copy so that the builder's own list of
    # include directories is not modified (otherwise every call would
    # append the Eigen paths to it again).
    include_dirs = list(builder.include_dirs)
    include_dirs.extend(["%s/include/eigen3/" % d for d in PETSC_DIR])
    op2kernel = op2.Kernel(
        kernel_ast,
        macro_kernel_name,
        cpp=True,
        include_dirs=include_dirs,
        headers=['#include <Eigen/Dense>', '#define restrict __restrict'])

    # Send back a "TSFC-like" SplitKernel object with an
    # index and KernelInfo
    kinfo = KernelInfo(kernel=op2kernel,
                       integral_type=builder.integral_type,
                       oriented=builder.oriented,
                       subdomain_id="otherwise",
                       domain_number=0,
                       coefficient_map=tuple(range(len(expr_coeffs))),
                       needs_cell_facets=builder.needs_cell_facets,
                       pass_layer_arg=builder.needs_mesh_layers)

    return kinfo
def coefficient_temporaries(builder, declared_temps):
    """Build the statements that copy coefficient data into vector
    temporaries.

    :arg builder: The :class:`LocalKernelBuilder` containing all relevant
        expression information.
    :arg declared_temps: A `dict` keeping track of all declared
        temporaries. It is updated in place: every coefficient vector that
        receives a temporary here is recorded in it.

    The entries of ``builder.coefficient_vecs`` are grouped by their dof
    extent. For every group a single FOR-loop over the dofs is generated,
    and all coefficients of the group are copied inside that loop:

         FOR (j1=0; j1<dofs; j1++):
             T_a[offset_a + j1] = w_a[j1]
             T_b[offset_b + j1] = w_b[j1]
             .
             .
             .

    where ``T_*`` is the local temporary of the coefficient, ``w_*`` the
    kernel symbol of the selected component space and ``offset_*`` the
    offset stored with the coefficient information.

    :returns: a list with the temporary declarations followed by the
        copy loops.
    """
    declarations = [ast.FlatBlock("/* Coefficient temporaries */\n")]
    dof_idx = ast.Symbol("j1")
    copy_loops = [ast.FlatBlock("/* Loops for coefficient temps */\n")]

    for dof_extent, infos in builder.coefficient_vecs.items():
        # All coefficients in this group share the same dof extent, so
        # their copies live in the body of one loop.
        copies = []
        for info in infos:
            vec = info.vector
            temp = info.local_temp

            # A temporary is declared only the first time a vector is seen
            if vec not in declared_temps:
                temp_type = eigen_matrixbase_type(shape=info.shape)
                declarations.append(ast.Decl(temp_type, temp))
                declared_temps[vec] = temp

            # temp[offset + j1] = coefficient[j1]
            component_sym = builder.coefficient(vec._function)[info.space_index]
            source = ast.Symbol(component_sym, rank=(dof_idx, ))
            position = ast.Sum(info.offset_index, dof_idx)
            target = ast.Symbol(temp, rank=(position, ))
            copies.append(ast.Assign(target, source))

        copy_loops.append(
            ast.For(ast.Decl("unsigned int", dof_idx, init=0),
                    ast.Less(dof_idx, dof_extent),
                    ast.Incr(dof_idx, 1),
                    copies))

    declarations.extend(copy_loops)
    return declarations
def tensor_assembly_calls(builder):
    """Produce the block of statements that assembles the local finite
    element tensors.

    :arg builder: The :class:`LocalKernelBuilder` containing all relevant
        expression information and assembly calls.

    :returns: a list of COFFEE nodes: the cell calls, then (if needed) a
        loop over the cell facets, then (if needed) the mesh-level
        conditionals for extruded top/bottom/horizontal facets.
    """
    def gather(*integral_types):
        # Concatenate the assembly calls of several integral types.
        return list(chain.from_iterable(
            builder.assembly_calls[kind] for kind in integral_types))

    def facet_label_is(label):
        # Condition: cell_facets[facet] == label
        return ast.Eq(
            ast.Symbol(builder.cell_facet_sym, rank=(builder.it_sym, )),
            label)

    stmts = [ast.FlatBlock("/* Assemble local tensors */\n")]

    # Cell integrals need no guard, they are emitted directly.
    stmts.extend(builder.assembly_calls["cell"])

    if builder.needs_cell_facets:
        # Generated structure:
        #
        #    FOR (facet=0; facet<num_facets; facet++):
        #        IF (facet is exterior):
        #            *exterior calls
        #        IF (facet is interior):
        #            *interior calls
        #
        # A conditional is only emitted for a facet kind that actually
        # has calls. Cell facets are labelled `1` for interior and `0`
        # for exterior.
        stmts.append(ast.FlatBlock("/* Loop over cell facets */\n"))
        interior = gather("interior_facet", "interior_facet_vert")
        exterior = gather("exterior_facet", "exterior_facet_vert")

        # Number of facets to loop over; for extruded meshes the first
        # sub-cell of the cell is used.
        mesh = builder.expression.ufl_domain()
        if mesh.cell_set._extruded:
            facet_count = mesh.ufl_cell()._cells[0].num_facets()
        else:
            facet_count = mesh.ufl_cell().num_facets()

        guarded = []
        if exterior:
            guarded.append(
                ast.If(facet_label_is(0),
                       (ast.Block(exterior, open_scope=True), )))
        if interior:
            guarded.append(
                ast.If(facet_label_is(1),
                       (ast.Block(interior, open_scope=True), )))

        facet_loop = ast.For(
            ast.Decl("unsigned int", builder.it_sym, init=0),
            ast.Less(builder.it_sym, facet_count),
            ast.Incr(builder.it_sym, 1),
            guarded)
        stmts.append(facet_loop)

    if builder.needs_mesh_layers:
        # The mesh level is an integer provided as a macro kernel
        # argument. Generated structure:
        #
        #    IF (layer == bottom_layer):
        #        *interior horizontal top calls
        #        *exterior bottom calls
        #    ELSE IF (layer == top_layer):
        #        *interior horizontal bottom calls
        #        *exterior top calls
        #    ELSE:
        #        *interior horizontal bottom calls
        #        *interior horizontal top calls
        #
        # FIXME: No variable layers assumption
        stmts.append(ast.FlatBlock("/* Mesh levels: */\n"))
        layer_count = builder.expression.ufl_domain().topological.layers - 1

        horiz_top = builder.assembly_calls["interior_facet_horiz_top"]
        horiz_btm = builder.assembly_calls["interior_facet_horiz_bottom"]
        outer_top = builder.assembly_calls["exterior_facet_top"]
        outer_btm = builder.assembly_calls["exterior_facet_bottom"]

        on_bottom = ast.Block(horiz_top + outer_btm, open_scope=True)
        on_top = ast.Block(horiz_btm + outer_top, open_scope=True)
        in_between = ast.Block(horiz_btm + horiz_top, open_scope=True)

        is_bottom = ast.Eq(builder.mesh_layer_sym, 0)
        is_top = ast.Eq(builder.mesh_layer_sym, layer_count - 1)
        stmts.append(
            ast.If(is_bottom,
                   (on_bottom, ast.If(is_top, (on_top, in_between)))))

    return stmts
def dg_injection_kernel(Vf, Vc, ncell):
    """Build the PyOP2 kernel ``pyop2_kernel_injection_dg``.

    The generated kernel first integrates the coefficient ``f`` living in
    ``Vf`` against the test functions of ``Vc`` (the result is stored in the
    coarse builder's local tensor), and then calls a Slate-compiled kernel
    that applies the inverse of the ``Vc`` mass matrix to that vector,
    writing into the return argument ``R``.

    :arg Vf: function space of the coefficient being integrated (referred
        to as the "fine" space in the comments below).
    :arg Vc: function space supplying the test functions and the mass
        matrix (the "coarse" space).
    :arg ncell: forwarded to :class:`MacroKernelBuilder`; presumably the
        number of fine cells per coarse cell -- TODO confirm.
    :returns: an :class:`op2.Kernel` wrapping both the Slate kernel code
        and the generated injection function.
    """
    # Imported locally rather than at module level.
    from firedrake import Tensor, AssembledVector, TestFunction, TrialFunction
    from firedrake.slate.slac import compile_expression

    # --- Fine ("macro") side: coefficient, coordinates and |det J| ---
    macro_builder = MacroKernelBuilder(ScalarType_c, ncell)
    f = ufl.Coefficient(Vf)
    macro_builder.set_coefficients([f])
    macro_builder.set_coordinates(Vf.mesh())

    Vfe = create_element(Vf.ufl_element())
    # Quadrature degree is estimated from inner(f, f).
    macro_quadrature_rule = make_quadrature(
        Vfe.cell, estimate_total_polynomial_degree(ufl.inner(f, f)))
    # Shared between the macro and the coarse compilation below.
    index_cache = {}
    parameters = default_parameters()
    integration_dim, entity_ids = lower_integral_type(Vfe.cell, "cell")
    macro_cfg = dict(interface=macro_builder,
                     ufl_cell=Vf.ufl_cell(),
                     precision=parameters["precision"],
                     integration_dim=integration_dim,
                     entity_ids=entity_ids,
                     index_cache=index_cache,
                     quadrature_rule=macro_quadrature_rule)

    fexpr, = fem.compile_ufl(f, **macro_cfg)
    X = ufl.SpatialCoordinate(Vf.mesh())
    C_a, = fem.compile_ufl(X, **macro_cfg)
    detJ = ufl_utils.preprocess_expression(
        abs(ufl.JacobianDeterminant(f.ufl_domain())))
    macro_detJ, = fem.compile_ufl(detJ, **macro_cfg)

    # --- Coarse side: test function, coordinates and inverse Jacobian ---
    Vce = create_element(Vc.ufl_element())

    coarse_builder = firedrake_interface.KernelBuilder("cell", "otherwise", 0, ScalarType_c)
    coarse_builder.set_coordinates(Vc.mesh())
    argument_multiindices = (Vce.get_indices(), )
    argument_multiindex, = argument_multiindices
    return_variable, = coarse_builder.set_arguments((ufl.TestFunction(Vc), ),
                                                    argument_multiindices)

    integration_dim, entity_ids = lower_integral_type(Vce.cell, "cell")
    # Midpoint quadrature for jacobian on coarse cell.
    quadrature_rule = make_quadrature(Vce.cell, 0)

    coarse_cfg = dict(interface=coarse_builder,
                      ufl_cell=Vc.ufl_cell(),
                      precision=parameters["precision"],
                      integration_dim=integration_dim,
                      entity_ids=entity_ids,
                      index_cache=index_cache,
                      quadrature_rule=quadrature_rule)

    X = ufl.SpatialCoordinate(Vc.mesh())
    K = ufl_utils.preprocess_expression(ufl.JacobianInverse(Vc.mesh()))
    C_0, = fem.compile_ufl(X, **coarse_cfg)
    K, = fem.compile_ufl(K, **coarse_cfg)

    # Pull the fine physical points back to the coarse reference cell:
    # the difference between the coarse coordinate (C_0) and the fine
    # coordinate (C_a) is contracted with the coarse inverse Jacobian and
    # subtracted from the reference quadrature point of the coarse rule.
    i = gem.Index()
    j = gem.Index()

    C_0 = gem.Indexed(C_0, (j, ))
    C_0 = gem.index_sum(C_0, quadrature_rule.point_set.indices)
    C_a = gem.Indexed(C_a, (j, ))
    # X_a = C_0 - C_a
    X_a = gem.Sum(C_0, gem.Product(gem.Literal(-1), C_a))

    K_ij = gem.Indexed(K, (i, j))
    K_ij = gem.index_sum(K_ij, quadrature_rule.point_set.indices)
    # X_a_i = sum_j K_ij * X_a_j
    X_a = gem.index_sum(gem.Product(K_ij, X_a), (j, ))
    # C_0 is rebound to the single reference point of the coarse rule.
    C_0, = quadrature_rule.point_set.points
    C_0 = gem.Indexed(gem.Literal(C_0), (i, ))
    # fine quad points in coarse reference space.
    X_a = gem.Sum(C_0, gem.Product(gem.Literal(-1), X_a))
    X_a = gem.ComponentTensor(X_a, (i, ))

    # Coarse basis function evaluated at fine quadrature points
    phi_c = fem.fiat_to_ufl(
        Vce.point_evaluation(0, X_a, (Vce.cell.get_dimension(), 0)), 0)

    # Integrand: (phi_c . f) * |det J| * quadrature weight
    tensor_indices = tuple(gem.Index(extent=d) for d in f.ufl_shape)

    phi_c = gem.Indexed(phi_c, argument_multiindex + tensor_indices)
    fexpr = gem.Indexed(fexpr, tensor_indices)
    quadrature_weight = macro_quadrature_rule.weight_expression
    expr = gem.Product(gem.IndexSum(gem.Product(phi_c, fexpr), tensor_indices),
                       gem.Product(macro_detJ, quadrature_weight))

    quadrature_indices = macro_builder.indices + macro_quadrature_rule.point_set.indices

    reps = spectral.Integrals([expr], quadrature_indices, argument_multiindices, parameters)
    assignments = spectral.flatten([(return_variable, reps)], index_cache)
    return_variables, expressions = zip(*assignments)
    expressions = impero_utils.preprocess_gem(expressions, **spectral.finalise_options)
    assignments = list(zip(return_variables, expressions))
    impero_c = impero_utils.compile_gem(assignments, quadrature_indices + argument_multiindex,
                                        remove_zeros=True)

    # Collect (index, name) pairs used to name the generated loop indices.
    index_names = []

    def name_index(index, name):
        # Record the name; indices found in index_cache also get their
        # cached multiindices named, with a letter suffix.
        index_names.append((index, name))
        if index in index_cache:
            for multiindex, suffix in zip(index_cache[index],
                                          string.ascii_lowercase):
                name_multiindex(multiindex, name + suffix)

    def name_multiindex(multiindex, name):
        # A single index keeps the bare name, several get a numeric suffix.
        if len(multiindex) == 1:
            name_index(multiindex[0], name)
        else:
            for i, index in enumerate(multiindex):
                name_index(index, name + str(i))

    name_multiindex(quadrature_indices, 'ip')
    for multiindex, name in zip(argument_multiindices, ['j', 'k']):
        name_multiindex(multiindex, name)

    index_names.extend(zip(macro_builder.indices, ["entity"]))
    body = generate_coffee(impero_c, index_names, parameters["precision"], ScalarType_c)

    # R is the kernel's return argument; the coarse builder's local tensor
    # is declared at the top of the body, zero-initialised.
    retarg = ast.Decl(ScalarType_c, ast.Symbol("R", rank=(Vce.space_dimension(), )))
    local_tensor = coarse_builder.local_tensor
    local_tensor.init = ast.ArrayInit(
        numpy.zeros(Vce.space_dimension(), dtype=ScalarType_c))
    body.children.insert(0, local_tensor)
    args = [retarg] + macro_builder.kernel_args + [
        macro_builder.coordinates_arg, coarse_builder.coordinates_arg
    ]

    # Now we have the kernel that computes <f, phi_c>dx_c
    # So now we need to hit it with the inverse mass matrix on dx_c

    u = TrialFunction(Vc)
    v = TestFunction(Vc)
    expr = Tensor(ufl.inner(u, v) * ufl.dx).inv * AssembledVector(
        ufl.Coefficient(Vc))
    Ainv, = compile_expression(expr)
    Ainv = Ainv.kinfo.kernel
    A = ast.Symbol(local_tensor.sym.symbol)
    R = ast.Symbol("R")
    # Call the Slate kernel with (R, coarse coordinates, local tensor).
    body.children.append(
        ast.FunCall(Ainv.name, R, coarse_builder.coordinates_arg.sym, A))
    from coffee.base import Node
    # The Slate kernel code must be a COFFEE node to be spliced into the AST.
    assert isinstance(Ainv._code, Node)
    return op2.Kernel(ast.Node([
        Ainv._code,
        ast.FunDecl("void", "pyop2_kernel_injection_dg", args, body,
                    pred=["static", "inline"])
    ]),
        name="pyop2_kernel_injection_dg",
        cpp=True,
        include_dirs=Ainv._include_dirs,
        headers=Ainv._headers)
def build_hard_fusion_kernel(base_loop, fuse_loop, fusion_map, loop_chain_index):
    """
    Build AST and :class:`Kernel` for two loops suitable to hard fusion.

    The AST consists of three functions: fusion, base, fuse. base and fuse
    are respectively the ``base_loop`` and the ``fuse_loop`` kernels, whereas
    fusion is the orchestrator that invokes, for each ``base_loop`` iteration,
    base and, if still to be executed, fuse.

    The orchestrator has the following structure: ::

        fusion (buffer, ..., executed):
            base (buffer, ...)
            for i = 0 to arity:
                if not executed[i]:
                    additional pointer staging required by kernel2
                    fuse (sub_buffer, ...)
                    insertion into buffer

    The executed array tracks whether the i-th iteration (out of /arity/)
    adjacent to the main kernel1 iteration has been executed.

    :arg base_loop: the loop whose kernel becomes /base/; its ``kernel`` and
        ``args`` attributes are read.
    :arg fuse_loop: the loop whose kernel becomes /fuse/; its ``kernel`` and
        ``args`` attributes are read.
    :arg fusion_map: object whose ``arity`` gives the number of /fuse/
        invocations generated per /base/ invocation.
    :arg loop_chain_index: forwarded unchanged to the :class:`Kernel`
        constructor.
    :returns: a 2-tuple ``(kernel, fargs)``, where ``kernel`` is the fused
        :class:`Kernel` and ``fargs`` maps argument positions in the fusion
        signature to ``(tag, flag)`` tuples (``'postponed'`` / ``'onlymap'``).
    """

    finder = Find((ast.FunDecl, ast.PreprocessNode))

    # Work on deep copies of both kernel ASTs; each must contain exactly
    # one function declaration.
    base = base_loop.kernel
    base_ast = dcopy(base._ast)
    base_info = finder.visit(base_ast)
    base_headers = base_info[ast.PreprocessNode]
    base_fundecl = base_info[ast.FunDecl]
    assert len(base_fundecl) == 1
    base_fundecl = base_fundecl[0]

    fuse = fuse_loop.kernel
    fuse_ast = dcopy(fuse._ast)
    fuse_info = finder.visit(fuse_ast)
    fuse_headers = fuse_info[ast.PreprocessNode]
    fuse_fundecl = fuse_info[ast.FunDecl]
    assert len(fuse_fundecl) == 1
    fuse_fundecl = fuse_fundecl[0]

    # Create /fusion/ arguments and signature
    body = ast.Block([])
    fusion_name = '%s_%s' % (base_fundecl.name, fuse_fundecl.name)
    fusion_args = dcopy(base_fundecl.args + fuse_fundecl.args)
    fusion_fundecl = ast.FunDecl(base_fundecl.ret, fusion_name, fusion_args, body)

    # Make sure kernel and variable names are unique
    base_fundecl.name = "%s_base" % base_fundecl.name
    fuse_fundecl.name = "%s_fuse" % fuse_fundecl.name
    for i, decl in enumerate(fusion_args):
        decl.sym.symbol += '_%d' % i

    # Filter out duplicate arguments, and append extra arguments to the fundecl
    binding = WeakFilter().kernel_args([base_loop, fuse_loop], fusion_fundecl)
    fusion_args += [ast.Decl('int*', 'executed'),
                    ast.Decl('int*', 'fused_iters'),
                    ast.Decl('int', 'i')]

    # Which args are actually used in /fuse/, but not in /base/ ? The gather for
    # such arguments is moved to /fusion/, to avoid useless memory LOADs
    base_dats = set(a.data for a in base_loop.args)
    fuse_dats = set(a.data for a in fuse_loop.args)
    unshared = OrderedDict()
    for arg, decl in binding.items():
        if arg.data in fuse_dats - base_dats:
            unshared.setdefault(decl, arg)

    # Track position of Args that need a postponed gather
    # Can't track Args themselves as they change across different parloops
    fargs = {fusion_args.index(i): ('postponed', False) for i in unshared.keys()}
    fargs.update({len(set(binding.values())): ('onlymap', True)})

    # Add maps for arguments that need a postponed gather
    for decl, arg in unshared.items():
        decl_pos = fusion_args.index(decl)
        fusion_args[decl_pos].sym.symbol = arg.c_arg_name()
        if arg._is_indirect:
            fusion_args[decl_pos].sym.rank = ()
            fusion_args.insert(decl_pos + 1, ast.Decl('int*', arg.c_map_name(0, 0)))

    # Append the invocation of /base/; then, proceed with the invocation
    # of the /fuse/ kernels
    base_funcall_syms = [binding[a].sym.symbol for a in base_loop.args]
    body.children.append(ast.FunCall(base_fundecl.name, *base_funcall_syms))

    for idx in range(fusion_map.arity):

        # i = fused_iters[idx]; if (!executed[i]) { fuse(...); executed[i] = 1; }
        fused_iter = ast.Assign('i', ast.Symbol('fused_iters', (idx,)))
        fuse_funcall = ast.FunCall(fuse_fundecl.name)
        if_cond = ast.Not(ast.Symbol('executed', ('i',)))
        if_update = ast.Assign(ast.Symbol('executed', ('i',)), 1)
        if_body = ast.Block([fuse_funcall, if_update], open_scope=True)
        if_exec = ast.If(if_cond, [if_body])
        body.children.extend([ast.FlatBlock('\n'), fused_iter, if_exec])

        # Modify the /fuse/ kernel
        # This is to take into account that many arguments are shared with
        # /base/, so they will only be staged once for /base/. This requires
        # tweaking the way the arguments are declared and accessed in /fuse/.
        # For example, the shared incremented array (called /buffer/ in
        # the pseudocode in the comment above) now needs to take offsets
        # to be sure the locations that /base/ is supposed to increment are
        # actually accessed. The same concept applies to indirect arguments.
        init = lambda v: '{%s}' % ', '.join([str(j) for j in v])
        for i, fuse_loop_arg in enumerate(fuse_loop.args):
            fuse_kernel_arg = binding[fuse_loop_arg]

            buffer_name = '%s_vec' % fuse_kernel_arg.sym.symbol
            fuse_funcall_sym = ast.Symbol(buffer_name)

            # What kind of temporaries do we need ?
            # NOTE(review): only INC and READ are handled here; for any other
            # access mode the names below are unbound (or stale from a
            # previous argument) -- confirm callers never pass such args.
            if fuse_loop_arg.access == INC:
                # Scatter: after /fuse/, add the buffer into the shared array
                op, lvalue, rvalue = ast.Incr, fuse_kernel_arg.sym.symbol, buffer_name
                stager = lambda b, l: b.children.extend(l)
                indexer = lambda indices: [(k, j) for j, k in enumerate(indices)]
                pointers = []
            elif fuse_loop_arg.access == READ:
                # Gather: before /fuse/, copy the shared array into the buffer
                op, lvalue, rvalue = ast.Assign, buffer_name, fuse_kernel_arg.sym.symbol
                stager = lambda b, l: [b.children.insert(0, j) for j in reversed(l)]
                indexer = lambda indices: [(j, k) for j, k in enumerate(indices)]
                pointers = list(fuse_kernel_arg.pointers)

            # Now gonna handle arguments depending on their type and rank ...

            if fuse_loop_arg._is_global:
                # ... Handle global arguments. These can be dropped in the
                # kernel without any particular fiddling
                fuse_funcall_sym = ast.Symbol(fuse_kernel_arg.sym.symbol)

            elif fuse_kernel_arg in unshared:
                # ... Handle arguments that appear only in /fuse/
                staging = unshared[fuse_kernel_arg].c_vec_init(False).split('\n')
                rvalues = [ast.FlatBlock(j.split('=')[1]) for j in staging]
                lvalues = [ast.Symbol(buffer_name, (j,)) for j in range(len(staging))]
                staging = [ast.Assign(j, k) for j, k in zip(lvalues, rvalues)]

                # Set up the temporary
                buffer_symbol = ast.Symbol(buffer_name, (len(staging),))
                buffer_decl = ast.Decl(fuse_kernel_arg.typ, buffer_symbol,
                                       qualifiers=fuse_kernel_arg.qual,
                                       pointers=list(pointers))

                # Update the if-then AST body
                stager(if_exec.children[0], staging)
                if_exec.children[0].children.insert(0, buffer_decl)

            elif fuse_loop_arg._is_mat:
                # ... Handle Mats
                staging = []
                # The block shape is read from the argument being processed
                # (the name previously used here was never defined, so this
                # branch raised a NameError whenever it was reached).
                for b in fuse_loop_arg._block_shape:
                    for rc in b:
                        lvalue = ast.Symbol(lvalue, (idx, idx),
                                            ((rc[0], 'j'), (rc[1], 'k')))
                        rvalue = ast.Symbol(rvalue, ('j', 'k'))
                        staging = ItSpace(mode=0).to_for([(0, rc[0]), (0, rc[1])],
                                                         ('j', 'k'),
                                                         [op(lvalue, rvalue)])[:1]

                # Set up the temporary
                buffer_symbol = ast.Symbol(buffer_name, (fuse_kernel_arg.sym.rank,))
                buffer_init = ast.ArrayInit(init([init([0.0])]))
                buffer_decl = ast.Decl(fuse_kernel_arg.typ, buffer_symbol, buffer_init,
                                       qualifiers=fuse_kernel_arg.qual, pointers=pointers)

                # Update the if-then AST body
                stager(if_exec.children[0], staging)
                if_exec.children[0].children.insert(0, buffer_decl)

            elif fuse_loop_arg._is_indirect:
                cdim = fuse_loop_arg.data.cdim

                if cdim == 1 and fuse_kernel_arg.sym.rank:
                    # [Special case]
                    # ... Handle rank 1 indirect arguments that appear in both
                    # /base/ and /fuse/: just point into the right location
                    rank = (idx,) if fusion_map.arity > 1 else ()
                    fuse_funcall_sym = ast.Symbol(fuse_kernel_arg.sym.symbol, rank)

                else:
                    # ... Handle indirect arguments. At the C level, these arguments
                    # are of pointer type, so simple pointer arithmetic is used
                    # to ensure the kernel accesses are to the correct locations
                    fuse_arity = fuse_loop_arg.map.arity
                    base_arity = fuse_arity*fusion_map.arity
                    size = fuse_arity*cdim

                    # Set the proper storage layout before invoking /fuse/
                    ofs_vals = [[base_arity*j + k for k in range(fuse_arity)]
                                for j in range(cdim)]
                    ofs_vals = [[fuse_arity*j + k for k in flatten(ofs_vals)]
                                for j in range(fusion_map.arity)]
                    ofs_vals = list(flatten(ofs_vals))
                    # Offsets belonging to the idx-th /fuse/ invocation
                    indices = [ofs_vals[idx*size + j] for j in range(size)]

                    staging = [op(ast.Symbol(lvalue, (j,)), ast.Symbol(rvalue, (k,)))
                               for j, k in indexer(indices)]

                    # Set up the temporary
                    buffer_symbol = ast.Symbol(buffer_name, (size,))
                    if fuse_loop_arg.access == INC:
                        buffer_init = ast.ArrayInit(init([0.0]))
                    else:
                        buffer_init = ast.EmptyStatement()
                        pointers.pop()
                    buffer_decl = ast.Decl(fuse_kernel_arg.typ, buffer_symbol, buffer_init,
                                           qualifiers=fuse_kernel_arg.qual,
                                           pointers=pointers)

                    # Update the if-then AST body
                    stager(if_exec.children[0], staging)
                    if_exec.children[0].children.insert(0, buffer_decl)

            else:
                # Nothing special to do for direct arguments
                pass

            # Finally update the /fuse/ funcall
            fuse_funcall.children.append(fuse_funcall_sym)

    # Emit each distinct header once, followed by the three functions.
    fused_headers = set([str(h) for h in base_headers + fuse_headers])
    fused_ast = ast.Root([ast.PreprocessNode(h) for h in fused_headers] +
                         [base_fundecl, fuse_fundecl, fusion_fundecl])

    return Kernel([base, fuse], fused_ast, loop_chain_index), fargs
def exterior_facet_boundary_node_map(self, V, method):
    """Return the :class:`pyop2.Map` from exterior facets to nodes
    on the boundary.

    The map is built once per ``method`` and cached in
    ``self.map_caches["boundary_node"]``.

    :arg V: The function space.
    :arg method: The method for determining boundary nodes
        (``"topological"`` or ``"geometric"``). See
        :class:`~.DirichletBC` for details.
    :raises ValueError: if ``method`` is not one of the two supported
        values.
    """
    try:
        return self.map_caches["boundary_node"][method]
    except KeyError:
        pass
    el = V.finat_element

    dim = self.mesh.facet_dimension()

    if method == "topological":
        boundary_dofs = el.entity_closure_dofs()[dim]
    elif method == "geometric":
        # This function is only called on extruded meshes when
        # asking for the nodes that live on the "vertical"
        # exterior facets.
        boundary_dofs = entity_support_dofs(el, dim)
    else:
        # Fail with a clear message instead of an UnboundLocalError on
        # `boundary_dofs` further down.
        raise ValueError(
            "Unknown boundary node method %r, expected 'topological' "
            "or 'geometric'" % (method, ))

    nodes_per_facet = len(boundary_dofs[0])

    # HACK ALERT
    # The facet set does not have a halo associated with it, since
    # we only construct halos for DoF sets.  Fortunately, this
    # loop is direct and we already have all the correct
    # information available locally.  So We fake a set of the
    # correct size and carry out a direct loop
    facet_set = op2.Set(self.mesh.exterior_facets.set.total_size,
                        comm=self.mesh.comm)

    fs_dat = op2.Dat(
        facet_set**el.space_dimension(),
        data=V.exterior_facet_node_map().values_with_halo.view())

    facet_dat = op2.Dat(facet_set**nodes_per_facet, dtype=IntType)

    # Ensure these come out in sorted order.
    local_facet_nodes = numpy.array(
        [boundary_dofs[e] for e in sorted(boundary_dofs.keys())])

    def c_array(xs):
        # Helper to turn the inner index of an array into a C array
        # literal, e.g. [1, 2] -> "{1, 2}".
        return "{" + ", ".join(map(str, xs)) + "}"

    # AST for: l_nodes[facet[0]][n]
    rank_ast = ast.Symbol("l_nodes",
                          rank=(ast.Symbol("facet", rank=(0, )), "n"))

    # Kernel body: a constant table of the boundary nodes of every local
    # facet, and a loop copying the cell nodes selected by that table.
    body = ast.Block([
        ast.Decl("int",
                 ast.Symbol("l_nodes",
                            (len(el.cell.topology[dim]), nodes_per_facet)),
                 init=ast.ArrayInit(
                     c_array(map(c_array, local_facet_nodes))),
                 qualifiers=["const"]),
        ast.For(
            ast.Decl("int", "n", 0), ast.Less("n", nodes_per_facet),
            ast.Incr("n", 1),
            ast.Assign(ast.Symbol("facet_nodes", ("n", )),
                       ast.Symbol("cell_nodes", (rank_ast, ))))
    ])

    kernel = op2.Kernel(
        ast.FunDecl("void", "create_bc_node_map", [
            ast.Decl("%s*" % as_cstr(fs_dat.dtype), "cell_nodes"),
            ast.Decl("%s*" % as_cstr(facet_dat.dtype), "facet_nodes"),
            ast.Decl("unsigned int*", "facet")
        ], body), "create_bc_node_map")

    local_facet_dat = op2.Dat(
        facet_set**self.mesh.exterior_facets._rank,
        self.mesh.exterior_facets.local_facet_dat.data_ro_with_halos,
        dtype=numpy.uintc)
    op2.par_loop(kernel, facet_set, fs_dat(op2.READ),
                 facet_dat(op2.WRITE), local_facet_dat(op2.READ))

    if self.extruded:
        offset = self.offset[boundary_dofs[0]]
    else:
        offset = None
    val = op2.Map(facet_set,
                  self.node_set,
                  nodes_per_facet,
                  facet_dat.data_ro_with_halos,
                  name="exterior_facet_boundary_node",
                  offset=offset)
    self.map_caches["boundary_node"][method] = val
    return val
def transfer_kernel(Pk, P1):
    """Compile a kernel that will map between Pk and P1.

    :arg Pk: the higher-order function space; its element must have at
        least as many basis functions as that of ``P1``.
    :arg P1: the lower-order function space; it must have the same
        ``shape`` as ``Pk``.
    :returns: a PyOP2 kernel named ``Pk_P1_mapper``.

    The prolongation maps a solution in P1 into Pk using the natural
    embedding. The restriction maps a residual in the dual of Pk into the
    dual of P1 (it is the dual of the prolongation), computed using
    linearity of the test function.
    """
    # Mapping of a residual in Pk into a residual in P1
    from coffee import base as coffee
    from tsfc.coffee import generate as generate_coffee, SCALAR_TYPE
    from tsfc.parameters import default_parameters
    from gem import gem, impero_utils as imp

    # Pk should be at least the same size as P1
    assert (Pk.finat_element.space_dimension()
            >= P1.finat_element.space_dimension())

    # In the general case we should compute this by doing:
    # numpy.linalg.solve(Pkmass, PkP1mass)
    pk_elem = Pk.finat_element._element
    p1_elem = P1.finat_element._element
    # TODO, rework to use finat.
    riesz = pk_elem.dual.to_riesz(p1_elem.get_nodal_basis())
    matrix = numpy.dot(riesz, p1_elem.get_coeffs().T).T

    weights = gem.Literal(matrix)
    kernel_name = "Pk_P1_mapper"

    assert Pk.shape == P1.shape

    # Output array "A": rows belong to P1, columns to Pk.
    shape = (p1_elem.space_dimension() * P1.value_size,
             pk_elem.space_dimension() * Pk.value_size)
    kernel_args = [coffee.Decl(SCALAR_TYPE, coffee.Symbol("A", rank=shape))]

    i, j, k = (gem.Index() for _ in range(3))
    indices = (i, j, k)

    # View A as (P1 node, component, Pk node, component); the same
    # component index k is used on both sides.
    blocked = gem.reshape(gem.Variable("A", shape),
                          (p1_elem.space_dimension(), P1.value_size),
                          (pk_elem.space_dimension(), Pk.value_size))
    target, = imp.preprocess_gem([gem.Indexed(blocked, (i, k, j, k))])
    source = gem.Indexed(weights, (i, j))

    ir = imp.compile_gem([(target, source)], indices)

    index_names = list(zip(indices, "ijk"))
    body = generate_coffee(ir, index_names,
                           default_parameters()["precision"])
    function = coffee.FunDecl("void", kernel_name, kernel_args, body,
                              pred=["static", "inline"])

    return op2.Kernel(function, name=function.name)