def generate_kernel_ast(builder, statements, declared_temps): """Glues together the complete AST for the Slate expression contained in the :class:`LocalKernelBuilder`. :arg builder: The :class:`LocalKernelBuilder` containing all relevant expression information. :arg statements: A list of COFFEE objects containing all assembly calls and temporary declarations. :arg declared_temps: A `dict` containing all previously declared temporaries. Return: A `KernelInfo` object describing the complete AST. """ slate_expr = builder.expression if slate_expr.rank == 0: # Scalars are treated as 1x1 MatrixBase objects shape = (1, ) else: shape = slate_expr.shape # Now we create the result statement by declaring its eigen type and # using Eigen::Map to move between Eigen and C data structs. statements.append(ast.FlatBlock("/* Map eigen tensor into C struct */\n")) result_sym = ast.Symbol("T%d" % len(declared_temps)) result_data_sym = ast.Symbol("A%d" % len(declared_temps)) result_type = "Eigen::Map<%s >" % eigen_matrixbase_type(shape) result = ast.Decl(ScalarType_c, ast.Symbol(result_data_sym), pointers=[("restrict", )]) result_statement = ast.FlatBlock( "%s %s((%s *)%s);\n" % (result_type, result_sym, ScalarType_c, result_data_sym)) statements.append(result_statement) # Generate the complete c++ string performing the linear algebra operations # on Eigen matrices/vectors statements.append(ast.FlatBlock("/* Linear algebra expression */\n")) cpp_string = ast.FlatBlock(slate_to_cpp(slate_expr, declared_temps)) statements.append(ast.Incr(result_sym, cpp_string)) # Generate arguments for the macro kernel args = [ result, ast.Decl(ScalarType_c, builder.coord_sym, pointers=[("restrict", )], qualifiers=["const"]) ] # Orientation information if builder.oriented: args.append( ast.Decl("int", builder.cell_orientations_sym, pointers=[("restrict", )], qualifiers=["const"])) # Coefficient information expr_coeffs = slate_expr.coefficients() for c in expr_coeffs: args.extend([ ast.Decl(ScalarType_c, csym, pointers=[("restrict", )], qualifiers=["const"]) for csym in builder.coefficient(c) ]) # Facet information if builder.needs_cell_facets: f_sym = builder.cell_facet_sym f_arg = ast.Symbol("arg_cell_facets") f_dtype = as_cstr(cell_to_facets_dtype) # cell_facets is locally a flattened 2-D array. We typecast here so we # can access its entries using standard array notation. cast = "%s (*%s)[2] = (%s (*)[2])%s;\n" % (f_dtype, f_sym, f_dtype, f_arg) statements.insert(0, ast.FlatBlock(cast)) args.append( ast.Decl(f_dtype, f_arg, pointers=[("restrict", )], qualifiers=["const"])) # NOTE: We need to be careful about the ordering here. Mesh layers are # added as the final argument to the kernel # and the amount of layers before that. if builder.needs_mesh_layers: args.append( ast.Decl("int", builder.mesh_layer_count_sym, pointers=[("restrict", )], qualifiers=["const"])) args.append(ast.Decl("int", builder.mesh_layer_sym)) # Cell size information if builder.needs_cell_sizes: args.append( ast.Decl(ScalarType_c, builder.cell_size_sym, pointers=[("restrict", )], qualifiers=["const"])) # Macro kernel macro_kernel_name = "pyop2_kernel_compile_slate" stmts = ast.Block(statements) macro_kernel = ast.FunDecl("void", macro_kernel_name, args, stmts, pred=["static", "inline"]) # Construct the final ast kernel_ast = ast.Node(builder.templated_subkernels + [macro_kernel]) # Now we wrap up the kernel ast as a PyOP2 kernel and include the # Eigen header files include_dirs = list(builder.include_dirs) include_dirs.append(EIGEN_INCLUDE_DIR) op2kernel = op2.Kernel( kernel_ast, macro_kernel_name, cpp=True, include_dirs=include_dirs, headers=['#include <Eigen/Dense>', '#define restrict __restrict']) op2kernel.num_flops = builder.expression_flops + builder.terminal_flops # Send back a "TSFC-like" SplitKernel object with an # index and KernelInfo kinfo = KernelInfo(kernel=op2kernel, integral_type=builder.integral_type, oriented=builder.oriented, subdomain_id="otherwise", domain_number=0, coefficient_map=slate_expr.coeff_map, needs_cell_facets=builder.needs_cell_facets, pass_layer_arg=builder.needs_mesh_layers, needs_cell_sizes=builder.needs_cell_sizes) return kinfo
def compile_coordinate_element(ufl_coordinate_element, contains_eps, parameters=None): """Generates C code for changing to reference coordinates. :arg ufl_coordinate_element: UFL element of the coordinates :returns: C code as string """ if parameters is None: parameters = tsfc.default_parameters() else: _ = tsfc.default_parameters() _.update(parameters) parameters = _ # Create FInAT element element = tsfc.finatinterface.create_element(ufl_coordinate_element) cell = ufl_coordinate_element.cell() extruded = isinstance(cell, ufl.TensorProductCell) code = { "geometric_dimension": cell.geometric_dimension(), "topological_dimension": cell.topological_dimension(), "inside_predicate": inside_check(element.cell, eps=contains_eps), "to_reference_coords": to_reference_coordinates(ufl_coordinate_element, parameters), "init_X": init_X(element.cell, parameters), "max_iteration_count": 1 if is_affine(ufl_coordinate_element) else 16, "convergence_epsilon": 1e-12, "dX_norm_square": dX_norm_square(cell.topological_dimension()), "X_isub_dX": X_isub_dX(cell.topological_dimension()), "extruded_arg": ", int const *__restrict__ layers" if extruded else "", "extr_comment_out": "//" if extruded else "", "non_extr_comment_out": "//" if not extruded else "", "IntType": as_cstr(IntType), "ScalarType": ScalarType_c, } evaluate_template_c = """#include <math.h> struct ReferenceCoords { %(ScalarType)s X[%(geometric_dimension)d]; }; static inline void to_reference_coords_kernel(void *result_, double *x0, int *return_value, %(ScalarType)s *C) { struct ReferenceCoords *result = (struct ReferenceCoords *) result_; /* * Mapping coordinates from physical to reference space */ %(ScalarType)s *X = result->X; %(init_X)s int converged = 0; for (int it = 0; !converged && it < %(max_iteration_count)d; it++) { %(ScalarType)s dX[%(topological_dimension)d] = { 0.0 }; %(to_reference_coords)s if (%(dX_norm_square)s < %(convergence_epsilon)g * %(convergence_epsilon)g) { converged = 1; } %(X_isub_dX)s } // Are we inside the reference element? *return_value = %(inside_predicate)s; } static inline void wrap_to_reference_coords( void* const result_, double* const x, int* const return_value, %(IntType)s const start, %(IntType)s const end%(extruded_arg)s, %(ScalarType)s const *__restrict__ coords, %(IntType)s const *__restrict__ coords_map); int to_reference_coords(void *result_, struct Function *f, int cell, double *x) { int return_value = 0; %(extr_comment_out)swrap_to_reference_coords(result_, x, &return_value, cell, cell+1, f->coords, f->coords_map); return return_value; } int to_reference_coords_xtr(void *result_, struct Function *f, int cell, int layer, double *x) { int return_value = 0; %(non_extr_comment_out)sint layers[2] = {0, layer+2}; // +2 because the layer loop goes to layers[1]-1, which is nlayers-1 %(non_extr_comment_out)swrap_to_reference_coords(result_, x, &return_value, cell, cell+1, layers, f->coords, f->coords_map); return return_value; } """ return evaluate_template_c % code
def to_reference_coordinates(ufl_coordinate_element, parameters=None): if parameters is None: parameters = tsfc.default_parameters() else: _ = tsfc.default_parameters() _.update(parameters) parameters = _ # Create FInAT element element = tsfc.finatinterface.create_element(ufl_coordinate_element) cell = ufl_coordinate_element.cell() code = { "geometric_dimension": cell.geometric_dimension(), "topological_dimension": cell.topological_dimension(), "to_reference_coords": to_reference_coordinates_body(ufl_coordinate_element, parameters), "init_X": init_X(element.cell, parameters), "max_iteration_count": 1 if is_affine(ufl_coordinate_element) else 16, "convergence_epsilon": 1e-12, "dX_norm_square": dX_norm_square(cell.topological_dimension()), "X_isub_dX": X_isub_dX(cell.topological_dimension()), "IntType": as_cstr(IntType), } evaluate_template_c = """#include <math.h> #include <stdio.h> #include <petsc.h> static inline void to_reference_coords_kernel(PetscScalar *X, const PetscScalar *x0, const PetscScalar *C) { const int space_dim = %(geometric_dimension)d; /* * Mapping coordinates from physical to reference space */ %(init_X)s double x[space_dim]; int converged = 0; for (int it = 0; !converged && it < %(max_iteration_count)d; it++) { double dX[%(topological_dimension)d] = { 0.0 }; %(to_reference_coords)s if (%(dX_norm_square)s < %(convergence_epsilon)g * %(convergence_epsilon)g) { converged = 1; } %(X_isub_dX)s } }""" return evaluate_template_c % code
def compile_element(expression, coordinates, parameters=None): """Generates C code for point evaluations. :arg expression: UFL expression :arg coordinates: coordinate field :arg parameters: form compiler parameters :returns: C code as string """ if parameters is None: parameters = default_parameters() else: _ = default_parameters() _.update(parameters) parameters = _ # No arguments, please! if extract_arguments(expression): return ValueError("Cannot interpolate UFL expression with Arguments!") # Apply UFL preprocessing expression = tsfc.ufl_utils.preprocess_expression( expression, complex_mode=utils.complex_mode) # Collect required coefficients coefficient, = extract_coefficients(expression) # Point evaluation of mixed coefficients not supported here if type(coefficient.ufl_element()) == MixedElement: raise NotImplementedError("Cannot point evaluate mixed elements yet!") # Replace coordinates (if any) domain = expression.ufl_domain() assert coordinates.ufl_domain() == domain # Initialise kernel builder builder = firedrake_interface.KernelBuilderBase(utils.ScalarType_c) builder.domain_coordinate[domain] = coordinates x_arg = builder._coefficient(coordinates, "x") f_arg = builder._coefficient(coefficient, "f") # TODO: restore this for expression evaluation! # expression = ufl_utils.split_coefficients(expression, builder.coefficient_split) # Translate to GEM cell = domain.ufl_cell() dim = cell.topological_dimension() point = gem.Variable('X', (dim, )) point_arg = ast.Decl(utils.ScalarType_c, ast.Symbol('X', rank=(dim, ))) config = dict(interface=builder, ufl_cell=coordinates.ufl_domain().ufl_cell(), point_indices=(), point_expr=point, scalar_type=utils.ScalarType) # TODO: restore this for expression evaluation! # config["cellvolume"] = cellvolume_generator(coordinates.ufl_domain(), coordinates, config) context = tsfc.fem.GemPointContext(**config) # Abs-simplification expression = tsfc.ufl_utils.simplify_abs(expression, utils.complex_mode) # Translate UFL -> GEM translator = tsfc.fem.Translator(context) result, = map_expr_dags(translator, [expression]) tensor_indices = () if expression.ufl_shape: tensor_indices = tuple(gem.Index() for s in expression.ufl_shape) return_variable = gem.Indexed(gem.Variable('R', expression.ufl_shape), tensor_indices) result_arg = ast.Decl(utils.ScalarType_c, ast.Symbol('R', rank=expression.ufl_shape)) result = gem.Indexed(result, tensor_indices) else: return_variable = gem.Indexed(gem.Variable('R', (1, )), (0, )) result_arg = ast.Decl(utils.ScalarType_c, ast.Symbol('R', rank=(1, ))) # Unroll max_extent = parameters["unroll_indexsum"] if max_extent: def predicate(index): return index.extent <= max_extent result, = gem.optimise.unroll_indexsum([result], predicate=predicate) # Translate GEM -> COFFEE result, = gem.impero_utils.preprocess_gem([result]) impero_c = gem.impero_utils.compile_gem([(return_variable, result)], tensor_indices) body = generate_coffee(impero_c, {}, utils.ScalarType) # Build kernel tuple kernel_code = builder.construct_kernel( "evaluate_kernel", [result_arg, point_arg, x_arg, f_arg], body) # Fill the code template extruded = isinstance(cell, TensorProductCell) code = { "geometric_dimension": cell.geometric_dimension(), "layers_arg": ", int const *__restrict__ layers" if extruded else "", "layers": ", layers" if extruded else "", "extruded_define": "1" if extruded else "0", "IntType": as_cstr(IntType), "scalar_type": utils.ScalarType_c, } # if maps are the same, only need to pass one of them if coordinates.cell_node_map() == coefficient.cell_node_map(): code[ "wrapper_map_args"] = "%(IntType)s const *__restrict__ coords_map" % code code["map_args"] = "f->coords_map" else: code[ "wrapper_map_args"] = "%(IntType)s const *__restrict__ coords_map, %(IntType)s const *__restrict__ f_map" % code code["map_args"] = "f->coords_map, f->f_map" evaluate_template_c = """ static inline void wrap_evaluate(%(scalar_type)s* const result, %(scalar_type)s* const X, %(IntType)s const start, %(IntType)s const end%(layers_arg)s, %(scalar_type)s const *__restrict__ coords, %(scalar_type)s const *__restrict__ f, %(wrapper_map_args)s); int evaluate(struct Function *f, double *x, %(scalar_type)s *result) { struct ReferenceCoords reference_coords; %(IntType)s cell = locate_cell(f, x, %(geometric_dimension)d, &to_reference_coords, &to_reference_coords_xtr, &reference_coords); if (cell == -1) { return -1; } if (!result) { return 0; } #if %(extruded_define)s int layers[2] = {0, 0}; int nlayers = f->n_layers; layers[1] = cell %% nlayers + 2; cell = cell / nlayers; #endif wrap_evaluate(result, reference_coords.X, cell, cell+1%(layers)s, f->coords, f->f, %(map_args)s); return 0; } """ return (evaluate_template_c % code) + kernel_code.gencode()