def statement_evaluate(leaf, parameters):
    """Build the COFFEE statement that evaluates ``leaf.expression``.

    :arg leaf: evaluation leaf; only its ``expression`` attribute is read
        here, and the leaf itself is used as a key into
        ``parameters.declare``.
    :arg parameters: code generation state providing ``declare``,
        ``scalar_type`` and ``precision``.
    :returns: a ``coffee.Decl`` when the leaf has to be declared,
        otherwise a ``coffee.Assign`` (or, for a list tensor, a
        ``coffee.Block`` holding one assignment per entry).
    """
    tensor = leaf.expression

    if isinstance(tensor, gem.ListTensor):
        if not parameters.declare[leaf]:
            # The symbol exists already: write the entries one at a time.
            entry_assignments = [
                coffee.Assign(
                    _coffee_symbol(_ref_symbol(tensor, parameters), rank=position),
                    expression(entry, parameters))
                for position, entry in numpy.ndenumerate(tensor.array)
            ]
            return coffee.Block(entry_assignments, open_scope=False)

        # Declare the tensor and initialise it with the translated entries.
        translate_all = numpy.vectorize(lambda node: expression(node, parameters))
        return coffee.Decl(
            parameters.scalar_type,
            _decl_symbol(tensor, parameters),
            coffee.ArrayInit(translate_all(tensor.array),
                             precision=parameters.precision))

    if isinstance(tensor, gem.Constant):
        # Constants are only ever emitted as declarations.
        assert parameters.declare[leaf]
        return coffee.Decl(
            parameters.scalar_type,
            _decl_symbol(tensor, parameters),
            coffee.ArrayInit(tensor.array, parameters.precision),
            qualifiers=["static", "const"])

    # Any other expression: translate it, then declare or assign.
    rhs = expression(tensor, parameters, top=True)
    if parameters.declare[leaf]:
        return coffee.Decl(parameters.scalar_type,
                           _decl_symbol(tensor, parameters),
                           rhs)
    return coffee.Assign(_ref_symbol(tensor, parameters), rhs)
def get_restriction_kernel(fiat_element, unique_indices, dim=1, no_weights=False):
    """Build the PyOP2 kernel named ``restriction``.

    The generated C function accumulates, for every ``i``, ``j``, ``k``::

        coarse[k + i*dim] += fine[j][k] * <scale>

    where ``<scale>`` is the product of ``weights[i][j]`` (omitted when
    all weights are close to one) and ``count_weights[j][0]`` (omitted
    when ``no_weights`` is true).

    :arg fiat_element: element handed to ``restriction_weights``.
    :arg unique_indices: row selection applied to the weight table
        before it is transposed.
    :arg dim: extent of the innermost (``k``) loop.
    :arg no_weights: if true, the kernel takes no ``count_weights``
        argument.
    :returns: an ``op2.Kernel``.
    """
    weights = restriction_weights(fiat_element)[unique_indices].T
    ncdof = weights.shape[0]
    nfdof = weights.shape[1]

    def const_pointer(ctype, name):
        # A "const"-qualified, rank-0 kernel argument.
        return ast.Decl(ctype, ast.Symbol(name, ()), qualifiers=["const"])

    arglist = [ast.Decl("double", ast.Symbol("coarse", (ncdof*dim, ))),
               const_pointer("double *restrict *restrict ", "fine")]
    if not no_weights:
        arglist.append(const_pointer("double *restrict *restrict", "count_weights"))

    all_ones = np.allclose(weights, 1.0)

    # The weight table is only emitted when it is not trivially one.
    preamble = []
    if not all_ones:
        table_sym = ast.Symbol("weights", (ncdof, nfdof))
        table_init = ast.ArrayInit(format_array_literal(weights))
        preamble.append(ast.Decl("double", table_sym, table_init, qualifiers=["const"]))

    i, j, k = (ast.Symbol(letter, ()) for letter in ("i", "j", "k"))
    fine = ast.Symbol("fine", (j, k))

    # Collect the factors that multiply fine[j][k].
    factors = []
    if not all_ones:
        factors.append(ast.Symbol("weights", (i, j)))
    if not no_weights:
        factors.append(ast.Symbol("count_weights", (j, 0)))

    if not factors:
        contribution = fine
    elif len(factors) == 1:
        contribution = ast.Prod(fine, factors[0])
    else:
        contribution = ast.Prod(fine, ast.Prod(factors[0], factors[1]))

    target = ast.Symbol("coarse", (ast.Sum(k, ast.Prod(i, ast.c_sym(dim))),))

    def counted_loop(index, extent, inner):
        # for (int index = 0; index < extent; index += 1) { inner }
        return ast.For(ast.Decl("int", index, ast.c_sym(0)),
                       ast.Less(index, ast.c_sym(extent)),
                       ast.Incr(index, ast.c_sym(1)),
                       ast.Block([inner], open_scope=True))

    # Wrap the update from the innermost loop (k) outwards (j, then i).
    nest = ast.Incr(target, contribution)
    for index, extent in ((k, dim), (j, nfdof), (i, ncdof)):
        nest = counted_loop(index, extent, nest)

    kernel_ast = ast.FunDecl("void", "restriction", arglist,
                             ast.Block(preamble + [nest]),
                             pred=["static", "inline"])
    return op2.Kernel(kernel_ast, "restriction", opts=parameters["coffee"])
def get_injection_kernel(fiat_element, unique_indices, dim=1):
    """Build the PyOP2 kernel named ``injection``.

    The generated C function accumulates, for every ``i``, ``j``, ``k``::

        coarse[k + i*dim] += fine[j][k] * w

    where ``w`` is a scalar ``weights`` constant when every normalised
    weight is (close to) the same value, and ``weights[i][j]`` otherwise.

    :arg fiat_element: element handed to ``injection_weights``.
    :arg unique_indices: row selection applied to the weight table
        before it is transposed.
    :arg dim: extent of the innermost (``k``) loop.
    :returns: an ``op2.Kernel``.
    """
    weights = injection_weights(fiat_element)[unique_indices].T
    ncdof = weights.shape[0]
    nfdof = weights.shape[1]

    # What if we have multiple nodes in same location (DG)?  Divide by
    # rowsum.
    row_totals = np.sum(weights, axis=1).reshape(-1, 1)
    weights = weights / row_totals
    all_same = np.allclose(weights, weights[0, 0])

    arglist = [
        ast.Decl("double", ast.Symbol("coarse", (ncdof * dim, ))),
        ast.Decl("double *restrict *restrict ", ast.Symbol("fine", ()),
                 qualifiers=["const"]),
    ]

    if all_same:
        # A single scalar constant stands in for the whole table.
        w_sym = ast.Symbol("weights", ())
        weight_decl = ast.Decl("double", w_sym, weights[0, 0], qualifiers=["const"])
    else:
        table_init = ast.ArrayInit(format_array_literal(weights))
        w_sym = ast.Symbol("weights", (ncdof, nfdof))
        weight_decl = ast.Decl("double", w_sym, table_init, qualifiers=["const"])

    i, j, k = (ast.Symbol(letter, ()) for letter in ("i", "j", "k"))

    fine = ast.Symbol("fine", (j, k))
    # In the uniform case the declared symbol itself is reused as the factor.
    factor = w_sym if all_same else ast.Symbol("weights", (i, j))
    contribution = ast.Prod(fine, factor)

    target = ast.Symbol("coarse", (ast.Sum(k, ast.Prod(i, ast.c_sym(dim))), ))

    def counted_loop(index, extent, inner):
        # for (int index = 0; index < extent; index += 1) { inner }
        return ast.For(ast.Decl("int", index, ast.c_sym(0)),
                       ast.Less(index, ast.c_sym(extent)),
                       ast.Incr(index, ast.c_sym(1)),
                       ast.Block([inner], open_scope=True))

    # Wrap the update from the innermost loop (k) outwards (j, then i).
    nest = ast.Incr(target, contribution)
    for index, extent in ((k, dim), (j, nfdof), (i, ncdof)):
        nest = counted_loop(index, extent, nest)

    kernel_ast = ast.FunDecl("void", "injection", arglist,
                             ast.Block([weight_decl] + [nest]),
                             pred=["static", "inline"])
    return op2.Kernel(kernel_ast, "injection", opts=parameters["coffee"])
def get_prolongation_kernel(fiat_element, unique_indices, dim=1):
    """Build the PyOP2 kernel named ``prolongation``.

    The generated C function accumulates, for every ``i``, ``j``, ``k``::

        fine[k + i*dim] += coarse[j][k] * w

    where ``w`` is a scalar ``weights`` constant when every weight is
    (close to) the same value, and ``weights[i][j]`` otherwise.

    :arg fiat_element: element handed to ``get_restriction_weights``.
    :arg unique_indices: row selection applied to the weight table.
    :arg dim: extent of the innermost (``k``) loop.
    :returns: an ``op2.Kernel``.
    """
    # NOTE(review): the restriction kernel builder obtains its table from
    # ``restriction_weights``; confirm ``get_restriction_weights`` is the
    # intended helper here.
    weights = get_restriction_weights(fiat_element)[unique_indices]
    nfdof = weights.shape[0]
    ncdof = weights.shape[1]

    arglist = [
        ast.Decl("double", ast.Symbol("fine", (nfdof * dim, ))),
        ast.Decl("double", ast.Symbol("*restrict *restrict coarse", ()),
                 qualifiers=["const"]),
    ]

    all_same = np.allclose(weights, weights[0, 0])

    if all_same:
        # A single scalar constant stands in for the whole table.
        w_sym = ast.Symbol("weights", ())
        weight_decl = ast.Decl("double", w_sym, weights[0, 0], qualifiers=["const"])
    else:
        w_sym = ast.Symbol("weights", (nfdof, ncdof))
        table_init = ast.ArrayInit(format_array_literal(weights))
        weight_decl = ast.Decl("double", w_sym, table_init, qualifiers=["const"])

    i, j, k = (ast.Symbol(letter, ()) for letter in ("i", "j", "k"))

    coarse = ast.Symbol("coarse", (j, k))
    # In the uniform case the declared symbol itself is reused as the factor.
    factor = w_sym if all_same else ast.Symbol("weights", (i, j))
    contribution = ast.Prod(coarse, factor)

    target = ast.Symbol("fine", (ast.Sum(k, ast.Prod(i, ast.c_sym(dim))), ))

    def counted_loop(index, extent, inner):
        # for (int index = 0; index < extent; index += 1) { inner }
        return ast.For(ast.Decl("int", index, ast.c_sym(0)),
                       ast.Less(index, ast.c_sym(extent)),
                       ast.Incr(index, ast.c_sym(1)),
                       ast.Block([inner], open_scope=True))

    # Wrap the update from the innermost loop (k) outwards (j, then i).
    nest = ast.Incr(target, contribution)
    for index, extent in ((k, dim), (j, ncdof), (i, nfdof)):
        nest = counted_loop(index, extent, nest)

    kernel_ast = ast.FunDecl("void", "prolongation", arglist,
                             ast.Block([weight_decl] + [nest]),
                             pred=["static", "inline"])
    return op2.Kernel(kernel_ast, "prolongation", opts=parameters["coffee"])
def exterior_facet_boundary_node_map(self, V, method):
    """Return the :class:`pyop2.Map` from exterior facets to nodes
    on the boundary.

    The map is computed once per ``method`` and then served from
    ``self.map_caches["boundary_node"]``.

    :arg V: The function space.
    :arg method: The method for determining boundary nodes.  See
        :class:`~.DirichletBC` for details.
    :raises ValueError: if ``method`` is neither ``"topological"`` nor
        ``"geometric"``.
    """
    try:
        return self.map_caches["boundary_node"][method]
    except KeyError:
        pass

    el = V.finat_element

    dim = self.mesh.facet_dimension()

    if method == "topological":
        boundary_dofs = el.entity_closure_dofs()[dim]
    elif method == "geometric":
        # This function is only called on extruded meshes when
        # asking for the nodes that live on the "vertical"
        # exterior facets.
        boundary_dofs = entity_support_dofs(el, dim)
    else:
        # Fail here with a clear message rather than with an unbound
        # local variable further down.
        raise ValueError("Unknown boundary node method %r, expected "
                         "'topological' or 'geometric'" % (method,))

    nodes_per_facet = len(boundary_dofs[0])

    # HACK ALERT
    # The facet set does not have a halo associated with it, since
    # we only construct halos for DoF sets.  Fortunately, this
    # loop is direct and we already have all the correct
    # information available locally.  So we fake a set of the
    # correct size and carry out a direct loop.
    facet_set = op2.Set(self.mesh.exterior_facets.set.total_size,
                        comm=self.mesh.comm)

    fs_dat = op2.Dat(
        facet_set**el.space_dimension(),
        data=V.exterior_facet_node_map().values_with_halo.view())

    facet_dat = op2.Dat(facet_set**nodes_per_facet, dtype=IntType)

    # Ensure these come out in sorted order.
    local_facet_nodes = numpy.array(
        [boundary_dofs[e] for e in sorted(boundary_dofs.keys())])

    def c_array(xs):
        # Turn the inner index of an array into a C array literal.
        return "{" + ", ".join(map(str, xs)) + "}"

    # AST for: l_nodes[facet[0]][n]
    rank_ast = ast.Symbol("l_nodes",
                          rank=(ast.Symbol("facet", rank=(0, )), "n"))

    # Kernel body: a constant table of the facet-local node numbers, and
    # a loop copying the selected cell nodes into the facet node list.
    body = ast.Block([
        ast.Decl("int",
                 ast.Symbol("l_nodes", (len(el.cell.topology[dim]),
                                        nodes_per_facet)),
                 init=ast.ArrayInit(
                     c_array(map(c_array, local_facet_nodes))),
                 qualifiers=["const"]),
        ast.For(
            ast.Decl("int", "n", 0),
            ast.Less("n", nodes_per_facet),
            ast.Incr("n", 1),
            ast.Assign(ast.Symbol("facet_nodes", ("n", )),
                       ast.Symbol("cell_nodes", (rank_ast, ))))
    ])

    kernel = op2.Kernel(
        ast.FunDecl("void", "create_bc_node_map", [
            ast.Decl("%s*" % as_cstr(fs_dat.dtype), "cell_nodes"),
            ast.Decl("%s*" % as_cstr(facet_dat.dtype), "facet_nodes"),
            ast.Decl("unsigned int*", "facet")
        ], body),
        "create_bc_node_map")

    local_facet_dat = op2.Dat(
        facet_set**self.mesh.exterior_facets._rank,
        self.mesh.exterior_facets.local_facet_dat.data_ro_with_halos,
        dtype=numpy.uintc)
    op2.par_loop(kernel, facet_set,
                 fs_dat(op2.READ),
                 facet_dat(op2.WRITE),
                 local_facet_dat(op2.READ))

    if self.extruded:
        offset = self.offset[boundary_dofs[0]]
    else:
        offset = None
    val = op2.Map(facet_set, self.node_set,
                  nodes_per_facet,
                  facet_dat.data_ro_with_halos,
                  name="exterior_facet_boundary_node",
                  offset=offset)
    # Remember the result so that later calls hit the cache above.
    self.map_caches["boundary_node"][method] = val
    return val
def dg_injection_kernel(Vf, Vc, ncell):
    """Build the PyOP2 kernel ``pyop2_kernel_injection_dg``.

    The generated function first integrates a coefficient on ``Vf``
    against the basis functions of ``Vc`` (using a macro quadrature rule
    and the fine quadrature points mapped into the coarse reference
    cell), and then calls a Slate-compiled kernel that applies the
    inverse of the ``Vc`` mass matrix to the result.

    :arg Vf: function space of the coefficient being injected.
    :arg Vc: function space whose basis is integrated against.
    :arg ncell: forwarded to ``MacroKernelBuilder`` -- presumably the
        number of fine cells per coarse cell; TODO confirm.
    :returns: an ``op2.Kernel`` holding both the Slate kernel code and
        the generated ``pyop2_kernel_injection_dg`` function.
    """
    # Imported locally rather than at module level.
    from firedrake import Tensor, AssembledVector, TestFunction, TrialFunction
    from firedrake.slate.slac import compile_expression

    # --- Fine ("macro") side: coefficient, coordinates, quadrature. ---
    macro_builder = MacroKernelBuilder(ScalarType_c, ncell)
    f = ufl.Coefficient(Vf)
    macro_builder.set_coefficients([f])
    macro_builder.set_coordinates(Vf.mesh())

    Vfe = create_element(Vf.ufl_element())
    # Quadrature degree is estimated from inner(f, f).
    macro_quadrature_rule = make_quadrature(
        Vfe.cell, estimate_total_polynomial_degree(ufl.inner(f, f)))
    # Shared (and mutated) by both the macro and the coarse compilation
    # below, and later consulted when naming indices.
    index_cache = {}
    parameters = default_parameters()
    integration_dim, entity_ids = lower_integral_type(Vfe.cell, "cell")
    macro_cfg = dict(interface=macro_builder,
                     ufl_cell=Vf.ufl_cell(),
                     precision=parameters["precision"],
                     integration_dim=integration_dim,
                     entity_ids=entity_ids,
                     index_cache=index_cache,
                     quadrature_rule=macro_quadrature_rule)

    # GEM expressions for f, the physical coordinates and |det J| at the
    # macro quadrature points.
    fexpr, = fem.compile_ufl(f, **macro_cfg)
    X = ufl.SpatialCoordinate(Vf.mesh())
    C_a, = fem.compile_ufl(X, **macro_cfg)
    detJ = ufl_utils.preprocess_expression(
        abs(ufl.JacobianDeterminant(f.ufl_domain())))
    macro_detJ, = fem.compile_ufl(detJ, **macro_cfg)

    # --- Coarse side: test function, coordinates, Jacobian inverse. ---
    Vce = create_element(Vc.ufl_element())

    coarse_builder = firedrake_interface.KernelBuilder("cell", "otherwise", 0, ScalarType_c)
    coarse_builder.set_coordinates(Vc.mesh())
    argument_multiindices = (Vce.get_indices(), )
    argument_multiindex, = argument_multiindices
    return_variable, = coarse_builder.set_arguments((ufl.TestFunction(Vc), ),
                                                    argument_multiindices)

    integration_dim, entity_ids = lower_integral_type(Vce.cell, "cell")
    # Midpoint quadrature for jacobian on coarse cell.
    quadrature_rule = make_quadrature(Vce.cell, 0)

    coarse_cfg = dict(interface=coarse_builder,
                      ufl_cell=Vc.ufl_cell(),
                      precision=parameters["precision"],
                      integration_dim=integration_dim,
                      entity_ids=entity_ids,
                      index_cache=index_cache,
                      quadrature_rule=quadrature_rule)

    X = ufl.SpatialCoordinate(Vc.mesh())
    K = ufl_utils.preprocess_expression(ufl.JacobianInverse(Vc.mesh()))
    C_0, = fem.compile_ufl(X, **coarse_cfg)
    K, = fem.compile_ufl(K, **coarse_cfg)

    # --- Map the fine quadrature points into the coarse reference cell. ---
    i = gem.Index()
    j = gem.Index()

    # Coarse physical coordinate, summed over the coarse quadrature
    # point index.
    C_0 = gem.Indexed(C_0, (j, ))
    C_0 = gem.index_sum(C_0, quadrature_rule.point_set.indices)
    C_a = gem.Indexed(C_a, (j, ))
    # X_a = C_0 - C_a
    X_a = gem.Sum(C_0, gem.Product(gem.Literal(-1), C_a))

    K_ij = gem.Indexed(K, (i, j))
    K_ij = gem.index_sum(K_ij, quadrature_rule.point_set.indices)
    # X_a_i = sum_j K_ij * X_a_j
    X_a = gem.index_sum(gem.Product(K_ij, X_a), (j, ))
    # From here on C_0 is the (single) coarse reference quadrature point.
    C_0, = quadrature_rule.point_set.points
    C_0 = gem.Indexed(gem.Literal(C_0), (i, ))
    # fine quad points in coarse reference space.
    X_a = gem.Sum(C_0, gem.Product(gem.Literal(-1), X_a))
    X_a = gem.ComponentTensor(X_a, (i, ))

    # Coarse basis function evaluated at fine quadrature points
    phi_c = fem.fiat_to_ufl(
        Vce.point_evaluation(0, X_a, (Vce.cell.get_dimension(), 0)), 0)

    tensor_indices = tuple(gem.Index(extent=d) for d in f.ufl_shape)

    # --- Integrand: (phi_c . f) * |det J| * quadrature weight. ---
    phi_c = gem.Indexed(phi_c, argument_multiindex + tensor_indices)
    fexpr = gem.Indexed(fexpr, tensor_indices)
    quadrature_weight = macro_quadrature_rule.weight_expression
    expr = gem.Product(gem.IndexSum(gem.Product(phi_c, fexpr), tensor_indices),
                       gem.Product(macro_detJ, quadrature_weight))

    quadrature_indices = macro_builder.indices + macro_quadrature_rule.point_set.indices

    # --- Lower the GEM expression to Impero and then to COFFEE. ---
    reps = spectral.Integrals([expr], quadrature_indices, argument_multiindices, parameters)
    assignments = spectral.flatten([(return_variable, reps)], index_cache)
    return_variables, expressions = zip(*assignments)
    expressions = impero_utils.preprocess_gem(expressions, **spectral.finalise_options)
    assignments = list(zip(return_variables, expressions))
    impero_c = impero_utils.compile_gem(assignments, quadrature_indices + argument_multiindex,
                                        remove_zeros=True)

    # Collect (index, name) pairs used to name loop indices in the
    # generated code.
    index_names = []

    def name_index(index, name):
        # Record the name; if the index has sub-multiindices registered in
        # index_cache, name those too, suffixed with 'a', 'b', ...
        index_names.append((index, name))
        if index in index_cache:
            for multiindex, suffix in zip(index_cache[index],
                                          string.ascii_lowercase):
                name_multiindex(multiindex, name + suffix)

    def name_multiindex(multiindex, name):
        # A single index keeps the bare name; several get numeric suffixes.
        if len(multiindex) == 1:
            name_index(multiindex[0], name)
        else:
            for i, index in enumerate(multiindex):
                name_index(index, name + str(i))

    name_multiindex(quadrature_indices, 'ip')
    for multiindex, name in zip(argument_multiindices, ['j', 'k']):
        name_multiindex(multiindex, name)

    index_names.extend(zip(macro_builder.indices, ["entity"]))
    body = generate_coffee(impero_c, index_names, parameters["precision"], ScalarType_c)

    # --- Assemble the kernel signature and local tensor. ---
    retarg = ast.Decl(ScalarType_c, ast.Symbol("R", rank=(Vce.space_dimension(), )))
    local_tensor = coarse_builder.local_tensor
    # Zero-initialise the local tensor and declare it at the top of the body.
    local_tensor.init = ast.ArrayInit(
        numpy.zeros(Vce.space_dimension(), dtype=ScalarType_c))
    body.children.insert(0, local_tensor)
    args = [retarg] + macro_builder.kernel_args + [
        macro_builder.coordinates_arg, coarse_builder.coordinates_arg
    ]

    # Now we have the kernel that computes the inner product (f, phi_c) dx_c
    # So now we need to hit it with the inverse mass matrix on dx_c

    u = TrialFunction(Vc)
    v = TestFunction(Vc)
    expr = Tensor(ufl.inner(u, v) * ufl.dx).inv * AssembledVector(
        ufl.Coefficient(Vc))
    Ainv, = compile_expression(expr)
    Ainv = Ainv.kinfo.kernel
    A = ast.Symbol(local_tensor.sym.symbol)
    R = ast.Symbol("R")
    # Final statement of the body: call the Slate kernel with the return
    # argument R, the coarse coordinates and the local tensor A.
    body.children.append(
        ast.FunCall(Ainv.name, R, coarse_builder.coordinates_arg.sym, A))
    from coffee.base import Node
    # The Slate kernel code must be an AST node so that it can be placed
    # next to the generated function below.
    assert isinstance(Ainv._code, Node)
    return op2.Kernel(ast.Node([
        Ainv._code,
        ast.FunDecl("void", "pyop2_kernel_injection_dg", args, body,
                    pred=["static", "inline"])
    ]),
                      name="pyop2_kernel_injection_dg",
                      cpp=True,
                      include_dirs=Ainv._include_dirs,
                      headers=Ainv._headers)
def _tabulate_tensor(ir, parameters):
    """Generate code for a single integral (tabulate_tensor()).

    Depending on ``ir["integral_type"]`` a block of geometry code
    (Jacobians, facet determinants, normals, ...) is assembled as text,
    stripped of unused pieces, and combined with declarations for the
    quadrature weights and basis function tables that were actually used
    and with the element tensor loop nest.

    :arg ir: intermediate representation of the integral (a mapping; the
        keys read here include ``"integral_type"``, ``"cell"``,
        ``"trans_integrals"``, ``"quadrature_weights"``, ``"name_map"``
        and ``"unique_tables"``).
    :arg parameters: mapping providing at least ``"format"`` and
        ``"precision"``.
    :returns: a ``pyop2.Root`` AST node.
    """
    p_format = parameters["format"]
    precision = parameters["precision"]

    # NOTE(review): apart from f_weight, none of the f_* lookups below is
    # referenced again inside this function.
    f_comment = format["comment"]
    f_G = format["geometry constant"]
    # NOTE(review): this reads the "assign" entry, the same one as
    # f_assign below -- confirm whether another key was intended.
    f_const_double = format["assign"]
    f_float = format["float"]
    f_assign = format["assign"]
    f_A = format["element tensor"][p_format]
    f_r = format["free indices"][0]
    f_j = format["first free index"]
    f_k = format["second free index"]
    f_loop = format["generate loop"]
    f_int = format["int"]
    f_weight = format["weight"]

    # Get data.
    opt_par = ir["optimise_parameters"]
    integral_type = ir["integral_type"]
    cell = ir["cell"]
    gdim = cell.geometric_dimension()
    tdim = cell.topological_dimension()
    num_facets = ir["num_facets"]
    num_vertices= ir["num_vertices"]
    integrals = ir["trans_integrals"]
    geo_consts = ir["geo_consts"]
    oriented = ir["needs_oriented"]

    # Create sets of used variables.
    used_weights = set()
    used_psi_tables = set()
    used_nzcs = set()
    trans_set = set()
    sets = [used_weights, used_psi_tables, used_nzcs, trans_set]

    affine_tables = {} # TODO: This is not populated anywhere, remove?
    quadrature_weights = ir["quadrature_weights"]

    # The pyop2 format requires dereferencing constant coefficients since
    # these are passed in as double *
    common = []
    if p_format == "pyop2":
        for n, c in zip(ir["coefficient_names"], ir["coefficient_elements"]):
            if c.family() == 'Real':
                # Second index is always? 0, so we cast to (double (*)[1]).
                common += ['double (*w%(n)s)[1] = (double (*)[1])c%(n)s;\n' % {'n': n[1:]}]

    operations = []
    if integral_type == "cell":
        # Update transformer with facets and generate code + set of used geometry terms.
        nest_ir, num_ops = _generate_element_tensor(integrals, sets, \
                                                    opt_par, parameters)

        # Set operations equal to num_ops (for printing info on operations).
        operations.append([num_ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code = ""
        jacobi_code += format["compute_jacobian"](cell)
        jacobi_code += "\n"
        jacobi_code += format["compute_jacobian_inverse"](cell)
        if oriented and tdim != gdim:
            # NEED TO THINK ABOUT THIS FOR EXTRUSION
            jacobi_code += format["orientation"][p_format](tdim, gdim)
        jacobi_code += "\n"
        jacobi_code += format["scale factor snippet"][p_format]

        # Generate code for cell volume and circumradius -- note that the
        # former will be incorrect on extruded meshes by a constant factor.
        jacobi_code += "\n\n" + format["generate cell volume"][p_format](tdim, gdim, integral_type)
        jacobi_code += "\n\n" + format["generate circumradius"][p_format](tdim, gdim, integral_type)

    elif integral_type in ("exterior_facet", "exterior_facet_vert"):
        if p_format == 'pyop2':
            common += ["unsigned int facet = *facet_p;\n"]

        # Generate tensor code for facets + set of used geometry terms.
        nest_ir, ops = _generate_element_tensor(integrals, sets, opt_par, parameters)

        # Save number of operations (for printing info on operations).
        operations.append([ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code = ""
        jacobi_code += format["compute_jacobian"](cell)
        jacobi_code += "\n"
        jacobi_code += format["compute_jacobian_inverse"](cell)
        if oriented and tdim != gdim:
            # NEED TO THINK ABOUT THIS FOR EXTRUSION
            jacobi_code += format["orientation"][p_format](tdim, gdim)
        jacobi_code += "\n"
        if integral_type == "exterior_facet":
            jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type)
            jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)
            jacobi_code += "\n\n" + format["generate facet area"](tdim, gdim)
            if tdim == 3:
                jacobi_code += "\n\n" + format["generate min facet edge length"](tdim, gdim)
                jacobi_code += "\n\n" + format["generate max facet edge length"](tdim, gdim)

            # Generate code for cell volume and circumradius
            jacobi_code += "\n\n" + format["generate cell volume"][p_format](tdim, gdim, integral_type)
            jacobi_code += "\n\n" + format["generate circumradius"][p_format](tdim, gdim, integral_type)

        elif integral_type == "exterior_facet_vert":
            jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type)
            jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)
            # OTHER THINGS NOT IMPLEMENTED YET
        else:
            raise RuntimeError("Invalid integral_type")

    elif integral_type in ("exterior_facet_top", "exterior_facet_bottom"):
        nest_ir, ops = _generate_element_tensor(integrals, sets, opt_par, parameters)
        operations.append([ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code = ""
        jacobi_code += format["compute_jacobian"](cell)
        jacobi_code += "\n"
        jacobi_code += format["compute_jacobian_inverse"](cell)
        if oriented:
            # NEED TO THINK ABOUT THIS FOR EXTRUSION
            jacobi_code += format["orientation"][p_format](tdim, gdim)
        jacobi_code += "\n"
        jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type)
        jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)
        # THE REST IS NOT IMPLEMENTED YET

    elif integral_type in ("interior_facet", "interior_facet_vert"):
        if p_format == 'pyop2':
            # Split the two-cell arguments into per-side aliases.
            common += ["unsigned int facet_0 = facet_p[0];"]
            common += ["unsigned int facet_1 = facet_p[1];"]
            common += ["double **coordinate_dofs_0 = coordinate_dofs;"]
            # Note that the following line is unsafe for isoparametric elements.
            common += ["double **coordinate_dofs_1 = coordinate_dofs + %d;" % num_vertices]

        # Generate tensor code for facets + set of used geometry terms.
        nest_ir, ops = _generate_element_tensor(integrals, sets, opt_par, parameters)

        # Save number of operations (for printing info on operations).
        operations.append([ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code = ""
        # One Jacobian / inverse (and orientation) snippet per side.
        for _r in ["+", "-"]:
            if p_format == "pyop2":
                jacobi_code += format["compute_jacobian_interior"](cell, r=_r)
            else:
                jacobi_code += format["compute_jacobian"](cell, r=_r)

            jacobi_code += "\n"
            jacobi_code += format["compute_jacobian_inverse"](cell, r=_r)
            if oriented and tdim != gdim:
                # NEED TO THINK ABOUT THIS FOR EXTRUSION
                jacobi_code += format["orientation"][p_format](tdim, gdim, r=_r)
            jacobi_code += "\n"

        if integral_type == "interior_facet":
            jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type, r="+")
            jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)

            jacobi_code += "\n\n" + format["generate facet area"](tdim, gdim)
            if tdim == 3:
                jacobi_code += "\n\n" + format["generate min facet edge length"](tdim, gdim, r="+")
                jacobi_code += "\n\n" + format["generate max facet edge length"](tdim, gdim, r="+")

            # Generate code for cell volume and circumradius
            jacobi_code += "\n\n" + format["generate cell volume"][p_format](tdim, gdim, integral_type)
            jacobi_code += "\n\n" + format["generate circumradius interior"](tdim, gdim, integral_type)

        elif integral_type == "interior_facet_vert":
            # THE REST IS NOT IMPLEMENTED YET
            jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type, r="+")
            jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)
        else:
            raise RuntimeError("Invalid integral_type")

    elif integral_type == "interior_facet_horiz":
        # NOTE(review): unlike the interior_facet branch above, these
        # aliases are added without checking p_format.
        common += ["double **coordinate_dofs_0 = coordinate_dofs;"]
        # Note that the following line is unsafe for isoparametric elements.
        common += ["double **coordinate_dofs_1 = coordinate_dofs + %d;" % num_vertices]

        nest_ir, ops = _generate_element_tensor(integrals, sets, opt_par, parameters)

        # Save number of operations (for printing info on operations).
        operations.append([ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code = ""
        for _r in ["+", "-"]:
            jacobi_code += format["compute_jacobian_interior"](cell, r=_r)
            jacobi_code += "\n"
            jacobi_code += format["compute_jacobian_inverse"](cell, r=_r)
            if oriented:
                # NEED TO THINK ABOUT THIS FOR EXTRUSION
                jacobi_code += format["orientation"][p_format](tdim, gdim, r=_r)
            jacobi_code += "\n"

        # TODO: verify that this is correct (we think it is)
        jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type, r="+")
        jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)
        # THE REST IS NOT IMPLEMENTED YET

    elif integral_type == "point":
        # Update transformer with vertices and generate code + set of used geometry terms.
        nest_ir, ops = _generate_element_tensor(integrals, sets, opt_par, parameters)

        # Save number of operations (for printing info on operations).
        operations.append([ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code = ""
        jacobi_code += format["compute_jacobian"](cell)
        jacobi_code += "\n"
        jacobi_code += format["compute_jacobian_inverse"](cell)
        if oriented and tdim != gdim:
            jacobi_code += format["orientation"][p_format](tdim, gdim)
        jacobi_code += "\n"

    else:
        # NOTE(review): the code below relies on jacobi_code and nest_ir,
        # which are not set on this path -- presumably error() raises;
        # confirm.
        error("Unhandled integral type: " + str(integral_type))

    # Embedded manifold, need to pass in cell orientations
    if oriented and tdim != gdim and p_format == 'pyop2':
        if integral_type in ("interior_facet", "interior_facet_vert", "interior_facet_horiz"):
            common += ["const int cell_orientation%s = cell_orientation_[0][0];" % _choose_map('+'),
                       "const int cell_orientation%s = cell_orientation_[1][0];" % _choose_map('-')]
        else:
            common += ["const int cell_orientation = cell_orientation_[0][0];"]
    # After we have generated the element code for all facets we can remove
    # the unused transformations and tabulate the used psi tables and weights.
    common += [remove_unused(jacobi_code, trans_set)]
    jacobi_ir = pyop2.FlatBlock("\n".join(common))

    # @@@: const double W3[3] = {{...}}
    pyop2_weights = []
    for weights, points in [quadrature_weights[p] for p in used_weights]:
        n_points = len(points)
        # A single quadrature point gives a scalar symbol, otherwise an
        # array of length n_points.
        w_sym = pyop2.Symbol(f_weight(n_points), () if n_points == 1 else (n_points,))
        pyop2_weights.append(pyop2.Decl("double", w_sym,
                                        pyop2.ArrayInit(weights, precision),
                                        qualifiers=["static", "const"]))

    name_map = ir["name_map"]
    tables = ir["unique_tables"]
    tables.update(affine_tables) # TODO: This is not populated anywhere, remove?

    # @@@: const double FE0[] = {{...}}
    code, decl = _tabulate_psis(tables, used_psi_tables, name_map, used_nzcs, opt_par, parameters)
    pyop2_basis = []
    for name, data in decl.items():
        rank, _, values = data
        zeroflags = values.get_zeros()
        feo_sym = pyop2.Symbol(name, rank)
        init = pyop2.ArrayInit(values, precision)
        # Tables with some (but not all) entries flagged as zero are
        # declared through a sparse initialiser instead.
        if zeroflags is not None and not zeroflags.all():
            nz_indices = numpy.logical_not(zeroflags).nonzero()
            # Note: in the following, we take the last entry of /nz_indices/ since we /know/
            # we have been tracking only zero-valued columns
            nz_indices = nz_indices[-1]
            # Leading dimensions are kept whole: (extent, offset 0).
            nz_bounds = tuple([(i, 0)] for i in rank[:-1])
            # Last dimension: (span of the non-zero columns, first such column).
            nz_bounds += ([(max(nz_indices) - min(nz_indices) + 1, min(nz_indices))],)
            init = pyop2.SparseArrayInit(values, precision, nz_bounds)
        pyop2_basis.append(pyop2.Decl("double", feo_sym, init, ["static", "const"]))

    # Build the root of the PyOP2' ast
    pyop2_tables = pyop2_weights + [tab for tab in pyop2_basis]
    root = pyop2.Root([jacobi_ir] + pyop2_tables + nest_ir)

    return root
def build_hard_fusion_kernel(base_loop, fuse_loop, fusion_map, loop_chain_index):
    """
    Build AST and :class:`Kernel` for two loops suitable to hard fusion.

    The AST consists of three functions: fusion, base, fuse. base and fuse
    are respectively the ``base_loop`` and the ``fuse_loop`` kernels, whereas
    fusion is the orchestrator that invokes, for each ``base_loop`` iteration,
    base and, if still to be executed, fuse.

    The orchestrator has the following structure: ::

        fusion (buffer, ..., executed):
            base (buffer, ...)
            for i = 0 to arity:
                if not executed[i]:
                    additional pointer staging required by kernel2
                    fuse (sub_buffer, ...)
                    insertion into buffer

    The executed array tracks whether the i-th iteration (out of /arity/)
    adjacent to the main kernel1 iteration has been executed.

    :arg base_loop: the loop whose kernel becomes /base/.
    :arg fuse_loop: the loop whose kernel becomes /fuse/.
    :arg fusion_map: map from ``base_loop`` iterations to ``fuse_loop``
        iterations; only its ``arity`` is read here.
    :arg loop_chain_index: forwarded unchanged to :class:`Kernel`.
    :returns: a 2-tuple ``(kernel, fargs)`` where ``fargs`` maps argument
        positions in the /fusion/ signature to ``(kind, flag)`` pairs
        (``'postponed'`` or ``'onlymap'``).
    """

    finder = Find((ast.FunDecl, ast.PreprocessNode))

    # Work on deep copies so that the original kernels' ASTs are untouched.
    base = base_loop.kernel
    base_ast = dcopy(base._ast)
    base_info = finder.visit(base_ast)
    base_headers = base_info[ast.PreprocessNode]
    base_fundecl = base_info[ast.FunDecl]
    assert len(base_fundecl) == 1
    base_fundecl = base_fundecl[0]

    fuse = fuse_loop.kernel
    fuse_ast = dcopy(fuse._ast)
    fuse_info = finder.visit(fuse_ast)
    fuse_headers = fuse_info[ast.PreprocessNode]
    fuse_fundecl = fuse_info[ast.FunDecl]
    assert len(fuse_fundecl) == 1
    fuse_fundecl = fuse_fundecl[0]

    # Create /fusion/ arguments and signature
    body = ast.Block([])
    fusion_name = '%s_%s' % (base_fundecl.name, fuse_fundecl.name)
    fusion_args = dcopy(base_fundecl.args + fuse_fundecl.args)
    fusion_fundecl = ast.FunDecl(base_fundecl.ret, fusion_name, fusion_args, body)

    # Make sure kernel and variable names are unique
    base_fundecl.name = "%s_base" % base_fundecl.name
    fuse_fundecl.name = "%s_fuse" % fuse_fundecl.name
    for i, decl in enumerate(fusion_args):
        decl.sym.symbol += '_%d' % i

    # Filter out duplicate arguments, and append extra arguments to the fundecl
    binding = WeakFilter().kernel_args([base_loop, fuse_loop], fusion_fundecl)
    fusion_args += [ast.Decl('int*', 'executed'),
                    ast.Decl('int*', 'fused_iters'),
                    ast.Decl('int', 'i')]

    # Which args are actually used in /fuse/, but not in /base/ ? The gather for
    # such arguments is moved to /fusion/, to avoid useless memory LOADs
    base_dats = set(a.data for a in base_loop.args)
    fuse_dats = set(a.data for a in fuse_loop.args)
    unshared = OrderedDict()
    for arg, decl in binding.items():
        if arg.data in fuse_dats - base_dats:
            unshared.setdefault(decl, arg)

    # Track position of Args that need a postponed gather
    # Can't track Args themselves as they change across different parloops
    fargs = {fusion_args.index(i): ('postponed', False) for i in unshared.keys()}
    fargs.update({len(set(binding.values())): ('onlymap', True)})

    # Add maps for arguments that need a postponed gather
    for decl, arg in unshared.items():
        decl_pos = fusion_args.index(decl)
        fusion_args[decl_pos].sym.symbol = arg.c_arg_name()
        if arg._is_indirect:
            fusion_args[decl_pos].sym.rank = ()
            fusion_args.insert(decl_pos + 1, ast.Decl('int*', arg.c_map_name(0, 0)))

    # Append the invocation of /base/; then, proceed with the invocation
    # of the /fuse/ kernels
    base_funcall_syms = [binding[a].sym.symbol for a in base_loop.args]
    body.children.append(ast.FunCall(base_fundecl.name, *base_funcall_syms))

    for idx in range(fusion_map.arity):

        # i = fused_iters[idx]; if (!executed[i]) { fuse(...); executed[i] = 1; }
        fused_iter = ast.Assign('i', ast.Symbol('fused_iters', (idx,)))
        fuse_funcall = ast.FunCall(fuse_fundecl.name)
        if_cond = ast.Not(ast.Symbol('executed', ('i',)))
        if_update = ast.Assign(ast.Symbol('executed', ('i',)), 1)
        if_body = ast.Block([fuse_funcall, if_update], open_scope=True)
        if_exec = ast.If(if_cond, [if_body])
        body.children.extend([ast.FlatBlock('\n'), fused_iter, if_exec])

        # Modify the /fuse/ kernel
        # This is to take into account that many arguments are shared with
        # /base/, so they will only staged once for /base/. This requires
        # tweaking the way the arguments are declared and accessed in /fuse/.
        # For example, the shared incremented array (called /buffer/ in
        # the pseudocode in the comment above) now needs to take offsets
        # to be sure the locations that /base/ is supposed to increment are
        # actually accessed. The same concept apply to indirect arguments.
        init = lambda v: '{%s}' % ', '.join([str(j) for j in v])
        for i, fuse_loop_arg in enumerate(fuse_loop.args):
            fuse_kernel_arg = binding[fuse_loop_arg]

            buffer_name = '%s_vec' % fuse_kernel_arg.sym.symbol
            fuse_funcall_sym = ast.Symbol(buffer_name)

            # What kind of temporaries do we need ?
            if fuse_loop_arg.access == INC:
                # Results are accumulated from the temporary into the
                # shared argument *after* the /fuse/ call.
                op, lvalue, rvalue = ast.Incr, fuse_kernel_arg.sym.symbol, buffer_name
                stager = lambda b, l: b.children.extend(l)
                indexer = lambda indices: [(k, j) for j, k in enumerate(indices)]
                pointers = []
            elif fuse_loop_arg.access == READ:
                # Values are copied from the shared argument into the
                # temporary *before* the /fuse/ call.
                op, lvalue, rvalue = ast.Assign, buffer_name, fuse_kernel_arg.sym.symbol
                stager = lambda b, l: [b.children.insert(0, j) for j in reversed(l)]
                indexer = lambda indices: [(j, k) for j, k in enumerate(indices)]
                pointers = list(fuse_kernel_arg.pointers)

            # Now gonna handle arguments depending on their type and rank ...

            if fuse_loop_arg._is_global:
                # ... Handle global arguments. These can be dropped in the
                # kernel without any particular fiddling
                fuse_funcall_sym = ast.Symbol(fuse_kernel_arg.sym.symbol)

            elif fuse_kernel_arg in unshared:
                # ... Handle arguments that appear only in /fuse/
                staging = unshared[fuse_kernel_arg].c_vec_init(False).split('\n')
                rvalues = [ast.FlatBlock(j.split('=')[1]) for j in staging]
                lvalues = [ast.Symbol(buffer_name, (j,)) for j in range(len(staging))]
                staging = [ast.Assign(j, k) for j, k in zip(lvalues, rvalues)]

                # Set up the temporary
                buffer_symbol = ast.Symbol(buffer_name, (len(staging),))
                buffer_decl = ast.Decl(fuse_kernel_arg.typ, buffer_symbol,
                                       qualifiers=fuse_kernel_arg.qual,
                                       pointers=list(pointers))

                # Update the if-then AST body
                stager(if_exec.children[0], staging)
                if_exec.children[0].children.insert(0, buffer_decl)

            elif fuse_loop_arg._is_mat:
                # ... Handle Mats
                staging = []
                # The block shape comes from the argument being handled.
                # This used to read from ``fused_inc_arg``, a name that is
                # not defined anywhere in this function.
                for b in fuse_loop_arg._block_shape:
                    for rc in b:
                        lvalue = ast.Symbol(lvalue, (idx, idx),
                                            ((rc[0], 'j'), (rc[1], 'k')))
                        rvalue = ast.Symbol(rvalue, ('j', 'k'))
                        staging = ItSpace(mode=0).to_for([(0, rc[0]), (0, rc[1])],
                                                         ('j', 'k'),
                                                         [op(lvalue, rvalue)])[:1]

                # Set up the temporary
                buffer_symbol = ast.Symbol(buffer_name, (fuse_kernel_arg.sym.rank,))
                buffer_init = ast.ArrayInit(init([init([0.0])]))
                buffer_decl = ast.Decl(fuse_kernel_arg.typ, buffer_symbol, buffer_init,
                                       qualifiers=fuse_kernel_arg.qual, pointers=pointers)

                # Update the if-then AST body
                stager(if_exec.children[0], staging)
                if_exec.children[0].children.insert(0, buffer_decl)

            elif fuse_loop_arg._is_indirect:
                cdim = fuse_loop_arg.data.cdim

                if cdim == 1 and fuse_kernel_arg.sym.rank:
                    # [Special case]
                    # ... Handle rank 1 indirect arguments that appear in both
                    # /base/ and /fuse/: just point into the right location
                    rank = (idx,) if fusion_map.arity > 1 else ()
                    fuse_funcall_sym = ast.Symbol(fuse_kernel_arg.sym.symbol, rank)

                else:
                    # ... Handle indirect arguments. At the C level, these arguments
                    # are of pointer type, so simple pointer arithmetic is used
                    # to ensure the kernel accesses are to the correct locations
                    fuse_arity = fuse_loop_arg.map.arity
                    base_arity = fuse_arity*fusion_map.arity
                    size = fuse_arity*cdim

                    # Set the proper storage layout before invoking /fuse/
                    ofs_vals = [[base_arity*j + k for k in range(fuse_arity)]
                                for j in range(cdim)]
                    ofs_vals = [[fuse_arity*j + k for k in flatten(ofs_vals)]
                                for j in range(fusion_map.arity)]
                    ofs_vals = list(flatten(ofs_vals))
                    # Offsets belonging to the idx-th fused iteration.
                    indices = [ofs_vals[idx*size + j] for j in range(size)]

                    staging = [op(ast.Symbol(lvalue, (j,)), ast.Symbol(rvalue, (k,)))
                               for j, k in indexer(indices)]

                    # Set up the temporary
                    buffer_symbol = ast.Symbol(buffer_name, (size,))
                    if fuse_loop_arg.access == INC:
                        buffer_init = ast.ArrayInit(init([0.0]))
                    else:
                        buffer_init = ast.EmptyStatement()
                        pointers.pop()
                    buffer_decl = ast.Decl(fuse_kernel_arg.typ, buffer_symbol, buffer_init,
                                           qualifiers=fuse_kernel_arg.qual,
                                           pointers=pointers)

                    # Update the if-then AST body
                    stager(if_exec.children[0], staging)
                    if_exec.children[0].children.insert(0, buffer_decl)

            else:
                # Nothing special to do for direct arguments
                pass

            # Finally update the /fuse/ funcall
            fuse_funcall.children.append(fuse_funcall_sym)

    # Headers of both kernels, de-duplicated by their string form.
    fused_headers = set([str(h) for h in base_headers + fuse_headers])
    fused_ast = ast.Root([ast.PreprocessNode(h) for h in fused_headers] +
                         [base_fundecl, fuse_fundecl, fusion_fundecl])

    return Kernel([base, fuse], fused_ast, loop_chain_index), fargs
def exterior_facet_boundary_node_map(self, method):
    '''The :class:`pyop2.Map` from exterior facets to the nodes on
    those facets. Note that this differs from
    :meth:`exterior_facet_node_map` in that only surface nodes
    are referenced, not all nodes in cells touching the surface.

    :arg method: The method for determining boundary nodes, either
        ``"topological"`` or ``"geometric"``. See
        :class:`~.bcs.DirichletBC`.
    :raises ValueError: if ``method`` is neither ``"topological"``
        nor ``"geometric"``.
    '''
    # Reject unknown methods up front; otherwise boundary_dofs would be
    # left unbound and the failure would surface later as an
    # UnboundLocalError with no hint about the cause.
    if method not in ("topological", "geometric"):
        raise ValueError(
            "Unknown boundary node method %r, expected 'topological' "
            "or 'geometric'" % (method,))

    el = self.fiat_element

    dim = self._mesh.facet_dimension()

    if method == "topological":
        boundary_dofs = el.entity_closure_dofs()[dim]
    else:
        boundary_dofs = el.facet_support_dofs()

    nodes_per_facet = len(boundary_dofs[0])

    # HACK ALERT
    # The facet set does not have a halo associated with it, since
    # we only construct halos for DoF sets.  Fortunately, this
    # loop is direct and we already have all the correct
    # information available locally.  So we fake a set of the
    # correct size and carry out a direct loop
    facet_set = op2.Set(self._mesh.exterior_facets.set.total_size)

    fs_dat = op2.Dat(facet_set**el.space_dimension(),
                     data=self.exterior_facet_node_map().values_with_halo)

    facet_dat = op2.Dat(facet_set**nodes_per_facet,
                        dtype=np.int32)

    # The kernel below indexes l_nodes with the value read from
    # local_facet_dat, so the rows must follow the entity numbering.
    # Walk the keys in sorted order instead of relying on dict
    # iteration order (this also avoids the Python 2 only iteritems).
    # NOTE(review): assumes the keys are the contiguous local facet
    # numbers 0..n-1 -- confirm against the element implementation.
    local_facet_nodes = np.array(
        [boundary_dofs[entity] for entity in sorted(boundary_dofs)])

    def c_array(xs):
        # Render one level of a sequence as a C array literal.
        return "{" + ", ".join(map(str, xs)) + "}"

    # Kernel body: a constant lookup table of the boundary dofs of each
    # local facet, then a loop copying the selected cell nodes into the
    # per-facet output.
    body = ast.Block([
        ast.Decl("int",
                 ast.Symbol("l_nodes",
                            (len(el.get_reference_element().topology[dim]),
                             nodes_per_facet)),
                 init=ast.ArrayInit(
                     c_array(map(c_array, local_facet_nodes))),
                 qualifiers=["const"]),
        ast.For(
            ast.Decl("int", "n", 0),
            ast.Less("n", nodes_per_facet),
            ast.Incr("n", 1),
            ast.Assign(
                ast.Symbol("facet_nodes", ("n", )),
                ast.Symbol("cell_nodes", ("l_nodes[facet[0]][n]", ))))
    ])

    kernel = op2.Kernel(
        ast.FunDecl("void", "create_bc_node_map", [
            ast.Decl("int*", "cell_nodes"),
            ast.Decl("int*", "facet_nodes"),
            ast.Decl("unsigned int*", "facet")
        ], body),
        "create_bc_node_map")

    local_facet_dat = op2.Dat(
        facet_set**self._mesh.exterior_facets._rank,
        self._mesh.exterior_facets.local_facet_dat.data_ro_with_halos,
        dtype=np.uintc)

    op2.par_loop(kernel, facet_set,
                 fs_dat(op2.READ),
                 facet_dat(op2.WRITE),
                 local_facet_dat(op2.READ))

    # Only extruded meshes pass an offset to the resulting map.
    if isinstance(self._mesh, mesh_t.ExtrudedMesh):
        offset = self.offset[boundary_dofs[0]]
    else:
        offset = None

    return op2.Map(facet_set, self.node_set,
                   nodes_per_facet,
                   facet_dat.data_ro_with_halos,
                   name="exterior_facet_boundary_node",
                   offset=offset)
def test_funcall_in_arrayinit():
    """Check that an ``ArrayInit`` built from a ``FunCall`` and a
    ``Symbol`` generates ``{foo(), bar}``."""
    entries = [ast.FunCall("foo"), ast.Symbol("bar")]
    initializer = ast.ArrayInit(np.asarray(entries))
    generated = initializer.gencode()
    assert generated == "{foo(), bar}"