Beispiel #1
0
def statement_evaluate(leaf, parameters):
    expr = leaf.expression
    if isinstance(expr, gem.ListTensor):
        if parameters.declare[leaf]:
            array_expression = numpy.vectorize(lambda v: expression(v, parameters))
            return coffee.Decl(parameters.scalar_type,
                               _decl_symbol(expr, parameters),
                               coffee.ArrayInit(array_expression(expr.array),
                                                precision=parameters.precision))
        else:
            ops = []
            for multiindex, value in numpy.ndenumerate(expr.array):
                coffee_sym = _coffee_symbol(_ref_symbol(expr, parameters), rank=multiindex)
                ops.append(coffee.Assign(coffee_sym, expression(value, parameters)))
            return coffee.Block(ops, open_scope=False)
    elif isinstance(expr, gem.Constant):
        assert parameters.declare[leaf]
        return coffee.Decl(parameters.scalar_type,
                           _decl_symbol(expr, parameters),
                           coffee.ArrayInit(expr.array, parameters.precision),
                           qualifiers=["static", "const"])
    else:
        code = expression(expr, parameters, top=True)
        if parameters.declare[leaf]:
            return coffee.Decl(parameters.scalar_type, _decl_symbol(expr, parameters), code)
        else:
            return coffee.Assign(_ref_symbol(expr, parameters), code)
Beispiel #2
0
def get_restriction_kernel(fiat_element, unique_indices, dim=1, no_weights=False):
    weights = restriction_weights(fiat_element)[unique_indices].T
    ncdof = weights.shape[0]
    nfdof = weights.shape[1]
    arglist = [ast.Decl("double", ast.Symbol("coarse", (ncdof*dim, ))),
               ast.Decl("double *restrict *restrict ", ast.Symbol("fine", ()),
                        qualifiers=["const"])]
    if not no_weights:
        arglist.append(ast.Decl("double *restrict *restrict", ast.Symbol("count_weights", ()),
                                qualifiers=["const"]))

    all_ones = np.allclose(weights, 1.0)

    if all_ones:
        w = []
    else:
        w_sym = ast.Symbol("weights", (ncdof, nfdof))
        init = ast.ArrayInit(format_array_literal(weights))
        w = [ast.Decl("double", w_sym, init,
                      qualifiers=["const"])]

    i = ast.Symbol("i", ())
    j = ast.Symbol("j", ())
    k = ast.Symbol("k", ())
    fine = ast.Symbol("fine", (j, k))
    if no_weights:
        if all_ones:
            assign = fine
        else:
            assign = ast.Prod(fine, ast.Symbol("weights", (i, j)))
    else:
        if all_ones:
            assign = ast.Prod(fine, ast.Symbol("count_weights", (j, 0)))
        else:
            assign = ast.Prod(fine,
                              ast.Prod(ast.Symbol("weights", (i, j)),
                                       ast.Symbol("count_weights", (j, 0))))
    assignment = ast.Incr(ast.Symbol("coarse", (ast.Sum(k, ast.Prod(i, ast.c_sym(dim))),)),
                          assign)
    k_loop = ast.For(ast.Decl("int", k, ast.c_sym(0)),
                     ast.Less(k, ast.c_sym(dim)),
                     ast.Incr(k, ast.c_sym(1)),
                     ast.Block([assignment], open_scope=True))
    j_loop = ast.For(ast.Decl("int", j, ast.c_sym(0)),
                     ast.Less(j, ast.c_sym(nfdof)),
                     ast.Incr(j, ast.c_sym(1)),
                     ast.Block([k_loop], open_scope=True))
    i_loop = ast.For(ast.Decl("int", i, ast.c_sym(0)),
                     ast.Less(i, ast.c_sym(ncdof)),
                     ast.Incr(i, ast.c_sym(1)),
                     ast.Block([j_loop], open_scope=True))
    k = ast.FunDecl("void", "restriction", arglist, ast.Block(w + [i_loop]),
                    pred=["static", "inline"])

    return op2.Kernel(k, "restriction", opts=parameters["coffee"])
Beispiel #3
0
def get_injection_kernel(fiat_element, unique_indices, dim=1):
    weights = injection_weights(fiat_element)[unique_indices].T
    ncdof = weights.shape[0]
    nfdof = weights.shape[1]
    # What if we have multiple nodes in same location (DG)?  Divide by
    # rowsum.
    weights = weights / np.sum(weights, axis=1).reshape(-1, 1)

    all_same = np.allclose(weights, weights[0, 0])

    arglist = [
        ast.Decl("double", ast.Symbol("coarse", (ncdof * dim, ))),
        ast.Decl("double *restrict *restrict ",
                 ast.Symbol("fine", ()),
                 qualifiers=["const"])
    ]
    if all_same:
        w_sym = ast.Symbol("weights", ())
        w = [ast.Decl("double", w_sym, weights[0, 0], qualifiers=["const"])]
    else:
        init = ast.ArrayInit(format_array_literal(weights))
        w_sym = ast.Symbol("weights", (ncdof, nfdof))
        w = [ast.Decl("double", w_sym, init, qualifiers=["const"])]

    i = ast.Symbol("i", ())
    j = ast.Symbol("j", ())
    k = ast.Symbol("k", ())
    if all_same:
        assign = ast.Prod(ast.Symbol("fine", (j, k)), w_sym)
    else:
        assign = ast.Prod(ast.Symbol("fine", (j, k)),
                          ast.Symbol("weights", (i, j)))
    assignment = ast.Incr(
        ast.Symbol("coarse", (ast.Sum(k, ast.Prod(i, ast.c_sym(dim))), )),
        assign)
    k_loop = ast.For(ast.Decl("int", k, ast.c_sym(0)),
                     ast.Less(k, ast.c_sym(dim)), ast.Incr(k, ast.c_sym(1)),
                     ast.Block([assignment], open_scope=True))
    j_loop = ast.For(ast.Decl("int", j, ast.c_sym(0)),
                     ast.Less(j, ast.c_sym(nfdof)), ast.Incr(j, ast.c_sym(1)),
                     ast.Block([k_loop], open_scope=True))
    i_loop = ast.For(ast.Decl("int", i, ast.c_sym(0)),
                     ast.Less(i, ast.c_sym(ncdof)), ast.Incr(i, ast.c_sym(1)),
                     ast.Block([j_loop], open_scope=True))
    k = ast.FunDecl("void",
                    "injection",
                    arglist,
                    ast.Block(w + [i_loop]),
                    pred=["static", "inline"])

    return op2.Kernel(k, "injection", opts=parameters["coffee"])
Beispiel #4
0
def get_prolongation_kernel(fiat_element, unique_indices, dim=1):
    weights = get_restriction_weights(fiat_element)[unique_indices]
    nfdof = weights.shape[0]
    ncdof = weights.shape[1]
    arglist = [
        ast.Decl("double", ast.Symbol("fine", (nfdof * dim, ))),
        ast.Decl("double",
                 ast.Symbol("*restrict *restrict coarse", ()),
                 qualifiers=["const"])
    ]
    all_same = np.allclose(weights, weights[0, 0])

    if all_same:
        w_sym = ast.Symbol("weights", ())
        w = [ast.Decl("double", w_sym, weights[0, 0], qualifiers=["const"])]
    else:
        w_sym = ast.Symbol("weights", (nfdof, ncdof))
        init = ast.ArrayInit(format_array_literal(weights))
        w = [ast.Decl("double", w_sym, init, qualifiers=["const"])]
    i = ast.Symbol("i", ())
    j = ast.Symbol("j", ())
    k = ast.Symbol("k", ())
    if all_same:
        assign = ast.Prod(ast.Symbol("coarse", (j, k)), w_sym)
    else:
        assign = ast.Prod(ast.Symbol("coarse", (j, k)),
                          ast.Symbol("weights", (i, j)))

    assignment = ast.Incr(
        ast.Symbol("fine", (ast.Sum(k, ast.Prod(i, ast.c_sym(dim))), )),
        assign)
    k_loop = ast.For(ast.Decl("int", k, ast.c_sym(0)),
                     ast.Less(k, ast.c_sym(dim)), ast.Incr(k, ast.c_sym(1)),
                     ast.Block([assignment], open_scope=True))
    j_loop = ast.For(ast.Decl("int", j, ast.c_sym(0)),
                     ast.Less(j, ast.c_sym(ncdof)), ast.Incr(j, ast.c_sym(1)),
                     ast.Block([k_loop], open_scope=True))
    i_loop = ast.For(ast.Decl("int", i, ast.c_sym(0)),
                     ast.Less(i, ast.c_sym(nfdof)), ast.Incr(i, ast.c_sym(1)),
                     ast.Block([j_loop], open_scope=True))
    k = ast.FunDecl("void",
                    "prolongation",
                    arglist,
                    ast.Block(w + [i_loop]),
                    pred=["static", "inline"])

    return op2.Kernel(k, "prolongation", opts=parameters["coffee"])
Beispiel #5
0
    def exterior_facet_boundary_node_map(self, V, method):
        """Return the :class:`pyop2.Map` from exterior facets to nodes
        on the boundary.

        :arg V: The function space.
        :arg method:  The method for determining boundary nodes.  See
           :class:`~.DirichletBC` for details.
        """
        try:
            return self.map_caches["boundary_node"][method]
        except KeyError:
            pass
        el = V.finat_element

        dim = self.mesh.facet_dimension()

        if method == "topological":
            boundary_dofs = el.entity_closure_dofs()[dim]
        elif method == "geometric":
            # This function is only called on extruded meshes when
            # asking for the nodes that live on the "vertical"
            # exterior facets.
            boundary_dofs = entity_support_dofs(el, dim)

        nodes_per_facet = \
            len(boundary_dofs[0])

        # HACK ALERT
        # The facet set does not have a halo associated with it, since
        # we only construct halos for DoF sets.  Fortunately, this
        # loop is direct and we already have all the correct
        # information available locally.  So We fake a set of the
        # correct size and carry out a direct loop
        facet_set = op2.Set(self.mesh.exterior_facets.set.total_size,
                            comm=self.mesh.comm)

        fs_dat = op2.Dat(
            facet_set**el.space_dimension(),
            data=V.exterior_facet_node_map().values_with_halo.view())

        facet_dat = op2.Dat(facet_set**nodes_per_facet, dtype=IntType)

        # Ensure these come out in sorted order.
        local_facet_nodes = numpy.array(
            [boundary_dofs[e] for e in sorted(boundary_dofs.keys())])

        # Helper function to turn the inner index of an array into c
        # array literals.
        c_array = lambda xs: "{" + ", ".join(map(str, xs)) + "}"

        # AST for: l_nodes[facet[0]][n]
        rank_ast = ast.Symbol("l_nodes",
                              rank=(ast.Symbol("facet", rank=(0, )), "n"))

        body = ast.Block([
            ast.Decl("int",
                     ast.Symbol("l_nodes",
                                (len(el.cell.topology[dim]), nodes_per_facet)),
                     init=ast.ArrayInit(
                         c_array(map(c_array, local_facet_nodes))),
                     qualifiers=["const"]),
            ast.For(
                ast.Decl("int", "n", 0), ast.Less("n", nodes_per_facet),
                ast.Incr("n", 1),
                ast.Assign(ast.Symbol("facet_nodes", ("n", )),
                           ast.Symbol("cell_nodes", (rank_ast, ))))
        ])

        kernel = op2.Kernel(
            ast.FunDecl("void", "create_bc_node_map", [
                ast.Decl("%s*" % as_cstr(fs_dat.dtype), "cell_nodes"),
                ast.Decl("%s*" % as_cstr(facet_dat.dtype), "facet_nodes"),
                ast.Decl("unsigned int*", "facet")
            ], body), "create_bc_node_map")

        local_facet_dat = op2.Dat(
            facet_set**self.mesh.exterior_facets._rank,
            self.mesh.exterior_facets.local_facet_dat.data_ro_with_halos,
            dtype=numpy.uintc)
        op2.par_loop(kernel, facet_set, fs_dat(op2.READ), facet_dat(op2.WRITE),
                     local_facet_dat(op2.READ))

        if self.extruded:
            offset = self.offset[boundary_dofs[0]]
        else:
            offset = None
        val = op2.Map(facet_set,
                      self.node_set,
                      nodes_per_facet,
                      facet_dat.data_ro_with_halos,
                      name="exterior_facet_boundary_node",
                      offset=offset)
        self.map_caches["boundary_node"][method] = val
        return val
Beispiel #6
0
def dg_injection_kernel(Vf, Vc, ncell):
    from firedrake import Tensor, AssembledVector, TestFunction, TrialFunction
    from firedrake.slate.slac import compile_expression
    macro_builder = MacroKernelBuilder(ScalarType_c, ncell)
    f = ufl.Coefficient(Vf)
    macro_builder.set_coefficients([f])
    macro_builder.set_coordinates(Vf.mesh())

    Vfe = create_element(Vf.ufl_element())
    macro_quadrature_rule = make_quadrature(
        Vfe.cell, estimate_total_polynomial_degree(ufl.inner(f, f)))
    index_cache = {}
    parameters = default_parameters()
    integration_dim, entity_ids = lower_integral_type(Vfe.cell, "cell")
    macro_cfg = dict(interface=macro_builder,
                     ufl_cell=Vf.ufl_cell(),
                     precision=parameters["precision"],
                     integration_dim=integration_dim,
                     entity_ids=entity_ids,
                     index_cache=index_cache,
                     quadrature_rule=macro_quadrature_rule)

    fexpr, = fem.compile_ufl(f, **macro_cfg)
    X = ufl.SpatialCoordinate(Vf.mesh())
    C_a, = fem.compile_ufl(X, **macro_cfg)
    detJ = ufl_utils.preprocess_expression(
        abs(ufl.JacobianDeterminant(f.ufl_domain())))
    macro_detJ, = fem.compile_ufl(detJ, **macro_cfg)

    Vce = create_element(Vc.ufl_element())

    coarse_builder = firedrake_interface.KernelBuilder("cell", "otherwise", 0,
                                                       ScalarType_c)
    coarse_builder.set_coordinates(Vc.mesh())
    argument_multiindices = (Vce.get_indices(), )
    argument_multiindex, = argument_multiindices
    return_variable, = coarse_builder.set_arguments((ufl.TestFunction(Vc), ),
                                                    argument_multiindices)

    integration_dim, entity_ids = lower_integral_type(Vce.cell, "cell")
    # Midpoint quadrature for jacobian on coarse cell.
    quadrature_rule = make_quadrature(Vce.cell, 0)

    coarse_cfg = dict(interface=coarse_builder,
                      ufl_cell=Vc.ufl_cell(),
                      precision=parameters["precision"],
                      integration_dim=integration_dim,
                      entity_ids=entity_ids,
                      index_cache=index_cache,
                      quadrature_rule=quadrature_rule)

    X = ufl.SpatialCoordinate(Vc.mesh())
    K = ufl_utils.preprocess_expression(ufl.JacobianInverse(Vc.mesh()))
    C_0, = fem.compile_ufl(X, **coarse_cfg)
    K, = fem.compile_ufl(K, **coarse_cfg)

    i = gem.Index()
    j = gem.Index()

    C_0 = gem.Indexed(C_0, (j, ))
    C_0 = gem.index_sum(C_0, quadrature_rule.point_set.indices)
    C_a = gem.Indexed(C_a, (j, ))
    X_a = gem.Sum(C_0, gem.Product(gem.Literal(-1), C_a))

    K_ij = gem.Indexed(K, (i, j))
    K_ij = gem.index_sum(K_ij, quadrature_rule.point_set.indices)
    X_a = gem.index_sum(gem.Product(K_ij, X_a), (j, ))
    C_0, = quadrature_rule.point_set.points
    C_0 = gem.Indexed(gem.Literal(C_0), (i, ))
    # fine quad points in coarse reference space.
    X_a = gem.Sum(C_0, gem.Product(gem.Literal(-1), X_a))
    X_a = gem.ComponentTensor(X_a, (i, ))

    # Coarse basis function evaluated at fine quadrature points
    phi_c = fem.fiat_to_ufl(
        Vce.point_evaluation(0, X_a, (Vce.cell.get_dimension(), 0)), 0)

    tensor_indices = tuple(gem.Index(extent=d) for d in f.ufl_shape)

    phi_c = gem.Indexed(phi_c, argument_multiindex + tensor_indices)
    fexpr = gem.Indexed(fexpr, tensor_indices)
    quadrature_weight = macro_quadrature_rule.weight_expression
    expr = gem.Product(gem.IndexSum(gem.Product(phi_c, fexpr), tensor_indices),
                       gem.Product(macro_detJ, quadrature_weight))

    quadrature_indices = macro_builder.indices + macro_quadrature_rule.point_set.indices

    reps = spectral.Integrals([expr], quadrature_indices,
                              argument_multiindices, parameters)
    assignments = spectral.flatten([(return_variable, reps)], index_cache)
    return_variables, expressions = zip(*assignments)
    expressions = impero_utils.preprocess_gem(expressions,
                                              **spectral.finalise_options)
    assignments = list(zip(return_variables, expressions))
    impero_c = impero_utils.compile_gem(assignments,
                                        quadrature_indices +
                                        argument_multiindex,
                                        remove_zeros=True)

    index_names = []

    def name_index(index, name):
        index_names.append((index, name))
        if index in index_cache:
            for multiindex, suffix in zip(index_cache[index],
                                          string.ascii_lowercase):
                name_multiindex(multiindex, name + suffix)

    def name_multiindex(multiindex, name):
        if len(multiindex) == 1:
            name_index(multiindex[0], name)
        else:
            for i, index in enumerate(multiindex):
                name_index(index, name + str(i))

    name_multiindex(quadrature_indices, 'ip')
    for multiindex, name in zip(argument_multiindices, ['j', 'k']):
        name_multiindex(multiindex, name)

    index_names.extend(zip(macro_builder.indices, ["entity"]))
    body = generate_coffee(impero_c, index_names, parameters["precision"],
                           ScalarType_c)

    retarg = ast.Decl(ScalarType_c,
                      ast.Symbol("R", rank=(Vce.space_dimension(), )))
    local_tensor = coarse_builder.local_tensor
    local_tensor.init = ast.ArrayInit(
        numpy.zeros(Vce.space_dimension(), dtype=ScalarType_c))
    body.children.insert(0, local_tensor)
    args = [retarg] + macro_builder.kernel_args + [
        macro_builder.coordinates_arg, coarse_builder.coordinates_arg
    ]

    # Now we have the kernel that computes <f, phi_c>dx_c
    # So now we need to hit it with the inverse mass matrix on dx_c

    u = TrialFunction(Vc)
    v = TestFunction(Vc)
    expr = Tensor(ufl.inner(u, v) * ufl.dx).inv * AssembledVector(
        ufl.Coefficient(Vc))
    Ainv, = compile_expression(expr)
    Ainv = Ainv.kinfo.kernel
    A = ast.Symbol(local_tensor.sym.symbol)
    R = ast.Symbol("R")
    body.children.append(
        ast.FunCall(Ainv.name, R, coarse_builder.coordinates_arg.sym, A))
    from coffee.base import Node
    assert isinstance(Ainv._code, Node)
    return op2.Kernel(ast.Node([
        Ainv._code,
        ast.FunDecl("void",
                    "pyop2_kernel_injection_dg",
                    args,
                    body,
                    pred=["static", "inline"])
    ]),
                      name="pyop2_kernel_injection_dg",
                      cpp=True,
                      include_dirs=Ainv._include_dirs,
                      headers=Ainv._headers)
Beispiel #7
0
def _tabulate_tensor(ir, parameters):
    "Generate code for a single integral (tabulate_tensor())."

    p_format        = parameters["format"]
    precision       = parameters["precision"]

    f_comment       = format["comment"]
    f_G             = format["geometry constant"]
    f_const_double  = format["assign"]
    f_float         = format["float"]
    f_assign        = format["assign"]
    f_A             = format["element tensor"][p_format]
    f_r             = format["free indices"][0]
    f_j             = format["first free index"]
    f_k             = format["second free index"]
    f_loop          = format["generate loop"]
    f_int           = format["int"]
    f_weight        = format["weight"]

    # Get data.
    opt_par     = ir["optimise_parameters"]
    integral_type = ir["integral_type"]
    cell        = ir["cell"]
    gdim        = cell.geometric_dimension()
    tdim        = cell.topological_dimension()
    num_facets  = ir["num_facets"]
    num_vertices= ir["num_vertices"]
    integrals   = ir["trans_integrals"]
    geo_consts  = ir["geo_consts"]
    oriented    = ir["needs_oriented"]

    # Create sets of used variables.
    used_weights    = set()
    used_psi_tables = set()
    used_nzcs       = set()
    trans_set       = set()
    sets = [used_weights, used_psi_tables, used_nzcs, trans_set]

    affine_tables = {} # TODO: This is not populated anywhere, remove?
    quadrature_weights = ir["quadrature_weights"]

    #The pyop2 format requires dereferencing constant coefficients since
    # these are passed in as double *
    common = []
    if p_format == "pyop2":
        for n, c in zip(ir["coefficient_names"], ir["coefficient_elements"]):
            if c.family() == 'Real':
                # Second index is always? 0, so we cast to (double (*)[1]).
                common += ['double (*w%(n)s)[1] = (double (*)[1])c%(n)s;\n' %
                           {'n': n[1:]}]

    operations = []
    if integral_type == "cell":
        # Update transformer with facets and generate code + set of used geometry terms.
        nest_ir, num_ops = _generate_element_tensor(integrals, sets, \
                                                    opt_par, parameters)

        # Set operations equal to num_ops (for printing info on operations).
        operations.append([num_ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code  = ""
        jacobi_code += format["compute_jacobian"](cell)
        jacobi_code += "\n"
        jacobi_code += format["compute_jacobian_inverse"](cell)
        if oriented and tdim != gdim:
            # NEED TO THINK ABOUT THIS FOR EXTRUSION
            jacobi_code += format["orientation"][p_format](tdim, gdim)
        jacobi_code += "\n"
        jacobi_code += format["scale factor snippet"][p_format]

        # Generate code for cell volume and circumradius -- note that the
        # former will be incorrect on extruded meshes by a constant factor.
        jacobi_code += "\n\n" + format["generate cell volume"][p_format](tdim, gdim, integral_type)
        jacobi_code += "\n\n" + format["generate circumradius"][p_format](tdim, gdim, integral_type)

    elif integral_type in ("exterior_facet", "exterior_facet_vert"):
        if p_format == 'pyop2':
            common += ["unsigned int facet = *facet_p;\n"]

        # Generate tensor code for facets + set of used geometry terms.
        nest_ir, ops = _generate_element_tensor(integrals, sets, opt_par, parameters)

        # Save number of operations (for printing info on operations).
        operations.append([ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code  = ""
        jacobi_code += format["compute_jacobian"](cell)
        jacobi_code += "\n"
        jacobi_code += format["compute_jacobian_inverse"](cell)
        if oriented and tdim != gdim:
            # NEED TO THINK ABOUT THIS FOR EXTRUSION
            jacobi_code += format["orientation"][p_format](tdim, gdim)
        jacobi_code += "\n"
        if integral_type == "exterior_facet":
            jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type)
            jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)
            jacobi_code += "\n\n" + format["generate facet area"](tdim, gdim)
            if tdim == 3:
                jacobi_code += "\n\n" + format["generate min facet edge length"](tdim, gdim)
                jacobi_code += "\n\n" + format["generate max facet edge length"](tdim, gdim)

            # Generate code for cell volume and circumradius
            jacobi_code += "\n\n" + format["generate cell volume"][p_format](tdim, gdim, integral_type)
            jacobi_code += "\n\n" + format["generate circumradius"][p_format](tdim, gdim, integral_type)

        elif integral_type == "exterior_facet_vert":
            jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type)
            jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)
            # OTHER THINGS NOT IMPLEMENTED YET
        else:
            raise RuntimeError("Invalid integral_type")

    elif integral_type in ("exterior_facet_top", "exterior_facet_bottom"):
        nest_ir, ops = _generate_element_tensor(integrals, sets, opt_par, parameters)
        operations.append([ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code  = ""
        jacobi_code += format["compute_jacobian"](cell)
        jacobi_code += "\n"
        jacobi_code += format["compute_jacobian_inverse"](cell)
        if oriented:
            # NEED TO THINK ABOUT THIS FOR EXTRUSION
            jacobi_code += format["orientation"][p_format](tdim, gdim)
        jacobi_code += "\n"
        jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type)
        jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)
        # THE REST IS NOT IMPLEMENTED YET

    elif integral_type in ("interior_facet", "interior_facet_vert"):
        if p_format == 'pyop2':
            common += ["unsigned int facet_0 = facet_p[0];"]
            common += ["unsigned int facet_1 = facet_p[1];"]
            common += ["double **coordinate_dofs_0 = coordinate_dofs;"]
            # Note that the following line is unsafe for isoparametric elements.
            common += ["double **coordinate_dofs_1 = coordinate_dofs + %d;" % num_vertices]

        # Generate tensor code for facets + set of used geometry terms.
        nest_ir, ops = _generate_element_tensor(integrals, sets, opt_par, parameters)

        # Save number of operations (for printing info on operations).
        operations.append([ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code  = ""
        for _r in ["+", "-"]:
            if p_format == "pyop2":
                jacobi_code += format["compute_jacobian_interior"](cell, r=_r)
            else:
                jacobi_code += format["compute_jacobian"](cell, r=_r)

            jacobi_code += "\n"
            jacobi_code += format["compute_jacobian_inverse"](cell, r=_r)
            if oriented and tdim != gdim:
                # NEED TO THINK ABOUT THIS FOR EXTRUSION
                jacobi_code += format["orientation"][p_format](tdim, gdim, r=_r)
            jacobi_code += "\n"

        if integral_type == "interior_facet":
            jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type, r="+")
            jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)

            jacobi_code += "\n\n" + format["generate facet area"](tdim, gdim)
            if tdim == 3:
                jacobi_code += "\n\n" + format["generate min facet edge length"](tdim, gdim, r="+")
                jacobi_code += "\n\n" + format["generate max facet edge length"](tdim, gdim, r="+")

            # Generate code for cell volume and circumradius
            jacobi_code += "\n\n" + format["generate cell volume"][p_format](tdim, gdim, integral_type)
            jacobi_code += "\n\n" + format["generate circumradius interior"](tdim, gdim, integral_type)

        elif integral_type == "interior_facet_vert":
            # THE REST IS NOT IMPLEMENTED YET
            jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type, r="+")
            jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)
        else:
            raise RuntimeError("Invalid integral_type")

    elif integral_type == "interior_facet_horiz":
        common += ["double **coordinate_dofs_0 = coordinate_dofs;"]
        # Note that the following line is unsafe for isoparametric elements.
        common += ["double **coordinate_dofs_1 = coordinate_dofs + %d;" % num_vertices]

        nest_ir, ops = _generate_element_tensor(integrals, sets, opt_par, parameters)

        # Save number of operations (for printing info on operations).
        operations.append([ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code  = ""
        for _r in ["+", "-"]:
            jacobi_code += format["compute_jacobian_interior"](cell, r=_r)
            jacobi_code += "\n"
            jacobi_code += format["compute_jacobian_inverse"](cell, r=_r)
            if oriented:
                # NEED TO THINK ABOUT THIS FOR EXTRUSION
                jacobi_code += format["orientation"][p_format](tdim, gdim, r=_r)
            jacobi_code += "\n"

        # TODO: verify that this is correct (we think it is)
        jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type, r="+")
        jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)
        # THE REST IS NOT IMPLEMENTED YET

    elif integral_type == "point":
        # Update transformer with vertices and generate code + set of used geometry terms.
        nest_ir, ops = _generate_element_tensor(integrals, sets, opt_par, parameters)

        # Save number of operations (for printing info on operations).
        operations.append([ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code  = ""
        jacobi_code += format["compute_jacobian"](cell)
        jacobi_code += "\n"
        jacobi_code += format["compute_jacobian_inverse"](cell)
        if oriented and tdim != gdim:
            jacobi_code += format["orientation"][p_format](tdim, gdim)
        jacobi_code += "\n"

    else:
        error("Unhandled integral type: " + str(integral_type))

    # Embedded manifold, need to pass in cell orientations
    if oriented and tdim != gdim and p_format == 'pyop2':
        if integral_type in ("interior_facet", "interior_facet_vert", "interior_facet_horiz"):
            common += ["const int cell_orientation%s = cell_orientation_[0][0];" % _choose_map('+'),
                       "const int cell_orientation%s = cell_orientation_[1][0];" % _choose_map('-')]
        else:
            common += ["const int cell_orientation = cell_orientation_[0][0];"]
    # After we have generated the element code for all facets we can remove
    # the unused transformations and tabulate the used psi tables and weights.
    common += [remove_unused(jacobi_code, trans_set)]
    jacobi_ir = pyop2.FlatBlock("\n".join(common))

    # @@@: const double W3[3] = {{...}}
    pyop2_weights = []
    for weights, points in [quadrature_weights[p] for p in used_weights]:
        n_points = len(points)
        w_sym = pyop2.Symbol(f_weight(n_points), () if n_points == 1 else (n_points,))
        pyop2_weights.append(pyop2.Decl("double", w_sym,
                                        pyop2.ArrayInit(weights, precision),
                                        qualifiers=["static", "const"]))

    name_map = ir["name_map"]
    tables = ir["unique_tables"]
    tables.update(affine_tables) # TODO: This is not populated anywhere, remove?

    # @@@: const double FE0[] = {{...}}
    code, decl = _tabulate_psis(tables, used_psi_tables, name_map, used_nzcs, opt_par, parameters)
    pyop2_basis = []
    for name, data in decl.items():
        rank, _, values = data
        zeroflags = values.get_zeros()
        feo_sym = pyop2.Symbol(name, rank)
        init = pyop2.ArrayInit(values, precision)
        if zeroflags is not None and not zeroflags.all():
            nz_indices = numpy.logical_not(zeroflags).nonzero()
            # Note: in the following, we take the last entry of /nz_indices/ since we /know/
            # we have been tracking only zero-valued columns
            nz_indices = nz_indices[-1]
            nz_bounds = tuple([(i, 0)] for i in rank[:-1])
            nz_bounds += ([(max(nz_indices) - min(nz_indices) + 1, min(nz_indices))],)
            init = pyop2.SparseArrayInit(values, precision, nz_bounds)
        pyop2_basis.append(pyop2.Decl("double", feo_sym, init, ["static", "const"]))

    # Build the root of the PyOP2' ast
    pyop2_tables = pyop2_weights + [tab for tab in pyop2_basis]
    root = pyop2.Root([jacobi_ir] + pyop2_tables + nest_ir)

    return root
Beispiel #8
0
def build_hard_fusion_kernel(base_loop, fuse_loop, fusion_map, loop_chain_index):
    """
    Build AST and :class:`Kernel` for two loops suitable to hard fusion.

    The AST consists of three functions: fusion, base, fuse. base and fuse
    are respectively the ``base_loop`` and the ``fuse_loop`` kernels, whereas
    fusion is the orchestrator that invokes, for each ``base_loop`` iteration,
    base and, if still to be executed, fuse.

    The orchestrator has the following structure: ::

        fusion (buffer, ..., executed):
            base (buffer, ...)
            for i = 0 to arity:
                if not executed[i]:
                    additional pointer staging required by kernel2
                    fuse (sub_buffer, ...)
                    insertion into buffer

    The executed array tracks whether the i-th iteration (out of /arity/)
    adjacent to the main kernel1 iteration has been executed.
    """

    finder = Find((ast.FunDecl, ast.PreprocessNode))

    base = base_loop.kernel
    base_ast = dcopy(base._ast)
    base_info = finder.visit(base_ast)
    base_headers = base_info[ast.PreprocessNode]
    base_fundecl = base_info[ast.FunDecl]
    assert len(base_fundecl) == 1
    base_fundecl = base_fundecl[0]

    fuse = fuse_loop.kernel
    fuse_ast = dcopy(fuse._ast)
    fuse_info = finder.visit(fuse_ast)
    fuse_headers = fuse_info[ast.PreprocessNode]
    fuse_fundecl = fuse_info[ast.FunDecl]
    assert len(fuse_fundecl) == 1
    fuse_fundecl = fuse_fundecl[0]

    # Create /fusion/ arguments and signature
    body = ast.Block([])
    fusion_name = '%s_%s' % (base_fundecl.name, fuse_fundecl.name)
    fusion_args = dcopy(base_fundecl.args + fuse_fundecl.args)
    fusion_fundecl = ast.FunDecl(base_fundecl.ret, fusion_name, fusion_args, body)

    # Make sure kernel and variable names are unique
    base_fundecl.name = "%s_base" % base_fundecl.name
    fuse_fundecl.name = "%s_fuse" % fuse_fundecl.name
    for i, decl in enumerate(fusion_args):
        decl.sym.symbol += '_%d' % i

    # Filter out duplicate arguments, and append extra arguments to the fundecl
    binding = WeakFilter().kernel_args([base_loop, fuse_loop], fusion_fundecl)
    fusion_args += [ast.Decl('int*', 'executed'),
                    ast.Decl('int*', 'fused_iters'),
                    ast.Decl('int', 'i')]

    # Which args are actually used in /fuse/, but not in /base/ ? The gather for
    # such arguments is moved to /fusion/, to avoid usless memory LOADs
    base_dats = set(a.data for a in base_loop.args)
    fuse_dats = set(a.data for a in fuse_loop.args)
    unshared = OrderedDict()
    for arg, decl in binding.items():
        if arg.data in fuse_dats - base_dats:
            unshared.setdefault(decl, arg)

    # Track position of Args that need a postponed gather
    # Can't track Args themselves as they change across different parloops
    fargs = {fusion_args.index(i): ('postponed', False) for i in unshared.keys()}
    fargs.update({len(set(binding.values())): ('onlymap', True)})

    # Add maps for arguments that need a postponed gather
    for decl, arg in unshared.items():
        decl_pos = fusion_args.index(decl)
        fusion_args[decl_pos].sym.symbol = arg.c_arg_name()
        if arg._is_indirect:
            fusion_args[decl_pos].sym.rank = ()
            fusion_args.insert(decl_pos + 1, ast.Decl('int*', arg.c_map_name(0, 0)))

    # Append the invocation of /base/; then, proceed with the invocation
    # of the /fuse/ kernels
    base_funcall_syms = [binding[a].sym.symbol for a in base_loop.args]
    body.children.append(ast.FunCall(base_fundecl.name, *base_funcall_syms))

    for idx in range(fusion_map.arity):

        fused_iter = ast.Assign('i', ast.Symbol('fused_iters', (idx,)))
        fuse_funcall = ast.FunCall(fuse_fundecl.name)
        if_cond = ast.Not(ast.Symbol('executed', ('i',)))
        if_update = ast.Assign(ast.Symbol('executed', ('i',)), 1)
        if_body = ast.Block([fuse_funcall, if_update], open_scope=True)
        if_exec = ast.If(if_cond, [if_body])
        body.children.extend([ast.FlatBlock('\n'), fused_iter, if_exec])

        # Modify the /fuse/ kernel
        # This is to take into account that many arguments are shared with
        # /base/, so they will only staged once for /base/. This requires
        # tweaking the way the arguments are declared and accessed in /fuse/.
        # For example, the shared incremented array (called /buffer/ in
        # the pseudocode in the comment above) now needs to take offsets
        # to be sure the locations that /base/ is supposed to increment are
        # actually accessed. The same concept apply to indirect arguments.
        init = lambda v: '{%s}' % ', '.join([str(j) for j in v])
        for i, fuse_loop_arg in enumerate(fuse_loop.args):
            fuse_kernel_arg = binding[fuse_loop_arg]

            buffer_name = '%s_vec' % fuse_kernel_arg.sym.symbol
            fuse_funcall_sym = ast.Symbol(buffer_name)

            # What kind of temporaries do we need ?
            if fuse_loop_arg.access == INC:
                op, lvalue, rvalue = ast.Incr, fuse_kernel_arg.sym.symbol, buffer_name
                stager = lambda b, l: b.children.extend(l)
                indexer = lambda indices: [(k, j) for j, k in enumerate(indices)]
                pointers = []
            elif fuse_loop_arg.access == READ:
                op, lvalue, rvalue = ast.Assign, buffer_name, fuse_kernel_arg.sym.symbol
                stager = lambda b, l: [b.children.insert(0, j) for j in reversed(l)]
                indexer = lambda indices: [(j, k) for j, k in enumerate(indices)]
                pointers = list(fuse_kernel_arg.pointers)

            # Now gonna handle arguments depending on their type and rank ...

            if fuse_loop_arg._is_global:
                # ... Handle global arguments. These can be dropped in the
                # kernel without any particular fiddling
                fuse_funcall_sym = ast.Symbol(fuse_kernel_arg.sym.symbol)

            elif fuse_kernel_arg in unshared:
                # ... Handle arguments that appear only in /fuse/
                staging = unshared[fuse_kernel_arg].c_vec_init(False).split('\n')
                rvalues = [ast.FlatBlock(j.split('=')[1]) for j in staging]
                lvalues = [ast.Symbol(buffer_name, (j,)) for j in range(len(staging))]
                staging = [ast.Assign(j, k) for j, k in zip(lvalues, rvalues)]

                # Set up the temporary
                buffer_symbol = ast.Symbol(buffer_name, (len(staging),))
                buffer_decl = ast.Decl(fuse_kernel_arg.typ, buffer_symbol,
                                       qualifiers=fuse_kernel_arg.qual,
                                       pointers=list(pointers))

                # Update the if-then AST body
                stager(if_exec.children[0], staging)
                if_exec.children[0].children.insert(0, buffer_decl)

            elif fuse_loop_arg._is_mat:
                # ... Handle Mats
                staging = []
                for b in fused_inc_arg._block_shape:
                    for rc in b:
                        lvalue = ast.Symbol(lvalue, (idx, idx),
                                            ((rc[0], 'j'), (rc[1], 'k')))
                        rvalue = ast.Symbol(rvalue, ('j', 'k'))
                        staging = ItSpace(mode=0).to_for([(0, rc[0]), (0, rc[1])],
                                                         ('j', 'k'),
                                                         [op(lvalue, rvalue)])[:1]

                # Set up the temporary
                buffer_symbol = ast.Symbol(buffer_name, (fuse_kernel_arg.sym.rank,))
                buffer_init = ast.ArrayInit(init([init([0.0])]))
                buffer_decl = ast.Decl(fuse_kernel_arg.typ, buffer_symbol, buffer_init,
                                       qualifiers=fuse_kernel_arg.qual, pointers=pointers)

                # Update the if-then AST body
                stager(if_exec.children[0], staging)
                if_exec.children[0].children.insert(0, buffer_decl)

            elif fuse_loop_arg._is_indirect:
                cdim = fuse_loop_arg.data.cdim

                if cdim == 1 and fuse_kernel_arg.sym.rank:
                    # [Special case]
                    # ... Handle rank 1 indirect arguments that appear in both
                    # /base/ and /fuse/: just point into the right location
                    rank = (idx,) if fusion_map.arity > 1 else ()
                    fuse_funcall_sym = ast.Symbol(fuse_kernel_arg.sym.symbol, rank)

                else:
                    # ... Handle indirect arguments. At the C level, these arguments
                    # are of pointer type, so simple pointer arithmetic is used
                    # to ensure the kernel accesses are to the correct locations
                    fuse_arity = fuse_loop_arg.map.arity
                    base_arity = fuse_arity*fusion_map.arity
                    size = fuse_arity*cdim

                    # Set the proper storage layout before invoking /fuse/
                    ofs_vals = [[base_arity*j + k for k in range(fuse_arity)]
                                for j in range(cdim)]
                    ofs_vals = [[fuse_arity*j + k for k in flatten(ofs_vals)]
                                for j in range(fusion_map.arity)]
                    ofs_vals = list(flatten(ofs_vals))
                    indices = [ofs_vals[idx*size + j] for j in range(size)]

                    staging = [op(ast.Symbol(lvalue, (j,)), ast.Symbol(rvalue, (k,)))
                               for j, k in indexer(indices)]

                    # Set up the temporary
                    buffer_symbol = ast.Symbol(buffer_name, (size,))
                    if fuse_loop_arg.access == INC:
                        buffer_init = ast.ArrayInit(init([0.0]))
                    else:
                        buffer_init = ast.EmptyStatement()
                        pointers.pop()
                    buffer_decl = ast.Decl(fuse_kernel_arg.typ, buffer_symbol, buffer_init,
                                           qualifiers=fuse_kernel_arg.qual,
                                           pointers=pointers)

                    # Update the if-then AST body
                    stager(if_exec.children[0], staging)
                    if_exec.children[0].children.insert(0, buffer_decl)

            else:
                # Nothing special to do for direct arguments
                pass

            # Finally update the /fuse/ funcall
            fuse_funcall.children.append(fuse_funcall_sym)

    fused_headers = set([str(h) for h in base_headers + fuse_headers])
    fused_ast = ast.Root([ast.PreprocessNode(h) for h in fused_headers] +
                         [base_fundecl, fuse_fundecl, fusion_fundecl])

    return Kernel([base, fuse], fused_ast, loop_chain_index), fargs
Beispiel #9
0
    def exterior_facet_boundary_node_map(self, method):
        '''The :class:`pyop2.Map` from exterior facets to the nodes on
        those facets. Note that this differs from
        :meth:`exterior_facet_node_map` in that only surface nodes
        are referenced, not all nodes in cells touching the surface.

        :arg method: The method for determining boundary nodes. See
            :class:`~.bcs.DirichletBC`.
        '''

        el = self.fiat_element

        dim = self._mesh.facet_dimension()

        if method == "topological":
            boundary_dofs = el.entity_closure_dofs()[dim]
        elif method == "geometric":
            boundary_dofs = el.facet_support_dofs()

        nodes_per_facet = \
            len(boundary_dofs[0])

        # HACK ALERT
        # The facet set does not have a halo associated with it, since
        # we only construct halos for DoF sets.  Fortunately, this
        # loop is direct and we already have all the correct
        # information available locally.  So We fake a set of the
        # correct size and carry out a direct loop
        facet_set = op2.Set(self._mesh.exterior_facets.set.total_size)

        fs_dat = op2.Dat(facet_set**el.space_dimension(),
                         data=self.exterior_facet_node_map().values_with_halo)

        facet_dat = op2.Dat(facet_set**nodes_per_facet, dtype=np.int32)

        local_facet_nodes = np.array(
            [dofs for e, dofs in boundary_dofs.iteritems()])

        # Helper function to turn the inner index of an array into c
        # array literals.
        c_array = lambda xs: "{" + ", ".join(map(str, xs)) + "}"

        body = ast.Block([
            ast.Decl("int",
                     ast.Symbol("l_nodes",
                                (len(el.get_reference_element().topology[dim]),
                                 nodes_per_facet)),
                     init=ast.ArrayInit(
                         c_array(map(c_array, local_facet_nodes))),
                     qualifiers=["const"]),
            ast.For(
                ast.Decl("int", "n", 0), ast.Less("n", nodes_per_facet),
                ast.Incr("n", 1),
                ast.Assign(
                    ast.Symbol("facet_nodes", ("n", )),
                    ast.Symbol("cell_nodes", ("l_nodes[facet[0]][n]", ))))
        ])

        kernel = op2.Kernel(
            ast.FunDecl("void", "create_bc_node_map", [
                ast.Decl("int*", "cell_nodes"),
                ast.Decl("int*", "facet_nodes"),
                ast.Decl("unsigned int*", "facet")
            ], body), "create_bc_node_map")

        local_facet_dat = op2.Dat(
            facet_set**self._mesh.exterior_facets._rank,
            self._mesh.exterior_facets.local_facet_dat.data_ro_with_halos,
            dtype=np.uintc)
        op2.par_loop(kernel, facet_set, fs_dat(op2.READ), facet_dat(op2.WRITE),
                     local_facet_dat(op2.READ))

        if isinstance(self._mesh, mesh_t.ExtrudedMesh):
            offset = self.offset[boundary_dofs[0]]
        else:
            offset = None
        return op2.Map(facet_set,
                       self.node_set,
                       nodes_per_facet,
                       facet_dat.data_ro_with_halos,
                       name="exterior_facet_boundary_node",
                       offset=offset)
Beispiel #10
0
def test_funcall_in_arrayinit():
    tree = ast.ArrayInit(np.asarray([ast.FunCall("foo"), ast.Symbol("bar")]))

    assert tree.gencode() == "{foo(), bar}"