Esempio n. 1
0
    def visit_Decl(self, o, *args, **kwargs):
        """Visits a declared tensor and changes its type to
        :template: result `Eigen::MatrixBase<Derived>`.

        i.e. double A[n][m] ---> const Eigen::MatrixBase<Derived> &A_
        """
        name = kwargs.get("name", "A")
        if o.sym.symbol != name:
            return o
        newtype = "const Eigen::MatrixBase<Derived> &"

        return o.reconstruct(newtype, ast.Symbol("%s_" % name))
Esempio n. 2
0
def _form_string_kernel(body, measure, args, **kwargs):
    kargs = []
    if body.find("][") >= 0:
        warning("""Your kernel body contains a double indirection.\n"""
                """You should update it to single indirections.\n"""
                """\n"""
                """Mail [email protected] for advice.\n""")
    for var, (func, intent) in args.items():
        if isinstance(func, constant.Constant):
            if intent is not READ:
                raise RuntimeError("Only READ access is allowed to Constant")
            # Constants modelled as Globals, so no need for double
            # indirection
            ndof = func.dat.cdim
            kargs.append(
                ast.Decl("double",
                         ast.Symbol(var, (ndof, )),
                         qualifiers=["const"]))
        else:
            # Do we have a component of a mixed function?
            if isinstance(func, Indexed):
                c, i = func.ufl_operands
                idx = i._indices[0]._value
                ndof = c.function_space()[idx].finat_element.space_dimension()
            else:
                if len(func.function_space()) > 1:
                    raise NotImplementedError(
                        "Must index mixed function in par_loop.")
                ndof = func.function_space().finat_element.space_dimension()
            if measure.integral_type() == 'interior_facet':
                ndof *= 2
            kargs.append(ast.Decl("double", ast.Symbol(var, (ndof, ))))
        body = body.replace(var + ".dofs", str(ndof))

    return pyop2.Kernel(
        ast.FunDecl("void",
                    "par_loop_kernel",
                    kargs,
                    ast.FlatBlock(body),
                    pred=["static"]), "par_loop_kernel", **kwargs)
Esempio n. 3
0
    def construct_kernel(self, name, body):
        """Construct a fully built :class:`Kernel`.

        This function contains the logic for building the argument
        list for assembly kernels.

        :arg name: function name
        :arg body: function body (:class:`coffee.Block` node)
        :returns: :class:`Kernel` object
        """
        args = [self.local_tensor]
        args.extend(self.coefficient_args)
        args.extend(self.coordinates_args)

        # Facet number(s)
        if self.integral_type == "exterior_facet":
            args.append(coffee.Decl("std::size_t", coffee.Symbol("facet")))
        elif self.integral_type == "interior_facet":
            args.append(coffee.Decl("std::size_t", coffee.Symbol("facet_0")))
            args.append(coffee.Decl("std::size_t", coffee.Symbol("facet_1")))

        # Cell orientation(s)
        if self.interior_facet:
            args.append(coffee.Decl("int", coffee.Symbol("cell_orientation_0")))
            args.append(coffee.Decl("int", coffee.Symbol("cell_orientation_1")))
        else:
            args.append(coffee.Decl("int", coffee.Symbol("cell_orientation")))

        return KernelBuilderBase.construct_kernel(self, name, args, body)
Esempio n. 4
0
def prepare_arguments(arguments, multiindices, interior_facet=False):
    """Bridges the kernel interface and the GEM abstraction for
    Arguments.  Vector Arguments are rearranged here for interior
    facet integrals.

    :arg arguments: UFL Arguments
    :arg multiindices: Argument multiindices
    :arg interior_facet: interior facet integral?
    :returns: (funarg, prepare, expressions)
         funarg      - :class:`coffee.Decl` function argument
         prepare     - list of COFFEE nodes to be prepended to the
                       kernel body
         expressions - GEM expressions referring to the argument
                       tensor
    """
    funarg = coffee.Decl(SCALAR_TYPE, coffee.Symbol("A"), pointers=[()])
    varexp = gem.Variable("A", (None, ))

    if len(arguments) == 0:
        # No arguments
        zero = coffee.FlatBlock("memset({name}, 0, sizeof(*{name}));\n".format(
            name=funarg.sym.gencode()))
        return funarg, [zero], [gem.reshape(varexp, ())]

    elements = tuple(create_element(arg.ufl_element()) for arg in arguments)
    shapes = [element.index_shape for element in elements]
    indices = tuple(chain(*multiindices))

    def expression(restricted):
        return gem.Indexed(gem.reshape(restricted, *shapes), indices)

    u_shape = numpy.array(
        [numpy.prod(element.index_shape, dtype=int) for element in elements])
    if interior_facet:
        c_shape = tuple(2 * u_shape)
        slicez = [[
            slice(r * s, (r + 1) * s) for r, s in zip(restrictions, u_shape)
        ] for restrictions in product((0, 1), repeat=len(arguments))]
    else:
        c_shape = tuple(u_shape)
        slicez = [[slice(s) for s in u_shape]]

    expressions = [
        expression(gem.view(gem.reshape(varexp, c_shape), *slices))
        for slices in slicez
    ]

    zero = coffee.FlatBlock(
        str.format("memset({name}, 0, {size} * sizeof(*{name}));\n",
                   name=funarg.sym.gencode(),
                   size=numpy.product(c_shape, dtype=int)))
    return funarg, [zero], prune(expressions)
Esempio n. 5
0
def _form_kernel(kernel, measure, args, **kwargs):

    kargs = []
    lkernel = kernel

    for var, (func, intent) in args.iteritems():
        if isinstance(func, constant.Constant):
            if intent is not READ:
                raise RuntimeError("Only READ access is allowed to Constant")
            # Constants modelled as Globals, so no need for double
            # indirection
            ndof = func.dat.cdim
            kargs.append(
                ast.Decl("double",
                         ast.Symbol(var, (ndof, )),
                         qualifiers=["const"]))
        else:
            # Do we have a component of a mixed function?
            if isinstance(func, Indexed):
                c, i = func.ufl_operands
                idx = i._indices[0]._value
                ndof = c.function_space()[idx].finat_element.space_dimension()
            else:
                if len(func.function_space()) > 1:
                    raise NotImplementedError(
                        "Must index mixed function in par_loop.")
                ndof = func.function_space().finat_element.space_dimension()
            if measure.integral_type() == 'interior_facet':
                ndof *= 2
            if measure is direct:
                kargs.append(ast.Decl("double", ast.Symbol(var, (ndof, ))))
            else:
                kargs.append(ast.Decl("double *", ast.Symbol(var, (ndof, ))))
        lkernel = lkernel.replace(var + ".dofs", str(ndof))

    body = ast.FlatBlock(lkernel)

    return pyop2.Kernel(ast.FunDecl("void", "par_loop_kernel", kargs, body),
                        "par_loop_kernel", **kwargs)
Esempio n. 6
0
    def construct_kernel(self, name, impero_c, precision, index_names, quadrature_rule):
        """Construct a fully built :class:`Kernel`.

        This function contains the logic for building the argument
        list for assembly kernels.

        :arg name: function name
        :arg impero_c: ImperoC tuple with Impero AST and other data
        :arg precision: floating-point precision for printing
        :arg index_names: pre-assigned index names
        :arg quadrature rule: quadrature rule

        :returns: :class:`Kernel` object
        """
        body = generate_coffee(impero_c, index_names, precision, self.scalar_type)

        args = [self.local_tensor, self.coordinates_arg]
        if self.kernel.oriented:
            args.append(cell_orientations_coffee_arg)
        if self.kernel.needs_cell_sizes:
            args.append(self.cell_sizes_arg)
        args.extend(self.coefficient_args)
        if self.kernel.integral_type in ["exterior_facet", "exterior_facet_vert"]:
            args.append(coffee.Decl("unsigned int",
                                    coffee.Symbol("facet", rank=(1,)),
                                    qualifiers=["const"]))
        elif self.kernel.integral_type in ["interior_facet", "interior_facet_vert"]:
            args.append(coffee.Decl("unsigned int",
                                    coffee.Symbol("facet", rank=(2,)),
                                    qualifiers=["const"]))

        for name_, shape in self.kernel.tabulations:
            args.append(coffee.Decl(self.scalar_type, coffee.Symbol(
                name_, rank=shape), qualifiers=["const"]))

        self.kernel.quadrature_rule = quadrature_rule

        self.kernel.ast = KernelBuilderBase.construct_kernel(self, name, args, body)
        return self.kernel
Esempio n. 7
0
def _coffee_symbol(symbol, rank=()):
    """Build a coffee Symbol, concatenating rank.

    :arg symbol: Either a symbol name, or else an existing coffee Symbol.
    :arg rank: The ``rank`` argument to the coffee Symbol constructor.

    If symbol is a symbol, then the returned symbol has rank
    ``symbol.rank + rank``."""
    if isinstance(symbol, coffee.Symbol):
        rank = symbol.rank + rank
        symbol = symbol.symbol
    else:
        assert isinstance(symbol, str)
    return coffee.Symbol(symbol, rank=rank)
Esempio n. 8
0
def _expression_flexiblyindexed(expr, parameters):
    var = expression(expr.children[0], parameters)
    assert isinstance(var, coffee.Symbol)
    assert not var.rank
    assert not var.offset

    rank = []
    offset = []
    for off, idxs in expr.dim2idxs:
        for index, stride in idxs:
            assert isinstance(index, gem.Index)

        if len(idxs) == 0:
            rank.append(off)
            offset.append((1, 0))
        elif len(idxs) == 1:
            (index, stride), = idxs
            rank.append(parameters.index_names[index])
            offset.append((stride, off))
        else:
            parts = []
            if off:
                parts += [coffee.Symbol(str(off))]
            for index, stride in idxs:
                index_sym = coffee.Symbol(parameters.index_names[index])
                assert stride
                if stride == 1:
                    parts += [index_sym]
                else:
                    parts += [
                        coffee.Prod(index_sym, coffee.Symbol(str(stride)))
                    ]
            assert parts
            rank.append(reduce(coffee.Sum, parts))
            offset.append((1, 0))

    return coffee.Symbol(var.symbol, rank=tuple(rank), offset=tuple(offset))
Esempio n. 9
0
    def construct_kernel(self, return_arg, body):
        """Constructs an :class:`ExpressionKernel`.

        :arg return_arg: COFFEE argument for the return value
        :arg body: function body (:class:`coffee.Block` node)
        :returns: :class:`ExpressionKernel` object
        """
        args = [return_arg]
        if self.oriented:
            args.append(cell_orientations_coffee_arg)
        if self.cell_sizes:
            args.append(self.cell_sizes_arg)
        args.extend(self.kernel_args)

        for name_, shape in self.tabulations:
            args.append(coffee.Decl(self.scalar_type, coffee.Symbol(
                name_, rank=shape), qualifiers=["const"]))

        kernel_code = super(ExpressionKernelBuilder, self).construct_kernel("expression_kernel", args, body)
        return ExpressionKernel(kernel_code, self.oriented, self.cell_sizes, self.coefficients, self.tabulations)
Esempio n. 10
0
def generate_expr_data(expr):
    """This function generates a mapping of the form:

       ``temporaries = {node: symbol_name}``

    where `node` objects are :class:`slate.TensorBase` nodes, and
    `symbol_name` are :class:`coffee.base.Symbol` objects. In addition,
    this function will return a list `aux_exprs` of any expressions that
    require special handling in the compiler. This includes expressions
    that require performing operations on already assembled data or
    generating extra temporaries.

    This mapping is used in the :class:`KernelBuilder` to provide direct
    access to all temporaries associated with a particular slate expression.

    :arg expression: a :class:`slate.TensorBase` object.

    Returns: the terminal temporaries map and auxiliary temporaries.
    """
    # Prepare temporaries map and auxiliary expressions list
    # NOTE: Ordering here matters, especially when running
    # Slate in parallel.
    temps = OrderedDict()
    aux_exprs = []
    for tensor in traverse_dags([expr]):
        if isinstance(tensor, Tensor):
            temps.setdefault(tensor, ast.Symbol("T%d" % len(temps)))

        elif isinstance(tensor, TensorOp):
            # For Action, we need to declare a temporary later on for the
            # acting coefficient. For inverses, we may declare additional
            # temporaries (depending on reference count).
            if isinstance(tensor, (Action, Inverse)):
                aux_exprs.append(tensor)

    # Aux expressions are visited pre-order. We want to declare as if we'd
    # visited post-order (child temporaries first), so reverse.
    aux_exprs = list(OrderedDict.fromkeys(reversed(aux_exprs)))

    return temps, aux_exprs
Esempio n. 11
0
    def set_coefficients(self, integral_data, form_data):
        """Prepare the coefficients of the form.

        :arg integral_data: UFL integral data
        :arg form_data: UFL form data
        """
        name = "w"
        self.coefficient_args = [
            coffee.Decl(SCALAR_TYPE, coffee.Symbol(name),
                        pointers=[("const",), ()],
                        qualifiers=["const"])
        ]

        # enabled_coefficients is a boolean array that indicates which
        # of reduced_coefficients the integral requires.
        for n in range(len(integral_data.enabled_coefficients)):
            if not integral_data.enabled_coefficients[n]:
                continue

            coefficient = form_data.function_replace_map[form_data.reduced_coefficients[n]]
            expression = prepare_coefficient(coefficient, n, name, self.interior_facet)
            self.coefficient_map[coefficient] = expression
Esempio n. 12
0
def _arglist(ir):
    "Generate argument list for tensor tabulation function (only for pyop2)"

    rank  = len(ir['prim_idims'])
    f_j   = format["first free index"]
    f_k   = format["second free index"]
    float = format['float declaration']
    int   = format['int declaration']
    prim_idims  = ir["prim_idims"]
    integral_type = ir["integral_type"]

    if integral_type in ("interior_facet", "interior_facet_horiz", "interior_facet_vert"):
        prim_idims = [d*2 for d in prim_idims]
    localtensor = pyop2.Decl(float, pyop2.Symbol("A", tuple(prim_idims) or (1,)))

    coordinates = pyop2.Decl("%s**" % float, pyop2.Symbol("coordinate_dofs", ()))

    coeffs = []
    for n, e in zip(ir['coefficient_names'], ir['coefficient_elements']):
        typ = "%s*" % float if e.family() == 'Real' else "%s**" % float
        coeffs.append(pyop2.Decl(typ, pyop2.Symbol("%s%s" % \
            ("c" if e.family() == 'Real' else "", n[1:] if e.family() == 'Real' else n), ())))

    arglist = [localtensor, coordinates]
    # embedded manifold, passing in cell_orientation
    if ir['needs_oriented'] and \
        ir['cell'].topological_dimension() != ir['cell'].geometric_dimension():
        cell_orientation = pyop2.Decl("%s**" % int, pyop2.Symbol("cell_orientation_", ()))
        arglist.append(cell_orientation)
    arglist += coeffs
    if integral_type in ("exterior_facet", "exterior_facet_vert"):
        arglist.append(pyop2.Decl("%s*" % int, pyop2.Symbol("facet_p", ()), qualifiers=["unsigned"]))
    if integral_type in ("interior_facet", "interior_facet_vert"):
        arglist.append(pyop2.Decl(int, pyop2.Symbol("facet_p", (2,)), qualifiers=["unsigned"]))

    return arglist
Esempio n. 13
0
def _generate_element_tensor(integrals, sets, optimise_parameters, parameters):
    "Construct quadrature code for element tensors."

    # Prefetch formats to speed up code generation.
    f_comment    = format["comment"]
    f_ip         = format["integration points"]
    f_I          = format["ip constant"]
    f_loop       = format["generate loop"]
    f_ip_coords  = format["generate ip coordinates"]
    f_coords     = format["coordinate_dofs"]
    f_double     = format["float declaration"]
    f_decl       = format["declaration"]
    f_X          = format["ip coordinates"]
    f_C          = format["conditional"]


    # Initialise return values.
    tensor_ops_count = 0

    ffc_assert(1 == len(integrals), "This function is not capable of handling multiple integrals.")

    # We receive a dictionary {num_points: form,}.
    # Loop points and forms.
    for points, terms, functions, ip_consts, coordinate, conditionals in integrals:

        nest_ir = []
        ip_ir = []
        num_ops = 0

        # Generate code to compute coordinates if used.
        if coordinate:
            raise RuntimeError("Don't know how to compute coordinates")
            # Left in place for posterity
            name, gdim, ip, r = coordinate
            element_code += ["", f_comment("Declare array to hold physical coordinate of quadrature point.")]
            element_code += [f_decl(f_double, f_X(points, gdim))]
            ops, coord_code = f_ip_coords(gdim, points, name, ip, r)
            ip_code += ["", f_comment("Compute physical coordinate of quadrature point, operations: %d." % ops)]
            ip_code += [coord_code]
            num_ops += ops
            # Update used psi tables and transformation set.
            sets[1].add(name)
            sets[3].add(f_coords(r))

        # Generate code to compute function values.
        if functions:
            const_func_code, func_code, ops = _generate_functions(functions, sets)
            nest_ir += const_func_code
            ip_ir += func_code
            num_ops += ops

        # Generate code to compute conditionals (might depend on coordinates
        # and function values so put here).
        # TODO: Some conditionals might only depend on geometry so they
        # should be moved outside if possible.
        if conditionals:
            ip_ir.append(pyop2.Decl(f_double, c_sym(f_C(len(conditionals)))))
            # Sort conditionals (need to in case of nested conditionals).
            reversed_conds = dict([(n, (o, e)) for e, (t, o, n) in conditionals.items()])
            for num in range(len(conditionals)):
                name = format["conditional"](num)
                ops, expr = reversed_conds[num]
                ip_ir.append(pyop2.Assign(c_sym(name), visit_rhs(expr)))
                num_ops += ops

        # Generate code for ip constant declarations.
        # TODO: this code should be removable as only executed when ffc's optimisations are on
        ip_const_ops, ip_const_code = generate_aux_constants(ip_consts, f_I,\
                                        format["assign"], True)
        if len(ip_const_code) > 0:
            raise RuntimeError("IP Const code not supported")
        num_ops += ip_const_ops

        # Generate code to evaluate the element tensor.
        code, ops = _generate_integral_ir(points, terms, sets, optimise_parameters, parameters)
        num_ops += ops
        tensor_ops_count += num_ops*points
        ip_ir += code

        # Loop code over all IPs.
        # @@@: for (ip ...) { A[0][0] += ... }
        if points > 1:
            it_var = pyop2.Symbol(f_ip, ())
            nest_ir += [pyop2.For(pyop2.Decl("int", it_var, c_sym(0)),
                                  pyop2.Less(it_var, c_sym(points)),
                                  pyop2.Incr(it_var, c_sym(1)),
                                  pyop2.Block(ip_ir, open_scope=True))]
        else:
            nest_ir += ip_ir

    return (nest_ir, tensor_ops_count)
Esempio n. 14
0
def get_restriction_kernel(fiat_element,
                           unique_indices,
                           dim=1,
                           no_weights=False):
    weights = restriction_weights(fiat_element)[unique_indices].T
    ncdof = weights.shape[0]
    nfdof = weights.shape[1]
    arglist = [
        ast.Decl("double", ast.Symbol("coarse", (ncdof * dim, ))),
        ast.Decl("double *restrict *restrict ",
                 ast.Symbol("fine", ()),
                 qualifiers=["const"])
    ]
    if not no_weights:
        arglist.append(
            ast.Decl("double *restrict *restrict",
                     ast.Symbol("count_weights", ()),
                     qualifiers=["const"]))

    all_ones = np.allclose(weights, 1.0)

    if all_ones:
        w = []
    else:
        w_sym = ast.Symbol("weights", (ncdof, nfdof))
        init = ast.ArrayInit(format_array_literal(weights))
        w = [ast.Decl("double", w_sym, init, qualifiers=["const"])]

    i = ast.Symbol("i", ())
    j = ast.Symbol("j", ())
    k = ast.Symbol("k", ())
    fine = ast.Symbol("fine", (j, k))
    if no_weights:
        if all_ones:
            assign = fine
        else:
            assign = ast.Prod(fine, ast.Symbol("weights", (i, j)))
    else:
        if all_ones:
            assign = ast.Prod(fine, ast.Symbol("count_weights", (j, 0)))
        else:
            assign = ast.Prod(
                fine,
                ast.Prod(ast.Symbol("weights", (i, j)),
                         ast.Symbol("count_weights", (j, 0))))
    assignment = ast.Incr(
        ast.Symbol("coarse", (ast.Sum(k, ast.Prod(i, ast.c_sym(dim))), )),
        assign)
    k_loop = ast.For(ast.Decl("int", k, ast.c_sym(0)),
                     ast.Less(k, ast.c_sym(dim)), ast.Incr(k, ast.c_sym(1)),
                     ast.Block([assignment], open_scope=True))
    j_loop = ast.For(ast.Decl("int", j, ast.c_sym(0)),
                     ast.Less(j, ast.c_sym(nfdof)), ast.Incr(j, ast.c_sym(1)),
                     ast.Block([k_loop], open_scope=True))
    i_loop = ast.For(ast.Decl("int", i, ast.c_sym(0)),
                     ast.Less(i, ast.c_sym(ncdof)), ast.Incr(i, ast.c_sym(1)),
                     ast.Block([j_loop], open_scope=True))
    k = ast.FunDecl("void",
                    "restriction",
                    arglist,
                    ast.Block(w + [i_loop]),
                    pred=["static", "inline"])

    return op2.Kernel(k, "restriction", opts=parameters["coffee"])
Esempio n. 15
0
def get_injection_kernel(fiat_element, unique_indices, dim=1):
    weights = injection_weights(fiat_element)[unique_indices].T
    ncdof = weights.shape[0]
    nfdof = weights.shape[1]
    # What if we have multiple nodes in same location (DG)?  Divide by
    # rowsum.
    weights = weights / np.sum(weights, axis=1).reshape(-1, 1)

    all_same = np.allclose(weights, weights[0, 0])

    arglist = [
        ast.Decl("double", ast.Symbol("coarse", (ncdof * dim, ))),
        ast.Decl("double *restrict *restrict ",
                 ast.Symbol("fine", ()),
                 qualifiers=["const"])
    ]
    if all_same:
        w_sym = ast.Symbol("weights", ())
        w = [ast.Decl("double", w_sym, weights[0, 0], qualifiers=["const"])]
    else:
        init = ast.ArrayInit(format_array_literal(weights))
        w_sym = ast.Symbol("weights", (ncdof, nfdof))
        w = [ast.Decl("double", w_sym, init, qualifiers=["const"])]

    i = ast.Symbol("i", ())
    j = ast.Symbol("j", ())
    k = ast.Symbol("k", ())
    if all_same:
        assign = ast.Prod(ast.Symbol("fine", (j, k)), w_sym)
    else:
        assign = ast.Prod(ast.Symbol("fine", (j, k)),
                          ast.Symbol("weights", (i, j)))
    assignment = ast.Incr(
        ast.Symbol("coarse", (ast.Sum(k, ast.Prod(i, ast.c_sym(dim))), )),
        assign)
    k_loop = ast.For(ast.Decl("int", k, ast.c_sym(0)),
                     ast.Less(k, ast.c_sym(dim)), ast.Incr(k, ast.c_sym(1)),
                     ast.Block([assignment], open_scope=True))
    j_loop = ast.For(ast.Decl("int", j, ast.c_sym(0)),
                     ast.Less(j, ast.c_sym(nfdof)), ast.Incr(j, ast.c_sym(1)),
                     ast.Block([k_loop], open_scope=True))
    i_loop = ast.For(ast.Decl("int", i, ast.c_sym(0)),
                     ast.Less(i, ast.c_sym(ncdof)), ast.Incr(i, ast.c_sym(1)),
                     ast.Block([j_loop], open_scope=True))
    k = ast.FunDecl("void",
                    "injection",
                    arglist,
                    ast.Block(w + [i_loop]),
                    pred=["static", "inline"])

    return op2.Kernel(k, "injection", opts=parameters["coffee"])
Esempio n. 16
0
def compile_c_kernel(expression, to_pts, to_element, fs, coords):
    """Produce a :class:`PyOP2.Kernel` from the c expression provided."""

    coords_space = coords.function_space()
    coords_element = coords_space.fiat_element

    names = {v[0] for v in expression._user_args}

    X = coords_element.tabulate(0, to_pts).values()[0]

    # Produce C array notation of X.
    X_str = "{{"+"},\n{".join([",".join(map(str, x)) for x in X.T])+"}}"

    A = utils.unique_name("A", names)
    X = utils.unique_name("X", names)
    x_ = utils.unique_name("x_", names)
    k = utils.unique_name("k", names)
    d = utils.unique_name("d", names)
    i_ = utils.unique_name("i", names)
    # x is a reserved name.
    x = "x"
    if "x" in names:
        raise ValueError("cannot use 'x' as a user-defined Expression variable")
    ass_exp = [ast.Assign(ast.Symbol(A, (k,), ((len(expression.code), i),)),
                          ast.FlatBlock("%s" % code))
               for i, code in enumerate(expression.code)]
    vals = {
        "X": X,
        "x": x,
        "x_": x_,
        "k": k,
        "d": d,
        "i": i_,
        "x_array": X_str,
        "dim": coords_space.dim,
        "xndof": coords_element.space_dimension(),
        # FS will always either be a functionspace or
        # vectorfunctionspace, so just accessing dim here is safe
        # (we don't need to go through ufl_element.value_shape())
        "nfdof": to_element.space_dimension() * numpy.prod(fs.dim, dtype=int),
        "ndof": to_element.space_dimension(),
        "assign_dim": numpy.prod(expression.value_shape(), dtype=int)
    }
    init = ast.FlatBlock("""
const double %(X)s[%(ndof)d][%(xndof)d] = %(x_array)s;

double %(x)s[%(dim)d];
const double pi = 3.141592653589793;

""" % vals)
    block = ast.FlatBlock("""
for (unsigned int %(d)s=0; %(d)s < %(dim)d; %(d)s++) {
  %(x)s[%(d)s] = 0;
  for (unsigned int %(i)s=0; %(i)s < %(xndof)d; %(i)s++) {
        %(x)s[%(d)s] += %(X)s[%(k)s][%(i)s] * %(x_)s[%(i)s][%(d)s];
  };
};

""" % vals)
    loop = ast.c_for(k, "%(ndof)d" % vals, ast.Block([block] + ass_exp,
                                                     open_scope=True))
    user_args = []
    user_init = []
    for _, arg in expression._user_args:
        if arg.shape == (1, ):
            user_args.append(ast.Decl("double *", "%s_" % arg.name))
            user_init.append(ast.FlatBlock("const double %s = *%s_;" %
                                           (arg.name, arg.name)))
        else:
            user_args.append(ast.Decl("double *", arg.name))
    kernel_code = ast.FunDecl("void", "expression_kernel",
                              [ast.Decl("double", ast.Symbol(A, (int("%(nfdof)d" % vals),))),
                               ast.Decl("double**", x_)] + user_args,
                              ast.Block(user_init + [init, loop],
                                        open_scope=False))
    coefficients = [coords]
    for _, arg in expression._user_args:
        coefficients.append(GlobalWrapper(arg))
    return op2.Kernel(kernel_code, kernel_code.name), False, tuple(coefficients)
Esempio n. 17
0
def compile_ufl_kernel(expression, to_pts, to_element, fs):
    import collections
    from ufl.algorithms.apply_function_pullbacks import apply_function_pullbacks
    from ufl.algorithms.apply_algebra_lowering import apply_algebra_lowering
    from ufl.algorithms.apply_derivatives import apply_derivatives
    from ufl.algorithms.apply_geometry_lowering import apply_geometry_lowering
    from ufl.algorithms import extract_arguments, extract_coefficients
    from gem import gem, impero_utils
    from tsfc import fem, ufl_utils
    from tsfc.coffee import generate as generate_coffee
    from tsfc.kernel_interface import (KernelBuilderBase,
                                       needs_cell_orientations,
                                       cell_orientations_coffee_arg)

    # Imitate the compute_form_data processing pipeline
    #
    # Unfortunately, we cannot call compute_form_data here, since
    # we only have an expression, not a form
    expression = apply_algebra_lowering(expression)
    expression = apply_derivatives(expression)
    expression = apply_function_pullbacks(expression)
    expression = apply_geometry_lowering(expression)
    expression = apply_derivatives(expression)
    expression = apply_geometry_lowering(expression)
    expression = apply_derivatives(expression)

    # Replace coordinates (if any)
    if expression.ufl_domain():
        assert fs.mesh() == expression.ufl_domain()
        expression = ufl_utils.replace_coordinates(expression, fs.mesh().coordinates)

    if extract_arguments(expression):
        return ValueError("Cannot interpolate UFL expression with Arguments!")

    builder = KernelBuilderBase()
    args = []

    coefficients = extract_coefficients(expression)
    for i, coefficient in enumerate(coefficients):
        args.append(builder.coefficient(coefficient, "w_%d" % i))

    point_index = gem.Index(name='p')
    ir = fem.process('cell', fs.mesh().ufl_cell(), to_pts, None,
                     point_index, (), expression,
                     builder.coefficient_mapper,
                     collections.defaultdict(gem.Index))
    assert len(ir) == 1

    # Deal with non-scalar expressions
    tensor_indices = ()
    if fs.shape:
        tensor_indices = tuple(gem.Index() for s in fs.shape)
        ir = [gem.Indexed(ir[0], tensor_indices)]

    # Build kernel body
    return_var = gem.Variable('A', (len(to_pts),) + fs.shape)
    return_expr = gem.Indexed(return_var, (point_index,) + tensor_indices)
    impero_c = impero_utils.compile_gem([return_expr], ir, [point_index])
    body = generate_coffee(impero_c, index_names={point_index: 'p'})

    oriented = needs_cell_orientations(ir)
    if oriented:
        args.insert(0, cell_orientations_coffee_arg)

    # Build kernel
    args.insert(0, ast.Decl("double", ast.Symbol('A', rank=(len(to_pts),) + fs.shape)))
    kernel_code = builder.construct_kernel("expression_kernel", args, body)

    return op2.Kernel(kernel_code, kernel_code.name), oriented, coefficients
Esempio n. 18
0
def compile_element(expression,
                    dual_space=None,
                    parameters=None,
                    name="evaluate"):
    """Generate code for point evaluations.

    :arg expression: A UFL expression (may contain up to one coefficient, or one argument)
    :arg dual_space: if the expression has an argument, should we also distribute residual data?
    :returns: Some coffee AST
    """
    if parameters is None:
        parameters = default_parameters()
    else:
        _ = default_parameters()
        _.update(parameters)
        parameters = _

    expression = tsfc.ufl_utils.preprocess_expression(expression)

    # # Collect required coefficients

    try:
        arg, = extract_coefficients(expression)
        argument_multiindices = ()
        coefficient = True
        if expression.ufl_shape:
            tensor_indices = tuple(gem.Index() for s in expression.ufl_shape)
        else:
            tensor_indices = ()
    except ValueError:
        arg, = extract_arguments(expression)
        finat_elem = create_element(arg.ufl_element())
        argument_multiindices = (finat_elem.get_indices(), )
        argument_multiindex, = argument_multiindices
        value_shape = finat_elem.value_shape
        if value_shape:
            tensor_indices = argument_multiindex[-len(value_shape):]
        else:
            tensor_indices = ()
        coefficient = False

    # Replace coordinates (if any)
    builder = firedrake_interface.KernelBuilderBase(scalar_type=ScalarType_c)
    domain = expression.ufl_domain()
    # Translate to GEM
    cell = domain.ufl_cell()
    dim = cell.topological_dimension()
    point = gem.Variable('X', (dim, ))
    point_arg = ast.Decl(ScalarType_c, ast.Symbol('X', rank=(dim, )))

    config = dict(interface=builder,
                  ufl_cell=cell,
                  precision=parameters["precision"],
                  point_indices=(),
                  point_expr=point,
                  argument_multiindices=argument_multiindices)
    context = tsfc.fem.GemPointContext(**config)

    # Abs-simplification
    expression = tsfc.ufl_utils.simplify_abs(expression)

    # Translate UFL -> GEM
    if coefficient:
        assert dual_space is None
        f_arg = [builder._coefficient(arg, "f")]
    else:
        f_arg = []
    translator = tsfc.fem.Translator(context)
    result, = map_expr_dags(translator, [expression])

    b_arg = []
    if coefficient:
        if expression.ufl_shape:
            return_variable = gem.Indexed(
                gem.Variable('R', expression.ufl_shape), tensor_indices)
            result_arg = ast.Decl(ScalarType_c,
                                  ast.Symbol('R', rank=expression.ufl_shape))
            result = gem.Indexed(result, tensor_indices)
        else:
            return_variable = gem.Indexed(gem.Variable('R', (1, )), (0, ))
            result_arg = ast.Decl(ScalarType_c, ast.Symbol('R', rank=(1, )))

    else:
        return_variable = gem.Indexed(
            gem.Variable('R', finat_elem.index_shape), argument_multiindex)
        result = gem.Indexed(result, tensor_indices)
        if dual_space:
            elem = create_element(dual_space.ufl_element())
            if elem.value_shape:
                var = gem.Indexed(gem.Variable("b", elem.value_shape),
                                  tensor_indices)
                b_arg = [
                    ast.Decl(ScalarType_c,
                             ast.Symbol("b", rank=elem.value_shape))
                ]
            else:
                var = gem.Indexed(gem.Variable("b", (1, )), (0, ))
                b_arg = [ast.Decl(ScalarType_c, ast.Symbol("b", rank=(1, )))]
            result = gem.Product(result, var)

        result_arg = ast.Decl(ScalarType_c,
                              ast.Symbol('R', rank=finat_elem.index_shape))

    # Unroll
    max_extent = parameters["unroll_indexsum"]
    if max_extent:

        def predicate(index):
            return index.extent <= max_extent

        result, = gem.optimise.unroll_indexsum([result], predicate=predicate)

    # Translate GEM -> COFFEE
    result, = gem.impero_utils.preprocess_gem([result])
    impero_c = gem.impero_utils.compile_gem([(return_variable, result)],
                                            tensor_indices)
    body = generate_coffee(impero_c, {}, parameters["precision"], ScalarType_c)

    # Build kernel tuple
    kernel_code = builder.construct_kernel(
        "pyop2_kernel_" + name, [result_arg] + b_arg + f_arg + [point_arg],
        body)

    return kernel_code
Esempio n. 19
0
def tensor_assembly_calls(builder):
    """Generates a block of statements for assembling the local
    finite element tensors.

    :arg builder: The :class:`LocalKernelBuilder` containing
        all relevant expression information and assembly calls.
    """
    assembly_calls = builder.assembly_calls
    statements = [ast.FlatBlock("/* Assemble local tensors */\n")]

    # Cell integrals are straightforward. Just splat them out.
    statements.extend(assembly_calls["cell"])

    if builder.needs_cell_facets:
        # The for-loop will have the general structure:
        #
        #    FOR (facet=0; facet<num_facets; facet++):
        #        IF (facet is interior):
        #            *interior calls
        #        ELSE IF (facet is exterior):
        #            *exterior calls
        #
        # If only interior (exterior) facets are present,
        # then only a single IF-statement checking for interior
        # (exterior) facets will be present within the loop. The
        # cell facets are labelled `1` for interior, and `0` for
        # exterior.
        statements.append(ast.FlatBlock("/* Loop over cell facets */\n"))
        int_calls = list(
            chain(*[
                assembly_calls[it_type]
                for it_type in ("interior_facet", "interior_facet_vert")
            ]))
        ext_calls = list(
            chain(*[
                assembly_calls[it_type]
                for it_type in ("exterior_facet", "exterior_facet_vert")
            ]))

        # Generate logical statements for handling exterior/interior facet
        # integrals on subdomains.
        # Currently only facet integrals are supported.
        for sd_type in ("subdomains_exterior_facet",
                        "subdomains_interior_facet"):
            stmts = []
            for sd, sd_calls in groupby(assembly_calls[sd_type],
                                        lambda x: x[0]):
                _, calls = zip(*sd_calls)
                if_sd = ast.Eq(
                    ast.Symbol(builder.cell_facet_sym,
                               rank=(builder.it_sym, 1)), sd)
                stmts.append(
                    ast.If(if_sd, (ast.Block(calls, open_scope=True), )))

            if sd_type == "subdomains_exterior_facet":
                ext_calls.extend(stmts)
            if sd_type == "subdomains_interior_facet":
                int_calls.extend(stmts)

        # Compute the number of facets to loop over
        domain = builder.expression.ufl_domain()
        if domain.cell_set._extruded:
            num_facets = domain.ufl_cell()._cells[0].num_facets()
        else:
            num_facets = domain.ufl_cell().num_facets()

        if_ext = ast.Eq(
            ast.Symbol(builder.cell_facet_sym, rank=(builder.it_sym, 0)), 0)
        if_int = ast.Eq(
            ast.Symbol(builder.cell_facet_sym, rank=(builder.it_sym, 0)), 1)
        body = []
        if ext_calls:
            body.append(
                ast.If(if_ext, (ast.Block(ext_calls, open_scope=True), )))
        if int_calls:
            body.append(
                ast.If(if_int, (ast.Block(int_calls, open_scope=True), )))

        statements.append(
            ast.For(ast.Decl("unsigned int", builder.it_sym, init=0),
                    ast.Less(builder.it_sym, num_facets),
                    ast.Incr(builder.it_sym, 1), body))

    if builder.needs_mesh_layers:
        # In the presence of interior horizontal facet calls, an
        # IF-ELIF-ELSE block is generated using the mesh levels
        # as conditions for which calls are needed:
        #
        #    IF (layer == bottom_layer):
        #        *bottom calls
        #    ELSE IF (layer == top_layer):
        #        *top calls
        #    ELSE:
        #        *top calls
        #        *bottom calls
        #
        # Any extruded top or bottom calls for extruded facets are
        # included within the appropriate mesh-level IF-blocks. If
        # no interior horizontal facet calls are present, then
        # standard IF-blocks are generated for exterior top/bottom
        # facet calls when appropriate:
        #
        #    IF (layer == bottom_layer):
        #        *bottom calls
        #
        #    IF (layer == top_layer):
        #        *top calls
        #
        # The mesh level is an integer provided as a macro kernel
        # argument.

        # FIXME: No variable layers assumption
        statements.append(ast.FlatBlock("/* Mesh levels: */\n"))
        num_layers = ast.Symbol(builder.mesh_layer_count_sym, rank=(0, ))
        layer = builder.mesh_layer_sym
        types = [
            "interior_facet_horiz_top", "interior_facet_horiz_bottom",
            "exterior_facet_top", "exterior_facet_bottom"
        ]
        decide = [
            ast.Less(layer, num_layers),
            ast.Greater(layer, 0),
            ast.Eq(layer, num_layers),
            ast.Eq(layer, 0)
        ]
        for (integral_type, which) in zip(types, decide):
            statements.append(
                ast.If(which, (ast.Block(assembly_calls[integral_type],
                                         open_scope=True), )))

    return statements
Esempio n. 20
0
def generate_kernel_ast(builder, statements, declared_temps):
    """Glues together the complete AST for the Slate expression
    contained in the :class:`LocalKernelBuilder`.

    :arg builder: The :class:`LocalKernelBuilder` containing
        all relevant expression information.
    :arg statements: A list of COFFEE objects containing all
        assembly calls and temporary declarations.
    :arg declared_temps: A `dict` containing all previously
        declared temporaries.

    Return: A `KernelInfo` object describing the complete AST.
    """
    slate_expr = builder.expression
    if slate_expr.rank == 0:
        # Scalars are treated as 1x1 MatrixBase objects
        shape = (1, )
    else:
        shape = slate_expr.shape

    # Now we create the result statement by declaring its eigen type and
    # using Eigen::Map to move between Eigen and C data structs.
    statements.append(ast.FlatBlock("/* Map eigen tensor into C struct */\n"))
    result_sym = ast.Symbol("T%d" % len(declared_temps))
    result_data_sym = ast.Symbol("A%d" % len(declared_temps))
    result_type = "Eigen::Map<%s >" % eigen_matrixbase_type(shape)
    result = ast.Decl(ScalarType_c,
                      ast.Symbol(result_data_sym),
                      pointers=[("restrict", )])
    result_statement = ast.FlatBlock(
        "%s %s((%s *)%s);\n" %
        (result_type, result_sym, ScalarType_c, result_data_sym))
    statements.append(result_statement)

    # Generate the complete c++ string performing the linear algebra operations
    # on Eigen matrices/vectors
    statements.append(ast.FlatBlock("/* Linear algebra expression */\n"))
    cpp_string = ast.FlatBlock(slate_to_cpp(slate_expr, declared_temps))
    statements.append(ast.Incr(result_sym, cpp_string))

    # Generate arguments for the macro kernel
    args = [
        result,
        ast.Decl(ScalarType_c,
                 builder.coord_sym,
                 pointers=[("restrict", )],
                 qualifiers=["const"])
    ]

    # Orientation information
    if builder.oriented:
        args.append(
            ast.Decl("int",
                     builder.cell_orientations_sym,
                     pointers=[("restrict", )],
                     qualifiers=["const"]))

    # Coefficient information
    expr_coeffs = slate_expr.coefficients()
    for c in expr_coeffs:
        args.extend([
            ast.Decl(ScalarType_c,
                     csym,
                     pointers=[("restrict", )],
                     qualifiers=["const"]) for csym in builder.coefficient(c)
        ])

    # Facet information
    if builder.needs_cell_facets:
        f_sym = builder.cell_facet_sym
        f_arg = ast.Symbol("arg_cell_facets")
        f_dtype = as_cstr(cell_to_facets_dtype)

        # cell_facets is locally a flattened 2-D array. We typecast here so we
        # can access its entries using standard array notation.
        cast = "%s (*%s)[2] = (%s (*)[2])%s;\n" % (f_dtype, f_sym, f_dtype,
                                                   f_arg)
        statements.insert(0, ast.FlatBlock(cast))
        args.append(
            ast.Decl(f_dtype,
                     f_arg,
                     pointers=[("restrict", )],
                     qualifiers=["const"]))

    # NOTE: We need to be careful about the ordering here. Mesh layers are
    # added as the final argument to the kernel
    # and the amount of layers before that.
    if builder.needs_mesh_layers:
        args.append(
            ast.Decl("int",
                     builder.mesh_layer_count_sym,
                     pointers=[("restrict", )],
                     qualifiers=["const"]))
        args.append(ast.Decl("int", builder.mesh_layer_sym))

    # Cell size information
    if builder.needs_cell_sizes:
        args.append(
            ast.Decl(ScalarType_c,
                     builder.cell_size_sym,
                     pointers=[("restrict", )],
                     qualifiers=["const"]))

    # Macro kernel
    macro_kernel_name = "pyop2_kernel_compile_slate"
    stmts = ast.Block(statements)
    macro_kernel = ast.FunDecl("void",
                               macro_kernel_name,
                               args,
                               stmts,
                               pred=["static", "inline"])

    # Construct the final ast
    kernel_ast = ast.Node(builder.templated_subkernels + [macro_kernel])

    # Now we wrap up the kernel ast as a PyOP2 kernel and include the
    # Eigen header files
    include_dirs = list(builder.include_dirs)
    include_dirs.append(EIGEN_INCLUDE_DIR)
    op2kernel = op2.Kernel(
        kernel_ast,
        macro_kernel_name,
        cpp=True,
        include_dirs=include_dirs,
        headers=['#include <Eigen/Dense>', '#define restrict __restrict'])

    op2kernel.num_flops = builder.expression_flops + builder.terminal_flops
    # Send back a "TSFC-like" SplitKernel object with an
    # index and KernelInfo
    kinfo = KernelInfo(kernel=op2kernel,
                       integral_type=builder.integral_type,
                       oriented=builder.oriented,
                       subdomain_id="otherwise",
                       domain_number=0,
                       coefficient_map=slate_expr.coeff_map,
                       needs_cell_facets=builder.needs_cell_facets,
                       pass_layer_arg=builder.needs_mesh_layers,
                       needs_cell_sizes=builder.needs_cell_sizes)

    return kinfo
Esempio n. 21
0
def _generate_integral_ir(points, terms, sets, optimise_parameters, parameters):
    "Generate code to evaluate the element tensor."

    # For checking if the integral code is for a matrix
    def is_matrix(loop):
        loop_indices = [ l[0] for l in loop ]
        return (format["first free index"] in loop_indices and \
                format["second free index"] in loop_indices)

    # Prefetch formats to speed up code generation.
    p_format        = parameters["format"]
    f_comment       = format["comment"]
    f_mul           = format["mul"]
    f_scale_factor  = format["scale factor"]
    f_iadd          = format["iadd"]
    f_add           = format["add"]
    f_A             = format["element tensor"][p_format]
    f_j             = format["first free index"]
    f_k             = format["second free index"]
    f_loop          = format["generate loop"]
    f_B             = format["basis constant"]

    # Initialise return values.
    code = []
    num_ops = 0
    loops = {}

    # Extract sets.
    used_weights, used_psi_tables, used_nzcs, trans_set = sets

    nests = []
    # Loop terms and create code.
    for loop, (data, entry_vals) in terms.items():
        # If we don't have any entry values, there's no need to generate the loop.
        if not entry_vals:
            continue

        # Get data.
        t_set, u_weights, u_psi_tables, u_nzcs, basis_consts = data

        # If we have a value, then we also need to update the sets of used variables.
        trans_set.update(t_set)
        used_weights.update(u_weights)
        used_psi_tables.update(u_psi_tables)
        used_nzcs.update(u_nzcs)

        # @@@: A[0][0] += FE0[ip][j]*FE0[ip][k]*W24[ip]*det;

        entry_ir = []
        for entry, value, ops in entry_vals:
            # Left hand side
            it_vars = entry if len(loop) > 0 else (0,)
            rank = tuple(i.loop_index if hasattr(i, 'loop_index') else i for i in it_vars)
            offset = tuple((1, int(i.offset)) if hasattr(i, 'offset') else (1, 0) for i in it_vars)
            local_tensor = pyop2.Symbol(f_A(''), rank, offset)
            # Right hand side
            pyop2_rhs = visit_rhs(value)
            entry_ir.append(pyop2.Incr(local_tensor, pyop2_rhs, "#pragma coffee expression"))

        # Disgusting hack, ensure resulting IR comes out in same order
        # on all processes.
        entry_ir = sorted(entry_ir, key=lambda x: x.gencode())

        if len(loop) == 0:
            nest = pyop2.Block(entry_ir, open_scope=True)
        elif len(loop) in [1, 2]:
            it_var = c_sym(loop[0][0])
            end = c_sym(loop[0][2])
            nest = pyop2.For(pyop2.Decl("int", it_var, c_sym(0)), pyop2.Less(it_var, end), \
                             pyop2.Incr(it_var, c_sym(1)), pyop2.Block(entry_ir, open_scope=True), "#pragma coffee itspace")
        if len(loop) == 2:
            it_var = c_sym(loop[1][0])
            end = c_sym(loop[1][2])
            nest_k = pyop2.For(pyop2.Decl("int", it_var, c_sym(0)), pyop2.Less(it_var, end), \
                               pyop2.Incr(it_var, c_sym(1)), pyop2.Block(entry_ir, open_scope=True), "#pragma coffee itspace")
            nest.children[0] = pyop2.Block([nest_k], open_scope=True)
        nests.append(nest)

    return nests, num_ops
Esempio n. 22
0
def get_prolongation_kernel(fiat_element, unique_indices, dim=1):
    weights = get_restriction_weights(fiat_element)[unique_indices]
    nfdof = weights.shape[0]
    ncdof = weights.shape[1]
    arglist = [
        ast.Decl("double", ast.Symbol("fine", (nfdof * dim, ))),
        ast.Decl("double",
                 ast.Symbol("*restrict *restrict coarse", ()),
                 qualifiers=["const"])
    ]
    all_same = np.allclose(weights, weights[0, 0])

    if all_same:
        w_sym = ast.Symbol("weights", ())
        w = [ast.Decl("double", w_sym, weights[0, 0], qualifiers=["const"])]
    else:
        w_sym = ast.Symbol("weights", (nfdof, ncdof))
        init = ast.ArrayInit(format_array_literal(weights))
        w = [ast.Decl("double", w_sym, init, qualifiers=["const"])]
    i = ast.Symbol("i", ())
    j = ast.Symbol("j", ())
    k = ast.Symbol("k", ())
    if all_same:
        assign = ast.Prod(ast.Symbol("coarse", (j, k)), w_sym)
    else:
        assign = ast.Prod(ast.Symbol("coarse", (j, k)),
                          ast.Symbol("weights", (i, j)))

    assignment = ast.Incr(
        ast.Symbol("fine", (ast.Sum(k, ast.Prod(i, ast.c_sym(dim))), )),
        assign)
    k_loop = ast.For(ast.Decl("int", k, ast.c_sym(0)),
                     ast.Less(k, ast.c_sym(dim)), ast.Incr(k, ast.c_sym(1)),
                     ast.Block([assignment], open_scope=True))
    j_loop = ast.For(ast.Decl("int", j, ast.c_sym(0)),
                     ast.Less(j, ast.c_sym(ncdof)), ast.Incr(j, ast.c_sym(1)),
                     ast.Block([k_loop], open_scope=True))
    i_loop = ast.For(ast.Decl("int", i, ast.c_sym(0)),
                     ast.Less(i, ast.c_sym(nfdof)), ast.Incr(i, ast.c_sym(1)),
                     ast.Block([j_loop], open_scope=True))
    k = ast.FunDecl("void",
                    "prolongation",
                    arglist,
                    ast.Block(w + [i_loop]),
                    pred=["static", "inline"])

    return op2.Kernel(k, "prolongation", opts=parameters["coffee"])
Esempio n. 23
0
def _tabulate_tensor(ir, parameters):
    "Generate code for a single integral (tabulate_tensor())."

    p_format        = parameters["format"]
    precision       = parameters["precision"]

    f_comment       = format["comment"]
    f_G             = format["geometry constant"]
    f_const_double  = format["assign"]
    f_float         = format["float"]
    f_assign        = format["assign"]
    f_A             = format["element tensor"][p_format]
    f_r             = format["free indices"][0]
    f_j             = format["first free index"]
    f_k             = format["second free index"]
    f_loop          = format["generate loop"]
    f_int           = format["int"]
    f_weight        = format["weight"]

    # Get data.
    opt_par     = ir["optimise_parameters"]
    integral_type = ir["integral_type"]
    cell        = ir["cell"]
    gdim        = cell.geometric_dimension()
    tdim        = cell.topological_dimension()
    num_facets  = ir["num_facets"]
    num_vertices= ir["num_vertices"]
    integrals   = ir["trans_integrals"]
    geo_consts  = ir["geo_consts"]
    oriented    = ir["needs_oriented"]

    # Create sets of used variables.
    used_weights    = set()
    used_psi_tables = set()
    used_nzcs       = set()
    trans_set       = set()
    sets = [used_weights, used_psi_tables, used_nzcs, trans_set]

    affine_tables = {} # TODO: This is not populated anywhere, remove?
    quadrature_weights = ir["quadrature_weights"]

    #The pyop2 format requires dereferencing constant coefficients since
    # these are passed in as double *
    common = []
    if p_format == "pyop2":
        for n, c in zip(ir["coefficient_names"], ir["coefficient_elements"]):
            if c.family() == 'Real':
                # Second index is always? 0, so we cast to (double (*)[1]).
                common += ['double (*w%(n)s)[1] = (double (*)[1])c%(n)s;\n' %
                           {'n': n[1:]}]

    operations = []
    if integral_type == "cell":
        # Update transformer with facets and generate code + set of used geometry terms.
        nest_ir, num_ops = _generate_element_tensor(integrals, sets, \
                                                    opt_par, parameters)

        # Set operations equal to num_ops (for printing info on operations).
        operations.append([num_ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code  = ""
        jacobi_code += format["compute_jacobian"](cell)
        jacobi_code += "\n"
        jacobi_code += format["compute_jacobian_inverse"](cell)
        if oriented and tdim != gdim:
            # NEED TO THINK ABOUT THIS FOR EXTRUSION
            jacobi_code += format["orientation"][p_format](tdim, gdim)
        jacobi_code += "\n"
        jacobi_code += format["scale factor snippet"][p_format]

        # Generate code for cell volume and circumradius -- note that the
        # former will be incorrect on extruded meshes by a constant factor.
        jacobi_code += "\n\n" + format["generate cell volume"][p_format](tdim, gdim, integral_type)
        jacobi_code += "\n\n" + format["generate circumradius"][p_format](tdim, gdim, integral_type)

    elif integral_type in ("exterior_facet", "exterior_facet_vert"):
        if p_format == 'pyop2':
            common += ["unsigned int facet = *facet_p;\n"]

        # Generate tensor code for facets + set of used geometry terms.
        nest_ir, ops = _generate_element_tensor(integrals, sets, opt_par, parameters)

        # Save number of operations (for printing info on operations).
        operations.append([ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code  = ""
        jacobi_code += format["compute_jacobian"](cell)
        jacobi_code += "\n"
        jacobi_code += format["compute_jacobian_inverse"](cell)
        if oriented and tdim != gdim:
            # NEED TO THINK ABOUT THIS FOR EXTRUSION
            jacobi_code += format["orientation"][p_format](tdim, gdim)
        jacobi_code += "\n"
        if integral_type == "exterior_facet":
            jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type)
            jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)
            jacobi_code += "\n\n" + format["generate facet area"](tdim, gdim)
            if tdim == 3:
                jacobi_code += "\n\n" + format["generate min facet edge length"](tdim, gdim)
                jacobi_code += "\n\n" + format["generate max facet edge length"](tdim, gdim)

            # Generate code for cell volume and circumradius
            jacobi_code += "\n\n" + format["generate cell volume"][p_format](tdim, gdim, integral_type)
            jacobi_code += "\n\n" + format["generate circumradius"][p_format](tdim, gdim, integral_type)

        elif integral_type == "exterior_facet_vert":
            jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type)
            jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)
            # OTHER THINGS NOT IMPLEMENTED YET
        else:
            raise RuntimeError("Invalid integral_type")

    elif integral_type in ("exterior_facet_top", "exterior_facet_bottom"):
        nest_ir, ops = _generate_element_tensor(integrals, sets, opt_par, parameters)
        operations.append([ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code  = ""
        jacobi_code += format["compute_jacobian"](cell)
        jacobi_code += "\n"
        jacobi_code += format["compute_jacobian_inverse"](cell)
        if oriented:
            # NEED TO THINK ABOUT THIS FOR EXTRUSION
            jacobi_code += format["orientation"][p_format](tdim, gdim)
        jacobi_code += "\n"
        jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type)
        jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)
        # THE REST IS NOT IMPLEMENTED YET

    elif integral_type in ("interior_facet", "interior_facet_vert"):
        if p_format == 'pyop2':
            common += ["unsigned int facet_0 = facet_p[0];"]
            common += ["unsigned int facet_1 = facet_p[1];"]
            common += ["double **coordinate_dofs_0 = coordinate_dofs;"]
            # Note that the following line is unsafe for isoparametric elements.
            common += ["double **coordinate_dofs_1 = coordinate_dofs + %d;" % num_vertices]

        # Generate tensor code for facets + set of used geometry terms.
        nest_ir, ops = _generate_element_tensor(integrals, sets, opt_par, parameters)

        # Save number of operations (for printing info on operations).
        operations.append([ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code  = ""
        for _r in ["+", "-"]:
            if p_format == "pyop2":
                jacobi_code += format["compute_jacobian_interior"](cell, r=_r)
            else:
                jacobi_code += format["compute_jacobian"](cell, r=_r)

            jacobi_code += "\n"
            jacobi_code += format["compute_jacobian_inverse"](cell, r=_r)
            if oriented and tdim != gdim:
                # NEED TO THINK ABOUT THIS FOR EXTRUSION
                jacobi_code += format["orientation"][p_format](tdim, gdim, r=_r)
            jacobi_code += "\n"

        if integral_type == "interior_facet":
            jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type, r="+")
            jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)

            jacobi_code += "\n\n" + format["generate facet area"](tdim, gdim)
            if tdim == 3:
                jacobi_code += "\n\n" + format["generate min facet edge length"](tdim, gdim, r="+")
                jacobi_code += "\n\n" + format["generate max facet edge length"](tdim, gdim, r="+")

            # Generate code for cell volume and circumradius
            jacobi_code += "\n\n" + format["generate cell volume"][p_format](tdim, gdim, integral_type)
            jacobi_code += "\n\n" + format["generate circumradius interior"](tdim, gdim, integral_type)

        elif integral_type == "interior_facet_vert":
            # THE REST IS NOT IMPLEMENTED YET
            jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type, r="+")
            jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)
        else:
            raise RuntimeError("Invalid integral_type")

    elif integral_type == "interior_facet_horiz":
        common += ["double **coordinate_dofs_0 = coordinate_dofs;"]
        # Note that the following line is unsafe for isoparametric elements.
        common += ["double **coordinate_dofs_1 = coordinate_dofs + %d;" % num_vertices]

        nest_ir, ops = _generate_element_tensor(integrals, sets, opt_par, parameters)

        # Save number of operations (for printing info on operations).
        operations.append([ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code  = ""
        for _r in ["+", "-"]:
            jacobi_code += format["compute_jacobian_interior"](cell, r=_r)
            jacobi_code += "\n"
            jacobi_code += format["compute_jacobian_inverse"](cell, r=_r)
            if oriented:
                # NEED TO THINK ABOUT THIS FOR EXTRUSION
                jacobi_code += format["orientation"][p_format](tdim, gdim, r=_r)
            jacobi_code += "\n"

        # TODO: verify that this is correct (we think it is)
        jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type, r="+")
        jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)
        # THE REST IS NOT IMPLEMENTED YET

    elif integral_type == "point":
        # Update transformer with vertices and generate code + set of used geometry terms.
        nest_ir, ops = _generate_element_tensor(integrals, sets, opt_par, parameters)

        # Save number of operations (for printing info on operations).
        operations.append([ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code  = ""
        jacobi_code += format["compute_jacobian"](cell)
        jacobi_code += "\n"
        jacobi_code += format["compute_jacobian_inverse"](cell)
        if oriented and tdim != gdim:
            jacobi_code += format["orientation"][p_format](tdim, gdim)
        jacobi_code += "\n"

    else:
        error("Unhandled integral type: " + str(integral_type))

    # Embedded manifold, need to pass in cell orientations
    if oriented and tdim != gdim and p_format == 'pyop2':
        if integral_type in ("interior_facet", "interior_facet_vert", "interior_facet_horiz"):
            common += ["const int cell_orientation%s = cell_orientation_[0][0];" % _choose_map('+'),
                       "const int cell_orientation%s = cell_orientation_[1][0];" % _choose_map('-')]
        else:
            common += ["const int cell_orientation = cell_orientation_[0][0];"]
    # After we have generated the element code for all facets we can remove
    # the unused transformations and tabulate the used psi tables and weights.
    common += [remove_unused(jacobi_code, trans_set)]
    jacobi_ir = pyop2.FlatBlock("\n".join(common))

    # @@@: const double W3[3] = {{...}}
    pyop2_weights = []
    for weights, points in [quadrature_weights[p] for p in used_weights]:
        n_points = len(points)
        w_sym = pyop2.Symbol(f_weight(n_points), () if n_points == 1 else (n_points,))
        pyop2_weights.append(pyop2.Decl("double", w_sym,
                                        pyop2.ArrayInit(weights, precision),
                                        qualifiers=["static", "const"]))

    name_map = ir["name_map"]
    tables = ir["unique_tables"]
    tables.update(affine_tables) # TODO: This is not populated anywhere, remove?

    # @@@: const double FE0[] = {{...}}
    code, decl = _tabulate_psis(tables, used_psi_tables, name_map, used_nzcs, opt_par, parameters)
    pyop2_basis = []
    for name, data in decl.items():
        rank, _, values = data
        zeroflags = values.get_zeros()
        feo_sym = pyop2.Symbol(name, rank)
        init = pyop2.ArrayInit(values, precision)
        if zeroflags is not None and not zeroflags.all():
            nz_indices = numpy.logical_not(zeroflags).nonzero()
            # Note: in the following, we take the last entry of /nz_indices/ since we /know/
            # we have been tracking only zero-valued columns
            nz_indices = nz_indices[-1]
            nz_bounds = tuple([(i, 0)] for i in rank[:-1])
            nz_bounds += ([(max(nz_indices) - min(nz_indices) + 1, min(nz_indices))],)
            init = pyop2.SparseArrayInit(values, precision, nz_bounds)
        pyop2_basis.append(pyop2.Decl("double", feo_sym, init, ["static", "const"]))

    # Build the root of the PyOP2' ast
    pyop2_tables = pyop2_weights + [tab for tab in pyop2_basis]
    root = pyop2.Root([jacobi_ir] + pyop2_tables + nest_ir)

    return root
Esempio n. 24
0
def generate_kernel_ast(builder, statements, declared_temps):
    """Glues together the complete AST for the Slate expression
    contained in the :class:`LocalKernelBuilder`.

    :arg builder: The :class:`LocalKernelBuilder` containing
                  all relevant expression information.
    :arg statements: A list of COFFEE objects containing all
                     assembly calls and temporary declarations.
    :arg declared_temps: A `dict` containing all previously
                         declared temporaries.

    Return: A `KernelInfo` object describing the complete AST.
    """
    slate_expr = builder.expression
    if slate_expr.rank == 0:
        # Scalars are treated as 1x1 MatrixBase objects
        shape = (1, )
    else:
        shape = slate_expr.shape

    # Now we create the result statement by declaring its eigen type and
    # using Eigen::Map to move between Eigen and C data structs.
    statements.append(ast.FlatBlock("/* Map eigen tensor into C struct */\n"))
    result_sym = ast.Symbol("T%d" % len(declared_temps))
    result_data_sym = ast.Symbol("A%d" % len(declared_temps))
    result_type = "Eigen::Map<%s >" % eigen_matrixbase_type(shape)
    result = ast.Decl(SCALAR_TYPE, ast.Symbol(result_data_sym, shape))
    result_statement = ast.FlatBlock(
        "%s %s((%s *)%s);\n" %
        (result_type, result_sym, SCALAR_TYPE, result_data_sym))
    statements.append(result_statement)

    # Generate the complete c++ string performing the linear algebra operations
    # on Eigen matrices/vectors
    statements.append(ast.FlatBlock("/* Linear algebra expression */\n"))
    cpp_string = ast.FlatBlock(
        metaphrase_slate_to_cpp(slate_expr, declared_temps))
    statements.append(ast.Incr(result_sym, cpp_string))

    # Generate arguments for the macro kernel
    args = [result, ast.Decl("%s **" % SCALAR_TYPE, builder.coord_sym)]

    # Orientation information
    if builder.oriented:
        args.append(ast.Decl("int **", builder.cell_orientations_sym))

    # Coefficient information
    expr_coeffs = slate_expr.coefficients()
    for c in expr_coeffs:
        if isinstance(c, Constant):
            ctype = "%s *" % SCALAR_TYPE
        else:
            ctype = "%s **" % SCALAR_TYPE
        args.extend([ast.Decl(ctype, csym) for csym in builder.coefficient(c)])

    # Facet information
    if builder.needs_cell_facets:
        args.append(
            ast.Decl("%s *" % as_cstr(cell_to_facets_dtype),
                     builder.cell_facet_sym))

    # NOTE: We need to be careful about the ordering here. Mesh layers are
    # added as the final argument to the kernel.
    if builder.needs_mesh_layers:
        args.append(ast.Decl("int", builder.mesh_layer_sym))

    # Macro kernel
    macro_kernel_name = "compile_slate"
    stmts = ast.Block(statements)
    macro_kernel = ast.FunDecl("void",
                               macro_kernel_name,
                               args,
                               stmts,
                               pred=["static", "inline"])

    # Construct the final ast
    kernel_ast = ast.Node(builder.templated_subkernels + [macro_kernel])

    # Now we wrap up the kernel ast as a PyOP2 kernel and include the
    # Eigen header files
    include_dirs = builder.include_dirs
    include_dirs.extend(["%s/include/eigen3/" % d for d in PETSC_DIR])
    op2kernel = op2.Kernel(
        kernel_ast,
        macro_kernel_name,
        cpp=True,
        include_dirs=include_dirs,
        headers=['#include <Eigen/Dense>', '#define restrict __restrict'])

    # Send back a "TSFC-like" SplitKernel object with an
    # index and KernelInfo
    kinfo = KernelInfo(kernel=op2kernel,
                       integral_type=builder.integral_type,
                       oriented=builder.oriented,
                       subdomain_id="otherwise",
                       domain_number=0,
                       coefficient_map=tuple(range(len(expr_coeffs))),
                       needs_cell_facets=builder.needs_cell_facets,
                       pass_layer_arg=builder.needs_mesh_layers)

    return kinfo
Esempio n. 25
0
def coefficient_temporaries(builder, declared_temps):
    """Generates coefficient temporary statements for assigning
    coefficients to vector temporaries.

    :arg builder: The :class:`LocalKernelBuilder` containing
        all relevant expression information.
    :arg declared_temps: A `dict` keeping track of all declared
        temporaries. This dictionary is updated as coefficients
        are assigned temporaries.

    'AssembledVector's require creating coefficient temporaries to
    store data. The temporaries are created by inspecting the function
    space of the coefficient to compute node and dof extents. The
    coefficient is then assigned values by looping over both the node
    extent and dof extent (double FOR-loop). A double FOR-loop is needed
    for each function space (if the function space is mixed, then a loop
    will be constructed for each component space). The general structure
    of each coefficient loop will be:

         FOR (i1=0; i1<node_extent; i1++):
             FOR (j1=0; j1<dof_extent; j1++):
                 VT0[offset + (dof_extent * i1) + j1] = w_0_0[i1][j1]
                 VT1[offset + (dof_extent * i1) + j1] = w_1_0[i1][j1]
                 .
                 .
                 .

    where wT0, wT1, ... are temporaries for coefficients sharing the
    same node and dof extents. The offset is computed based on whether
    the function space is mixed. The offset is always 0 for non-mixed
    coefficients. If the coefficient is mixed, then the offset is
    incremented by the total number of nodal unknowns associated with
    the component spaces of the mixed space.
    """
    statements = [ast.FlatBlock("/* Coefficient temporaries */\n")]
    j = ast.Symbol("j1")
    loops = [ast.FlatBlock("/* Loops for coefficient temps */\n")]
    for dofs, cinfo_list in builder.coefficient_vecs.items():
        # Collect all coefficients which share the same node/dof extent
        assignments = []
        for cinfo in cinfo_list:
            fs_i = cinfo.space_index
            offset = cinfo.offset_index
            c_shape = cinfo.shape
            vector = cinfo.vector
            function = vector._function
            t = cinfo.local_temp

            if vector not in declared_temps:
                # Declare and initialize coefficient temporary
                c_type = eigen_matrixbase_type(shape=c_shape)
                statements.append(ast.Decl(c_type, t))
                declared_temps[vector] = t

            # Assigning coefficient values into temporary
            coeff_sym = ast.Symbol(builder.coefficient(function)[fs_i],
                                   rank=(j, ))
            index = ast.Sum(offset, j)
            coeff_temp = ast.Symbol(t, rank=(index, ))
            assignments.append(ast.Assign(coeff_temp, coeff_sym))

        # loop over dofs
        loop = ast.For(ast.Decl("unsigned int", j, init=0), ast.Less(j, dofs),
                       ast.Incr(j, 1), assignments)

        loops.append(loop)

    statements.extend(loops)

    return statements
Esempio n. 26
0
def tensor_assembly_calls(builder):
    """Generates a block of statements for assembling the local
    finite element tensors.

    :arg builder: The :class:`LocalKernelBuilder` containing
                  all relevant expression information and
                  assembly calls.
    """
    statements = [ast.FlatBlock("/* Assemble local tensors */\n")]

    # Cell integrals are straightforward. Just splat them out.
    statements.extend(builder.assembly_calls["cell"])

    if builder.needs_cell_facets:
        # The for-loop will have the general structure:
        #
        #    FOR (facet=0; facet<num_facets; facet++):
        #        IF (facet is interior):
        #            *interior calls
        #        ELSE IF (facet is exterior):
        #            *exterior calls
        #
        # If only interior (exterior) facets are present,
        # then only a single IF-statement checking for interior
        # (exterior) facets will be present within the loop. The
        # cell facets are labelled `1` for interior, and `0` for
        # exterior.

        statements.append(ast.FlatBlock("/* Loop over cell facets */\n"))
        int_calls = list(
            chain(*[
                builder.assembly_calls[it_type]
                for it_type in ("interior_facet", "interior_facet_vert")
            ]))
        ext_calls = list(
            chain(*[
                builder.assembly_calls[it_type]
                for it_type in ("exterior_facet", "exterior_facet_vert")
            ]))

        # Compute the number of facets to loop over
        domain = builder.expression.ufl_domain()
        if domain.cell_set._extruded:
            num_facets = domain.ufl_cell()._cells[0].num_facets()
        else:
            num_facets = domain.ufl_cell().num_facets()

        if_ext = ast.Eq(
            ast.Symbol(builder.cell_facet_sym, rank=(builder.it_sym, )), 0)
        if_int = ast.Eq(
            ast.Symbol(builder.cell_facet_sym, rank=(builder.it_sym, )), 1)
        body = []
        if ext_calls:
            body.append(
                ast.If(if_ext, (ast.Block(ext_calls, open_scope=True), )))
        if int_calls:
            body.append(
                ast.If(if_int, (ast.Block(int_calls, open_scope=True), )))

        statements.append(
            ast.For(ast.Decl("unsigned int", builder.it_sym, init=0),
                    ast.Less(builder.it_sym, num_facets),
                    ast.Incr(builder.it_sym, 1), body))

    if builder.needs_mesh_layers:
        # In the presence of interior horizontal facet calls, an
        # IF-ELIF-ELSE block is generated using the mesh levels
        # as conditions for which calls are needed:
        #
        #    IF (layer == bottom_layer):
        #        *bottom calls
        #    ELSE IF (layer == top_layer):
        #        *top calls
        #    ELSE:
        #        *top calls
        #        *bottom calls
        #
        # Any extruded top or bottom calls for extruded facets are
        # included within the appropriate mesh-level IF-blocks. If
        # no interior horizontal facet calls are present, then
        # standard IF-blocks are generated for exterior top/bottom
        # facet calls when appropriate:
        #
        #    IF (layer == bottom_layer):
        #        *bottom calls
        #
        #    IF (layer == top_layer):
        #        *top calls
        #
        # The mesh level is an integer provided as a macro kernel
        # argument.

        # FIXME: No variable layers assumption
        statements.append(ast.FlatBlock("/* Mesh levels: */\n"))
        num_layers = builder.expression.ufl_domain().topological.layers - 1
        int_top = builder.assembly_calls["interior_facet_horiz_top"]
        int_btm = builder.assembly_calls["interior_facet_horiz_bottom"]
        ext_top = builder.assembly_calls["exterior_facet_top"]
        ext_btm = builder.assembly_calls["exterior_facet_bottom"]

        bottom = ast.Block(int_top + ext_btm, open_scope=True)
        top = ast.Block(int_btm + ext_top, open_scope=True)
        rest = ast.Block(int_btm + int_top, open_scope=True)
        statements.append(
            ast.If(ast.Eq(builder.mesh_layer_sym, 0),
                   (bottom,
                    ast.If(ast.Eq(builder.mesh_layer_sym, num_layers - 1),
                           (top, rest)))))

    return statements
Esempio n. 27
0
def dg_injection_kernel(Vf, Vc, ncell):
    from firedrake import Tensor, AssembledVector, TestFunction, TrialFunction
    from firedrake.slate.slac import compile_expression
    macro_builder = MacroKernelBuilder(ScalarType_c, ncell)
    f = ufl.Coefficient(Vf)
    macro_builder.set_coefficients([f])
    macro_builder.set_coordinates(Vf.mesh())

    Vfe = create_element(Vf.ufl_element())
    macro_quadrature_rule = make_quadrature(
        Vfe.cell, estimate_total_polynomial_degree(ufl.inner(f, f)))
    index_cache = {}
    parameters = default_parameters()
    integration_dim, entity_ids = lower_integral_type(Vfe.cell, "cell")
    macro_cfg = dict(interface=macro_builder,
                     ufl_cell=Vf.ufl_cell(),
                     precision=parameters["precision"],
                     integration_dim=integration_dim,
                     entity_ids=entity_ids,
                     index_cache=index_cache,
                     quadrature_rule=macro_quadrature_rule)

    fexpr, = fem.compile_ufl(f, **macro_cfg)
    X = ufl.SpatialCoordinate(Vf.mesh())
    C_a, = fem.compile_ufl(X, **macro_cfg)
    detJ = ufl_utils.preprocess_expression(
        abs(ufl.JacobianDeterminant(f.ufl_domain())))
    macro_detJ, = fem.compile_ufl(detJ, **macro_cfg)

    Vce = create_element(Vc.ufl_element())

    coarse_builder = firedrake_interface.KernelBuilder("cell", "otherwise", 0,
                                                       ScalarType_c)
    coarse_builder.set_coordinates(Vc.mesh())
    argument_multiindices = (Vce.get_indices(), )
    argument_multiindex, = argument_multiindices
    return_variable, = coarse_builder.set_arguments((ufl.TestFunction(Vc), ),
                                                    argument_multiindices)

    integration_dim, entity_ids = lower_integral_type(Vce.cell, "cell")
    # Midpoint quadrature for jacobian on coarse cell.
    quadrature_rule = make_quadrature(Vce.cell, 0)

    coarse_cfg = dict(interface=coarse_builder,
                      ufl_cell=Vc.ufl_cell(),
                      precision=parameters["precision"],
                      integration_dim=integration_dim,
                      entity_ids=entity_ids,
                      index_cache=index_cache,
                      quadrature_rule=quadrature_rule)

    X = ufl.SpatialCoordinate(Vc.mesh())
    K = ufl_utils.preprocess_expression(ufl.JacobianInverse(Vc.mesh()))
    C_0, = fem.compile_ufl(X, **coarse_cfg)
    K, = fem.compile_ufl(K, **coarse_cfg)

    i = gem.Index()
    j = gem.Index()

    C_0 = gem.Indexed(C_0, (j, ))
    C_0 = gem.index_sum(C_0, quadrature_rule.point_set.indices)
    C_a = gem.Indexed(C_a, (j, ))
    X_a = gem.Sum(C_0, gem.Product(gem.Literal(-1), C_a))

    K_ij = gem.Indexed(K, (i, j))
    K_ij = gem.index_sum(K_ij, quadrature_rule.point_set.indices)
    X_a = gem.index_sum(gem.Product(K_ij, X_a), (j, ))
    C_0, = quadrature_rule.point_set.points
    C_0 = gem.Indexed(gem.Literal(C_0), (i, ))
    # fine quad points in coarse reference space.
    X_a = gem.Sum(C_0, gem.Product(gem.Literal(-1), X_a))
    X_a = gem.ComponentTensor(X_a, (i, ))

    # Coarse basis function evaluated at fine quadrature points
    phi_c = fem.fiat_to_ufl(
        Vce.point_evaluation(0, X_a, (Vce.cell.get_dimension(), 0)), 0)

    tensor_indices = tuple(gem.Index(extent=d) for d in f.ufl_shape)

    phi_c = gem.Indexed(phi_c, argument_multiindex + tensor_indices)
    fexpr = gem.Indexed(fexpr, tensor_indices)
    quadrature_weight = macro_quadrature_rule.weight_expression
    expr = gem.Product(gem.IndexSum(gem.Product(phi_c, fexpr), tensor_indices),
                       gem.Product(macro_detJ, quadrature_weight))

    quadrature_indices = macro_builder.indices + macro_quadrature_rule.point_set.indices

    reps = spectral.Integrals([expr], quadrature_indices,
                              argument_multiindices, parameters)
    assignments = spectral.flatten([(return_variable, reps)], index_cache)
    return_variables, expressions = zip(*assignments)
    expressions = impero_utils.preprocess_gem(expressions,
                                              **spectral.finalise_options)
    assignments = list(zip(return_variables, expressions))
    impero_c = impero_utils.compile_gem(assignments,
                                        quadrature_indices +
                                        argument_multiindex,
                                        remove_zeros=True)

    index_names = []

    def name_index(index, name):
        index_names.append((index, name))
        if index in index_cache:
            for multiindex, suffix in zip(index_cache[index],
                                          string.ascii_lowercase):
                name_multiindex(multiindex, name + suffix)

    def name_multiindex(multiindex, name):
        if len(multiindex) == 1:
            name_index(multiindex[0], name)
        else:
            for i, index in enumerate(multiindex):
                name_index(index, name + str(i))

    name_multiindex(quadrature_indices, 'ip')
    for multiindex, name in zip(argument_multiindices, ['j', 'k']):
        name_multiindex(multiindex, name)

    index_names.extend(zip(macro_builder.indices, ["entity"]))
    body = generate_coffee(impero_c, index_names, parameters["precision"],
                           ScalarType_c)

    retarg = ast.Decl(ScalarType_c,
                      ast.Symbol("R", rank=(Vce.space_dimension(), )))
    local_tensor = coarse_builder.local_tensor
    local_tensor.init = ast.ArrayInit(
        numpy.zeros(Vce.space_dimension(), dtype=ScalarType_c))
    body.children.insert(0, local_tensor)
    args = [retarg] + macro_builder.kernel_args + [
        macro_builder.coordinates_arg, coarse_builder.coordinates_arg
    ]

    # Now we have the kernel that computes <f, phi_c>dx_c
    # So now we need to hit it with the inverse mass matrix on dx_c

    u = TrialFunction(Vc)
    v = TestFunction(Vc)
    expr = Tensor(ufl.inner(u, v) * ufl.dx).inv * AssembledVector(
        ufl.Coefficient(Vc))
    Ainv, = compile_expression(expr)
    Ainv = Ainv.kinfo.kernel
    A = ast.Symbol(local_tensor.sym.symbol)
    R = ast.Symbol("R")
    body.children.append(
        ast.FunCall(Ainv.name, R, coarse_builder.coordinates_arg.sym, A))
    from coffee.base import Node
    assert isinstance(Ainv._code, Node)
    return op2.Kernel(ast.Node([
        Ainv._code,
        ast.FunDecl("void",
                    "pyop2_kernel_injection_dg",
                    args,
                    body,
                    pred=["static", "inline"])
    ]),
                      name="pyop2_kernel_injection_dg",
                      cpp=True,
                      include_dirs=Ainv._include_dirs,
                      headers=Ainv._headers)
Esempio n. 28
0
def build_hard_fusion_kernel(base_loop, fuse_loop, fusion_map, loop_chain_index):
    """
    Build AST and :class:`Kernel` for two loops suitable to hard fusion.

    The AST consists of three functions: fusion, base, fuse. base and fuse
    are respectively the ``base_loop`` and the ``fuse_loop`` kernels, whereas
    fusion is the orchestrator that invokes, for each ``base_loop`` iteration,
    base and, if still to be executed, fuse.

    The orchestrator has the following structure: ::

        fusion (buffer, ..., executed):
            base (buffer, ...)
            for i = 0 to arity:
                if not executed[i]:
                    additional pointer staging required by kernel2
                    fuse (sub_buffer, ...)
                    insertion into buffer

    The executed array tracks whether the i-th iteration (out of /arity/)
    adjacent to the main kernel1 iteration has been executed.
    """

    finder = Find((ast.FunDecl, ast.PreprocessNode))

    base = base_loop.kernel
    base_ast = dcopy(base._ast)
    base_info = finder.visit(base_ast)
    base_headers = base_info[ast.PreprocessNode]
    base_fundecl = base_info[ast.FunDecl]
    assert len(base_fundecl) == 1
    base_fundecl = base_fundecl[0]

    fuse = fuse_loop.kernel
    fuse_ast = dcopy(fuse._ast)
    fuse_info = finder.visit(fuse_ast)
    fuse_headers = fuse_info[ast.PreprocessNode]
    fuse_fundecl = fuse_info[ast.FunDecl]
    assert len(fuse_fundecl) == 1
    fuse_fundecl = fuse_fundecl[0]

    # Create /fusion/ arguments and signature
    body = ast.Block([])
    fusion_name = '%s_%s' % (base_fundecl.name, fuse_fundecl.name)
    fusion_args = dcopy(base_fundecl.args + fuse_fundecl.args)
    fusion_fundecl = ast.FunDecl(base_fundecl.ret, fusion_name, fusion_args, body)

    # Make sure kernel and variable names are unique
    base_fundecl.name = "%s_base" % base_fundecl.name
    fuse_fundecl.name = "%s_fuse" % fuse_fundecl.name
    for i, decl in enumerate(fusion_args):
        decl.sym.symbol += '_%d' % i

    # Filter out duplicate arguments, and append extra arguments to the fundecl
    binding = WeakFilter().kernel_args([base_loop, fuse_loop], fusion_fundecl)
    fusion_args += [ast.Decl('int*', 'executed'),
                    ast.Decl('int*', 'fused_iters'),
                    ast.Decl('int', 'i')]

    # Which args are actually used in /fuse/, but not in /base/ ? The gather for
    # such arguments is moved to /fusion/, to avoid usless memory LOADs
    base_dats = set(a.data for a in base_loop.args)
    fuse_dats = set(a.data for a in fuse_loop.args)
    unshared = OrderedDict()
    for arg, decl in binding.items():
        if arg.data in fuse_dats - base_dats:
            unshared.setdefault(decl, arg)

    # Track position of Args that need a postponed gather
    # Can't track Args themselves as they change across different parloops
    fargs = {fusion_args.index(i): ('postponed', False) for i in unshared.keys()}
    fargs.update({len(set(binding.values())): ('onlymap', True)})

    # Add maps for arguments that need a postponed gather
    for decl, arg in unshared.items():
        decl_pos = fusion_args.index(decl)
        fusion_args[decl_pos].sym.symbol = arg.c_arg_name()
        if arg._is_indirect:
            fusion_args[decl_pos].sym.rank = ()
            fusion_args.insert(decl_pos + 1, ast.Decl('int*', arg.c_map_name(0, 0)))

    # Append the invocation of /base/; then, proceed with the invocation
    # of the /fuse/ kernels
    base_funcall_syms = [binding[a].sym.symbol for a in base_loop.args]
    body.children.append(ast.FunCall(base_fundecl.name, *base_funcall_syms))

    for idx in range(fusion_map.arity):

        fused_iter = ast.Assign('i', ast.Symbol('fused_iters', (idx,)))
        fuse_funcall = ast.FunCall(fuse_fundecl.name)
        if_cond = ast.Not(ast.Symbol('executed', ('i',)))
        if_update = ast.Assign(ast.Symbol('executed', ('i',)), 1)
        if_body = ast.Block([fuse_funcall, if_update], open_scope=True)
        if_exec = ast.If(if_cond, [if_body])
        body.children.extend([ast.FlatBlock('\n'), fused_iter, if_exec])

        # Modify the /fuse/ kernel
        # This is to take into account that many arguments are shared with
        # /base/, so they will only staged once for /base/. This requires
        # tweaking the way the arguments are declared and accessed in /fuse/.
        # For example, the shared incremented array (called /buffer/ in
        # the pseudocode in the comment above) now needs to take offsets
        # to be sure the locations that /base/ is supposed to increment are
        # actually accessed. The same concept apply to indirect arguments.
        init = lambda v: '{%s}' % ', '.join([str(j) for j in v])
        for i, fuse_loop_arg in enumerate(fuse_loop.args):
            fuse_kernel_arg = binding[fuse_loop_arg]

            buffer_name = '%s_vec' % fuse_kernel_arg.sym.symbol
            fuse_funcall_sym = ast.Symbol(buffer_name)

            # What kind of temporaries do we need ?
            if fuse_loop_arg.access == INC:
                op, lvalue, rvalue = ast.Incr, fuse_kernel_arg.sym.symbol, buffer_name
                stager = lambda b, l: b.children.extend(l)
                indexer = lambda indices: [(k, j) for j, k in enumerate(indices)]
                pointers = []
            elif fuse_loop_arg.access == READ:
                op, lvalue, rvalue = ast.Assign, buffer_name, fuse_kernel_arg.sym.symbol
                stager = lambda b, l: [b.children.insert(0, j) for j in reversed(l)]
                indexer = lambda indices: [(j, k) for j, k in enumerate(indices)]
                pointers = list(fuse_kernel_arg.pointers)

            # Now gonna handle arguments depending on their type and rank ...

            if fuse_loop_arg._is_global:
                # ... Handle global arguments. These can be dropped in the
                # kernel without any particular fiddling
                fuse_funcall_sym = ast.Symbol(fuse_kernel_arg.sym.symbol)

            elif fuse_kernel_arg in unshared:
                # ... Handle arguments that appear only in /fuse/
                staging = unshared[fuse_kernel_arg].c_vec_init(False).split('\n')
                rvalues = [ast.FlatBlock(j.split('=')[1]) for j in staging]
                lvalues = [ast.Symbol(buffer_name, (j,)) for j in range(len(staging))]
                staging = [ast.Assign(j, k) for j, k in zip(lvalues, rvalues)]

                # Set up the temporary
                buffer_symbol = ast.Symbol(buffer_name, (len(staging),))
                buffer_decl = ast.Decl(fuse_kernel_arg.typ, buffer_symbol,
                                       qualifiers=fuse_kernel_arg.qual,
                                       pointers=list(pointers))

                # Update the if-then AST body
                stager(if_exec.children[0], staging)
                if_exec.children[0].children.insert(0, buffer_decl)

            elif fuse_loop_arg._is_mat:
                # ... Handle Mats
                staging = []
                for b in fused_inc_arg._block_shape:
                    for rc in b:
                        lvalue = ast.Symbol(lvalue, (idx, idx),
                                            ((rc[0], 'j'), (rc[1], 'k')))
                        rvalue = ast.Symbol(rvalue, ('j', 'k'))
                        staging = ItSpace(mode=0).to_for([(0, rc[0]), (0, rc[1])],
                                                         ('j', 'k'),
                                                         [op(lvalue, rvalue)])[:1]

                # Set up the temporary
                buffer_symbol = ast.Symbol(buffer_name, (fuse_kernel_arg.sym.rank,))
                buffer_init = ast.ArrayInit(init([init([0.0])]))
                buffer_decl = ast.Decl(fuse_kernel_arg.typ, buffer_symbol, buffer_init,
                                       qualifiers=fuse_kernel_arg.qual, pointers=pointers)

                # Update the if-then AST body
                stager(if_exec.children[0], staging)
                if_exec.children[0].children.insert(0, buffer_decl)

            elif fuse_loop_arg._is_indirect:
                cdim = fuse_loop_arg.data.cdim

                if cdim == 1 and fuse_kernel_arg.sym.rank:
                    # [Special case]
                    # ... Handle rank 1 indirect arguments that appear in both
                    # /base/ and /fuse/: just point into the right location
                    rank = (idx,) if fusion_map.arity > 1 else ()
                    fuse_funcall_sym = ast.Symbol(fuse_kernel_arg.sym.symbol, rank)

                else:
                    # ... Handle indirect arguments. At the C level, these arguments
                    # are of pointer type, so simple pointer arithmetic is used
                    # to ensure the kernel accesses are to the correct locations
                    fuse_arity = fuse_loop_arg.map.arity
                    base_arity = fuse_arity*fusion_map.arity
                    size = fuse_arity*cdim

                    # Set the proper storage layout before invoking /fuse/
                    ofs_vals = [[base_arity*j + k for k in range(fuse_arity)]
                                for j in range(cdim)]
                    ofs_vals = [[fuse_arity*j + k for k in flatten(ofs_vals)]
                                for j in range(fusion_map.arity)]
                    ofs_vals = list(flatten(ofs_vals))
                    indices = [ofs_vals[idx*size + j] for j in range(size)]

                    staging = [op(ast.Symbol(lvalue, (j,)), ast.Symbol(rvalue, (k,)))
                               for j, k in indexer(indices)]

                    # Set up the temporary
                    buffer_symbol = ast.Symbol(buffer_name, (size,))
                    if fuse_loop_arg.access == INC:
                        buffer_init = ast.ArrayInit(init([0.0]))
                    else:
                        buffer_init = ast.EmptyStatement()
                        pointers.pop()
                    buffer_decl = ast.Decl(fuse_kernel_arg.typ, buffer_symbol, buffer_init,
                                           qualifiers=fuse_kernel_arg.qual,
                                           pointers=pointers)

                    # Update the if-then AST body
                    stager(if_exec.children[0], staging)
                    if_exec.children[0].children.insert(0, buffer_decl)

            else:
                # Nothing special to do for direct arguments
                pass

            # Finally update the /fuse/ funcall
            fuse_funcall.children.append(fuse_funcall_sym)

    fused_headers = set([str(h) for h in base_headers + fuse_headers])
    fused_ast = ast.Root([ast.PreprocessNode(h) for h in fused_headers] +
                         [base_fundecl, fuse_fundecl, fusion_fundecl])

    return Kernel([base, fuse], fused_ast, loop_chain_index), fargs
Esempio n. 29
0
    def exterior_facet_boundary_node_map(self, V, method):
        """Return the :class:`pyop2.Map` from exterior facets to nodes
        on the boundary.

        :arg V: The function space.
        :arg method:  The method for determining boundary nodes.  See
           :class:`~.DirichletBC` for details.
        """
        try:
            return self.map_caches["boundary_node"][method]
        except KeyError:
            pass
        el = V.finat_element

        dim = self.mesh.facet_dimension()

        if method == "topological":
            boundary_dofs = el.entity_closure_dofs()[dim]
        elif method == "geometric":
            # This function is only called on extruded meshes when
            # asking for the nodes that live on the "vertical"
            # exterior facets.
            boundary_dofs = entity_support_dofs(el, dim)

        nodes_per_facet = \
            len(boundary_dofs[0])

        # HACK ALERT
        # The facet set does not have a halo associated with it, since
        # we only construct halos for DoF sets.  Fortunately, this
        # loop is direct and we already have all the correct
        # information available locally.  So We fake a set of the
        # correct size and carry out a direct loop
        facet_set = op2.Set(self.mesh.exterior_facets.set.total_size,
                            comm=self.mesh.comm)

        fs_dat = op2.Dat(
            facet_set**el.space_dimension(),
            data=V.exterior_facet_node_map().values_with_halo.view())

        facet_dat = op2.Dat(facet_set**nodes_per_facet, dtype=IntType)

        # Ensure these come out in sorted order.
        local_facet_nodes = numpy.array(
            [boundary_dofs[e] for e in sorted(boundary_dofs.keys())])

        # Helper function to turn the inner index of an array into c
        # array literals.
        c_array = lambda xs: "{" + ", ".join(map(str, xs)) + "}"

        # AST for: l_nodes[facet[0]][n]
        rank_ast = ast.Symbol("l_nodes",
                              rank=(ast.Symbol("facet", rank=(0, )), "n"))

        body = ast.Block([
            ast.Decl("int",
                     ast.Symbol("l_nodes",
                                (len(el.cell.topology[dim]), nodes_per_facet)),
                     init=ast.ArrayInit(
                         c_array(map(c_array, local_facet_nodes))),
                     qualifiers=["const"]),
            ast.For(
                ast.Decl("int", "n", 0), ast.Less("n", nodes_per_facet),
                ast.Incr("n", 1),
                ast.Assign(ast.Symbol("facet_nodes", ("n", )),
                           ast.Symbol("cell_nodes", (rank_ast, ))))
        ])

        kernel = op2.Kernel(
            ast.FunDecl("void", "create_bc_node_map", [
                ast.Decl("%s*" % as_cstr(fs_dat.dtype), "cell_nodes"),
                ast.Decl("%s*" % as_cstr(facet_dat.dtype), "facet_nodes"),
                ast.Decl("unsigned int*", "facet")
            ], body), "create_bc_node_map")

        local_facet_dat = op2.Dat(
            facet_set**self.mesh.exterior_facets._rank,
            self.mesh.exterior_facets.local_facet_dat.data_ro_with_halos,
            dtype=numpy.uintc)
        op2.par_loop(kernel, facet_set, fs_dat(op2.READ), facet_dat(op2.WRITE),
                     local_facet_dat(op2.READ))

        if self.extruded:
            offset = self.offset[boundary_dofs[0]]
        else:
            offset = None
        val = op2.Map(facet_set,
                      self.node_set,
                      nodes_per_facet,
                      facet_dat.data_ro_with_halos,
                      name="exterior_facet_boundary_node",
                      offset=offset)
        self.map_caches["boundary_node"][method] = val
        return val
Esempio n. 30
0
def transfer_kernel(Pk, P1):
    """Compile a kernel that will map between Pk and P1.
    :returns: a PyOP2 kernel.

    The prolongation maps a solution in P1 into Pk using the natural
    embedding.  The restriction maps a residual in the dual of Pk into
    the dual of P1 (it is the dual of the prolongation), computed
    using linearity of the test function.
    """
    # Mapping of a residual in Pk into a residual in P1
    from coffee import base as coffee
    from tsfc.coffee import generate as generate_coffee, SCALAR_TYPE
    from tsfc.parameters import default_parameters
    from gem import gem, impero_utils as imp

    # Pk should be at least the same size as P1
    assert Pk.finat_element.space_dimension(
    ) >= P1.finat_element.space_dimension()
    # In the general case we should compute this by doing:
    # numpy.linalg.solve(Pkmass, PkP1mass)
    Pke = Pk.finat_element._element
    P1e = P1.finat_element._element
    # TODO, rework to use finat.
    matrix = numpy.dot(Pke.dual.to_riesz(P1e.get_nodal_basis()),
                       P1e.get_coeffs().T).T

    Vout, Vin = P1, Pk
    weights = gem.Literal(matrix)
    name = "Pk_P1_mapper"

    funargs = []

    assert Vin.shape == Vout.shape

    shape = (P1e.space_dimension() * Vout.value_size,
             Pke.space_dimension() * Vin.value_size)
    outarg = coffee.Decl(SCALAR_TYPE, coffee.Symbol("A", rank=shape))
    i = gem.Index()
    j = gem.Index()
    k = gem.Index()
    indices = i, j, k
    A = gem.Variable("A", shape)

    outgem = [
        gem.Indexed(
            gem.reshape(A, (P1e.space_dimension(), Vout.value_size),
                        (Pke.space_dimension(), Vin.value_size)), (i, k, j, k))
    ]

    funargs.append(outarg)

    expr = gem.Indexed(weights, (i, j))

    outgem, = imp.preprocess_gem(outgem)
    ir = imp.compile_gem([(outgem, expr)], indices)

    index_names = [(i, "i"), (j, "j"), (k, "k")]
    body = generate_coffee(ir, index_names, default_parameters()["precision"])
    function = coffee.FunDecl("void",
                              name,
                              funargs,
                              body,
                              pred=["static", "inline"])

    return op2.Kernel(function, name=function.name)