Beispiel #1
0
def eigen_tensor(expr, temporary, index):
    """Returns an appropriate assignment statement for populating a particular
    `Eigen::MatrixBase` tensor. If the tensor is mixed, then access to the
    :meth:`block` of the eigen tensor is provided. Otherwise, no block
    information is needed and the tensor is returned as is.

    :arg expr: a `slate.Tensor` node.
    :arg temporary: the associated temporary of the expr argument.
    :arg index: a tuple of integers used to determine row and column
                information. This is provided by the SplitKernel
                associated with the expr.
    """
    if expr.rank == 0:
        tensor = temporary
    else:
        try:
            row, col = index
        except ValueError:
            row = index[0]
            col = 0
        rshape = expr.shapes[0][row]
        rstart = sum(expr.shapes[0][:row])
        try:
            cshape = expr.shapes[1][col]
            cstart = sum(expr.shapes[1][:col])
        except KeyError:
            cshape = 1
            cstart = 0

        # Create sub-block if tensor is mixed
        if (rshape, cshape) != expr.shape:
            tensor = ast.FlatBlock("%s.block<%d, %d>(%d, %d)" % (temporary,
                                                                 rshape,
                                                                 cshape,
                                                                 rstart,
                                                                 cstart))
        else:
            tensor = temporary

    return tensor
Beispiel #2
0
def _form_string_kernel(body, measure, args, **kwargs):
    kargs = []
    if body.find("][") >= 0:
        warning("""Your kernel body contains a double indirection.\n"""
                """You should update it to single indirections.\n"""
                """\n"""
                """Mail [email protected] for advice.\n""")
    for var, (func, intent) in args.items():
        if isinstance(func, constant.Constant):
            if intent is not READ:
                raise RuntimeError("Only READ access is allowed to Constant")
            # Constants modelled as Globals, so no need for double
            # indirection
            ndof = func.dat.cdim
            kargs.append(
                ast.Decl("double",
                         ast.Symbol(var, (ndof, )),
                         qualifiers=["const"]))
        else:
            # Do we have a component of a mixed function?
            if isinstance(func, Indexed):
                c, i = func.ufl_operands
                idx = i._indices[0]._value
                ndof = c.function_space()[idx].finat_element.space_dimension()
            else:
                if len(func.function_space()) > 1:
                    raise NotImplementedError(
                        "Must index mixed function in par_loop.")
                ndof = func.function_space().finat_element.space_dimension()
            if measure.integral_type() == 'interior_facet':
                ndof *= 2
            kargs.append(ast.Decl("double", ast.Symbol(var, (ndof, ))))
        body = body.replace(var + ".dofs", str(ndof))

    return pyop2.Kernel(
        ast.FunDecl("void",
                    "par_loop_kernel",
                    kargs,
                    ast.FlatBlock(body),
                    pred=["static"]), "par_loop_kernel", **kwargs)
Beispiel #3
0
def _form_kernel(kernel, measure, args, **kwargs):

    kargs = []
    lkernel = kernel

    for var, (func, intent) in args.iteritems():
        if isinstance(func, constant.Constant):
            if intent is not READ:
                raise RuntimeError("Only READ access is allowed to Constant")
            # Constants modelled as Globals, so no need for double
            # indirection
            ndof = func.dat.cdim
            kargs.append(
                ast.Decl("double",
                         ast.Symbol(var, (ndof, )),
                         qualifiers=["const"]))
        else:
            # Do we have a component of a mixed function?
            if isinstance(func, Indexed):
                c, i = func.ufl_operands
                idx = i._indices[0]._value
                ndof = c.function_space()[idx].finat_element.space_dimension()
            else:
                if len(func.function_space()) > 1:
                    raise NotImplementedError(
                        "Must index mixed function in par_loop.")
                ndof = func.function_space().finat_element.space_dimension()
            if measure.integral_type() == 'interior_facet':
                ndof *= 2
            if measure is direct:
                kargs.append(ast.Decl("double", ast.Symbol(var, (ndof, ))))
            else:
                kargs.append(ast.Decl("double *", ast.Symbol(var, (ndof, ))))
        lkernel = lkernel.replace(var + ".dofs", str(ndof))

    body = ast.FlatBlock(lkernel)

    return pyop2.Kernel(ast.FunDecl("void", "par_loop_kernel", kargs, body),
                        "par_loop_kernel", **kwargs)
Beispiel #4
0
def tensor_assembly_calls(builder):
    """Generates a block of statements for assembling the local
    finite element tensors.

    :arg builder: The :class:`LocalKernelBuilder` containing
                  all relevant expression information and
                  assembly calls.
    """
    statements = [ast.FlatBlock("/* Assemble local tensors */\n")]

    # Cell integrals are straightforward. Just splat them out.
    statements.extend(builder.assembly_calls["cell"])

    if builder.needs_cell_facets:
        # The for-loop will have the general structure:
        #
        #    FOR (facet=0; facet<num_facets; facet++):
        #        IF (facet is interior):
        #            *interior calls
        #        ELSE IF (facet is exterior):
        #            *exterior calls
        #
        # If only interior (exterior) facets are present,
        # then only a single IF-statement checking for interior
        # (exterior) facets will be present within the loop. The
        # cell facets are labelled `1` for interior, and `0` for
        # exterior.

        statements.append(ast.FlatBlock("/* Loop over cell facets */\n"))
        int_calls = list(
            chain(*[
                builder.assembly_calls[it_type]
                for it_type in ("interior_facet", "interior_facet_vert")
            ]))
        ext_calls = list(
            chain(*[
                builder.assembly_calls[it_type]
                for it_type in ("exterior_facet", "exterior_facet_vert")
            ]))

        # Compute the number of facets to loop over
        domain = builder.expression.ufl_domain()
        if domain.cell_set._extruded:
            num_facets = domain.ufl_cell()._cells[0].num_facets()
        else:
            num_facets = domain.ufl_cell().num_facets()

        if_ext = ast.Eq(
            ast.Symbol(builder.cell_facet_sym, rank=(builder.it_sym, )), 0)
        if_int = ast.Eq(
            ast.Symbol(builder.cell_facet_sym, rank=(builder.it_sym, )), 1)
        body = []
        if ext_calls:
            body.append(
                ast.If(if_ext, (ast.Block(ext_calls, open_scope=True), )))
        if int_calls:
            body.append(
                ast.If(if_int, (ast.Block(int_calls, open_scope=True), )))

        statements.append(
            ast.For(ast.Decl("unsigned int", builder.it_sym, init=0),
                    ast.Less(builder.it_sym, num_facets),
                    ast.Incr(builder.it_sym, 1), body))

    if builder.needs_mesh_layers:
        # In the presence of interior horizontal facet calls, an
        # IF-ELIF-ELSE block is generated using the mesh levels
        # as conditions for which calls are needed:
        #
        #    IF (layer == bottom_layer):
        #        *bottom calls
        #    ELSE IF (layer == top_layer):
        #        *top calls
        #    ELSE:
        #        *top calls
        #        *bottom calls
        #
        # Any extruded top or bottom calls for extruded facets are
        # included within the appropriate mesh-level IF-blocks. If
        # no interior horizontal facet calls are present, then
        # standard IF-blocks are generated for exterior top/bottom
        # facet calls when appropriate:
        #
        #    IF (layer == bottom_layer):
        #        *bottom calls
        #
        #    IF (layer == top_layer):
        #        *top calls
        #
        # The mesh level is an integer provided as a macro kernel
        # argument.

        # FIXME: No variable layers assumption
        statements.append(ast.FlatBlock("/* Mesh levels: */\n"))
        num_layers = builder.expression.ufl_domain().topological.layers - 1
        int_top = builder.assembly_calls["interior_facet_horiz_top"]
        int_btm = builder.assembly_calls["interior_facet_horiz_bottom"]
        ext_top = builder.assembly_calls["exterior_facet_top"]
        ext_btm = builder.assembly_calls["exterior_facet_bottom"]

        bottom = ast.Block(int_top + ext_btm, open_scope=True)
        top = ast.Block(int_btm + ext_top, open_scope=True)
        rest = ast.Block(int_btm + int_top, open_scope=True)
        statements.append(
            ast.If(ast.Eq(builder.mesh_layer_sym, 0),
                   (bottom,
                    ast.If(ast.Eq(builder.mesh_layer_sym, num_layers - 1),
                           (top, rest)))))

    return statements
Beispiel #5
0
def generate_kernel_ast(builder, statements, declared_temps):
    """Glues together the complete AST for the Slate expression
    contained in the :class:`LocalKernelBuilder`.

    :arg builder: The :class:`LocalKernelBuilder` containing
                  all relevant expression information.
    :arg statements: A list of COFFEE objects containing all
                     assembly calls and temporary declarations.
    :arg declared_temps: A `dict` containing all previously
                         declared temporaries.

    Return: A `KernelInfo` object describing the complete AST.
    """
    slate_expr = builder.expression
    if slate_expr.rank == 0:
        # Scalars are treated as 1x1 MatrixBase objects
        shape = (1, )
    else:
        shape = slate_expr.shape

    # Now we create the result statement by declaring its eigen type and
    # using Eigen::Map to move between Eigen and C data structs.
    statements.append(ast.FlatBlock("/* Map eigen tensor into C struct */\n"))
    result_sym = ast.Symbol("T%d" % len(declared_temps))
    result_data_sym = ast.Symbol("A%d" % len(declared_temps))
    result_type = "Eigen::Map<%s >" % eigen_matrixbase_type(shape)
    result = ast.Decl(SCALAR_TYPE, ast.Symbol(result_data_sym, shape))
    result_statement = ast.FlatBlock(
        "%s %s((%s *)%s);\n" %
        (result_type, result_sym, SCALAR_TYPE, result_data_sym))
    statements.append(result_statement)

    # Generate the complete c++ string performing the linear algebra operations
    # on Eigen matrices/vectors
    statements.append(ast.FlatBlock("/* Linear algebra expression */\n"))
    cpp_string = ast.FlatBlock(
        metaphrase_slate_to_cpp(slate_expr, declared_temps))
    statements.append(ast.Incr(result_sym, cpp_string))

    # Generate arguments for the macro kernel
    args = [result, ast.Decl("%s **" % SCALAR_TYPE, builder.coord_sym)]

    # Orientation information
    if builder.oriented:
        args.append(ast.Decl("int **", builder.cell_orientations_sym))

    # Coefficient information
    expr_coeffs = slate_expr.coefficients()
    for c in expr_coeffs:
        if isinstance(c, Constant):
            ctype = "%s *" % SCALAR_TYPE
        else:
            ctype = "%s **" % SCALAR_TYPE
        args.extend([ast.Decl(ctype, csym) for csym in builder.coefficient(c)])

    # Facet information
    if builder.needs_cell_facets:
        args.append(
            ast.Decl("%s *" % as_cstr(cell_to_facets_dtype),
                     builder.cell_facet_sym))

    # NOTE: We need to be careful about the ordering here. Mesh layers are
    # added as the final argument to the kernel.
    if builder.needs_mesh_layers:
        args.append(ast.Decl("int", builder.mesh_layer_sym))

    # Macro kernel
    macro_kernel_name = "compile_slate"
    stmts = ast.Block(statements)
    macro_kernel = ast.FunDecl("void",
                               macro_kernel_name,
                               args,
                               stmts,
                               pred=["static", "inline"])

    # Construct the final ast
    kernel_ast = ast.Node(builder.templated_subkernels + [macro_kernel])

    # Now we wrap up the kernel ast as a PyOP2 kernel and include the
    # Eigen header files
    include_dirs = builder.include_dirs
    include_dirs.extend(["%s/include/eigen3/" % d for d in PETSC_DIR])
    op2kernel = op2.Kernel(
        kernel_ast,
        macro_kernel_name,
        cpp=True,
        include_dirs=include_dirs,
        headers=['#include <Eigen/Dense>', '#define restrict __restrict'])

    # Send back a "TSFC-like" SplitKernel object with an
    # index and KernelInfo
    kinfo = KernelInfo(kernel=op2kernel,
                       integral_type=builder.integral_type,
                       oriented=builder.oriented,
                       subdomain_id="otherwise",
                       domain_number=0,
                       coefficient_map=tuple(range(len(expr_coeffs))),
                       needs_cell_facets=builder.needs_cell_facets,
                       pass_layer_arg=builder.needs_mesh_layers)

    return kinfo
Beispiel #6
0
def compile_expression(slate_expr, tsfc_parameters=None):
    """Takes a SLATE expression `slate_expr` and returns the appropriate
    :class:`firedrake.op2.Kernel` object representing the SLATE expression.

    :arg slate_expr: a :class:'TensorBase' expression.
    :arg tsfc_parameters: an optional `dict` of form compiler parameters to
                          be passed onto TSFC during the compilation of ufl forms.
    """
    if not isinstance(slate_expr, TensorBase):
        raise ValueError(
            "Expecting a `slate.TensorBase` expression, not a %r" % slate_expr)

    # TODO: Get PyOP2 to write into mixed dats
    if any(len(a.function_space()) > 1 for a in slate_expr.arguments()):
        raise NotImplementedError("Compiling mixed slate expressions")

    # Initialize shape and statements list
    shape = slate_expr.shape
    statements = []

    # Create a builder for the SLATE expression
    builder = KernelBuilder(expression=slate_expr,
                            tsfc_parameters=tsfc_parameters)

    # Initialize coordinate and facet symbols
    coordsym = ast.Symbol("coords")
    coords = None
    cellfacetsym = ast.Symbol("cell_facets")
    inc = []

    # Now we construct the list of statements to provide to the builder
    context_temps = builder.temps.copy()
    for exp, t in context_temps.items():
        statements.append(ast.Decl(eigen_matrixbase_type(exp.shape), t))
        statements.append(ast.FlatBlock("%s.setZero();\n" % t))

        for splitkernel in builder.kernel_exprs[exp]:
            clist = []
            index = splitkernel.indices
            kinfo = splitkernel.kinfo
            integral_type = kinfo.integral_type

            if integral_type not in [
                    "cell", "interior_facet", "exterior_facet"
            ]:
                raise NotImplementedError(
                    "Integral type %s not currently supported." %
                    integral_type)

            coordinates = exp.ufl_domain().coordinates
            if coords is not None:
                assert coordinates == coords
            else:
                coords = coordinates

            for cindex in kinfo.coefficient_map:
                c = exp.coefficients()[cindex]
                # Handles both mixed and non-mixed coefficient cases
                clist.extend(builder.extract_coefficient(c))

            inc.extend(kinfo.kernel._include_dirs)

            tensor = eigen_tensor(exp, t, index)

            if integral_type in ["interior_facet", "exterior_facet"]:
                builder.require_cell_facets()
                itsym = ast.Symbol("i0")
                clist.append(ast.FlatBlock("&%s" % itsym))
                loop_body = []
                nfacet = exp.ufl_domain().ufl_cell().num_facets()

                if integral_type == "exterior_facet":
                    checker = 1
                else:
                    checker = 0
                loop_body.append(
                    ast.If(
                        ast.Eq(ast.Symbol(cellfacetsym, rank=(itsym, )),
                               checker), [
                                   ast.Block([
                                       ast.FunCall(kinfo.kernel.name, tensor,
                                                   coordsym, *clist)
                                   ],
                                             open_scope=True)
                               ]))
                loop = ast.For(ast.Decl("unsigned int", itsym, init=0),
                               ast.Less(itsym, nfacet), ast.Incr(itsym, 1),
                               loop_body)
                statements.append(loop)
            else:
                statements.append(
                    ast.FunCall(kinfo.kernel.name, tensor, coordsym, *clist))

    # Now we handle any terms that require auxiliary data (if any)
    if bool(builder.aux_exprs):
        aux_temps, aux_statements = auxiliary_information(builder)
        context_temps.update(aux_temps)
        statements.extend(aux_statements)

    result_sym = ast.Symbol("T%d" % len(builder.temps))
    result_data_sym = ast.Symbol("A%d" % len(builder.temps))
    result_type = "Eigen::Map<%s >" % eigen_matrixbase_type(shape)
    result = ast.Decl(SCALAR_TYPE, ast.Symbol(result_data_sym, shape))
    result_statement = ast.FlatBlock(
        "%s %s((%s *)%s);\n" %
        (result_type, result_sym, SCALAR_TYPE, result_data_sym))
    statements.append(result_statement)

    cpp_string = ast.FlatBlock(
        metaphrase_slate_to_cpp(slate_expr, context_temps))
    statements.append(ast.Assign(result_sym, cpp_string))

    # Generate arguments for the macro kernel
    args = [result, ast.Decl("%s **" % SCALAR_TYPE, coordsym)]
    for c in slate_expr.coefficients():
        if isinstance(c, Constant):
            ctype = "%s *" % SCALAR_TYPE
        else:
            ctype = "%s **" % SCALAR_TYPE
        args.extend([
            ast.Decl(ctype, sym_c) for sym_c in builder.extract_coefficient(c)
        ])

    if builder.needs_cell_facets:
        args.append(ast.Decl("char *", cellfacetsym))

    macro_kernel_name = "compile_slate"
    kernel_ast, oriented = builder.construct_ast(
        name=macro_kernel_name, args=args, statements=ast.Block(statements))

    inc.extend(["%s/include/eigen3/" % d for d in PETSC_DIR])
    op2kernel = op2.Kernel(
        kernel_ast,
        macro_kernel_name,
        cpp=True,
        include_dirs=inc,
        headers=['#include <Eigen/Dense>', '#define restrict __restrict'])

    assert len(slate_expr.ufl_domains()) == 1
    kinfo = KernelInfo(kernel=op2kernel,
                       integral_type="cell",
                       oriented=oriented,
                       subdomain_id="otherwise",
                       domain_number=0,
                       coefficient_map=range(len(slate_expr.coefficients())),
                       needs_cell_facets=builder.needs_cell_facets)
    idx = tuple([0] * slate_expr.rank)

    return (SplitKernel(idx, kinfo), )
Beispiel #7
0
def prepare_arguments(arguments, multiindices, interior_facet=False):
    """Bridges the kernel interface and the GEM abstraction for
    Arguments.  Vector Arguments are rearranged here for interior
    facet integrals.

    :arg arguments: UFL Arguments
    :arg multiindices: Argument multiindices
    :arg interior_facet: interior facet integral?
    :returns: (funarg, prepare, expressions)
         funarg      - :class:`coffee.Decl` function argument
         prepare     - list of COFFEE nodes to be prepended to the
                       kernel body
         expressions - GEM expressions referring to the argument
                       tensor
    """
    funarg = coffee.Decl(SCALAR_TYPE, coffee.Symbol("A"), pointers=[()])
    varexp = gem.Variable("A", (None,))

    if len(arguments) == 0:
        # No arguments
        zero = coffee.FlatBlock(
            "memset({name}, 0, sizeof(*{name}));\n".format(name=funarg.sym.gencode())
        )
        return funarg, [zero], [gem.reshape(varexp, ())]

    elements = tuple(create_element(arg.ufl_element()) for arg in arguments)
    transposed_shapes = []
    transposed_indices = []
    for element, multiindex in zip(elements, multiindices):
        if isinstance(element, TensorFiniteElement):
            scalar_shape = element.base_element.index_shape
            tensor_shape = element.index_shape[len(scalar_shape):]
        else:
            scalar_shape = element.index_shape
            tensor_shape = ()

        transposed_shapes.append(tensor_shape + scalar_shape)
        scalar_rank = len(scalar_shape)
        transposed_indices.extend(multiindex[scalar_rank:] + multiindex[:scalar_rank])
    transposed_indices = tuple(transposed_indices)

    def expression(restricted):
        return gem.Indexed(gem.reshape(restricted, *transposed_shapes),
                           transposed_indices)

    u_shape = numpy.array([numpy.prod(element.index_shape, dtype=int)
                           for element in elements])
    if interior_facet:
        c_shape = tuple(2 * u_shape)
        slicez = [[slice(r * s, (r + 1) * s)
                   for r, s in zip(restrictions, u_shape)]
                  for restrictions in product((0, 1), repeat=len(arguments))]
    else:
        c_shape = tuple(u_shape)
        slicez = [[slice(s) for s in u_shape]]

    expressions = [expression(gem.view(gem.reshape(varexp, c_shape), *slices))
                   for slices in slicez]

    zero = coffee.FlatBlock(
        str.format("memset({name}, 0, {size} * sizeof(*{name}));\n",
                   name=funarg.sym.gencode(), size=numpy.product(c_shape, dtype=int))
    )
    return funarg, [zero], prune(expressions)
Beispiel #8
0
def coefficient_temporaries(builder, declared_temps):
    """Generates coefficient temporary statements for assigning
    coefficients to vector temporaries.

    :arg builder: The :class:`LocalKernelBuilder` containing
        all relevant expression information.
    :arg declared_temps: A `dict` keeping track of all declared
        temporaries. This dictionary is updated as coefficients
        are assigned temporaries.

    'AssembledVector's require creating coefficient temporaries to
    store data. The temporaries are created by inspecting the function
    space of the coefficient to compute node and dof extents. The
    coefficient is then assigned values by looping over both the node
    extent and dof extent (double FOR-loop). A double FOR-loop is needed
    for each function space (if the function space is mixed, then a loop
    will be constructed for each component space). The general structure
    of each coefficient loop will be:

         FOR (i1=0; i1<node_extent; i1++):
             FOR (j1=0; j1<dof_extent; j1++):
                 VT0[offset + (dof_extent * i1) + j1] = w_0_0[i1][j1]
                 VT1[offset + (dof_extent * i1) + j1] = w_1_0[i1][j1]
                 .
                 .
                 .

    where wT0, wT1, ... are temporaries for coefficients sharing the
    same node and dof extents. The offset is computed based on whether
    the function space is mixed. The offset is always 0 for non-mixed
    coefficients. If the coefficient is mixed, then the offset is
    incremented by the total number of nodal unknowns associated with
    the component spaces of the mixed space.
    """
    statements = [ast.FlatBlock("/* Coefficient temporaries */\n")]
    j = ast.Symbol("j1")
    loops = [ast.FlatBlock("/* Loops for coefficient temps */\n")]
    for dofs, cinfo_list in builder.coefficient_vecs.items():
        # Collect all coefficients which share the same node/dof extent
        assignments = []
        for cinfo in cinfo_list:
            fs_i = cinfo.space_index
            offset = cinfo.offset_index
            c_shape = cinfo.shape
            vector = cinfo.vector
            function = vector._function
            t = cinfo.local_temp

            if vector not in declared_temps:
                # Declare and initialize coefficient temporary
                c_type = eigen_matrixbase_type(shape=c_shape)
                statements.append(ast.Decl(c_type, t))
                declared_temps[vector] = t

            # Assigning coefficient values into temporary
            coeff_sym = ast.Symbol(builder.coefficient(function)[fs_i],
                                   rank=(j, ))
            index = ast.Sum(offset, j)
            coeff_temp = ast.Symbol(t, rank=(index, ))
            assignments.append(ast.Assign(coeff_temp, coeff_sym))

        # loop over dofs
        loop = ast.For(ast.Decl("unsigned int", j, init=0), ast.Less(j, dofs),
                       ast.Incr(j, 1), assignments)

        loops.append(loop)

    statements.extend(loops)

    return statements
Beispiel #9
0
def _tabulate_tensor(ir, parameters):
    "Generate code for a single integral (tabulate_tensor())."

    p_format        = parameters["format"]
    precision       = parameters["precision"]

    f_comment       = format["comment"]
    f_G             = format["geometry constant"]
    f_const_double  = format["assign"]
    f_float         = format["float"]
    f_assign        = format["assign"]
    f_A             = format["element tensor"][p_format]
    f_r             = format["free indices"][0]
    f_j             = format["first free index"]
    f_k             = format["second free index"]
    f_loop          = format["generate loop"]
    f_int           = format["int"]
    f_weight        = format["weight"]

    # Get data.
    opt_par     = ir["optimise_parameters"]
    integral_type = ir["integral_type"]
    cell        = ir["cell"]
    gdim        = cell.geometric_dimension()
    tdim        = cell.topological_dimension()
    num_facets  = ir["num_facets"]
    num_vertices= ir["num_vertices"]
    integrals   = ir["trans_integrals"]
    geo_consts  = ir["geo_consts"]
    oriented    = ir["needs_oriented"]

    # Create sets of used variables.
    used_weights    = set()
    used_psi_tables = set()
    used_nzcs       = set()
    trans_set       = set()
    sets = [used_weights, used_psi_tables, used_nzcs, trans_set]

    affine_tables = {} # TODO: This is not populated anywhere, remove?
    quadrature_weights = ir["quadrature_weights"]

    #The pyop2 format requires dereferencing constant coefficients since
    # these are passed in as double *
    common = []
    if p_format == "pyop2":
        for n, c in zip(ir["coefficient_names"], ir["coefficient_elements"]):
            if c.family() == 'Real':
                # Second index is always? 0, so we cast to (double (*)[1]).
                common += ['double (*w%(n)s)[1] = (double (*)[1])c%(n)s;\n' %
                           {'n': n[1:]}]

    operations = []
    if integral_type == "cell":
        # Update transformer with facets and generate code + set of used geometry terms.
        nest_ir, num_ops = _generate_element_tensor(integrals, sets, \
                                                    opt_par, parameters)

        # Set operations equal to num_ops (for printing info on operations).
        operations.append([num_ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code  = ""
        jacobi_code += format["compute_jacobian"](cell)
        jacobi_code += "\n"
        jacobi_code += format["compute_jacobian_inverse"](cell)
        if oriented and tdim != gdim:
            # NEED TO THINK ABOUT THIS FOR EXTRUSION
            jacobi_code += format["orientation"][p_format](tdim, gdim)
        jacobi_code += "\n"
        jacobi_code += format["scale factor snippet"][p_format]

        # Generate code for cell volume and circumradius -- note that the
        # former will be incorrect on extruded meshes by a constant factor.
        jacobi_code += "\n\n" + format["generate cell volume"][p_format](tdim, gdim, integral_type)
        jacobi_code += "\n\n" + format["generate circumradius"][p_format](tdim, gdim, integral_type)

    elif integral_type in ("exterior_facet", "exterior_facet_vert"):
        if p_format == 'pyop2':
            common += ["unsigned int facet = *facet_p;\n"]

        # Generate tensor code for facets + set of used geometry terms.
        nest_ir, ops = _generate_element_tensor(integrals, sets, opt_par, parameters)

        # Save number of operations (for printing info on operations).
        operations.append([ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code  = ""
        jacobi_code += format["compute_jacobian"](cell)
        jacobi_code += "\n"
        jacobi_code += format["compute_jacobian_inverse"](cell)
        if oriented and tdim != gdim:
            # NEED TO THINK ABOUT THIS FOR EXTRUSION
            jacobi_code += format["orientation"][p_format](tdim, gdim)
        jacobi_code += "\n"
        if integral_type == "exterior_facet":
            jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type)
            jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)
            jacobi_code += "\n\n" + format["generate facet area"](tdim, gdim)
            if tdim == 3:
                jacobi_code += "\n\n" + format["generate min facet edge length"](tdim, gdim)
                jacobi_code += "\n\n" + format["generate max facet edge length"](tdim, gdim)

            # Generate code for cell volume and circumradius
            jacobi_code += "\n\n" + format["generate cell volume"][p_format](tdim, gdim, integral_type)
            jacobi_code += "\n\n" + format["generate circumradius"][p_format](tdim, gdim, integral_type)

        elif integral_type == "exterior_facet_vert":
            jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type)
            jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)
            # OTHER THINGS NOT IMPLEMENTED YET
        else:
            raise RuntimeError("Invalid integral_type")

    elif integral_type in ("exterior_facet_top", "exterior_facet_bottom"):
        nest_ir, ops = _generate_element_tensor(integrals, sets, opt_par, parameters)
        operations.append([ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code  = ""
        jacobi_code += format["compute_jacobian"](cell)
        jacobi_code += "\n"
        jacobi_code += format["compute_jacobian_inverse"](cell)
        if oriented:
            # NEED TO THINK ABOUT THIS FOR EXTRUSION
            jacobi_code += format["orientation"][p_format](tdim, gdim)
        jacobi_code += "\n"
        jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type)
        jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)
        # THE REST IS NOT IMPLEMENTED YET

    elif integral_type in ("interior_facet", "interior_facet_vert"):
        if p_format == 'pyop2':
            common += ["unsigned int facet_0 = facet_p[0];"]
            common += ["unsigned int facet_1 = facet_p[1];"]
            common += ["double **coordinate_dofs_0 = coordinate_dofs;"]
            # Note that the following line is unsafe for isoparametric elements.
            common += ["double **coordinate_dofs_1 = coordinate_dofs + %d;" % num_vertices]

        # Generate tensor code for facets + set of used geometry terms.
        nest_ir, ops = _generate_element_tensor(integrals, sets, opt_par, parameters)

        # Save number of operations (for printing info on operations).
        operations.append([ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code  = ""
        for _r in ["+", "-"]:
            if p_format == "pyop2":
                jacobi_code += format["compute_jacobian_interior"](cell, r=_r)
            else:
                jacobi_code += format["compute_jacobian"](cell, r=_r)

            jacobi_code += "\n"
            jacobi_code += format["compute_jacobian_inverse"](cell, r=_r)
            if oriented and tdim != gdim:
                # NEED TO THINK ABOUT THIS FOR EXTRUSION
                jacobi_code += format["orientation"][p_format](tdim, gdim, r=_r)
            jacobi_code += "\n"

        if integral_type == "interior_facet":
            jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type, r="+")
            jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)

            jacobi_code += "\n\n" + format["generate facet area"](tdim, gdim)
            if tdim == 3:
                jacobi_code += "\n\n" + format["generate min facet edge length"](tdim, gdim, r="+")
                jacobi_code += "\n\n" + format["generate max facet edge length"](tdim, gdim, r="+")

            # Generate code for cell volume and circumradius
            jacobi_code += "\n\n" + format["generate cell volume"][p_format](tdim, gdim, integral_type)
            jacobi_code += "\n\n" + format["generate circumradius interior"](tdim, gdim, integral_type)

        elif integral_type == "interior_facet_vert":
            # THE REST IS NOT IMPLEMENTED YET
            jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type, r="+")
            jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)
        else:
            raise RuntimeError("Invalid integral_type")

    elif integral_type == "interior_facet_horiz":
        common += ["double **coordinate_dofs_0 = coordinate_dofs;"]
        # Note that the following line is unsafe for isoparametric elements.
        common += ["double **coordinate_dofs_1 = coordinate_dofs + %d;" % num_vertices]

        nest_ir, ops = _generate_element_tensor(integrals, sets, opt_par, parameters)

        # Save number of operations (for printing info on operations).
        operations.append([ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code  = ""
        for _r in ["+", "-"]:
            jacobi_code += format["compute_jacobian_interior"](cell, r=_r)
            jacobi_code += "\n"
            jacobi_code += format["compute_jacobian_inverse"](cell, r=_r)
            if oriented:
                # NEED TO THINK ABOUT THIS FOR EXTRUSION
                jacobi_code += format["orientation"][p_format](tdim, gdim, r=_r)
            jacobi_code += "\n"

        # TODO: verify that this is correct (we think it is)
        jacobi_code += "\n\n" + format["facet determinant"](cell, p_format, integral_type, r="+")
        jacobi_code += "\n\n" + format["generate normal"](cell, p_format, integral_type)
        # THE REST IS NOT IMPLEMENTED YET

    elif integral_type == "point":
        # Update transformer with vertices and generate code + set of used geometry terms.
        nest_ir, ops = _generate_element_tensor(integrals, sets, opt_par, parameters)

        # Save number of operations (for printing info on operations).
        operations.append([ops])

        # Generate code for basic geometric quantities
        # @@@: Jacobian snippet
        jacobi_code  = ""
        jacobi_code += format["compute_jacobian"](cell)
        jacobi_code += "\n"
        jacobi_code += format["compute_jacobian_inverse"](cell)
        if oriented and tdim != gdim:
            jacobi_code += format["orientation"][p_format](tdim, gdim)
        jacobi_code += "\n"

    else:
        error("Unhandled integral type: " + str(integral_type))

    # Embedded manifold, need to pass in cell orientations
    if oriented and tdim != gdim and p_format == 'pyop2':
        if integral_type in ("interior_facet", "interior_facet_vert", "interior_facet_horiz"):
            common += ["const int cell_orientation%s = cell_orientation_[0][0];" % _choose_map('+'),
                       "const int cell_orientation%s = cell_orientation_[1][0];" % _choose_map('-')]
        else:
            common += ["const int cell_orientation = cell_orientation_[0][0];"]
    # After we have generated the element code for all facets we can remove
    # the unused transformations and tabulate the used psi tables and weights.
    common += [remove_unused(jacobi_code, trans_set)]
    jacobi_ir = pyop2.FlatBlock("\n".join(common))

    # @@@: const double W3[3] = {{...}}
    pyop2_weights = []
    for weights, points in [quadrature_weights[p] for p in used_weights]:
        n_points = len(points)
        w_sym = pyop2.Symbol(f_weight(n_points), () if n_points == 1 else (n_points,))
        pyop2_weights.append(pyop2.Decl("double", w_sym,
                                        pyop2.ArrayInit(weights, precision),
                                        qualifiers=["static", "const"]))

    name_map = ir["name_map"]
    tables = ir["unique_tables"]
    tables.update(affine_tables) # TODO: This is not populated anywhere, remove?

    # @@@: const double FE0[] = {{...}}
    code, decl = _tabulate_psis(tables, used_psi_tables, name_map, used_nzcs, opt_par, parameters)
    pyop2_basis = []
    for name, data in decl.items():
        rank, _, values = data
        zeroflags = values.get_zeros()
        feo_sym = pyop2.Symbol(name, rank)
        init = pyop2.ArrayInit(values, precision)
        if zeroflags is not None and not zeroflags.all():
            nz_indices = numpy.logical_not(zeroflags).nonzero()
            # Note: in the following, we take the last entry of /nz_indices/ since we /know/
            # we have been tracking only zero-valued columns
            nz_indices = nz_indices[-1]
            nz_bounds = tuple([(i, 0)] for i in rank[:-1])
            nz_bounds += ([(max(nz_indices) - min(nz_indices) + 1, min(nz_indices))],)
            init = pyop2.SparseArrayInit(values, precision, nz_bounds)
        pyop2_basis.append(pyop2.Decl("double", feo_sym, init, ["static", "const"]))

    # Build the root of the PyOP2' ast
    pyop2_tables = pyop2_weights + [tab for tab in pyop2_basis]
    root = pyop2.Root([jacobi_ir] + pyop2_tables + nest_ir)

    return root
Beispiel #10
0
def facet_integral_loop(cxt_kernel, builder, coordsym, cellfacetsym,
                        cell_orientations):
    """Generates a code statement for evaluating exterior/interior facet
    integrals.

    :arg cxt_kernel: A :namedtuple:`ContextKernel` containing all relevant
                     integral types and TSFC kernels associated with the
                     form nested in the expression.
    :arg builder: A :class:`KernelBuilder` containing the expression context.
    :arg coordsym: An `ast.Symbol` object representing coordinate arguments
                   for the kernel.
    :arg cellfacetsym: An `ast.Symbol` representing the cell facets.
    :arg cell_orientations: An `ast.Symbol` representing cell orientation
                            information.

    Returns: A COFFEE code statement and updated include_dirs
    """
    exp = cxt_kernel.tensor
    t = builder.temps[exp]
    it_type = cxt_kernel.original_integral_type
    itsym = ast.Symbol("i0")

    chker = {
        "interior_facet": 1,
        "interior_facet_vert": 1,
        "exterior_facet": 0,
        "exterior_facet_vert": 0
    }

    # Compute the correct number of facets for a particular facet measure
    if it_type in ["interior_facet", "exterior_facet"]:
        # Non-extruded case
        nfacet = exp.ufl_domain().ufl_cell().num_facets()

    elif it_type in ["interior_facet_vert", "exterior_facet_vert"]:
        # Extrusion case
        base_cell = exp.ufl_domain().ufl_cell()._cells[0]
        nfacet = base_cell.num_facets()

    else:
        raise ValueError("Integral type %s not supported." % it_type)

    incl = []
    funcalls = []
    checker = chker[it_type]
    for splitkernel in cxt_kernel.tsfc_kernels:
        index = splitkernel.indices
        kinfo = splitkernel.kinfo

        # Generate an iterable of coefficients to pass to the subkernel
        # if any are required
        clist = [
            c for ci in kinfo.coefficient_map
            for c in builder.coefficient(exp.coefficients()[ci])
        ]

        incl.extend(kinfo.kernel._include_dirs)
        tensor = eigen_tensor(exp, t, index)

        if kinfo.oriented:
            clist.insert(0, cell_orientations)

        clist.append(ast.FlatBlock("&%s" % itsym))
        funcalls.append(
            ast.FunCall(kinfo.kernel.name, tensor, coordsym, *clist))

    loop_body = ast.If(
        ast.Eq(ast.Symbol(cellfacetsym, rank=(itsym, )), checker),
        [ast.Block(funcalls, open_scope=True)])

    loop_stmt = ast.For(ast.Decl("unsigned int", itsym, init=0),
                        ast.Less(itsym, nfacet), ast.Incr(itsym, 1), loop_body)

    return loop_stmt, incl
Beispiel #11
0
    def init_cell_orientations(self, expr):
        """Compute and initialise :attr:`cell_orientations` relative to a specified orientation.

        :arg expr: an :class:`.Expression` evaluated to produce a
             reference normal direction.

        """
        import firedrake.function as function
        import firedrake.functionspace as functionspace

        if expr.value_shape()[0] != 3:
            raise NotImplementedError('Only implemented for 3-vectors')
        if self.ufl_cell() not in (ufl.Cell('triangle', 3), ufl.Cell("quadrilateral", 3), ufl.OuterProductCell(ufl.Cell('interval'), ufl.Cell('interval'), gdim=3)):
            raise NotImplementedError('Only implemented for triangles and quadrilaterals embedded in 3d')

        if hasattr(self.topology, '_cell_orientations'):
            raise RuntimeError("init_cell_orientations already called, did you mean to do so again?")

        v0 = lambda x: ast.Symbol("v0", (x,))
        v1 = lambda x: ast.Symbol("v1", (x,))
        n = lambda x: ast.Symbol("n", (x,))
        x = lambda x: ast.Symbol("x", (x,))
        coords = lambda x, y: ast.Symbol("coords", (x, y))

        body = []
        body += [ast.Decl("double", v(3)) for v in [v0, v1, n, x]]
        body.append(ast.Decl("double", "dot"))
        body.append(ast.Assign("dot", 0.0))
        body.append(ast.Decl("int", "i"))

        # if triangle, use v0 = x1 - x0, v1 = x2 - x0
        # otherwise, for the various quads, use v0 = x2 - x0, v1 = x1 - x0
        # recall reference element ordering:
        # triangle: 2        quad: 1 3
        #           0 1            0 2
        if self.ufl_cell() == ufl.Cell('triangle', 3):
            body.append(ast.For(ast.Assign("i", 0), ast.Less("i", 3), ast.Incr("i", 1),
                                [ast.Assign(v0("i"), ast.Sub(coords(1, "i"), coords(0, "i"))),
                                 ast.Assign(v1("i"), ast.Sub(coords(2, "i"), coords(0, "i"))),
                                 ast.Assign(x("i"), 0.0)]))
        else:
            body.append(ast.For(ast.Assign("i", 0), ast.Less("i", 3), ast.Incr("i", 1),
                                [ast.Assign(v0("i"), ast.Sub(coords(2, "i"), coords(0, "i"))),
                                 ast.Assign(v1("i"), ast.Sub(coords(1, "i"), coords(0, "i"))),
                                 ast.Assign(x("i"), 0.0)]))

        # n = v0 x v1
        body.append(ast.Assign(n(0), ast.Sub(ast.Prod(v0(1), v1(2)), ast.Prod(v0(2), v1(1)))))
        body.append(ast.Assign(n(1), ast.Sub(ast.Prod(v0(2), v1(0)), ast.Prod(v0(0), v1(2)))))
        body.append(ast.Assign(n(2), ast.Sub(ast.Prod(v0(0), v1(1)), ast.Prod(v0(1), v1(0)))))

        body.append(ast.For(ast.Assign("i", 0), ast.Less("i", 3), ast.Incr("i", 1),
                            [ast.Incr(x(j), coords("i", j)) for j in range(3)]))

        body.extend([ast.FlatBlock("dot += (%(x)s) * n[%(i)d];\n" % {"x": x_, "i": i})
                     for i, x_ in enumerate(expr.code)])
        body.append(ast.Assign("orientation[0][0]", ast.Ternary(ast.Less("dot", 0), 1, 0)))

        kernel = op2.Kernel(ast.FunDecl("void", "cell_orientations",
                                        [ast.Decl("int**", "orientation"),
                                         ast.Decl("double**", "coords")],
                                        ast.Block(body)),
                            "cell_orientations")

        # Build the cell orientations as a DG0 field (so that we can
        # pass it in for facet integrals and the like)
        fs = functionspace.FunctionSpace(self, 'DG', 0)
        cell_orientations = function.Function(fs, name="cell_orientations", dtype=np.int32)
        op2.par_loop(kernel, self.cell_set,
                     cell_orientations.dat(op2.WRITE, cell_orientations.cell_node_map()),
                     self.coordinates.dat(op2.READ, self.coordinates.cell_node_map()))
        self.topology._cell_orientations = cell_orientations
Beispiel #12
0
    def _setup(self):
        """A setup method to initialize all the local assembly
        kernels generated by TSFC and creates templated function calls
        conforming to the Eigen-C++ template library standard.
        This function also collects any information regarding orientations
        and extra include directories.
        """
        transformer = Transformer()
        include_dirs = []
        templated_subkernels = []
        assembly_calls = OrderedDict([(it, [])
                                      for it in self.supported_integral_types])
        coords = None
        oriented = False
        for cxt_kernel in self.context_kernels:
            local_coefficients = cxt_kernel.coefficients
            it_type = cxt_kernel.original_integral_type
            exp = cxt_kernel.tensor

            if it_type not in self.supported_integral_types:
                raise ValueError("Integral type '%s' not recognized" % it_type)

            # Explicit checking of coordinates
            coordinates = cxt_kernel.tensor.ufl_domain().coordinates
            if coords is not None:
                assert coordinates == coords, "Mismatching coordinates!"
            else:
                coords = coordinates

            for split_kernel in cxt_kernel.tsfc_kernels:
                indices = split_kernel.indices
                kinfo = split_kernel.kinfo

                # TODO: Implement subdomains for Slate tensors
                if kinfo.subdomain_id != "otherwise":
                    raise NotImplementedError("Subdomains not implemented.")

                args = [
                    c for i in kinfo.coefficient_map
                    for c in self.coefficient(local_coefficients[i])
                ]

                if kinfo.oriented:
                    args.insert(0, self.cell_orientations_sym)

                if kinfo.integral_type in [
                        "interior_facet", "exterior_facet",
                        "interior_facet_vert", "exterior_facet_vert"
                ]:
                    args.append(ast.FlatBlock("&%s" % self.it_sym))

                # Assembly calls within the macro kernel
                tensor = eigen_tensor(exp, self.temps[exp], indices)
                call = ast.FunCall(kinfo.kernel.name, tensor, self.coord_sym,
                                   *args)
                assembly_calls[it_type].append(call)

                # Subkernels for local assembly (Eigen templated functions)
                kast = transformer.visit(kinfo.kernel._ast)
                templated_subkernels.append(kast)
                include_dirs.extend(kinfo.kernel._include_dirs)
                oriented = oriented or kinfo.oriented

        self.assembly_calls = assembly_calls
        self.templated_subkernels = templated_subkernels
        self.include_dirs = list(set(include_dirs))
        self.oriented = oriented
Beispiel #13
0
def compile_c_kernel(expression, to_pts, to_element, fs, coords):
    """Produce a :class:`PyOP2.Kernel` from the c expression provided."""

    coords_space = coords.function_space()
    coords_element = create_element(coords_space.ufl_element(),
                                    vector_is_mixed=False)

    names = {v[0] for v in expression._user_args}

    X = list(coords_element.tabulate(0, to_pts).values())[0]

    # Produce C array notation of X.
    X_str = "{{" + "},\n{".join([",".join(map(str, x)) for x in X.T]) + "}}"

    A = utils.unique_name("A", names)
    X = utils.unique_name("X", names)
    x_ = utils.unique_name("x_", names)
    k = utils.unique_name("k", names)
    d = utils.unique_name("d", names)
    i_ = utils.unique_name("i", names)
    # x is a reserved name.
    x = "x"
    if "x" in names:
        raise ValueError(
            "cannot use 'x' as a user-defined Expression variable")
    ass_exp = [
        ast.Assign(ast.Symbol(A, (k, ), ((len(expression.code), i), )),
                   ast.FlatBlock("%s" % code))
        for i, code in enumerate(expression.code)
    ]

    dim = coords_space.value_size
    ndof = to_element.space_dimension()
    xndof = coords_element.space_dimension()
    nfdof = to_element.space_dimension() * numpy.prod(fs.value_size, dtype=int)

    init_X = ast.Decl(typ="double",
                      sym=ast.Symbol(X, rank=(ndof, xndof)),
                      qualifiers=["const"],
                      init=X_str)
    init_x = ast.Decl(typ="double",
                      sym=ast.Symbol(x, rank=(coords_space.value_size, )))
    init_pi = ast.Decl(typ="double",
                       sym="pi",
                       qualifiers=["const"],
                       init="3.141592653589793")
    init = ast.Block([init_X, init_x, init_pi])
    incr_x = ast.Incr(
        ast.Symbol(x, rank=(d, )),
        ast.Prod(ast.Symbol(X, rank=(k, i_)),
                 ast.Symbol(x_, rank=(ast.Sum(ast.Prod(i_, dim), d), ))))
    assign_x = ast.Assign(ast.Symbol(x, rank=(d, )), 0)
    loop_x = ast.For(init=ast.Decl("unsigned int", i_, 0),
                     cond=ast.Less(i_, xndof),
                     incr=ast.Incr(i_, 1),
                     body=[incr_x])

    block = ast.For(init=ast.Decl("unsigned int", d, 0),
                    cond=ast.Less(d, dim),
                    incr=ast.Incr(d, 1),
                    body=[assign_x, loop_x])
    loop = ast.c_for(k, ndof, ast.Block([block] + ass_exp, open_scope=True))
    user_args = []
    user_init = []
    for _, arg in expression._user_args:
        if arg.shape == (1, ):
            user_args.append(ast.Decl("double *", "%s_" % arg.name))
            user_init.append(
                ast.FlatBlock("const double %s = *%s_;" %
                              (arg.name, arg.name)))
        else:
            user_args.append(ast.Decl("double *", arg.name))
    kernel_code = ast.FunDecl(
        "void", "expression_kernel", [
            ast.Decl("double", ast.Symbol(A, (nfdof, ))),
            ast.Decl("double*", x_)
        ] + user_args, ast.Block(user_init + [init, loop], open_scope=False))
    coefficients = [coords]
    for _, arg in expression._user_args:
        coefficients.append(GlobalWrapper(arg))
    return op2.Kernel(kernel_code,
                      kernel_code.name), False, tuple(coefficients)
Beispiel #14
0
    def _setup(self):
        """A setup method to initialize all the local assembly
        kernels generated by TSFC and creates templated function calls
        conforming to the Eigen-C++ template library standard.
        This function also collects any information regarding orientations
        and extra include directories.
        """
        transformer = Transformer()
        include_dirs = []
        templated_subkernels = []
        assembly_calls = OrderedDict([(it, []) for it in self.supported_integral_types])
        subdomain_calls = OrderedDict([(sd, []) for sd in self.supported_subdomain_types])
        coords = None
        oriented = False
        needs_cell_sizes = False

        # Maps integral type to subdomain key
        subdomain_map = {"exterior_facet": "subdomains_exterior_facet",
                         "exterior_facet_vert": "subdomains_exterior_facet",
                         "interior_facet": "subdomains_interior_facet",
                         "interior_facet_vert": "subdomains_interior_facet"}
        for cxt_kernel in self.context_kernels:
            local_coefficients = cxt_kernel.coefficients
            it_type = cxt_kernel.original_integral_type
            exp = cxt_kernel.tensor

            if it_type not in self.supported_integral_types:
                raise ValueError("Integral type '%s' not recognized" % it_type)

            # Explicit checking of coordinates
            coordinates = cxt_kernel.tensor.ufl_domain().coordinates
            if coords is not None:
                assert coordinates == coords, "Mismatching coordinates!"
            else:
                coords = coordinates

            for split_kernel in cxt_kernel.tsfc_kernels:
                indices = split_kernel.indices
                kinfo = split_kernel.kinfo
                kint_type = kinfo.integral_type
                needs_cell_sizes = needs_cell_sizes or kinfo.needs_cell_sizes

                args = [c for i in kinfo.coefficient_map
                        for c in self.coefficient(local_coefficients[i])]

                if kinfo.oriented:
                    args.insert(0, self.cell_orientations_sym)

                if kint_type in ["interior_facet",
                                 "exterior_facet",
                                 "interior_facet_vert",
                                 "exterior_facet_vert"]:
                    args.append(ast.FlatBlock("&%s" % self.it_sym))

                if kinfo.needs_cell_sizes:
                    args.append(self.cell_size_sym)

                # Assembly calls within the macro kernel
                tensor = eigen_tensor(exp, self.temps[exp], indices)
                call = ast.FunCall(kinfo.kernel.name,
                                   tensor,
                                   self.coord_sym,
                                   *args)

                # Subdomains only implemented for exterior facet integrals
                if kinfo.subdomain_id != "otherwise":
                    if kint_type not in subdomain_map:
                        msg = "Subdomains for integral type '%s' not implemented" % kint_type
                        raise NotImplementedError(msg)

                    sd_id = kinfo.subdomain_id
                    sd_key = subdomain_map[kint_type]
                    subdomain_calls[sd_key].append((sd_id, call))
                else:
                    assembly_calls[it_type].append(call)

                # Subkernels for local assembly (Eigen templated functions)
                from coffee.base import Node
                assert isinstance(kinfo.kernel._code, Node)
                kast = transformer.visit(kinfo.kernel._code)
                templated_subkernels.append(kast)
                include_dirs.extend(kinfo.kernel._include_dirs)
                oriented = oriented or kinfo.oriented

        # Add subdomain call to assembly dict
        assembly_calls.update(subdomain_calls)

        self.assembly_calls = assembly_calls
        self.templated_subkernels = templated_subkernels
        self.include_dirs = list(set(include_dirs))
        self.oriented = oriented
        self.needs_cell_sizes = needs_cell_sizes
Beispiel #15
0
def build_hard_fusion_kernel(base_loop, fuse_loop, fusion_map, loop_chain_index):
    """
    Build AST and :class:`Kernel` for two loops suitable to hard fusion.

    The AST consists of three functions: fusion, base, fuse. base and fuse
    are respectively the ``base_loop`` and the ``fuse_loop`` kernels, whereas
    fusion is the orchestrator that invokes, for each ``base_loop`` iteration,
    base and, if still to be executed, fuse.

    The orchestrator has the following structure: ::

        fusion (buffer, ..., executed):
            base (buffer, ...)
            for i = 0 to arity:
                if not executed[i]:
                    additional pointer staging required by kernel2
                    fuse (sub_buffer, ...)
                    insertion into buffer

    The executed array tracks whether the i-th iteration (out of /arity/)
    adjacent to the main kernel1 iteration has been executed.
    """

    finder = Find((ast.FunDecl, ast.PreprocessNode))

    base = base_loop.kernel
    base_ast = dcopy(base._ast)
    base_info = finder.visit(base_ast)
    base_headers = base_info[ast.PreprocessNode]
    base_fundecl = base_info[ast.FunDecl]
    assert len(base_fundecl) == 1
    base_fundecl = base_fundecl[0]

    fuse = fuse_loop.kernel
    fuse_ast = dcopy(fuse._ast)
    fuse_info = finder.visit(fuse_ast)
    fuse_headers = fuse_info[ast.PreprocessNode]
    fuse_fundecl = fuse_info[ast.FunDecl]
    assert len(fuse_fundecl) == 1
    fuse_fundecl = fuse_fundecl[0]

    # Create /fusion/ arguments and signature
    body = ast.Block([])
    fusion_name = '%s_%s' % (base_fundecl.name, fuse_fundecl.name)
    fusion_args = dcopy(base_fundecl.args + fuse_fundecl.args)
    fusion_fundecl = ast.FunDecl(base_fundecl.ret, fusion_name, fusion_args, body)

    # Make sure kernel and variable names are unique
    base_fundecl.name = "%s_base" % base_fundecl.name
    fuse_fundecl.name = "%s_fuse" % fuse_fundecl.name
    for i, decl in enumerate(fusion_args):
        decl.sym.symbol += '_%d' % i

    # Filter out duplicate arguments, and append extra arguments to the fundecl
    binding = WeakFilter().kernel_args([base_loop, fuse_loop], fusion_fundecl)
    fusion_args += [ast.Decl('int*', 'executed'),
                    ast.Decl('int*', 'fused_iters'),
                    ast.Decl('int', 'i')]

    # Which args are actually used in /fuse/, but not in /base/ ? The gather for
    # such arguments is moved to /fusion/, to avoid usless memory LOADs
    base_dats = set(a.data for a in base_loop.args)
    fuse_dats = set(a.data for a in fuse_loop.args)
    unshared = OrderedDict()
    for arg, decl in binding.items():
        if arg.data in fuse_dats - base_dats:
            unshared.setdefault(decl, arg)

    # Track position of Args that need a postponed gather
    # Can't track Args themselves as they change across different parloops
    fargs = {fusion_args.index(i): ('postponed', False) for i in unshared.keys()}
    fargs.update({len(set(binding.values())): ('onlymap', True)})

    # Add maps for arguments that need a postponed gather
    for decl, arg in unshared.items():
        decl_pos = fusion_args.index(decl)
        fusion_args[decl_pos].sym.symbol = arg.c_arg_name()
        if arg._is_indirect:
            fusion_args[decl_pos].sym.rank = ()
            fusion_args.insert(decl_pos + 1, ast.Decl('int*', arg.c_map_name(0, 0)))

    # Append the invocation of /base/; then, proceed with the invocation
    # of the /fuse/ kernels
    base_funcall_syms = [binding[a].sym.symbol for a in base_loop.args]
    body.children.append(ast.FunCall(base_fundecl.name, *base_funcall_syms))

    for idx in range(fusion_map.arity):

        fused_iter = ast.Assign('i', ast.Symbol('fused_iters', (idx,)))
        fuse_funcall = ast.FunCall(fuse_fundecl.name)
        if_cond = ast.Not(ast.Symbol('executed', ('i',)))
        if_update = ast.Assign(ast.Symbol('executed', ('i',)), 1)
        if_body = ast.Block([fuse_funcall, if_update], open_scope=True)
        if_exec = ast.If(if_cond, [if_body])
        body.children.extend([ast.FlatBlock('\n'), fused_iter, if_exec])

        # Modify the /fuse/ kernel
        # This is to take into account that many arguments are shared with
        # /base/, so they will only staged once for /base/. This requires
        # tweaking the way the arguments are declared and accessed in /fuse/.
        # For example, the shared incremented array (called /buffer/ in
        # the pseudocode in the comment above) now needs to take offsets
        # to be sure the locations that /base/ is supposed to increment are
        # actually accessed. The same concept apply to indirect arguments.
        init = lambda v: '{%s}' % ', '.join([str(j) for j in v])
        for i, fuse_loop_arg in enumerate(fuse_loop.args):
            fuse_kernel_arg = binding[fuse_loop_arg]

            buffer_name = '%s_vec' % fuse_kernel_arg.sym.symbol
            fuse_funcall_sym = ast.Symbol(buffer_name)

            # What kind of temporaries do we need ?
            if fuse_loop_arg.access == INC:
                op, lvalue, rvalue = ast.Incr, fuse_kernel_arg.sym.symbol, buffer_name
                stager = lambda b, l: b.children.extend(l)
                indexer = lambda indices: [(k, j) for j, k in enumerate(indices)]
                pointers = []
            elif fuse_loop_arg.access == READ:
                op, lvalue, rvalue = ast.Assign, buffer_name, fuse_kernel_arg.sym.symbol
                stager = lambda b, l: [b.children.insert(0, j) for j in reversed(l)]
                indexer = lambda indices: [(j, k) for j, k in enumerate(indices)]
                pointers = list(fuse_kernel_arg.pointers)

            # Now gonna handle arguments depending on their type and rank ...

            if fuse_loop_arg._is_global:
                # ... Handle global arguments. These can be dropped in the
                # kernel without any particular fiddling
                fuse_funcall_sym = ast.Symbol(fuse_kernel_arg.sym.symbol)

            elif fuse_kernel_arg in unshared:
                # ... Handle arguments that appear only in /fuse/
                staging = unshared[fuse_kernel_arg].c_vec_init(False).split('\n')
                rvalues = [ast.FlatBlock(j.split('=')[1]) for j in staging]
                lvalues = [ast.Symbol(buffer_name, (j,)) for j in range(len(staging))]
                staging = [ast.Assign(j, k) for j, k in zip(lvalues, rvalues)]

                # Set up the temporary
                buffer_symbol = ast.Symbol(buffer_name, (len(staging),))
                buffer_decl = ast.Decl(fuse_kernel_arg.typ, buffer_symbol,
                                       qualifiers=fuse_kernel_arg.qual,
                                       pointers=list(pointers))

                # Update the if-then AST body
                stager(if_exec.children[0], staging)
                if_exec.children[0].children.insert(0, buffer_decl)

            elif fuse_loop_arg._is_mat:
                # ... Handle Mats
                staging = []
                for b in fused_inc_arg._block_shape:
                    for rc in b:
                        lvalue = ast.Symbol(lvalue, (idx, idx),
                                            ((rc[0], 'j'), (rc[1], 'k')))
                        rvalue = ast.Symbol(rvalue, ('j', 'k'))
                        staging = ItSpace(mode=0).to_for([(0, rc[0]), (0, rc[1])],
                                                         ('j', 'k'),
                                                         [op(lvalue, rvalue)])[:1]

                # Set up the temporary
                buffer_symbol = ast.Symbol(buffer_name, (fuse_kernel_arg.sym.rank,))
                buffer_init = ast.ArrayInit(init([init([0.0])]))
                buffer_decl = ast.Decl(fuse_kernel_arg.typ, buffer_symbol, buffer_init,
                                       qualifiers=fuse_kernel_arg.qual, pointers=pointers)

                # Update the if-then AST body
                stager(if_exec.children[0], staging)
                if_exec.children[0].children.insert(0, buffer_decl)

            elif fuse_loop_arg._is_indirect:
                cdim = fuse_loop_arg.data.cdim

                if cdim == 1 and fuse_kernel_arg.sym.rank:
                    # [Special case]
                    # ... Handle rank 1 indirect arguments that appear in both
                    # /base/ and /fuse/: just point into the right location
                    rank = (idx,) if fusion_map.arity > 1 else ()
                    fuse_funcall_sym = ast.Symbol(fuse_kernel_arg.sym.symbol, rank)

                else:
                    # ... Handle indirect arguments. At the C level, these arguments
                    # are of pointer type, so simple pointer arithmetic is used
                    # to ensure the kernel accesses are to the correct locations
                    fuse_arity = fuse_loop_arg.map.arity
                    base_arity = fuse_arity*fusion_map.arity
                    size = fuse_arity*cdim

                    # Set the proper storage layout before invoking /fuse/
                    ofs_vals = [[base_arity*j + k for k in range(fuse_arity)]
                                for j in range(cdim)]
                    ofs_vals = [[fuse_arity*j + k for k in flatten(ofs_vals)]
                                for j in range(fusion_map.arity)]
                    ofs_vals = list(flatten(ofs_vals))
                    indices = [ofs_vals[idx*size + j] for j in range(size)]

                    staging = [op(ast.Symbol(lvalue, (j,)), ast.Symbol(rvalue, (k,)))
                               for j, k in indexer(indices)]

                    # Set up the temporary
                    buffer_symbol = ast.Symbol(buffer_name, (size,))
                    if fuse_loop_arg.access == INC:
                        buffer_init = ast.ArrayInit(init([0.0]))
                    else:
                        buffer_init = ast.EmptyStatement()
                        pointers.pop()
                    buffer_decl = ast.Decl(fuse_kernel_arg.typ, buffer_symbol, buffer_init,
                                           qualifiers=fuse_kernel_arg.qual,
                                           pointers=pointers)

                    # Update the if-then AST body
                    stager(if_exec.children[0], staging)
                    if_exec.children[0].children.insert(0, buffer_decl)

            else:
                # Nothing special to do for direct arguments
                pass

            # Finally update the /fuse/ funcall
            fuse_funcall.children.append(fuse_funcall_sym)

    fused_headers = set([str(h) for h in base_headers + fuse_headers])
    fused_ast = ast.Root([ast.PreprocessNode(h) for h in fused_headers] +
                         [base_fundecl, fuse_fundecl, fusion_fundecl])

    return Kernel([base, fuse], fused_ast, loop_chain_index), fargs
Beispiel #16
0
def compile_c_kernel(expression, to_pts, to_element, fs, coords):
    """Produce a :class:`PyOP2.Kernel` from the c expression provided."""

    coords_space = coords.function_space()
    coords_element = coords_space.fiat_element

    names = {v[0] for v in expression._user_args}

    X = coords_element.tabulate(0, to_pts).values()[0]

    # Produce C array notation of X.
    X_str = "{{"+"},\n{".join([",".join(map(str, x)) for x in X.T])+"}}"

    A = utils.unique_name("A", names)
    X = utils.unique_name("X", names)
    x_ = utils.unique_name("x_", names)
    k = utils.unique_name("k", names)
    d = utils.unique_name("d", names)
    i_ = utils.unique_name("i", names)
    # x is a reserved name.
    x = "x"
    if "x" in names:
        raise ValueError("cannot use 'x' as a user-defined Expression variable")
    ass_exp = [ast.Assign(ast.Symbol(A, (k,), ((len(expression.code), i),)),
                          ast.FlatBlock("%s" % code))
               for i, code in enumerate(expression.code)]
    vals = {
        "X": X,
        "x": x,
        "x_": x_,
        "k": k,
        "d": d,
        "i": i_,
        "x_array": X_str,
        "dim": coords_space.dim,
        "xndof": coords_element.space_dimension(),
        # FS will always either be a functionspace or
        # vectorfunctionspace, so just accessing dim here is safe
        # (we don't need to go through ufl_element.value_shape())
        "nfdof": to_element.space_dimension() * numpy.prod(fs.dim, dtype=int),
        "ndof": to_element.space_dimension(),
        "assign_dim": numpy.prod(expression.value_shape(), dtype=int)
    }
    init = ast.FlatBlock("""
const double %(X)s[%(ndof)d][%(xndof)d] = %(x_array)s;

double %(x)s[%(dim)d];
const double pi = 3.141592653589793;

""" % vals)
    block = ast.FlatBlock("""
for (unsigned int %(d)s=0; %(d)s < %(dim)d; %(d)s++) {
  %(x)s[%(d)s] = 0;
  for (unsigned int %(i)s=0; %(i)s < %(xndof)d; %(i)s++) {
        %(x)s[%(d)s] += %(X)s[%(k)s][%(i)s] * %(x_)s[%(i)s][%(d)s];
  };
};

""" % vals)
    loop = ast.c_for(k, "%(ndof)d" % vals, ast.Block([block] + ass_exp,
                                                     open_scope=True))
    user_args = []
    user_init = []
    for _, arg in expression._user_args:
        if arg.shape == (1, ):
            user_args.append(ast.Decl("double *", "%s_" % arg.name))
            user_init.append(ast.FlatBlock("const double %s = *%s_;" %
                                           (arg.name, arg.name)))
        else:
            user_args.append(ast.Decl("double *", arg.name))
    kernel_code = ast.FunDecl("void", "expression_kernel",
                              [ast.Decl("double", ast.Symbol(A, (int("%(nfdof)d" % vals),))),
                               ast.Decl("double**", x_)] + user_args,
                              ast.Block(user_init + [init, loop],
                                        open_scope=False))
    coefficients = [coords]
    for _, arg in expression._user_args:
        coefficients.append(GlobalWrapper(arg))
    return op2.Kernel(kernel_code, kernel_code.name), False, tuple(coefficients)
Beispiel #17
0
def compile_expression(slate_expr, tsfc_parameters=None):
    """Takes a Slate expression `slate_expr` and returns the appropriate
    :class:`firedrake.op2.Kernel` object representing the Slate expression.

    :arg slate_expr: a :class:'TensorBase' expression.
    :arg tsfc_parameters: an optional `dict` of form compiler parameters to
                          be passed onto TSFC during the compilation of
                          ufl forms.

    Returns: A `tuple` containing a `SplitKernel(idx, kinfo)`
    """
    if not isinstance(slate_expr, TensorBase):
        raise ValueError("Expecting a `TensorBase` expression, not %s" %
                         type(slate_expr))

    # TODO: Get PyOP2 to write into mixed dats
    if any(len(a.function_space()) > 1 for a in slate_expr.arguments()):
        raise NotImplementedError("Compiling mixed slate expressions")

    # If the expression has already been symbolically compiled, then
    # simply reuse the produced kernel.
    if slate_expr._metakernel_cache is not None:
        return slate_expr._metakernel_cache

    # Initialize coefficients, shape and statements list
    expr_coeffs = slate_expr.coefficients()

    # We treat scalars as 1x1 MatrixBase objects, so we give
    # the right shape to do so and everything just falls out.
    # This bit here ensures the return result has the right
    # shape
    if slate_expr.rank == 0:
        shape = (1, )
    else:
        shape = slate_expr.shape

    statements = []

    # Create a builder for the Slate expression
    builder = KernelBuilder(expression=slate_expr,
                            tsfc_parameters=tsfc_parameters)

    # Initialize coordinate, cell orientations and facet/layer
    # symbols
    coordsym = ast.Symbol("coords")
    coords = None
    cell_orientations = ast.Symbol("cell_orientations")
    cellfacetsym = ast.Symbol("cell_facets")
    mesh_layer_sym = ast.Symbol("layer")
    inc = []

    # We keep track of temporaries that have been declared
    declared_temps = {}
    for cxt_kernel in builder.context_kernels:
        exp = cxt_kernel.tensor
        t = builder.temps[exp]

        if exp not in declared_temps:
            # Declare and initialize the temporary
            statements.append(ast.Decl(eigen_matrixbase_type(exp.shape), t))
            statements.append(ast.FlatBlock("%s.setZero();\n" % t))
            declared_temps[exp] = t

        it_type = cxt_kernel.original_integral_type

        if it_type not in supported_integral_types:
            raise NotImplementedError("Type %s not supported." % it_type)

        # Explicit checking of coordinates
        coordinates = exp.ufl_domain().coordinates
        if coords is not None:
            assert coordinates == coords
        else:
            coords = coordinates

        if it_type == "cell":
            # Nothing difficult about cellwise integrals. Just need
            # to get coefficient info, include_dirs and append
            # function calls to the appropriate subkernels.

            # If tensor is mixed, there will be more than one SplitKernel
            incl = []
            for splitkernel in cxt_kernel.tsfc_kernels:
                index = splitkernel.indices
                kinfo = splitkernel.kinfo

                # Generate an iterable of coefficients to pass to the subkernel
                # if any are required
                clist = [
                    c for ci in kinfo.coefficient_map
                    for c in builder.coefficient(exp.coefficients()[ci])
                ]

                if kinfo.oriented:
                    clist.insert(0, cell_orientations)

                incl.extend(kinfo.kernel._include_dirs)
                tensor = eigen_tensor(exp, t, index)
                statements.append(
                    ast.FunCall(kinfo.kernel.name, tensor, coordsym, *clist))

        elif it_type in [
                "interior_facet", "exterior_facet", "interior_facet_vert",
                "exterior_facet_vert"
        ]:
            # These integral types will require accessing local facet
            # information and looping over facet indices.
            builder.require_cell_facets()
            loop_stmt, incl = facet_integral_loop(cxt_kernel, builder,
                                                  coordsym, cellfacetsym,
                                                  cell_orientations)
            statements.append(loop_stmt)

        elif it_type == "interior_facet_horiz":
            # The infamous interior horizontal facet
            # will have two SplitKernels: one top,
            # one bottom. The mesh layer will determine
            # which kernels we call.
            builder.require_mesh_layers()
            top_sks = [
                k for k in cxt_kernel.tsfc_kernels
                if k.kinfo.integral_type == "exterior_facet_top"
            ]
            bottom_sks = [
                k for k in cxt_kernel.tsfc_kernels
                if k.kinfo.integral_type == "exterior_facet_bottom"
            ]
            assert len(top_sks) == len(bottom_sks), (
                "Number of top and bottom kernels should be equal")
            # Top and bottom kernels need to be sorted by kinfo.indices
            # if the space is mixed to ensure indices match.
            top_sks = sorted(top_sks, key=lambda x: x.indices)
            bottom_sks = sorted(bottom_sks, key=lambda x: x.indices)
            stmt, incl = extruded_int_horiz_facet(exp, builder, top_sks,
                                                  bottom_sks, coordsym,
                                                  mesh_layer_sym,
                                                  cell_orientations)
            statements.append(stmt)

        elif it_type in ["exterior_facet_bottom", "exterior_facet_top"]:
            # These kernels will only be called if we are on
            # the top or bottom layers of the extruded mesh.
            builder.require_mesh_layers()
            stmt, incl = extruded_top_bottom_facet(cxt_kernel, builder,
                                                   coordsym, mesh_layer_sym,
                                                   cell_orientations)
            statements.append(stmt)

        else:
            raise ValueError("Kernel type not recognized: %s" % it_type)

        # Don't duplicate include lines
        inc_dir = list(set(incl) - set(inc))
        inc.extend(inc_dir)

    # Now we handle any terms that require auxiliary temporaries,
    # such as inverses, transposes and actions of a tensor on a
    # coefficient
    if builder.aux_exprs:
        # The declared temps will be updated within this method
        aux_statements = auxiliary_temporaries(builder, declared_temps)
        statements.extend(aux_statements)

    # Now we create the result statement by declaring its eigen type and
    # using Eigen::Map to move between Eigen and C data structs.
    result_sym = ast.Symbol("T%d" % len(builder.temps))
    result_data_sym = ast.Symbol("A%d" % len(builder.temps))
    result_type = "Eigen::Map<%s >" % eigen_matrixbase_type(shape)
    result = ast.Decl(SCALAR_TYPE, ast.Symbol(result_data_sym, shape))
    result_statement = ast.FlatBlock(
        "%s %s((%s *)%s);\n" %
        (result_type, result_sym, SCALAR_TYPE, result_data_sym))
    statements.append(result_statement)

    # Generate the complete c++ string performing the linear algebra operations
    # on Eigen matrices/vectors
    cpp_string = ast.FlatBlock(
        metaphrase_slate_to_cpp(slate_expr, declared_temps))
    statements.append(ast.Incr(result_sym, cpp_string))

    # Finalize AST for macro kernel construction
    builder._finalize_kernels_and_update()

    # Generate arguments for the macro kernel
    args = [result, ast.Decl("%s **" % SCALAR_TYPE, coordsym)]

    # Orientation information
    if builder.oriented:
        args.append(ast.Decl("int **", cell_orientations))

    # Coefficient information
    for c in expr_coeffs:
        if isinstance(c, Constant):
            ctype = "%s *" % SCALAR_TYPE
        else:
            ctype = "%s **" % SCALAR_TYPE
        args.extend([ast.Decl(ctype, csym) for csym in builder.coefficient(c)])

    # Facet information
    if builder.needs_cell_facets:
        args.append(
            ast.Decl("%s *" % as_cstr(cell_to_facets_dtype), cellfacetsym))

    # NOTE: We need to be careful about the ordering here. Mesh layers are
    # added as the final argument to the kernel.
    if builder.needs_mesh_layers:
        args.append(ast.Decl("int", mesh_layer_sym))

    # NOTE: In the future we may want to have more than one "macro_kernel"
    macro_kernel_name = "compile_slate"
    stmt = ast.Block(statements)
    macro_kernel = builder.construct_macro_kernel(name=macro_kernel_name,
                                                  args=args,
                                                  statements=stmt)

    # Tell the builder to construct the final ast
    kernel_ast = builder.construct_ast([macro_kernel])

    # Now we wrap up the kernel ast as a PyOP2 kernel.
    # Include the Eigen header files
    inc.extend(["%s/include/eigen3/" % d for d in PETSC_DIR])
    op2kernel = op2.Kernel(
        kernel_ast,
        macro_kernel_name,
        cpp=True,
        include_dirs=inc,
        headers=['#include <Eigen/Dense>', '#define restrict __restrict'])

    assert len(slate_expr.ufl_domains()) == 1, (
        "No support for multiple domains yet!")

    # Send back a "TSFC-like" SplitKernel object with an
    # index and KernelInfo
    kinfo = KernelInfo(kernel=op2kernel,
                       integral_type=builder.integral_type,
                       oriented=builder.oriented,
                       subdomain_id="otherwise",
                       domain_number=0,
                       coefficient_map=tuple(range(len(expr_coeffs))),
                       needs_cell_facets=builder.needs_cell_facets,
                       pass_layer_arg=builder.needs_mesh_layers)

    idx = tuple([0] * slate_expr.rank)

    kernels = (SplitKernel(idx, kinfo), )

    # Store the resulting kernel for reuse
    slate_expr._metakernel_cache = kernels

    return kernels
Beispiel #18
0
def generate_kernel_ast(builder, statements, declared_temps):
    """Glues together the complete AST for the Slate expression
    contained in the :class:`LocalKernelBuilder`.

    :arg builder: The :class:`LocalKernelBuilder` containing
        all relevant expression information.
    :arg statements: A list of COFFEE objects containing all
        assembly calls and temporary declarations.
    :arg declared_temps: A `dict` containing all previously
        declared temporaries.

    Return: A `KernelInfo` object describing the complete AST.
    """
    slate_expr = builder.expression
    if slate_expr.rank == 0:
        # Scalars are treated as 1x1 MatrixBase objects
        shape = (1, )
    else:
        shape = slate_expr.shape

    # Now we create the result statement by declaring its eigen type and
    # using Eigen::Map to move between Eigen and C data structs.
    statements.append(ast.FlatBlock("/* Map eigen tensor into C struct */\n"))
    result_sym = ast.Symbol("T%d" % len(declared_temps))
    result_data_sym = ast.Symbol("A%d" % len(declared_temps))
    result_type = "Eigen::Map<%s >" % eigen_matrixbase_type(shape)
    result = ast.Decl(ScalarType_c,
                      ast.Symbol(result_data_sym),
                      pointers=[("restrict", )])
    result_statement = ast.FlatBlock(
        "%s %s((%s *)%s);\n" %
        (result_type, result_sym, ScalarType_c, result_data_sym))
    statements.append(result_statement)

    # Generate the complete c++ string performing the linear algebra operations
    # on Eigen matrices/vectors
    statements.append(ast.FlatBlock("/* Linear algebra expression */\n"))
    cpp_string = ast.FlatBlock(slate_to_cpp(slate_expr, declared_temps))
    statements.append(ast.Incr(result_sym, cpp_string))

    # Generate arguments for the macro kernel
    args = [
        result,
        ast.Decl(ScalarType_c,
                 builder.coord_sym,
                 pointers=[("restrict", )],
                 qualifiers=["const"])
    ]

    # Orientation information
    if builder.oriented:
        args.append(
            ast.Decl("int",
                     builder.cell_orientations_sym,
                     pointers=[("restrict", )],
                     qualifiers=["const"]))

    # Coefficient information
    expr_coeffs = slate_expr.coefficients()
    for c in expr_coeffs:
        args.extend([
            ast.Decl(ScalarType_c,
                     csym,
                     pointers=[("restrict", )],
                     qualifiers=["const"]) for csym in builder.coefficient(c)
        ])

    # Facet information
    if builder.needs_cell_facets:
        f_sym = builder.cell_facet_sym
        f_arg = ast.Symbol("arg_cell_facets")
        f_dtype = as_cstr(cell_to_facets_dtype)

        # cell_facets is locally a flattened 2-D array. We typecast here so we
        # can access its entries using standard array notation.
        cast = "%s (*%s)[2] = (%s (*)[2])%s;\n" % (f_dtype, f_sym, f_dtype,
                                                   f_arg)
        statements.insert(0, ast.FlatBlock(cast))
        args.append(
            ast.Decl(f_dtype,
                     f_arg,
                     pointers=[("restrict", )],
                     qualifiers=["const"]))

    # NOTE: We need to be careful about the ordering here. Mesh layers are
    # added as the final argument to the kernel
    # and the amount of layers before that.
    if builder.needs_mesh_layers:
        args.append(
            ast.Decl("int",
                     builder.mesh_layer_count_sym,
                     pointers=[("restrict", )],
                     qualifiers=["const"]))
        args.append(ast.Decl("int", builder.mesh_layer_sym))

    # Cell size information
    if builder.needs_cell_sizes:
        args.append(
            ast.Decl(ScalarType_c,
                     builder.cell_size_sym,
                     pointers=[("restrict", )],
                     qualifiers=["const"]))

    # Macro kernel
    macro_kernel_name = "pyop2_kernel_compile_slate"
    stmts = ast.Block(statements)
    macro_kernel = ast.FunDecl("void",
                               macro_kernel_name,
                               args,
                               stmts,
                               pred=["static", "inline"])

    # Construct the final ast
    kernel_ast = ast.Node(builder.templated_subkernels + [macro_kernel])

    # Now we wrap up the kernel ast as a PyOP2 kernel and include the
    # Eigen header files
    include_dirs = list(builder.include_dirs)
    include_dirs.append(EIGEN_INCLUDE_DIR)
    op2kernel = op2.Kernel(
        kernel_ast,
        macro_kernel_name,
        cpp=True,
        include_dirs=include_dirs,
        headers=['#include <Eigen/Dense>', '#define restrict __restrict'])

    op2kernel.num_flops = builder.expression_flops + builder.terminal_flops
    # Send back a "TSFC-like" SplitKernel object with an
    # index and KernelInfo
    kinfo = KernelInfo(kernel=op2kernel,
                       integral_type=builder.integral_type,
                       oriented=builder.oriented,
                       subdomain_id="otherwise",
                       domain_number=0,
                       coefficient_map=slate_expr.coeff_map,
                       needs_cell_facets=builder.needs_cell_facets,
                       pass_layer_arg=builder.needs_mesh_layers,
                       needs_cell_sizes=builder.needs_cell_sizes)

    return kinfo
Beispiel #19
0
def auxiliary_temporaries(builder, declared_temps):
    """This function generates auxiliary information regarding special
    handling of expressions that require creating additional temporaries.

    :arg builder: a :class:`KernelBuilder` object that contains all the
                  necessary temporary and expression information.
    :arg declared_temps: a `dict` of temporaries that have already been
                         declared and assigned values. This will be
                         updated in this method and referenced later
                         in the compiler.
    Returns: a list of auxiliary statements are returned that contain temporary
             declarations and any code-blocks needed to evaluate the
             expression.
    """
    aux_statements = []
    for exp in builder.aux_exprs:
        if isinstance(exp, Inverse):
            if builder._ref_counts[exp] > 1:
                # Get the temporary for the particular expression
                result = metaphrase_slate_to_cpp(exp, declared_temps)

                # Now we use the generated result and assign the value to the
                # corresponding temporary.
                temp = ast.Symbol("auxT%d" % len(declared_temps))
                shape = exp.shape
                aux_statements.append(
                    ast.Decl(eigen_matrixbase_type(shape), temp))
                aux_statements.append(ast.FlatBlock("%s.setZero();\n" % temp))
                aux_statements.append(ast.Assign(temp, result))

                # Update declared temps
                declared_temps[exp] = temp

        elif isinstance(exp, Action):
            # Action computations are relatively inexpensive, so
            # we don't waste memory space on creating temps for
            # these expressions. However, we must create a temporary
            # for the actee coefficient (if we haven't already).
            actee, = exp.actee
            if actee not in declared_temps:
                # Declare a temporary for the coefficient
                V = actee.function_space()
                shape_array = [(Vi.finat_element.space_dimension(),
                                np.prod(Vi.shape)) for Vi in V.split()]
                ctemp = ast.Symbol("auxT%d" % len(declared_temps))
                shape = sum(n * d for (n, d) in shape_array)
                typ = eigen_matrixbase_type(shape=(shape, ))
                aux_statements.append(ast.Decl(typ, ctemp))
                aux_statements.append(ast.FlatBlock("%s.setZero();\n" % ctemp))

                # Now we populate the temporary with the coefficient
                # information and insert in the right place.
                offset = 0
                for i, shp in enumerate(shape_array):
                    node_extent, dof_extent = shp
                    # Now we unpack the function and insert its entries into a
                    # 1D vector temporary
                    isym = ast.Symbol("i1")
                    jsym = ast.Symbol("j1")
                    tensor_index = ast.Sum(
                        offset, ast.Sum(ast.Prod(dof_extent, isym), jsym))

                    # Inner-loop running over dof_extent
                    coeff_sym = ast.Symbol(builder.coefficient(actee)[i],
                                           rank=(isym, jsym))
                    coeff_temp = ast.Symbol(ctemp, rank=(tensor_index, ))
                    inner_loop = ast.For(
                        ast.Decl("unsigned int", jsym, init=0),
                        ast.Less(jsym, dof_extent), ast.Incr(jsym, 1),
                        ast.Assign(coeff_temp, coeff_sym))
                    # Outer-loop running over node_extent
                    loop = ast.For(ast.Decl("unsigned int", isym, init=0),
                                   ast.Less(isym, node_extent),
                                   ast.Incr(isym, 1), inner_loop)

                    aux_statements.append(loop)
                    offset += node_extent * dof_extent

                # Update declared temporaries with the coefficient
                declared_temps[actee] = ctemp
        else:
            raise NotImplementedError(
                "Auxiliary expr type %s not currently implemented." %
                type(exp))

    return aux_statements
Beispiel #20
0
def tensor_assembly_calls(builder):
    """Generates a block of statements for assembling the local
    finite element tensors.

    :arg builder: The :class:`LocalKernelBuilder` containing
        all relevant expression information and assembly calls.
    """
    assembly_calls = builder.assembly_calls
    statements = [ast.FlatBlock("/* Assemble local tensors */\n")]

    # Cell integrals are straightforward. Just splat them out.
    statements.extend(assembly_calls["cell"])

    if builder.needs_cell_facets:
        # The for-loop will have the general structure:
        #
        #    FOR (facet=0; facet<num_facets; facet++):
        #        IF (facet is interior):
        #            *interior calls
        #        ELSE IF (facet is exterior):
        #            *exterior calls
        #
        # If only interior (exterior) facets are present,
        # then only a single IF-statement checking for interior
        # (exterior) facets will be present within the loop. The
        # cell facets are labelled `1` for interior, and `0` for
        # exterior.
        statements.append(ast.FlatBlock("/* Loop over cell facets */\n"))
        int_calls = list(
            chain(*[
                assembly_calls[it_type]
                for it_type in ("interior_facet", "interior_facet_vert")
            ]))
        ext_calls = list(
            chain(*[
                assembly_calls[it_type]
                for it_type in ("exterior_facet", "exterior_facet_vert")
            ]))

        # Generate logical statements for handling exterior/interior facet
        # integrals on subdomains.
        # Currently only facet integrals are supported.
        for sd_type in ("subdomains_exterior_facet",
                        "subdomains_interior_facet"):
            stmts = []
            for sd, sd_calls in groupby(assembly_calls[sd_type],
                                        lambda x: x[0]):
                _, calls = zip(*sd_calls)
                if_sd = ast.Eq(
                    ast.Symbol(builder.cell_facet_sym,
                               rank=(builder.it_sym, 1)), sd)
                stmts.append(
                    ast.If(if_sd, (ast.Block(calls, open_scope=True), )))

            if sd_type == "subdomains_exterior_facet":
                ext_calls.extend(stmts)
            if sd_type == "subdomains_interior_facet":
                int_calls.extend(stmts)

        # Compute the number of facets to loop over
        domain = builder.expression.ufl_domain()
        if domain.cell_set._extruded:
            num_facets = domain.ufl_cell()._cells[0].num_facets()
        else:
            num_facets = domain.ufl_cell().num_facets()

        if_ext = ast.Eq(
            ast.Symbol(builder.cell_facet_sym, rank=(builder.it_sym, 0)), 0)
        if_int = ast.Eq(
            ast.Symbol(builder.cell_facet_sym, rank=(builder.it_sym, 0)), 1)
        body = []
        if ext_calls:
            body.append(
                ast.If(if_ext, (ast.Block(ext_calls, open_scope=True), )))
        if int_calls:
            body.append(
                ast.If(if_int, (ast.Block(int_calls, open_scope=True), )))

        statements.append(
            ast.For(ast.Decl("unsigned int", builder.it_sym, init=0),
                    ast.Less(builder.it_sym, num_facets),
                    ast.Incr(builder.it_sym, 1), body))

    if builder.needs_mesh_layers:
        # In the presence of interior horizontal facet calls, an
        # IF-ELIF-ELSE block is generated using the mesh levels
        # as conditions for which calls are needed:
        #
        #    IF (layer == bottom_layer):
        #        *bottom calls
        #    ELSE IF (layer == top_layer):
        #        *top calls
        #    ELSE:
        #        *top calls
        #        *bottom calls
        #
        # Any extruded top or bottom calls for extruded facets are
        # included within the appropriate mesh-level IF-blocks. If
        # no interior horizontal facet calls are present, then
        # standard IF-blocks are generated for exterior top/bottom
        # facet calls when appropriate:
        #
        #    IF (layer == bottom_layer):
        #        *bottom calls
        #
        #    IF (layer == top_layer):
        #        *top calls
        #
        # The mesh level is an integer provided as a macro kernel
        # argument.

        # FIXME: No variable layers assumption
        statements.append(ast.FlatBlock("/* Mesh levels: */\n"))
        num_layers = ast.Symbol(builder.mesh_layer_count_sym, rank=(0, ))
        layer = builder.mesh_layer_sym
        types = [
            "interior_facet_horiz_top", "interior_facet_horiz_bottom",
            "exterior_facet_top", "exterior_facet_bottom"
        ]
        decide = [
            ast.Less(layer, num_layers),
            ast.Greater(layer, 0),
            ast.Eq(layer, num_layers),
            ast.Eq(layer, 0)
        ]
        for (integral_type, which) in zip(types, decide):
            statements.append(
                ast.If(which, (ast.Block(assembly_calls[integral_type],
                                         open_scope=True), )))

    return statements
Beispiel #21
0
def coefficient_temporaries(builder, declared_temps):
    """Generates coefficient temporary statements for assigning
    coefficients to vector temporaries.

    :arg builder: The :class:`LocalKernelBuilder` containing
                  all relevant expression information.
    :arg declared_temps: A `dict` keeping track of all declared
                         temporaries. This dictionary is updated
                         as coefficients are assigned temporaries.

    Action computations require creating coefficient temporaries to
    compute the matrix-vector product. The temporaries are created by
    inspecting the function space of the coefficient to compute node
    and dof extents. The coefficient is then assigned values by looping
    over both the node extent and dof extent (double FOR-loop). A double
    FOR-loop is needed for each function space (if the function space is
    mixed, then a loop will be constructed for each component space).
    The general structure of each coefficient loop will be:

         FOR (i1=0; i1<node_extent; i1++):
             FOR (j1=0; j1<dof_extent; j1++):
                 wT0[offset + (dof_extent * i1) + j1] = w_0_0[i1][j1]
                 wT1[offset + (dof_extent * i1) + j1] = w_1_0[i1][j1]
                 .
                 .
                 .

    where wT0, wT1, ... are temporaries for coefficients sharing the
    same node and dof extents. The offset is computed based on whether
    the function space is mixed. The offset is always 0 for non-mixed
    coefficients. If the coefficient is mixed, then the offset is
    incremented by the total number of nodal unknowns associated with
    the component spaces of the mixed space.
    """
    statements = [ast.FlatBlock("/* Coefficient temporaries */\n")]
    i_sym = ast.Symbol("i1")
    j_sym = ast.Symbol("j1")
    loops = [ast.FlatBlock("/* Loops for coefficient temps */\n")]
    for (nodes, dofs), cinfo_list in builder.action_coefficients.items():
        # Collect all coefficients which share the same node/dof extent
        assignments = []
        for cinfo in cinfo_list:
            fs_i = cinfo.space_index
            offset = cinfo.offset_index
            c_shape = cinfo.shape
            actee = cinfo.coefficient

            if actee not in declared_temps:
                # Declare and initialize coefficient temporary
                c_type = eigen_matrixbase_type(shape=c_shape)
                t = ast.Symbol("wT%d" % len(declared_temps))
                statements.append(ast.Decl(c_type, t))
                statements.append(ast.FlatBlock("%s.setZero();\n" % t))
                declared_temps[actee] = t

            # Assigning coefficient values into temporary
            coeff_sym = ast.Symbol(builder.coefficient(actee)[fs_i],
                                   rank=(i_sym, j_sym))
            index = ast.Sum(offset, ast.Sum(ast.Prod(dofs, i_sym), j_sym))
            coeff_temp = ast.Symbol(t, rank=(index, ))
            assignments.append(ast.Assign(coeff_temp, coeff_sym))

        # Inner-loop running over dof extent
        inner_loop = ast.For(ast.Decl("unsigned int", j_sym, init=0),
                             ast.Less(j_sym, dofs), ast.Incr(j_sym, 1),
                             assignments)

        # Outer-loop running over node extent
        loop = ast.For(ast.Decl("unsigned int", i_sym, init=0),
                       ast.Less(i_sym, nodes), ast.Incr(i_sym, 1), inner_loop)

        loops.append(loop)

    statements.extend(loops)

    return statements