Ejemplos de ParLoop en Python

Lenguaje de programación: Python

Namespace/Package Name: pyop2.op2

Método / Función: ParLoop

Ejemplos en hotexamples.com: 7

Python ParLoop - 7 ejemplos encontrados. Estos son los ejemplos en Python del mundo real mejor valorados de pyop2.op2.ParLoop extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Ejemplo n.º 1

Mostrar archivo

def make_extruded_coords(extruded_topology,
                         base_coords,
                         ext_coords,
                         layer_height,
                         extrusion_type='uniform',
                         kernel=None):
    """
    Given either a kernel or a (fixed) layer_height, compute an
    extruded coordinate field for an extruded mesh.

    The result is written into ``ext_coords`` by executing a single
    :class:`op2.ParLoop` over ``ext_coords.cell_set``; nothing is returned.

    :arg extruded_topology: an :class:`~.ExtrudedMeshTopology` to extrude
         a coordinate field for.
    :arg base_coords: a :class:`~.Function` to read the base
         coordinates from.
    :arg ext_coords: a :class:`~.Function` to write the extruded
         coordinates into.
    :arg layer_height: the height for each layer.  Either a scalar,
         where layers will be equi-spaced at the specified height, or a
         1D array of variable layer heights to use through the extrusion.
    :arg extrusion_type: the type of extrusion to use.  Predefined
         options are either "uniform" (creating equi-spaced layers by
         extruding in the (n+1)dth direction), "radial" (creating
         equi-spaced layers by extruding in the outward direction from
         the origin) or "radial_hedgehog" (creating equi-spaced layers
         by extruding coordinates in the outward cell-normal
         direction, needs a P1dgxP1 coordinate field).
    :arg kernel: an optional kernel to carry out coordinate extrusion.
         When given, ``extrusion_type`` is not consulted.

    :raises RuntimeError: if no kernel is given and the vertical part of
         the extruded coordinate element is not degree-1 (discontinuous)
         Lagrange, or if ``layer_height`` has more than one dimension.
    :raises NotImplementedError: if ``extrusion_type`` is not one of the
         predefined options, or if hedgehog extrusion is requested for a
         base cell whose topological dimension is neither 1 nor 2.

    The kernel signature (if provided) is::

        void kernel(double **base_coords, double **ext_coords,
                    double *layer_height, int layer)

    The kernel iterates over the cells of the mesh and receives as
    arguments the coordinates of the base cell (to read), the
    coordinates on the extruded cell (to write to), the fixed layer
    height, and the current cell layer.

    NOTE(review): the parloop built below passes ``ext_coords`` before
    ``base_coords``; confirm the argument order stated in the signature
    above against the kernel calling convention.
    """
    # Take the second sub-element of the first sub-element of the extruded
    # coordinate element (presumably the vertical factor of a tensor-product
    # element -- TODO confirm against the UFL element layout).
    _, vert_space = ext_coords.function_space().ufl_element().sub_elements(
    )[0].sub_elements()
    # The generated kernels below are only valid for a P1/P1dg vertical space;
    # a user-supplied kernel bypasses this restriction.
    if kernel is None and not (vert_space.degree() == 1
                               and vert_space.family()
                               in ['Lagrange', 'Discontinuous Lagrange']):
        raise RuntimeError(
            'Extrusion of coordinates is only possible for a P1 or P1dg interval unless a custom kernel is provided'
        )

    # Normalise a scalar or sequence into an array with at least one dimension.
    layer_height = numpy.atleast_1d(numpy.array(layer_height, dtype=RealType))

    if layer_height.ndim > 1:
        raise RuntimeError('Extrusion layer height should be 1d or scalar')

    # Variable layer heights: turn per-layer thicknesses into cumulative
    # heights with a leading zero, so entry k is the summed height of the
    # first k layers.
    if layer_height.size > 1:
        layer_height = numpy.cumsum(numpy.concatenate(([0], layer_height)))

    # Number of entries in the height array (1 in the scalar case).
    layer_heights = layer_height.size
    layer_height = op2.Global(layer_heights, layer_height, dtype=RealType)

    # User-supplied kernel: run it directly and skip kernel generation.
    if kernel is not None:
        op2.ParLoop(kernel,
                    ext_coords.cell_set,
                    ext_coords.dat(op2.WRITE, ext_coords.cell_node_map()),
                    base_coords.dat(op2.READ, base_coords.cell_node_map()),
                    layer_height(op2.READ),
                    pass_layer_arg=True,
                    is_loopy_kernel=True).compute()
        return
    ext_fe = create_element(ext_coords.ufl_element())
    ext_shape = ext_fe.index_shape
    base_fe = create_element(base_coords.ufl_element())
    base_shape = base_fe.index_shape
    # Argument declarations for the generated loopy kernel, in the same order
    # as the parloop arguments at the end of this function.
    data = []
    data.append(lp.GlobalArg("ext_coords", dtype=ScalarType, shape=ext_shape))
    data.append(lp.GlobalArg("base_coords", dtype=ScalarType,
                             shape=base_shape))
    data.append(
        lp.GlobalArg("layer_height", dtype=RealType, shape=(layer_heights, )))
    data.append(lp.ValueArg('layer'))
    base_coord_dim = base_coords.function_space().value_size
    # Deal with tensor product cells
    # adim is the number of leading axes of ext_shape, i.e. everything except
    # its last two axes (indexed below with "l" and the coordinate component).
    adim = len(ext_shape) - 2

    # handle single or variable layer heights
    # "l" is the loop variable over the two ends (0 and 1) of the current layer.
    if layer_heights == 1:
        height_var = "layer_height[0] * (layer + l)"
    else:
        height_var = "layer_height[layer + l]"

    def _get_arity_axis_inames(_base):
        # One iname per leading axis, e.g. "d0", "d1", ... for _base == "d".
        return tuple(_base + str(i) for i in range(adim))

    def _get_lp_domains(_inames, _extents):
        # For each (iname, extent) pair build the ISL set 0 <= iname < extent.
        domains = []
        for idx, extent in zip(_inames, _extents):
            inames = isl.make_zero_and_vars([idx])
            domains.append(((inames[0].le_set(inames[idx])) &
                            (inames[idx].lt_set(inames[0] + extent))))
        return domains

    if extrusion_type == 'uniform':
        domains = []
        dd = _get_arity_axis_inames('d')
        domains.extend(_get_lp_domains(dd, ext_shape[:adim]))
        domains.extend(_get_lp_domains(('c', ), (base_coord_dim, )))
        if layer_heights == 1:
            domains.extend(_get_lp_domains(('l', ), (2, )))
        else:
            # Additionally keep layer + l inside the bounds of the cumulative
            # height array.
            domains.append(
                "[layer] -> { [l] : 0 <= l <= 1 & 0 <= l + layer < %d}" %
                layer_heights)
        # Copy the base coordinates and store the height in the extra
        # component with index base_coord_dim.
        instructions = """
        ext_coords[{dd}, l, c] = base_coords[{dd}, c]
        ext_coords[{dd}, l, {base_coord_dim}] = ({hv})
        """.format(dd=', '.join(dd),
                   base_coord_dim=base_coord_dim,
                   hv=height_var)
        name = "pyop2_kernel_uniform_extrusion"
    elif extrusion_type == 'radial':
        domains = []
        dd = _get_arity_axis_inames('d')
        domains.extend(_get_lp_domains(dd, ext_shape[:adim]))
        domains.extend(_get_lp_domains(('c', 'k'), (base_coord_dim, ) * 2))
        if layer_heights == 1:
            domains.extend(_get_lp_domains(('l', ), (2, )))
        else:
            # Additionally keep layer + l inside the bounds of the cumulative
            # height array.
            domains.append(
                "[layer] -> { [l] : 0 <= l <= 1 & 0 <= l + layer < %d}" %
                layer_heights)
        # tt accumulates the sum of squares of the base coordinate components
        # and is then square-rooted; each component is displaced by the height
        # scaled by base_coords / tt.
        instructions = """
        <{RealType}> tt[{dd}] = 0
        <{RealType}> bc[{dd}] = 0
        for k
            bc[{dd}] = real(base_coords[{dd}, k])
            tt[{dd}] = tt[{dd}] + bc[{dd}] * bc[{dd}]
        end
        tt[{dd}] = sqrt(tt[{dd}])
        ext_coords[{dd}, l, c] = base_coords[{dd}, c] + base_coords[{dd}, c] * ({hv}) / tt[{dd}]
        """.format(RealType=RealType, dd=', '.join(dd), hv=height_var)
        name = "pyop2_kernel_radial_extrusion"
    elif extrusion_type == 'radial_hedgehog':
        # Only implemented for interval in 2D and triangle in 3D.
        # gdim != tdim already checked in ExtrudedMesh constructor.
        tdim = base_coords.ufl_domain().ufl_cell().topological_dimension()
        if tdim not in [1, 2]:
            raise NotImplementedError(
                "Hedgehog extrusion not implemented for %s" %
                base_coords.ufl_domain().ufl_cell())
        # tdim == 1:
        #
        # normal is:
        # (0 -1) (x2 - x1)
        # (1  0) (y2 - y1)
        #
        # tdim == 2:
        # normal is
        # v0 x v1
        #
        #    /\
        # v0/  \
        #  /    \
        # /------\
        #    v1
        domains = []
        dd = _get_arity_axis_inames('d')
        _dd = _get_arity_axis_inames('_d')
        domains.extend(_get_lp_domains(dd, ext_shape[:adim]))
        domains.extend(_get_lp_domains(_dd, ext_shape[:adim]))
        # NOTE(review): unlike the other two extrusion types, "l" always gets
        # the plain 0 <= l < 2 domain here, even when variable layer heights
        # are used -- confirm this is intended.
        domains.extend(
            _get_lp_domains(('c0', 'c1', 'c2', 'c3', 'k', 'l'),
                            (base_coord_dim, ) * 5 + (2, )))
        # Formula for normal, n
        # The snippets are keyed below by [tdim][adim].
        n_1_1 = """
        n[0] = -bc[1, 1] + bc[0, 1]
        n[1] = bc[1, 0] - bc[0, 0]
        """
        n_2_1 = """
        v0[c3] = bc[1, c3] - bc[0, c3]
        v1[c3] = bc[2, c3] - bc[0, c3]
        n[0] = v0[1] * v1[2] - v0[2] * v1[1]
        n[1] = v0[2] * v1[0] - v0[0] * v1[2]
        n[2] = v0[0] * v1[1] - v0[1] * v1[0]
        """
        n_2_2 = """
        v0[c3] = bc[0, 1, c3] - bc[0, 0, c3]
        v1[c3] = bc[1, 0, c3] - bc[0, 0, c3]
        n[0] = v0[1] * v1[2] - v0[2] * v1[1]
        n[1] = v0[2] * v1[0] - v0[0] * v1[2]
        n[2] = v0[0] * v1[1] - v0[1] * v1[0]
        """
        n_dict = {1: {1: n_1_1}, 2: {1: n_2_1, 2: n_2_2}}
        # x sums the base coordinates over the cell's nodes; the sign of
        # dot(x, n) is used to flip the normalisation so the displacement
        # n / norm has a non-negative component along x.
        instructions = """
        <{RealType}> dot = 0
        <{RealType}> norm = 0
        <{RealType}> v0[c2] = 0
        <{RealType}> v1[c2] = 0
        <{RealType}> n[c2] = 0
        <{RealType}> x[c2] = 0
        <{RealType}> bc[{_dd}, c1] = real(base_coords[{_dd}, c1])
        for {_dd}
            x[c1] = x[c1] + bc[{_dd}, c1]
        end
        {ninst}
        for k
            dot = dot + x[k] * n[k]
            norm = norm + n[k] * n[k]
        end
        norm = sqrt(norm)
        norm = -norm if dot < 0 else norm
        ext_coords[{dd}, l, c0] = base_coords[{dd}, c0] + n[c0] * ({hv}) / norm
        """.format(RealType=RealType,
                   dd=', '.join(dd),
                   _dd=', '.join(_dd),
                   ninst=n_dict[tdim][adim],
                   hv=height_var)
        name = "pyop2_kernel_radial_hedgehog_extrusion"
    else:
        raise NotImplementedError('Unsupported extrusion type "%s"' %
                                  extrusion_type)

    # Turn the domains and instructions into a loopy function targeting C,
    # wrap it as a PyOP2 kernel and execute it over every cell.
    ast = lp.make_function(domains,
                           instructions,
                           data,
                           name=name,
                           target=lp.CTarget(),
                           seq_dependencies=True,
                           silenced_warnings=["summing_if_branches_ops"])
    kernel = op2.Kernel(ast, name)
    op2.ParLoop(kernel,
                ext_coords.cell_set,
                ext_coords.dat(op2.WRITE, ext_coords.cell_node_map()),
                base_coords.dat(op2.READ, base_coords.cell_node_map()),
                layer_height(op2.READ),
                pass_layer_arg=True,
                is_loopy_kernel=True).compute()

Ejemplo n.º 2

Mostrar archivo

Archivo: interpolation.py Proyecto: jwallwork23/firedrake

def _interpolator(V, dat, expr, subset, access):
    """Build the callables that interpolate ``expr`` into ``dat``.

    :arg V: the function space being interpolated into.
    :arg dat: the dat receiving the interpolated values.
    :arg expr: the expression to interpolate: either a
        ``firedrake.Expression`` that provides ``eval``, or an object
        that is handed to ``compile_ufl_kernel``.
    :arg subset: an optional subset of the coordinate field's cell set
        to iterate over; ``None`` means iterate over every cell.
    :arg access: the access descriptor used for ``dat``; ``op2.READ``
        is rejected.
    :returns: a tuple of callables: an optional copy-in step, the
        ``compute`` method of the parloop, and an optional copy-out step.
    :raises ValueError: for ``op2.READ`` access, or when the shape of a
        non-``Expression`` ``expr`` differs from ``V.shape``.
    :raises NotImplementedError: for non-identity element mappings,
        dual functionals that are not point evaluations, or data living
        on a different mesh.
    :raises RuntimeError: on a rank or shape mismatch between ``expr``
        and the element of ``V``, or for an ``Expression`` without ``eval``.
    """
    target_element = create_element(V.ufl_element(), vector_is_mixed=False)

    if access is op2.READ:
        raise ValueError("Can't have READ access for output function")
    if V.ufl_element().mapping() != "identity":
        raise NotImplementedError("Can only interpolate onto elements "
                                  "with affine mapping. Try projecting instead")

    # Gather the single evaluation point of every dual basis functional.
    eval_points = []
    for functional in target_element.dual_basis():
        if not isinstance(functional, FIAT.functional.PointEvaluation):
            raise NotImplementedError("Can only interpolate onto point "
                                      "evaluation operators. Try projecting instead")
        (point,) = functional.pt_dict.keys()
        eval_points.append(point)

    expr_rank = len(expr.ufl_shape)
    space_rank = len(V.ufl_element().value_shape())
    if expr_rank != space_rank:
        raise RuntimeError('Rank mismatch: Expression rank %d, FunctionSpace rank %d'
                           % (expr_rank, space_rank))

    if expr.ufl_shape != V.ufl_element().value_shape():
        raise RuntimeError('Shape mismatch: Expression shape %r, FunctionSpace shape %r'
                           % (expr.ufl_shape, V.ufl_element().value_shape()))

    mesh = V.ufl_domain()
    coords = mesh.coordinates

    if isinstance(expr, firedrake.Expression):
        # Python-level expressions must supply an ``eval`` method.
        if not hasattr(expr, "eval"):
            raise RuntimeError("Attempting to evaluate an Expression which has no value.")
        kernel, oriented, needs_cell_sizes, coefficients = compile_python_kernel(
            expr, eval_points, target_element, V, coords)
    else:
        if expr.ufl_domain() and expr.ufl_domain() != V.mesh():
            raise NotImplementedError("Interpolation onto another mesh not supported.")
        if expr.ufl_shape != V.shape:
            raise ValueError("UFL expression has incorrect shape for interpolation.")
        ast, oriented, needs_cell_sizes, coefficients, _ = compile_ufl_kernel(
            expr, eval_points, coords, coffee=False)
        kernel = op2.Kernel(ast, ast.name)

    iterset = coords.cell_set
    if subset is not None:
        assert subset.superset == iterset
        iterset = subset
    parloop_args = [kernel, iterset]

    # If the output dat is also one of the kernel's inputs, write into a
    # fresh dat instead and copy across before/after the parloop.
    copyin = ()
    copyout = ()
    if dat in {coeff.dat for coeff in coefficients}:
        output = dat
        dat = op2.Dat(dat.dataset)
        if access is not op2.WRITE:
            copyin = (partial(output.copy, dat), )
        copyout = (partial(dat.copy, output), )

    parloop_args.append(dat(access, V.cell_node_map()))
    if oriented:
        orientations = mesh.cell_orientations()
        parloop_args.append(
            orientations.dat(op2.READ, orientations.cell_node_map()))
    if needs_cell_sizes:
        sizes = mesh.cell_sizes
        parloop_args.append(sizes.dat(op2.READ, sizes.cell_node_map()))
    for coeff in coefficients:
        coeff_map = coeff.cell_node_map()
        parloop_args.append(coeff.dat(op2.READ, coeff_map))

    # Every coefficient with a domain must share the target mesh's topology.
    for coeff in coefficients:
        coeff_domain = coeff.ufl_domain()
        if coeff_domain is None:
            continue
        if coeff_domain.topology != mesh.topology:
            raise NotImplementedError("Interpolation onto another mesh not supported.")

    return copyin + (op2.ParLoop(*parloop_args).compute, ) + copyout

Ejemplo n.º 3

Mostrar archivo

Archivo: assemble.py Proyecto: xywei/firedrake

def _assemble(f, tensor=None, bcs=None, form_compiler_parameters=None,
              inverse=False, mat_type=None, sub_mat_type=None,
              appctx={},
              options_prefix=None,
              assemble_now=False,
              allocate_only=False,
              zero_tensor=True,
              diagonal=False):
    r"""Assemble the form or Slate expression f and return a Firedrake object
    representing the result. This will be a :class:`float` for 0-forms/rank-0
    Slate tensors, a :class:`.Function` for 1-forms/rank-1 Slate tensors and
    a :class:`.Matrix` for 2-forms/rank-2 Slate tensors.

    This function is a generator.  It yields zero-argument callables (the
    tensor-zeroing operation, the ``compute`` method of each parloop,
    boundary condition applications, the matrix ``assemble`` method and a
    callable returning the result).  Input validation therefore only
    happens once iteration starts.

    :arg f: the form or Slate expression to assemble.
    :arg bcs: A tuple of :class`.DirichletBC`\s and/or :class`.EquationBCSplit`\s to be applied.
    :arg tensor: An existing tensor object into which the form should be
        assembled. If this is not supplied, a new tensor will be created for
        the purpose.
    :arg form_compiler_parameters: (optional) dict of parameters to pass to
        the form compiler.
    :arg inverse: (optional) if f is a 2-form, then assemble the inverse
         of the local matrices.
    :arg mat_type: (optional) type for assembled matrices, one of
        "nest", "aij", "baij", or "matfree".
    :arg sub_mat_type: (optional) type for assembled sub matrices
        inside a "nest" matrix.  One of "aij" or "baij".
    :arg appctx: Additional information to hang on the assembled
         matrix if an implicit matrix is requested (mat_type "matfree").
         NOTE(review): the default is a shared mutable dict; this function
         only forwards it, but confirm that no callee mutates it.
    :arg options_prefix: An options prefix for the PETSc matrix
        (ignored if not assembling a bilinear form).
    :arg assemble_now: (optional) for vector results, apply each
        :class:`.DirichletBC` (or set its nodes to 1 when ``diagonal``)
        instead of zeroing it; when ``zero_tensor`` is also true, the
        callable returning the result is yielded last.
    :arg allocate_only: (optional) for matrix results, yield only the
        callable returning the (allocated) matrix and stop.
    :arg zero_tensor: (optional) if true, first yield the operation that
        zeroes an existing tensor and, at the end, yield the matrix
        ``assemble`` operation (matrix results only).
    :arg diagonal: (optional) assemble a rank-2 form into a vector-like
        result; not supported with Slate or "matfree".

    :raises ValueError: for an unrecognised ``mat_type`` or ``sub_mat_type``,
        an ``inverse`` request on a non 2-form, mismatching tensor types, or
        an unknown integral type.
    :raises NotImplementedError: for unsupported combinations (multiple
        mesh topologies, Slate or "matfree" with ``diagonal``, etc.).
    """
    if mat_type is None:
        mat_type = parameters.parameters["default_matrix_type"]
    if mat_type not in ["matfree", "aij", "baij", "nest"]:
        raise ValueError("Unrecognised matrix type, '%s'" % mat_type)
    if sub_mat_type is None:
        sub_mat_type = parameters.parameters["default_sub_matrix_type"]
    if sub_mat_type not in ["aij", "baij"]:
        # Interpolate the offending value into the message (it used to be
        # passed as a second exception argument, leaving a literal '%s').
        raise ValueError("Invalid submatrix type, '%s' (not 'aij' or 'baij')" % sub_mat_type)

    # Work on a copy so the caller's parameter dict is never modified.
    if form_compiler_parameters:
        form_compiler_parameters = form_compiler_parameters.copy()
    else:
        form_compiler_parameters = {}
    form_compiler_parameters["assemble_inverse"] = inverse

    topology = f.ufl_domains()[0].topology
    for m in f.ufl_domains():
        # Ensure mesh is "initialised" (could have got here without
        # building a functionspace (e.g. if integrating a constant)).
        m.init()
        if m.topology != topology:
            raise NotImplementedError("All integration domains must share a mesh topology.")

    for o in chain(f.arguments(), f.coefficients()):
        domain = o.ufl_domain()
        if domain is not None and domain.topology != topology:
            raise NotImplementedError("Assembly with multiple meshes not supported.")

    if isinstance(f, slate.TensorBase):
        if diagonal:
            raise NotImplementedError("Diagonal + slate not supported")
        kernels = slac.compile_expression(f, tsfc_parameters=form_compiler_parameters)
        integral_types = [kernel.kinfo.integral_type for kernel in kernels]
    else:
        kernels = tsfc_interface.compile_form(f, "form", parameters=form_compiler_parameters, inverse=inverse, diagonal=diagonal)
        integral_types = [integral.integral_type() for integral in f.integrals()]

        if bcs is not None:
            for bc in bcs:
                integral_types += [integral.integral_type() for integral in bc.integrals()]

    rank = len(f.arguments())
    if diagonal:
        assert rank == 2
    # A diagonal assembly of a 2-form produces a vector-like result.
    is_mat = rank == 2 and not diagonal
    is_vec = rank == 1 or diagonal

    if any((coeff.function_space() and coeff.function_space().component is not None)
           for coeff in f.coefficients()):
        raise NotImplementedError("Integration of subscripted VFS not yet implemented")

    if inverse and rank != 2:
        raise ValueError("Can only assemble the inverse of a 2-form")

    # No-op unless an existing tensor is supplied, in which case it is
    # replaced by that tensor's ``zero`` method below.
    zero_tensor_parloop = lambda: None

    if is_mat:
        matfree = mat_type == "matfree"
        nest = mat_type == "nest"
        if nest:
            baij = sub_mat_type == "baij"
        else:
            baij = mat_type == "baij"
        # intercept matrix-free matrices here
        if matfree:
            if inverse:
                raise NotImplementedError("Inverse not implemented with matfree")
            if diagonal:
                raise NotImplementedError("Diagonal not implemented with matfree")
            if tensor is None:
                result_matrix = matrix.ImplicitMatrix(f, bcs,
                                                      fc_params=form_compiler_parameters,
                                                      appctx=appctx,
                                                      options_prefix=options_prefix)
                yield lambda: result_matrix
                return
            if not isinstance(tensor, matrix.ImplicitMatrix):
                raise ValueError("Expecting implicit matrix with matfree")
            tensor.assemble()
            yield lambda: tensor
            return

        test, trial = f.arguments()

        map_pairs = []
        cell_domains = []
        exterior_facet_domains = []
        interior_facet_domains = []
        if tensor is None:
            # For horizontal facets of extruded meshes, the corresponding domain
            # in the base mesh is the cell domain. Hence all the maps used for top
            # bottom and interior horizontal facets will use the cell to dofs map
            # coming from the base mesh as a starting point for the actual dynamic map
            # computation.
            for integral_type in integral_types:
                if integral_type == "cell":
                    cell_domains.append(op2.ALL)
                elif integral_type == "exterior_facet":
                    exterior_facet_domains.append(op2.ALL)
                elif integral_type == "interior_facet":
                    interior_facet_domains.append(op2.ALL)
                elif integral_type == "exterior_facet_bottom":
                    cell_domains.append(op2.ON_BOTTOM)
                elif integral_type == "exterior_facet_top":
                    cell_domains.append(op2.ON_TOP)
                elif integral_type == "exterior_facet_vert":
                    exterior_facet_domains.append(op2.ALL)
                elif integral_type == "interior_facet_horiz":
                    cell_domains.append(op2.ON_INTERIOR_FACETS)
                elif integral_type == "interior_facet_vert":
                    interior_facet_domains.append(op2.ALL)
                else:
                    raise ValueError('Unknown integral type "%s"' % integral_type)

            # Used for the sparsity construction
            iteration_regions = []
            if cell_domains:
                map_pairs.append((test.cell_node_map(), trial.cell_node_map()))
                iteration_regions.append(tuple(cell_domains))
            if exterior_facet_domains:
                map_pairs.append((test.exterior_facet_node_map(), trial.exterior_facet_node_map()))
                iteration_regions.append(tuple(exterior_facet_domains))
            if interior_facet_domains:
                map_pairs.append((test.interior_facet_node_map(), trial.interior_facet_node_map()))
                iteration_regions.append(tuple(interior_facet_domains))

            map_pairs = tuple(map_pairs)
            # Construct OP2 Mat to assemble into
            fs_names = (test.function_space().name, trial.function_space().name)

            try:
                sparsity = op2.Sparsity((test.function_space().dof_dset,
                                         trial.function_space().dof_dset),
                                        map_pairs,
                                        iteration_regions=iteration_regions,
                                        name="%s_%s_sparsity" % fs_names,
                                        nest=nest,
                                        block_sparse=baij)
            except SparsityFormatError:
                raise ValueError("Monolithic matrix assembly is not supported for systems with R-space blocks.")

            result_matrix = matrix.Matrix(f, bcs, mat_type, sparsity, ScalarType,
                                          "%s_%s_matrix" % fs_names,
                                          options_prefix=options_prefix)
            tensor = result_matrix.M
        else:
            if isinstance(tensor, matrix.ImplicitMatrix):
                raise ValueError("Expecting matfree with implicit matrix")

            result_matrix = tensor
            tensor = tensor.M
            zero_tensor_parloop = tensor.zero

        if result_matrix.block_shape != (1, 1) and mat_type == "baij":
            raise ValueError("BAIJ matrix type makes no sense for mixed spaces, use 'aij'")

        def mat(testmap, trialmap, rowbc, colbc, i, j):
            # Build the parloop argument for block (i, j) of the matrix,
            # with local-to-global maps adjusted for the given row/column bcs.
            m = testmap(test.function_space()[i])
            n = trialmap(trial.function_space()[j])
            maps = (m if m else None, n if n else None)

            rlgmap, clgmap = tensor[i, j].local_to_global_maps
            V = test.function_space()[i]
            rlgmap = V.local_to_global_map(rowbc, lgmap=rlgmap)
            V = trial.function_space()[j]
            clgmap = V.local_to_global_map(colbc, lgmap=clgmap)
            if rowbc is None:
                rowbc = []
            if colbc is None:
                colbc = []
            unroll = any(bc.function_space().component is not None
                         for bc in chain(rowbc, colbc) if bc is not None)
            return tensor[i, j](op2.INC, maps, lgmaps=(rlgmap, clgmap), unroll_map=unroll)

        result = lambda: result_matrix
        if allocate_only:
            yield result
            return
    elif is_vec:
        test = f.arguments()[0]
        if tensor is None:
            result_function = function.Function(test.function_space())
            tensor = result_function.dat
        else:
            result_function = tensor
            tensor = result_function.dat
            zero_tensor_parloop = tensor.zero

        def vec(testmap, i):
            # Build the parloop argument for block i of the vector.
            _testmap = testmap(test.function_space()[i])
            return tensor[i](op2.INC, _testmap if _testmap else None)
        result = lambda: result_function
    else:
        # 0-forms are always scalar
        if tensor is None:
            tensor = op2.Global(1, [0.0])
        else:
            raise ValueError("Can't assemble 0-form into existing tensor")
        result = lambda: tensor.data[0]

    coefficients = f.coefficients()
    domains = f.ufl_domains()

    # These will be used to correctly interpret the "otherwise"
    # subdomain
    all_integer_subdomain_ids = defaultdict(list)
    for k in kernels:
        if k.kinfo.subdomain_id != "otherwise":
            all_integer_subdomain_ids[k.kinfo.integral_type].append(k.kinfo.subdomain_id)
    for k, v in all_integer_subdomain_ids.items():
        all_integer_subdomain_ids[k] = tuple(sorted(v))

    # In collecting loops mode, we collect the loops, and assume the
    # boundary conditions provided are the ones we want.  It therefore
    # is only used inside residual and jacobian assembly.

    if zero_tensor:
        yield zero_tensor_parloop
    for indices, kinfo in kernels:
        kernel = kinfo.kernel
        integral_type = kinfo.integral_type
        domain_number = kinfo.domain_number
        subdomain_id = kinfo.subdomain_id
        coeff_map = kinfo.coefficient_map
        pass_layer_arg = kinfo.pass_layer_arg
        needs_orientations = kinfo.oriented
        needs_cell_facets = kinfo.needs_cell_facets
        needs_cell_sizes = kinfo.needs_cell_sizes

        m = domains[domain_number]
        subdomain_data = f.subdomain_data()[m]
        # Find argument space indices
        if is_mat:
            i, j = indices
        elif is_vec:
            i, = indices
        else:
            assert len(indices) == 0

        sdata = subdomain_data.get(integral_type, None)
        if integral_type != 'cell' and sdata is not None:
            raise NotImplementedError("subdomain_data only supported with cell integrals.")

        # Extract block from tensor and test/trial spaces
        # FIXME Ugly variable renaming required because functions are not
        # lexical closures in Python and we're writing to these variables
        if is_mat:
            if bcs is not None:
                tsbc = list(chain(*bcs))
                if result_matrix.block_shape > (1, 1):
                    trbc = [bc for bc in tsbc if bc.function_space_index() == j and isinstance(bc, DirichletBC)]
                    tsbc = [bc for bc in tsbc if bc.function_space_index() == i]
                else:
                    trbc = [bc for bc in tsbc if isinstance(bc, DirichletBC)]
            else:
                tsbc = []
                trbc = []

        # Now build arguments for the par_loop
        kwargs = {}
        # Some integrals require non-coefficient arguments at the
        # end (facet number information).
        extra_args = []
        # Decoration for applying to matrix maps in extruded case
        decoration = None
        itspace = m.measure_set(integral_type, subdomain_id,
                                all_integer_subdomain_ids)
        if integral_type == "cell":
            itspace = sdata or itspace
            if subdomain_id not in ["otherwise", "everywhere"] and sdata is not None:
                raise ValueError("Cannot use subdomain data and subdomain_id")

            def get_map(x):
                return x.cell_node_map()

        elif integral_type in ("exterior_facet", "exterior_facet_vert"):
            extra_args.append(m.exterior_facets.local_facet_dat(op2.READ))

            def get_map(x):
                return x.exterior_facet_node_map()

        elif integral_type in ("exterior_facet_top", "exterior_facet_bottom"):
            # In the case of extruded meshes with horizontal facet integrals, two
            # parallel loops will (potentially) get created and called based on the
            # domain id: interior horizontal, bottom or top.
            decoration = {"exterior_facet_top": op2.ON_TOP,
                          "exterior_facet_bottom": op2.ON_BOTTOM}[integral_type]
            kwargs["iterate"] = decoration

            def get_map(x):
                return x.cell_node_map()

        elif integral_type in ("interior_facet", "interior_facet_vert"):
            extra_args.append(m.interior_facets.local_facet_dat(op2.READ))

            def get_map(x):
                return x.interior_facet_node_map()

        elif integral_type == "interior_facet_horiz":
            decoration = op2.ON_INTERIOR_FACETS
            kwargs["iterate"] = decoration

            def get_map(x):
                return x.cell_node_map()

        else:
            raise ValueError("Unknown integral type '%s'" % integral_type)

        # Output argument
        if is_mat:
            tensor_arg = mat(lambda s: get_map(s),
                             lambda s: get_map(s),
                             tsbc, trbc,
                             i, j)
        elif is_vec:
            tensor_arg = vec(lambda s: get_map(s), i)
        else:
            tensor_arg = tensor(op2.INC)

        coords = m.coordinates
        args = [kernel, itspace, tensor_arg,
                coords.dat(op2.READ, get_map(coords))]
        if needs_orientations:
            o = m.cell_orientations()
            args.append(o.dat(op2.READ, get_map(o)))
        if needs_cell_sizes:
            o = m.cell_sizes
            args.append(o.dat(op2.READ, get_map(o)))

        for n in coeff_map:
            c = coefficients[n]
            for c_ in c.split():
                m_ = get_map(c_)
                args.append(c_.dat(op2.READ, m_))
        if needs_cell_facets:
            assert integral_type == "cell"
            extra_args.append(m.cell_to_facets(op2.READ))

        args.extend(extra_args)
        kwargs["pass_layer_arg"] = pass_layer_arg
        try:
            yield op2.ParLoop(*args, **kwargs).compute
        except MapValueError:
            raise RuntimeError("Integral measure does not match measure of all coefficients/arguments")

    # Must apply bcs outside loop over kernels because we may wish
    # to apply bcs to a block which is otherwise zero, and
    # therefore does not have an associated kernel.
    if bcs is not None and is_mat:
        for bc in bcs:
            if isinstance(bc, DirichletBC):
                fs = bc.function_space()
                # Evaluate this outwith a "collecting_loops" block,
                # since creation of the bc nodes actually can create a
                # par_loop.
                nodes = bc.nodes
                if len(fs) > 1:
                    raise RuntimeError(r"""Cannot apply boundary conditions to full mixed space. Did you forget to index it?""")
                shape = result_matrix.block_shape
                for i in range(shape[0]):
                    for j in range(shape[1]):
                        # Set diagonal entries on bc nodes to 1 if the current
                        # block is on the matrix diagonal and its index matches the
                        # index of the function space the bc is defined on.
                        if i != j:
                            continue
                        if fs.component is None and fs.index is not None:
                            # Mixed, index (no ComponentFunctionSpace)
                            if fs.index == i:
                                yield functools.partial(tensor[i, j].set_local_diagonal_entries, nodes)
                        elif fs.component is not None:
                            # ComponentFunctionSpace, check parent index
                            if fs.parent.index is not None:
                                # Mixed, index doesn't match
                                if fs.parent.index != i:
                                    continue
                            # Index matches
                            yield functools.partial(tensor[i, j].set_local_diagonal_entries, nodes, idx=fs.component)
                        elif fs.index is None:
                            yield functools.partial(tensor[i, j].set_local_diagonal_entries, nodes)
                        else:
                            raise RuntimeError("Unhandled BC case")
            elif isinstance(bc, EquationBCSplit):
                # Assemble the boundary equation into the same matrix without
                # zeroing it again.
                yield from _assemble(bc.f, tensor=result_matrix, bcs=bc.bcs,
                                     form_compiler_parameters=form_compiler_parameters,
                                     inverse=inverse, mat_type=mat_type,
                                     sub_mat_type=sub_mat_type,
                                     appctx=appctx,
                                     assemble_now=assemble_now,
                                     allocate_only=False,
                                     zero_tensor=False)
            else:
                raise NotImplementedError("Undefined type of bcs class provided.")

    if bcs is not None and is_vec:
        for bc in bcs:
            if isinstance(bc, DirichletBC):
                if assemble_now:
                    if diagonal:
                        yield functools.partial(bc.set, result_function, 1)
                    else:
                        yield functools.partial(bc.apply, result_function)
                else:
                    yield functools.partial(bc.zero, result_function)
            elif isinstance(bc, EquationBCSplit):
                if diagonal:
                    raise NotImplementedError("diagonal assembly and EquationBC not supported")
                yield functools.partial(bc.zero, result_function)
                # Assemble the boundary equation into the same function
                # without zeroing it again.
                yield from _assemble(bc.f, tensor=result_function, bcs=bc.bcs,
                                     form_compiler_parameters=form_compiler_parameters,
                                     inverse=inverse, mat_type=mat_type,
                                     sub_mat_type=sub_mat_type,
                                     appctx=appctx,
                                     assemble_now=assemble_now,
                                     allocate_only=False,
                                     zero_tensor=False)
    # Recursive calls pass zero_tensor=False, so only the outermost call
    # queues the final assembly and the result.
    if zero_tensor:
        if is_mat:
            # Queue up matrix assembly (after we've done all the other operations)
            yield tensor.assemble
        if assemble_now:
            yield result

Ejemplo n.º 4

Mostrar archivo

Archivo: assemble.py Proyecto: connorjward/firedrake

def _make_parloops(expr, tensor, bcs, diagonal, fc_params, assembly_rank):
    """Create parloops for the assembly of the expression.

    :arg expr: The expression to be assembled.
    :arg tensor: The tensor to write to. Depending on ``expr`` and ``diagonal``
        this will either be a scalar (:class:`~pyop2.op2.Global`),
        vector/cofunction (masquerading as a :class:`.Function`) or :class:`.Matrix`.
    :arg bcs: Iterable of boundary conditions. Only read when assembling a
        matrix, where it is flattened with ``chain(*bcs)``.
    :arg diagonal: (:class:`bool`) If assembling a matrix is it diagonal?
    :arg fc_params: Dictionary of parameters to pass to the form compiler.
        A copy is taken, so the caller's dictionary is not modified here.
    :arg assembly_rank: The appropriate :class:`_AssemblyRank`.

    :returns: A tuple of the generated :class:`~pyop2.op2.ParLoop` objects.
    :raises NotImplementedError: if the integration domains do not share a
        single mesh topology, if an argument or coefficient lives on a
        different topology, or if subdomain data is supplied for a non-cell
        integral.
    :raises ValueError: for an unknown integral type, or when subdomain data
        is combined with an explicit subdomain id.
    :raises RuntimeError: if constructing a parloop raises ``MapValueError``.
    """
    form_compiler_parameters = fc_params.copy() if fc_params else {}

    try:
        # Unpacking a one-element set fails with ValueError when the domains
        # have zero or several distinct topologies.
        topology, = set(d.topology for d in expr.ufl_domains())
    except ValueError as err:
        raise NotImplementedError(
            "All integration domains must share a mesh topology") from err
    for m in expr.ufl_domains():
        # Ensure mesh is "initialised" (could have got here without
        # building a functionspace (e.g. if integrating a constant)).
        m.init()

    for o in chain(expr.arguments(), expr.coefficients()):
        domain = o.ufl_domain()
        if domain is not None and domain.topology != topology:
            raise NotImplementedError(
                "Assembly with multiple meshes not supported.")

    # Pick the factory that produces the output Arg for each parloop.
    if assembly_rank == _AssemblyRank.MATRIX:
        test, trial = expr.arguments()
        create_op2arg = functools.partial(_matrix_arg,
                                          all_bcs=tuple(chain(*bcs)),
                                          matrix=tensor,
                                          Vrow=test.function_space(),
                                          Vcol=trial.function_space())
    elif assembly_rank == _AssemblyRank.VECTOR:
        if diagonal:
            # actually a 2-form but throw away the trial space
            test, _ = expr.arguments()
        else:
            test, = expr.arguments()
        create_op2arg = functools.partial(_vector_arg,
                                          function=tensor,
                                          V=test.function_space())
    else:
        # Scalar case: the tensor itself is called with the access descriptor.
        create_op2arg = tensor

    coefficients = expr.coefficients()
    domains = expr.ufl_domains()

    if isinstance(expr, slate.TensorBase):
        kernels = slac.compile_expression(
            expr, compiler_parameters=form_compiler_parameters)
    else:
        kernels = tsfc_interface.compile_form(
            expr,
            "form",
            parameters=form_compiler_parameters,
            diagonal=diagonal)

    # These will be used to correctly interpret the "otherwise"
    # subdomain
    all_integer_subdomain_ids = defaultdict(list)
    for k in kernels:
        if k.kinfo.subdomain_id != "otherwise":
            all_integer_subdomain_ids[k.kinfo.integral_type].append(
                k.kinfo.subdomain_id)
    # Only existing keys are reassigned, so the dict size does not change
    # during iteration.
    for k, v in all_integer_subdomain_ids.items():
        all_integer_subdomain_ids[k] = tuple(sorted(v))

    parloops = []
    for indices, kinfo in kernels:
        kernel = kinfo.kernel
        integral_type = kinfo.integral_type
        domain_number = kinfo.domain_number
        subdomain_id = kinfo.subdomain_id
        coeff_map = kinfo.coefficient_map
        pass_layer_arg = kinfo.pass_layer_arg
        needs_orientations = kinfo.oriented
        needs_cell_facets = kinfo.needs_cell_facets
        needs_cell_sizes = kinfo.needs_cell_sizes

        m = domains[domain_number]
        subdomain_data = expr.subdomain_data()[m]
        # Find argument space indices
        if assembly_rank == _AssemblyRank.MATRIX:
            i, j = indices
        elif assembly_rank == _AssemblyRank.VECTOR:
            i, = indices
        else:
            assert len(indices) == 0

        sdata = subdomain_data.get(integral_type, None)
        if integral_type != 'cell' and sdata is not None:
            raise NotImplementedError(
                "subdomain_data only supported with cell integrals.")

        # Now build arguments for the par_loop
        kwargs = {}
        # Some integrals require non-coefficient arguments at the
        # end (facet number information).
        extra_args = []
        itspace = m.measure_set(integral_type, subdomain_id,
                                all_integer_subdomain_ids)
        if integral_type == "cell":
            # Subdomain data, when given, replaces the measure set.
            itspace = sdata or itspace
            if subdomain_id not in ["otherwise", "everywhere"
                                    ] and sdata is not None:
                raise ValueError("Cannot use subdomain data and subdomain_id")

            def get_map(x):
                return x.cell_node_map()
        elif integral_type in ("exterior_facet", "exterior_facet_vert"):
            extra_args.append(m.exterior_facets.local_facet_dat(op2.READ))

            def get_map(x):
                return x.exterior_facet_node_map()
        elif integral_type in ("exterior_facet_top", "exterior_facet_bottom"):
            # In the case of extruded meshes with horizontal facet integrals, two
            # parallel loops will (potentially) get created and called based on the
            # domain id: interior horizontal, bottom or top.
            kwargs["iterate"] = {
                "exterior_facet_top": op2.ON_TOP,
                "exterior_facet_bottom": op2.ON_BOTTOM
            }[integral_type]

            def get_map(x):
                return x.cell_node_map()
        elif integral_type in ("interior_facet", "interior_facet_vert"):
            extra_args.append(m.interior_facets.local_facet_dat(op2.READ))

            def get_map(x):
                return x.interior_facet_node_map()
        elif integral_type == "interior_facet_horiz":
            kwargs["iterate"] = op2.ON_INTERIOR_FACETS

            def get_map(x):
                return x.cell_node_map()
        else:
            raise ValueError("Unknown integral type '%s'" % integral_type)

        # Output argument
        if assembly_rank == _AssemblyRank.MATRIX:
            tensor_arg = create_op2arg(op2.INC, get_map, i, j)
        elif assembly_rank == _AssemblyRank.VECTOR:
            tensor_arg = create_op2arg(op2.INC, get_map, i)
        else:
            tensor_arg = create_op2arg(op2.INC)

        # Argument order: kernel, iteration set, output, coordinates, then
        # optional orientations / cell sizes, coefficients, and extra args.
        coords = m.coordinates
        args = [
            kernel, itspace, tensor_arg,
            coords.dat(op2.READ, get_map(coords))
        ]
        if needs_orientations:
            o = m.cell_orientations()
            args.append(o.dat(op2.READ, get_map(o)))
        if needs_cell_sizes:
            o = m.cell_sizes
            args.append(o.dat(op2.READ, get_map(o)))

        # Each coefficient-map entry selects which split components of
        # coefficient ``n`` are passed to the kernel.
        for n, split_map in coeff_map:
            c = coefficients[n]
            split_c = c.split()
            for c_ in (split_c[i] for i in split_map):
                m_ = get_map(c_)
                args.append(c_.dat(op2.READ, m_))

        if needs_cell_facets:
            assert integral_type == "cell"
            extra_args.append(m.cell_to_facets(op2.READ))
        if pass_layer_arg:
            # NOTE(review): the Global carries ``itspace.layers - 2``;
            # presumably the index of the last cell layer -- confirm.
            c = op2.Global(1,
                           itspace.layers - 2,
                           dtype=numpy.dtype(numpy.int32))
            o = c(op2.READ)
            extra_args.append(o)

        args.extend(extra_args)
        kwargs["pass_layer_arg"] = pass_layer_arg
        try:
            parloops.append(op2.ParLoop(*args, **kwargs))
        except MapValueError as err:
            # Keep the original map error as the explicit cause.
            raise RuntimeError(
                "Integral measure does not match measure of all coefficients/arguments"
            ) from err
    return tuple(parloops)

Ejemplo n.º 5

Mostrar archivo

Archivo: interpolation.py Proyecto: jcwang1027/firedrake

def _interpolator(V, tensor, expr, subset, arguments, access):
    """Build the callables that interpolate ``expr`` into ``tensor``.

    :arg V: the function space being interpolated into; its UFL element,
        mesh and cell-node map are used.
    :arg tensor: the output object. It is handled as an
        :class:`op2.Global`, an :class:`op2.Dat`, or otherwise treated as a
        matrix.
    :arg expr: the expression to interpolate. Either something accepted by
        ``compile_expression_dual_evaluation`` or a
        :class:`firedrake.Expression` that has an ``eval`` attribute.
    :arg subset: optional iteration subset. If not ``None`` its ``superset``
        must equal the coordinate cell set, and it replaces that set.
    :arg arguments: only read in the matrix case, where
        ``arguments[0].function_space()`` supplies the column map.
    :arg access: access descriptor for the output. ``op2.READ`` is rejected
        and the matrix branch asserts ``op2.WRITE``.
    :returns: a tuple of zero-argument callables to invoke in order. For a
        matrix output this is ``(parloop.compute, tensor.assemble)``;
        otherwise the parloop's ``compute`` surrounded by any copy-in /
        copy-out steps.
    :raises NotImplementedError: if no element can be created for ``V``, if
        ``expr`` or a coefficient lives on a different mesh, or if a Python
        kernel is used with a non-point-evaluation dual.
    :raises ValueError: if ``access`` is ``op2.READ``.
    :raises RuntimeError: on rank or shape mismatch between ``expr`` and
        ``V``, or if an Expression has no ``eval``.
    """
    try:
        to_element = create_base_element(V.ufl_element())
    except KeyError as err:
        # FInAT only elements
        raise NotImplementedError(
            "Don't know how to create FIAT element for %s" % V.ufl_element()) from err

    if access is op2.READ:
        raise ValueError("Can't have READ access for output function")

    if len(expr.ufl_shape) != len(V.ufl_element().value_shape()):
        raise RuntimeError(
            'Rank mismatch: Expression rank %d, FunctionSpace rank %d' %
            (len(expr.ufl_shape), len(V.ufl_element().value_shape())))

    if expr.ufl_shape != V.ufl_element().value_shape():
        raise RuntimeError(
            'Shape mismatch: Expression shape %r, FunctionSpace shape %r' %
            (expr.ufl_shape, V.ufl_element().value_shape()))

    mesh = V.ufl_domain()
    coords = mesh.coordinates

    if not isinstance(expr, firedrake.Expression):
        if expr.ufl_domain() and expr.ufl_domain() != V.mesh():
            raise NotImplementedError(
                "Interpolation onto another mesh not supported.")
        ast, oriented, needs_cell_sizes, coefficients, _ = compile_expression_dual_evaluation(
            expr, to_element, coords, domain=V.mesh(), coffee=False)
        kernel = op2.Kernel(ast,
                            ast.name,
                            requires_zeroed_output_arguments=True)
    elif hasattr(expr, "eval"):
        # Python-kernel path: collect the evaluation point of every dual
        # functional; anything other than a point evaluation is rejected.
        to_pts = []
        for dual in to_element.fiat_equivalent.dual_basis():
            if not isinstance(dual, FIAT.functional.PointEvaluation):
                raise NotImplementedError(
                    "Can only interpolate Python kernels with Lagrange elements"
                )
            pts, = dual.pt_dict.keys()
            to_pts.append(pts)

        kernel, oriented, needs_cell_sizes, coefficients = compile_python_kernel(
            expr, to_pts, to_element, V, coords)
    else:
        raise RuntimeError(
            "Attempting to evaluate an Expression which has no value.")

    cell_set = coords.cell_set
    if subset is not None:
        assert subset.superset == cell_set
        cell_set = subset
    parloop_args = [kernel, cell_set]

    if tensor in set((c.dat for c in coefficients)):
        # The output is also an input: write into a temporary Dat and copy
        # the result back afterwards.
        output = tensor
        tensor = op2.Dat(tensor.dataset)
        if access is not op2.WRITE:
            # The existing output values are needed, so seed the temporary.
            copyin = (partial(output.copy, tensor), )
        else:
            copyin = ()
        copyout = (partial(tensor.copy, output), )
    else:
        copyin = ()
        copyout = ()
    if isinstance(tensor, op2.Global):
        parloop_args.append(tensor(access))
    elif isinstance(tensor, op2.Dat):
        parloop_args.append(tensor(access, V.cell_node_map()))
    else:
        assert access == op2.WRITE  # Other access descriptors not done for Matrices.
        parloop_args.append(
            tensor(op2.WRITE, (V.cell_node_map(),
                               arguments[0].function_space().cell_node_map())))
    if oriented:
        co = mesh.cell_orientations()
        parloop_args.append(co.dat(op2.READ, co.cell_node_map()))
    if needs_cell_sizes:
        cs = mesh.cell_sizes
        parloop_args.append(cs.dat(op2.READ, cs.cell_node_map()))
    for coefficient in coefficients:
        m_ = coefficient.cell_node_map()
        parloop_args.append(coefficient.dat(op2.READ, m_))

    for o in coefficients:
        domain = o.ufl_domain()
        if domain is not None and domain.topology != mesh.topology:
            raise NotImplementedError(
                "Interpolation onto another mesh not supported.")

    parloop = op2.ParLoop(*parloop_args).compute
    if isinstance(tensor, op2.Mat):
        return parloop, tensor.assemble
    else:
        return copyin + (parloop, ) + copyout

Ejemplo n.º 6

Mostrar archivo

def create_parloops(expr,
                    create_op2arg,
                    *,
                    assembly_rank=None,
                    diagonal=False,
                    form_compiler_parameters=None):
    """Create parallel loops for assembly of expr.

    :arg expr: The expression to assemble.
    :arg create_op2arg: callable that creates the Arg corresponding to
        the output tensor.
    :arg assembly_rank: are we assembling a scalar, vector, or matrix?
    :arg diagonal: For matrices are we actually assembling the
        diagonal into a vector?
    :arg form_compiler_parameters: parameters to pass to the form
        compiler.
    :returns: a generator yielding the bound ``compute`` method of each
        op2.ParLoop that is built.
    :raises NotImplementedError: for diagonal assembly of a slate
        expression, or subdomain data on a non-cell integral.
    :raises ValueError: for an unknown integral type, or when subdomain data
        is combined with an explicit subdomain id.
    :raises RuntimeError: if constructing a parloop raises ``MapValueError``.
    """
    coefficients = expr.coefficients()
    domains = expr.ufl_domains()

    if isinstance(expr, slate.TensorBase):
        if diagonal:
            raise NotImplementedError("Diagonal + slate not supported")
        kernels = slac.compile_expression(
            expr, tsfc_parameters=form_compiler_parameters)
    else:
        kernels = tsfc_interface.compile_form(
            expr,
            "form",
            parameters=form_compiler_parameters,
            diagonal=diagonal)

    # These will be used to correctly interpret the "otherwise"
    # subdomain
    all_integer_subdomain_ids = defaultdict(list)
    for k in kernels:
        if k.kinfo.subdomain_id != "otherwise":
            all_integer_subdomain_ids[k.kinfo.integral_type].append(
                k.kinfo.subdomain_id)
    # Only existing keys are reassigned, so the dict size does not change
    # during iteration.
    for k, v in all_integer_subdomain_ids.items():
        all_integer_subdomain_ids[k] = tuple(sorted(v))

    for indices, kinfo in kernels:
        kernel = kinfo.kernel
        integral_type = kinfo.integral_type
        domain_number = kinfo.domain_number
        subdomain_id = kinfo.subdomain_id
        coeff_map = kinfo.coefficient_map
        pass_layer_arg = kinfo.pass_layer_arg
        needs_orientations = kinfo.oriented
        needs_cell_facets = kinfo.needs_cell_facets
        needs_cell_sizes = kinfo.needs_cell_sizes

        m = domains[domain_number]
        subdomain_data = expr.subdomain_data()[m]
        # Find argument space indices
        if assembly_rank == AssemblyRank.MATRIX:
            i, j = indices
        elif assembly_rank == AssemblyRank.VECTOR:
            i, = indices
        else:
            assert len(indices) == 0

        sdata = subdomain_data.get(integral_type, None)
        if integral_type != 'cell' and sdata is not None:
            raise NotImplementedError(
                "subdomain_data only supported with cell integrals.")

        # Now build arguments for the par_loop
        kwargs = {}
        # Some integrals require non-coefficient arguments at the
        # end (facet number information).
        extra_args = []
        itspace = m.measure_set(integral_type, subdomain_id,
                                all_integer_subdomain_ids)
        if integral_type == "cell":
            # Subdomain data, when given, replaces the measure set.
            itspace = sdata or itspace
            if subdomain_id not in ["otherwise", "everywhere"
                                    ] and sdata is not None:
                raise ValueError("Cannot use subdomain data and subdomain_id")

            def get_map(x):
                return x.cell_node_map()
        elif integral_type in ("exterior_facet", "exterior_facet_vert"):
            extra_args.append(m.exterior_facets.local_facet_dat(op2.READ))

            def get_map(x):
                return x.exterior_facet_node_map()
        elif integral_type in ("exterior_facet_top", "exterior_facet_bottom"):
            # In the case of extruded meshes with horizontal facet integrals, two
            # parallel loops will (potentially) get created and called based on the
            # domain id: interior horizontal, bottom or top.
            kwargs["iterate"] = {
                "exterior_facet_top": op2.ON_TOP,
                "exterior_facet_bottom": op2.ON_BOTTOM
            }[integral_type]

            def get_map(x):
                return x.cell_node_map()
        elif integral_type in ("interior_facet", "interior_facet_vert"):
            extra_args.append(m.interior_facets.local_facet_dat(op2.READ))

            def get_map(x):
                return x.interior_facet_node_map()
        elif integral_type == "interior_facet_horiz":
            kwargs["iterate"] = op2.ON_INTERIOR_FACETS

            def get_map(x):
                return x.cell_node_map()
        else:
            raise ValueError("Unknown integral type '%s'" % integral_type)

        # Output argument
        if assembly_rank == AssemblyRank.MATRIX:
            tensor_arg = create_op2arg(op2.INC, get_map, i, j)
        elif assembly_rank == AssemblyRank.VECTOR:
            tensor_arg = create_op2arg(op2.INC, get_map, i)
        else:
            tensor_arg = create_op2arg(op2.INC)

        # Argument order: kernel, iteration set, output, coordinates, then
        # optional orientations / cell sizes, coefficients, and extra args.
        coords = m.coordinates
        args = [
            kernel, itspace, tensor_arg,
            coords.dat(op2.READ, get_map(coords))
        ]
        if needs_orientations:
            o = m.cell_orientations()
            args.append(o.dat(op2.READ, get_map(o)))
        if needs_cell_sizes:
            o = m.cell_sizes
            args.append(o.dat(op2.READ, get_map(o)))

        # Every split component of each mapped coefficient is passed.
        for n in coeff_map:
            c = coefficients[n]
            for c_ in c.split():
                m_ = get_map(c_)
                args.append(c_.dat(op2.READ, m_))
        if needs_cell_facets:
            assert integral_type == "cell"
            extra_args.append(m.cell_to_facets(op2.READ))
        if pass_layer_arg:
            # NOTE(review): the Global carries ``itspace.layers - 2``;
            # presumably the index of the last cell layer -- confirm.
            c = op2.Global(1,
                           itspace.layers - 2,
                           dtype=numpy.dtype(numpy.int32))
            o = c(op2.READ)
            extra_args.append(o)

        args.extend(extra_args)
        kwargs["pass_layer_arg"] = pass_layer_arg
        # Only the construction can raise MapValueError, so keep the yield
        # outside the try block and chain the original error as the cause.
        try:
            parloop = op2.ParLoop(*args, **kwargs)
        except MapValueError as err:
            raise RuntimeError(
                "Integral measure does not match measure of all coefficients/arguments"
            ) from err
        yield parloop.compute

Ejemplo n.º 7

Mostrar archivo

def _interpolator(V, tensor, expr, subset, arguments, access):
    """Build the callables that interpolate ``expr`` into ``tensor``.

    The parloop always iterates over cells of the target mesh (the mesh of
    ``V``). When ``expr`` lives on a different (source) mesh, the target
    must be a vertex-only mesh whose ``_parent_mesh`` is the source mesh.

    :arg V: the function space being interpolated into.
    :arg tensor: the output object. It is handled as an
        :class:`op2.Global`, an :class:`op2.Dat`, or otherwise treated as a
        matrix.
    :arg expr: the expression to interpolate; passed through
        ``ufl.as_ufl`` first.
    :arg subset: optional iteration subset. If not ``None`` its ``superset``
        must equal the target mesh cell set, and it replaces that set.
    :arg arguments: only read in the matrix case, where
        ``arguments[0].function_space()`` supplies the column map.
    :arg access: access descriptor for the output. ``op2.READ`` is rejected
        and the matrix branch asserts ``op2.WRITE``.
    :returns: a tuple of zero-argument callables to invoke in order. For a
        matrix output this is ``(parloop.compute, tensor.assemble)``;
        otherwise the parloop's ``compute`` surrounded by any copy-in /
        copy-out steps.
    :raises ValueError: if ``expr`` cannot be converted to UFL, if
        ``access`` is ``op2.READ``, if the meshes are incompatible, or if a
        coefficient lives on an unexpected mesh.
    :raises NotImplementedError: if no element can be created for ``V`` or
        the target of a cross-mesh interpolation is not a vertex-only mesh.
    :raises RuntimeError: on rank or shape mismatch between ``expr`` and
        ``V``.
    """
    try:
        expr = ufl.as_ufl(expr)
    except ufl.UFLException as err:
        raise ValueError("Expecting to interpolate a UFL expression") from err
    try:
        to_element = create_element(V.ufl_element())
    except KeyError as err:
        # FInAT only elements
        raise NotImplementedError(
            "Don't know how to create FIAT element for %s" % V.ufl_element()) from err

    if access is op2.READ:
        raise ValueError("Can't have READ access for output function")

    if len(expr.ufl_shape) != len(V.ufl_element().value_shape()):
        raise RuntimeError(
            'Rank mismatch: Expression rank %d, FunctionSpace rank %d' %
            (len(expr.ufl_shape), len(V.ufl_element().value_shape())))

    if expr.ufl_shape != V.ufl_element().value_shape():
        raise RuntimeError(
            'Shape mismatch: Expression shape %r, FunctionSpace shape %r' %
            (expr.ufl_shape, V.ufl_element().value_shape()))

    # NOTE: The par_loop is always over the target mesh cells.
    target_mesh = V.ufl_domain()
    source_mesh = expr.ufl_domain() or target_mesh

    if target_mesh is not source_mesh:
        if not isinstance(target_mesh.topology,
                          firedrake.mesh.VertexOnlyMeshTopology):
            raise NotImplementedError(
                "Can only interpolate onto a Vertex Only Mesh")
        if target_mesh.geometric_dimension(
        ) != source_mesh.geometric_dimension():
            raise ValueError(
                "Cannot interpolate onto a mesh of a different geometric dimension"
            )
        if not hasattr(
                target_mesh,
                "_parent_mesh") or target_mesh._parent_mesh is not source_mesh:
            raise ValueError(
                "Can only interpolate across meshes where the source mesh is the parent of the target"
            )
        # For trans-mesh interpolation we use a FInAT QuadratureElement as the
        # (base) target element with runtime point set expressions as their
        # quadrature rule point set and weights from their dual basis.
        # NOTE: This setup is useful for thinking about future design - in the
        # future this `rebuild` function can be absorbed into FInAT as a
        # transformer that eats an element and gives you an equivalent (which
        # may or may not be a QuadratureElement) that lets you do run time
        # tabulation. Alternatively (and this all depends on future design
        # decision about FInAT how dual evaluation should work) the
        # to_element's dual basis (which look rather like quadrature rules) can
        # have their pointset(s) directly replaced with run-time tabulated
        # equivalent(s) (i.e. finat.point_set.UnknownPointSet(s))
        rt_var_name = 'rt_X'
        to_element = rebuild(to_element, expr, rt_var_name)

    parameters = {}
    parameters['scalar_type'] = utils.ScalarType

    # We need to pass both the ufl element and the finat element
    # because the finat elements might not have the right mapping
    # (e.g. L2 Piola, or tensor element with symmetries)
    # FIXME: for the runtime unknown point set (for cross-mesh
    # interpolation) we have to pass the finat element we construct
    # here. Ideally we would only pass the UFL element through.
    kernel = compile_expression_dual_evaluation(expr,
                                                to_element,
                                                V.ufl_element(),
                                                domain=source_mesh,
                                                parameters=parameters)
    # Pull everything needed off the compiled kernel before ``kernel`` is
    # rebound to the op2.Kernel wrapper below.
    ast = kernel.ast
    oriented = kernel.oriented
    needs_cell_sizes = kernel.needs_cell_sizes
    coefficients = kernel.coefficients
    first_coeff_fake_coords = kernel.first_coefficient_fake_coords
    name = kernel.name
    kernel = op2.Kernel(ast,
                        name,
                        requires_zeroed_output_arguments=True,
                        flop_count=kernel.flop_count)
    cell_set = target_mesh.cell_set
    if subset is not None:
        assert subset.superset == cell_set
        cell_set = subset
    parloop_args = [kernel, cell_set]

    if first_coeff_fake_coords:
        # Replace with real source mesh coordinates
        coefficients[0] = source_mesh.coordinates

    if target_mesh is not source_mesh:
        # NOTE: TSFC will sometimes drop run-time arguments in generated
        # kernels if they are deemed not-necessary.
        # FIXME: Checking for argument name in the inner kernel to decide
        # whether to add an extra coefficient is a stopgap until
        # compile_expression_dual_evaluation
        #   (a) outputs a coefficient map to indicate argument ordering in
        #       parloops as `compile_form` does and
        #   (b) allows the dual evaluation related coefficients to be supplied to
        #       them rather than having to be added post-hoc (likely by
        #       replacing `to_element` with a CoFunction/CoArgument as the
        #       target `dual` which would contain `dual` related
        #       coefficient(s))
        if rt_var_name in [arg.name for arg in kernel.code[name].args]:
            # Add the coordinates of the target mesh quadrature points in the
            # source mesh's reference cell as an extra argument for the inner
            # loop. (With a vertex only mesh this is a single point for each
            # vertex cell.)
            coefficients.append(target_mesh.reference_coordinates)

    if tensor in set((c.dat for c in coefficients)):
        # The output is also an input: write into a temporary Dat and copy
        # the result back afterwards.
        output = tensor
        tensor = op2.Dat(tensor.dataset)
        if access is not op2.WRITE:
            # The existing output values are needed, so seed the temporary.
            copyin = (partial(output.copy, tensor), )
        else:
            copyin = ()
        copyout = (partial(tensor.copy, output), )
    else:
        copyin = ()
        copyout = ()
    if isinstance(tensor, op2.Global):
        parloop_args.append(tensor(access))
    elif isinstance(tensor, op2.Dat):
        parloop_args.append(tensor(access, V.cell_node_map()))
    else:
        assert access == op2.WRITE  # Other access descriptors not done for Matrices.
        rows_map = V.cell_node_map()
        columns_map = arguments[0].function_space().cell_node_map()
        if target_mesh is not source_mesh:
            # Since the par_loop is over the target mesh cells we need to
            # compose a map that takes us from target mesh cells to the
            # function space nodes on the source mesh.
            columns_map = compose_map_and_cache(
                target_mesh.cell_parent_cell_map, columns_map)
        parloop_args.append(tensor(op2.WRITE, (rows_map, columns_map)))
    if oriented:
        co = target_mesh.cell_orientations()
        parloop_args.append(co.dat(op2.READ, co.cell_node_map()))
    if needs_cell_sizes:
        cs = target_mesh.cell_sizes
        parloop_args.append(cs.dat(op2.READ, cs.cell_node_map()))
    for coefficient in coefficients:
        coeff_mesh = coefficient.ufl_domain()
        if coeff_mesh is target_mesh or not coeff_mesh:
            # NOTE: coeff_mesh is None is allowed e.g. when interpolating from
            # a Real space
            m_ = coefficient.cell_node_map()
        elif coeff_mesh is source_mesh:
            if coefficient.cell_node_map():
                # Since the par_loop is over the target mesh cells we need to
                # compose a map that takes us from target mesh cells to the
                # function space nodes on the source mesh.
                m_ = compose_map_and_cache(target_mesh.cell_parent_cell_map,
                                           coefficient.cell_node_map())
            else:
                # m_ is allowed to be None when interpolating from a Real space,
                # even in the trans-mesh case.
                m_ = coefficient.cell_node_map()
        else:
            raise ValueError("Have coefficient with unexpected mesh")
        parloop_args.append(coefficient.dat(op2.READ, m_))

    parloop = op2.ParLoop(*parloop_args)
    parloop_compute_callable = parloop.compute
    if isinstance(tensor, op2.Mat):
        return parloop_compute_callable, tensor.assemble
    else:
        return copyin + (parloop_compute_callable, ) + copyout