예제 #1
0
    def _reconstruction_calls(self, split_mixed_op, split_trace_op):
        """This generates the reconstruction calls for the unknowns using the
        Lagrange multipliers.

        :arg split_mixed_op: a ``dict`` of split forms that make up the broken
                             mixed operator from the original problem.
        :arg split_trace_op: a ``dict`` of split forms that make up the trace
                             contribution in the hybridized mixed system.
        """

        from firedrake.assemble import OneFormAssembler

        # We always eliminate the velocity block first
        id0, id1 = (self.vidx, self.pidx)

        # TODO: When PyOP2 is able to write into mixed dats,
        # the reconstruction expressions can simplify into
        # one clean expression.
        A = Tensor(split_mixed_op[(id0, id0)])
        B = Tensor(split_mixed_op[(id0, id1)])
        C = Tensor(split_mixed_op[(id1, id0)])
        D = Tensor(split_mixed_op[(id1, id1)])
        K_0 = Tensor(split_trace_op[(0, id0)])
        K_1 = Tensor(split_trace_op[(0, id1)])

        # Split functions and reconstruct each bit separately
        split_residual = self.broken_residual.split()
        split_sol = self.broken_solution.split()
        g = AssembledVector(split_residual[id0])
        f = AssembledVector(split_residual[id1])
        sigma = split_sol[id0]
        u = split_sol[id1]
        lambdar = AssembledVector(self.trace_solution)

        M = D - C * A.inv * B
        R = K_1.T - C * A.inv * K_0.T
        u_rec = M.solve(f - C * A.inv * g - R * lambdar,
                        decomposition="PartialPivLU")
        self._sub_unknown = OneFormAssembler(
            u_rec, tensor=u,
            form_compiler_parameters=self.ctx.fc_params).assemble

        sigma_rec = A.solve(g - B * AssembledVector(u) - K_0.T * lambdar,
                            decomposition="PartialPivLU")
        self._elim_unknown = OneFormAssembler(
            sigma_rec,
            tensor=sigma,
            form_compiler_parameters=self.ctx.fc_params).assemble
예제 #2
0
파일: kernels.py 프로젝트: xywei/firedrake
def dg_injection_kernel(Vf, Vc, ncell):
    from firedrake import Tensor, AssembledVector, TestFunction, TrialFunction
    from firedrake.slate.slac import compile_expression
    macro_builder = MacroKernelBuilder(ScalarType_c, ncell)
    f = ufl.Coefficient(Vf)
    macro_builder.set_coefficients([f])
    macro_builder.set_coordinates(Vf.mesh())

    Vfe = create_element(Vf.ufl_element())
    macro_quadrature_rule = make_quadrature(
        Vfe.cell, estimate_total_polynomial_degree(ufl.inner(f, f)))
    index_cache = {}
    parameters = default_parameters()
    integration_dim, entity_ids = lower_integral_type(Vfe.cell, "cell")
    macro_cfg = dict(interface=macro_builder,
                     ufl_cell=Vf.ufl_cell(),
                     precision=parameters["precision"],
                     integration_dim=integration_dim,
                     entity_ids=entity_ids,
                     index_cache=index_cache,
                     quadrature_rule=macro_quadrature_rule)

    fexpr, = fem.compile_ufl(f, **macro_cfg)
    X = ufl.SpatialCoordinate(Vf.mesh())
    C_a, = fem.compile_ufl(X, **macro_cfg)
    detJ = ufl_utils.preprocess_expression(
        abs(ufl.JacobianDeterminant(f.ufl_domain())))
    macro_detJ, = fem.compile_ufl(detJ, **macro_cfg)

    Vce = create_element(Vc.ufl_element())

    coarse_builder = firedrake_interface.KernelBuilder("cell", "otherwise", 0,
                                                       ScalarType_c)
    coarse_builder.set_coordinates(Vc.mesh())
    argument_multiindices = (Vce.get_indices(), )
    argument_multiindex, = argument_multiindices
    return_variable, = coarse_builder.set_arguments((ufl.TestFunction(Vc), ),
                                                    argument_multiindices)

    integration_dim, entity_ids = lower_integral_type(Vce.cell, "cell")
    # Midpoint quadrature for jacobian on coarse cell.
    quadrature_rule = make_quadrature(Vce.cell, 0)

    coarse_cfg = dict(interface=coarse_builder,
                      ufl_cell=Vc.ufl_cell(),
                      precision=parameters["precision"],
                      integration_dim=integration_dim,
                      entity_ids=entity_ids,
                      index_cache=index_cache,
                      quadrature_rule=quadrature_rule)

    X = ufl.SpatialCoordinate(Vc.mesh())
    K = ufl_utils.preprocess_expression(ufl.JacobianInverse(Vc.mesh()))
    C_0, = fem.compile_ufl(X, **coarse_cfg)
    K, = fem.compile_ufl(K, **coarse_cfg)

    i = gem.Index()
    j = gem.Index()

    C_0 = gem.Indexed(C_0, (j, ))
    C_0 = gem.index_sum(C_0, quadrature_rule.point_set.indices)
    C_a = gem.Indexed(C_a, (j, ))
    X_a = gem.Sum(C_0, gem.Product(gem.Literal(-1), C_a))

    K_ij = gem.Indexed(K, (i, j))
    K_ij = gem.index_sum(K_ij, quadrature_rule.point_set.indices)
    X_a = gem.index_sum(gem.Product(K_ij, X_a), (j, ))
    C_0, = quadrature_rule.point_set.points
    C_0 = gem.Indexed(gem.Literal(C_0), (i, ))
    # fine quad points in coarse reference space.
    X_a = gem.Sum(C_0, gem.Product(gem.Literal(-1), X_a))
    X_a = gem.ComponentTensor(X_a, (i, ))

    # Coarse basis function evaluated at fine quadrature points
    phi_c = fem.fiat_to_ufl(
        Vce.point_evaluation(0, X_a, (Vce.cell.get_dimension(), 0)), 0)

    tensor_indices = tuple(gem.Index(extent=d) for d in f.ufl_shape)

    phi_c = gem.Indexed(phi_c, argument_multiindex + tensor_indices)
    fexpr = gem.Indexed(fexpr, tensor_indices)
    quadrature_weight = macro_quadrature_rule.weight_expression
    expr = gem.Product(gem.IndexSum(gem.Product(phi_c, fexpr), tensor_indices),
                       gem.Product(macro_detJ, quadrature_weight))

    quadrature_indices = macro_builder.indices + macro_quadrature_rule.point_set.indices

    reps = spectral.Integrals([expr], quadrature_indices,
                              argument_multiindices, parameters)
    assignments = spectral.flatten([(return_variable, reps)], index_cache)
    return_variables, expressions = zip(*assignments)
    expressions = impero_utils.preprocess_gem(expressions,
                                              **spectral.finalise_options)
    assignments = list(zip(return_variables, expressions))
    impero_c = impero_utils.compile_gem(assignments,
                                        quadrature_indices +
                                        argument_multiindex,
                                        remove_zeros=True)

    index_names = []

    def name_index(index, name):
        index_names.append((index, name))
        if index in index_cache:
            for multiindex, suffix in zip(index_cache[index],
                                          string.ascii_lowercase):
                name_multiindex(multiindex, name + suffix)

    def name_multiindex(multiindex, name):
        if len(multiindex) == 1:
            name_index(multiindex[0], name)
        else:
            for i, index in enumerate(multiindex):
                name_index(index, name + str(i))

    name_multiindex(quadrature_indices, 'ip')
    for multiindex, name in zip(argument_multiindices, ['j', 'k']):
        name_multiindex(multiindex, name)

    index_names.extend(zip(macro_builder.indices, ["entity"]))
    body = generate_coffee(impero_c, index_names, parameters["precision"],
                           ScalarType_c)

    retarg = ast.Decl(ScalarType_c,
                      ast.Symbol("R", rank=(Vce.space_dimension(), )))
    local_tensor = coarse_builder.local_tensor
    local_tensor.init = ast.ArrayInit(
        numpy.zeros(Vce.space_dimension(), dtype=ScalarType_c))
    body.children.insert(0, local_tensor)
    args = [retarg] + macro_builder.kernel_args + [
        macro_builder.coordinates_arg, coarse_builder.coordinates_arg
    ]

    # Now we have the kernel that computes <f, phi_c>dx_c
    # So now we need to hit it with the inverse mass matrix on dx_c

    u = TrialFunction(Vc)
    v = TestFunction(Vc)
    expr = Tensor(ufl.inner(u, v) * ufl.dx).inv * AssembledVector(
        ufl.Coefficient(Vc))
    Ainv, = compile_expression(expr)
    Ainv = Ainv.kinfo.kernel
    A = ast.Symbol(local_tensor.sym.symbol)
    R = ast.Symbol("R")
    body.children.append(
        ast.FunCall(Ainv.name, R, coarse_builder.coordinates_arg.sym, A))
    from coffee.base import Node
    assert isinstance(Ainv._code, Node)
    return op2.Kernel(ast.Node([
        Ainv._code,
        ast.FunDecl("void",
                    "pyop2_kernel_injection_dg",
                    args,
                    body,
                    pred=["static", "inline"])
    ]),
                      name="pyop2_kernel_injection_dg",
                      cpp=True,
                      include_dirs=Ainv._include_dirs,
                      headers=Ainv._headers)
예제 #3
0
    def initialize(self, pc):
        """ Set up the problem context. Takes the original
        mixed problem and transforms it into the equivalent
        hybrid-mixed system.

        A KSP object is created for the Lagrange multipliers
        on the top/bottom faces of the mesh cells.
        """

        from firedrake import (FunctionSpace, Function, Constant,
                               FiniteElement, TensorProductElement,
                               TrialFunction, TrialFunctions, TestFunction,
                               DirichletBC, interval, MixedElement,
                               BrokenElement)
        from firedrake.assemble import (allocate_matrix,
                                        create_assembly_callable)
        from firedrake.formmanipulation import split_form
        from ufl.algorithms.replace import replace
        from ufl.cell import TensorProductCell

        # Extract PC context
        prefix = pc.getOptionsPrefix() + "vert_hybridization_"
        _, P = pc.getOperators()
        self.ctx = P.getPythonContext()

        if not isinstance(self.ctx, ImplicitMatrixContext):
            raise ValueError(
                "The python context must be an ImplicitMatrixContext")

        test, trial = self.ctx.a.arguments()

        V = test.function_space()
        mesh = V.mesh()

        # Magically determine which spaces are vector and scalar valued
        for i, Vi in enumerate(V):

            # Vector-valued spaces will have a non-empty value_shape
            if Vi.ufl_element().value_shape():
                self.vidx = i
            else:
                self.pidx = i

        Vv = V[self.vidx]
        Vp = V[self.pidx]

        # Create the space of approximate traces in the vertical.
        # NOTE: Technically a hack since the resulting space is technically
        # defined in cell interiors, however the degrees of freedom will only
        # be geometrically defined on edges. Arguments will only be used in
        # surface integrals
        deg, _ = Vv.ufl_element().degree()

        # Assumes a tensor product cell (quads, triangular-prisms, cubes)
        if not isinstance(Vp.ufl_element().cell(), TensorProductCell):
            raise NotImplementedError(
                "Currently only implemented for tensor product discretizations"
            )

        # Only want the horizontal cell
        cell, _ = Vp.ufl_element().cell()._cells

        DG = FiniteElement("DG", cell, deg)
        CG = FiniteElement("CG", interval, 1)
        Vv_tr_element = TensorProductElement(DG, CG)
        Vv_tr = FunctionSpace(mesh, Vv_tr_element)

        # Break the spaces
        broken_elements = MixedElement(
            [BrokenElement(Vi.ufl_element()) for Vi in V])
        V_d = FunctionSpace(mesh, broken_elements)

        # Set up relevant functions
        self.broken_solution = Function(V_d)
        self.broken_residual = Function(V_d)
        self.trace_solution = Function(Vv_tr)
        self.unbroken_solution = Function(V)
        self.unbroken_residual = Function(V)

        # Set up transfer kernels to and from the broken velocity space
        # NOTE: Since this snippet of code is used in a couple places in
        # in Gusto, might be worth creating a utility function that is
        # is importable and just called where needed.
        shapes = {
            "i": Vv.finat_element.space_dimension(),
            "j": np.prod(Vv.shape, dtype=int)
        }
        weight_kernel = """
        for (int i=0; i<{i}; ++i)
            for (int j=0; j<{j}; ++j)
                w[i*{j} + j] += 1.0;
        """.format(**shapes)

        self.weight = Function(Vv)
        par_loop(weight_kernel, dx, {"w": (self.weight, INC)})

        # Averaging kernel
        self.average_kernel = """
        for (int i=0; i<{i}; ++i)
            for (int j=0; j<{j}; ++j)
                vec_out[i*{j} + j] += vec_in[i*{j} + j]/w[i*{j} + j];
        """.format(**shapes)
        # Original mixed operator replaced with "broken" arguments
        arg_map = {test: TestFunction(V_d), trial: TrialFunction(V_d)}
        Atilde = Tensor(replace(self.ctx.a, arg_map))
        gammar = TestFunction(Vv_tr)
        n = FacetNormal(mesh)
        sigma = TrialFunctions(V_d)[self.vidx]

        # Again, assumes tensor product structure. Why use this if you
        # don't have some form of vertical extrusion?
        Kform = gammar('+') * jump(sigma, n=n) * dS_h

        # Here we deal with boundary conditions
        if self.ctx.row_bcs:
            # Find all the subdomains with neumann BCS
            # These are Dirichlet BCs on the vidx space
            neumann_subdomains = set()
            for bc in self.ctx.row_bcs:
                if bc.function_space().index == self.pidx:
                    raise NotImplementedError(
                        "Dirichlet conditions for scalar variable not supported. Use a weak bc."
                    )
                if bc.function_space().index != self.vidx:
                    raise NotImplementedError(
                        "Dirichlet bc set on unsupported space.")
                # append the set of sub domains
                subdom = bc.sub_domain
                if isinstance(subdom, str):
                    neumann_subdomains |= set([subdom])
                else:
                    neumann_subdomains |= set(as_tuple(subdom, int))

            # separate out the top and bottom bcs
            extruded_neumann_subdomains = neumann_subdomains & {
                "top", "bottom"
            }
            neumann_subdomains = neumann_subdomains - extruded_neumann_subdomains

            integrand = gammar * dot(sigma, n)
            measures = []
            trace_subdomains = []
            for subdomain in sorted(extruded_neumann_subdomains):
                measures.append({"top": ds_t, "bottom": ds_b}[subdomain])
                trace_subdomains.extend(
                    sorted({"top", "bottom"} - extruded_neumann_subdomains))

            measures.extend((ds(sd) for sd in sorted(neumann_subdomains)))
            markers = [int(x) for x in mesh.exterior_facets.unique_markers]
            dirichlet_subdomains = set(markers) - neumann_subdomains
            trace_subdomains.extend(sorted(dirichlet_subdomains))

            for measure in measures:
                Kform += integrand * measure

        else:
            trace_subdomains = ["top", "bottom"]

        trace_bcs = [
            DirichletBC(Vv_tr, Constant(0.0), subdomain)
            for subdomain in trace_subdomains
        ]

        # Make a SLATE tensor from Kform
        K = Tensor(Kform)

        # Assemble the Schur complement operator and right-hand side
        self.schur_rhs = Function(Vv_tr)
        self._assemble_Srhs = create_assembly_callable(
            K * Atilde.inv * AssembledVector(self.broken_residual),
            tensor=self.schur_rhs,
            form_compiler_parameters=self.ctx.fc_params)

        mat_type = PETSc.Options().getString(prefix + "mat_type", "aij")

        schur_comp = K * Atilde.inv * K.T
        self.S = allocate_matrix(schur_comp,
                                 bcs=trace_bcs,
                                 form_compiler_parameters=self.ctx.fc_params,
                                 mat_type=mat_type,
                                 options_prefix=prefix)
        self._assemble_S = create_assembly_callable(
            schur_comp,
            tensor=self.S,
            bcs=trace_bcs,
            form_compiler_parameters=self.ctx.fc_params,
            mat_type=mat_type)

        self._assemble_S()
        self.S.force_evaluation()
        Smat = self.S.petscmat

        nullspace = self.ctx.appctx.get("vert_trace_nullspace", None)
        if nullspace is not None:
            nsp = nullspace(Vv_tr)
            Smat.setNullSpace(nsp.nullspace(comm=pc.comm))

        # Set up the KSP for the system of Lagrange multipliers
        trace_ksp = PETSc.KSP().create(comm=pc.comm)
        trace_ksp.setOptionsPrefix(prefix)
        trace_ksp.setOperators(Smat)
        trace_ksp.setUp()
        trace_ksp.setFromOptions()
        self.trace_ksp = trace_ksp

        split_mixed_op = dict(split_form(Atilde.form))
        split_trace_op = dict(split_form(K.form))

        # Generate reconstruction calls
        self._reconstruction_calls(split_mixed_op, split_trace_op)
예제 #4
0
    def _setup_solver(self):
        from firedrake.assemble import create_assembly_callable
        import numpy as np

        state = self.state
        dt = state.timestepping.dt
        beta = dt*state.timestepping.alpha
        cp = state.parameters.cp
        mu = state.mu
        Vu = state.spaces("HDiv")
        Vu_broken = FunctionSpace(state.mesh, BrokenElement(Vu.ufl_element()))
        Vtheta = state.spaces("HDiv_v")
        Vrho = state.spaces("DG")

        h_deg = state.horizontal_degree
        v_deg = state.vertical_degree
        Vtrace = FunctionSpace(state.mesh, "HDiv Trace", degree=(h_deg, v_deg))

        # Split up the rhs vector (symbolically)
        u_in, rho_in, theta_in = split(state.xrhs)

        # Build the function space for "broken" u and rho
        # and add the trace variable
        M = MixedFunctionSpace((Vu_broken, Vrho))
        w, phi = TestFunctions(M)
        u, rho = TrialFunctions(M)
        l0 = TrialFunction(Vtrace)
        dl = TestFunction(Vtrace)

        n = FacetNormal(state.mesh)

        # Get background fields
        thetabar = state.fields("thetabar")
        rhobar = state.fields("rhobar")
        pibar = thermodynamics.pi(state.parameters, rhobar, thetabar)
        pibar_rho = thermodynamics.pi_rho(state.parameters, rhobar, thetabar)
        pibar_theta = thermodynamics.pi_theta(state.parameters, rhobar, thetabar)

        # Analytical (approximate) elimination of theta
        k = state.k             # Upward pointing unit vector
        theta = -dot(k, u)*dot(k, grad(thetabar))*beta + theta_in

        # Only include theta' (rather than pi') in the vertical
        # component of the gradient

        # The pi prime term (here, bars are for mean and no bars are
        # for linear perturbations)
        pi = pibar_theta*theta + pibar_rho*rho

        # Vertical projection
        def V(u):
            return k*inner(u, k)

        # Specify degree for some terms as estimated degree is too large
        dxp = dx(degree=(self.quadrature_degree))
        dS_vp = dS_v(degree=(self.quadrature_degree))
        dS_hp = dS_h(degree=(self.quadrature_degree))
        ds_vp = ds_v(degree=(self.quadrature_degree))
        ds_tbp = ds_t(degree=(self.quadrature_degree)) + ds_b(degree=(self.quadrature_degree))

        # Mass matrix for the trace space
        tM = assemble(dl('+')*l0('+')*(dS_v + dS_h)
                      + dl*l0*ds_v + dl*l0*(ds_t + ds_b))

        Lrhobar = Function(Vtrace)
        Lpibar = Function(Vtrace)
        rhopi_solver = LinearSolver(tM, solver_parameters={'ksp_type': 'cg',
                                                           'pc_type': 'bjacobi',
                                                           'sub_pc_type': 'ilu'},
                                    options_prefix='rhobarpibar_solver')

        rhobar_avg = Function(Vtrace)
        pibar_avg = Function(Vtrace)

        def _traceRHS(f):
            return (dl('+')*avg(f)*(dS_v + dS_h)
                    + dl*f*ds_v + dl*f*(ds_t + ds_b))

        assemble(_traceRHS(rhobar), tensor=Lrhobar)
        assemble(_traceRHS(pibar), tensor=Lpibar)

        # Project averages of coefficients into the trace space
        with timed_region("Gusto:HybridProjectRhobar"):
            rhopi_solver.solve(rhobar_avg, Lrhobar)

        with timed_region("Gusto:HybridProjectPibar"):
            rhopi_solver.solve(pibar_avg, Lpibar)

        # Add effect of density of water upon theta
        if self.moisture is not None:
            water_t = Function(Vtheta).assign(0.0)
            for water in self.moisture:
                water_t += self.state.fields(water)
            theta_w = theta / (1 + water_t)
            thetabar_w = thetabar / (1 + water_t)
        else:
            theta_w = theta
            thetabar_w = thetabar

        # "broken" u and rho system
        Aeqn = (inner(w, (state.h_project(u) - u_in))*dx
                - beta*cp*div(theta_w*V(w))*pibar*dxp
                # following does nothing but is preserved in the comments
                # to remind us why (because V(w) is purely vertical).
                # + beta*cp*dot(theta_w*V(w), n)*pibar_avg('+')*dS_vp
                + beta*cp*dot(theta_w*V(w), n)*pibar_avg('+')*dS_hp
                + beta*cp*dot(theta_w*V(w), n)*pibar_avg*ds_tbp
                - beta*cp*div(thetabar_w*w)*pi*dxp
                + (phi*(rho - rho_in) - beta*inner(grad(phi), u)*rhobar)*dx
                + beta*dot(phi*u, n)*rhobar_avg('+')*(dS_v + dS_h))

        if mu is not None:
            Aeqn += dt*mu*inner(w, k)*inner(u, k)*dx

        # Form the mixed operators using Slate
        # (A   K)(X) = (X_r)
        # (K.T 0)(l)   (0  )
        # where X = ("broken" u, rho)
        A = Tensor(lhs(Aeqn))
        X_r = Tensor(rhs(Aeqn))

        # Off-diagonal block matrices containing the contributions
        # of the Lagrange multipliers (surface terms in the momentum equation)
        K = Tensor(beta*cp*dot(thetabar_w*w, n)*l0('+')*(dS_vp + dS_hp)
                   + beta*cp*dot(thetabar_w*w, n)*l0*ds_vp
                   + beta*cp*dot(thetabar_w*w, n)*l0*ds_tbp)

        # X = A.inv * (X_r - K * l),
        # 0 = K.T * X = -(K.T * A.inv * K) * l + K.T * A.inv * X_r,
        # so (K.T * A.inv * K) * l = K.T * A.inv * X_r
        # is the reduced equation for the Lagrange multipliers.
        # Right-hand side expression: (Forward substitution)
        Rexp = K.T * A.inv * X_r
        self.R = Function(Vtrace)

        # We need to rebuild R everytime data changes
        self._assemble_Rexp = create_assembly_callable(Rexp, tensor=self.R)

        # Schur complement operator:
        Smatexp = K.T * A.inv * K
        with timed_region("Gusto:HybridAssembleTraceOp"):
            S = assemble(Smatexp)
            S.force_evaluation()

        # Set up the Linear solver for the system of Lagrange multipliers
        self.lSolver = LinearSolver(S, solver_parameters=self.solver_parameters,
                                    options_prefix='lambda_solve')

        # Result function for the multiplier solution
        self.lambdar = Function(Vtrace)

        # Place to put result of u rho reconstruction
        self.urho = Function(M)

        # Reconstruction of broken u and rho
        u_, rho_ = self.urho.split()

        # Split operators for two-stage reconstruction
        _A = A.blocks
        _K = K.blocks
        _Xr = X_r.blocks

        A00 = _A[0, 0]
        A01 = _A[0, 1]
        A10 = _A[1, 0]
        A11 = _A[1, 1]
        K0 = _K[0, 0]
        Ru = _Xr[0]
        Rrho = _Xr[1]
        lambda_vec = AssembledVector(self.lambdar)

        # rho reconstruction
        Srho = A11 - A10 * A00.inv * A01
        rho_expr = Srho.solve(Rrho - A10 * A00.inv * (Ru - K0 * lambda_vec),
                              decomposition="PartialPivLU")
        self._assemble_rho = create_assembly_callable(rho_expr, tensor=rho_)

        # "broken" u reconstruction
        rho_vec = AssembledVector(rho_)
        u_expr = A00.solve(Ru - A01 * rho_vec - K0 * lambda_vec,
                           decomposition="PartialPivLU")
        self._assemble_u = create_assembly_callable(u_expr, tensor=u_)

        # Project broken u into the HDiv space using facet averaging.
        # Weight function counting the dofs of the HDiv element:
        shapes = (Vu.finat_element.space_dimension(), np.prod(Vu.shape))

        weight_kernel = """
        for (int i=0; i<%d; ++i) {
        for (int j=0; j<%d; ++j) {
        w[i][j] += 1.0;
        }}""" % shapes

        self._weight = Function(Vu)
        par_loop(weight_kernel, dx, {"w": (self._weight, INC)})

        # Averaging kernel
        self._average_kernel = """
        for (int i=0; i<%d; ++i) {
        for (int j=0; j<%d; ++j) {
        vec_out[i][j] += vec_in[i][j]/w[i][j];
        }}""" % shapes

        # HDiv-conforming velocity
        self.u_hdiv = Function(Vu)

        # Reconstruction of theta
        theta = TrialFunction(Vtheta)
        gamma = TestFunction(Vtheta)

        self.theta = Function(Vtheta)
        theta_eqn = gamma*(theta - theta_in +
                           dot(k, self.u_hdiv)*dot(k, grad(thetabar))*beta)*dx

        theta_problem = LinearVariationalProblem(lhs(theta_eqn), rhs(theta_eqn), self.theta)
        self.theta_solver = LinearVariationalSolver(theta_problem,
                                                    solver_parameters={'ksp_type': 'cg',
                                                                       'pc_type': 'bjacobi',
                                                                       'pc_sub_type': 'ilu'},
                                                    options_prefix='thetabacksubstitution')

        self.bcs = [DirichletBC(Vu, 0.0, "bottom"),
                    DirichletBC(Vu, 0.0, "top")]