def assembly():
    # Whether to use custom kernels instead of FFC
    useCustomKernels = False

    mesh = UnitSquareMesh(MPI.comm_world, 13, 13)
    Q = FunctionSpace(mesh, "Lagrange", 1)

    u = TrialFunction(Q)
    v = TestFunction(Q)

    a = dolfin.cpp.fem.Form([Q._cpp_object, Q._cpp_object])
    L = dolfin.cpp.fem.Form([Q._cpp_object])

    # Compile the Poisson kernel using CFFI
    kernel_name = "_poisson_kernel"
    compile_kernels(kernel_name, verbose=True)
    # Import the compiled kernel
    kernel_mod = importlib.import_module(kernel_name)
    ffi, lib = kernel_mod.ffi, kernel_mod.lib

    # Get pointers to the CFFI functions
    fnA_ptr = ffi.cast("uintptr_t", ffi.addressof(lib, "tabulate_tensor_A"))
    fnB_ptr = ffi.cast("uintptr_t", ffi.addressof(lib, "tabulate_tensor_b"))

    if useCustomKernels:
        # Configure Forms to use own tabulate functions
        a.set_cell_tabulate(0, fnA_ptr)
        L.set_cell_tabulate(0, fnB_ptr)
    else:
        # Use FFC
        ufc_form = ffc_jit(dot(grad(u), grad(v)) * dx)
        ufc_form = cpp.fem.make_ufc_form(ufc_form[0])
        a = cpp.fem.Form(ufc_form, [Q._cpp_object, Q._cpp_object])
        ufc_form = ffc_jit(v * dx)
        ufc_form = cpp.fem.make_ufc_form(ufc_form[0])
        L = cpp.fem.Form(ufc_form, [Q._cpp_object])

    start = time.time()
    assembler = cpp.fem.Assembler([[a]], [L], [])
    A = PETScMatrix()
    b = PETScVector()
    assembler.assemble(A, cpp.fem.Assembler.BlockType.monolithic)
    assembler.assemble(b, cpp.fem.Assembler.BlockType.monolithic)
    end = time.time()

    print(f"Time for assembly: {(end-start)*1000.0}ms")

    Anorm = A.norm(cpp.la.Norm.frobenius)
    bnorm = b.norm(cpp.la.Norm.l2)

    print(Anorm, bnorm)

    #A_np = scipy2numpy(A.mat())

    assert (np.isclose(Anorm, 56.124860801609124))
    assert (np.isclose(bnorm, 0.0739710713711999))
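
# A note on the CFFI kernels used above: set_cell_tabulate expects the address
# of a C function whose signature mirrors the Numba signature used in the next
# example, i.e. void(double*, double**, double*, int). A minimal, hypothetical
# sketch of the cdef header string such kernels would need (the actual
# TABULATE_H/TABULATE_C strings consumed by compile_kernels are defined
# elsewhere and may differ):
TABULATE_H_SKETCH = """
void tabulate_tensor_A(double* A, const double* const* w,
                       const double* coordinate_dofs, int cell_orientation);
void tabulate_tensor_b(double* b, const double* const* w,
                       const double* coordinate_dofs, int cell_orientation);
"""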
Example #2
def test_numba_assembly():
    mesh = UnitSquareMesh(MPI.comm_world, 13, 13)
    Q = FunctionSpace(mesh, "Lagrange", 1)

    u = TrialFunction(Q)
    v = TestFunction(Q)

    a = cpp.fem.Form([Q._cpp_object, Q._cpp_object])
    L = cpp.fem.Form([Q._cpp_object])

    sig = types.void(types.CPointer(typeof(ScalarType())),
                     types.CPointer(types.CPointer(typeof(ScalarType()))),
                     types.CPointer(types.double), types.intc)

    fnA = cfunc(sig, cache=True)(tabulate_tensor_A)
    a.set_cell_tabulate(0, fnA.address)

    fnb = cfunc(sig, cache=True)(tabulate_tensor_b)
    L.set_cell_tabulate(0, fnb.address)

    # Optionally use FFC-generated kernels instead (disabled in this example)
    if False:
        ufc_form = ffc_jit(dot(grad(u), grad(v)) * dx)
        ufc_form = cpp.fem.make_ufc_form(ufc_form[0])
        a = cpp.fem.Form(ufc_form, [Q._cpp_object, Q._cpp_object])
        ufc_form = ffc_jit(v * dx)
        ufc_form = cpp.fem.make_ufc_form(ufc_form[0])
        L = cpp.fem.Form(ufc_form, [Q._cpp_object])

    assembler = cpp.fem.Assembler([[a]], [L], [])
    A = PETScMatrix()
    b = PETScVector()
    assembler.assemble(A, cpp.fem.Assembler.BlockType.monolithic)
    assembler.assemble(b, cpp.fem.Assembler.BlockType.monolithic)

    Anorm = A.norm(cpp.la.Norm.frobenius)
    bnorm = b.norm(cpp.la.Norm.l2)

    print(Anorm, bnorm)

    assert (np.isclose(Anorm, 56.124860801609124))
    assert (np.isclose(bnorm, 0.0739710713711999))

    list_timings([TimingType.wall])
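
# The tabulate_tensor_A / tabulate_tensor_b kernels compiled above are defined
# elsewhere; the following is only a minimal sketch, under the assumption of a
# P1 triangle, of what such a Numba kernel could look like for the load vector
# v*dx (each basis function integrates to area/3 over the cell):
import numba as nb
import numpy as np

_sig = nb.types.void(nb.types.CPointer(nb.types.double),
                     nb.types.CPointer(nb.types.CPointer(nb.types.double)),
                     nb.types.CPointer(nb.types.double), nb.types.intc)


@nb.cfunc(_sig, cache=True, nopython=True)
def tabulate_tensor_b_sketch(b_, w_, coords_, cell_orientation):
    # b_ points to the 3 local load-vector entries, coords_ to the 3 vertex
    # coordinates of the cell stored row-major as a (3, 2) array
    b = nb.carray(b_, (3,), dtype=np.float64)
    x = nb.carray(coords_, (3, 2), dtype=np.float64)
    # Jacobian determinant of the affine map; cell area = |detJ| / 2
    detJ = ((x[1, 0] - x[0, 0]) * (x[2, 1] - x[0, 1])
            - (x[1, 1] - x[0, 1]) * (x[2, 0] - x[0, 0]))
    val = abs(detJ) / 6.0  # area / 3
    for i in range(3):
        b[i] = val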
Example #3
def assembly():
    # Whether to use custom kernels instead of FFC
    useCustomKernel = True
    # Whether to use CFFI kernels instead of Numba kernels
    useCffiKernel = False

    mesh = UnitSquareMesh(MPI.comm_world, 13, 13)
    Q = FunctionSpace(mesh, "Lagrange", 1)

    u = TrialFunction(Q)
    v = TestFunction(Q)

    a = dolfin.cpp.fem.Form([Q._cpp_object, Q._cpp_object])
    L = dolfin.cpp.fem.Form([Q._cpp_object])

    # Variant 1: Compile the Poisson kernel using CFFI
    kernel_name = "_poisson_kernel"
    compile_kernels(kernel_name)
    # Import the compiled kernel
    kernel_mod = importlib.import_module(kernel_name)
    ffi, lib = kernel_mod.ffi, kernel_mod.lib

    # Variant 2: Get pointers to the Numba kernels
    sig = nb.types.void(nb.types.CPointer(nb.types.double),
                        nb.types.CPointer(nb.types.CPointer(nb.types.double)),
                        nb.types.CPointer(nb.types.double), nb.types.intc)

    fnA = nb.cfunc(sig, cache=True, nopython=True)(tabulate_tensor_A)
    fnb = nb.cfunc(sig, cache=True, nopython=True)(tabulate_tensor_b)

    if useCustomKernel:

        if useCffiKernel:
            # Use the cffi kernel, compiled from raw C
            fnA_ptr = ffi.cast("uintptr_t",
                               ffi.addressof(lib, "tabulate_tensor_A"))
            fnB_ptr = ffi.cast("uintptr_t",
                               ffi.addressof(lib, "tabulate_tensor_b"))
        else:
            # Use the numba generated kernels
            fnA_ptr = fnA.address
            fnB_ptr = fnb.address

        a.set_cell_tabulate(0, fnA_ptr)
        L.set_cell_tabulate(0, fnB_ptr)

    else:
        # Use FFC
        ufc_form = ffc_jit(dot(grad(u), grad(v)) * dx)
        ufc_form = cpp.fem.make_ufc_form(ufc_form[0])
        a = cpp.fem.Form(ufc_form, [Q._cpp_object, Q._cpp_object])
        ufc_form = ffc_jit(v * dx)
        ufc_form = cpp.fem.make_ufc_form(ufc_form[0])
        L = cpp.fem.Form(ufc_form, [Q._cpp_object])

    assembler = cpp.fem.Assembler([[a]], [L], [])
    A = PETScMatrix()
    b = PETScVector()
    assembler.assemble(A, cpp.fem.Assembler.BlockType.monolithic)
    assembler.assemble(b, cpp.fem.Assembler.BlockType.monolithic)

    Anorm = A.norm(cpp.la.Norm.frobenius)
    bnorm = b.norm(cpp.la.Norm.l2)

    print(Anorm, bnorm)

    assert (np.isclose(Anorm, 56.124860801609124))
    assert (np.isclose(bnorm, 0.0739710713711999))
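
# compile_kernels itself is not shown in these snippets; a plausible minimal
# sketch, modelled on the inline CFFI build in the UnitCubeMesh example further
# below, assuming TABULATE_C and TABULATE_H hold the C source and declarations:
import cffi


def compile_kernels_sketch(module_name, verbose=False):
    ffibuilder = cffi.FFI()
    ffibuilder.set_source(module_name, TABULATE_C)  # C source with the kernels
    ffibuilder.cdef(TABULATE_H)                     # matching declarations
    ffibuilder.compile(verbose=verbose)
    # Afterwards the module can be imported and the kernel addresses taken via
    # ffi.cast("uintptr_t", ffi.addressof(lib, "tabulate_tensor_A")), as above.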
Example #4
def generate_ref_tensor(element: FiniteElement = None):
    def monkey_patch_ufl():
        from ufl.referencevalue import ReferenceValue
        oldinit = ReferenceValue.__init__

        def newinit(self, f):
            if isinstance(f, ReferenceValue):
                f = f.ufl_operands[0]
            oldinit(self, f)

        ReferenceValue.__init__ = newinit

    monkey_patch_ufl()

    def generate_reference_tetrahedron_mesh():
        vertices = np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]],
                            dtype=np.float64)

        cells = np.array([[0, 1, 2, 3]], dtype=np.int32)

        mesh = Mesh(MPI.comm_world, CellType.Type.tetrahedron, vertices, cells,
                    [], cpp.mesh.GhostMode.none)

        return mesh

    mesh = generate_reference_tetrahedron_mesh()

    if element is None:
        element = FiniteElement("P", tetrahedron, 1)
    V = FunctionSpace(mesh, element)

    dofmap = V.dofmap().cell_dofs(0)
    dofmap_inverse = np.argsort(dofmap)

    u, v = TrialFunction(V), TestFunction(V)
    detJ = JacobianDeterminant(SpatialCoordinate(mesh))

    A0 = np.zeros(
        (dofmap.size, dofmap.size, mesh.topology.dim, mesh.topology.dim),
        dtype=np.double)

    for i in range(mesh.topology.dim):
        for j in range(mesh.topology.dim):
            jit_result = jit.jit.ffc_jit(
                outer(Grad(Val(u)), Grad(Val(v)))[i, j] / detJ * dx)
            ufc_form = cpp.fem.make_ufc_form(jit_result[0])
            a = cpp.fem.Form(ufc_form, [V._cpp_object, V._cpp_object])

            assembler = cpp.fem.Assembler([[a]], [], [])
            A_scp = PETScMatrix()
            assembler.assemble(A_scp, cpp.fem.Assembler.BlockType.monolithic)

            A = utils.scipy2numpy(A_scp)
            A0[:, :, i, j] = A[dofmap_inverse].transpose()

            #print(79 * '=')
            #print("dphi_i/dX({})*dphi_j/dX({})".format(i, j))
            #print(A[dofmap_inverse])

    A0 = A0[dofmap_inverse, :, :, :]
    return A0
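
# How the reference tensor A0 is typically consumed (an assumption for
# illustration, not part of these examples): for an affine cell with Jacobian
# J, the cell stiffness matrix is the contraction of A0 with the geometry
# tensor G = |detJ| * J^{-1} J^{-T}.
def cell_matrix_from_reference(A0, J):
    import numpy as np
    detJ = abs(np.linalg.det(J))
    Jinv = np.linalg.inv(J)
    G = detJ * (Jinv @ Jinv.T)              # geometry tensor, shape (dim, dim)
    return np.einsum("abij,ij->ab", A0, G)  # local element stiffness matrix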
Example #5
def solve():
    # Whether to use custom Numba kernels instead of FFC
    useCustomKernels = True

    # Generate a unit cube with (n+1)^3 vertices
    n = 22
    mesh = UnitCubeMesh(MPI.comm_world, n, n, n)
    Q = FunctionSpace(mesh, "Lagrange", 1)

    u = TrialFunction(Q)
    v = TestFunction(Q)

    # Define the boundary: vertices where any coordinate is 0 or 1 to within machine precision
    def boundary(x):
        return np.sum(np.logical_or(x < DOLFIN_EPS, x > 1.0 - DOLFIN_EPS),
                      axis=1) > 0

    u0 = Constant(0.0)
    bc = DirichletBC(Q, u0, boundary)

    # Initialize bilinear form and rhs
    a = dolfin.cpp.fem.Form([Q._cpp_object, Q._cpp_object])
    L = dolfin.cpp.fem.Form([Q._cpp_object])

    # Signature of tabulate_tensor functions
    sig = nb.types.void(nb.types.CPointer(nb.types.double),
                        nb.types.CPointer(nb.types.CPointer(nb.types.double)),
                        nb.types.CPointer(nb.types.double), nb.types.intc)

    # Compile the python functions using Numba
    fnA = nb.cfunc(sig, cache=True, nopython=True)(tabulate_tensor_A)
    fnL = nb.cfunc(sig, cache=True, nopython=True)(tabulate_tensor_L)

    module_name = "_laplace_kernel"

    # Build the kernel
    ffi = cffi.FFI()
    ffi.set_source(module_name, TABULATE_C)
    ffi.cdef(TABULATE_H)
    ffi.compile()

    # Import the compiled kernel
    kernel_mod = importlib.import_module(module_name)
    ffi, lib = kernel_mod.ffi, kernel_mod.lib

    # Get pointer to the compiled function
    fnA_ptr = ffi.cast("uintptr_t", ffi.addressof(lib, "tabulate_tensor_A"))

    # Get pointers to Numba functions
    #fnA_ptr = fnA.address
    fnL_ptr = fnL.address

    if useCustomKernels:
        # Configure Forms to use own tabulate functions
        a.set_cell_tabulate(0, fnA_ptr)
        L.set_cell_tabulate(0, fnL_ptr)
    else:
        # Use FFC

        # Bilinear form
        jit_result = ffc_jit(dot(grad(u), grad(v)) * dx)
        ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0])
        a = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object, Q._cpp_object])

        # Rhs
        f = Expression("2.0", element=Q.ufl_element())
        jit_result = ffc_jit(f * v * dx)
        ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0])
        L = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object])
        # Attach rhs expression as coefficient
        L.set_coefficient(0, f._cpp_object)

    assembler = dolfin.cpp.fem.Assembler([[a]], [L], [bc])
    A = PETScMatrix()
    b = PETScVector()

    # Perform assembly
    start = time.time()
    assembler.assemble(A, dolfin.cpp.fem.Assembler.BlockType.monolithic)
    end = time.time()

    # Assemble the RHS as well (it is not included in the timing above)
    assembler.assemble(b, dolfin.cpp.fem.Assembler.BlockType.monolithic)

    print(f"Time for assembly: {(end-start)*1000.0}ms")

    Anorm = A.norm(dolfin.cpp.la.Norm.frobenius)
    bnorm = b.norm(dolfin.cpp.la.Norm.l2)
    print(Anorm, bnorm)

    # Norms obtained with FFC and n=22
    assert (np.isclose(Anorm, 60.86192203436385))
    assert (np.isclose(bnorm, 0.018075523965828778))

    comm = L.mesh().mpi_comm()
    solver = PETScKrylovSolver(comm)

    u = Function(Q)
    solver.set_operator(A)
    solver.solve(u.vector(), b)

    # Export result
    file = XDMFFile(MPI.comm_world, "poisson_3d.xdmf")
    file.write(u, XDMFFile.Encoding.HDF5)
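
# A quick illustration of the vectorized boundary predicate used above
# (assuming np and DOLFIN_EPS are imported as in the example); expected
# output: [ True False  True]
_pts = np.array([[0.0, 0.5, 0.5],   # lies on the x = 0 face
                 [0.5, 0.5, 0.5],   # interior point
                 [0.3, 1.0, 0.2]])  # lies on the y = 1 face
print(np.sum(np.logical_or(_pts < DOLFIN_EPS, _pts > 1.0 - DOLFIN_EPS),
             axis=1) > 0)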
Example #6
def test_krylov_reuse_pc():
    "Test preconditioner re-use with PETScKrylovSolver"

    # Define problem
    mesh = UnitSquareMesh(MPI.comm_world, 8, 8)
    V = FunctionSpace(mesh, ('Lagrange', 1))
    bc = DirichletBC(V, Constant(0.0), lambda x, on_boundary: on_boundary)
    u = TrialFunction(V)
    v = TestFunction(V)

    # Forms
    a, L = inner(grad(u), grad(v)) * dx, dot(Constant(1.0), v) * dx

    A, P = PETScMatrix(), PETScMatrix()
    b = PETScVector()

    # Assemble linear algebra objects
    assemble(a, tensor=A)  # noqa
    assemble(a, tensor=P)  # noqa
    assemble(L, tensor=b)  # noqa

    # Apply boundary conditions
    bc.apply(A)
    bc.apply(P)
    bc.apply(b)

    # Create Krylov solver and set operators
    solver = PETScKrylovSolver("gmres", "bjacobi")
    solver.set_operators(A, P)

    # Solve
    x = PETScVector()
    num_iter_ref = solver.solve(x, b)

    # Change preconditioner matrix (bad matrix) and solve (PC will be
    # updated)
    a_p = u * v * dx
    assemble(a_p, tensor=P)  # noqa
    bc.apply(P)
    x = PETScVector()
    num_iter_mod = solver.solve(x, b)
    assert num_iter_mod > num_iter_ref

    # Change preconditioner matrix (good matrix) and solve (PC will be
    # updated)
    a_p = a
    assemble(a_p, tensor=P)  # noqa
    bc.apply(P)
    x = PETScVector()
    num_iter = solver.solve(x, b)
    assert num_iter == num_iter_ref

    # Change preconditioner matrix (bad matrix) and solve (PC will not
    # be updated)
    solver.set_reuse_preconditioner(True)
    a_p = u * v * dx
    assemble(a_p, tensor=P)  # noqa
    bc.apply(P)
    x = PETScVector()
    num_iter = solver.solve(x, b)
    assert num_iter == num_iter_ref

    # Update preconditioner (bad PC, will increase iteration count)
    solver.set_reuse_preconditioner(False)
    x = PETScVector()
    num_iter = solver.solve(x, b)
    assert num_iter == num_iter_mod
Example #7
def solve(n_runs: int, mesh_size: int, element: FiniteElement,
          reference_tensor: ReferenceTensor, kernel_generator):
    # Whether to use custom kernels instead of FFC
    useCustomKernels = True

    # Generate a unit cube with (n+1)^3 vertices
    mesh = generate_mesh(mesh_size)
    print("Mesh generated.")

    A0 = reference_tensor

    Q = FunctionSpace(mesh, element)
    u = TrialFunction(Q)
    v = TestFunction(Q)

    # Define the boundary: vertices where any coordinate is 0 or 1 to within machine precision
    def boundary(x):
        return np.sum(np.logical_or(x < DOLFIN_EPS, x > 1.0 - DOLFIN_EPS),
                      axis=1) > 0

    u0 = Constant(0.0)
    bc = DirichletBC(Q, u0, boundary)

    if useCustomKernels:
        # Initialize bilinear form and rhs
        a = dolfin.cpp.fem.Form([Q._cpp_object, Q._cpp_object])
        L = dolfin.cpp.fem.Form([Q._cpp_object])

        # Signature of tabulate_tensor functions
        sig = nb.types.void(
            nb.types.CPointer(nb.types.double),
            nb.types.CPointer(nb.types.CPointer(nb.types.double)),
            nb.types.CPointer(nb.types.double), nb.types.intc)

        # Compile the python functions using Numba
        fnA = nb.cfunc(sig, cache=True,
                       nopython=True)(numba_kernels.tabulate_tensor_A)
        fnL = nb.cfunc(sig, cache=True,
                       nopython=True)(numba_kernels.tabulate_tensor_L)

        module_name = "_laplace_kernel"
        compile_poisson_kernel(module_name,
                               kernel_generator,
                               A0,
                               verbose=False)
        print("Finished compiling kernel.")

        # Import the compiled kernel
        kernel_mod = importlib.import_module(f"simd.tmp.{module_name}")
        ffi, lib = kernel_mod.ffi, kernel_mod.lib

        # Get pointer to the compiled function
        fnA_ptr = ffi.cast("uintptr_t", ffi.addressof(lib,
                                                      "tabulate_tensor_A"))

        # Get pointers to Numba functions
        # fnA_ptr = fnA.address
        fnL_ptr = fnL.address

        # Configure Forms to use own tabulate functions
        a.set_cell_tabulate(0, fnA_ptr)
        L.set_cell_tabulate(0, fnL_ptr)
    else:
        # Use FFC

        # Bilinear form
        jit_result = ffc_jit(dot(grad(u), grad(v)) * dx)
        ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0])
        a = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object, Q._cpp_object])

        # Rhs
        f = Expression("2.0", element=Q.ufl_element())
        jit_result = ffc_jit(f * v * dx)
        ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0])
        L = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object])
        # Attach rhs expression as coefficient
        L.set_coefficient(0, f._cpp_object)
        print("Built form.")

    assembler = dolfin.cpp.fem.Assembler([[a]], [L], [bc])
    A = PETScMatrix()
    b = PETScVector()

    # Callable that performs assembly of matrix
    assembly_callable = lambda: assembler.assemble(
        A, dolfin.cpp.fem.Assembler.BlockType.monolithic)

    # Get timings for assembly of matrix over several runs
    time_avg, time_min, time_max = utils.timing(n_runs,
                                                assembly_callable,
                                                verbose=True)
    print(f"Timings for element matrix assembly (n={n_runs}) "
          f"avg: {round(time_avg*1000, 2)}ms, "
          f"min: {round(time_min*1000, 2)}ms, "
          f"max: {round(time_max*1000, 2)}ms")

    # Assemble again to get correct results
    A = PETScMatrix()
    assembler.assemble(A, dolfin.cpp.fem.Assembler.BlockType.monolithic)
    assembler.assemble(b, dolfin.cpp.fem.Assembler.BlockType.monolithic)

    Anorm = A.norm(dolfin.cpp.la.Norm.frobenius)
    bnorm = b.norm(dolfin.cpp.la.Norm.l2)
    print(Anorm, bnorm)

    # Check norms of assembled system
    if useCustomKernels:
        # Norms obtained with FFC and n=22
        assert (np.isclose(Anorm, 118.19435458024503))
        #assert (np.isclose(bnorm, 0.009396467472097566))

    # Return early; the solve and output steps below are skipped
    return

    # Solve the system
    comm = L.mesh().mpi_comm()
    solver = PETScKrylovSolver(comm)

    u = Function(Q)
    solver.set_operator(A)
    solver.solve(u.vector(), b)

    # Export result
    file = XDMFFile(MPI.comm_world, "poisson_3d.xdmf")
    file.write(u, XDMFFile.Encoding.HDF5)
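
# utils.timing is referenced above but not shown; a plausible minimal sketch
# (the real helper may differ): run a callable n_runs times and return the
# average, minimum and maximum wall-clock time in seconds.
import time


def timing_sketch(n_runs, callable_, verbose=False):
    times = []
    for i in range(n_runs):
        t0 = time.time()
        callable_()
        elapsed = time.time() - t0
        times.append(elapsed)
        if verbose:
            print(f"run {i}: {elapsed * 1000.0:.2f}ms")
    return sum(times) / len(times), min(times), max(times)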
Example #8
# Bilinear form
jit_result = ffc_jit(dot(grad(u), grad(v)) * dx,
                     form_compiler_parameters={"cell_batch_size": 4, "enable_cross_cell_gcc_ext": True})
ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0])
a = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object, Q._cpp_object])

# Rhs
f = Expression("2.0", element=Q.ufl_element())
jit_result = ffc_jit(f*v * dx,
                     form_compiler_parameters={"cell_batch_size": 4, "enable_cross_cell_gcc_ext": True})
ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0])
L = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object])
# Attach rhs expression as coefficient
L.set_coefficient(0, f._cpp_object)

assembler = dolfin.cpp.fem.Assembler([[a]], [L], [bc])

A = PETScMatrix()
b = PETScVector()

print("Running assembly...")
assembler.assemble(A, dolfin.cpp.fem.Assembler.BlockType.monolithic)
assembler.assemble(b, dolfin.cpp.fem.Assembler.BlockType.monolithic)

Anorm = A.norm(dolfin.cpp.la.Norm.frobenius)
bnorm = b.norm(dolfin.cpp.la.Norm.l2)
print("A.norm(frobenius)={:.12e}\nb.norm(l2)={:.12e}".format(Anorm, bnorm))

# Norms obtained with n=22 and bcs
assert (np.isclose(Anorm, 60.86192203436385))
assert (np.isclose(bnorm, 0.018075523965828778))

# Norms obtained with n=22 and no bcs
Example #9
def assembly():
    # Whether to use custom kernels instead of FFC
    useCustomKernels = True

    # Generate a unit cube with (n+1)^3 vertices
    n = 20
    mesh = UnitCubeMesh(MPI.comm_world, n, n, n)
    Q = FunctionSpace(mesh, "Lagrange", 1)

    u = TrialFunction(Q)
    v = TestFunction(Q)

    def boundary0(x):
        wrong = np.logical_or(x[:, 1] < DOLFIN_EPS, x[:, 1] > 1.0 - DOLFIN_EPS)
        one = np.logical_or(x[:, 0] < DOLFIN_EPS, x[:, 0] > 1.0 - DOLFIN_EPS)
        two = np.logical_or(x[:, 2] < DOLFIN_EPS, x[:, 2] > 1.0 - DOLFIN_EPS)

        return np.logical_and(np.logical_or(one, two), np.logical_not(wrong))

    def boundary1(x):
        return np.logical_or(x[:, 1] < DOLFIN_EPS, x[:, 1] > 1.0 - DOLFIN_EPS)

    u0 = Constant(0.0)
    bc0 = DirichletBC(Q, u0, boundary0)

    u1 = Constant(1.0)
    bc1 = DirichletBC(Q, u1, boundary1)

    # Initialize bilinear form and rhs
    a = dolfin.cpp.fem.Form([Q._cpp_object, Q._cpp_object])
    L = dolfin.cpp.fem.Form([Q._cpp_object])

    # Signature of tabulate_tensor functions
    sig = nb.types.void(nb.types.CPointer(nb.types.double),
                        nb.types.CPointer(nb.types.CPointer(nb.types.double)),
                        nb.types.CPointer(nb.types.double), nb.types.intc)

    # Compile the numba functions
    fnA = nb.cfunc(sig, cache=True, nopython=True)(tabulate_tensor_A)
    fnb = nb.cfunc(sig, cache=True, nopython=True)(tabulate_tensor_b)

    if useCustomKernels:
        # Configure Forms to use own tabulate functions
        a.set_cell_tabulate(0, fnA.address)
        L.set_cell_tabulate(0, fnb.address)
    else:
        # Use FFC
        jit_result = ffc_jit(dot(grad(u), grad(v)) * dx)
        ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0])
        a = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object, Q._cpp_object])
        f = Expression("20.0", element=Q.ufl_element())
        jit_result = ffc_jit(f * v * dx)
        ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0])
        L = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object])
        L.set_coefficient(0, f._cpp_object)

    start = time.time()
    assembler = dolfin.cpp.fem.Assembler([[a]], [L], [bc0, bc1])
    A = PETScMatrix()
    b = PETScVector()
    assembler.assemble(A, dolfin.cpp.fem.Assembler.BlockType.monolithic)
    assembler.assemble(b, dolfin.cpp.fem.Assembler.BlockType.monolithic)
    end = time.time()

    print(f"Time for assembly: {(end-start)*1000.0}ms")

    Anorm = A.norm(dolfin.cpp.la.Norm.frobenius)
    bnorm = b.norm(dolfin.cpp.la.Norm.l2)

    print(Anorm, bnorm)

    # Norms obtained with FFC and n=20
    #assert (np.isclose(Anorm, 55.82812911070811))
    #assert (np.isclose(bnorm, 29.73261456296761))

    comm = L.mesh().mpi_comm()
    solver = PETScKrylovSolver(comm)

    u = Function(Q)

    solver.set_operator(A)
    solver.solve(u.vector(), b)

    file = XDMFFile(MPI.comm_world, "poisson_3d.xdmf")
    file.write(u, XDMFFile.Encoding.HDF5)