def test_krylov_reuse_pc_lu():
    """Test that LU re-factorisation is only performed after
    set_operator(A) is called."""

    # Test requires PETSc version 3.5 or later. Use petsc4py to check
    # version number.
    try:
        from petsc4py import PETSc
    except ImportError:
        pytest.skip("petsc4py required to check PETSc version")
    else:
        if not PETSc.Sys.getVersion() >= (3, 5, 0):
            pytest.skip("PETSc version must be 3.5 or higher")

    mesh = UnitSquareMesh(MPI.comm_world, 12, 12)
    V = FunctionSpace(mesh, ("Lagrange", 1))
    u, v = TrialFunction(V), TestFunction(V)
    a = Constant(1.0) * u * v * dx
    L = Constant(1.0) * v * dx
    assembler = fem.Assembler(a, L)
    A = assembler.assemble_matrix()
    b = assembler.assemble_vector()
    norm = 13.0

    solver = PETScKrylovSolver(mesh.mpi_comm())
    solver.set_options_prefix("test_lu_")
    PETScOptions.set("test_lu_ksp_type", "preonly")
    PETScOptions.set("test_lu_pc_type", "lu")
    solver.set_from_options()
    solver.set_operator(A)

    # Solve the original system and check the solution norm
    x = PETScVector(mesh.mpi_comm())
    solver.solve(x, b)
    assert round(x.norm(cpp.la.Norm.l2) - norm, 10) == 0

    # Re-assemble the operator with the bilinear form scaled by 0.5
    assembler = fem.assemble.Assembler(Constant(0.5) * u * v * dx, L)
    assembler.assemble(A)
    x = PETScVector(mesh.mpi_comm())
    solver.solve(x, b)
    assert round(x.norm(cpp.la.Norm.l2) - 2.0 * norm, 10) == 0

    # Re-set the operator explicitly and solve again
    solver.set_operator(A)
    solver.solve(x, b)
    assert round(x.norm(cpp.la.Norm.l2) - 2.0 * norm, 10) == 0
def assembly():
    # Whether to use custom kernels instead of FFC
    useCustomKernels = False

    mesh = UnitSquareMesh(MPI.comm_world, 13, 13)
    Q = FunctionSpace(mesh, "Lagrange", 1)

    u = TrialFunction(Q)
    v = TestFunction(Q)

    a = dolfin.cpp.fem.Form([Q._cpp_object, Q._cpp_object])
    L = dolfin.cpp.fem.Form([Q._cpp_object])

    # Compile the Poisson kernel using CFFI
    kernel_name = "_poisson_kernel"
    compile_kernels(kernel_name, verbose=True)
    # Import the compiled kernel
    kernel_mod = importlib.import_module(kernel_name)
    ffi, lib = kernel_mod.ffi, kernel_mod.lib

    # Get pointers to the CFFI functions
    fnA_ptr = ffi.cast("uintptr_t", ffi.addressof(lib, "tabulate_tensor_A"))
    fnB_ptr = ffi.cast("uintptr_t", ffi.addressof(lib, "tabulate_tensor_b"))

    if useCustomKernels:
        # Configure Forms to use own tabulate functions
        a.set_cell_tabulate(0, fnA_ptr)
        L.set_cell_tabulate(0, fnB_ptr)
    else:
        # Use FFC
        ufc_form = ffc_jit(dot(grad(u), grad(v)) * dx)
        ufc_form = cpp.fem.make_ufc_form(ufc_form[0])
        a = cpp.fem.Form(ufc_form, [Q._cpp_object, Q._cpp_object])
        ufc_form = ffc_jit(v * dx)
        ufc_form = cpp.fem.make_ufc_form(ufc_form[0])
        L = cpp.fem.Form(ufc_form, [Q._cpp_object])

    start = time.time()

    assembler = cpp.fem.Assembler([[a]], [L], [])
    A = PETScMatrix()
    b = PETScVector()
    assembler.assemble(A, cpp.fem.Assembler.BlockType.monolithic)
    assembler.assemble(b, cpp.fem.Assembler.BlockType.monolithic)

    end = time.time()
    print(f"Time for assembly: {(end-start)*1000.0}ms")

    Anorm = A.norm(cpp.la.Norm.frobenius)
    bnorm = b.norm(cpp.la.Norm.l2)
    print(Anorm, bnorm)

    # A_np = scipy2numpy(A.mat())

    assert np.isclose(Anorm, 56.124860801609124)
    assert np.isclose(bnorm, 0.0739710713711999)
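
# The compile_kernels() helper used above is defined elsewhere. As an illustration only,
# a minimal sketch of such a helper, assuming the C source and header for the tabulate
# functions are available as strings (all names below are hypothetical, not the actual
# ones used in this code):
def compile_kernels_sketch(module_name, c_source, c_header, verbose=False):
    import cffi
    ffi = cffi.FFI()
    # Declare the functions that will later be looked up via ffi.addressof(lib, ...)
    ffi.cdef(c_header)
    # Attach the raw C implementation and build an importable extension module
    ffi.set_source(module_name, c_source)
    ffi.compile(verbose=verbose)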
def test_save_and_read_vector(tempdir):
    filename = os.path.join(tempdir, "vector.h5")

    # Write to file
    local_range = MPI.local_range(MPI.comm_world, 305)
    x = PETScVector(MPI.comm_world, local_range, [], 1)
    x[:] = 1.2

    with HDF5File(MPI.comm_world, filename, "w") as vector_file:
        vector_file.write(x, "/my_vector")

    # Read from file
    with HDF5File(MPI.comm_world, filename, "r") as vector_file:
        y = vector_file.read_vector(MPI.comm_world, "/my_vector", False)

    assert y.size() == x.size()
    x.axpy(-1.0, y)
    assert x.norm(dolfin.cpp.la.Norm.l2) == 0.0
def test_numba_assembly():
    mesh = UnitSquareMesh(MPI.comm_world, 13, 13)
    Q = FunctionSpace(mesh, "Lagrange", 1)
    u = TrialFunction(Q)
    v = TestFunction(Q)

    a = cpp.fem.Form([Q._cpp_object, Q._cpp_object])
    L = cpp.fem.Form([Q._cpp_object])

    # Signature of the tabulate_tensor kernels: (A, w, coordinate_dofs, cell_orientation)
    sig = types.void(types.CPointer(typeof(ScalarType())),
                     types.CPointer(types.CPointer(typeof(ScalarType()))),
                     types.CPointer(types.double), types.intc)

    # Compile the Python kernels with Numba and attach them to the forms
    fnA = cfunc(sig, cache=True)(tabulate_tensor_A)
    a.set_cell_tabulate(0, fnA.address)
    fnb = cfunc(sig, cache=True)(tabulate_tensor_b)
    L.set_cell_tabulate(0, fnb.address)

    if False:
        # Alternative: generate the forms with FFC instead
        ufc_form = ffc_jit(dot(grad(u), grad(v)) * dx)
        ufc_form = cpp.fem.make_ufc_form(ufc_form[0])
        a = cpp.fem.Form(ufc_form, [Q._cpp_object, Q._cpp_object])
        ufc_form = ffc_jit(v * dx)
        ufc_form = cpp.fem.make_ufc_form(ufc_form[0])
        L = cpp.fem.Form(ufc_form, [Q._cpp_object])

    assembler = cpp.fem.Assembler([[a]], [L], [])
    A = PETScMatrix()
    b = PETScVector()
    assembler.assemble(A, cpp.fem.Assembler.BlockType.monolithic)
    assembler.assemble(b, cpp.fem.Assembler.BlockType.monolithic)

    Anorm = A.norm(cpp.la.Norm.frobenius)
    bnorm = b.norm(cpp.la.Norm.l2)
    print(Anorm, bnorm)

    assert np.isclose(Anorm, 56.124860801609124)
    assert np.isclose(bnorm, 0.0739710713711999)

    list_timings([TimingType.wall])
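
# The tabulate_tensor_A/tabulate_tensor_b kernels compiled above are defined elsewhere.
# As an illustration only, a minimal kernel with the same (A, w, coordinate_dofs,
# cell_orientation) signature could look like the sketch below (assumptions: P1 Lagrange
# on triangles, mass-matrix entries, real scalars; the name is hypothetical):
import numba as nb

def tabulate_tensor_mass_example(A_, w_, coords_, cell_orientation):
    # Wrap the raw pointers as arrays: 3x3 element tensor, 3 vertices with 2 coordinates
    A = nb.carray(A_, (3, 3))
    coords = nb.carray(coords_, (3, 2))
    # Jacobian determinant of the affine map from the vertex coordinates
    detJ = abs((coords[1, 0] - coords[0, 0]) * (coords[2, 1] - coords[0, 1])
               - (coords[1, 1] - coords[0, 1]) * (coords[2, 0] - coords[0, 0]))
    # Exact P1 mass matrix on the cell: detJ/12 on the diagonal, detJ/24 off-diagonal
    for i in range(3):
        for j in range(3):
            A[i, j] = detJ / 24.0 * (2.0 if i == j else 1.0)

# It would be compiled and attached exactly like the kernels above, e.g.
#   fn = cfunc(sig, cache=True)(tabulate_tensor_mass_example)
#   a.set_cell_tabulate(0, fn.address)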
def test_krylov_solver_lu():
    mesh = UnitSquareMesh(MPI.comm_world, 12, 12)
    V = FunctionSpace(mesh, ("Lagrange", 1))
    u, v = TrialFunction(V), TestFunction(V)
    a = Constant(1.0) * inner(u, v) * dx
    L = inner(Constant(1.0), v) * dx
    A = assemble(a)
    b = assemble(L)

    norm = 13.0

    solver = PETScKrylovSolver(mesh.mpi_comm())
    solver.set_options_prefix("test_lu_")
    PETScOptions.set("test_lu_ksp_type", "preonly")
    PETScOptions.set("test_lu_pc_type", "lu")
    solver.set_from_options()
    x = PETScVector()
    solver.set_operator(A)
    solver.solve(x, b)

    # *Tight* tolerance for LU solves
    assert round(x.norm(cpp.la.Norm.l2) - norm, 12) == 0
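
# For reference, the "preonly" + "lu" option pair above amounts to a single direct LU
# solve. A minimal petsc4py sketch of the same setup (assumption: A and b expose their
# underlying PETSc objects via .mat() and .vec(), as the wrappers used here typically do):
def lu_solve_petsc4py(A, b):
    from petsc4py import PETSc
    ksp = PETSc.KSP().create(A.mat().comm)
    ksp.setOperators(A.mat())
    ksp.setType("preonly")      # no Krylov iterations, apply the preconditioner once
    ksp.getPC().setType("lu")   # the "preconditioner" is a full LU factorisation
    x = A.mat().createVecRight()
    ksp.solve(b.vec(), x)
    return x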
def assembly():
    # Whether to use custom kernels instead of FFC
    useCustomKernel = True
    # Whether to use CFFI kernels instead of Numba kernels
    useCffiKernel = False

    mesh = UnitSquareMesh(MPI.comm_world, 13, 13)
    Q = FunctionSpace(mesh, "Lagrange", 1)

    u = TrialFunction(Q)
    v = TestFunction(Q)

    a = dolfin.cpp.fem.Form([Q._cpp_object, Q._cpp_object])
    L = dolfin.cpp.fem.Form([Q._cpp_object])

    # Variant 1: Compile the Poisson kernel using CFFI
    kernel_name = "_poisson_kernel"
    compile_kernels(kernel_name)
    # Import the compiled kernel
    kernel_mod = importlib.import_module(kernel_name)
    ffi, lib = kernel_mod.ffi, kernel_mod.lib

    # Variant 2: Get pointers to the Numba kernels
    sig = nb.types.void(nb.types.CPointer(nb.types.double),
                        nb.types.CPointer(nb.types.CPointer(nb.types.double)),
                        nb.types.CPointer(nb.types.double),
                        nb.types.intc)
    fnA = nb.cfunc(sig, cache=True, nopython=True)(tabulate_tensor_A)
    fnb = nb.cfunc(sig, cache=True, nopython=True)(tabulate_tensor_b)

    if useCustomKernel:
        if useCffiKernel:
            # Use the CFFI kernels, compiled from raw C
            fnA_ptr = ffi.cast("uintptr_t", ffi.addressof(lib, "tabulate_tensor_A"))
            fnB_ptr = ffi.cast("uintptr_t", ffi.addressof(lib, "tabulate_tensor_b"))
        else:
            # Use the Numba-generated kernels
            fnA_ptr = fnA.address
            fnB_ptr = fnb.address

        a.set_cell_tabulate(0, fnA_ptr)
        L.set_cell_tabulate(0, fnB_ptr)
    else:
        # Use FFC
        ufc_form = ffc_jit(dot(grad(u), grad(v)) * dx)
        ufc_form = cpp.fem.make_ufc_form(ufc_form[0])
        a = cpp.fem.Form(ufc_form, [Q._cpp_object, Q._cpp_object])
        ufc_form = ffc_jit(v * dx)
        ufc_form = cpp.fem.make_ufc_form(ufc_form[0])
        L = cpp.fem.Form(ufc_form, [Q._cpp_object])

    assembler = cpp.fem.Assembler([[a]], [L], [])
    A = PETScMatrix()
    b = PETScVector()
    assembler.assemble(A, cpp.fem.Assembler.BlockType.monolithic)
    assembler.assemble(b, cpp.fem.Assembler.BlockType.monolithic)

    Anorm = A.norm(cpp.la.Norm.frobenius)
    bnorm = b.norm(cpp.la.Norm.l2)
    print(Anorm, bnorm)

    assert np.isclose(Anorm, 56.124860801609124)
    assert np.isclose(bnorm, 0.0739710713711999)
def solve():
    # Whether to use custom Numba kernels instead of FFC
    useCustomKernels = True

    # Generate a unit cube with (n+1)^3 vertices
    n = 22
    mesh = UnitCubeMesh(MPI.comm_world, n, n, n)
    Q = FunctionSpace(mesh, "Lagrange", 1)

    u = TrialFunction(Q)
    v = TestFunction(Q)

    # Define the boundary: vertices where any component is 0 or 1 to machine precision
    def boundary(x):
        return np.sum(np.logical_or(x < DOLFIN_EPS, x > 1.0 - DOLFIN_EPS), axis=1) > 0

    u0 = Constant(0.0)
    bc = DirichletBC(Q, u0, boundary)

    # Initialize bilinear form and rhs
    a = dolfin.cpp.fem.Form([Q._cpp_object, Q._cpp_object])
    L = dolfin.cpp.fem.Form([Q._cpp_object])

    # Signature of tabulate_tensor functions
    sig = nb.types.void(nb.types.CPointer(nb.types.double),
                        nb.types.CPointer(nb.types.CPointer(nb.types.double)),
                        nb.types.CPointer(nb.types.double),
                        nb.types.intc)

    # Compile the Python functions using Numba
    fnA = nb.cfunc(sig, cache=True, nopython=True)(tabulate_tensor_A)
    fnL = nb.cfunc(sig, cache=True, nopython=True)(tabulate_tensor_L)

    module_name = "_laplace_kernel"

    # Build the CFFI kernel
    ffi = cffi.FFI()
    ffi.set_source(module_name, TABULATE_C)
    ffi.cdef(TABULATE_H)
    ffi.compile()

    # Import the compiled kernel
    kernel_mod = importlib.import_module(module_name)
    ffi, lib = kernel_mod.ffi, kernel_mod.lib

    # Get pointer to the compiled function
    fnA_ptr = ffi.cast("uintptr_t", ffi.addressof(lib, "tabulate_tensor_A"))

    # Get pointers to Numba functions
    # fnA_ptr = fnA.address
    fnL_ptr = fnL.address

    if useCustomKernels:
        # Configure Forms to use own tabulate functions
        a.set_cell_tabulate(0, fnA_ptr)
        L.set_cell_tabulate(0, fnL_ptr)
    else:
        # Use FFC

        # Bilinear form
        jit_result = ffc_jit(dot(grad(u), grad(v)) * dx)
        ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0])
        a = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object, Q._cpp_object])

        # Rhs
        f = Expression("2.0", element=Q.ufl_element())
        jit_result = ffc_jit(f * v * dx)
        ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0])
        L = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object])
        # Attach rhs expression as coefficient
        L.set_coefficient(0, f._cpp_object)

    assembler = dolfin.cpp.fem.Assembler([[a]], [L], [bc])
    A = PETScMatrix()
    b = PETScVector()

    # Perform assembly; only the matrix assembly is timed
    start = time.time()
    assembler.assemble(A, dolfin.cpp.fem.Assembler.BlockType.monolithic)
    end = time.time()

    assembler.assemble(b, dolfin.cpp.fem.Assembler.BlockType.monolithic)

    print(f"Time for assembly: {(end-start)*1000.0}ms")

    Anorm = A.norm(dolfin.cpp.la.Norm.frobenius)
    bnorm = b.norm(dolfin.cpp.la.Norm.l2)
    print(Anorm, bnorm)

    # Norms obtained with FFC and n=22
    assert np.isclose(Anorm, 60.86192203436385)
    assert np.isclose(bnorm, 0.018075523965828778)

    # Solve the system
    comm = L.mesh().mpi_comm()
    solver = PETScKrylovSolver(comm)

    u = Function(Q)
    solver.set_operator(A)
    solver.solve(u.vector(), b)

    # Export result
    file = XDMFFile(MPI.comm_world, "poisson_3d.xdmf")
    file.write(u, XDMFFile.Encoding.HDF5)
def solve(n_runs: int, mesh_size: int, element: FiniteElement,
          reference_tensor: ReferenceTensor, kernel_generator):
    # Whether to use custom kernels instead of FFC
    useCustomKernels = True

    # Generate a unit cube with (n+1)^3 vertices
    mesh = generate_mesh(mesh_size)
    print("Mesh generated.")

    A0 = reference_tensor

    Q = FunctionSpace(mesh, element)
    u = TrialFunction(Q)
    v = TestFunction(Q)

    # Define the boundary: vertices where any component is 0 or 1 to machine precision
    def boundary(x):
        return np.sum(np.logical_or(x < DOLFIN_EPS, x > 1.0 - DOLFIN_EPS), axis=1) > 0

    u0 = Constant(0.0)
    bc = DirichletBC(Q, u0, boundary)

    if useCustomKernels:
        # Initialize bilinear form and rhs
        a = dolfin.cpp.fem.Form([Q._cpp_object, Q._cpp_object])
        L = dolfin.cpp.fem.Form([Q._cpp_object])

        # Signature of tabulate_tensor functions
        sig = nb.types.void(
            nb.types.CPointer(nb.types.double),
            nb.types.CPointer(nb.types.CPointer(nb.types.double)),
            nb.types.CPointer(nb.types.double),
            nb.types.intc)

        # Compile the Python functions using Numba
        fnA = nb.cfunc(sig, cache=True, nopython=True)(numba_kernels.tabulate_tensor_A)
        fnL = nb.cfunc(sig, cache=True, nopython=True)(numba_kernels.tabulate_tensor_L)

        module_name = "_laplace_kernel"
        compile_poisson_kernel(module_name, kernel_generator, A0, verbose=False)
        print("Finished compiling kernel.")

        # Import the compiled kernel
        kernel_mod = importlib.import_module(f"simd.tmp.{module_name}")
        ffi, lib = kernel_mod.ffi, kernel_mod.lib

        # Get pointer to the compiled function
        fnA_ptr = ffi.cast("uintptr_t", ffi.addressof(lib, "tabulate_tensor_A"))

        # Get pointers to Numba functions
        # fnA_ptr = fnA.address
        fnL_ptr = fnL.address

        # Configure Forms to use own tabulate functions
        a.set_cell_tabulate(0, fnA_ptr)
        L.set_cell_tabulate(0, fnL_ptr)
    else:
        # Use FFC

        # Bilinear form
        jit_result = ffc_jit(dot(grad(u), grad(v)) * dx)
        ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0])
        a = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object, Q._cpp_object])

        # Rhs
        f = Expression("2.0", element=Q.ufl_element())
        jit_result = ffc_jit(f * v * dx)
        ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0])
        L = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object])
        # Attach rhs expression as coefficient
        L.set_coefficient(0, f._cpp_object)

    print("Built form.")

    assembler = dolfin.cpp.fem.Assembler([[a]], [L], [bc])
    A = PETScMatrix()
    b = PETScVector()

    # Callable that performs assembly of the matrix
    assembly_callable = lambda: assembler.assemble(
        A, dolfin.cpp.fem.Assembler.BlockType.monolithic)

    # Get timings for assembly of the matrix over several runs
    time_avg, time_min, time_max = utils.timing(n_runs, assembly_callable, verbose=True)
    print(f"Timings for element matrix assembly (n={n_runs}) "
          f"avg: {round(time_avg*1000, 2)}ms, "
          f"min: {round(time_min*1000, 2)}ms, "
          f"max: {round(time_max*1000, 2)}ms")

    # Assemble again to get correct results
    A = PETScMatrix()
    assembler.assemble(A, dolfin.cpp.fem.Assembler.BlockType.monolithic)
    assembler.assemble(b, dolfin.cpp.fem.Assembler.BlockType.monolithic)

    Anorm = A.norm(dolfin.cpp.la.Norm.frobenius)
    bnorm = b.norm(dolfin.cpp.la.Norm.l2)
    print(Anorm, bnorm)

    # Check norms of assembled system
    if useCustomKernels:
        # Norms obtained with FFC and n=22
        assert np.isclose(Anorm, 118.19435458024503)
        # assert np.isclose(bnorm, 0.009396467472097566)
        return

    # Solve the system
    comm = L.mesh().mpi_comm()
    solver = PETScKrylovSolver(comm)

    u = Function(Q)
    solver.set_operator(A)
    solver.solve(u.vector(), b)

    # Export result
    file = XDMFFile(MPI.comm_world, "poisson_3d.xdmf")
    file.write(u, XDMFFile.Encoding.HDF5)
ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0])
L = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object])
# Attach rhs expression as coefficient
L.set_coefficient(0, f._cpp_object)

assembler = dolfin.cpp.fem.Assembler([[a]], [L], [bc])
A = PETScMatrix()
b = PETScVector()

print("Running assembly...")
assembler.assemble(A, dolfin.cpp.fem.Assembler.BlockType.monolithic)
assembler.assemble(b, dolfin.cpp.fem.Assembler.BlockType.monolithic)

Anorm = A.norm(dolfin.cpp.la.Norm.frobenius)
bnorm = b.norm(dolfin.cpp.la.Norm.l2)
print("A.norm(frobenius)={:.12e}\nb.norm(l2)={:.12e}".format(Anorm, bnorm))

# Norms obtained with n=22 and bcs
assert np.isclose(Anorm, 60.86192203436385)
assert np.isclose(bnorm, 0.018075523965828778)

# Norms obtained with n=22 and no bcs
# assert np.isclose(Anorm, 29.416127208482518)
# assert np.isclose(bnorm, 0.018726593629987284)

print("Running solver...")
comm = L.mesh().mpi_comm()
solver = PETScKrylovSolver(comm)

u = Function(Q)
def assembly():
    # Whether to use custom kernels instead of FFC
    useCustomKernels = True

    # Generate a unit cube with (n+1)^3 vertices
    n = 20
    mesh = UnitCubeMesh(MPI.comm_world, n, n, n)
    Q = FunctionSpace(mesh, "Lagrange", 1)
    u = TrialFunction(Q)
    v = TestFunction(Q)

    # Boundary on the x- and z-faces of the cube, excluding points on the y-faces
    def boundary0(x):
        wrong = np.logical_or(x[:, 1] < DOLFIN_EPS, x[:, 1] > 1.0 - DOLFIN_EPS)
        one = np.logical_or(x[:, 0] < DOLFIN_EPS, x[:, 0] > 1.0 - DOLFIN_EPS)
        two = np.logical_or(x[:, 2] < DOLFIN_EPS, x[:, 2] > 1.0 - DOLFIN_EPS)
        return np.logical_and(np.logical_or(one, two), np.logical_not(wrong))

    # Boundary on the y = 0 and y = 1 faces
    def boundary1(x):
        return np.logical_or(x[:, 1] < DOLFIN_EPS, x[:, 1] > 1.0 - DOLFIN_EPS)

    u0 = Constant(0.0)
    bc0 = DirichletBC(Q, u0, boundary0)

    u1 = Constant(1.0)
    bc1 = DirichletBC(Q, u1, boundary1)

    # Initialize bilinear form and rhs
    a = dolfin.cpp.fem.Form([Q._cpp_object, Q._cpp_object])
    L = dolfin.cpp.fem.Form([Q._cpp_object])

    # Signature of tabulate_tensor functions
    sig = nb.types.void(nb.types.CPointer(nb.types.double),
                        nb.types.CPointer(nb.types.CPointer(nb.types.double)),
                        nb.types.CPointer(nb.types.double),
                        nb.types.intc)

    # Compile the Numba functions
    fnA = nb.cfunc(sig, cache=True, nopython=True)(tabulate_tensor_A)
    fnb = nb.cfunc(sig, cache=True, nopython=True)(tabulate_tensor_b)

    if useCustomKernels:
        # Configure Forms to use own tabulate functions
        a.set_cell_tabulate(0, fnA.address)
        L.set_cell_tabulate(0, fnb.address)
    else:
        # Use FFC
        jit_result = ffc_jit(dot(grad(u), grad(v)) * dx)
        ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0])
        a = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object, Q._cpp_object])

        f = Expression("20.0", element=Q.ufl_element())
        jit_result = ffc_jit(f * v * dx)
        ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0])
        L = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object])
        L.set_coefficient(0, f._cpp_object)

    start = time.time()

    assembler = dolfin.cpp.fem.Assembler([[a]], [L], [bc0, bc1])
    A = PETScMatrix()
    b = PETScVector()
    assembler.assemble(A, dolfin.cpp.fem.Assembler.BlockType.monolithic)
    assembler.assemble(b, dolfin.cpp.fem.Assembler.BlockType.monolithic)

    end = time.time()
    print(f"Time for assembly: {(end-start)*1000.0}ms")

    Anorm = A.norm(dolfin.cpp.la.Norm.frobenius)
    bnorm = b.norm(dolfin.cpp.la.Norm.l2)
    print(Anorm, bnorm)

    # Norms obtained with FFC and n=20
    # assert np.isclose(Anorm, 55.82812911070811)
    # assert np.isclose(bnorm, 29.73261456296761)

    comm = L.mesh().mpi_comm()
    solver = PETScKrylovSolver(comm)

    u = Function(Q)
    solver.set_operator(A)
    solver.solve(u.vector(), b)

    file = XDMFFile(MPI.comm_world, "poisson_3d.xdmf")
    file.write(u, XDMFFile.Encoding.HDF5)