def assembly():
    # Whether to use custom kernels instead of FFC
    useCustomKernels = False

    mesh = UnitSquareMesh(MPI.comm_world, 13, 13)
    Q = FunctionSpace(mesh, "Lagrange", 1)

    u = TrialFunction(Q)
    v = TestFunction(Q)

    a = dolfin.cpp.fem.Form([Q._cpp_object, Q._cpp_object])
    L = dolfin.cpp.fem.Form([Q._cpp_object])

    # Compile the Poisson kernel using CFFI
    kernel_name = "_poisson_kernel"
    compile_kernels(kernel_name, verbose=True)
    # Import the compiled kernel
    kernel_mod = importlib.import_module(kernel_name)
    ffi, lib = kernel_mod.ffi, kernel_mod.lib

    # Get pointers to the CFFI functions
    fnA_ptr = ffi.cast("uintptr_t", ffi.addressof(lib, "tabulate_tensor_A"))
    fnB_ptr = ffi.cast("uintptr_t", ffi.addressof(lib, "tabulate_tensor_b"))

    if useCustomKernels:
        # Configure Forms to use own tabulate functions
        a.set_cell_tabulate(0, fnA_ptr)
        L.set_cell_tabulate(0, fnB_ptr)
    else:
        # Use FFC
        ufc_form = ffc_jit(dot(grad(u), grad(v)) * dx)
        ufc_form = cpp.fem.make_ufc_form(ufc_form[0])
        a = cpp.fem.Form(ufc_form, [Q._cpp_object, Q._cpp_object])
        ufc_form = ffc_jit(v * dx)
        ufc_form = cpp.fem.make_ufc_form(ufc_form[0])
        L = cpp.fem.Form(ufc_form, [Q._cpp_object])

    start = time.time()

    assembler = cpp.fem.Assembler([[a]], [L], [])
    A = PETScMatrix()
    b = PETScVector()
    assembler.assemble(A, cpp.fem.Assembler.BlockType.monolithic)
    assembler.assemble(b, cpp.fem.Assembler.BlockType.monolithic)

    end = time.time()
    print(f"Time for assembly: {(end-start)*1000.0}ms")

    Anorm = A.norm(cpp.la.Norm.frobenius)
    bnorm = b.norm(cpp.la.Norm.l2)
    print(Anorm, bnorm)

    # A_np = scipy2numpy(A.mat())

    assert np.isclose(Anorm, 56.124860801609124)
    assert np.isclose(bnorm, 0.0739710713711999)
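# ---------------------------------------------------------------------------
# `compile_kernels` is not defined in this section. A minimal sketch of how
# such a helper could be built with CFFI is given below, following the same
# set_source/cdef/compile pattern used inline in solve() further down. The C
# strings here are stand-ins (the kernels only zero their outputs); the real
# module would contain the hand-written Poisson tabulate_tensor_A and
# tabulate_tensor_b bodies.
# ---------------------------------------------------------------------------
import cffi

# Declarations, matching the UFC-style tabulate_tensor signature
# (element tensor, coefficients, coordinate dofs, cell orientation)
TABULATE_H = """
void tabulate_tensor_A(double* A, double** w, double* coords, int cell_orientation);
void tabulate_tensor_b(double* b, double** w, double* coords, int cell_orientation);
"""

# Stand-in implementations (the real kernels compute the Poisson cell tensors)
TABULATE_C = """
void tabulate_tensor_A(double* A, double** w, double* coords, int cell_orientation)
{
    for (int i = 0; i < 9; ++i) A[i] = 0.0;
}
void tabulate_tensor_b(double* b, double** w, double* coords, int cell_orientation)
{
    for (int i = 0; i < 3; ++i) b[i] = 0.0;
}
"""


def compile_kernels(module_name, verbose=False):
    """Compile the C kernels into an importable CFFI extension module."""
    ffi = cffi.FFI()
    ffi.set_source(module_name, TABULATE_C)
    ffi.cdef(TABULATE_H)
    ffi.compile(verbose=verbose)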
def test_numba_assembly():
    mesh = UnitSquareMesh(MPI.comm_world, 13, 13)
    Q = FunctionSpace(mesh, "Lagrange", 1)

    u = TrialFunction(Q)
    v = TestFunction(Q)

    a = cpp.fem.Form([Q._cpp_object, Q._cpp_object])
    L = cpp.fem.Form([Q._cpp_object])

    sig = types.void(types.CPointer(typeof(ScalarType())),
                     types.CPointer(types.CPointer(typeof(ScalarType()))),
                     types.CPointer(types.double), types.intc)

    fnA = cfunc(sig, cache=True)(tabulate_tensor_A)
    a.set_cell_tabulate(0, fnA.address)

    fnb = cfunc(sig, cache=True)(tabulate_tensor_b)
    L.set_cell_tabulate(0, fnb.address)

    if False:
        # Reference path: use FFC-generated kernels instead (disabled)
        ufc_form = ffc_jit(dot(grad(u), grad(v)) * dx)
        ufc_form = cpp.fem.make_ufc_form(ufc_form[0])
        a = cpp.fem.Form(ufc_form, [Q._cpp_object, Q._cpp_object])
        ufc_form = ffc_jit(v * dx)
        ufc_form = cpp.fem.make_ufc_form(ufc_form[0])
        L = cpp.fem.Form(ufc_form, [Q._cpp_object])

    assembler = cpp.fem.Assembler([[a]], [L], [])
    A = PETScMatrix()
    b = PETScVector()
    assembler.assemble(A, cpp.fem.Assembler.BlockType.monolithic)
    assembler.assemble(b, cpp.fem.Assembler.BlockType.monolithic)

    Anorm = A.norm(cpp.la.Norm.frobenius)
    bnorm = b.norm(cpp.la.Norm.l2)
    print(Anorm, bnorm)

    assert np.isclose(Anorm, 56.124860801609124)
    assert np.isclose(bnorm, 0.0739710713711999)

    list_timings([TimingType.wall])
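# ---------------------------------------------------------------------------
# The tests above wrap plain Python functions `tabulate_tensor_A` and
# `tabulate_tensor_b` with numba.cfunc, but those functions are not shown in
# this section. Below is a minimal sketch of what they could look like for
# this problem (P1 Lagrange on triangles, forms dot(grad(u), grad(v))*dx and
# v*dx), written against the (A, w, coords, cell_orientation) signature used
# above. The actual kernels in the repository may differ.
# ---------------------------------------------------------------------------
import numba
import numpy as np


def tabulate_tensor_A(A_, w_, coords_, cell_orientation):
    """Element stiffness matrix of the Laplace operator on a P1 triangle."""
    A = numba.carray(A_, (3, 3))
    x = numba.carray(coords_, (3, 2))

    # Jacobian determinant of the affine map (twice the signed cell area)
    detJ = (x[1, 0] - x[0, 0]) * (x[2, 1] - x[0, 1]) \
        - (x[2, 0] - x[0, 0]) * (x[1, 1] - x[0, 1])

    # Unnormalised, constant gradients of the three P1 basis functions
    g = np.empty((3, 2))
    g[0, 0] = x[1, 1] - x[2, 1]
    g[0, 1] = x[2, 0] - x[1, 0]
    g[1, 0] = x[2, 1] - x[0, 1]
    g[1, 1] = x[0, 0] - x[2, 0]
    g[2, 0] = x[0, 1] - x[1, 1]
    g[2, 1] = x[1, 0] - x[0, 0]

    scale = 1.0 / (2.0 * abs(detJ))
    for i in range(3):
        for j in range(3):
            A[i, j] = scale * (g[i, 0] * g[j, 0] + g[i, 1] * g[j, 1])


def tabulate_tensor_b(b_, w_, coords_, cell_orientation):
    """Element load vector of v*dx (i.e. f = 1) on a P1 triangle."""
    b = numba.carray(b_, (3,))
    x = numba.carray(coords_, (3, 2))

    detJ = (x[1, 0] - x[0, 0]) * (x[2, 1] - x[0, 1]) \
        - (x[2, 0] - x[0, 0]) * (x[1, 1] - x[0, 1])

    area = abs(detJ) / 2.0
    for i in range(3):
        b[i] = area / 3.0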
def assembly():
    # Whether to use custom kernels instead of FFC
    useCustomKernel = True
    # Whether to use CFFI kernels instead of Numba kernels
    useCffiKernel = False

    mesh = UnitSquareMesh(MPI.comm_world, 13, 13)
    Q = FunctionSpace(mesh, "Lagrange", 1)

    u = TrialFunction(Q)
    v = TestFunction(Q)

    a = dolfin.cpp.fem.Form([Q._cpp_object, Q._cpp_object])
    L = dolfin.cpp.fem.Form([Q._cpp_object])

    # Variant 1: Compile the Poisson kernel using CFFI
    kernel_name = "_poisson_kernel"
    compile_kernels(kernel_name)
    # Import the compiled kernel
    kernel_mod = importlib.import_module(kernel_name)
    ffi, lib = kernel_mod.ffi, kernel_mod.lib

    # Variant 2: Get pointers to the Numba kernels
    sig = nb.types.void(nb.types.CPointer(nb.types.double),
                        nb.types.CPointer(nb.types.CPointer(nb.types.double)),
                        nb.types.CPointer(nb.types.double), nb.types.intc)
    fnA = nb.cfunc(sig, cache=True, nopython=True)(tabulate_tensor_A)
    fnb = nb.cfunc(sig, cache=True, nopython=True)(tabulate_tensor_b)

    if useCustomKernel:
        if useCffiKernel:
            # Use the cffi kernel, compiled from raw C
            fnA_ptr = ffi.cast("uintptr_t", ffi.addressof(lib, "tabulate_tensor_A"))
            fnB_ptr = ffi.cast("uintptr_t", ffi.addressof(lib, "tabulate_tensor_b"))
        else:
            # Use the numba generated kernels
            fnA_ptr = fnA.address
            fnB_ptr = fnb.address

        a.set_cell_tabulate(0, fnA_ptr)
        L.set_cell_tabulate(0, fnB_ptr)
    else:
        # Use FFC
        ufc_form = ffc_jit(dot(grad(u), grad(v)) * dx)
        ufc_form = cpp.fem.make_ufc_form(ufc_form[0])
        a = cpp.fem.Form(ufc_form, [Q._cpp_object, Q._cpp_object])
        ufc_form = ffc_jit(v * dx)
        ufc_form = cpp.fem.make_ufc_form(ufc_form[0])
        L = cpp.fem.Form(ufc_form, [Q._cpp_object])

    assembler = cpp.fem.Assembler([[a]], [L], [])
    A = PETScMatrix()
    b = PETScVector()
    assembler.assemble(A, cpp.fem.Assembler.BlockType.monolithic)
    assembler.assemble(b, cpp.fem.Assembler.BlockType.monolithic)

    Anorm = A.norm(cpp.la.Norm.frobenius)
    bnorm = b.norm(cpp.la.Norm.l2)
    print(Anorm, bnorm)

    assert np.isclose(Anorm, 56.124860801609124)
    assert np.isclose(bnorm, 0.0739710713711999)
def generate_ref_tensor(element: FiniteElement = None):

    def monkey_patch_ufl():
        from ufl.referencevalue import ReferenceValue
        oldinit = ReferenceValue.__init__

        def newinit(self, f):
            if isinstance(f, ReferenceValue):
                f = f.ufl_operands[0]
            oldinit(self, f)

        ReferenceValue.__init__ = newinit

    monkey_patch_ufl()

    def generate_reference_tetrahedron_mesh():
        vertices = np.array([[0, 0, 0],
                             [1, 0, 0],
                             [0, 1, 0],
                             [0, 0, 1]], dtype=np.float64)
        cells = np.array([[0, 1, 2, 3]], dtype=np.int32)
        mesh = Mesh(MPI.comm_world, CellType.Type.tetrahedron, vertices, cells,
                    [], cpp.mesh.GhostMode.none)
        return mesh

    mesh = generate_reference_tetrahedron_mesh()

    if element is None:
        element = FiniteElement("P", tetrahedron, 1)

    V = FunctionSpace(mesh, element)
    dofmap = V.dofmap().cell_dofs(0)
    dofmap_inverse = np.argsort(dofmap)

    u, v = TrialFunction(V), TestFunction(V)

    detJ = JacobianDeterminant(SpatialCoordinate(mesh))

    A0 = np.zeros((dofmap.size, dofmap.size, mesh.topology.dim, mesh.topology.dim),
                  dtype=np.double)

    for i in range(mesh.topology.dim):
        for j in range(mesh.topology.dim):
            jit_result = jit.jit.ffc_jit(
                outer(Grad(Val(u)), Grad(Val(v)))[i, j] / detJ * dx)
            ufc_form = cpp.fem.make_ufc_form(jit_result[0])
            a = cpp.fem.Form(ufc_form, [V._cpp_object, V._cpp_object])

            assembler = cpp.fem.Assembler([[a]], [], [])
            A_scp = PETScMatrix()
            assembler.assemble(A_scp, cpp.fem.Assembler.BlockType.monolithic)
            A = utils.scipy2numpy(A_scp)

            A0[:, :, i, j] = A[dofmap_inverse].transpose()

            # print(79 * '=')
            # print("dphi_i/dX({})*dphi_j/dX({})".format(i, j))
            # print(A[dofmap_inverse])

    A0 = A0[dofmap_inverse, :, :, :]

    return A0
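# ---------------------------------------------------------------------------
# The reference tensor A0 returned above has shape (n_dof, n_dof, dim, dim).
# In the tensor-representation approach, the element stiffness matrix of a
# physical cell is recovered by contracting A0 with a per-cell geometry
# tensor. A minimal NumPy sketch of that contraction, assuming K is the
# inverse Jacobian of the cell's affine map and detJ its determinant (the
# generated kernels in this demo would perform the equivalent contraction):
# ---------------------------------------------------------------------------
import numpy as np


def element_matrix_from_reference_tensor(A0, K, detJ):
    """Contract the reference tensor with the cell geometry tensor.

    A0   : (n_dof, n_dof, dim, dim) tensor from generate_ref_tensor()
    K    : (dim, dim) inverse Jacobian of the affine cell map
    detJ : Jacobian determinant of the cell
    """
    # Geometry tensor of the Laplace operator: G = |detJ| * K K^T
    G = np.abs(detJ) * (K @ K.T)
    # A_cell[i, j] = sum_{a, b} A0[i, j, a, b] * G[a, b]
    return np.einsum("ijab,ab->ij", A0, G)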
def solve():
    # Whether to use custom Numba kernels instead of FFC
    useCustomKernels = True

    # Generate a unit cube with (n+1)^3 vertices
    n = 22
    mesh = UnitCubeMesh(MPI.comm_world, n, n, n)
    Q = FunctionSpace(mesh, "Lagrange", 1)

    u = TrialFunction(Q)
    v = TestFunction(Q)

    # Define the boundary: vertices where any component is (up to machine precision) 0 or 1
    def boundary(x):
        return np.sum(np.logical_or(x < DOLFIN_EPS, x > 1.0 - DOLFIN_EPS), axis=1) > 0

    u0 = Constant(0.0)
    bc = DirichletBC(Q, u0, boundary)

    # Initialize bilinear form and rhs
    a = dolfin.cpp.fem.Form([Q._cpp_object, Q._cpp_object])
    L = dolfin.cpp.fem.Form([Q._cpp_object])

    # Signature of tabulate_tensor functions
    sig = nb.types.void(nb.types.CPointer(nb.types.double),
                        nb.types.CPointer(nb.types.CPointer(nb.types.double)),
                        nb.types.CPointer(nb.types.double), nb.types.intc)

    # Compile the python functions using Numba
    fnA = nb.cfunc(sig, cache=True, nopython=True)(tabulate_tensor_A)
    fnL = nb.cfunc(sig, cache=True, nopython=True)(tabulate_tensor_L)

    module_name = "_laplace_kernel"

    # Build the kernel
    ffi = cffi.FFI()
    ffi.set_source(module_name, TABULATE_C)
    ffi.cdef(TABULATE_H)
    ffi.compile()

    # Import the compiled kernel
    kernel_mod = importlib.import_module(module_name)
    ffi, lib = kernel_mod.ffi, kernel_mod.lib

    # Get pointer to the compiled CFFI function
    fnA_ptr = ffi.cast("uintptr_t", ffi.addressof(lib, "tabulate_tensor_A"))

    # Get pointers to the Numba functions
    # fnA_ptr = fnA.address
    fnL_ptr = fnL.address

    if useCustomKernels:
        # Configure Forms to use own tabulate functions
        a.set_cell_tabulate(0, fnA_ptr)
        L.set_cell_tabulate(0, fnL_ptr)
    else:
        # Use FFC

        # Bilinear form
        jit_result = ffc_jit(dot(grad(u), grad(v)) * dx)
        ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0])
        a = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object, Q._cpp_object])

        # Rhs
        f = Expression("2.0", element=Q.ufl_element())
        jit_result = ffc_jit(f * v * dx)
        ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0])
        L = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object])
        # Attach rhs expression as coefficient
        L.set_coefficient(0, f._cpp_object)

    assembler = dolfin.cpp.fem.Assembler([[a]], [L], [bc])

    A = PETScMatrix()
    b = PETScVector()

    # Perform assembly
    start = time.time()
    assembler.assemble(A, dolfin.cpp.fem.Assembler.BlockType.monolithic)
    end = time.time()

    # Assemble the rhs as well (not timed)
    assembler.assemble(b, dolfin.cpp.fem.Assembler.BlockType.monolithic)

    print(f"Time for assembly: {(end-start)*1000.0}ms")

    Anorm = A.norm(dolfin.cpp.la.Norm.frobenius)
    bnorm = b.norm(dolfin.cpp.la.Norm.l2)
    print(Anorm, bnorm)

    # Norms obtained with FFC and n=22
    assert np.isclose(Anorm, 60.86192203436385)
    assert np.isclose(bnorm, 0.018075523965828778)

    comm = L.mesh().mpi_comm()
    solver = PETScKrylovSolver(comm)

    u = Function(Q)
    solver.set_operator(A)
    solver.solve(u.vector(), b)

    # Export result
    file = XDMFFile(MPI.comm_world, "poisson_3d.xdmf")
    file.write(u, XDMFFile.Encoding.HDF5)
def test_krylov_reuse_pc():
    "Test preconditioner re-use with PETScKrylovSolver"

    # Define problem
    mesh = UnitSquareMesh(MPI.comm_world, 8, 8)
    V = FunctionSpace(mesh, ('Lagrange', 1))
    bc = DirichletBC(V, Constant(0.0), lambda x, on_boundary: on_boundary)
    u = TrialFunction(V)
    v = TestFunction(V)

    # Forms
    a, L = inner(grad(u), grad(v)) * dx, dot(Constant(1.0), v) * dx

    A, P = PETScMatrix(), PETScMatrix()
    b = PETScVector()

    # Assemble linear algebra objects
    assemble(a, tensor=A)  # noqa
    assemble(a, tensor=P)  # noqa
    assemble(L, tensor=b)  # noqa

    # Apply boundary conditions
    bc.apply(A)
    bc.apply(P)
    bc.apply(b)

    # Create Krylov solver and set operators
    solver = PETScKrylovSolver("gmres", "bjacobi")
    solver.set_operators(A, P)

    # Solve
    x = PETScVector()
    num_iter_ref = solver.solve(x, b)

    # Change preconditioner matrix (bad matrix) and solve (PC will be updated)
    a_p = u * v * dx
    assemble(a_p, tensor=P)  # noqa
    bc.apply(P)
    x = PETScVector()
    num_iter_mod = solver.solve(x, b)
    assert num_iter_mod > num_iter_ref

    # Change preconditioner matrix (good matrix) and solve (PC will be updated)
    a_p = a
    assemble(a_p, tensor=P)  # noqa
    bc.apply(P)
    x = PETScVector()
    num_iter = solver.solve(x, b)
    assert num_iter == num_iter_ref

    # Change preconditioner matrix (bad matrix) and solve (PC will not be updated)
    solver.set_reuse_preconditioner(True)
    a_p = u * v * dx
    assemble(a_p, tensor=P)  # noqa
    bc.apply(P)
    x = PETScVector()
    num_iter = solver.solve(x, b)
    assert num_iter == num_iter_ref

    # Update preconditioner (bad PC, will increase iteration count)
    solver.set_reuse_preconditioner(False)
    x = PETScVector()
    num_iter = solver.solve(x, b)
    assert num_iter == num_iter_mod
def solve(n_runs: int, mesh_size: int, element: FiniteElement,
          reference_tensor: ReferenceTensor, kernel_generator):
    # Whether to use custom kernels instead of FFC
    useCustomKernels = True

    # Generate a unit cube with (mesh_size+1)^3 vertices
    mesh = generate_mesh(mesh_size)
    print("Mesh generated.")

    A0 = reference_tensor

    Q = FunctionSpace(mesh, element)
    u = TrialFunction(Q)
    v = TestFunction(Q)

    # Define the boundary: vertices where any component is (up to machine precision) 0 or 1
    def boundary(x):
        return np.sum(np.logical_or(x < DOLFIN_EPS, x > 1.0 - DOLFIN_EPS), axis=1) > 0

    u0 = Constant(0.0)
    bc = DirichletBC(Q, u0, boundary)

    if useCustomKernels:
        # Initialize bilinear form and rhs
        a = dolfin.cpp.fem.Form([Q._cpp_object, Q._cpp_object])
        L = dolfin.cpp.fem.Form([Q._cpp_object])

        # Signature of tabulate_tensor functions
        sig = nb.types.void(nb.types.CPointer(nb.types.double),
                            nb.types.CPointer(nb.types.CPointer(nb.types.double)),
                            nb.types.CPointer(nb.types.double), nb.types.intc)

        # Compile the python functions using Numba
        fnA = nb.cfunc(sig, cache=True, nopython=True)(numba_kernels.tabulate_tensor_A)
        fnL = nb.cfunc(sig, cache=True, nopython=True)(numba_kernels.tabulate_tensor_L)

        module_name = "_laplace_kernel"
        compile_poisson_kernel(module_name, kernel_generator, A0, verbose=False)
        print("Finished compiling kernel.")

        # Import the compiled kernel
        kernel_mod = importlib.import_module(f"simd.tmp.{module_name}")
        ffi, lib = kernel_mod.ffi, kernel_mod.lib

        # Get pointer to the compiled CFFI function
        fnA_ptr = ffi.cast("uintptr_t", ffi.addressof(lib, "tabulate_tensor_A"))

        # Get pointers to the Numba functions
        # fnA_ptr = fnA.address
        fnL_ptr = fnL.address

        # Configure Forms to use own tabulate functions
        a.set_cell_tabulate(0, fnA_ptr)
        L.set_cell_tabulate(0, fnL_ptr)
    else:
        # Use FFC

        # Bilinear form
        jit_result = ffc_jit(dot(grad(u), grad(v)) * dx)
        ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0])
        a = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object, Q._cpp_object])

        # Rhs
        f = Expression("2.0", element=Q.ufl_element())
        jit_result = ffc_jit(f * v * dx)
        ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0])
        L = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object])
        # Attach rhs expression as coefficient
        L.set_coefficient(0, f._cpp_object)

    print("Built form.")

    assembler = dolfin.cpp.fem.Assembler([[a]], [L], [bc])
    A = PETScMatrix()
    b = PETScVector()

    # Callable that performs assembly of matrix
    assembly_callable = lambda: assembler.assemble(
        A, dolfin.cpp.fem.Assembler.BlockType.monolithic)

    # Get timings for assembly of matrix over several runs
    time_avg, time_min, time_max = utils.timing(n_runs, assembly_callable, verbose=True)
    print(f"Timings for element matrix assembly (n={n_runs}) "
          f"avg: {round(time_avg*1000, 2)}ms, "
          f"min: {round(time_min*1000, 2)}ms, "
          f"max: {round(time_max*1000, 2)}ms")

    # Assemble again to get correct results
    A = PETScMatrix()
    assembler.assemble(A, dolfin.cpp.fem.Assembler.BlockType.monolithic)
    assembler.assemble(b, dolfin.cpp.fem.Assembler.BlockType.monolithic)

    Anorm = A.norm(dolfin.cpp.la.Norm.frobenius)
    bnorm = b.norm(dolfin.cpp.la.Norm.l2)
    print(Anorm, bnorm)

    # Check norms of assembled system
    if useCustomKernels:
        # Norms obtained with FFC and n=22
        assert np.isclose(Anorm, 118.19435458024503)
        # assert np.isclose(bnorm, 0.009396467472097566)
        return

    # Solve the system
    comm = L.mesh().mpi_comm()
    solver = PETScKrylovSolver(comm)

    u = Function(Q)
    solver.set_operator(A)
    solver.solve(u.vector(), b)

    # Export result
    file = XDMFFile(MPI.comm_world, "poisson_3d.xdmf")
    file.write(u, XDMFFile.Encoding.HDF5)
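# ---------------------------------------------------------------------------
# `utils.timing` is not shown in this section. A minimal sketch of such a
# helper, assuming it simply runs the callable n_runs times and returns the
# average, minimum and maximum wall time in seconds (the name, argument order
# and `verbose` flag follow the call in solve() above, but are otherwise
# assumptions):
# ---------------------------------------------------------------------------
import time


def timing(n_runs, func, verbose=False):
    """Run `func` n_runs times; return (avg, min, max) wall time in seconds."""
    times = []
    for i in range(n_runs):
        start = time.time()
        func()
        elapsed = time.time() - start
        times.append(elapsed)
        if verbose:
            print(f"Run {i}: {elapsed*1000.0:.2f}ms")
    return sum(times) / n_runs, min(times), max(times)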
form_compiler_parameters={"cell_batch_size": 4, "enable_cross_cell_gcc_ext": True}) ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0]) a = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object, Q._cpp_object]) # Rhs f = Expression("2.0", element=Q.ufl_element()) jit_result = ffc_jit(f*v * dx, form_compiler_parameters={"cell_batch_size": 4, "enable_cross_cell_gcc_ext": True}) ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0]) L = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object]) # Attach rhs expression as coefficient L.set_coefficient(0, f._cpp_object) assembler = dolfin.cpp.fem.Assembler([[a]], [L], [bc]) A = PETScMatrix() b = PETScVector() print("Running assembly...") assembler.assemble(A, dolfin.cpp.fem.Assembler.BlockType.monolithic) assembler.assemble(b, dolfin.cpp.fem.Assembler.BlockType.monolithic) Anorm = A.norm(dolfin.cpp.la.Norm.frobenius) bnorm = b.norm(dolfin.cpp.la.Norm.l2) print("A.norm(frobenius)={:.12e}\nb.norm(l2)={:.12e}".format(Anorm, bnorm)) # Norms obtained with n=22 and bcs assert (np.isclose(Anorm, 60.86192203436385)) assert (np.isclose(bnorm, 0.018075523965828778)) # Norms obtained with n=22 and no bcs
def assembly():
    # Whether to use custom kernels instead of FFC
    useCustomKernels = True

    # Generate a unit cube with (n+1)^3 vertices
    n = 20
    mesh = UnitCubeMesh(MPI.comm_world, n, n, n)
    Q = FunctionSpace(mesh, "Lagrange", 1)

    u = TrialFunction(Q)
    v = TestFunction(Q)

    def boundary0(x):
        # Vertices on the x- or z-boundary faces, excluding the y-boundary faces
        wrong = np.logical_or(x[:, 1] < DOLFIN_EPS, x[:, 1] > 1.0 - DOLFIN_EPS)

        one = np.logical_or(x[:, 0] < DOLFIN_EPS, x[:, 0] > 1.0 - DOLFIN_EPS)
        two = np.logical_or(x[:, 2] < DOLFIN_EPS, x[:, 2] > 1.0 - DOLFIN_EPS)

        return np.logical_and(np.logical_or(one, two), np.logical_not(wrong))

    def boundary1(x):
        # Vertices on the y-boundary faces
        return np.logical_or(x[:, 1] < DOLFIN_EPS, x[:, 1] > 1.0 - DOLFIN_EPS)

    u0 = Constant(0.0)
    bc0 = DirichletBC(Q, u0, boundary0)

    u1 = Constant(1.0)
    bc1 = DirichletBC(Q, u1, boundary1)

    # Initialize bilinear form and rhs
    a = dolfin.cpp.fem.Form([Q._cpp_object, Q._cpp_object])
    L = dolfin.cpp.fem.Form([Q._cpp_object])

    # Signature of tabulate_tensor functions
    sig = nb.types.void(nb.types.CPointer(nb.types.double),
                        nb.types.CPointer(nb.types.CPointer(nb.types.double)),
                        nb.types.CPointer(nb.types.double), nb.types.intc)

    # Compile the numba functions
    fnA = nb.cfunc(sig, cache=True, nopython=True)(tabulate_tensor_A)
    fnb = nb.cfunc(sig, cache=True, nopython=True)(tabulate_tensor_b)

    if useCustomKernels:
        # Configure Forms to use own tabulate functions
        a.set_cell_tabulate(0, fnA.address)
        L.set_cell_tabulate(0, fnb.address)
    else:
        # Use FFC
        jit_result = ffc_jit(dot(grad(u), grad(v)) * dx)
        ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0])
        a = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object, Q._cpp_object])

        f = Expression("20.0", element=Q.ufl_element())
        jit_result = ffc_jit(f * v * dx)
        ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0])
        L = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object])
        L.set_coefficient(0, f._cpp_object)

    start = time.time()

    assembler = dolfin.cpp.fem.Assembler([[a]], [L], [bc0, bc1])
    A = PETScMatrix()
    b = PETScVector()
    assembler.assemble(A, dolfin.cpp.fem.Assembler.BlockType.monolithic)
    assembler.assemble(b, dolfin.cpp.fem.Assembler.BlockType.monolithic)

    end = time.time()
    print(f"Time for assembly: {(end-start)*1000.0}ms")

    Anorm = A.norm(dolfin.cpp.la.Norm.frobenius)
    bnorm = b.norm(dolfin.cpp.la.Norm.l2)
    print(Anorm, bnorm)

    # Norms obtained with FFC and n=20
    # assert np.isclose(Anorm, 55.82812911070811)
    # assert np.isclose(bnorm, 29.73261456296761)

    comm = L.mesh().mpi_comm()
    solver = PETScKrylovSolver(comm)

    u = Function(Q)
    solver.set_operator(A)
    solver.solve(u.vector(), b)

    file = XDMFFile(MPI.comm_world, "poisson_3d.xdmf")
    file.write(u, XDMFFile.Encoding.HDF5)