def assemble_dense(domain, dual_to_range, parameters, operator_descriptor,
                   device_interface):
    """Assembles the operator and returns a dense matrix."""
    import bempp.api
    from bempp.api.utils.helpers import get_type
    from bempp.core.dispatcher import dense_assembler_dispatcher
    from bempp.core.singular_assembler import assemble_singular_part

    precision = operator_descriptor.precision

    rows = dual_to_range.global_dof_count
    cols = domain.global_dof_count

    # Complex kernels require a complex result matrix, real kernels a real one.
    if operator_descriptor.is_complex:
        result_type = get_type(precision).complex
    else:
        result_type = get_type(precision).real

    result = _np.zeros((rows, cols), dtype=result_type)

    # Assemble the regular (non-singular) part of the operator into `result`
    # in place on the selected device backend.
    with bempp.api.Timer(
            message=
            f"Regular assembler:{operator_descriptor.identifier}:{device_interface}"
    ):
        dense_assembler_dispatcher(
            device_interface,
            operator_descriptor,
            domain,
            dual_to_range,
            parameters,
            result,
        )

    grids_identical = domain.grid == dual_to_range.grid

    # A singular correction is only added when test and trial grids coincide;
    # only then do singular element pairs occur.
    if grids_identical:
        # Flattened (element, local dof) -> global dof maps and multipliers.
        trial_local2global = domain.local2global.ravel()
        test_local2global = dual_to_range.local2global.ravel()
        trial_multipliers = domain.local_multipliers.ravel()
        test_multipliers = dual_to_range.local_multipliers.ravel()

        # Singular integrals are computed on the localised spaces and then
        # scattered into the global matrix below.
        singular_rows, singular_cols, singular_values = assemble_singular_part(
            domain.localised_space,
            dual_to_range.localised_space,
            parameters,
            operator_descriptor,
            device_interface,
        )

        # NOTE: `rows`/`cols` are re-bound here from scalar dof counts to
        # index arrays for the scatter below.
        rows = test_local2global[singular_rows]
        cols = trial_local2global[singular_cols]
        values = (singular_values * trial_multipliers[singular_cols] *
                  test_multipliers[singular_rows])

        # Unbuffered accumulation: several localised pairs may map to the
        # same global (row, col) entry, so plain fancy assignment would drop
        # contributions.
        _np.add.at(result, (rows, cols), values)

    return result
def potential_assembler(device_interface, space, operator_descriptor, points,
                        parameters):
    """Return an evaluator function to evaluate a potential."""
    from bempp.core.numba_kernels import select_numba_kernels
    from bempp.api.integration.triangle_gauss import rule
    from bempp.api.utils.helpers import get_type

    # Pick the Numba assembly routine and kernel matching the operator.
    assembly_fun, kernel_fun = select_numba_kernels(
        operator_descriptor, mode="potential")

    quad_points, quad_weights = rule(parameters.quadrature.regular)

    # Perform Numba assembly always in double precision
    # precision = operator_descriptor.precision
    precision = "double"

    dtype = _np.dtype(get_type(precision).real)
    result_type = (_np.dtype(get_type(precision).complex)
                   if operator_descriptor.is_complex else dtype)

    kernel_dimension = operator_descriptor.kernel_dimension

    # Everything independent of the coefficient vector is prepared once, so
    # each call to the evaluator only has to convert the coefficients.
    transformed_points = points.astype(dtype)
    grid_data = space.grid.data(precision)
    kernel_parameters = _np.array(operator_descriptor.options, dtype=dtype)

    def evaluator(coefficients):
        """Evaluate the potential for a given coefficient vector."""
        return assembly_fun(
            dtype,
            result_type,
            kernel_dimension,
            transformed_points,
            coefficients.astype(result_type),
            grid_data,
            quad_points.astype(precision),
            quad_weights.astype(precision),
            space.number_of_shape_functions,
            space.shapeset.evaluate,
            kernel_fun,
            kernel_parameters,
            space.normal_multipliers,
            space.support_elements,
        )

    return evaluator
def dense_assembler(device_interface, operator_descriptor, domain,
                    dual_to_range, parameters, result):
    """Numba based dense assembler."""
    from bempp.core.numba_kernels import select_numba_kernels
    from bempp.api.utils.helpers import get_type
    from bempp.api.integration.triangle_gauss import rule

    (
        numba_assembly_function_regular,
        numba_kernel_function_regular,
    ) = select_numba_kernels(operator_descriptor, mode="regular")

    order = parameters.quadrature.regular
    quad_points, quad_weights = rule(order)

    # Perform Numba assembly always in double precision
    # precision = operator_descriptor.precision
    precision = "double"

    data_type = get_type(precision).real

    # Elements grouped by color -- presumably so elements within one color
    # can be processed in parallel without write conflicts (TODO confirm
    # against get_elements_by_color).
    test_indices, test_color_indexptr = dual_to_range.get_elements_by_color()
    trial_indices, trial_color_indexptr = domain.get_elements_by_color()

    number_of_test_colors = len(test_color_indexptr) - 1
    # number_of_trial_colors = len(trial_color_indexptr) - 1

    # rows = dual_to_range.global_dof_count
    # cols = domain.global_dof_count

    nshape_test = dual_to_range.number_of_shape_functions
    nshape_trial = domain.number_of_shape_functions

    grids_identical = domain.grid == dual_to_range.grid

    # One launch per test color: the slice of test elements belonging to
    # that color is assembled against all trial elements; `result` is
    # filled in place.
    for test_color_index in range(number_of_test_colors):
        numba_assembly_function_regular(
            dual_to_range.grid.data(precision),
            domain.grid.data(precision),
            nshape_test,
            nshape_trial,
            test_indices[test_color_indexptr[test_color_index]:
                         test_color_indexptr[1 + test_color_index]],
            trial_indices,
            dual_to_range.local_multipliers.astype(data_type),
            domain.local_multipliers.astype(data_type),
            dual_to_range.local2global,
            domain.local2global,
            dual_to_range.normal_multipliers,
            domain.normal_multipliers,
            quad_points.astype(data_type),
            quad_weights.astype(data_type),
            numba_kernel_function_regular,
            _np.array(operator_descriptor.options, dtype=data_type),
            grids_identical,
            dual_to_range.shapeset.evaluate,
            domain.shapeset.evaluate,
            result,
        )
def singular_assembler(
    device_interface,
    operator_descriptor,
    grid,
    domain,
    dual_to_range,
    test_points,
    trial_points,
    quad_weights,
    test_elements,
    trial_elements,
    test_offsets,
    trial_offsets,
    weights_offsets,
    number_of_quad_points,
    kernel_options,
    result,
):
    """Numba assembler for the singular part of integral operators."""
    from bempp.api.utils.helpers import get_type
    from bempp.core.numba_kernels import select_numba_kernels

    # Pick the Numba assembly routine and kernel for singular integration.
    numba_assembly_function, numba_kernel_function = select_numba_kernels(
        operator_descriptor, mode="singular")

    # Perform Numba assembly always in double precision
    # precision = operator_descriptor.precision
    precision = "double"
    dtype = get_type(precision).real

    # The singular quadrature data (points, weights, per-pair offsets) is
    # precomputed by the caller; this call fills `result` in place.
    numba_assembly_function(
        grid.data(precision),
        test_points,
        trial_points,
        quad_weights,
        test_elements,
        trial_elements,
        test_offsets,
        trial_offsets,
        weights_offsets,
        number_of_quad_points,
        dual_to_range.normal_multipliers,
        domain.normal_multipliers,
        dual_to_range.number_of_shape_functions,
        domain.number_of_shape_functions,
        dual_to_range.shapeset.evaluate,
        domain.shapeset.evaluate,
        numba_kernel_function,
        _np.array(kernel_options, dtype=dtype),
        result,
    )
def potential_assembler(device_interface, space, operator_descriptor, points,
                        parameters):
    """Assemble dense with OpenCL."""
    import bempp.api
    from bempp.api.integration.triangle_gauss import rule
    from bempp.api.utils.helpers import get_type
    from bempp.core.opencl_kernels import get_kernel_from_name
    from bempp.core.opencl_kernels import get_kernel_from_operator_descriptor
    from bempp.core.opencl_kernels import (
        default_context,
        default_device,
        get_vector_width,
    )

    if bempp.api.POTENTIAL_OPERATOR_DEVICE_TYPE == "gpu":
        device_type = "gpu"
    elif bempp.api.POTENTIAL_OPERATOR_DEVICE_TYPE == "cpu":
        device_type = "cpu"
    else:
        raise RuntimeError(
            f"Unknown device type {bempp.api.POTENTIAL_OPERATOR_DEVICE_TYPE}")

    mf = _cl.mem_flags
    ctx = default_context(device_type)
    device = default_device(device_type)
    quad_points, quad_weights = rule(parameters.quadrature.regular)

    precision = operator_descriptor.precision
    dtype = get_type(precision).real
    kernel_options = operator_descriptor.options
    kernel_dimension = operator_descriptor.kernel_dimension

    if operator_descriptor.is_complex:
        result_type = _np.dtype(get_type(precision).complex)
    else:
        result_type = dtype
    result_type = _np.dtype(result_type)

    indices = space.support_elements
    nelements = len(indices)
    vector_width = get_vector_width(precision, device_type=device_type)
    npoints = points.shape[1]

    # Split the elements into a main part that fills whole workgroups of the
    # vectorised kernel and a remainder handled by a scalar ("novec") kernel.
    remainder_size = nelements % WORKGROUP_SIZE_POTENTIAL
    main_size = nelements - remainder_size

    main_kernel = None
    remainder_kernel = None
    sum_kernel = None

    # Compile-time options passed to the OpenCL kernel builder.
    options = {
        "NUMBER_OF_QUAD_POINTS": len(quad_weights),
        "SHAPESET": space.shapeset.identifier,
        "NUMBER_OF_SHAPE_FUNCTIONS": space.number_of_shape_functions,
        "WORKGROUP_SIZE": WORKGROUP_SIZE_POTENTIAL // vector_width,
    }

    if operator_descriptor.is_complex:
        options["COMPLEX_KERNEL"] = None
        options["COMPLEX_COEFFICIENTS"] = None
        options["COMPLEX_RESULT"] = None

    if main_size > 0:
        main_kernel = get_kernel_from_operator_descriptor(
            operator_descriptor, options, "potential", device_type=device_type)
        # Reduction kernel that sums the per-workgroup partial results.
        sum_kernel = get_kernel_from_name("sum_for_potential_novec",
                                          options,
                                          precision,
                                          device_type=device_type)

    if remainder_size > 0:
        options["WORKGROUP_SIZE"] = remainder_size
        remainder_kernel = get_kernel_from_operator_descriptor(
            operator_descriptor,
            options,
            "potential",
            force_novec=True,
            device_type=device_type,
        )

    indices_buffer = _cl.Buffer(ctx,
                                mf.READ_ONLY | mf.COPY_HOST_PTR,
                                hostbuf=indices)
    normals_buffer = _cl.Buffer(ctx,
                                mf.READ_ONLY | mf.COPY_HOST_PTR,
                                hostbuf=space.normal_multipliers)
    points_buffer = _cl.Buffer(
        ctx,
        mf.READ_ONLY | mf.COPY_HOST_PTR,
        hostbuf=points.ravel(order="F").astype(dtype),
    )
    grid_buffer = _cl.Buffer(ctx,
                             mf.READ_ONLY | mf.COPY_HOST_PTR,
                             hostbuf=space.grid.as_array.astype(dtype))
    # elements_buffer = _cl.Buffer(
    #     ctx,
    #     mf.READ_ONLY | mf.COPY_HOST_PTR,
    #     hostbuf=space.grid.elements.ravel(order="F"),
    # )
    quad_points_buffer = _cl.Buffer(
        ctx,
        mf.READ_ONLY | mf.COPY_HOST_PTR,
        hostbuf=quad_points.ravel(order="F").astype(dtype),
    )
    quad_weights_buffer = _cl.Buffer(ctx,
                                     mf.READ_ONLY | mf.COPY_HOST_PTR,
                                     hostbuf=quad_weights.astype(dtype))
    result_buffer = _cl.Buffer(ctx,
                               mf.READ_WRITE,
                               size=result_type.itemsize * kernel_dimension *
                               npoints)
    # Coefficient buffer is written anew on every evaluator call.
    coefficients_buffer = _cl.Buffer(ctx,
                                     mf.READ_ONLY,
                                     size=result_type.itemsize *
                                     space.map_to_full_grid.shape[0])

    if main_size > 0:
        # Buffer for per-workgroup partial sums, reduced by sum_kernel.
        sum_size = (kernel_dimension * npoints *
                    (nelements // WORKGROUP_SIZE_POTENTIAL) *
                    result_type.itemsize)
        sum_buffer = _cl.Buffer(ctx, mf.READ_WRITE, size=sum_size)

    if not kernel_options:
        # Placeholder so that an options buffer can always be created.
        kernel_options = [0.0]

    kernel_options_array = _np.array(kernel_options, dtype=dtype)
    kernel_options_buffer = _cl.Buffer(ctx,
                                       mf.READ_ONLY | mf.COPY_HOST_PTR,
                                       hostbuf=kernel_options_array)

    def evaluator(x):
        """Evaluate a potential."""
        result = _np.empty(kernel_dimension * npoints, dtype=result_type)
        with _cl.CommandQueue(ctx, device=device) as queue:
            _cl.enqueue_copy(queue, coefficients_buffer,
                             x.astype(result_type))
            _cl.enqueue_fill_buffer(
                queue,
                result_buffer,
                _np.uint8(0),
                0,
                kernel_dimension * npoints * result_type.itemsize,
            )
            if main_size > 0:
                _cl.enqueue_fill_buffer(queue, sum_buffer, _np.uint8(0), 0,
                                        sum_size)
                queue.finish()
                main_kernel(
                    queue,
                    (npoints, main_size // vector_width),
                    (1, WORKGROUP_SIZE_POTENTIAL // vector_width),
                    grid_buffer,
                    indices_buffer,
                    normals_buffer,
                    points_buffer,
                    coefficients_buffer,
                    quad_points_buffer,
                    quad_weights_buffer,
                    sum_buffer,
                    kernel_options_buffer,
                )
                sum_kernel(
                    queue,
                    (kernel_dimension * npoints, ),
                    (1, ),
                    sum_buffer,
                    result_buffer,
                    _np.uint32(nelements // WORKGROUP_SIZE_POTENTIAL),
                )
            if remainder_size > 0:
                # The remainder kernel starts at offset `main_size` and
                # accumulates directly into the result buffer.
                remainder_kernel(
                    queue,
                    (npoints, remainder_size),
                    (1, remainder_size),
                    grid_buffer,
                    indices_buffer,
                    normals_buffer,
                    points_buffer,
                    coefficients_buffer,
                    quad_points_buffer,
                    quad_weights_buffer,
                    result_buffer,
                    kernel_options_buffer,
                    global_offset=(0, main_size),
                )
            _cl.enqueue_copy(queue, result, result_buffer)
        return result

    return evaluator
def dense_assembler(device_interface, operator_descriptor, domain,
                    dual_to_range, parameters, result):
    """Assemble dense with OpenCL.

    The array `result` is filled in place with the regular (non-singular)
    part of the Galerkin discretisation of the operator.
    """
    import bempp.api
    from bempp.api.integration.triangle_gauss import rule
    from bempp.api.utils.helpers import get_type
    from bempp.core.opencl_kernels import get_kernel_from_operator_descriptor
    from bempp.core.opencl_kernels import (
        default_context,
        default_device,
        get_vector_width,
    )

    if bempp.api.BOUNDARY_OPERATOR_DEVICE_TYPE == "gpu":
        device_type = "gpu"
    elif bempp.api.BOUNDARY_OPERATOR_DEVICE_TYPE == "cpu":
        device_type = "cpu"
    else:
        # Bug fix: the message previously printed
        # POTENTIAL_OPERATOR_DEVICE_TYPE although the value checked here is
        # BOUNDARY_OPERATOR_DEVICE_TYPE, producing a misleading diagnostic.
        raise RuntimeError(
            f"Unknown device type {bempp.api.BOUNDARY_OPERATOR_DEVICE_TYPE}")

    mf = _cl.mem_flags
    ctx = default_context(device_type)
    device = default_device(device_type)

    precision = operator_descriptor.precision
    dtype = get_type(precision).real
    kernel_options = operator_descriptor.options

    quad_points, quad_weights = rule(parameters.quadrature.regular)

    # Elements grouped by color; one kernel launch per (test, trial) color
    # pair -- presumably so a launch never writes the same global dof twice
    # (TODO confirm against get_elements_by_color).
    test_indices, test_color_indexptr = dual_to_range.get_elements_by_color()
    trial_indices, trial_color_indexptr = domain.get_elements_by_color()
    number_of_test_colors = len(test_color_indexptr) - 1
    number_of_trial_colors = len(trial_color_indexptr) - 1

    # Compile-time options for the OpenCL kernel builder.
    options = {
        "NUMBER_OF_QUAD_POINTS": len(quad_weights),
        "TEST": dual_to_range.shapeset.identifier,
        "TRIAL": domain.shapeset.identifier,
        "TRIAL_NUMBER_OF_ELEMENTS": domain.number_of_support_elements,
        "TEST_NUMBER_OF_ELEMENTS": dual_to_range.number_of_support_elements,
        "NUMBER_OF_TEST_SHAPE_FUNCTIONS":
        dual_to_range.number_of_shape_functions,
        "NUMBER_OF_TRIAL_SHAPE_FUNCTIONS": domain.number_of_shape_functions,
    }

    if operator_descriptor.is_complex:
        options["COMPLEX_KERNEL"] = None

    # Vectorised main kernel plus a scalar ("novec") kernel for the trial
    # elements left over after dividing by the vector width.
    main_kernel = get_kernel_from_operator_descriptor(operator_descriptor,
                                                      options,
                                                      "regular",
                                                      device_type=device_type)
    remainder_kernel = get_kernel_from_operator_descriptor(
        operator_descriptor,
        options,
        "regular",
        force_novec=True,
        device_type=device_type,
    )

    test_indices_buffer = _cl.Buffer(ctx,
                                     mf.READ_ONLY | mf.COPY_HOST_PTR,
                                     hostbuf=test_indices)
    trial_indices_buffer = _cl.Buffer(ctx,
                                      mf.READ_ONLY | mf.COPY_HOST_PTR,
                                      hostbuf=trial_indices)
    test_normals_buffer = _cl.Buffer(ctx,
                                     mf.READ_ONLY | mf.COPY_HOST_PTR,
                                     hostbuf=dual_to_range.normal_multipliers)
    trial_normals_buffer = _cl.Buffer(ctx,
                                      mf.READ_ONLY | mf.COPY_HOST_PTR,
                                      hostbuf=domain.normal_multipliers)
    test_grid_buffer = _cl.Buffer(
        ctx,
        mf.READ_ONLY | mf.COPY_HOST_PTR,
        hostbuf=dual_to_range.grid.as_array.astype(dtype),
    )
    trial_grid_buffer = _cl.Buffer(ctx,
                                   mf.READ_ONLY | mf.COPY_HOST_PTR,
                                   hostbuf=domain.grid.as_array.astype(dtype))
    test_elements_buffer = _cl.Buffer(
        ctx,
        mf.READ_ONLY | mf.COPY_HOST_PTR,
        hostbuf=dual_to_range.grid.elements.ravel(order="F"),
    )
    trial_elements_buffer = _cl.Buffer(
        ctx,
        mf.READ_ONLY | mf.COPY_HOST_PTR,
        hostbuf=domain.grid.elements.ravel(order="F"),
    )
    test_local2global_buffer = _cl.Buffer(ctx,
                                          mf.READ_ONLY | mf.COPY_HOST_PTR,
                                          hostbuf=dual_to_range.local2global)
    trial_local2global_buffer = _cl.Buffer(ctx,
                                           mf.READ_ONLY | mf.COPY_HOST_PTR,
                                           hostbuf=domain.local2global)
    test_multipliers_buffer = _cl.Buffer(
        ctx,
        mf.READ_ONLY | mf.COPY_HOST_PTR,
        hostbuf=dual_to_range.local_multipliers.astype(dtype),
    )
    trial_multipliers_buffer = _cl.Buffer(
        ctx,
        mf.READ_ONLY | mf.COPY_HOST_PTR,
        hostbuf=domain.local_multipliers.astype(dtype),
    )
    quad_points_buffer = _cl.Buffer(
        ctx,
        mf.READ_ONLY | mf.COPY_HOST_PTR,
        hostbuf=quad_points.ravel(order="F").astype(dtype),
    )
    quad_weights_buffer = _cl.Buffer(ctx,
                                     mf.READ_ONLY | mf.COPY_HOST_PTR,
                                     hostbuf=quad_weights.astype(dtype))
    result_buffer = _cl.Buffer(ctx, mf.READ_WRITE, size=result.nbytes)

    if not kernel_options:
        # Placeholder so that an options buffer can always be created.
        kernel_options = [0.0]

    kernel_options_array = _np.array(kernel_options, dtype=dtype)
    kernel_options_buffer = _cl.Buffer(ctx,
                                       mf.READ_ONLY | mf.COPY_HOST_PTR,
                                       hostbuf=kernel_options_array)

    vector_width = get_vector_width(precision, device_type=device_type)

    def kernel_runner(
        queue,
        test_offset,
        trial_offset,
        test_number_of_indices,
        trial_number_of_indices,
    ):
        """Actually run the kernel for a given range."""
        remainder_size = trial_number_of_indices % vector_width
        main_size = trial_number_of_indices - remainder_size
        buffers = [
            test_indices_buffer,
            trial_indices_buffer,
            test_normals_buffer,
            trial_normals_buffer,
            test_grid_buffer,
            trial_grid_buffer,
            test_elements_buffer,
            trial_elements_buffer,
            test_local2global_buffer,
            trial_local2global_buffer,
            test_multipliers_buffer,
            trial_multipliers_buffer,
            quad_points_buffer,
            quad_weights_buffer,
            result_buffer,
            kernel_options_buffer,
            _np.int32(dual_to_range.global_dof_count),
            _np.int32(domain.global_dof_count),
            # Flag telling the kernel whether the grids are disjoint.
            _np.uint8(domain.grid != dual_to_range.grid),
        ]
        if main_size > 0:
            main_kernel(
                queue,
                (test_number_of_indices, main_size // vector_width),
                (1, 1),
                *buffers,
                global_offset=(test_offset, trial_offset),
            )
        if remainder_size > 0:
            remainder_kernel(
                queue,
                (test_number_of_indices, remainder_size),
                (1, 1),
                *buffers,
                global_offset=(test_offset, trial_offset + main_size),
            )

    with _cl.CommandQueue(ctx, device=device) as queue:
        _cl.enqueue_fill_buffer(queue, result_buffer, _np.uint8(0), 0,
                                result.nbytes)
        # Iterate over all (test color, trial color) pairs.
        for test_index in range(number_of_test_colors):
            test_offset = test_color_indexptr[test_index]
            n_test_indices = (test_color_indexptr[1 + test_index] -
                              test_color_indexptr[test_index])
            for trial_index in range(number_of_trial_colors):
                n_trial_indices = (trial_color_indexptr[1 + trial_index] -
                                   trial_color_indexptr[trial_index])
                trial_offset = trial_color_indexptr[trial_index]
                kernel_runner(queue, test_offset, trial_offset,
                              n_test_indices, n_trial_indices)
        _cl.enqueue_copy(queue, result, result_buffer)
def singular_assembler(
    device_interface,
    operator_descriptor,
    grid,
    domain,
    dual_to_range,
    test_points,
    trial_points,
    quad_weights,
    test_elements,
    trial_elements,
    test_offsets,
    trial_offsets,
    weights_offsets,
    number_of_quad_points,
    kernel_options,
    result,
):
    """Assemble singular part of integral operators with OpenCL."""
    from bempp.api.utils.helpers import get_type
    from bempp.core.opencl_kernels import get_kernel_from_operator_descriptor
    from bempp.core.opencl_kernels import default_context, default_device

    mf = _cl.mem_flags
    ctx = default_context()
    device = default_device()

    precision = operator_descriptor.precision
    dtype = get_type(precision).real

    # Compile-time options for the OpenCL kernel builder.
    options = {
        "WORKGROUP_SIZE": WORKGROUP_SIZE_GALERKIN,
        "TEST": dual_to_range.shapeset.identifier,
        "TRIAL": domain.shapeset.identifier,
        "NUMBER_OF_TEST_SHAPE_FUNCTIONS":
        dual_to_range.number_of_shape_functions,
        "NUMBER_OF_TRIAL_SHAPE_FUNCTIONS": domain.number_of_shape_functions,
    }

    if operator_descriptor.is_complex:
        options["COMPLEX_KERNEL"] = None

    kernel = get_kernel_from_operator_descriptor(operator_descriptor, options,
                                                 "singular")

    # Initialize OpenCL Buffers
    grid_buffer = _cl.Buffer(ctx,
                             mf.READ_ONLY | mf.COPY_HOST_PTR,
                             hostbuf=grid.as_array.astype(dtype))
    test_normals_buffer = _cl.Buffer(ctx,
                                     mf.READ_ONLY | mf.COPY_HOST_PTR,
                                     hostbuf=dual_to_range.normal_multipliers)
    trial_normals_buffer = _cl.Buffer(ctx,
                                      mf.READ_ONLY | mf.COPY_HOST_PTR,
                                      hostbuf=domain.normal_multipliers)
    test_points_buffer = _cl.Buffer(ctx,
                                    mf.READ_ONLY | mf.COPY_HOST_PTR,
                                    hostbuf=test_points.astype(dtype))
    trial_points_buffer = _cl.Buffer(ctx,
                                     mf.READ_ONLY | mf.COPY_HOST_PTR,
                                     hostbuf=trial_points.astype(dtype))
    quad_weights_buffer = _cl.Buffer(ctx,
                                     mf.READ_ONLY | mf.COPY_HOST_PTR,
                                     hostbuf=quad_weights.astype(dtype))
    test_elements_buffer = _cl.Buffer(ctx,
                                      mf.READ_ONLY | mf.COPY_HOST_PTR,
                                      hostbuf=test_elements)
    trial_elements_buffer = _cl.Buffer(ctx,
                                       mf.READ_ONLY | mf.COPY_HOST_PTR,
                                       hostbuf=trial_elements)
    test_offsets_buffer = _cl.Buffer(ctx,
                                     mf.READ_ONLY | mf.COPY_HOST_PTR,
                                     hostbuf=test_offsets)
    trial_offsets_buffer = _cl.Buffer(ctx,
                                      mf.READ_ONLY | mf.COPY_HOST_PTR,
                                      hostbuf=trial_offsets)
    weights_offsets_buffer = _cl.Buffer(ctx,
                                        mf.READ_ONLY | mf.COPY_HOST_PTR,
                                        hostbuf=weights_offsets)

    # Quadrature points handled per work item after distributing each
    # element pair's points over a workgroup of size WORKGROUP_SIZE_GALERKIN.
    local_quad_points = number_of_quad_points // WORKGROUP_SIZE_GALERKIN

    local_quad_points_buffer = _cl.Buffer(ctx,
                                          mf.READ_ONLY | mf.COPY_HOST_PTR,
                                          hostbuf=local_quad_points)

    result_buffer = _cl.Buffer(ctx, mf.WRITE_ONLY, size=result.nbytes)

    if not kernel_options:
        # Placeholder so that an options buffer can always be created.
        kernel_options = [0.0]

    kernel_options_array = _np.array(kernel_options, dtype=dtype)
    kernel_options_buffer = _cl.Buffer(ctx,
                                       mf.READ_ONLY | mf.COPY_HOST_PTR,
                                       hostbuf=kernel_options_array)

    number_of_singular_indices = len(test_elements)

    # One workgroup per singular element pair; results are copied back into
    # `result` after the kernel finishes.
    with _cl.CommandQueue(ctx, device=device) as queue:
        kernel(
            queue,
            (number_of_singular_indices, ),
            (WORKGROUP_SIZE_GALERKIN, ),
            grid_buffer,
            test_normals_buffer,
            trial_normals_buffer,
            test_points_buffer,
            trial_points_buffer,
            quad_weights_buffer,
            test_elements_buffer,
            trial_elements_buffer,
            test_offsets_buffer,
            trial_offsets_buffer,
            weights_offsets_buffer,
            local_quad_points_buffer,
            result_buffer,
            kernel_options_buffer,
            g_times_l=True,
        )
        _cl.enqueue_copy(queue, result, result_buffer)
def get_local_interaction_operator(grid, local_points, kernel_function,
                                   kernel_parameters, precision, is_complex):
    """Return a linear operator for local (near-field) grid interactions.

    Parameters
    ----------
    grid : bempp grid object
        Grid over whose elements the interactions are evaluated.
    local_points : ndarray
        Reference points on each element; shape (dim, npoints).
    kernel_function : str
        One of 'laplace', 'helmholtz', 'modified_helmholtz'.
    kernel_parameters : array_like
        Parameters passed through to the kernel implementation.
    precision : str
        'single' or 'double' style precision identifier used by get_type.
    is_complex : bool
        If True the operator maps into complex values.

    Returns
    -------
    A scipy LinearOperator of shape
    (4 * npoints * n_elements, npoints * n_elements).
    """
    import bempp.api
    from bempp.api import GLOBAL_PARAMETERS
    from bempp.api.utils.helpers import get_type
    from scipy.sparse import csr_matrix
    from scipy.sparse.linalg import aslinearoperator
    from scipy.sparse.linalg import LinearOperator

    npoints = local_points.shape[1]
    dtype = _np.dtype(get_type(precision).real)

    if is_complex:
        result_type = _np.dtype(get_type(precision).complex)
    else:
        result_type = dtype

    # Four output values per evaluation point -- presumably the potential
    # plus its gradient (TODO confirm against the kernel implementations).
    rows = 4 * npoints * grid.number_of_elements
    cols = npoints * grid.number_of_elements

    if kernel_function == "laplace":
        kernel = laplace_kernel
    elif kernel_function == "helmholtz":
        kernel = helmholtz_kernel
    elif kernel_function == "modified_helmholtz":
        kernel = modified_helmholtz_kernel
    else:
        # Fail fast: previously an unknown name fell through and raised a
        # confusing NameError when `kernel` was first used.
        raise ValueError(
            "Unknown kernel function. Expected one of 'laplace', "
            "'helmholtz', 'modified_helmholtz'.")

    if GLOBAL_PARAMETERS.fmm.near_field_representation == "sparse":
        # Materialise the near field as a CSR matrix.
        data, indices, indexptr = get_local_interaction_matrix_impl(
            grid.data(precision),
            local_points.astype(dtype),
            kernel,
            _np.array(kernel_parameters, dtype=dtype),
            dtype,
            result_type,
        )
        return aslinearoperator(
            csr_matrix((data, indices, indexptr), shape=(rows, cols)))
    elif GLOBAL_PARAMETERS.fmm.near_field_representation == "evaluate":
        # Matrix-free evaluation via Numba or OpenCL.
        if bempp.api.DEFAULT_DEVICE_INTERFACE == "numba":
            evaluator = get_local_interaction_evaluator_numba(
                grid.data(precision),
                local_points.astype(dtype),
                kernel,
                _np.array(kernel_parameters, dtype=dtype),
                dtype,
                result_type,
            )
            return LinearOperator(shape=(rows, cols),
                                  matvec=evaluator,
                                  dtype=result_type)
        elif bempp.api.DEFAULT_DEVICE_INTERFACE == "opencl":
            evaluator = get_local_interaction_evaluator_opencl(
                grid,
                local_points.astype(dtype),
                kernel_function,
                _np.array(kernel_parameters, dtype=dtype),
                dtype,
                result_type,
            )
            return LinearOperator(shape=(rows, cols),
                                  matvec=evaluator,
                                  dtype=result_type)
        else:
            raise ValueError(
                "DEFAULT_DEVICE_INTERFACE must be one of 'numba', 'opencl'.")
    else:
        raise ValueError("Unknown value for near_field_representation.")
def assemble_singular_part(domain, dual_to_range, parameters,
                           operator_descriptor, device_interface):
    """Actually assemble the Numba kernel."""
    from bempp.api.utils.helpers import get_type
    from bempp.core.dispatcher import singular_assembler_dispatcher
    import bempp.api

    precision = operator_descriptor.precision
    kernel_options = operator_descriptor.options

    is_complex = operator_descriptor.is_complex

    grid = domain.grid

    order = parameters.quadrature.singular

    # Singular quadrature rule interface for the element pairs that require
    # special treatment.
    rule = _SingularQuadratureRuleInterfaceGalerkin(grid, order,
                                                    dual_to_range.support,
                                                    domain.support)

    number_of_test_shape_functions = dual_to_range.number_of_shape_functions
    number_of_trial_shape_functions = domain.number_of_shape_functions

    # Precomputed quadrature data for all singular pairs.
    [
        test_points,
        trial_points,
        quad_weights,
        test_elements,
        trial_elements,
        test_offsets,
        trial_offsets,
        weights_offsets,
        number_of_quad_points,
    ] = rule.get_arrays()

    if is_complex:
        result_type = get_type(precision).complex
    else:
        result_type = get_type(precision).real

    # One flat block of nshape_test * nshape_trial values per singular
    # element pair; the dispatcher fills it in place.
    result = _np.zeros(
        number_of_test_shape_functions * number_of_trial_shape_functions *
        len(test_elements),
        dtype=result_type,
    )

    with bempp.api.Timer(message=(
            f"Singular assembler:{operator_descriptor.identifier}:{device_interface}"
    )):
        singular_assembler_dispatcher(
            device_interface,
            operator_descriptor,
            grid,
            domain,
            dual_to_range,
            test_points,
            trial_points,
            quad_weights,
            test_elements,
            trial_elements,
            test_offsets,
            trial_offsets,
            weights_offsets,
            number_of_quad_points,
            kernel_options,
            result,
        )

    irange = _np.arange(number_of_test_shape_functions)
    jrange = _np.arange(number_of_trial_shape_functions)

    # Expand the flat per-pair blocks into explicit (i, j) index arrays that
    # match the layout of `result`: within each pair the local test index is
    # repeated over trial dofs, then shifted by the element's dof offset.
    i_ind = _np.tile(
        _np.repeat(irange, number_of_trial_shape_functions),
        len(rule.trial_indices)) + _np.repeat(
            rule.test_indices * number_of_test_shape_functions,
            number_of_test_shape_functions * number_of_trial_shape_functions,
        )

    j_ind = _np.tile(
        _np.tile(jrange, number_of_test_shape_functions),
        len(rule.trial_indices)) + _np.repeat(
            rule.trial_indices * number_of_trial_shape_functions,
            number_of_test_shape_functions *
            number_of_trial_shape_functions,
        )

    return (i_ind, j_ind, result)
def assemble_sparse(
    domain,
    dual_to_range,
    parameters,
    operator_descriptor,
    numba_assembly_function,
    numba_kernel_function,
):
    """Actually assemble the operator.

    Returns (i_ind, j_ind, result): flat global row/column indices and the
    corresponding values of the element-local blocks.
    """
    import bempp.api
    from bempp.api.integration.triangle_gauss import rule as regular_rule
    from bempp.api.utils.helpers import get_type

    quad_points, quad_weights = regular_rule(parameters.quadrature.regular)

    # Only elements in the support of both spaces contribute.
    joint_support = domain.support * dual_to_range.support
    element_indices = _np.flatnonzero(joint_support)
    n_elements = len(element_indices)

    n_test = dual_to_range.number_of_shape_functions
    n_trial = domain.number_of_shape_functions

    # Always assemble in double precision for sparse ops
    # precision = operator_descriptor.precision
    precision = "double"

    result_type = (get_type(precision).complex
                   if operator_descriptor.is_complex else
                   get_type(precision).real)

    # One n_test * n_trial block per contributing element, stored flat.
    result = _np.zeros(n_test * n_trial * n_elements, dtype=result_type)

    with bempp.api.Timer() as t:  # noqa: F841
        numba_assembly_function(
            domain.grid.data(precision),
            n_test,
            n_trial,
            element_indices,
            quad_points,
            quad_weights,
            dual_to_range.normal_multipliers,
            domain.normal_multipliers,
            dual_to_range.local_multipliers,
            domain.local_multipliers,
            dual_to_range.shapeset.evaluate,
            domain.shapeset.evaluate,
            dual_to_range.numba_evaluate,
            domain.numba_evaluate,
            numba_kernel_function,
            result,
        )

    # Local (row, col) pattern of a single element block, repeated per
    # element and shifted by that element's global dof offsets.
    local_rows = _np.repeat(_np.arange(n_test), n_trial)
    local_cols = _np.tile(_np.arange(n_trial), n_test)
    block_size = n_test * n_trial

    i_ind = _np.tile(local_rows, n_elements) + _np.repeat(
        element_indices * n_test, block_size)
    j_ind = _np.tile(local_cols, n_elements) + _np.repeat(
        element_indices * n_trial, block_size)

    return i_ind, j_ind, result