예제 #1
0
def assemble_dense(domain, dual_to_range, parameters, operator_descriptor,
                   device_interface):
    """Build the dense matrix of an operator and return it.

    A zero-initialised array with ``dual_to_range.global_dof_count`` rows and
    ``domain.global_dof_count`` columns is handed to the dense assembler
    dispatcher for the regular contribution. When both spaces live on the
    same grid, the singular contribution is assembled on the localised
    spaces, mapped to global degrees of freedom and accumulated on top.
    """
    import bempp.api
    from bempp.api.utils.helpers import get_type
    from bempp.core.dispatcher import dense_assembler_dispatcher
    from bempp.core.singular_assembler import assemble_singular_part

    scalar_types = get_type(operator_descriptor.precision)
    result_type = (scalar_types.complex
                   if operator_descriptor.is_complex else scalar_types.real)

    result = _np.zeros(
        (dual_to_range.global_dof_count, domain.global_dof_count),
        dtype=result_type,
    )

    timer_message = (
        f"Regular assembler:{operator_descriptor.identifier}:{device_interface}"
    )
    with bempp.api.Timer(message=timer_message):
        dense_assembler_dispatcher(
            device_interface,
            operator_descriptor,
            domain,
            dual_to_range,
            parameters,
            result,
        )

    # Singular corrections are only assembled when the grids coincide.
    if not (domain.grid == dual_to_range.grid):
        return result

    trial_dof_map = domain.local2global.ravel()
    test_dof_map = dual_to_range.local2global.ravel()
    trial_scaling = domain.local_multipliers.ravel()
    test_scaling = dual_to_range.local_multipliers.ravel()

    local_rows, local_cols, local_values = assemble_singular_part(
        domain.localised_space,
        dual_to_range.localised_space,
        parameters,
        operator_descriptor,
        device_interface,
    )

    global_rows = test_dof_map[local_rows]
    global_cols = trial_dof_map[local_cols]
    scaled_values = (local_values * trial_scaling[local_cols] *
                     test_scaling[local_rows])

    # add.at accumulates correctly when an index pair occurs more than once.
    _np.add.at(result, (global_rows, global_cols), scaled_values)

    return result
예제 #2
0
def potential_assembler(device_interface, space, operator_descriptor, points,
                        parameters):
    """Create a callable that evaluates a potential with the Numba kernels.

    The returned function takes a coefficient vector ``x`` and forwards it,
    together with the data prepared here, to the Numba assembly function
    selected for ``operator_descriptor`` in ``"potential"`` mode.
    ``device_interface`` is not used in this implementation.
    """
    from bempp.core.numba_kernels import select_numba_kernels
    from bempp.api.integration.triangle_gauss import rule
    from bempp.api.utils.helpers import get_type

    assembly_function, regular_kernel = select_numba_kernels(
        operator_descriptor, mode="potential")

    quad_points, quad_weights = rule(parameters.quadrature.regular)

    # Numba assembly is always carried out in double precision, whatever
    # precision the operator descriptor requests.
    precision = "double"

    dtype = _np.dtype(get_type(precision).real)
    result_type = (_np.dtype(get_type(precision).complex)
                   if operator_descriptor.is_complex else dtype)

    kernel_dimension = operator_descriptor.kernel_dimension
    points_transformed = points.astype(dtype)
    grid_data = space.grid.data(precision)
    kernel_parameters = _np.array(operator_descriptor.options, dtype=dtype)

    def evaluator(x):
        """Evaluate the potential for the coefficient vector ``x``."""
        coefficients = x.astype(result_type)
        return numba_call(coefficients)

    def numba_call(coefficients):
        """Invoke the Numba assembly function with the prepared data."""
        return assembly_function(
            dtype,
            result_type,
            kernel_dimension,
            points_transformed,
            coefficients,
            grid_data,
            quad_points.astype(precision),
            quad_weights.astype(precision),
            space.number_of_shape_functions,
            space.shapeset.evaluate,
            regular_kernel,
            kernel_parameters,
            space.normal_multipliers,
            space.support_elements,
        )

    return evaluator
예제 #3
0
def dense_assembler(device_interface, operator_descriptor, domain,
                    dual_to_range, parameters, result):
    """Numba based dense assembler for the regular part of an operator.

    The Numba assembly function selected for ``operator_descriptor`` in
    ``"regular"`` mode is called once per test color, as returned by
    ``dual_to_range.get_elements_by_color()``, each time with all trial
    element indices. ``result`` is passed to the kernel, which is expected
    to fill it in place; this function returns nothing.

    ``device_interface`` is not used in this implementation.
    """
    from bempp.core.numba_kernels import select_numba_kernels
    from bempp.api.utils.helpers import get_type
    from bempp.api.integration.triangle_gauss import rule

    (
        numba_assembly_function_regular,
        numba_kernel_function_regular,
    ) = select_numba_kernels(operator_descriptor, mode="regular")

    quad_points, quad_weights = rule(parameters.quadrature.regular)

    # Numba assembly is always carried out in double precision, whatever
    # precision the operator descriptor requests.
    precision = "double"
    data_type = get_type(precision).real

    test_indices, test_color_indexptr = dual_to_range.get_elements_by_color()
    # Only the trial element indices are needed; the trial colors are unused.
    trial_indices, _ = domain.get_elements_by_color()

    # Everything below is identical for every color, so it is prepared once
    # rather than being converted again on each loop iteration.
    test_grid_data = dual_to_range.grid.data(precision)
    trial_grid_data = domain.grid.data(precision)
    nshape_test = dual_to_range.number_of_shape_functions
    nshape_trial = domain.number_of_shape_functions
    test_multipliers = dual_to_range.local_multipliers.astype(data_type)
    trial_multipliers = domain.local_multipliers.astype(data_type)
    quad_points_typed = quad_points.astype(data_type)
    quad_weights_typed = quad_weights.astype(data_type)
    kernel_parameters = _np.array(operator_descriptor.options,
                                  dtype=data_type)
    grids_identical = domain.grid == dual_to_range.grid

    number_of_test_colors = len(test_color_indexptr) - 1

    for color in range(number_of_test_colors):
        color_start = test_color_indexptr[color]
        color_end = test_color_indexptr[color + 1]
        numba_assembly_function_regular(
            test_grid_data,
            trial_grid_data,
            nshape_test,
            nshape_trial,
            test_indices[color_start:color_end],
            trial_indices,
            test_multipliers,
            trial_multipliers,
            dual_to_range.local2global,
            domain.local2global,
            dual_to_range.normal_multipliers,
            domain.normal_multipliers,
            quad_points_typed,
            quad_weights_typed,
            numba_kernel_function_regular,
            kernel_parameters,
            grids_identical,
            dual_to_range.shapeset.evaluate,
            domain.shapeset.evaluate,
            result,
        )
예제 #4
0
def singular_assembler(
    device_interface,
    operator_descriptor,
    grid,
    domain,
    dual_to_range,
    test_points,
    trial_points,
    quad_weights,
    test_elements,
    trial_elements,
    test_offsets,
    trial_offsets,
    weights_offsets,
    number_of_quad_points,
    kernel_options,
    result,
):
    """Run the Numba kernel for the singular part of an integral operator.

    The quadrature arrays are forwarded unchanged to the Numba assembly
    function selected for ``operator_descriptor`` in ``"singular"`` mode.
    ``result`` is passed to that function and nothing is returned.
    ``device_interface`` is not used in this implementation.
    """
    from bempp.api.utils.helpers import get_type
    from bempp.core.numba_kernels import select_numba_kernels

    assembly_function, kernel_function = select_numba_kernels(
        operator_descriptor, mode="singular")

    # Numba assembly is always carried out in double precision, whatever
    # precision the operator descriptor requests.
    precision = "double"
    real_type = get_type(precision).real

    grid_data = grid.data(precision)
    kernel_parameters = _np.array(kernel_options, dtype=real_type)

    assembly_function(
        grid_data,
        test_points,
        trial_points,
        quad_weights,
        test_elements,
        trial_elements,
        test_offsets,
        trial_offsets,
        weights_offsets,
        number_of_quad_points,
        dual_to_range.normal_multipliers,
        domain.normal_multipliers,
        dual_to_range.number_of_shape_functions,
        domain.number_of_shape_functions,
        dual_to_range.shapeset.evaluate,
        domain.shapeset.evaluate,
        kernel_function,
        kernel_parameters,
        result,
    )
예제 #5
0
def potential_assembler(device_interface, space, operator_descriptor, points,
                        parameters):
    """Return an evaluator function that evaluates a potential with OpenCL.

    Kernels and device buffers are created once here. The returned
    ``evaluator(x)`` uploads the coefficient vector ``x``, launches the
    kernels and returns a flat array with ``kernel_dimension * npoints``
    entries, where ``npoints = points.shape[1]``.

    The device type is taken from ``bempp.api.POTENTIAL_OPERATOR_DEVICE_TYPE``
    and a ``RuntimeError`` is raised if it is neither ``"gpu"`` nor
    ``"cpu"``. ``device_interface`` is not used in this implementation.
    """
    import bempp.api
    from bempp.api.integration.triangle_gauss import rule
    from bempp.api.utils.helpers import get_type
    from bempp.core.opencl_kernels import get_kernel_from_name
    from bempp.core.opencl_kernels import get_kernel_from_operator_descriptor
    from bempp.core.opencl_kernels import (
        default_context,
        default_device,
        get_vector_width,
    )

    if bempp.api.POTENTIAL_OPERATOR_DEVICE_TYPE == "gpu":
        device_type = "gpu"
    elif bempp.api.POTENTIAL_OPERATOR_DEVICE_TYPE == "cpu":
        device_type = "cpu"
    else:
        raise RuntimeError(
            f"Unknown device type {bempp.api.POTENTIAL_OPERATOR_DEVICE_TYPE}")

    mf = _cl.mem_flags
    ctx = default_context(device_type)
    device = default_device(device_type)

    quad_points, quad_weights = rule(parameters.quadrature.regular)

    # Unlike the Numba assemblers, the precision requested by the operator
    # descriptor is honoured here.
    precision = operator_descriptor.precision
    dtype = get_type(precision).real
    kernel_options = operator_descriptor.options
    kernel_dimension = operator_descriptor.kernel_dimension

    if operator_descriptor.is_complex:
        result_type = _np.dtype(get_type(precision).complex)
    else:
        result_type = dtype
    # Normalise to a numpy dtype so that .itemsize is available below.
    result_type = _np.dtype(result_type)

    indices = space.support_elements
    nelements = len(indices)
    vector_width = get_vector_width(precision, device_type=device_type)
    npoints = points.shape[1]
    # Split the elements into a part that is a multiple of the workgroup
    # size (main kernel) and the leftover elements (remainder kernel).
    remainder_size = nelements % WORKGROUP_SIZE_POTENTIAL
    main_size = nelements - remainder_size

    main_kernel = None
    remainder_kernel = None
    sum_kernel = None

    options = {
        "NUMBER_OF_QUAD_POINTS": len(quad_weights),
        "SHAPESET": space.shapeset.identifier,
        "NUMBER_OF_SHAPE_FUNCTIONS": space.number_of_shape_functions,
        "WORKGROUP_SIZE": WORKGROUP_SIZE_POTENTIAL // vector_width,
    }

    # NOTE(review): the None values presumably turn these keys into bare
    # compile-time defines - confirm in get_kernel_from_operator_descriptor.
    if operator_descriptor.is_complex:
        options["COMPLEX_KERNEL"] = None
        options["COMPLEX_COEFFICIENTS"] = None
        options["COMPLEX_RESULT"] = None

    if main_size > 0:
        main_kernel = get_kernel_from_operator_descriptor(
            operator_descriptor, options, "potential", device_type=device_type)
        sum_kernel = get_kernel_from_name("sum_for_potential_novec",
                                          options,
                                          precision,
                                          device_type=device_type)

    if remainder_size > 0:
        # The options dict is modified in place; this must happen after the
        # main kernel has been requested with the original workgroup size.
        options["WORKGROUP_SIZE"] = remainder_size
        remainder_kernel = get_kernel_from_operator_descriptor(
            operator_descriptor,
            options,
            "potential",
            force_novec=True,
            device_type=device_type,
        )

    # Read-only input buffers, initialised from host memory at creation.
    indices_buffer = _cl.Buffer(ctx,
                                mf.READ_ONLY | mf.COPY_HOST_PTR,
                                hostbuf=indices)

    normals_buffer = _cl.Buffer(ctx,
                                mf.READ_ONLY | mf.COPY_HOST_PTR,
                                hostbuf=space.normal_multipliers)

    # Points are flattened in column-major order before the upload.
    points_buffer = _cl.Buffer(
        ctx,
        mf.READ_ONLY | mf.COPY_HOST_PTR,
        hostbuf=points.ravel(order="F").astype(dtype),
    )

    grid_buffer = _cl.Buffer(ctx,
                             mf.READ_ONLY | mf.COPY_HOST_PTR,
                             hostbuf=space.grid.as_array.astype(dtype))

    # elements_buffer = _cl.Buffer(
    #     ctx,
    #     mf.READ_ONLY | mf.COPY_HOST_PTR,
    #     hostbuf=space.grid.elements.ravel(order="F"),
    # )

    quad_points_buffer = _cl.Buffer(
        ctx,
        mf.READ_ONLY | mf.COPY_HOST_PTR,
        hostbuf=quad_points.ravel(order="F").astype(dtype),
    )

    quad_weights_buffer = _cl.Buffer(ctx,
                                     mf.READ_ONLY | mf.COPY_HOST_PTR,
                                     hostbuf=quad_weights.astype(dtype))

    # Device-side output: kernel_dimension * npoints entries of result_type.
    result_buffer = _cl.Buffer(ctx,
                               mf.READ_WRITE,
                               size=result_type.itemsize * kernel_dimension *
                               npoints)

    # Allocated without host data; filled on every evaluator call.
    # NOTE(review): sized from space.map_to_full_grid.shape[0], so x is
    # assumed to have that many entries - confirm against callers.
    coefficients_buffer = _cl.Buffer(ctx,
                                     mf.READ_ONLY,
                                     size=result_type.itemsize *
                                     space.map_to_full_grid.shape[0])

    if main_size > 0:
        # One slot per output entry and per full workgroup of elements.
        sum_size = (kernel_dimension * npoints *
                    (nelements // WORKGROUP_SIZE_POTENTIAL) *
                    result_type.itemsize)
        sum_buffer = _cl.Buffer(ctx, mf.READ_WRITE, size=sum_size)

    # An empty options sequence is replaced by a single dummy value so that
    # the buffer created from it is not empty.
    if not kernel_options:
        kernel_options = [0.0]

    kernel_options_array = _np.array(kernel_options, dtype=dtype)

    kernel_options_buffer = _cl.Buffer(ctx,
                                       mf.READ_ONLY | mf.COPY_HOST_PTR,
                                       hostbuf=kernel_options_array)

    def evaluator(x):
        """Evaluate a potential."""
        result = _np.empty(kernel_dimension * npoints, dtype=result_type)

        with _cl.CommandQueue(ctx, device=device) as queue:
            # Upload the coefficients and zero the device-side result.
            _cl.enqueue_copy(queue, coefficients_buffer, x.astype(result_type))
            _cl.enqueue_fill_buffer(
                queue,
                result_buffer,
                _np.uint8(0),
                0,
                kernel_dimension * npoints * result_type.itemsize,
            )

            if main_size > 0:
                _cl.enqueue_fill_buffer(queue, sum_buffer, _np.uint8(0), 0,
                                        sum_size)
                queue.finish()

                # The main kernel is given sum_buffer as its output.
                main_kernel(
                    queue,
                    (npoints, main_size // vector_width),
                    (1, WORKGROUP_SIZE_POTENTIAL // vector_width),
                    grid_buffer,
                    indices_buffer,
                    normals_buffer,
                    points_buffer,
                    coefficients_buffer,
                    quad_points_buffer,
                    quad_weights_buffer,
                    sum_buffer,
                    kernel_options_buffer,
                )

                # NOTE(review): presumably reduces the per-workgroup entries
                # of sum_buffer into result_buffer - confirm in the kernel.
                sum_kernel(
                    queue,
                    (kernel_dimension * npoints, ),
                    (1, ),
                    sum_buffer,
                    result_buffer,
                    _np.uint32(nelements // WORKGROUP_SIZE_POTENTIAL),
                )

            if remainder_size > 0:
                # The leftover elements use result_buffer directly and start
                # at element offset main_size.
                remainder_kernel(
                    queue,
                    (npoints, remainder_size),
                    (1, remainder_size),
                    grid_buffer,
                    indices_buffer,
                    normals_buffer,
                    points_buffer,
                    coefficients_buffer,
                    quad_points_buffer,
                    quad_weights_buffer,
                    result_buffer,
                    kernel_options_buffer,
                    global_offset=(0, main_size),
                )

            # Download the device result into the host array.
            _cl.enqueue_copy(queue, result, result_buffer)

        return result

    return evaluator
예제 #6
0
def dense_assembler(device_interface, operator_descriptor, domain,
                    dual_to_range, parameters, result):
    """Assemble the regular part of a dense operator with OpenCL.

    One kernel launch is issued for every pair of test and trial colors, as
    returned by ``get_elements_by_color()`` on the two spaces. The
    device-side result is zero-filled before the launches and copied into
    ``result`` afterwards; nothing is returned.

    The device type is taken from ``bempp.api.BOUNDARY_OPERATOR_DEVICE_TYPE``
    and a ``RuntimeError`` is raised if it is neither ``"gpu"`` nor
    ``"cpu"``. ``device_interface`` is not used in this implementation.
    """
    import bempp.api
    from bempp.api.integration.triangle_gauss import rule
    from bempp.api.utils.helpers import get_type
    from bempp.core.opencl_kernels import get_kernel_from_operator_descriptor
    from bempp.core.opencl_kernels import (
        default_context,
        default_device,
        get_vector_width,
    )

    device_type = bempp.api.BOUNDARY_OPERATOR_DEVICE_TYPE
    if device_type not in ("gpu", "cpu"):
        # Report the setting that was actually checked. The message used to
        # show POTENTIAL_OPERATOR_DEVICE_TYPE, which is a different setting.
        raise RuntimeError(f"Unknown device type {device_type}")

    mf = _cl.mem_flags
    ctx = default_context(device_type)
    device = default_device(device_type)

    precision = operator_descriptor.precision
    dtype = get_type(precision).real
    kernel_options = operator_descriptor.options

    quad_points, quad_weights = rule(parameters.quadrature.regular)

    test_indices, test_color_indexptr = dual_to_range.get_elements_by_color()
    trial_indices, trial_color_indexptr = domain.get_elements_by_color()

    number_of_test_colors = len(test_color_indexptr) - 1
    number_of_trial_colors = len(trial_color_indexptr) - 1

    options = {
        "NUMBER_OF_QUAD_POINTS": len(quad_weights),
        "TEST": dual_to_range.shapeset.identifier,
        "TRIAL": domain.shapeset.identifier,
        "TRIAL_NUMBER_OF_ELEMENTS": domain.number_of_support_elements,
        "TEST_NUMBER_OF_ELEMENTS": dual_to_range.number_of_support_elements,
        "NUMBER_OF_TEST_SHAPE_FUNCTIONS":
        dual_to_range.number_of_shape_functions,
        "NUMBER_OF_TRIAL_SHAPE_FUNCTIONS": domain.number_of_shape_functions,
    }

    if operator_descriptor.is_complex:
        options["COMPLEX_KERNEL"] = None

    # The main kernel handles trial elements in groups of vector_width; the
    # non-vectorised remainder kernel handles the leftover elements.
    main_kernel = get_kernel_from_operator_descriptor(operator_descriptor,
                                                      options,
                                                      "regular",
                                                      device_type=device_type)
    remainder_kernel = get_kernel_from_operator_descriptor(
        operator_descriptor,
        options,
        "regular",
        force_novec=True,
        device_type=device_type,
    )

    # Read-only input buffers, initialised from host memory at creation.
    test_indices_buffer = _cl.Buffer(ctx,
                                     mf.READ_ONLY | mf.COPY_HOST_PTR,
                                     hostbuf=test_indices)
    trial_indices_buffer = _cl.Buffer(ctx,
                                      mf.READ_ONLY | mf.COPY_HOST_PTR,
                                      hostbuf=trial_indices)

    test_normals_buffer = _cl.Buffer(ctx,
                                     mf.READ_ONLY | mf.COPY_HOST_PTR,
                                     hostbuf=dual_to_range.normal_multipliers)
    trial_normals_buffer = _cl.Buffer(ctx,
                                      mf.READ_ONLY | mf.COPY_HOST_PTR,
                                      hostbuf=domain.normal_multipliers)
    test_grid_buffer = _cl.Buffer(
        ctx,
        mf.READ_ONLY | mf.COPY_HOST_PTR,
        hostbuf=dual_to_range.grid.as_array.astype(dtype),
    )
    trial_grid_buffer = _cl.Buffer(ctx,
                                   mf.READ_ONLY | mf.COPY_HOST_PTR,
                                   hostbuf=domain.grid.as_array.astype(dtype))

    test_elements_buffer = _cl.Buffer(
        ctx,
        mf.READ_ONLY | mf.COPY_HOST_PTR,
        hostbuf=dual_to_range.grid.elements.ravel(order="F"),
    )

    trial_elements_buffer = _cl.Buffer(
        ctx,
        mf.READ_ONLY | mf.COPY_HOST_PTR,
        hostbuf=domain.grid.elements.ravel(order="F"),
    )

    test_local2global_buffer = _cl.Buffer(ctx,
                                          mf.READ_ONLY | mf.COPY_HOST_PTR,
                                          hostbuf=dual_to_range.local2global)

    trial_local2global_buffer = _cl.Buffer(ctx,
                                           mf.READ_ONLY | mf.COPY_HOST_PTR,
                                           hostbuf=domain.local2global)

    test_multipliers_buffer = _cl.Buffer(
        ctx,
        mf.READ_ONLY | mf.COPY_HOST_PTR,
        hostbuf=dual_to_range.local_multipliers.astype(dtype),
    )

    trial_multipliers_buffer = _cl.Buffer(
        ctx,
        mf.READ_ONLY | mf.COPY_HOST_PTR,
        hostbuf=domain.local_multipliers.astype(dtype),
    )

    quad_points_buffer = _cl.Buffer(
        ctx,
        mf.READ_ONLY | mf.COPY_HOST_PTR,
        hostbuf=quad_points.ravel(order="F").astype(dtype),
    )

    quad_weights_buffer = _cl.Buffer(ctx,
                                     mf.READ_ONLY | mf.COPY_HOST_PTR,
                                     hostbuf=quad_weights.astype(dtype))

    result_buffer = _cl.Buffer(ctx, mf.READ_WRITE, size=result.nbytes)

    # An empty options sequence is replaced by a single dummy value so that
    # the buffer created from it is not empty.
    if not kernel_options:
        kernel_options = [0.0]

    kernel_options_array = _np.array(kernel_options, dtype=dtype)

    kernel_options_buffer = _cl.Buffer(ctx,
                                       mf.READ_ONLY | mf.COPY_HOST_PTR,
                                       hostbuf=kernel_options_array)

    vector_width = get_vector_width(precision, device_type=device_type)

    # The kernel arguments are the same for every launch, so the list is
    # built once rather than on every color pair. The order must match the
    # kernel signature.
    kernel_arguments = [
        test_indices_buffer,
        trial_indices_buffer,
        test_normals_buffer,
        trial_normals_buffer,
        test_grid_buffer,
        trial_grid_buffer,
        test_elements_buffer,
        trial_elements_buffer,
        test_local2global_buffer,
        trial_local2global_buffer,
        test_multipliers_buffer,
        trial_multipliers_buffer,
        quad_points_buffer,
        quad_weights_buffer,
        result_buffer,
        kernel_options_buffer,
        _np.int32(dual_to_range.global_dof_count),
        _np.int32(domain.global_dof_count),
        # Non-zero when the two spaces are defined on different grids.
        _np.uint8(domain.grid != dual_to_range.grid),
    ]

    def kernel_runner(
        queue,
        test_offset,
        trial_offset,
        test_number_of_indices,
        trial_number_of_indices,
    ):
        """Actually run the kernel for a given range."""
        remainder_size = trial_number_of_indices % vector_width
        main_size = trial_number_of_indices - remainder_size

        if main_size > 0:
            main_kernel(
                queue,
                (test_number_of_indices, main_size // vector_width),
                (1, 1),
                *kernel_arguments,
                global_offset=(test_offset, trial_offset),
            )

        if remainder_size > 0:
            remainder_kernel(
                queue,
                (test_number_of_indices, remainder_size),
                (1, 1),
                *kernel_arguments,
                global_offset=(test_offset, trial_offset + main_size),
            )

    with _cl.CommandQueue(ctx, device=device) as queue:
        _cl.enqueue_fill_buffer(queue, result_buffer, _np.uint8(0), 0,
                                result.nbytes)
        for test_index in range(number_of_test_colors):
            test_offset = test_color_indexptr[test_index]
            n_test_indices = (test_color_indexptr[1 + test_index] -
                              test_color_indexptr[test_index])
            for trial_index in range(number_of_trial_colors):
                n_trial_indices = (trial_color_indexptr[1 + trial_index] -
                                   trial_color_indexptr[trial_index])
                trial_offset = trial_color_indexptr[trial_index]
                kernel_runner(queue, test_offset, trial_offset, n_test_indices,
                              n_trial_indices)
        _cl.enqueue_copy(queue, result, result_buffer)
예제 #7
0
def singular_assembler(
    device_interface,
    operator_descriptor,
    grid,
    domain,
    dual_to_range,
    test_points,
    trial_points,
    quad_weights,
    test_elements,
    trial_elements,
    test_offsets,
    trial_offsets,
    weights_offsets,
    number_of_quad_points,
    kernel_options,
    result,
):
    """Run the OpenCL kernel for the singular part of an integral operator.

    The quadrature data is uploaded to read-only device buffers, the
    ``"singular"`` kernel for ``operator_descriptor`` is launched once with
    ``len(test_elements)`` as its size argument, and the device-side output
    is copied into ``result``. Nothing is returned. ``device_interface`` is
    not used in this implementation.
    """
    from bempp.api.utils.helpers import get_type
    from bempp.core.opencl_kernels import get_kernel_from_operator_descriptor
    from bempp.core.opencl_kernels import default_context, default_device

    mem_flags = _cl.mem_flags
    context = default_context()
    device = default_device()

    real_type = get_type(operator_descriptor.precision).real

    def readonly_buffer(host_array):
        """Create a read-only device buffer initialised from host data."""
        return _cl.Buffer(context,
                          mem_flags.READ_ONLY | mem_flags.COPY_HOST_PTR,
                          hostbuf=host_array)

    compile_options = {
        "WORKGROUP_SIZE": WORKGROUP_SIZE_GALERKIN,
        "TEST": dual_to_range.shapeset.identifier,
        "TRIAL": domain.shapeset.identifier,
        "NUMBER_OF_TEST_SHAPE_FUNCTIONS":
        dual_to_range.number_of_shape_functions,
        "NUMBER_OF_TRIAL_SHAPE_FUNCTIONS": domain.number_of_shape_functions,
    }
    if operator_descriptor.is_complex:
        compile_options["COMPLEX_KERNEL"] = None

    kernel = get_kernel_from_operator_descriptor(operator_descriptor,
                                                 compile_options, "singular")

    # Input buffers, listed in the order in which the kernel receives them.
    input_buffers = [
        readonly_buffer(grid.as_array.astype(real_type)),
        readonly_buffer(dual_to_range.normal_multipliers),
        readonly_buffer(domain.normal_multipliers),
        readonly_buffer(test_points.astype(real_type)),
        readonly_buffer(trial_points.astype(real_type)),
        readonly_buffer(quad_weights.astype(real_type)),
        readonly_buffer(test_elements),
        readonly_buffer(trial_elements),
        readonly_buffer(test_offsets),
        readonly_buffer(trial_offsets),
        readonly_buffer(weights_offsets),
        # Quadrature point counts divided by the workgroup size.
        readonly_buffer(number_of_quad_points // WORKGROUP_SIZE_GALERKIN),
    ]

    output_buffer = _cl.Buffer(context,
                               mem_flags.WRITE_ONLY,
                               size=result.nbytes)

    # An empty options sequence is replaced by a single dummy value so that
    # the buffer created from it is not empty.
    if not kernel_options:
        kernel_options = [0.0]
    options_buffer = readonly_buffer(
        _np.array(kernel_options, dtype=real_type))

    singular_count = len(test_elements)

    with _cl.CommandQueue(context, device=device) as queue:
        kernel(
            queue,
            (singular_count, ),
            (WORKGROUP_SIZE_GALERKIN, ),
            *input_buffers,
            output_buffer,
            options_buffer,
            g_times_l=True,
        )
        _cl.enqueue_copy(queue, result, output_buffer)
예제 #8
0
def get_local_interaction_operator(grid, local_points, kernel_function,
                                   kernel_parameters, precision, is_complex):
    """Return a linear operator for the local (near-field) interactions.

    The operator has shape ``(4 * npoints * grid.number_of_elements,
    npoints * grid.number_of_elements)`` with
    ``npoints = local_points.shape[1]``. How it is represented depends on
    ``GLOBAL_PARAMETERS.fmm.near_field_representation``:

    * ``"sparse"``: a CSR matrix wrapped with ``aslinearoperator``.
    * ``"evaluate"``: a matrix-free ``LinearOperator`` whose matvec is
      provided by the Numba or OpenCL evaluator, selected through
      ``bempp.api.DEFAULT_DEVICE_INTERFACE``.

    ``kernel_function`` is one of ``"laplace"``, ``"helmholtz"`` or
    ``"modified_helmholtz"``.

    Raises
    ------
    ValueError
        If the near-field representation or the device interface is
        unknown, or if ``kernel_function`` is not recognised on a path that
        needs the Numba kernel (sparse representation or Numba evaluator).
    """
    import bempp.api
    from bempp.api import GLOBAL_PARAMETERS
    from bempp.api.utils.helpers import get_type
    from scipy.sparse import csr_matrix
    from scipy.sparse.linalg import aslinearoperator
    from scipy.sparse.linalg import LinearOperator

    npoints = local_points.shape[1]

    dtype = _np.dtype(get_type(precision).real)
    if is_complex:
        result_type = _np.dtype(get_type(precision).complex)
    else:
        result_type = dtype

    rows = 4 * npoints * grid.number_of_elements
    cols = npoints * grid.number_of_elements

    if kernel_function == "laplace":
        kernel = laplace_kernel
    elif kernel_function == "helmholtz":
        kernel = helmholtz_kernel
    elif kernel_function == "modified_helmholtz":
        kernel = modified_helmholtz_kernel
    else:
        # Not an error yet: the OpenCL evaluator receives the kernel name
        # itself and does its own lookup. The check happens when the Numba
        # kernel is actually needed.
        kernel = None

    def numba_kernel():
        """Return the Numba kernel or fail with a descriptive error."""
        if kernel is None:
            raise ValueError(
                f"Unknown kernel function '{kernel_function}'. Must be one "
                "of 'laplace', 'helmholtz', 'modified_helmholtz'.")
        return kernel

    near_field = GLOBAL_PARAMETERS.fmm.near_field_representation

    if near_field == "sparse":
        data, indices, indexptr = get_local_interaction_matrix_impl(
            grid.data(precision),
            local_points.astype(dtype),
            numba_kernel(),
            _np.array(kernel_parameters, dtype=dtype),
            dtype,
            result_type,
        )
        return aslinearoperator(
            csr_matrix((data, indices, indexptr), shape=(rows, cols)))

    if near_field != "evaluate":
        raise ValueError("Unknown value for near_field_representation.")

    device_interface = bempp.api.DEFAULT_DEVICE_INTERFACE

    if device_interface == "numba":
        evaluator = get_local_interaction_evaluator_numba(
            grid.data(precision),
            local_points.astype(dtype),
            numba_kernel(),
            _np.array(kernel_parameters, dtype=dtype),
            dtype,
            result_type,
        )
    elif device_interface == "opencl":
        # The OpenCL evaluator takes the grid object and the kernel name
        # rather than grid data and a Numba kernel.
        evaluator = get_local_interaction_evaluator_opencl(
            grid,
            local_points.astype(dtype),
            kernel_function,
            _np.array(kernel_parameters, dtype=dtype),
            dtype,
            result_type,
        )
    else:
        raise ValueError(
            "DEFAULT_DEVICE_INTERFACE must be one of 'numba', 'opencl'.")

    return LinearOperator(shape=(rows, cols),
                          matvec=evaluator,
                          dtype=result_type)
예제 #9
0
def assemble_singular_part(domain, dual_to_range, parameters,
                           operator_descriptor, device_interface):
    """Assemble the singular contributions of an operator.

    A singular quadrature rule is set up on ``domain.grid`` for the supports
    of both spaces, and the singular assembler dispatcher fills a flat
    result array for the given device interface.

    Returns a tuple ``(i_ind, j_ind, result)``. ``result`` holds
    ``number_of_test_shape_functions * number_of_trial_shape_functions``
    values per entry of the rule's ``test_elements``; ``i_ind`` and
    ``j_ind`` are the matching row and column indices, built from the
    rule's test and trial element indices and the shape function numbers.
    """
    from bempp.api.utils.helpers import get_type
    from bempp.core.dispatcher import singular_assembler_dispatcher
    import bempp.api

    grid = domain.grid

    quadrature_rule = _SingularQuadratureRuleInterfaceGalerkin(
        grid,
        parameters.quadrature.singular,
        dual_to_range.support,
        domain.support,
    )

    ntest = dual_to_range.number_of_shape_functions
    ntrial = domain.number_of_shape_functions
    block_size = ntest * ntrial

    (
        test_points,
        trial_points,
        quad_weights,
        test_elements,
        trial_elements,
        test_offsets,
        trial_offsets,
        weights_offsets,
        number_of_quad_points,
    ) = quadrature_rule.get_arrays()

    scalar_types = get_type(operator_descriptor.precision)
    result_type = (scalar_types.complex
                   if operator_descriptor.is_complex else scalar_types.real)

    result = _np.zeros(block_size * len(test_elements), dtype=result_type)

    timer_message = (
        f"Singular assembler:{operator_descriptor.identifier}:{device_interface}"
    )
    with bempp.api.Timer(message=timer_message):
        singular_assembler_dispatcher(
            device_interface,
            operator_descriptor,
            grid,
            domain,
            dual_to_range,
            test_points,
            trial_points,
            quad_weights,
            test_elements,
            trial_elements,
            test_offsets,
            trial_offsets,
            weights_offsets,
            number_of_quad_points,
            operator_descriptor.options,
            result,
        )

    number_of_pairs = len(quadrature_rule.trial_indices)

    # Index pattern inside one element pair: the test index varies slowest,
    # the trial index fastest.
    rows_in_block = _np.repeat(_np.arange(ntest), ntrial)
    cols_in_block = _np.tile(_np.arange(ntrial), ntest)

    # Offset the in-block pattern by the first local index of each element.
    i_ind = _np.tile(rows_in_block, number_of_pairs) + _np.repeat(
        quadrature_rule.test_indices * ntest, block_size)
    j_ind = _np.tile(cols_in_block, number_of_pairs) + _np.repeat(
        quadrature_rule.trial_indices * ntrial, block_size)

    return (i_ind, j_ind, result)
예제 #10
0
def assemble_sparse(
    domain,
    dual_to_range,
    parameters,
    operator_descriptor,
    numba_assembly_function,
    numba_kernel_function,
):
    """Assemble the local entries of a sparse operator.

    Only elements in the support of both spaces are processed. The given
    Numba assembly function fills a flat array with
    ``nshape_test * nshape_trial`` values per such element.

    Returns a tuple ``(i_ind, j_ind, result)`` holding the row indices, the
    column indices and the assembled values.
    """
    import bempp.api
    from bempp.api.integration.triangle_gauss import rule as regular_rule
    from bempp.api.utils.helpers import get_type

    quad_points, quad_weights = regular_rule(parameters.quadrature.regular)

    # Elements on which both spaces are supported.
    elements = _np.flatnonzero(domain.support * dual_to_range.support)
    element_count = len(elements)

    nshape_test = dual_to_range.number_of_shape_functions
    nshape_trial = domain.number_of_shape_functions
    block_size = nshape_test * nshape_trial

    # Sparse operators are always assembled in double precision, whatever
    # precision the operator descriptor requests.
    precision = "double"

    scalar_types = get_type(precision)
    result_type = (scalar_types.complex
                   if operator_descriptor.is_complex else scalar_types.real)

    result = _np.zeros(block_size * element_count, dtype=result_type)

    with bempp.api.Timer():
        numba_assembly_function(
            domain.grid.data(precision),
            nshape_test,
            nshape_trial,
            elements,
            quad_points,
            quad_weights,
            dual_to_range.normal_multipliers,
            domain.normal_multipliers,
            dual_to_range.local_multipliers,
            domain.local_multipliers,
            dual_to_range.shapeset.evaluate,
            domain.shapeset.evaluate,
            dual_to_range.numba_evaluate,
            domain.numba_evaluate,
            numba_kernel_function,
            result,
        )

    # Index pattern inside one element block: the test index varies slowest,
    # the trial index fastest.
    rows_in_block = _np.repeat(_np.arange(nshape_test), nshape_trial)
    cols_in_block = _np.tile(_np.arange(nshape_trial), nshape_test)

    # Offset the in-block pattern by the first local index of each element.
    i_ind = _np.tile(rows_in_block, element_count) + _np.repeat(
        elements * nshape_test, block_size)
    j_ind = _np.tile(cols_in_block, element_count) + _np.repeat(
        elements * nshape_trial, block_size)

    return i_ind, j_ind, result