Beispiel #1
0
 def refine_locals(
     self,
     level_start_target_or_target_parent_box_nrs,
     target_or_target_parent_boxes,
     local_exps,
 ):
     """Forward the call, with unchanged arguments, to
     ``FMMLibExpansionWrangler.refine_locals`` and return its result.
     """
     base_impl = FMMLibExpansionWrangler.refine_locals
     return base_impl(
         self,
         level_start_target_or_target_parent_box_nrs,
         target_or_target_parent_boxes,
         local_exps,
     )
Beispiel #2
0
 def form_locals(
     self,
     level_start_target_or_target_parent_box_nrs,
     target_or_target_parent_boxes,
     starts,
     lists,
     src_weights,
 ):
     """Forward the call, with unchanged arguments, to
     ``FMMLibExpansionWrangler.form_locals`` and return its result.
     """
     base_impl = FMMLibExpansionWrangler.form_locals
     return base_impl(
         self,
         level_start_target_or_target_parent_box_nrs,
         target_or_target_parent_boxes,
         starts,
         lists,
         src_weights,
     )
Beispiel #3
0
 def multipole_to_local(
     self,
     level_start_target_box_nrs,
     target_boxes,
     src_box_starts,
     src_box_lists,
     mpole_exps,
 ):
     """Forward the call, with unchanged arguments, to
     ``FMMLibExpansionWrangler.multipole_to_local`` and return its result.
     """
     base_impl = FMMLibExpansionWrangler.multipole_to_local
     return base_impl(
         self,
         level_start_target_box_nrs,
         target_boxes,
         src_box_starts,
         src_box_lists,
         mpole_exps,
     )
Beispiel #4
0
def demo_cost_model():
    """Calibrate an FMM cost model on several runs and log its prediction error.

    For each problem size a tree and traversal are built, the FMM is driven
    with an ``FMMLibExpansionWrangler`` and the per-stage timing data is
    recorded.  All cases but the last are used to estimate calibration
    parameters; the last case is then predicted, and the relative error
    against its measured time is logged for every stage.

    Raises ``NotImplementedError`` when ``SUPPORTS_PROCESS_TIME`` is false.
    """
    if not SUPPORTS_PROCESS_TIME:
        raise NotImplementedError(
            "Currently this script uses process time which only works on Python>=3.3"
        )

    from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler

    # One (nsources, ntargets) pair per test case.
    problem_sizes = [
        (1000, 1000), (2000, 2000), (3000, 3000), (4000, 4000), (5000, 5000),
    ]
    dims = 3
    dtype = np.float64

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    traversals_dev = []
    level_to_orders = []
    timing_results = []

    def fmm_level_to_nterms(tree, ilevel):
        # Same expansion order on every level.
        return 10

    for nsources, ntargets in problem_sizes:
        # {{{ Generate sources, targets and target_radii

        from boxtree.tools import make_normal_particle_array as p_normal
        sources = p_normal(queue, nsources, dims, dtype, seed=15)
        targets = p_normal(queue, ntargets, dims, dtype, seed=18)

        from pyopencl.clrandom import PhiloxGenerator
        radii_rng = PhiloxGenerator(queue.context, seed=22)
        target_radii = radii_rng.uniform(
            queue, ntargets, a=0, b=0.05, dtype=dtype).get()

        # }}}

        # {{{ Generate tree and traversal

        from boxtree import TreeBuilder
        tree_builder = TreeBuilder(ctx)
        tree, _ = tree_builder(
            queue, sources,
            targets=targets,
            target_radii=target_radii,
            stick_out_factor=0.15,
            max_particles_in_box=30,
            debug=True)

        from boxtree.traversal import FMMTraversalBuilder
        trav_builder = FMMTraversalBuilder(ctx, well_sep_is_n_away=2)
        trav_dev, _ = trav_builder(queue, tree, debug=True)
        trav = trav_dev.get(queue=queue)

        traversals_dev.append(trav_dev)

        # }}}

        wrangler = FMMLibExpansionWrangler(trav.tree, 0, fmm_level_to_nterms)
        level_to_orders.append(wrangler.level_nterms)

        from boxtree.fmm import drive_fmm
        timing_data = {}
        src_weights = np.random.rand(tree.nsources).astype(tree.coord_dtype)
        drive_fmm(trav, wrangler, src_weights, timing_data=timing_data)

        timing_results.append(timing_data)

    time_field_name = "process_elapsed"

    from boxtree.cost import FMMCostModel
    from boxtree.cost import make_pde_aware_translation_cost_model
    cost_model = FMMCostModel(make_pde_aware_translation_cost_model)

    # Model every case except the last one with unit calibration parameters.
    model_results = [
        cost_model.cost_per_stage(
            queue,
            case_trav,
            case_orders,
            FMMCostModel.get_unit_calibration_params(),
        )
        for case_trav, case_orders in zip(
            traversals_dev[:-1], level_to_orders[:-1])
    ]
    queue.finish()

    params = cost_model.estimate_calibration_params(
        model_results, timing_results[:-1], time_field_name=time_field_name)

    # The last case was held out of the calibration; predict it.
    predicted_time = cost_model.cost_per_stage(
        queue,
        traversals_dev[-1],
        level_to_orders[-1],
        params,
    )
    queue.finish()

    stage_names = (
        "form_multipoles", "eval_direct", "multipole_to_local",
        "eval_multipoles", "form_locals", "eval_locals",
        "coarsen_multipoles", "refine_locals",
    )
    held_out_timing = timing_results[-1]
    for field in stage_names:
        measured = held_out_timing[field][time_field_name]
        predicted = predicted_time[field]
        pred_err = (measured - predicted) / measured
        logger.info("actual/predicted time for %s: %.3g/%.3g -> %g %% error",
                    field, measured, predicted,
                    abs(100 * pred_err))
Beispiel #5
0
 def coarsen_multipoles(
     self, level_start_source_parent_box_nrs, source_parent_boxes, mpoles
 ):
     """Forward the call, with unchanged arguments, to
     ``FMMLibExpansionWrangler.coarsen_multipoles`` and return its result.
     """
     base_impl = FMMLibExpansionWrangler.coarsen_multipoles
     return base_impl(
         self,
         level_start_source_parent_box_nrs,
         source_parent_boxes,
         mpoles,
     )
Beispiel #6
0
 def form_multipoles(self, level_start_source_box_nrs, source_boxes, src_weights):
     """Forward the call, with unchanged arguments, to
     ``FMMLibExpansionWrangler.form_multipoles`` and return its result.
     """
     base_impl = FMMLibExpansionWrangler.form_multipoles
     return base_impl(
         self,
         level_start_source_box_nrs,
         source_boxes,
         src_weights,
     )
Beispiel #7
0
 def finalize_potentials(self, potentials):
     """Return the result of ``FMMLibExpansionWrangler.finalize_potentials``
     applied to *potentials* for this instance.
     """
     finalized = FMMLibExpansionWrangler.finalize_potentials(self, potentials)
     return finalized
Beispiel #8
0
 def reorder_potentials(self, potentials):
     """Return the result of ``FMMLibExpansionWrangler.reorder_potentials``
     applied to *potentials* for this instance.
     """
     reordered = FMMLibExpansionWrangler.reorder_potentials(self, potentials)
     return reordered
Beispiel #9
0
 def eval_multipoles(
     self, target_boxes_by_source_level, source_boxes_by_level, mpole_exps
 ):
     """Forward the call, with unchanged arguments, to
     ``FMMLibExpansionWrangler.eval_multipoles`` and return its result.
     """
     base_impl = FMMLibExpansionWrangler.eval_multipoles
     return base_impl(
         self,
         target_boxes_by_source_level,
         source_boxes_by_level,
         mpole_exps,
     )
Beispiel #10
0
def test_fmm_with_optimized_3d_m2l(ctx_factory, nsrcntgts, helmholtz_k,
                                   well_sep_is_n_away):
    """Check that the 3D M2L with rotation data matches the baseline M2L.

    Two ``FMMLibExpansionWrangler`` instances are driven over the same
    traversal, one without and one with ``FMMLibRotationData``.  The
    resulting potentials must agree to ``1e-13`` (absolute and relative).
    The measured M2L process times are printed when available.

    :arg ctx_factory: callable returning an OpenCL context.
    :arg nsrcntgts: total particle count; half are sources, half targets.
    :arg helmholtz_k: Helmholtz parameter passed to the wranglers.
    :arg well_sep_is_n_away: separation parameter passed to the traversal
        builder.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    dims = 3

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    nsources = ntargets = nsrcntgts // 2
    dtype = np.float64

    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    targets = (p_normal(queue, ntargets, dims, dtype, seed=18) +
               np.array([2, 0, 0])[:dims])

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue,
                 sources,
                 targets=targets,
                 max_particles_in_box=30,
                 debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    # Forward the test parameter: previously it was accepted but ignored, so
    # every parametrization ran with the builder's default separation.
    tbuild = FMMTraversalBuilder(ctx, well_sep_is_n_away=well_sep_is_n_away)
    trav, _ = tbuild(queue, tree, debug=True)

    trav = trav.get(queue=queue)

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=20)

    weights = rng.uniform(queue, nsources, dtype=np.float64).get()

    base_nterms = 10

    def fmm_level_to_nterms(tree, lev):
        result = base_nterms

        if lev < 3 and helmholtz_k:
            # exercise order-varies-by-level capability
            result += 5

        return result

    from boxtree.pyfmmlib_integration import (FMMLibExpansionWrangler,
                                              FMMLibRotationData)

    baseline_wrangler = FMMLibExpansionWrangler(
        trav.tree, helmholtz_k, fmm_level_to_nterms=fmm_level_to_nterms)

    optimized_wrangler = FMMLibExpansionWrangler(
        trav.tree,
        helmholtz_k,
        fmm_level_to_nterms=fmm_level_to_nterms,
        rotation_data=FMMLibRotationData(queue, trav))

    from boxtree.fmm import drive_fmm

    baseline_timing_data = {}
    baseline_pot = drive_fmm(trav,
                             baseline_wrangler, (weights, ),
                             timing_data=baseline_timing_data)

    optimized_timing_data = {}
    optimized_pot = drive_fmm(trav,
                              optimized_wrangler, (weights, ),
                              timing_data=optimized_timing_data)

    # The timing entry may be None, in which case nothing is printed.
    baseline_time = baseline_timing_data["multipole_to_local"][
        "process_elapsed"]
    if baseline_time is not None:
        print("Baseline M2L time : %#.4g s" % baseline_time)

    opt_time = optimized_timing_data["multipole_to_local"]["process_elapsed"]
    if opt_time is not None:
        print("Optimized M2L time: %#.4g s" % opt_time)

    assert np.allclose(baseline_pot, optimized_pot, atol=1e-13, rtol=1e-13)
Beispiel #11
0
 def local_expansion_zeros(self):
     """Return whatever ``FMMLibExpansionWrangler.local_expansion_zeros``
     produces for this instance.
     """
     zeros = FMMLibExpansionWrangler.local_expansion_zeros(self)
     return zeros
Beispiel #12
0
 def multipole_expansion_zeros(self):
     """Return whatever ``FMMLibExpansionWrangler.multipole_expansion_zeros``
     produces for this instance.
     """
     zeros = FMMLibExpansionWrangler.multipole_expansion_zeros(self)
     return zeros
Beispiel #13
0
    def __init__(self, code_container, queue, tree,
            near_field_table, dtype,
            fmm_level_to_order,
            quad_order,
            potential_kind=1,
            source_extra_kwargs=None,
            kernel_extra_kwargs=None,
            self_extra_kwargs=None,
            list1_extra_kwargs=None,
            *args, **kwargs):
        """Set up the wrangler state, validate the near-field tables and
        initialize the :class:`FMMLibExpansionWrangler` base.

        :arg code_container: stored as ``self.code``; its ``out_kernels``
            attribute is iterated to determine outputs and kernel parameters.
        :arg queue: stored as ``self.queue``; also used to fetch *tree* and
            the dipole vector components to the host.
        :arg tree: ``tree.get(queue)`` is stored as ``self.tree``.
        :arg near_field_table: one of

            * a list of tables (only with exactly one output kernel),
            * a single ``NearFieldInteractionTable`` (only with exactly one
              output kernel),
            * a dict mapping ``repr(out_knl)`` to a table or a list of
              tables. Single tables in the dict are wrapped into one-element
              lists *in place*, i.e. the caller's dict is modified.
        :arg dtype: stored as ``self.dtype``.
        :arg fmm_level_to_order: a callable, invoked as
            ``fmm_level_to_order(kernel, kernel_args, tree, level)``.
        :arg quad_order: stored as ``self.quad_order``; every table's
            ``quad_order`` is asserted to equal it.
        :arg potential_kind: stored as ``self.potential_kind``.
        :arg source_extra_kwargs: looked up by the source-derivative
            direction name to build the dipole vector, if the kernels have a
            directional source derivative.
        :arg kernel_extra_kwargs: looked up by the Helmholtz parameter name
            if the kernels are Helmholtz kernels.
        :arg self_extra_kwargs: defaulted to ``{}`` but not otherwise used
            in this method.
        :arg list1_extra_kwargs: stored as ``self.list1_extra_kwargs``.
        :arg kwargs: if it contains ``'traversal'``, rotation data is built
            from it and handed to the base class; otherwise a warning is
            logged.

        :raises ValueError: on an unsupported output kernel, or when the
            output kernels disagree on source derivative or Helmholtz
            parameter name.
        :raises RuntimeError: on an unrecognized or incomplete
            *near_field_table*, an unbuilt table, or a table list that is
            incompatible with / insufficient for the tree.
        :raises TypeError: if *fmm_level_to_order* is not callable.
        """
        self.code = code_container
        self.queue = queue

        # Work with the result of tree.get(queue) from here on.
        tree = tree.get(queue)
        self.tree = tree

        self.dtype = dtype
        self.quad_order = quad_order
        self.potential_kind = potential_kind

        # {{{ digest out_kernels

        # ifgrad becomes True as soon as one output is a target derivative.
        ifgrad = False
        # One entry per output kernel: () for the kernel itself, (axis,) for
        # an axis target derivative.
        outputs = []
        source_deriv_names = []
        k_names = []

        for out_knl in self.code.out_kernels:

            if self.is_supported_helmknl(out_knl):
                outputs.append(())
                no_target_deriv_knl = out_knl

            elif (isinstance(out_knl, AxisTargetDerivative)
                    and self.is_supported_helmknl(out_knl.inner_kernel)):
                outputs.append((out_knl.axis,))
                ifgrad = True
                no_target_deriv_knl = out_knl.inner_kernel

            else:
                raise ValueError(
                        "only the 2/3D Laplace and Helmholtz kernel "
                        "and their derivatives are supported")

            # None marks "no directional source derivative".
            source_deriv_names.append(no_target_deriv_knl.dir_vec_name
                    if isinstance(no_target_deriv_knl, DirectionalSourceDerivative)
                    else None)

            # None marks "not a Helmholtz kernel".
            base_knl = out_knl.get_base_kernel()
            k_names.append(base_knl.helmholtz_k_name
                    if isinstance(base_knl, HelmholtzKernel)
                    else None)

        self.outputs = outputs

        from pytools import is_single_valued

        if not is_single_valued(source_deriv_names):
            raise ValueError("not all kernels passed are the same in "
                    "whether they represent a source derivative")

        source_deriv_name = source_deriv_names[0]

        if not is_single_valued(k_names):
            raise ValueError("not all kernels passed have the same "
                    "Helmholtz parameter")

        k_name = k_names[0]

        if k_name is None:
            helmholtz_k = 0
        else:
            # NOTE(review): kernel_extra_kwargs is indexed here before its
            # None default is replaced by {} further below, so a Helmholtz
            # kernel with kernel_extra_kwargs=None fails with a TypeError.
            helmholtz_k = kernel_extra_kwargs[k_name]

        # }}}

        # {{{ table setup
        # TODO put this part into the interface class

        # After this section self.near_field_table maps repr(out_knl) to a
        # list of tables; self.n_tables is an int for the list/single-table
        # inputs and a dict of per-kernel counts for the dict input.
        self.near_field_table = {}
        # list of tables for a single out kernel
        if isinstance(near_field_table, list):
            assert len(self.code.out_kernels) == 1
            self.near_field_table[
                self.code.out_kernels[0].__repr__()
            ] = near_field_table
            self.n_tables = len(near_field_table)

        # single table
        elif isinstance(near_field_table, NearFieldInteractionTable):
            assert len(self.code.out_kernels) == 1
            self.near_field_table[self.code.out_kernels[0].__repr__()] = [
                near_field_table
            ]
            self.n_tables = 1

        # dictionary of lists of tables
        elif isinstance(near_field_table, dict):
            self.n_tables = dict()
            for out_knl in self.code.out_kernels:
                if repr(out_knl) not in near_field_table:
                    raise RuntimeError(
                            "Missing nearfield table for %s." % repr(out_knl))
                if isinstance(near_field_table[repr(out_knl)],
                        NearFieldInteractionTable):
                    # Wrap a lone table into a list; this writes into the
                    # dict object passed in by the caller.
                    near_field_table[repr(out_knl)] = [
                            near_field_table[repr(out_knl)]]
                else:
                    assert isinstance(near_field_table[repr(out_knl)], list)

                self.n_tables[repr(out_knl)] = len(near_field_table[repr(out_knl)])

            self.near_field_table = near_field_table
        else:
            raise RuntimeError("Table type unrecognized.")

        # TODO: make all parameters table-specific (allow using inhomogeneous tables)
        # The first table of the first output kernel defines the reference
        # source box extent for all consistency checks below.
        kname = repr(self.code.out_kernels[0])
        self.root_table_source_box_extent = (
                self.near_field_table[kname][0].source_box_extent)
        # Rounded log2 of (tree root extent / root table source box extent).
        table_starting_level = np.round(
            np.log(self.tree.root_extent / self.root_table_source_box_extent)
            / np.log(2)
            )
        for kid in range(len(self.code.out_kernels)):
            kname = self.code.out_kernels[kid].__repr__()
            # lev is the position of the table within this kernel's list.
            for lev, table in zip(
                    range(len(self.near_field_table[kname])),
                    self.near_field_table[kname]
                    ):
                assert table.quad_order == self.quad_order

                if not table.is_built:
                    raise RuntimeError(
                        "Near field interaction table needs to be built "
                        "prior to being used"
                    )

                # The table at position lev is expected to have a source box
                # extent 2**lev times smaller than the root table's.
                table_root_extent = table.source_box_extent * 2 ** lev
                assert (
                    abs(self.root_table_source_box_extent - table_root_extent)
                    < 1e-15
                )

                # If the kernel cannot be scaled,
                # - tree_root_extent must be integral times of table_root_extent
                # - n_tables must be sufficient
                # (These checks are skipped when n_tables is a dict, i.e.
                # when the tables were passed as a dictionary.)
                if not isinstance(self.n_tables, dict) and self.n_tables > 1:
                    if (
                        not abs(
                            int(self.tree.root_extent / table_root_extent)
                            * table_root_extent
                            - self.tree.root_extent
                        )
                        < 1e-15
                    ):
                        raise RuntimeError(
                            "Incompatible list of tables: the "
                            "source_box_extent of the root table must "
                            "divide the bounding box's extent by an integer."
                        )

            if not isinstance(self.n_tables, dict) and self.n_tables > 1:
                # this checks that the boxes at the highest level are covered
                if (
                    not tree.nlevels
                    <= len(self.near_field_table[kname]) + table_starting_level
                ):
                    raise RuntimeError(
                        "Insufficient list of tables: the "
                        "finest level mesh cells at level "
                        + str(tree.nlevels)
                        + " are not covered."
                    )

                # the check that the boxes at the coarsest level are covered is
                # deferred until trav.target_boxes is passed when invoking
                # eval_direct

        # Replace None defaults by fresh empty dicts.
        if source_extra_kwargs is None:
            source_extra_kwargs = {}

        if kernel_extra_kwargs is None:
            kernel_extra_kwargs = {}

        if self_extra_kwargs is None:
            self_extra_kwargs = {}

        if list1_extra_kwargs is None:
            list1_extra_kwargs = {}

        self.list1_extra_kwargs = list1_extra_kwargs

        # }}} End table setup

        if not callable(fmm_level_to_order):
            raise TypeError("fmm_level_to_order not passed")

        # Build the host-side dipole vector only when the kernels carry a
        # directional source derivative.
        dipole_vec = None
        if source_deriv_name is not None:
            dipole_vec = np.array([
                    d_i.get(queue=queue)
                    for d_i in source_extra_kwargs[source_deriv_name]],
                    order="F")

        # Adapter from the (tree, level) signature handed to the base class
        # to the (kernel, kernel_args, tree, level) signature of
        # fmm_level_to_order.
        def inner_fmm_level_to_nterms(tree, level):
            if helmholtz_k == 0:
                return fmm_level_to_order(
                        LaplaceKernel(tree.dimensions),
                        frozenset(), tree, level)
            else:
                return fmm_level_to_order(
                        HelmholtzKernel(tree.dimensions),
                        frozenset([("k", helmholtz_k)]), tree, level)

        rotation_data = None
        if 'traversal' in kwargs:
            # add rotation data if traversal is passed as a keyword argument
            from boxtree.pyfmmlib_integration import FMMLibRotationData
            rotation_data = FMMLibRotationData(self.queue, kwargs['traversal'])
        else:
            logger.warning("Rotation data is not utilized since traversal is "
                           "not known to FPNDFMMLibExpansionWrangler.")

        FMMLibExpansionWrangler.__init__(
                self, tree,

                helmholtz_k=helmholtz_k,
                dipole_vec=dipole_vec,
                dipoles_already_reordered=True,

                fmm_level_to_nterms=inner_fmm_level_to_nterms,
                rotation_data=rotation_data,

                ifgrad=ifgrad)
Beispiel #14
0
 def eval_direct_p2p(
     self, target_boxes, source_box_starts, source_box_lists, src_weights
 ):
     """Forward the call, with unchanged arguments, to
     ``FMMLibExpansionWrangler.eval_direct`` and return its result.
     """
     base_impl = FMMLibExpansionWrangler.eval_direct
     return base_impl(
         self,
         target_boxes,
         source_box_starts,
         source_box_lists,
         src_weights,
     )
Beispiel #15
0
 def eval_locals(self, level_start_target_box_nrs, target_boxes, local_exps):
     """Forward the call, with unchanged arguments, to
     ``FMMLibExpansionWrangler.eval_locals`` and return its result.
     """
     base_impl = FMMLibExpansionWrangler.eval_locals
     return base_impl(
         self,
         level_start_target_box_nrs,
         target_boxes,
         local_exps,
     )
Beispiel #16
0
def test_estimate_calibration_params(ctx_factory):
    """Estimate calibration parameters with both cost model implementations.

    Four FMM runs of increasing size are timed.  The first three are used to
    estimate calibration parameters, once with ``_PythonFMMCostModel`` on
    host traversals and once with ``FMMCostModel`` on device traversals.
    Every parameter must be an ``np.float64``; when process time is
    supported, the two parameter sets must also be equal.
    """
    from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler

    # One (nsources, ntargets) pair per test case.
    problem_sizes = [(1000, 1000), (2000, 2000), (3000, 3000), (4000, 4000)]
    dims = 3
    dtype = np.float64

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    host_traversals = []
    dev_traversals = []
    level_to_orders = []
    timing_results = []

    def fmm_level_to_nterms(tree, ilevel):
        # Same expansion order on every level.
        return 10

    for nsources, ntargets in problem_sizes:
        # {{{ Generate sources, targets and target_radii

        from boxtree.tools import make_normal_particle_array as p_normal
        sources = p_normal(queue, nsources, dims, dtype, seed=15)
        targets = p_normal(queue, ntargets, dims, dtype, seed=18)

        from pyopencl.clrandom import PhiloxGenerator
        radii_rng = PhiloxGenerator(queue.context, seed=22)
        target_radii = radii_rng.uniform(
            queue, ntargets, a=0, b=0.05, dtype=dtype).get()

        # }}}

        # {{{ Generate tree and traversal

        from boxtree import TreeBuilder
        tree_builder = TreeBuilder(ctx)
        tree, _ = tree_builder(
            queue, sources, targets=targets, target_radii=target_radii,
            stick_out_factor=0.15, max_particles_in_box=30, debug=True)

        from boxtree.traversal import FMMTraversalBuilder
        trav_builder = FMMTraversalBuilder(ctx, well_sep_is_n_away=2)
        trav_dev, _ = trav_builder(queue, tree, debug=True)
        trav = trav_dev.get(queue=queue)

        host_traversals.append(trav)
        dev_traversals.append(trav_dev)

        # }}}

        wrangler = FMMLibExpansionWrangler(trav.tree, 0, fmm_level_to_nterms)
        level_to_orders.append(wrangler.level_nterms)

        from boxtree.fmm import drive_fmm
        timing_data = {}
        src_weights = np.random.rand(tree.nsources).astype(tree.coord_dtype)
        drive_fmm(trav, wrangler, (src_weights,), timing_data=timing_data)

        timing_results.append(timing_data)

    time_field_name = (
        "process_elapsed" if SUPPORTS_PROCESS_TIME else "wall_elapsed")

    param_names = ("c_p2m", "c_m2m", "c_p2p", "c_m2l", "c_m2p", "c_p2l",
                   "c_l2l", "c_l2p")

    def check_params_sanity(params):
        for name in param_names:
            assert isinstance(params[name], np.float64)

    def check_params_equal(params_a, params_b):
        for name in param_names:
            assert params_a[name] == params_b[name]

    def calibrate(model_cls, case_traversals):
        # Model all cases but the last with unit calibration parameters,
        # then fit the parameters against the measured timings.
        model = model_cls(make_pde_aware_translation_cost_model)
        stage_costs = [
            model.cost_per_stage(
                queue, case_trav, case_orders,
                model_cls.get_unit_calibration_params(),
            )
            for case_trav, case_orders in zip(
                case_traversals[:-1], level_to_orders[:-1])
        ]
        return model.estimate_calibration_params(
            stage_costs, timing_results[:-1], time_field_name=time_field_name
        )

    python_params = calibrate(_PythonFMMCostModel, host_traversals)
    check_params_sanity(python_params)

    cl_params = calibrate(FMMCostModel, dev_traversals)
    check_params_sanity(cl_params)

    if SUPPORTS_PROCESS_TIME:
        check_params_equal(cl_params, python_params)
Beispiel #17
0
def test_pyfmmlib_numerical_stability(ctx_factory, dims, helmholtz_k, order):
    """Check the pyfmmlib-backed FMM on a deep tree for NaNs and accuracy.

    Sources are placed along a line with geometrically shrinking spacing so
    that the tree has at least 15 levels.  The FMM potential must contain no
    NaN, and its relative max-norm error against the reference from
    ``get_fmmlib_ref_pot`` must stay below ``0.5**(1 + order)`` in 2D and
    ``0.75**(1 + order)`` otherwise.

    :arg ctx_factory: callable returning an OpenCL context.
    :arg dims: number of spatial dimensions.
    :arg helmholtz_k: Helmholtz parameter passed to the wrangler.
    :arg order: expansion order used on every level.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    nsources = 30
    dtype = np.float64

    # The input particles are arranged with geometrically increasing/decreasing
    # spacing along a line, to build a deep tree that stress-tests the
    # translations.
    particle_line = np.array([2**-i for i in range(nsources // 2)],
                             dtype=dtype)
    particle_line = np.hstack([particle_line, 3 - particle_line])
    zero = np.zeros(nsources, dtype=dtype)

    sources = np.vstack([particle_line, zero, zero])[:dims]

    targets = sources * (1 + 1e-3)

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue,
                 sources,
                 targets=targets,
                 max_particles_in_box=2,
                 debug=True)

    assert tree.nlevels >= 15

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)

    trav = trav.get(queue=queue)
    weights = np.ones_like(sources[0])

    from boxtree.pyfmmlib_integration import (FMMLibExpansionWrangler,
                                              FMMLibRotationData)

    def fmm_level_to_nterms(tree, lev):
        return order

    wrangler = FMMLibExpansionWrangler(trav.tree,
                                       helmholtz_k,
                                       fmm_level_to_nterms=fmm_level_to_nterms,
                                       rotation_data=FMMLibRotationData(
                                           queue, trav))

    from boxtree.fmm import drive_fmm

    pot = drive_fmm(trav, wrangler, (weights, ))
    assert not np.isnan(pot).any()

    # {{{ ref fmmlib computation

    logger.info("computing direct (reference) result")

    ref_pot = get_fmmlib_ref_pot(wrangler, weights, sources, targets,
                                 helmholtz_k)

    # The error is measured in the max (l^inf) norm, so label it as such.
    rel_err = la.norm(pot - ref_pot, np.inf) / la.norm(ref_pot, np.inf)
    logger.info("relative linf error vs fmmlib direct: %g", rel_err)

    # Float literals keep the bound non-zero even under integer division.
    if dims == 2:
        error_bound = 0.5**(1 + order)
    else:
        error_bound = 0.75**(1 + order)

    assert rel_err < error_bound, rel_err

    # }}}
Beispiel #18
0
 def output_zeros(self):
     """Return whatever ``FMMLibExpansionWrangler.output_zeros`` produces
     for this instance.
     """
     zeros = FMMLibExpansionWrangler.output_zeros(self)
     return zeros
Beispiel #19
0
def test_pyfmmlib_fmm(ctx_getter, dims, use_dipoles, helmholtz_k):
    """Compare the pyfmmlib-backed FMM against direct evaluations.

    The FMM potential is checked against pyfmmlib's direct evaluation
    routine and, if :mod:`sumpy` can be imported, against sumpy's ``P2P``.
    In both cases the relative max-norm error must be below ``1e-5``.

    :arg ctx_getter: callable returning an OpenCL context.
    :arg dims: number of spatial dimensions.
    :arg use_dipoles: if true, use dipole sources with random directions.
    :arg helmholtz_k: Helmholtz parameter; a false value selects the branch
        without ``zk`` / with ``LaplaceKernel``.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 3000
    ntargets = 1000
    dtype = np.float64

    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    targets = (p_normal(queue, ntargets, dims, dtype, seed=18) +
               np.array([2, 0, 0])[:dims])

    sources_host = particle_array_to_host(sources)
    targets_host = particle_array_to_host(targets)

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue,
                 sources,
                 targets=targets,
                 max_particles_in_box=30,
                 debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)

    trav = trav.get(queue=queue)

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=20)

    weights = rng.uniform(queue, nsources, dtype=np.float64).get()

    if use_dipoles:
        np.random.seed(13)
        dipole_vec = np.random.randn(dims, nsources)
    else:
        dipole_vec = None

    if dims == 2 and helmholtz_k == 0:
        base_nterms = 20
    else:
        base_nterms = 10

    def fmm_level_to_nterms(tree, lev):
        result = base_nterms

        if lev < 3 and helmholtz_k:
            # exercise order-varies-by-level capability
            result += 5

        if use_dipoles:
            result += 1

        return result

    from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler
    wrangler = FMMLibExpansionWrangler(trav.tree,
                                       helmholtz_k,
                                       fmm_level_to_nterms=fmm_level_to_nterms,
                                       dipole_vec=dipole_vec)

    from boxtree.fmm import drive_fmm

    timing_data = {}
    pot = drive_fmm(trav, wrangler, weights, timing_data=timing_data)
    print(timing_data)
    assert timing_data

    # {{{ ref fmmlib computation

    logger.info("computing direct (reference) result")

    import pyfmmlib
    # The routine name is assembled from the equation letter, the output
    # kind ("fld" in 3D, "grad" otherwise), the dimension and a dipole suffix.
    fmmlib_routine = getattr(
        pyfmmlib, "%spot%s%ddall%s_vec" %
        (wrangler.eqn_letter, "fld" if dims == 3 else "grad", dims,
         "_dp" if use_dipoles else ""))

    kwargs = {}
    if dims == 3:
        kwargs["iffld"] = False
    else:
        kwargs["ifgrad"] = False
        kwargs["ifhess"] = False

    if use_dipoles:
        if helmholtz_k == 0 and dims == 2:
            # In this case the dipole direction is folded into a complex
            # dipole strength instead of being passed as a separate vector.
            kwargs["dipstr"] = -weights * (dipole_vec[0] + 1j * dipole_vec[1])
        else:
            kwargs["dipstr"] = weights
            kwargs["dipvec"] = dipole_vec
    else:
        kwargs["charge"] = weights
    if helmholtz_k:
        kwargs["zk"] = helmholtz_k

    ref_pot = wrangler.finalize_potentials(
        fmmlib_routine(sources=sources_host.T,
                       targets=targets_host.T,
                       **kwargs)[0])

    # The error is measured in the max (l^inf) norm, so label it as such.
    rel_err = la.norm(pot - ref_pot, np.inf) / la.norm(ref_pot, np.inf)
    logger.info("relative linf error vs fmmlib direct: %g", rel_err)
    assert rel_err < 1e-5, rel_err

    # }}}

    # {{{ check against sumpy

    try:
        import sumpy  # noqa
    except ImportError:
        have_sumpy = False
        from warnings import warn
        warn("sumpy unavailable: cannot compute independent reference "
             "values for pyfmmlib")
    else:
        have_sumpy = True

    if have_sumpy:
        from sumpy.kernel import (LaplaceKernel, HelmholtzKernel,
                                  DirectionalSourceDerivative)
        from sumpy.p2p import P2P

        sumpy_extra_kwargs = {}
        if helmholtz_k:
            knl = HelmholtzKernel(dims)
            sumpy_extra_kwargs["k"] = helmholtz_k
        else:
            knl = LaplaceKernel(dims)

        if use_dipoles:
            knl = DirectionalSourceDerivative(knl)
            sumpy_extra_kwargs["src_derivative_dir"] = dipole_vec

        p2p = P2P(ctx, [knl], exclude_self=False)

        evt, (sumpy_ref_pot, ) = p2p(queue,
                                     targets,
                                     sources, [weights],
                                     out_host=True,
                                     **sumpy_extra_kwargs)

        sumpy_rel_err = (la.norm(pot - sumpy_ref_pot, np.inf) /
                         la.norm(sumpy_ref_pot, np.inf))

        logger.info("relative linf error vs sumpy direct: %g", sumpy_rel_err)
        assert sumpy_rel_err < 1e-5, sumpy_rel_err

    # }}}
Beispiel #20
0
 def reorder_sources(self, source_array):
     """Return the result of ``FMMLibExpansionWrangler.reorder_sources``
     applied to *source_array* for this instance.
     """
     reordered = FMMLibExpansionWrangler.reorder_sources(self, source_array)
     return reordered