def test_interaction_list_particle_count_thresholding(ctx_getter, enable_extents):
    """Check FMM completeness when the traversal thresholds small source boxes.

    A 2D tree is built from 1000 sources and 1000 targets, and the traversal
    is built with ``_from_sep_smaller_min_nsources_cumul`` set to one more
    than ``max_particles_in_box``.  The FMM is then driven with unit weights
    through ``ConstantOneExpansionWrangler``; the test passes if every
    potential equals the sum of the weights.

    :arg ctx_getter: callable returning an OpenCL context.
    :arg enable_extents: if true, targets get random radii that are passed
        to the tree builder as ``target_radii``.
    """
    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    logging.basicConfig(level=logging.INFO)

    dims = 2
    nsources = 1000
    ntargets = 1000
    # ``np.float`` was only an alias for the builtin ``float`` and has been
    # removed from NumPy; name the 64-bit dtype explicitly instead.
    dtype = np.float64

    max_particles_in_box = 30
    # Ensure that we have underfilled boxes.
    from_sep_smaller_min_nsources_cumul = 1 + max_particles_in_box

    from boxtree.fmm import drive_fmm
    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    targets = p_normal(queue, ntargets, dims, dtype, seed=15)

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=12)

    if enable_extents:
        target_radii = 2**rng.uniform(queue, ntargets, dtype=dtype, a=-10, b=0)
    else:
        target_radii = None

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, targets=targets,
            max_particles_in_box=max_particles_in_box,
            target_radii=target_radii,
            debug=True, stick_out_factor=0.25)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(
            queue, tree, debug=True,
            _from_sep_smaller_min_nsources_cumul=(
                from_sep_smaller_min_nsources_cumul))

    weights = np.ones(nsources)
    weights_sum = np.sum(weights)

    # The constant-one wrangler works on host data.
    host_trav = trav.get(queue=queue)
    host_tree = host_trav.tree

    wrangler = ConstantOneExpansionWrangler(host_tree)

    pot = drive_fmm(host_trav, wrangler, weights)

    assert (pot == weights_sum).all()
def test_pyfmmlib_fmm(ctx_getter):
    """Compare the pyfmmlib-backed 2D Helmholtz FMM with direct summation.

    The test is skipped when ``pyfmmlib`` cannot be imported.  Targets are
    shifted by ``[2, 0]`` relative to their generated positions, the FMM
    potential is compared with the result of ``hpotgrad2dall_vec``, and the
    relative error must stay below ``1e-5``.

    :arg ctx_getter: callable returning an OpenCL context.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    cl_ctx = ctx_getter()
    queue = cl.CommandQueue(cl_ctx)

    # {{{ problem setup

    nsources = 3000
    ntargets = 1000
    dims = 2
    dtype = np.float64
    helmholtz_k = 2

    shift = np.array([2, 0])
    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    targets = p_normal(queue, ntargets, dims, dtype, seed=18) + shift

    sources_host = particle_array_to_host(sources)
    targets_host = particle_array_to_host(targets)

    # }}}

    # {{{ tree and traversal

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(cl_ctx)
    tree, _ = build_tree(
            queue, sources, targets=targets,
            max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    build_traversal = FMMTraversalBuilder(cl_ctx)
    device_trav, _ = build_traversal(queue, tree, debug=True)

    # the wrangler below is handed the host-side copy
    trav = device_trav.get(queue=queue)

    # }}}

    from pyopencl.clrandom import PhiloxGenerator
    generator = PhiloxGenerator(queue.context, seed=20)
    weights = generator.uniform(queue, nsources, dtype=np.float64).get()

    logger.info("computing direct (reference) result")

    from pyfmmlib import hpotgrad2dall_vec
    reference = hpotgrad2dall_vec(
            ifgrad=False, ifhess=False,
            sources=sources_host.T, charge=weights,
            targets=targets_host.T, zk=helmholtz_k)
    ref_pot, _, _ = reference

    from boxtree.pyfmmlib_integration import Helmholtz2DExpansionWrangler
    wrangler = Helmholtz2DExpansionWrangler(trav.tree, helmholtz_k, nterms=10)

    from boxtree.fmm import drive_fmm
    pot = drive_fmm(trav, wrangler, weights)

    abs_err = la.norm(pot - ref_pot)
    rel_err = abs_err / la.norm(ref_pot)
    logger.info("relative l2 error: %g" % rel_err)

    assert rel_err < 1e-5
def test_fmm_float32(ctx_getter=cl.create_some_context, enable_extents=True):
    """Run the constant-one FMM on three million single-precision particles.

    Marks itself as an expected failure on devices for which
    ``has_struct_arg_count_bug`` reports true.  Prints the time spent in
    ``drive_fmm`` and asserts that every potential equals the sum of the
    (unit) weights.

    :arg ctx_getter: callable returning an OpenCL context.
    :arg enable_extents: if true, targets get random radii that are passed
        to the tree builder as ``target_radii``.
    """
    from time import time

    cl_ctx = ctx_getter()
    queue = cl.CommandQueue(cl_ctx)

    from pyopencl.characterize import has_struct_arg_count_bug
    if has_struct_arg_count_bug(queue.device):
        pytest.xfail("won't work on devices with the struct arg count issue")

    logging.basicConfig(level=logging.INFO)

    dims = 2
    dtype = np.float32
    nsources = 3000000
    ntargets = 3000000

    from boxtree.fmm import drive_fmm
    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    targets = p_normal(queue, ntargets, dims, dtype, seed=15)

    from pyopencl.clrandom import PhiloxGenerator
    generator = PhiloxGenerator(queue.context, seed=12)

    target_radii = (
            2**generator.uniform(queue, ntargets, dtype=dtype, a=-10, b=0)
            if enable_extents
            else None)

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(cl_ctx)
    tree, _ = build_tree(
            queue, sources,
            targets=targets,
            max_particles_in_box=30,
            target_radii=target_radii,
            stick_out_factor=0.25,
            debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    build_traversal = FMMTraversalBuilder(cl_ctx)
    device_trav, _ = build_traversal(queue, tree, debug=True)

    weights = np.ones(nsources)
    weights_sum = np.sum(weights)

    # the constant-one wrangler is given host-side data
    host_trav = device_trav.get(queue=queue)
    wrangler = ConstantOneExpansionWrangler(host_trav.tree)

    start = time()
    pot = drive_fmm(host_trav, wrangler, weights)
    print(time() - start)

    assert (pot == weights_sum).all()
def test_sumpy_fmm_timing_data_collection(ctx_getter):
    """Check that ``drive_fmm`` fills in ``timing_data`` for a sumpy wrangler.

    A 2D Laplace FMM of order 1 with volume Taylor expansions is run on 500
    sources using a command queue created with profiling enabled.  The test
    passes if the dictionary passed as ``timing_data`` is non-empty afterwards.

    :arg ctx_getter: callable returning an OpenCL context.
    """
    logging.basicConfig(level=logging.INFO)

    cl_ctx = ctx_getter()
    queue = cl.CommandQueue(
            cl_ctx,
            properties=cl.command_queue_properties.PROFILING_ENABLE)

    nsources = 500
    dtype = np.float64

    from boxtree.tools import make_normal_particle_array as p_normal

    knl = LaplaceKernel(2)
    local_expn_class = VolumeTaylorLocalExpansion
    mpole_expn_class = VolumeTaylorMultipoleExpansion
    order = 1

    sources = p_normal(queue, nsources, knl.dim, dtype, seed=15)

    # {{{ tree and traversal

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(cl_ctx)
    tree, _ = build_tree(queue, sources, max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    build_traversal = FMMTraversalBuilder(cl_ctx)
    trav, _ = build_traversal(queue, tree, debug=True)

    # }}}

    from pyopencl.clrandom import PhiloxGenerator
    generator = PhiloxGenerator(cl_ctx)
    weights = generator.uniform(queue, nsources, dtype=np.float64)

    out_kernels = [knl]

    def level_to_order(kernel, kernel_args, tree, lev):
        # same expansion order on every level
        return order

    from functools import partial
    from sumpy.fmm import SumpyExpansionWranglerCodeContainer
    code_container = SumpyExpansionWranglerCodeContainer(
            cl_ctx,
            partial(mpole_expn_class, knl),
            partial(local_expn_class, knl),
            out_kernels)
    wrangler = code_container.get_wrangler(
            queue, tree, dtype, fmm_level_to_order=level_to_order)

    from boxtree.fmm import drive_fmm

    timing_data = {}
    # unpacking checks that exactly one potential is returned
    pot, = drive_fmm(trav, wrangler, weights, timing_data=timing_data)
    print(timing_data)

    assert timing_data
def test_sumpy_fmm_timing_data_collection(ctx_factory):
    """Check that ``drive_fmm`` records timing data with a sumpy wrangler.

    Runs an order-1, 2D Laplace FMM with volume Taylor expansions on 500
    sources.  The command queue is created with profiling enabled, the source
    weights are passed to ``drive_fmm`` as a one-element tuple, and the test
    passes if the supplied ``timing_data`` dictionary ends up non-empty.

    :arg ctx_factory: callable returning an OpenCL context.
    """
    logging.basicConfig(level=logging.INFO)

    cl_ctx = ctx_factory()
    profiling_flag = cl.command_queue_properties.PROFILING_ENABLE
    queue = cl.CommandQueue(cl_ctx, properties=profiling_flag)

    nsources = 500
    dtype = np.float64

    from boxtree.tools import make_normal_particle_array as p_normal

    knl = LaplaceKernel(2)
    local_expn_class = VolumeTaylorLocalExpansion
    mpole_expn_class = VolumeTaylorMultipoleExpansion
    order = 1

    sources = p_normal(queue, nsources, knl.dim, dtype, seed=15)

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(cl_ctx)
    tree, _ = tree_builder(
            queue, sources, max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    trav_builder = FMMTraversalBuilder(cl_ctx)
    trav, _ = trav_builder(queue, tree, debug=True)

    from pyopencl.clrandom import PhiloxGenerator
    weights = PhiloxGenerator(cl_ctx).uniform(
            queue, nsources, dtype=np.float64)

    out_kernels = [knl]

    from functools import partial
    make_mpole_expn = partial(mpole_expn_class, knl)
    make_local_expn = partial(local_expn_class, knl)

    from sumpy.fmm import SumpyExpansionWranglerCodeContainer
    wcc = SumpyExpansionWranglerCodeContainer(
            cl_ctx, make_mpole_expn, make_local_expn, out_kernels)

    def constant_order(kernel, kernel_args, tree, lev):
        # every level uses the same expansion order
        return order

    wrangler = wcc.get_wrangler(
            queue, tree, dtype, fmm_level_to_order=constant_order)

    from boxtree.fmm import drive_fmm

    timing_data = {}
    # unpacking checks that exactly one potential is returned
    pot, = drive_fmm(trav, wrangler, (weights,), timing_data=timing_data)
    print(timing_data)

    assert timing_data
def exec_compute_potential_insn_fmm(self, queue, insn, bound_expr, evaluate):
    """Evaluate a compute-potential instruction with the FMM.

    :arg queue: command queue handed to the density, the extra-kwargs
        helper and the wrangler.
    :arg insn: instruction supplying ``outputs``, ``density``, ``kernels``
        and ``kernel_arguments``.
    :arg bound_expr: bound expression whose ``places`` resolves target names.
    :arg evaluate: callable used to evaluate the density (and passed on to
        the extra-kwargs helper).
    :returns: a tuple ``(result, timing_data)`` where *result* is a list of
        ``(output name, potential)`` pairs and *timing_data* is an empty
        dictionary.
    """
    # {{{ gather unique target discretizations used

    target_name_to_index = {}
    geometries = []

    for output in insn.outputs:
        assert output.qbx_forced_limit not in (-1, 1)

        name = output.target_name
        if name not in target_name_to_index:
            # first occurrence of this target: assign the next free index
            target_name_to_index[name] = len(geometries)
            geometries.append(bound_expr.places.get_geometry(name))

    targets = tuple(geometries)

    # }}}

    # {{{ get wrangler

    geo_data = self.fmm_geometry_data(targets)

    density = evaluate(insn.density).with_queue(queue)
    strengths = density * self.weights_and_area_elements()

    out_kernels = tuple(insn.kernels)
    fmm_kernel = self.get_fmm_kernel(out_kernels)
    output_and_expansion_dtype = self.get_fmm_output_and_expansion_dtype(
            fmm_kernel, strengths)

    extra_kwargs = self.get_fmm_expansion_wrangler_extra_kwargs(
            queue, out_kernels,
            geo_data.tree().user_source_ids,
            insn.kernel_arguments, evaluate)
    kernel_extra_kwargs, source_extra_kwargs = extra_kwargs

    code_container = self.expansion_wrangler_code_container(
            fmm_kernel, out_kernels)
    wrangler = code_container.get_wrangler(
            queue, geo_data.tree(),
            output_and_expansion_dtype,
            self.fmm_level_to_order,
            source_extra_kwargs=source_extra_kwargs,
            kernel_extra_kwargs=kernel_extra_kwargs)

    # }}}

    from boxtree.fmm import drive_fmm
    all_potentials_on_every_tgt = drive_fmm(
            geo_data.traversal(), wrangler, strengths, timing_data=None)

    # {{{ postprocess fmm

    def potential_for(output):
        # Cut this output's target range out of the global target array.
        index = target_name_to_index[output.target_name]
        bounds = geo_data.target_info().target_discr_starts[index:index + 2]
        return all_potentials_on_every_tgt[output.kernel_index][slice(*bounds)]

    result = [(output.name, potential_for(output)) for output in insn.outputs]

    # }}}

    # no timing information is collected here (timing_data=None above)
    timing_data = {}
    return result, timing_data
def test_pyfmmlib_fmm(ctx_getter, dims, use_dipoles, helmholtz_k):
    """Compare the pyfmmlib-backed FMM with direct evaluation.

    The FMM result is checked against pyfmmlib's own direct routine and, if
    ``sumpy`` can be imported, against ``sumpy.p2p.P2P`` as well.  Both
    comparisons require a relative max-norm error below ``1e-5``.  The test
    is skipped when ``pyfmmlib`` cannot be imported.

    :arg ctx_getter: callable returning an OpenCL context.
    :arg dims: number of dimensions; the code distinguishes ``dims == 3``
        from the other case when choosing fmmlib routine and flags.
    :arg use_dipoles: if true, random dipole directions are generated and
        dipole sources are used instead of charges.
    :arg helmholtz_k: Helmholtz parameter; a falsy value selects the branch
        that uses ``LaplaceKernel`` in the sumpy check.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 3000
    ntargets = 1000
    dtype = np.float64

    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    targets = (
            p_normal(queue, ntargets, dims, dtype, seed=18)
            + np.array([2, 0, 0])[:dims])

    sources_host = particle_array_to_host(sources)
    targets_host = particle_array_to_host(targets)

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, targets=targets,
            max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)

    trav = trav.get(queue=queue)

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=20)

    weights = rng.uniform(queue, nsources, dtype=np.float64).get()

    if use_dipoles:
        np.random.seed(13)
        dipole_vec = np.random.randn(dims, nsources)
    else:
        dipole_vec = None

    if dims == 2 and helmholtz_k == 0:
        base_nterms = 20
    else:
        base_nterms = 10

    def fmm_level_to_nterms(tree, lev):
        result = base_nterms

        if lev < 3 and helmholtz_k:
            # exercise order-varies-by-level capability
            result += 5

        if use_dipoles:
            result += 1

        return result

    from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler
    wrangler = FMMLibExpansionWrangler(
            trav.tree, helmholtz_k,
            fmm_level_to_nterms=fmm_level_to_nterms,
            dipole_vec=dipole_vec)

    from boxtree.fmm import drive_fmm

    timing_data = {}
    pot = drive_fmm(trav, wrangler, weights, timing_data=timing_data)
    print(timing_data)
    assert timing_data

    # {{{ ref fmmlib computation

    logger.info("computing direct (reference) result")

    import pyfmmlib
    fmmlib_routine = getattr(
            pyfmmlib,
            "%spot%s%ddall%s_vec" % (
                wrangler.eqn_letter,
                "fld" if dims == 3 else "grad",
                dims,
                "_dp" if use_dipoles else ""))

    kwargs = {}
    if dims == 3:
        kwargs["iffld"] = False
    else:
        kwargs["ifgrad"] = False
        kwargs["ifhess"] = False

    if use_dipoles:
        if helmholtz_k == 0 and dims == 2:
            # here the direction is folded into a complex dipole strength
            kwargs["dipstr"] = -weights * (dipole_vec[0] + 1j * dipole_vec[1])
        else:
            kwargs["dipstr"] = weights
            kwargs["dipvec"] = dipole_vec
    else:
        kwargs["charge"] = weights
    if helmholtz_k:
        kwargs["zk"] = helmholtz_k

    ref_pot = wrangler.finalize_potentials(
            fmmlib_routine(
                sources=sources_host.T, targets=targets_host.T,
                **kwargs)[0])

    # The error is measured in the max norm, so label it as such.
    rel_err = la.norm(pot - ref_pot, np.inf) / la.norm(ref_pot, np.inf)
    logger.info("relative linf error vs fmmlib direct: %g", rel_err)
    assert rel_err < 1e-5, rel_err

    # }}}

    # {{{ check against sumpy

    try:
        import sumpy  # noqa
    except ImportError:
        have_sumpy = False
        from warnings import warn
        warn("sumpy unavailable: cannot compute independent reference "
                "values for pyfmmlib")
    else:
        have_sumpy = True

    if have_sumpy:
        from sumpy.kernel import (
                LaplaceKernel, HelmholtzKernel, DirectionalSourceDerivative)
        from sumpy.p2p import P2P

        sumpy_extra_kwargs = {}
        if helmholtz_k:
            knl = HelmholtzKernel(dims)
            sumpy_extra_kwargs["k"] = helmholtz_k
        else:
            knl = LaplaceKernel(dims)

        if use_dipoles:
            knl = DirectionalSourceDerivative(knl)
            sumpy_extra_kwargs["src_derivative_dir"] = dipole_vec

        p2p = P2P(ctx, [knl], exclude_self=False)

        evt, (sumpy_ref_pot,) = p2p(
                queue, targets, sources, [weights],
                out_host=True, **sumpy_extra_kwargs)

        sumpy_rel_err = (
                la.norm(pot - sumpy_ref_pot, np.inf)
                / la.norm(sumpy_ref_pot, np.inf))

        logger.info("relative linf error vs sumpy direct: %g", sumpy_rel_err)
        assert sumpy_rel_err < 1e-5, sumpy_rel_err

    # }}}
def test_fmm_completeness(ctx_getter, dims, nsources_req, ntargets_req,
        who_has_extent, source_gen, target_gen, filter_kind, well_sep_is_n_away,
        extent_norm, from_sep_smaller_crit):
    """Tests whether the built FMM traversal structures and driver completely
    capture all interactions.

    All source weights are one, so with the constant-one wranglers every
    (retained) target potential is expected to equal the sum of the weights.

    :arg ctx_getter: callable returning an OpenCL context.
    :arg dims: passed to the particle generators.
    :arg nsources_req: requested number of sources; the actual count is read
        back from the generated array.
    :arg ntargets_req: requested number of targets, or *None* to let the
        tree builder use the sources as targets.
    :arg who_has_extent: string; sources have extent if it contains ``"s"``,
        targets if it contains ``"t"``.
    :arg source_gen: callable generating the source particle array.
    :arg target_gen: callable generating the target particle array.
    :arg filter_kind: falsy for no target filtering, otherwise ``"user"`` or
        ``"tree"``; any other truthy value raises :exc:`ValueError`.
    :arg well_sep_is_n_away: forwarded to ``FMMTraversalBuilder``.
    :arg extent_norm: forwarded to the tree builder.
    :arg from_sep_smaller_crit: forwarded to ``FMMTraversalBuilder``.
    """

    sources_have_extent = "s" in who_has_extent
    targets_have_extent = "t" in who_has_extent

    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    dtype = np.float64

    # Particle generation may raise ImportError; in that case skip the test.
    try:
        sources = source_gen(queue, nsources_req, dims, dtype, seed=15)
        nsources = len(sources[0])

        if ntargets_req is None:
            # This says "same as sources" to the tree builder.
            targets = None
            ntargets = ntargets_req
        else:
            targets = target_gen(queue, ntargets_req, dims, dtype, seed=16)
            ntargets = len(targets[0])
    except ImportError:
        pytest.skip("loo.py not available, but needed for particle array "
                "generation")

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=12)
    # The same generator is reused below for the filter flags, so the order
    # of the draws matters for reproducibility.
    if sources_have_extent:
        source_radii = 2**rng.uniform(queue, nsources, dtype=dtype,
                a=-10, b=0)
    else:
        source_radii = None

    if targets_have_extent:
        target_radii = 2**rng.uniform(queue, ntargets, dtype=dtype,
                a=-10, b=0)
    else:
        target_radii = None

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, targets=targets,
            max_particles_in_box=30,
            source_radii=source_radii, target_radii=target_radii,
            debug=True, stick_out_factor=0.25, extent_norm=extent_norm)
    # Disabled debugging aid: plot the tree.
    if 0:
        tree.get().plot()
        import matplotlib.pyplot as pt
        pt.show()

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx,
            well_sep_is_n_away=well_sep_is_n_away,
            from_sep_smaller_crit=from_sep_smaller_crit)
    trav, _ = tbuild(queue, tree, debug=True)

    if who_has_extent:
        # Keep the unmerged traversal around; it is only used by the
        # (disabled) plotting code further down.
        pre_merge_trav = trav
        trav = trav.merge_close_lists(queue)

    #weights = np.random.randn(nsources)
    weights = np.ones(nsources)
    weights_sum = np.sum(weights)

    host_trav = trav.get(queue=queue)
    host_tree = host_trav.tree

    if who_has_extent:
        pre_merge_host_trav = pre_merge_trav.get(queue=queue)

    from boxtree.tree import ParticleListFilter
    plfilt = ParticleListFilter(ctx)

    if filter_kind:
        # Random 0/1 flag per target (per source if targets == sources).
        flags = rng.uniform(queue, ntargets or nsources, np.int32, a=0, b=2) \
                .astype(np.int8)
        if filter_kind == "user":
            filtered_targets = plfilt.filter_target_lists_in_user_order(
                    queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInUserOrder(
                    host_tree, filtered_targets.get(queue=queue))
        elif filter_kind == "tree":
            filtered_targets = plfilt.filter_target_lists_in_tree_order(
                    queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInTreeOrder(
                    host_tree, filtered_targets.get(queue=queue))
        else:
            raise ValueError("unsupported value of 'filter_kind'")
    else:
        wrangler = ConstantOneExpansionWrangler(host_tree)
        # All-ones flags, so the flag-based indexing in the (disabled)
        # plotting code below selects every target.
        flags = cl.array.empty(queue, ntargets or nsources, dtype=np.int8)
        flags.fill(1)

    if ntargets is None and not filter_kind:
        # This check only works for targets == sources.
        assert (wrangler.reorder_potentials(
                wrangler.reorder_sources(weights)) == weights).all()

    from boxtree.fmm import drive_fmm
    pot = drive_fmm(host_trav, wrangler, weights)

    if filter_kind:
        # Only compare the potentials of targets that were kept.
        pot = pot[flags.get() > 0]

    rel_err = la.norm((pot - weights_sum) / nsources)
    good = rel_err < 1e-8

    # {{{ build, evaluate matrix (and identify incorrect interactions)

    # Disabled debugging aid: assemble the interaction matrix column by
    # column from unit vectors and plot the entries that are not one.
    if 0 and not good:
        mat = np.zeros((ntargets, nsources), dtype)
        from pytools import ProgressBar

        logging.getLogger().setLevel(logging.WARNING)

        pb = ProgressBar("matrix", nsources)
        for i in range(nsources):
            unit_vec = np.zeros(nsources, dtype=dtype)
            unit_vec[i] = 1
            mat[:, i] = drive_fmm(host_trav, wrangler, unit_vec)
            pb.progress()
        pb.finished()

        logging.getLogger().setLevel(logging.INFO)

        import matplotlib.pyplot as pt

        if 0:
            pt.imshow(mat)
            pt.colorbar()
            pt.show()

        incorrect_tgts, incorrect_srcs = np.where(mat != 1)

        if 1 and len(incorrect_tgts):
            from boxtree.visualization import TreePlotter
            plotter = TreePlotter(host_tree)
            plotter.draw_tree(fill=False, edgecolor="black")
            plotter.draw_box_numbers()
            plotter.set_bounding_box()

            tree_order_incorrect_tgts = \
                    host_tree.indices_to_tree_target_order(incorrect_tgts)
            tree_order_incorrect_srcs = \
                    host_tree.indices_to_tree_source_order(incorrect_srcs)

            src_boxes = [
                    host_tree.find_box_nr_for_source(i)
                    for i in tree_order_incorrect_srcs
                    ]
            tgt_boxes = [
                    host_tree.find_box_nr_for_target(i)
                    for i in tree_order_incorrect_tgts
                    ]
            print(src_boxes)
            print(tgt_boxes)

            # plot all sources/targets
            if 0:
                pt.plot(
                        host_tree.targets[0],
                        host_tree.targets[1],
                        "v", alpha=0.9)
                pt.plot(
                        host_tree.sources[0],
                        host_tree.sources[1],
                        "gx", alpha=0.9)

            # plot offending sources/targets
            if 0:
                pt.plot(
                        host_tree.targets[0][tree_order_incorrect_tgts],
                        host_tree.targets[1][tree_order_incorrect_tgts],
                        "rv")
                pt.plot(
                        host_tree.sources[0][tree_order_incorrect_srcs],
                        host_tree.sources[1][tree_order_incorrect_srcs],
                        "go")
            pt.gca().set_aspect("equal")

            from boxtree.visualization import draw_box_lists
            draw_box_lists(
                    plotter,
                    pre_merge_host_trav if who_has_extent else host_trav,
                    22)
            # from boxtree.visualization import draw_same_level_non_well_sep_boxes
            # draw_same_level_non_well_sep_boxes(plotter, host_trav, 2)

            pt.show()

    # }}}

    # Disabled debugging aid: plot the pointwise error.
    if 0 and not good:
        import matplotlib.pyplot as pt
        pt.plot(pot - weights_sum)
        pt.show()

    # Disabled debugging aid: plot the retained targets and mark those whose
    # potential is off by at least 1e-3.
    if 0 and not good:
        import matplotlib.pyplot as pt
        filt_targets = [
                host_tree.targets[0][flags.get() > 0],
                host_tree.targets[1][flags.get() > 0],
                ]
        host_tree.plot()
        bad = np.abs(pot - weights_sum) >= 1e-3
        bad_targets = [
                filt_targets[0][bad],
                filt_targets[1][bad],
                ]
        print(bad_targets[0].shape)
        pt.plot(filt_targets[0], filt_targets[1], "x")
        pt.plot(bad_targets[0], bad_targets[1], "v")
        pt.show()

    assert good
def test_pyfmmlib_fmm(ctx_getter):
    """Compare the pyfmmlib-backed 2D Helmholtz FMM with direct summation.

    The test is skipped when ``pyfmmlib`` cannot be imported.  Targets are
    shifted by ``[2, 0]`` relative to their generated positions, the FMM
    potential is compared with the result of ``hpotgrad2dall_vec``, and the
    relative l2 error must stay below ``1e-5``.

    :arg ctx_getter: callable returning an OpenCL context.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 3000
    ntargets = 1000
    dims = 2
    dtype = np.float64

    helmholtz_k = 2

    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    targets = (
            p_normal(queue, ntargets, dims, dtype, seed=18)
            + np.array([2, 0]))

    sources_host = particle_array_to_host(sources)
    targets_host = particle_array_to_host(targets)

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, targets=targets,
            max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)

    trav = trav.get(queue=queue)

    # RanluxGenerator is deprecated in PyOpenCL; use the counter-based Philox
    # generator instead.  Note that it is constructed from a context, not a
    # queue.  The weights feed both the FMM and the reference computation,
    # so which random stream is used does not affect the comparison.
    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=20)

    weights = rng.uniform(queue, nsources, dtype=np.float64).get()

    logger.info("computing direct (reference) result")

    from pyfmmlib import hpotgrad2dall_vec
    ref_pot, _, _ = hpotgrad2dall_vec(
            ifgrad=False, ifhess=False,
            sources=sources_host.T, charge=weights,
            targets=targets_host.T, zk=helmholtz_k)

    from boxtree.pyfmmlib_integration import Helmholtz2DExpansionWrangler
    wrangler = Helmholtz2DExpansionWrangler(trav.tree, helmholtz_k, nterms=10)

    from boxtree.fmm import drive_fmm
    pot = drive_fmm(trav, wrangler, weights)

    rel_err = la.norm(pot - ref_pot) / la.norm(ref_pot)
    logger.info("relative l2 error: %g", rel_err)

    assert rel_err < 1e-5
def test_sumpy_fmm(ctx_getter, knl, local_expn_class, mpole_expn_class):
    """Verify convergence of the sumpy-backed FMM against direct evaluation.

    For each expansion order in a kernel-dependent list, the FMM potential
    is compared with a ``sumpy.P2P`` reference in the max norm and the
    relative error is fed to a ``PConvergenceVerifier``, which is printed
    and called at the end.

    :arg ctx_getter: callable returning an OpenCL context.
    :arg knl: the kernel; ``knl.dim`` sets the dimension, and Helmholtz and
        Yukawa kernels get extra arguments and a complex dtype.
    :arg local_expn_class: local expansion class, bound to *knl* via
        :func:`functools.partial`.
    :arg mpole_expn_class: multipole expansion class, bound likewise.
    """
    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 1000
    ntargets = 300
    dtype = np.float64

    from boxtree.tools import make_normal_particle_array as p_normal

    sources = p_normal(queue, nsources, knl.dim, dtype, seed=15)
    if 1:
        offset = np.zeros(knl.dim)
        offset[0] = 0.1

        targets = (
                p_normal(queue, ntargets, knl.dim, dtype, seed=18)
                + offset)

        del offset
    else:
        # Disabled alternative: evaluate on a regular plotting grid.
        from sumpy.visualization import FieldPlotter
        fp = FieldPlotter(np.array([0.5, 0]), extent=3, npoints=200)
        from pytools.obj_array import make_obj_array
        targets = make_obj_array(
                [fp.points[i] for i in range(knl.dim)])

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, targets=targets,
            max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)

    # {{{ plot tree

    # Disabled debugging aid.
    if 0:
        host_tree = tree.get()
        host_trav = trav.get()

        if 1:
            print("src_box", host_tree.find_box_nr_for_source(403))
            print("tgt_box", host_tree.find_box_nr_for_target(28))
            print(list(host_trav.target_or_target_parent_boxes).index(37))
            print(host_trav.get_box_list("sep_bigger", 22))

        from boxtree.visualization import TreePlotter
        plotter = TreePlotter(host_tree)
        plotter.draw_tree(fill=False, edgecolor="black", zorder=10)
        plotter.set_bounding_box()
        plotter.draw_box_numbers()

        import matplotlib.pyplot as pt
        pt.show()

    # }}}

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(ctx, seed=44)
    weights = rng.uniform(queue, nsources, dtype=np.float64)

    from pytools.convergence import PConvergenceVerifier
    pconv_verifier = PConvergenceVerifier()

    # {{{ kernel-dependent parameters

    extra_kwargs = {}
    dtype = np.float64
    order_values = [1, 2, 3]
    if isinstance(knl, HelmholtzKernel):
        extra_kwargs["k"] = 0.05
        dtype = np.complex128

        if knl.dim == 3:
            order_values = [1, 2]
        elif knl.dim == 2 and issubclass(local_expn_class, H2DLocalExpansion):
            order_values = [10, 12]

    elif isinstance(knl, YukawaKernel):
        extra_kwargs["lam"] = 2
        dtype = np.complex128

        if knl.dim == 3:
            order_values = [1, 2]
        elif knl.dim == 2 and issubclass(local_expn_class, Y2DLocalExpansion):
            order_values = [10, 12]

    # }}}

    out_kernels = [knl]

    # {{{ direct (reference) result

    # The reference does not depend on the expansion order, so evaluate it
    # once here instead of once per loop iteration.
    logger.info("computing direct (reference) result")

    from sumpy import P2P
    p2p = P2P(ctx, out_kernels, exclude_self=False)
    _evt, (ref_pot,) = p2p(
            queue, targets, sources, (weights,),
            **extra_kwargs)
    ref_pot = ref_pot.get()
    ref_norm = la.norm(ref_pot, np.inf)

    # }}}

    from functools import partial
    from sumpy.fmm import SumpyExpansionWranglerCodeContainer
    from boxtree.fmm import drive_fmm

    for order in order_values:
        wcc = SumpyExpansionWranglerCodeContainer(
                ctx,
                partial(mpole_expn_class, knl),
                partial(local_expn_class, knl),
                out_kernels)
        # The lambda is only used within this iteration, so closing over
        # the loop variable is safe.
        wrangler = wcc.get_wrangler(
                queue, tree, dtype,
                fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order,
                kernel_extra_kwargs=extra_kwargs)

        # unpacking checks that exactly one potential is returned
        pot, = drive_fmm(trav, wrangler, weights)
        pot = pot.get()

        # The error is measured in the max norm, so label it as such.
        rel_err = la.norm(pot - ref_pot, np.inf) / ref_norm
        logger.info("order %d -> relative linf error: %g", order, rel_err)

        pconv_verifier.add_data_point(order, rel_err)

    print(pconv_verifier)
    pconv_verifier()
def test_estimate_calibration_params(ctx_factory):
    """Estimate cost-model calibration parameters from measured FMM timings.

    Four 3D problems of increasing size are run through the FMMLib wrangler
    with timing collection.  Both ``_PythonFMMCostModel`` and
    ``FMMCostModel`` then estimate calibration parameters from all but the
    last case.  Every parameter must be an ``np.float64``, and when
    ``SUPPORTS_PROCESS_TIME`` is true the two models must agree exactly.

    :arg ctx_factory: callable returning an OpenCL context.
    """
    from boxtree import TreeBuilder
    from boxtree.fmm import drive_fmm
    from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler
    from boxtree.tools import make_normal_particle_array as p_normal
    from boxtree.traversal import FMMTraversalBuilder
    from pyopencl.clrandom import PhiloxGenerator

    nsources_list = [1000, 2000, 3000, 4000]
    ntargets_list = [1000, 2000, 3000, 4000]
    dims = 3
    dtype = np.float64

    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)

    traversals = []
    traversals_dev = []
    level_to_orders = []
    timing_results = []

    def fmm_level_to_nterms(tree, ilevel):
        # the same number of terms on every level
        return 10

    for nsources, ntargets in zip(nsources_list, ntargets_list):
        # {{{ Generate sources, targets and target_radii

        sources = p_normal(queue, nsources, dims, dtype, seed=15)
        targets = p_normal(queue, ntargets, dims, dtype, seed=18)

        generator = PhiloxGenerator(queue.context, seed=22)
        radii_dev = generator.uniform(
                queue, ntargets, a=0, b=0.05, dtype=dtype)
        target_radii = radii_dev.get()

        # }}}

        # {{{ Generate tree and traversal

        build_tree = TreeBuilder(cl_ctx)
        tree, _ = build_tree(
                queue, sources,
                targets=targets,
                target_radii=target_radii,
                stick_out_factor=0.15,
                max_particles_in_box=30,
                debug=True)

        build_traversal = FMMTraversalBuilder(cl_ctx, well_sep_is_n_away=2)
        trav_dev, _ = build_traversal(queue, tree, debug=True)
        trav = trav_dev.get(queue=queue)

        traversals.append(trav)
        traversals_dev.append(trav_dev)

        # }}}

        wrangler = FMMLibExpansionWrangler(trav.tree, 0, fmm_level_to_nterms)
        level_to_orders.append(wrangler.level_nterms)

        case_timing = {}
        src_weights = np.random.rand(tree.nsources).astype(tree.coord_dtype)
        drive_fmm(trav, wrangler, (src_weights,), timing_data=case_timing)

        timing_results.append(case_timing)

    time_field_name = (
            "process_elapsed" if SUPPORTS_PROCESS_TIME else "wall_elapsed")

    param_names = ("c_p2m", "c_m2m", "c_p2p", "c_m2l",
                   "c_m2p", "c_p2l", "c_l2l", "c_l2p")

    def check_params_sanity(params):
        for name in param_names:
            assert isinstance(params[name], np.float64)

    def check_params_equal(params1, params2):
        for name in param_names:
            assert params1[name] == params2[name]

    # {{{ host-side cost model (last case left out)

    python_cost_model = _PythonFMMCostModel(make_pde_aware_translation_cost_model)

    python_model_results = [
            python_cost_model.cost_per_stage(
                queue, traversal, level_to_order,
                _PythonFMMCostModel.get_unit_calibration_params(),
                )
            for traversal, level_to_order in zip(
                traversals[:-1], level_to_orders[:-1])]

    python_params = python_cost_model.estimate_calibration_params(
            python_model_results, timing_results[:-1],
            time_field_name=time_field_name)

    check_params_sanity(python_params)

    # }}}

    # {{{ device-side cost model (last case left out)

    cl_cost_model = FMMCostModel(make_pde_aware_translation_cost_model)

    cl_model_results = [
            cl_cost_model.cost_per_stage(
                queue, traversal, level_to_order,
                FMMCostModel.get_unit_calibration_params(),
                )
            for traversal, level_to_order in zip(
                traversals_dev[:-1], level_to_orders[:-1])]

    cl_params = cl_cost_model.estimate_calibration_params(
            cl_model_results, timing_results[:-1],
            time_field_name=time_field_name)

    check_params_sanity(cl_params)

    # }}}

    if SUPPORTS_PROCESS_TIME:
        check_params_equal(cl_params, python_params)
def exec_compute_potential_insn_fmm(self, actx: PyOpenCLArrayContext,
        insn, bound_expr, evaluate):
    """Evaluate a compute-potential instruction with the FMM.

    :arg actx: array context; its ``queue`` is handed to the wrangler.
    :arg insn: instruction supplying ``outputs``, ``source``, ``density``,
        ``kernels`` and ``kernel_arguments``.
    :arg bound_expr: bound expression whose ``places`` resolves geometries.
    :arg evaluate: callable used to evaluate the density (and passed on to
        the extra-kwargs helper).
    :returns: a tuple ``(results, timing_data)`` where *results* is a list of
        ``(output name, potential)`` pairs and *timing_data* is an empty
        dictionary.
    """
    from boxtree.fmm import drive_fmm
    from meshmode.discretization import Discretization
    from meshmode.dof_array import flatten, unflatten
    from pytential import bind, sym

    # {{{ gather unique target discretizations used

    target_name_to_index = {}
    geometries = []

    for output in insn.outputs:
        assert output.qbx_forced_limit not in (-1, 1)

        name = output.target_name
        if name not in target_name_to_index:
            # first occurrence of this target: assign the next free index
            target_name_to_index[name] = len(geometries)
            geometries.append(bound_expr.places.get_geometry(name.geometry))

    targets = tuple(geometries)

    # }}}

    # {{{ get wrangler

    geo_data = self.fmm_geometry_data(targets)

    waa_expr = sym.weights_and_area_elements(
            self.ambient_dim, dofdesc=insn.source)
    waa = bind(bound_expr.places, waa_expr)(actx)
    strengths = waa * evaluate(insn.density)
    flat_strengths = flatten(strengths)

    out_kernels = tuple(insn.kernels)
    fmm_kernel = self.get_fmm_kernel(out_kernels)
    output_and_expansion_dtype = self.get_fmm_output_and_expansion_dtype(
            fmm_kernel, strengths)

    extra_kwargs = self.get_fmm_expansion_wrangler_extra_kwargs(
            actx, out_kernels,
            geo_data.tree().user_source_ids,
            insn.kernel_arguments, evaluate)
    kernel_extra_kwargs, source_extra_kwargs = extra_kwargs

    code_container = self.expansion_wrangler_code_container(
            fmm_kernel, out_kernels)
    wrangler = code_container.get_wrangler(
            actx.queue, geo_data.tree(),
            output_and_expansion_dtype,
            self.fmm_level_to_order,
            source_extra_kwargs=source_extra_kwargs,
            kernel_extra_kwargs=kernel_extra_kwargs)

    # }}}

    all_potentials_on_every_tgt = drive_fmm(
            geo_data.traversal(), wrangler, (flat_strengths,),
            timing_data=None)

    # {{{ postprocess fmm

    results = []

    for output in insn.outputs:
        index = target_name_to_index[output.target_name]
        bounds = geo_data.target_info().target_discr_starts[index:index+2]
        target_discr = targets[index]

        # cut this output's target range out of the global target array
        potential = all_potentials_on_every_tgt[
                output.kernel_index][slice(*bounds)]

        if isinstance(target_discr, Discretization):
            potential = unflatten(actx, target_discr, potential)

        results.append((output.name, potential))

    # }}}

    # no timing information is collected here (timing_data=None above)
    timing_data = {}
    return results, timing_data
def test_fmm_completeness(
    ctx_getter, dims, nsources_req, ntargets_req, who_has_extent,
    source_gen, target_gen, filter_kind
):
    """Tests whether the built FMM traversal structures and driver completely
    capture all interactions.

    With the constant-one wranglers, every (retained) target potential is
    expected to equal the sum of the source weights.

    :arg ctx_getter: callable returning an OpenCL context.
    :arg dims: passed to the particle generators.
    :arg nsources_req: requested number of sources; the actual count is read
        back from the generated array.
    :arg ntargets_req: requested number of targets, or *None* to let the
        tree builder use the sources as targets.
    :arg who_has_extent: string; sources have extent if it contains ``"s"``,
        targets if it contains ``"t"``.
    :arg source_gen: callable generating the source particle array.
    :arg target_gen: callable generating the target particle array.
    :arg filter_kind: falsy for no target filtering, otherwise ``"user"`` or
        ``"tree"``; any other truthy value raises :exc:`ValueError`.
    """

    sources_have_extent = "s" in who_has_extent
    targets_have_extent = "t" in who_has_extent

    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    dtype = np.float64

    # Particle generation may raise ImportError; in that case skip the test.
    try:
        sources = source_gen(queue, nsources_req, dims, dtype, seed=15)
        nsources = len(sources[0])

        if ntargets_req is None:
            # This says "same as sources" to the tree builder.
            targets = None
            ntargets = ntargets_req
        else:
            targets = target_gen(queue, ntargets_req, dims, dtype, seed=16)
            ntargets = len(targets[0])
    except ImportError:
        pytest.skip("loo.py not available, but needed for particle array "
                    "generation")

    # NOTE(review): other tests in this file use PhiloxGenerator instead of
    # RanluxGenerator -- consider switching once the minimum supported
    # PyOpenCL version is confirmed.
    from pyopencl.clrandom import RanluxGenerator
    rng = RanluxGenerator(queue, seed=13)
    # The same generator is reused below for the filter flags, so the order
    # of the draws matters for reproducibility.
    if sources_have_extent:
        source_radii = 2 ** rng.uniform(queue, nsources, dtype=dtype, a=-10, b=0)
    else:
        source_radii = None

    if targets_have_extent:
        target_radii = 2 ** rng.uniform(queue, ntargets, dtype=dtype, a=-10, b=0)
    else:
        target_radii = None

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(
        queue, sources, targets=targets,
        max_particles_in_box=30,
        source_radii=source_radii, target_radii=target_radii,
        debug=True,
    )
    # Disabled debugging aid: plot the tree.
    if 0:
        tree.get().plot()
        import matplotlib.pyplot as pt
        pt.show()

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)
    # Only merge the close lists if the traversal actually has them.
    if trav.sep_close_smaller_starts is not None:
        trav = trav.merge_close_lists(queue)

    # Weights come from NumPy's global random state, which is not seeded
    # within this function.
    weights = np.random.randn(nsources)
    # weights = np.ones(nsources)
    weights_sum = np.sum(weights)

    host_trav = trav.get(queue=queue)
    host_tree = host_trav.tree

    if filter_kind:
        # Random 0/1 flag per target (per source if targets == sources).
        flags = rng.uniform(queue, ntargets or nsources, np.int32,
                            a=0, b=2).astype(np.int8)
        if filter_kind == "user":
            from boxtree.tree import filter_target_lists_in_user_order
            filtered_targets = filter_target_lists_in_user_order(queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInUserOrder(
                host_tree, filtered_targets.get(queue=queue)
            )
        elif filter_kind == "tree":
            from boxtree.tree import filter_target_lists_in_tree_order
            filtered_targets = filter_target_lists_in_tree_order(queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInTreeOrder(
                host_tree, filtered_targets.get(queue=queue)
            )
        else:
            raise ValueError("unsupported value of 'filter_kind'")
    else:
        # NOTE: 'flags' is not bound on this path; the disabled plotting code
        # at the end of this function would need it.
        wrangler = ConstantOneExpansionWrangler(host_tree)

    if ntargets is None and not filter_kind:
        # This check only works for targets == sources.
        assert (wrangler.reorder_potentials(wrangler.reorder_sources(weights))
                == weights).all()

    from boxtree.fmm import drive_fmm
    pot = drive_fmm(host_trav, wrangler, weights)

    # {{{ build, evaluate matrix (and identify missing interactions)

    # Disabled debugging aid: assemble the interaction matrix column by
    # column from unit vectors and plot the entries that are zero.
    if 0:
        mat = np.zeros((ntargets, nsources), dtype)
        from pytools import ProgressBar

        logging.getLogger().setLevel(logging.WARNING)

        pb = ProgressBar("matrix", nsources)
        for i in range(nsources):
            unit_vec = np.zeros(nsources, dtype=dtype)
            unit_vec[i] = 1
            mat[:, i] = drive_fmm(host_trav, wrangler, unit_vec)
            pb.progress()
        pb.finished()

        logging.getLogger().setLevel(logging.INFO)

        import matplotlib.pyplot as pt

        if 1:
            pt.spy(mat)
            pt.show()

        missing_tgts, missing_srcs = np.where(mat == 0)

        if 1 and len(missing_tgts):
            from boxtree.visualization import TreePlotter
            plotter = TreePlotter(host_tree)
            plotter.draw_tree(fill=False, edgecolor="black")
            plotter.draw_box_numbers()
            plotter.set_bounding_box()

            tree_order_missing_tgts = host_tree.indices_to_tree_target_order(missing_tgts)
            tree_order_missing_srcs = host_tree.indices_to_tree_source_order(missing_srcs)

            src_boxes = [host_tree.find_box_nr_for_source(i)
                         for i in tree_order_missing_srcs]
            tgt_boxes = [host_tree.find_box_nr_for_target(i)
                         for i in tree_order_missing_tgts]
            print(src_boxes)
            print(tgt_boxes)

            pt.plot(host_tree.targets[0][tree_order_missing_tgts],
                    host_tree.targets[1][tree_order_missing_tgts], "rv")
            pt.plot(host_tree.sources[0][tree_order_missing_srcs],
                    host_tree.sources[1][tree_order_missing_srcs], "go")
            pt.gca().set_aspect("equal")

            pt.show()

    # }}}

    if filter_kind:
        # Only compare the potentials of targets that were kept.
        pot = pot[flags.get() > 0]

    rel_err = la.norm((pot - weights_sum) / nsources)
    good = rel_err < 1e-8

    # Disabled debugging aid: plot the pointwise error.
    if 0 and not good:
        import matplotlib.pyplot as pt
        pt.plot(pot - weights_sum)
        pt.show()

    # Disabled debugging aid: plot the retained targets and mark those whose
    # potential is off by at least 1e-3.
    if 0 and not good:
        import matplotlib.pyplot as pt
        filt_targets = [host_tree.targets[0][flags.get() > 0],
                        host_tree.targets[1][flags.get() > 0]]
        host_tree.plot()
        bad = np.abs(pot - weights_sum) >= 1e-3
        bad_targets = [filt_targets[0][bad], filt_targets[1][bad]]
        print(bad_targets[0].shape)
        pt.plot(filt_targets[0], filt_targets[1], "x")
        pt.plot(bad_targets[0], bad_targets[1], "v")
        pt.show()

    assert good
def exec_compute_potential_insn_fmm(self, actx: PyOpenCLArrayContext,
        insn, bound_expr, evaluate):
    """Execute the potential-computation instruction *insn* through the FMM.

    Collects the distinct target geometries named by ``insn.outputs``,
    multiplies every evaluated density by the weights and area elements of
    ``insn.source``, runs :func:`boxtree.fmm.drive_fmm` with a
    :class:`sumpy.fmm.SumpyExpansionWrangler` and slices the combined
    potential back into one entry per output.

    :arg actx: array context used to bind the weights and to flatten and
        unflatten arrays.
    :arg insn: instruction providing ``outputs``, ``densities``, ``source``,
        ``target_kernels``, ``source_kernels`` and ``kernel_arguments``.
    :arg bound_expr: bound expression whose ``places`` resolves geometries.
    :arg evaluate: callable used to evaluate density expressions.
    :returns: a tuple ``(results, timing_data)``; *results* is a list of
        ``(output name, potential)`` pairs and *timing_data* is an empty
        :class:`dict`, since ``drive_fmm`` is called with
        ``timing_data=None``.
    """
    # {{{ gather unique target discretizations used

    target_name_to_index = {}
    targets = []

    for output in insn.outputs:
        assert output.qbx_forced_limit not in (-1, 1)

        name = output.target_name
        if name not in target_name_to_index:
            # First occurrence: the index is the position in *targets*.
            target_name_to_index[name] = len(targets)
            targets.append(bound_expr.places.get_geometry(name.geometry))

    targets = tuple(targets)

    # }}}

    # {{{ get wrangler

    geo_data = self.fmm_geometry_data(targets)

    from pytential import bind, sym
    weights_expr = sym.weights_and_area_elements(
            self.ambient_dim, dofdesc=insn.source)
    waa = bind(bound_expr.places, weights_expr)(actx)

    strengths = [waa * evaluate(density) for density in insn.densities]
    flat_strengths = [flatten(strength, actx) for strength in strengths]

    fmm_kernel = self.get_fmm_kernel(insn.target_kernels)
    output_and_expansion_dtype = self.get_fmm_output_and_expansion_dtype(
            insn.target_kernels, strengths[0])

    extra_kwargs = self.get_fmm_expansion_wrangler_extra_kwargs(
            actx,
            insn.target_kernels + insn.source_kernels,
            geo_data.tree().user_source_ids,
            insn.kernel_arguments,
            evaluate)
    kernel_extra_kwargs, source_extra_kwargs = extra_kwargs

    tree_indep = self._tree_indep_data_for_wrangler(
            fmm_kernel,
            target_kernels=insn.target_kernels,
            source_kernels=insn.source_kernels)

    from sumpy.fmm import SumpyExpansionWrangler
    wrangler = SumpyExpansionWrangler(
            tree_indep,
            geo_data.traversal(),
            output_and_expansion_dtype,
            self.fmm_level_to_order,
            source_extra_kwargs=source_extra_kwargs,
            kernel_extra_kwargs=kernel_extra_kwargs)

    # }}}

    from boxtree.fmm import drive_fmm
    all_potentials_on_every_tgt = drive_fmm(
            wrangler, flat_strengths, timing_data=None)

    # {{{ postprocess fmm

    from meshmode.discretization import Discretization

    results = []

    for output in insn.outputs:
        index = target_name_to_index[output.target_name]

        # Two consecutive start offsets delimit this target's entries.
        bounds = geo_data.target_info().target_discr_starts[index:index + 2]
        target_slice = slice(*bounds)
        target_discr = targets[index]

        potential = all_potentials_on_every_tgt[
                output.target_kernel_index][target_slice]

        if isinstance(target_discr, Discretization):
            # Restore the structure of the discretization's node arrays.
            template_ary = thaw(target_discr.nodes()[0], actx)
            potential = unflatten(template_ary, potential, actx, strict=False)

        results.append((output.name, potential))

    # }}}

    timing_data = {}
    return results, timing_data
def demo_cost_model():
    """Calibrate the FMM cost model on several runs and check a prediction.

    For every pair of source/target counts an FMM is run with
    ``FMMLibExpansionWrangler`` while timing data is collected. All runs but
    the last are used to estimate calibration parameters; the per-stage cost
    predicted for the last run is then logged next to its measured time.

    :raises NotImplementedError: if ``SUPPORTS_PROCESS_TIME`` is false.
    """
    if not SUPPORTS_PROCESS_TIME:
        raise NotImplementedError(
            "Currently this script uses process time which only works on Python>=3.3"
        )

    from pyopencl.clrandom import PhiloxGenerator

    from boxtree import TreeBuilder
    from boxtree.cost import FMMCostModel
    from boxtree.cost import make_pde_aware_translation_cost_model
    from boxtree.fmm import drive_fmm
    from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler
    from boxtree.tools import make_normal_particle_array as p_normal
    from boxtree.traversal import FMMTraversalBuilder

    nsources_list = [1000, 2000, 3000, 4000, 5000]
    ntargets_list = [1000, 2000, 3000, 4000, 5000]
    dims = 3
    dtype = np.float64

    # Single source of truth for the timing field used both for calibration
    # and for reading back the measured time below.
    time_field_name = "process_elapsed"

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    # The builders do not depend on the problem size, so build them once.
    tb = TreeBuilder(ctx)
    tg = FMMTraversalBuilder(ctx, well_sep_is_n_away=2)

    traversals_dev = []
    level_to_orders = []
    timing_results = []

    def fmm_level_to_nterms(tree, ilevel):
        return 10

    for nsources, ntargets in zip(nsources_list, ntargets_list):
        # {{{ Generate sources, targets and target_radii

        sources = p_normal(queue, nsources, dims, dtype, seed=15)
        targets = p_normal(queue, ntargets, dims, dtype, seed=18)

        # The generator is deliberately re-created with the same seed for
        # every case so that each case draws from the same random stream.
        rng = PhiloxGenerator(queue.context, seed=22)
        target_radii = rng.uniform(
            queue, ntargets, a=0, b=0.05, dtype=dtype
        ).get()

        # }}}

        # {{{ Generate tree and traversal

        tree, _ = tb(
            queue, sources, targets=targets, target_radii=target_radii,
            stick_out_factor=0.15, max_particles_in_box=30, debug=True
        )

        trav_dev, _ = tg(queue, tree, debug=True)
        trav = trav_dev.get(queue=queue)
        traversals_dev.append(trav_dev)

        # }}}

        wrangler = FMMLibExpansionWrangler(trav.tree, 0, fmm_level_to_nterms)
        level_to_orders.append(wrangler.level_nterms)

        timing_data = {}
        src_weights = np.random.rand(tree.nsources).astype(tree.coord_dtype)
        drive_fmm(trav, wrangler, src_weights, timing_data=timing_data)

        timing_results.append(timing_data)

    cost_model = FMMCostModel(make_pde_aware_translation_cost_model)

    # Model every case except the last one; the last is held out for the
    # prediction check.
    model_results = [
        cost_model.cost_per_stage(
            queue, traversal, level_to_order,
            FMMCostModel.get_unit_calibration_params(),
        )
        for traversal, level_to_order
        in zip(traversals_dev[:-1], level_to_orders[:-1])
    ]
    queue.finish()

    params = cost_model.estimate_calibration_params(
        model_results, timing_results[:-1], time_field_name=time_field_name
    )

    predicted_time = cost_model.cost_per_stage(
        queue, traversals_dev[-1], level_to_orders[-1], params,
    )
    queue.finish()

    for field in [
            "form_multipoles", "eval_direct", "multipole_to_local",
            "eval_multipoles", "form_locals", "eval_locals",
            "coarsen_multipoles", "refine_locals"
    ]:
        measured = timing_results[-1][field][time_field_name]
        pred_err = (measured - predicted_time[field]) / measured
        logger.info("actual/predicted time for %s: %.3g/%.3g -> %g %% error",
                    field, measured, predicted_time[field],
                    abs(100 * pred_err))
def exec_compute_potential_insn_fmm(self, queue, insn, bound_expr, evaluate):
    """Execute the potential-computation instruction *insn* through the FMM.

    Collects the distinct targets named by ``insn.outputs``, scales the
    evaluated density by ``self.weights_and_area_elements()``, runs
    :func:`boxtree.fmm.drive_fmm` and slices the combined potential back into
    one entry per output.

    :arg queue: command queue the density is attached to and that is handed
        to the wrangler.
    :arg insn: instruction providing ``outputs``, ``density``, ``kernels``
        and ``kernel_arguments``.
    :arg bound_expr: bound expression whose ``places`` maps target names to
        targets.
    :arg evaluate: callable used to evaluate the density expression.
    :returns: a list of ``(output name, potential)`` pairs.
    """
    # {{{ gather unique target discretizations used

    target_name_to_index = {}
    targets = []

    for output in insn.outputs:
        assert output.qbx_forced_limit not in (-1, 1)

        name = output.target_name
        if name not in target_name_to_index:
            # First occurrence: the index is the position in *targets*.
            target_name_to_index[name] = len(targets)
            targets.append(bound_expr.places[name])

    targets = tuple(targets)

    # }}}

    # {{{ get wrangler

    geo_data = self.fmm_geometry_data(targets)

    density = evaluate(insn.density).with_queue(queue)
    strengths = density * self.weights_and_area_elements()

    out_kernels = tuple(insn.kernels)
    fmm_kernel = self.get_fmm_kernel(out_kernels)
    output_and_expansion_dtype = self.get_fmm_output_and_expansion_dtype(
            fmm_kernel, strengths)

    extra_kwargs = self.get_fmm_expansion_wrangler_extra_kwargs(
            queue, out_kernels, geo_data.tree().user_source_ids,
            insn.kernel_arguments, evaluate)
    kernel_extra_kwargs, source_extra_kwargs = extra_kwargs

    code_container = self.expansion_wrangler_code_container(
            fmm_kernel, out_kernels)
    wrangler = code_container.get_wrangler(
            queue, geo_data.tree(), output_and_expansion_dtype,
            self.fmm_level_to_order,
            source_extra_kwargs=source_extra_kwargs,
            kernel_extra_kwargs=kernel_extra_kwargs)

    # }}}

    from boxtree.fmm import drive_fmm
    all_potentials_on_every_tgt = drive_fmm(
            geo_data.traversal(), wrangler, strengths)

    # {{{ postprocess fmm

    result = []

    for output in insn.outputs:
        index = target_name_to_index[output.target_name]

        # Two consecutive start offsets delimit this target's entries.
        bounds = geo_data.target_info().target_discr_starts[index:index+2]
        potential = all_potentials_on_every_tgt[
                output.kernel_index][slice(*bounds)]

        result.append((output.name, potential))

    # }}}

    return result
def test_sumpy_fmm_exclude_self(ctx_getter):
    """Compare an ``exclude_self`` FMM with an ``exclude_self`` P2P reference.

    Sources double as targets. ``target_to_source`` is the identity index
    array that is passed to both the wrangler and the direct P2P evaluation.
    The test passes if the relative l2 error is within ``1e-7`` of zero.

    :arg ctx_getter: callable returning the OpenCL context to run on.
    """
    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 500
    dtype = np.float64

    from boxtree.tools import make_normal_particle_array as p_normal

    knl = LaplaceKernel(2)
    local_expn_class = VolumeTaylorLocalExpansion
    mpole_expn_class = VolumeTaylorMultipoleExpansion
    order = 10

    sources = p_normal(queue, nsources, knl.dim, dtype, seed=15)

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)

    from pyopencl.clrandom import PhiloxGenerator
    # Seed the generator explicitly: without a seed the weights (and hence
    # the measured error) differ from run to run.
    rng = PhiloxGenerator(ctx, seed=44)
    weights = rng.uniform(queue, nsources, dtype=np.float64)

    target_to_source = np.arange(tree.ntargets, dtype=np.int32)
    self_extra_kwargs = {"target_to_source": target_to_source}

    out_kernels = [knl]

    from functools import partial

    from sumpy.fmm import SumpyExpansionWranglerCodeContainer
    wcc = SumpyExpansionWranglerCodeContainer(
            ctx,
            partial(mpole_expn_class, knl),
            partial(local_expn_class, knl),
            out_kernels,
            exclude_self=True)

    wrangler = wcc.get_wrangler(queue, tree, dtype,
            fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order,
            self_extra_kwargs=self_extra_kwargs)

    from boxtree.fmm import drive_fmm

    pot, = drive_fmm(trav, wrangler, weights)

    from sumpy import P2P
    p2p = P2P(ctx, out_kernels, exclude_self=True)
    # The first returned item is not needed here.
    _, (ref_pot,) = p2p(queue, sources, sources, (weights,),
            **self_extra_kwargs)

    pot = pot.get()
    ref_pot = ref_pot.get()

    rel_err = la.norm(pot - ref_pot) / la.norm(ref_pot)
    logger.info("order %d -> relative l2 error: %g", order, rel_err)

    assert np.isclose(rel_err, 0, atol=1e-7)
def test_cost_model_op_counts_agree_with_constantone_wrangler(
        ctx_factory, nsources, ntargets, dims, dtype):
    """Compare modeled operation counts with those recorded by a real run.

    An FMM is driven with ``ConstantOneExpansionWrangler`` while collecting
    timing data; the ``"ops_elapsed"`` entry of every stage must equal the
    cost that ``FMMCostModel`` (built with ``OpCountingTranslationCostModel``
    and unit calibration parameters) assigns to that stage. The summed
    recorded cost must also equal the aggregated per-box cost plus the
    modeled ``"coarsen_multipoles"`` and ``"refine_locals"`` stages.

    :arg ctx_factory: callable returning the OpenCL context to run on.
    :arg nsources: number of source particles.
    :arg ntargets: number of target particles.
    :arg dims: spatial dimension of the particles.
    :arg dtype: dtype of particle coordinates and target radii.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    from boxtree.tools import make_normal_particle_array as p_normal
    sources = p_normal(queue, nsources, dims, dtype, seed=16)
    targets = p_normal(queue, ntargets, dims, dtype, seed=19)

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=20)
    radii_dev = rng.uniform(queue, ntargets, a=0, b=0.04, dtype=dtype)
    target_radii = radii_dev.get()

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(ctx)
    tree, _ = tree_builder(
        queue, sources, targets=targets, target_radii=target_radii,
        stick_out_factor=0.15, max_particles_in_box=30, debug=True
    )

    from boxtree.traversal import FMMTraversalBuilder
    trav_builder = FMMTraversalBuilder(ctx, well_sep_is_n_away=2)
    trav_dev, _ = trav_builder(queue, tree, debug=True)
    trav = trav_dev.get(queue=queue)

    from boxtree.tools import ConstantOneExpansionWrangler
    wrangler = ConstantOneExpansionWrangler(trav.tree)

    timing_data = {}
    from boxtree.fmm import drive_fmm
    src_weights = np.random.rand(tree.nsources).astype(tree.coord_dtype)
    drive_fmm(trav, wrangler, (src_weights,), timing_data=timing_data)

    cost_model = FMMCostModel(
        translation_cost_model_factory=OpCountingTranslationCostModel
    )

    # Order 1 on every level.
    level_to_order = np.array([1] * tree.nlevels)

    modeled_time = cost_model.cost_per_stage(
        queue, trav_dev, level_to_order,
        FMMCostModel.get_unit_calibration_params(),
    )

    mismatches = [
        (stage, stage_timing["ops_elapsed"], modeled_time[stage])
        for stage, stage_timing in timing_data.items()
        if stage_timing["ops_elapsed"] != modeled_time[stage]
    ]

    assert not mismatches, "\n".join(str(s) for s in mismatches)

    # {{{ Test per-box cost

    total_cost = 0.0
    for stage_timing in timing_data.values():
        total_cost += stage_timing["ops_elapsed"]

    per_box_cost = cost_model.cost_per_box(
        queue, trav_dev, level_to_order,
        FMMCostModel.get_unit_calibration_params(),
    )
    total_aggregate_cost = cost_model.aggregate_over_boxes(per_box_cost)

    # The two stages added here are not part of the aggregated per-box cost.
    expected_total = (
        total_aggregate_cost
        + modeled_time["coarsen_multipoles"]
        + modeled_time["refine_locals"]
    )
    assert total_cost == expected_total

    # }}}
def test_sumpy_fmm(ctx_getter, knl, local_expn_class, mpole_expn_class):
    """Check convergence of the sumpy-driven FMM against direct evaluation.

    For each expansion order the FMM potential at the targets is compared
    with a P2P reference in the infinity norm; the resulting errors are
    handed to a ``PConvergenceVerifier``, which is invoked at the end.

    :arg ctx_getter: callable returning the OpenCL context to run on.
    :arg knl: kernel under test; Helmholtz and Yukawa kernels get an extra
        kernel argument, a complex dtype and their own set of orders.
    :arg local_expn_class: local expansion class, bound to *knl*.
    :arg mpole_expn_class: multipole expansion class, bound to *knl*.
    """
    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 1000
    ntargets = 300
    dtype = np.float64

    from boxtree.tools import make_normal_particle_array as p_normal

    sources = p_normal(queue, nsources, knl.dim, dtype, seed=15)
    if 1:
        offset = np.zeros(knl.dim)
        offset[0] = 0.1

        targets = (
                p_normal(queue, ntargets, knl.dim, dtype, seed=18)
                + offset)

        del offset
    else:
        from sumpy.visualization import FieldPlotter
        fp = FieldPlotter(np.array([0.5, 0]), extent=3, npoints=200)
        from pytools.obj_array import make_obj_array
        targets = make_obj_array(
                [fp.points[i] for i in range(knl.dim)])

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, targets=targets,
            max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)

    # {{{ plot tree

    if 0:
        host_tree = tree.get()
        host_trav = trav.get()

        if 1:
            print("src_box", host_tree.find_box_nr_for_source(403))
            print("tgt_box", host_tree.find_box_nr_for_target(28))
            print(list(host_trav.target_or_target_parent_boxes).index(37))
            print(host_trav.get_box_list("sep_bigger", 22))

        from boxtree.visualization import TreePlotter
        plotter = TreePlotter(host_tree)
        plotter.draw_tree(fill=False, edgecolor="black", zorder=10)
        plotter.set_bounding_box()
        plotter.draw_box_numbers()

        import matplotlib.pyplot as pt
        pt.show()

    # }}}

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(ctx, seed=44)
    weights = rng.uniform(queue, nsources, dtype=np.float64)

    logger.info("computing direct (reference) result")

    from pytools.convergence import PConvergenceVerifier

    pconv_verifier = PConvergenceVerifier()

    # dtype is still np.float64 here; only the kernels below override it.
    extra_kwargs = {}
    order_values = [1, 2, 3]

    if isinstance(knl, HelmholtzKernel):
        extra_kwargs["k"] = 0.05
        dtype = np.complex128

        if knl.dim == 3:
            order_values = [1, 2]
        elif knl.dim == 2 and issubclass(local_expn_class, H2DLocalExpansion):
            order_values = [10, 12]

    elif isinstance(knl, YukawaKernel):
        extra_kwargs["lam"] = 2
        dtype = np.complex128

        if knl.dim == 3:
            order_values = [1, 2]
        elif knl.dim == 2 and issubclass(local_expn_class, Y2DLocalExpansion):
            order_values = [10, 12]

    from functools import partial

    from boxtree.fmm import drive_fmm
    from sumpy import P2P
    from sumpy.fmm import SumpyExpansionWranglerCodeContainer

    for order in order_values:
        out_kernels = [knl]

        wcc = SumpyExpansionWranglerCodeContainer(
                ctx,
                partial(mpole_expn_class, knl),
                partial(local_expn_class, knl),
                out_kernels)

        # The lambda reads *order* from the loop; the wrangler is only used
        # within this iteration, so it always sees the current order.
        wrangler = wcc.get_wrangler(queue, tree, dtype,
                fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order,
                kernel_extra_kwargs=extra_kwargs)

        pot, = drive_fmm(trav, wrangler, weights)

        p2p = P2P(ctx, out_kernels, exclude_self=False)
        # The first returned item is not needed here.
        _, (ref_pot,) = p2p(queue, targets, sources, (weights,),
                **extra_kwargs)

        pot = pot.get()
        ref_pot = ref_pot.get()

        # The error is measured in the infinity norm, and logged as such.
        rel_err = la.norm(pot - ref_pot, np.inf) / la.norm(ref_pot, np.inf)
        logger.info("order %d -> relative l-infinity error: %g",
                order, rel_err)

        pconv_verifier.add_data_point(order, rel_err)

    print(pconv_verifier)
    pconv_verifier()
def test_pyfmmlib_numerical_stability(ctx_factory, dims, helmholtz_k, order):
    """Stress-test the fmmlib translations on a deep tree.

    Runs an FMM with ``FMMLibExpansionWrangler`` (including rotation data) on
    particles placed along a line, checks that the potential contains no
    NaNs and that its infinity-norm error relative to
    ``get_fmmlib_ref_pot`` stays below an order-dependent bound.

    :arg ctx_factory: callable returning the OpenCL context to run on.
    :arg dims: spatial dimension; selects how many coordinate rows are kept
        and which error bound applies.
    :arg helmholtz_k: value handed to the wrangler and the reference
        computation.
    :arg order: number of terms used on every level.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    nsources = 30
    dtype = np.float64

    # The input particles are arranged with geometrically increasing/decreasing
    # spacing along a line, to build a deep tree that stress-tests the
    # translations.
    particle_line = np.array([2**-i for i in range(nsources // 2)], dtype=dtype)
    particle_line = np.hstack([particle_line, 3 - particle_line])
    zero = np.zeros(nsources, dtype=dtype)

    sources = np.vstack([particle_line, zero, zero])[:dims]
    targets = sources * (1 + 1e-3)

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, targets=targets,
            max_particles_in_box=2, debug=True)

    assert tree.nlevels >= 15

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)

    trav = trav.get(queue=queue)
    weights = np.ones_like(sources[0])

    from boxtree.pyfmmlib_integration import (
            FMMLibExpansionWrangler, FMMLibRotationData)

    def fmm_level_to_nterms(tree, lev):
        return order

    wrangler = FMMLibExpansionWrangler(
            trav.tree, helmholtz_k,
            fmm_level_to_nterms=fmm_level_to_nterms,
            rotation_data=FMMLibRotationData(queue, trav))

    from boxtree.fmm import drive_fmm

    pot = drive_fmm(trav, wrangler, (weights,))
    assert not np.isnan(pot).any()

    # {{{ ref fmmlib computation

    logger.info("computing direct (reference) result")

    ref_pot = get_fmmlib_ref_pot(wrangler, weights, sources, targets,
            helmholtz_k)

    # The error is measured in the infinity norm, and logged as such.
    rel_err = la.norm(pot - ref_pot, np.inf) / la.norm(ref_pot, np.inf)
    logger.info("relative l-infinity error vs fmmlib direct: %g", rel_err)

    # }}}

    if dims == 2:
        error_bound = (1 / 2)**(1 + order)
    else:
        error_bound = (3 / 4)**(1 + order)

    assert rel_err < error_bound, rel_err
def test_sumpy_fmm_exclude_self(ctx_getter):
    """Compare an ``exclude_self`` FMM with an ``exclude_self`` P2P reference.

    Sources double as targets. ``target_to_source`` is the identity index
    array that is passed to both the wrangler and the direct P2P evaluation.
    The test passes if the relative l2 error is within ``1e-7`` of zero.

    :arg ctx_getter: callable returning the OpenCL context to run on.
    """
    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 500
    dtype = np.float64

    from boxtree.tools import make_normal_particle_array as p_normal

    knl = LaplaceKernel(2)
    local_expn_class = VolumeTaylorLocalExpansion
    mpole_expn_class = VolumeTaylorMultipoleExpansion
    order = 10

    sources = p_normal(queue, nsources, knl.dim, dtype, seed=15)

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)

    from pyopencl.clrandom import PhiloxGenerator
    # Seed the generator explicitly: without a seed the weights (and hence
    # the measured error) differ from run to run.
    rng = PhiloxGenerator(ctx, seed=44)
    weights = rng.uniform(queue, nsources, dtype=np.float64)

    target_to_source = np.arange(tree.ntargets, dtype=np.int32)
    self_extra_kwargs = {"target_to_source": target_to_source}

    out_kernels = [knl]

    from functools import partial

    from sumpy.fmm import SumpyExpansionWranglerCodeContainer
    wcc = SumpyExpansionWranglerCodeContainer(
            ctx,
            partial(mpole_expn_class, knl),
            partial(local_expn_class, knl),
            out_kernels,
            exclude_self=True)

    wrangler = wcc.get_wrangler(queue, tree, dtype,
            fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order,
            self_extra_kwargs=self_extra_kwargs)

    from boxtree.fmm import drive_fmm

    pot, = drive_fmm(trav, wrangler, weights)

    from sumpy import P2P
    p2p = P2P(ctx, out_kernels, exclude_self=True)
    # The first returned item is not needed here.
    _, (ref_pot,) = p2p(queue, sources, sources, (weights,),
            **self_extra_kwargs)

    pot = pot.get()
    ref_pot = ref_pot.get()

    rel_err = la.norm(pot - ref_pot) / la.norm(ref_pot)
    logger.info("order %d -> relative l2 error: %g", order, rel_err)

    assert np.isclose(rel_err, 0, atol=1e-7)
def test_fmm_with_optimized_3d_m2l(ctx_factory, nsrcntgts, helmholtz_k,
        well_sep_is_n_away):
    """Check that rotation data does not change the fmmlib FMM result in 3D.

    Two ``FMMLibExpansionWrangler`` instances, one without and one with
    ``FMMLibRotationData``, are driven over the same traversal and weights.
    Their potentials must agree to ``1e-13`` (absolute and relative). The
    recorded ``"multipole_to_local"`` process times are printed when
    available.

    :arg ctx_factory: callable returning the OpenCL context to run on.
    :arg nsrcntgts: total particle count; half are sources, half targets.
    :arg helmholtz_k: value handed to both wranglers; when truthy, levels
        below 3 use five additional terms.
    :arg well_sep_is_n_away: accepted but not used in this function.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    dims = 3

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    nsources = nsrcntgts // 2
    ntargets = nsources

    dtype = np.float64

    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    shift = np.array([2, 0, 0])[:dims]
    targets = p_normal(queue, ntargets, dims, dtype, seed=18) + shift

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(ctx)
    tree, _ = tree_builder(queue, sources, targets=targets,
            max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    trav_builder = FMMTraversalBuilder(ctx)
    trav, _ = trav_builder(queue, tree, debug=True)
    trav = trav.get(queue=queue)

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=20)
    weights = rng.uniform(queue, nsources, dtype=np.float64).get()

    base_nterms = 10

    def fmm_level_to_nterms(tree, lev):
        if lev < 3 and helmholtz_k:
            # exercise order-varies-by-level capability
            return base_nterms + 5
        return base_nterms

    from boxtree.pyfmmlib_integration import (
            FMMLibExpansionWrangler, FMMLibRotationData)

    baseline_wrangler = FMMLibExpansionWrangler(
            trav.tree, helmholtz_k,
            fmm_level_to_nterms=fmm_level_to_nterms)

    optimized_wrangler = FMMLibExpansionWrangler(
            trav.tree, helmholtz_k,
            fmm_level_to_nterms=fmm_level_to_nterms,
            rotation_data=FMMLibRotationData(queue, trav))

    from boxtree.fmm import drive_fmm

    baseline_timing_data = {}
    baseline_pot = drive_fmm(trav, baseline_wrangler, (weights,),
            timing_data=baseline_timing_data)

    optimized_timing_data = {}
    optimized_pot = drive_fmm(trav, optimized_wrangler, (weights,),
            timing_data=optimized_timing_data)

    # Report the M2L time of each run when one was recorded.
    reports = (
            ("Baseline M2L time : %#.4g s", baseline_timing_data),
            ("Optimized M2L time: %#.4g s", optimized_timing_data),
            )
    for fmt, timings in reports:
        elapsed = timings["multipole_to_local"]["process_elapsed"]
        if elapsed is not None:
            print(fmt % elapsed)

    assert np.allclose(baseline_pot, optimized_pot, atol=1e-13, rtol=1e-13)