def test_area_query_balls_outside_bbox(ctx_getter, dims, do_plot=False):
    """Run the area query test with balls that may lie outside the tree.

    Ball centers are sampled uniformly from a range that extends one unit
    beyond the extreme values of the tree's bounding box, so the query input
    includes balls whose centers are not within that bounding box.
    """
    from boxtree import TreeBuilder
    from pyopencl.clrandom import PhiloxGenerator
    from pytools.obj_array import make_obj_array

    context = ctx_getter()
    queue = cl.CommandQueue(context)

    point_count = 10**4
    real_dtype = np.float64
    particles = make_normal_particle_array(queue, point_count, dims,
                                           real_dtype)

    if do_plot:
        import matplotlib.pyplot as pt
        pt.plot(particles[0].get(), particles[1].get(), "x")

    build_tree = TreeBuilder(context)

    queue.finish()
    tree, _ = build_tree(queue, particles, max_particles_in_box=30,
                         debug=True)

    ball_count = 10**4
    generator = PhiloxGenerator(context, seed=13)

    # Sampling interval: one unit wider than the bounding box on each side.
    lower = tree.bounding_box[0].min() - 1
    upper = tree.bounding_box[1].max() + 1

    centers_per_axis = []
    for _ in range(dims):
        centers_per_axis.append(
            generator.uniform(queue, ball_count, dtype=real_dtype,
                              a=lower, b=upper))
    ball_centers = make_obj_array(centers_per_axis)

    ball_radii = cl.array.empty(queue, ball_count, real_dtype).fill(0.1)

    run_area_query_test(context, queue, tree, ball_centers, ball_radii)
def make_normal_particle_array(queue, nparticles, dims, dtype, seed=15):
    """Return an object array holding *dims* normally distributed arrays.

    Each entry contains *nparticles* samples of type *dtype*, drawn from a
    Philox generator that is created on the context of *queue* with *seed*.
    """
    from pyopencl.clrandom import PhiloxGenerator

    generator = PhiloxGenerator(queue.context, seed=seed)

    coordinates = []
    for _axis in range(dims):
        coordinates.append(generator.normal(queue, nparticles, dtype=dtype))

    return make_obj_array(coordinates)
def __init_particle(self):
    """Allocate the particle buffers and launch ``rt_init_particles``.

    Creates ``self.x_gpu`` (uninitialized), ``self.v_gpu`` (normal samples
    with mu=0, sigma=1) and ``self.t_gpu`` (zeros), then runs the
    ``rt_init_particles`` kernel on the position and velocity buffers and
    waits for it to finish.
    """
    print("Info- init particles")

    generator = PhiloxGenerator(self.ocl_ctx)
    queue = self.ocl_queue

    # Positions are only allocated here.  An alternative that was tried
    # before drew them from gen.normal(..., mu=0.5, sigma=0.05), i.e. a
    # sphere of diameter 0.05 and center (mu,mu,mu).
    # NOTE(review): presumably rt_init_particles fills x_gpu -- confirm.
    self.x_gpu = cl_array.empty(queue, self.dim * self.np, dtype=self.dtype)

    # Velocities
    self.v_gpu = generator.normal(
        queue, (self.np * self.dim), self.dtype, mu=0, sigma=1)

    # Times
    self.t_gpu = cl_array.zeros(queue, self.np, dtype=self.dtype)

    event = self.ocl_prg.rt_init_particles(
        queue, (self.np, ), None, self.x_gpu.data, self.v_gpu.data)
    event.wait()
def __init__(self, ctx_getter=cl.create_some_context, enable_extents=False):
    """Build a tree and FMM traversal and store their host copies.

    After construction the instance exposes ``tree`` and ``trav`` (host
    copies), ``input`` (``[host_tree, weights, weights_sum, host_trav]``)
    and ``pot`` (initialized to ``None``).
    """
    from pyopencl.characterize import has_struct_arg_count_bug
    from pyopencl.clrandom import PhiloxGenerator
    from boxtree import TreeBuilder
    from boxtree.fmm import drive_fmm  # noqa: F401
    from boxtree.traversal import FMMTraversalBuilder

    context = ctx_getter()
    queue = cl.CommandQueue(context)

    if has_struct_arg_count_bug(queue.device):
        pytest.xfail("won't work on devices with the struct arg count issue")

    logging.basicConfig(level=logging.INFO)

    dims = 2
    nsources = 9000000
    ntargets = 9000000
    dtype = np.float32

    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    # NOTE: targets and target_radii are generated but not handed to the
    # tree builder below (the corresponding keyword arguments are disabled).
    targets = p_normal(queue, ntargets, dims, dtype, seed=15)

    generator = PhiloxGenerator(queue.context, seed=12)
    target_radii = (
        2**generator.uniform(queue, ntargets, dtype=dtype, a=-10, b=0)
        if enable_extents else None)

    build_tree = TreeBuilder(context)
    tree, _ = build_tree(
        queue, sources,
        #targets=targets,
        max_particles_in_box=30,
        #target_radii=target_radii,
        #stick_out_factor=0.25,
        debug=True)

    build_traversal = FMMTraversalBuilder(context)
    device_trav, _ = build_traversal(queue, tree, debug=True)

    weights = np.ones(nsources)
    weights_sum = np.sum(weights)

    host_trav = device_trav.get(queue=queue)
    host_tree = host_trav.tree

    self.tree = host_tree
    self.trav = host_trav
    self.input = [host_tree, weights, weights_sum, host_trav]
    self.pot = None
def test_interaction_list_particle_count_thresholding(ctx_getter,
                                                      enable_extents):
    """Check the FMM result when a cumulative source-count threshold is set.

    A tree with at most 30 particles per box is built, and the traversal is
    constructed with ``_from_sep_smaller_min_nsources_cumul`` set to one more
    than that limit.  The test then drives the FMM with unit weights and
    asserts that every computed potential equals the sum of the weights.

    :arg ctx_getter: callable returning an OpenCL context.
    :arg enable_extents: if true, targets get radii ``2**u`` with ``u``
        drawn uniformly using ``a=-10, b=0``; otherwise no radii are used.
    """
    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    logging.basicConfig(level=logging.INFO)

    dims = 2
    nsources = 1000
    ntargets = 1000
    # np.float (a plain alias of the builtin float) was removed in
    # NumPy 1.24; np.float64 is the same dtype and works on all versions.
    dtype = np.float64

    max_particles_in_box = 30
    # Ensure that we have underfilled boxes.
    from_sep_smaller_min_nsources_cumul = 1 + max_particles_in_box

    from boxtree.fmm import drive_fmm
    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    targets = p_normal(queue, ntargets, dims, dtype, seed=15)

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=12)

    if enable_extents:
        target_radii = 2**rng.uniform(queue, ntargets, dtype=dtype,
                                      a=-10, b=0)
    else:
        target_radii = None

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, targets=targets,
                 max_particles_in_box=max_particles_in_box,
                 target_radii=target_radii,
                 debug=True, stick_out_factor=0.25)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(
        queue, tree, debug=True,
        _from_sep_smaller_min_nsources_cumul=(
            from_sep_smaller_min_nsources_cumul))

    weights = np.ones(nsources)
    weights_sum = np.sum(weights)

    host_trav = trav.get(queue=queue)
    host_tree = host_trav.tree

    wrangler = ConstantOneExpansionWrangler(host_tree)

    pot = drive_fmm(host_trav, wrangler, weights)

    assert (pot == weights_sum).all()
def test_plot_traversal(ctx_factory, well_sep_is_n_away=1, plot=False):
    """Draw a 2D tree and the box lists of one fixed box.

    The box whose lists are drawn is 380 when ``well_sep_is_n_away == 1``
    and 320 when it is 2; for any other value no box lists are drawn.  The
    figure is only shown when *plot* is true.  Skipped if matplotlib
    cannot be imported.
    """
    pytest.importorskip("matplotlib")

    context = ctx_factory()
    queue = cl.CommandQueue(context)

    #for dims in [2, 3]:
    for dims in [2]:
        particle_count = 10**4
        real_dtype = np.float64

        from pyopencl.clrandom import PhiloxGenerator
        from pytools.obj_array import make_obj_array
        generator = PhiloxGenerator(queue.context, seed=15)

        coordinates = []
        for _ in range(dims):
            coordinates.append(
                generator.normal(queue, particle_count, dtype=real_dtype))
        particles = make_obj_array(coordinates)

        from boxtree import TreeBuilder
        build_tree = TreeBuilder(context)

        queue.finish()
        tree, _ = build_tree(queue, particles, max_particles_in_box=30,
                             debug=True)

        from boxtree.traversal import FMMTraversalBuilder
        build_traversal = FMMTraversalBuilder(
            context, well_sep_is_n_away=well_sep_is_n_away)
        trav, _ = build_traversal(queue, tree)

        # Move both structures to the host for plotting.
        tree = tree.get(queue=queue)
        trav = trav.get(queue=queue)

        from boxtree.visualization import TreePlotter
        plotter = TreePlotter(tree)
        plotter.draw_tree(fill=False, edgecolor="black")
        plotter.set_bounding_box()

        import random
        random.seed(7)

        from boxtree.visualization import draw_box_lists

        if well_sep_is_n_away == 1:
            box_to_draw = 380
        elif well_sep_is_n_away == 2:
            box_to_draw = 320
        else:
            box_to_draw = None

        if box_to_draw is not None:
            draw_box_lists(plotter, trav, box_to_draw)

        if plot:
            import matplotlib.pyplot as pt
            axes = pt.gca()
            axes.set_xticks([])
            pt.gca().set_yticks([])

            pt.show()
def test_pyfmmlib_fmm(ctx_getter):
    """Compare a 2D Helmholtz FMM against pyfmmlib's direct evaluation.

    The test is skipped when ``pyfmmlib`` is unavailable.  It passes when
    the relative l2 error between the FMM potential and the reference
    potential is below 1e-5.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    context = ctx_getter()
    queue = cl.CommandQueue(context)

    nsources = 3000
    ntargets = 1000
    dims = 2
    dtype = np.float64

    helmholtz_k = 2

    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    # Targets are the normal cloud shifted by (2, 0).
    shift = np.array([2, 0])
    targets = p_normal(queue, ntargets, dims, dtype, seed=18) + shift

    sources_host = particle_array_to_host(sources)
    targets_host = particle_array_to_host(targets)

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(context)
    tree, _ = build_tree(queue, sources, targets=targets,
                         max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    build_traversal = FMMTraversalBuilder(context)
    trav, _ = build_traversal(queue, tree, debug=True)
    trav = trav.get(queue=queue)

    from pyopencl.clrandom import PhiloxGenerator
    generator = PhiloxGenerator(queue.context, seed=20)
    weights = generator.uniform(queue, nsources, dtype=np.float64).get()

    logger.info("computing direct (reference) result")

    from pyfmmlib import hpotgrad2dall_vec
    ref_pot, _, _ = hpotgrad2dall_vec(
        ifgrad=False, ifhess=False,
        sources=sources_host.T, charge=weights,
        targets=targets_host.T, zk=helmholtz_k)

    from boxtree.pyfmmlib_integration import Helmholtz2DExpansionWrangler
    wrangler = Helmholtz2DExpansionWrangler(trav.tree, helmholtz_k,
                                            nterms=10)

    from boxtree.fmm import drive_fmm
    pot = drive_fmm(trav, wrangler, weights)

    error_norm = la.norm(pot - ref_pot)
    reference_norm = la.norm(ref_pot)
    rel_err = error_norm / reference_norm
    logger.info("relative l2 error: %g" % rel_err)
    assert rel_err < 1e-5
def test_fmm_float32(ctx_getter=cl.create_some_context, enable_extents=True):
    """Drive the FMM on float32 particles and print the elapsed time.

    Uses 3,000,000 sources and targets in 2D, unit weights, and asserts
    that every computed potential equals the sum of the weights.  Marked
    as an expected failure on devices reported by
    ``has_struct_arg_count_bug``.
    """
    from time import time

    from pyopencl.characterize import has_struct_arg_count_bug
    from pyopencl.clrandom import PhiloxGenerator
    from boxtree import TreeBuilder
    from boxtree.fmm import drive_fmm
    from boxtree.traversal import FMMTraversalBuilder

    context = ctx_getter()
    queue = cl.CommandQueue(context)

    if has_struct_arg_count_bug(queue.device):
        pytest.xfail("won't work on devices with the struct arg count issue")

    logging.basicConfig(level=logging.INFO)

    dims = 2
    nsources = 3000000
    ntargets = 3000000
    dtype = np.float32

    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    targets = p_normal(queue, ntargets, dims, dtype, seed=15)

    generator = PhiloxGenerator(queue.context, seed=12)
    target_radii = (
        2**generator.uniform(queue, ntargets, dtype=dtype, a=-10, b=0)
        if enable_extents else None)

    build_tree = TreeBuilder(context)
    tree, _ = build_tree(queue, sources, targets=targets,
                         max_particles_in_box=30,
                         target_radii=target_radii,
                         stick_out_factor=0.25, debug=True)

    build_traversal = FMMTraversalBuilder(context)
    device_trav, _ = build_traversal(queue, tree, debug=True)

    weights = np.ones(nsources)
    weights_sum = np.sum(weights)

    host_trav = device_trav.get(queue=queue)
    wrangler = ConstantOneExpansionWrangler(host_trav.tree)

    # Only the FMM driver itself is timed.
    started = time()
    pot = drive_fmm(host_trav, wrangler, weights)
    print(time() - started)

    assert (pot == weights_sum).all()
def test_sumpy_fmm_timing_data_collection(ctx_getter):
    """Check that ``drive_fmm`` fills the *timing_data* dictionary.

    A 2D Laplace FMM of order 1 is run on 500 sources with a profiling
    command queue; the test passes when the dictionary passed as
    ``timing_data`` is non-empty afterwards.
    """
    from functools import partial

    from pyopencl.clrandom import PhiloxGenerator
    from boxtree import TreeBuilder
    from boxtree.fmm import drive_fmm
    from boxtree.tools import make_normal_particle_array as p_normal
    from boxtree.traversal import FMMTraversalBuilder
    from sumpy.fmm import SumpyExpansionWranglerCodeContainer

    logging.basicConfig(level=logging.INFO)

    context = ctx_getter()
    queue = cl.CommandQueue(
        context,
        properties=cl.command_queue_properties.PROFILING_ENABLE)

    nsources = 500
    dtype = np.float64

    kernel = LaplaceKernel(2)
    local_expn_class = VolumeTaylorLocalExpansion
    mpole_expn_class = VolumeTaylorMultipoleExpansion
    order = 1

    sources = p_normal(queue, nsources, kernel.dim, dtype, seed=15)

    build_tree = TreeBuilder(context)
    tree, _ = build_tree(queue, sources, max_particles_in_box=30,
                         debug=True)

    build_traversal = FMMTraversalBuilder(context)
    trav, _ = build_traversal(queue, tree, debug=True)

    generator = PhiloxGenerator(context)
    weights = generator.uniform(queue, nsources, dtype=np.float64)

    out_kernels = [kernel]

    code_container = SumpyExpansionWranglerCodeContainer(
        context,
        partial(mpole_expn_class, kernel),
        partial(local_expn_class, kernel),
        out_kernels)

    def constant_order(kernel, kernel_args, tree, lev):
        # Same expansion order on every level.
        return order

    wrangler = code_container.get_wrangler(
        queue, tree, dtype, fmm_level_to_order=constant_order)

    timing_data = {}
    pot, = drive_fmm(trav, wrangler, weights, timing_data=timing_data)
    print(timing_data)
    assert timing_data
def test_sumpy_fmm_timing_data_collection(ctx_factory):
    """Check that ``drive_fmm`` fills the *timing_data* dictionary.

    A 2D Laplace FMM of order 1 is run on 500 sources with a profiling
    command queue.  The weights are passed to the driver as a one-element
    tuple, and the test passes when the dictionary passed as
    ``timing_data`` is non-empty afterwards.
    """
    from functools import partial

    from pyopencl.clrandom import PhiloxGenerator
    from boxtree import TreeBuilder
    from boxtree.fmm import drive_fmm
    from boxtree.tools import make_normal_particle_array as p_normal
    from boxtree.traversal import FMMTraversalBuilder
    from sumpy.fmm import SumpyExpansionWranglerCodeContainer

    logging.basicConfig(level=logging.INFO)

    context = ctx_factory()
    queue = cl.CommandQueue(
        context,
        properties=cl.command_queue_properties.PROFILING_ENABLE)

    nsources = 500
    dtype = np.float64

    kernel = LaplaceKernel(2)
    local_expn_class = VolumeTaylorLocalExpansion
    mpole_expn_class = VolumeTaylorMultipoleExpansion
    order = 1

    sources = p_normal(queue, nsources, kernel.dim, dtype, seed=15)

    build_tree = TreeBuilder(context)
    tree, _ = build_tree(queue, sources, max_particles_in_box=30,
                         debug=True)

    build_traversal = FMMTraversalBuilder(context)
    trav, _ = build_traversal(queue, tree, debug=True)

    generator = PhiloxGenerator(context)
    weights = generator.uniform(queue, nsources, dtype=np.float64)

    out_kernels = [kernel]

    code_container = SumpyExpansionWranglerCodeContainer(
        context,
        partial(mpole_expn_class, kernel),
        partial(local_expn_class, kernel),
        out_kernels)

    def constant_order(kernel, kernel_args, tree, lev):
        # Same expansion order on every level.
        return order

    wrangler = code_container.get_wrangler(
        queue, tree, dtype, fmm_level_to_order=constant_order)

    timing_data = {}
    weight_vectors = (weights,)
    pot, = drive_fmm(trav, wrangler, weight_vectors,
                     timing_data=timing_data)
    print(timing_data)
    assert timing_data
def plot_traversal(ctx_getter, do_plot=False, well_sep_is_n_away=1):
    """Draw a 2D tree together with the box lists of box 320 and show it.

    NOTE(review): *do_plot* is accepted but never read; the figure is
    always shown via ``pt.show()``.
    """
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    #for dims in [2, 3]:
    for dims in [2]:
        particle_count = 10**4
        real_dtype = np.float64

        from pyopencl.clrandom import PhiloxGenerator
        from pytools.obj_array import make_obj_array
        generator = PhiloxGenerator(queue.context, seed=15)

        coordinates = []
        for _ in range(dims):
            coordinates.append(
                generator.normal(queue, particle_count, dtype=real_dtype))
        particles = make_obj_array(coordinates)

        from boxtree import TreeBuilder
        build_tree = TreeBuilder(context)

        queue.finish()
        tree, _ = build_tree(queue, particles, max_particles_in_box=30,
                             debug=True)

        from boxtree.traversal import FMMTraversalBuilder
        build_traversal = FMMTraversalBuilder(
            context, well_sep_is_n_away=well_sep_is_n_away)
        trav, _ = build_traversal(queue, tree)

        # Move both structures to the host for plotting.
        tree = tree.get(queue=queue)
        trav = trav.get(queue=queue)

        from boxtree.visualization import TreePlotter
        plotter = TreePlotter(tree)
        plotter.draw_tree(fill=False, edgecolor="black")
        plotter.set_bounding_box()

        import random
        random.seed(7)

        from boxtree.visualization import draw_box_lists
        box_to_draw = 320
        draw_box_lists(plotter, trav, box_to_draw)

        import matplotlib.pyplot as pt
        pt.show()
def __push_particle(self):
    """Launch ``rt_push_particles`` once and wait for it to complete.

    A new array of uniform random numbers with shape ``(self.np, 4)`` is
    generated on every call and passed to the kernel together with the
    ``x_gpu``, ``v_gpu`` and ``t_gpu`` buffers.
    """
    queue = self.ocl_queue
    count = self.np

    generator = PhiloxGenerator(self.ocl_ctx)
    random_numbers = generator.uniform(queue, (count, 4), dtype=self.dtype)

    kernel_args = (
        random_numbers.data,
        self.x_gpu.data,
        self.v_gpu.data,
        self.t_gpu.data,
    )
    event = self.ocl_prg.rt_push_particles(
        queue, (count, ), None, *kernel_args)
    event.wait()
def test_explicit_refine_weights_particle_tree(ctx_getter, dtype, dims,
                                               do_plot=False):
    """Build a particle tree with explicitly supplied refine weights.

    The weights are an int32 array of 10**5 entries requested from
    ``PhiloxGenerator.uniform`` with ``a=1, b=10``; the actual build and
    its checks are delegated to ``run_build_test`` with
    ``max_leaf_refine_weight=100``.
    """
    from boxtree import TreeBuilder
    from pyopencl.clrandom import PhiloxGenerator

    context = ctx_getter()
    queue = cl.CommandQueue(context)

    tree_builder = TreeBuilder(context)

    particle_count = 10**5

    generator = PhiloxGenerator(context, seed=10)
    weights = generator.uniform(queue, particle_count, dtype=np.int32,
                                a=1, b=10)

    build_options = {
        "refine_weights": weights,
        "max_leaf_refine_weight": 100,
        "do_plot": do_plot,
    }
    run_build_test(tree_builder, queue, dims, dtype, particle_count,
                   **build_options)
def test_area_query_balls_outside_bbox(ctx_factory, dims, do_plot=False):
    """Exercise the area query with ball centers beyond the bounding box.

    Centers are sampled uniformly from an interval reaching one unit past
    the smallest and largest bounding-box coordinates, so the input
    includes balls whose centers are not within the tree bounding box.
    """
    cl_context = ctx_factory()
    cmd_queue = cl.CommandQueue(cl_context)

    n_points = 10**4
    float_type = np.float64

    points = make_normal_particle_array(cmd_queue, n_points, dims,
                                        float_type)

    if do_plot:
        import matplotlib.pyplot as pt
        first_axis = points[0].get()
        second_axis = points[1].get()
        pt.plot(first_axis, second_axis, "x")

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(cl_context)

    cmd_queue.finish()
    build_result = tree_builder(cmd_queue, points, max_particles_in_box=30,
                                debug=True)
    tree, _ = build_result

    n_balls = 10**4

    from pyopencl.clrandom import PhiloxGenerator
    from pytools.obj_array import make_obj_array
    philox = PhiloxGenerator(cl_context, seed=13)

    box_low = tree.bounding_box[0].min()
    box_high = tree.bounding_box[1].max()

    def sample_axis():
        # One coordinate array, padded by one unit on both ends.
        return philox.uniform(cmd_queue, n_balls, dtype=float_type,
                              a=box_low - 1, b=box_high + 1)

    ball_centers = make_obj_array([sample_axis() for _ in range(dims)])
    ball_radii = cl.array.empty(cmd_queue, n_balls, float_type).fill(0.1)

    run_area_query_test(cl_context, cmd_queue, tree, ball_centers,
                        ball_radii)
def test_explicit_refine_weights_particle_tree(ctx_factory, dtype, dims,
                                               do_plot=False):
    """Run the tree build test with caller-provided refine weights.

    An int32 weight array with 10**5 entries is requested from
    ``PhiloxGenerator.uniform`` (``a=1, b=10``) and forwarded to
    ``run_build_test`` along with ``max_leaf_refine_weight=100``.
    """
    cl_context = ctx_factory()
    cmd_queue = cl.CommandQueue(cl_context)

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(cl_context)

    n_particles = 10**5

    from pyopencl.clrandom import PhiloxGenerator
    philox = PhiloxGenerator(cl_context, seed=10)
    per_particle_weights = philox.uniform(
        cmd_queue, n_particles, dtype=np.int32, a=1, b=10)

    run_build_test(
        tree_builder, cmd_queue, dims, dtype, n_particles,
        refine_weights=per_particle_weights,
        max_leaf_refine_weight=100,
        do_plot=do_plot)
def test_fmm_completeness(ctx_getter, dims, nsources_req, ntargets_req, who_has_extent, source_gen, target_gen, filter_kind, well_sep_is_n_away, extent_norm, from_sep_smaller_crit):
    """Tests whether the built FMM traversal structures and driver completely
    capture all interactions.

    The FMM is driven with unit weights, and the test passes when the norm
    of ``(pot - weights_sum) / nsources`` is below 1e-8.

    :arg ctx_getter: callable returning an OpenCL context.
    :arg dims: passed to *source_gen* / *target_gen* as the dimension.
    :arg nsources_req: requested number of sources; the actual count is
        taken from the generated array.
    :arg ntargets_req: requested number of targets, or *None* to let the
        tree builder use the sources as targets.
    :arg who_has_extent: a string; sources get radii if it contains ``"s"``,
        targets if it contains ``"t"``.
    :arg source_gen: callable ``(queue, n, dims, dtype, seed=...)`` creating
        the source particles.
    :arg target_gen: like *source_gen*, for targets.
    :arg filter_kind: falsy for no target filtering, else ``"user"`` or
        ``"tree"``; any other truthy value raises :exc:`ValueError`.
    :arg well_sep_is_n_away: forwarded to ``FMMTraversalBuilder``.
    :arg extent_norm: forwarded to the tree builder.
    :arg from_sep_smaller_crit: forwarded to ``FMMTraversalBuilder``.
    """

    sources_have_extent = "s" in who_has_extent
    targets_have_extent = "t" in who_has_extent

    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    dtype = np.float64

    # Particle generation may need an optional dependency; skip if missing.
    try:
        sources = source_gen(queue, nsources_req, dims, dtype, seed=15)
        nsources = len(sources[0])

        if ntargets_req is None:
            # This says "same as sources" to the tree builder.
            targets = None
            ntargets = ntargets_req
        else:
            targets = target_gen(queue, ntargets_req, dims, dtype, seed=16)
            ntargets = len(targets[0])
    except ImportError:
        pytest.skip("loo.py not available, but needed for particle array "
                "generation")

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=12)
    if sources_have_extent:
        source_radii = 2**rng.uniform(queue, nsources, dtype=dtype, a=-10, b=0)
    else:
        source_radii = None

    # NOTE(review): ntargets is None when ntargets_req is None, so this
    # branch presumably relies on the parametrization never combining
    # "t" in who_has_extent with ntargets_req=None -- confirm.
    if targets_have_extent:
        target_radii = 2**rng.uniform(queue, ntargets, dtype=dtype, a=-10, b=0)
    else:
        target_radii = None

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, targets=targets, max_particles_in_box=30, source_radii=source_radii, target_radii=target_radii, debug=True, stick_out_factor=0.25, extent_norm=extent_norm)

    # Disabled debugging aid: plot the tree.
    if 0:
        tree.get().plot()
        import matplotlib.pyplot as pt
        pt.show()

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx, well_sep_is_n_away=well_sep_is_n_away, from_sep_smaller_crit=from_sep_smaller_crit)
    trav, _ = tbuild(queue, tree, debug=True)

    # With extents present, keep the unmerged traversal around (it is only
    # used by the disabled plotting code below) and work with merged lists.
    if who_has_extent:
        pre_merge_trav = trav
        trav = trav.merge_close_lists(queue)

    #weights = np.random.randn(nsources)
    weights = np.ones(nsources)
    weights_sum = np.sum(weights)

    host_trav = trav.get(queue=queue)
    host_tree = host_trav.tree

    if who_has_extent:
        pre_merge_host_trav = pre_merge_trav.get(queue=queue)

    from boxtree.tree import ParticleListFilter
    plfilt = ParticleListFilter(ctx)

    # Pick the wrangler; "flags" marks the targets whose potentials are
    # checked (all of them when no filtering is requested).
    if filter_kind:
        flags = rng.uniform(queue, ntargets or nsources, np.int32, a=0, b=2) \
                .astype(np.int8)
        if filter_kind == "user":
            filtered_targets = plfilt.filter_target_lists_in_user_order(
                    queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInUserOrder(
                    host_tree, filtered_targets.get(queue=queue))
        elif filter_kind == "tree":
            filtered_targets = plfilt.filter_target_lists_in_tree_order(
                    queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInTreeOrder(
                    host_tree, filtered_targets.get(queue=queue))
        else:
            raise ValueError("unsupported value of 'filter_kind'")
    else:
        wrangler = ConstantOneExpansionWrangler(host_tree)
        flags = cl.array.empty(queue, ntargets or nsources, dtype=np.int8)
        flags.fill(1)

    if ntargets is None and not filter_kind:
        # This check only works for targets == sources.
        assert (wrangler.reorder_potentials(
                wrangler.reorder_sources(weights)) == weights).all()

    from boxtree.fmm import drive_fmm
    pot = drive_fmm(host_trav, wrangler, weights)

    # Only compare potentials at targets that passed the filter.
    if filter_kind:
        pot = pot[flags.get() > 0]

    rel_err = la.norm((pot - weights_sum) / nsources)
    good = rel_err < 1e-8

    # {{{ build, evaluate matrix (and identify incorrect interactions)

    # Disabled debugging aid ("if 0"): builds the interaction matrix column
    # by column and plots the boxes of entries that differ from 1.
    if 0 and not good:
        mat = np.zeros((ntargets, nsources), dtype)
        from pytools import ProgressBar

        logging.getLogger().setLevel(logging.WARNING)

        pb = ProgressBar("matrix", nsources)
        for i in range(nsources):
            unit_vec = np.zeros(nsources, dtype=dtype)
            unit_vec[i] = 1
            mat[:, i] = drive_fmm(host_trav, wrangler, unit_vec)
            pb.progress()
        pb.finished()

        logging.getLogger().setLevel(logging.INFO)

        import matplotlib.pyplot as pt

        if 0:
            pt.imshow(mat)
            pt.colorbar()
            pt.show()

        incorrect_tgts, incorrect_srcs = np.where(mat != 1)

        if 1 and len(incorrect_tgts):
            from boxtree.visualization import TreePlotter
            plotter = TreePlotter(host_tree)
            plotter.draw_tree(fill=False, edgecolor="black")
            plotter.draw_box_numbers()
            plotter.set_bounding_box()

            tree_order_incorrect_tgts = \
                    host_tree.indices_to_tree_target_order(incorrect_tgts)
            tree_order_incorrect_srcs = \
                    host_tree.indices_to_tree_source_order(incorrect_srcs)

            src_boxes = [ host_tree.find_box_nr_for_source(i) for i in tree_order_incorrect_srcs ]
            tgt_boxes = [ host_tree.find_box_nr_for_target(i) for i in tree_order_incorrect_tgts ]
            print(src_boxes)
            print(tgt_boxes)

            # plot all sources/targets
            if 0:
                pt.plot(host_tree.targets[0], host_tree.targets[1], "v", alpha=0.9)
                pt.plot(host_tree.sources[0], host_tree.sources[1], "gx", alpha=0.9)

            # plot offending sources/targets
            if 0:
                pt.plot(host_tree.targets[0][tree_order_incorrect_tgts], host_tree.targets[1][tree_order_incorrect_tgts], "rv")
                pt.plot(host_tree.sources[0][tree_order_incorrect_srcs], host_tree.sources[1][tree_order_incorrect_srcs], "go")
            pt.gca().set_aspect("equal")

            from boxtree.visualization import draw_box_lists
            draw_box_lists( plotter, pre_merge_host_trav if who_has_extent else host_trav, 22)
            # from boxtree.visualization import draw_same_level_non_well_sep_boxes
            # draw_same_level_non_well_sep_boxes(plotter, host_trav, 2)

            pt.show()

    # }}}

    # Disabled debugging aid: plot the per-target error.
    if 0 and not good:
        import matplotlib.pyplot as pt
        pt.plot(pot - weights_sum)
        pt.show()

    # Disabled debugging aid: plot the targets whose error is >= 1e-3.
    if 0 and not good:
        import matplotlib.pyplot as pt
        filt_targets = [ host_tree.targets[0][flags.get() > 0], host_tree.targets[1][flags.get() > 0], ]
        host_tree.plot()
        bad = np.abs(pot - weights_sum) >= 1e-3
        bad_targets = [ filt_targets[0][bad], filt_targets[1][bad], ]
        print(bad_targets[0].shape)
        pt.plot(filt_targets[0], filt_targets[1], "x")
        pt.plot(bad_targets[0], bad_targets[1], "v")
        pt.show()

    assert good
def test_fmm_with_optimized_3d_m2l(ctx_factory, nsrcntgts, helmholtz_k,
                                   well_sep_is_n_away):
    """Compare the rotation-data M2L path against the baseline in 3D.

    Two ``FMMLibExpansionWrangler`` instances are built on the same
    traversal, one of them with ``FMMLibRotationData``.  Both are driven
    with the same weights, and the resulting potentials must agree to
    within ``atol=rtol=1e-13``.  When available, the M2L process times of
    both runs are printed.  Skipped when ``pyfmmlib`` is not installed.

    :arg ctx_factory: callable returning an OpenCL context.
    :arg nsrcntgts: combined particle count; half is used for sources and
        half for targets.
    :arg helmholtz_k: Helmholtz parameter passed to the wranglers; a truthy
        value also raises the expansion order on levels below 3.
    :arg well_sep_is_n_away: forwarded to ``FMMTraversalBuilder``.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    dims = 3

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    nsources = ntargets = nsrcntgts // 2
    dtype = np.float64

    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    targets = (p_normal(queue, ntargets, dims, dtype, seed=18)
               + np.array([2, 0, 0])[:dims])

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, targets=targets,
                 max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    # Previously the parameter was accepted but never used, so every
    # value of well_sep_is_n_away exercised the same default traversal.
    tbuild = FMMTraversalBuilder(ctx, well_sep_is_n_away=well_sep_is_n_away)
    trav, _ = tbuild(queue, tree, debug=True)

    trav = trav.get(queue=queue)

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=20)

    weights = rng.uniform(queue, nsources, dtype=np.float64).get()

    base_nterms = 10

    def fmm_level_to_nterms(tree, lev):
        result = base_nterms

        if lev < 3 and helmholtz_k:
            # exercise order-varies-by-level capability
            result += 5

        return result

    from boxtree.pyfmmlib_integration import (FMMLibExpansionWrangler,
                                              FMMLibRotationData)

    baseline_wrangler = FMMLibExpansionWrangler(
        trav.tree, helmholtz_k,
        fmm_level_to_nterms=fmm_level_to_nterms)

    optimized_wrangler = FMMLibExpansionWrangler(
        trav.tree, helmholtz_k,
        fmm_level_to_nterms=fmm_level_to_nterms,
        rotation_data=FMMLibRotationData(queue, trav))

    from boxtree.fmm import drive_fmm

    baseline_timing_data = {}
    baseline_pot = drive_fmm(trav, baseline_wrangler, (weights, ),
                             timing_data=baseline_timing_data)

    optimized_timing_data = {}
    optimized_pot = drive_fmm(trav, optimized_wrangler, (weights, ),
                              timing_data=optimized_timing_data)

    # Timing entries may be None, in which case nothing is printed.
    baseline_time = baseline_timing_data["multipole_to_local"][
        "process_elapsed"]
    if baseline_time is not None:
        print("Baseline M2L time : %#.4g s" % baseline_time)

    opt_time = optimized_timing_data["multipole_to_local"]["process_elapsed"]
    if opt_time is not None:
        print("Optimized M2L time: %#.4g s" % opt_time)

    assert np.allclose(baseline_pot, optimized_pot, atol=1e-13, rtol=1e-13)
def test_target_association(ctx_factory, curve_name, curve_f, nelements,
                            visualize=False):
    """Check target-to-QBX-center association on a curve.

    Five target sets are associated: on-surface targets requested on the
    interior and on the exterior side, targets offset along the normal
    inside and outside the curve by ``noise * tunnel_radius``, and targets
    at ``FAR_TARGET_DIST_FROM_SOURCE``.  The first four must be assigned a
    center on the matching side within ``(1 + 1e-3)`` expansion radii; the
    far targets must have no center (``-1``).

    :arg ctx_factory: callable returning an OpenCL context.
    :arg curve_name: not used in the body (presumably a test id).
    :arg curve_f: curve parametrization passed to ``make_curve_mesh``.
    :arg nelements: number of mesh elements along the curve.
    :arg visualize: if true, plot the curve and the associations found
        for the interior on-surface slice.
    """
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue)

    # {{{ generate lpot source

    order = 16

    # Make the curve mesh.
    mesh = make_curve_mesh(curve_f, np.linspace(0, 1, nelements + 1), order)

    from meshmode.discretization import Discretization
    from meshmode.discretization.poly_element import \
        InterpolatoryQuadratureSimplexGroupFactory
    factory = InterpolatoryQuadratureSimplexGroupFactory(order)
    discr = Discretization(actx, mesh, factory)

    lpot_source = QBXLayerPotentialSource(
        discr,
        qbx_order=order,  # not used in target association
        fine_order=order)
    places = GeometryCollection(lpot_source)

    # }}}

    # {{{ generate targets

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(cl_ctx, seed=RNG_SEED)

    dd = places.auto_source.to_stage1()
    centers = dof_array_to_numpy(
        actx,
        bind(
            places,
            sym.interleaved_expansion_centers(lpot_source.ambient_dim,
                                              dofdesc=dd))(actx))

    density_discr = places.get_discretization(dd.geometry)

    # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    noise = actx.to_numpy(
        rng.uniform(queue, density_discr.ndofs, dtype=np.float64,
                    a=0.01, b=1.0))

    tunnel_radius = dof_array_to_numpy(
        actx,
        bind(
            places,
            sym._close_target_tunnel_radii(lpot_source.ambient_dim,
                                           dofdesc=dd))(actx))

    def targets_from_sources(sign, dist, dim=2):
        # Offset every node by sign * dist along its normal.
        # np.object was removed in NumPy 1.24; the builtin is equivalent.
        nodes = dof_array_to_numpy(
            actx,
            bind(places,
                 sym.nodes(dim, dofdesc=dd))(actx).as_vector(object))
        normals = dof_array_to_numpy(
            actx,
            bind(places,
                 sym.normal(dim, dofdesc=dd))(actx).as_vector(object))
        return actx.from_numpy(nodes + normals * sign * dist)

    from pytential.target import PointsTarget
    int_targets = PointsTarget(targets_from_sources(-1, noise * tunnel_radius))
    ext_targets = PointsTarget(targets_from_sources(+1, noise * tunnel_radius))
    far_targets = PointsTarget(
        targets_from_sources(+1, FAR_TARGET_DIST_FROM_SOURCE))

    # Create target discretizations.
    target_discrs = (
        # On-surface targets, interior
        (density_discr, -1),
        # On-surface targets, exterior
        (density_discr, +1),
        # Interior close targets
        (int_targets, -2),
        # Exterior close targets
        (ext_targets, +2),
        # Far targets, should not need centers
        (far_targets, 0),
    )

    # Consecutive index ranges of the five target sets in the
    # concatenated target array.
    sizes = np.cumsum([discr.ndofs for discr, _ in target_discrs])

    (
        surf_int_slice,
        surf_ext_slice,
        vol_int_slice,
        vol_ext_slice,
        far_slice,
    ) = [slice(start, end) for start, end in zip(np.r_[0, sizes], sizes)]

    # }}}

    # {{{ run target associator and check

    from pytential.qbx.target_assoc import (TargetAssociationCodeContainer,
                                            associate_targets_to_qbx_centers)

    from pytential.qbx.utils import TreeCodeContainer
    code_container = TargetAssociationCodeContainer(actx,
                                                    TreeCodeContainer(actx))

    target_assoc = (associate_targets_to_qbx_centers(
        places,
        places.auto_source,
        code_container.get_wrangler(actx),
        target_discrs,
        target_association_tolerance=1e-10).get(queue=queue))

    expansion_radii = dof_array_to_numpy(
        actx,
        bind(
            places,
            sym.expansion_radii(lpot_source.ambient_dim,
                                granularity=sym.GRANULARITY_CENTER))(actx))

    from meshmode.dof_array import thaw
    surf_targets = dof_array_to_numpy(actx, thaw(actx, density_discr.nodes()))
    int_targets = actx.to_numpy(int_targets.nodes())
    ext_targets = actx.to_numpy(ext_targets.nodes())

    def visualize_curve_and_assoc():
        import matplotlib.pyplot as plt
        from meshmode.mesh.visualization import draw_curve

        draw_curve(density_discr.mesh)

        targets = int_targets
        tgt_slice = surf_int_slice

        plt.plot(centers[0], centers[1], "+", color="orange")
        ax = plt.gca()

        for tx, ty, tcenter in zip(targets[0, tgt_slice],
                                   targets[1, tgt_slice],
                                   target_assoc.target_to_center[tgt_slice]):
            if tcenter >= 0:
                ax.add_artist(
                    plt.Line2D(
                        (tx, centers[0, tcenter]),
                        (ty, centers[1, tcenter]),
                    ))

        ax.set_aspect("equal")
        plt.show()

    if visualize:
        visualize_curve_and_assoc()

    # Checks that the targets match with centers on the appropriate side and
    # within the allowable distance.
    def check_close_targets(centers, targets, true_side, target_to_center,
                            target_to_side_result, tgt_slice):
        targets_have_centers = (target_to_center >= 0).all()
        assert targets_have_centers

        assert (target_to_side_result == true_side).all()

        TOL = 1e-3
        dists = la.norm((targets.T - centers.T[target_to_center]), axis=1)
        assert (dists <= (1 + TOL) * expansion_radii[target_to_center]).all()

    # Center side order = -1, 1, -1, 1, ...
    target_to_center_side = 2 * (target_assoc.target_to_center % 2) - 1

    # interior surface
    check_close_targets(centers, surf_targets, -1,
                        target_assoc.target_to_center[surf_int_slice],
                        target_to_center_side[surf_int_slice],
                        surf_int_slice)

    # exterior surface
    check_close_targets(centers, surf_targets, +1,
                        target_assoc.target_to_center[surf_ext_slice],
                        target_to_center_side[surf_ext_slice],
                        surf_ext_slice)

    # interior volume
    check_close_targets(centers, int_targets, -1,
                        target_assoc.target_to_center[vol_int_slice],
                        target_to_center_side[vol_int_slice],
                        vol_int_slice)

    # exterior volume
    check_close_targets(centers, ext_targets, +1,
                        target_assoc.target_to_center[vol_ext_slice],
                        target_to_center_side[vol_ext_slice],
                        vol_ext_slice)

    # Checks that far targets are not assigned a center.
    assert (target_assoc.target_to_center[far_slice] == -1).all()

    # }}}
def test_target_association(ctx_getter, curve_name, curve_f, nelements,
                            visualize=False):
    """Check target-to-QBX-center association on a curve.

    Five target sets are associated: on-surface targets requested on the
    interior and on the exterior side, targets offset along the normal
    inside and outside the curve by ``noise * tunnel_radius``, and targets
    at ``FAR_TARGET_DIST_FROM_SOURCE``.  The first four must be assigned a
    center on the matching side within ``(1 + 1e-3)`` expansion radii; the
    far targets must have no center (``-1``).

    :arg ctx_getter: callable returning an OpenCL context.
    :arg curve_name: not used in the body (presumably a test id).
    :arg curve_f: curve parametrization passed to ``make_curve_mesh``.
    :arg nelements: number of mesh elements along the curve.
    :arg visualize: if true, plot the curve and the associations found
        for the interior on-surface slice.
    """
    cl_ctx = ctx_getter()
    queue = cl.CommandQueue(cl_ctx)

    # {{{ generate lpot source

    order = 16

    # Make the curve mesh.
    mesh = make_curve_mesh(curve_f, np.linspace(0, 1, nelements+1), order)

    from meshmode.discretization import Discretization
    from meshmode.discretization.poly_element import \
        InterpolatoryQuadratureSimplexGroupFactory
    factory = InterpolatoryQuadratureSimplexGroupFactory(order)
    discr = Discretization(cl_ctx, mesh, factory)

    lpot_source, conn = QBXLayerPotentialSource(
        discr,
        qbx_order=order,  # not used in target association
        fine_order=order).with_refinement()
    del discr

    from pytential.qbx.utils import get_interleaved_centers
    centers = np.array([ax.get(queue)
                        for ax in get_interleaved_centers(queue, lpot_source)])

    # }}}

    # {{{ generate targets

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(cl_ctx, seed=RNG_SEED)
    nsources = lpot_source.density_discr.nnodes
    # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    noise = rng.uniform(queue, nsources, dtype=np.float64, a=0.01, b=1.0)
    tunnel_radius = \
        lpot_source._close_target_tunnel_radius("nsources").with_queue(queue)

    def targets_from_sources(sign, dist):
        # Offset every node by sign * dist along its normal.
        from pytential import sym, bind
        dim = 2
        nodes = bind(lpot_source.density_discr, sym.nodes(dim))(queue)
        normals = bind(lpot_source.density_discr, sym.normal(dim))(queue)
        # np.object was removed in NumPy 1.24; the builtin is equivalent.
        return (nodes + normals * sign * dist).as_vector(object)

    from pytential.target import PointsTarget
    int_targets = PointsTarget(targets_from_sources(-1, noise * tunnel_radius))
    ext_targets = PointsTarget(targets_from_sources(+1, noise * tunnel_radius))
    far_targets = PointsTarget(
        targets_from_sources(+1, FAR_TARGET_DIST_FROM_SOURCE))

    # Create target discretizations.
    target_discrs = (
        # On-surface targets, interior
        (lpot_source.density_discr, -1),
        # On-surface targets, exterior
        (lpot_source.density_discr, +1),
        # Interior close targets
        (int_targets, -2),
        # Exterior close targets
        (ext_targets, +2),
        # Far targets, should not need centers
        (far_targets, 0),
    )

    # Consecutive index ranges of the five target sets in the
    # concatenated target array.
    sizes = np.cumsum([discr.nnodes for discr, _ in target_discrs])

    (surf_int_slice,
     surf_ext_slice,
     vol_int_slice,
     vol_ext_slice,
     far_slice,
     ) = [slice(start, end) for start, end in zip(np.r_[0, sizes], sizes)]

    # }}}

    # {{{ run target associator and check

    from pytential.qbx.target_assoc import (
        TargetAssociationCodeContainer, associate_targets_to_qbx_centers)

    from pytential.qbx.utils import TreeCodeContainer

    code_container = TargetAssociationCodeContainer(
        cl_ctx, TreeCodeContainer(cl_ctx))

    target_assoc = (associate_targets_to_qbx_centers(
        lpot_source,
        code_container.get_wrangler(queue),
        target_discrs,
        target_association_tolerance=1e-10)
        .get(queue=queue))

    expansion_radii = lpot_source._expansion_radii("ncenters").get(queue)

    surf_targets = np.array(
        [axis.get(queue) for axis in lpot_source.density_discr.nodes()])
    int_targets = np.array([axis.get(queue) for axis in int_targets.nodes()])
    ext_targets = np.array([axis.get(queue) for axis in ext_targets.nodes()])

    def visualize_curve_and_assoc():
        import matplotlib.pyplot as plt
        from meshmode.mesh.visualization import draw_curve

        draw_curve(lpot_source.density_discr.mesh)

        targets = int_targets
        tgt_slice = surf_int_slice

        plt.plot(centers[0], centers[1], "+", color="orange")
        ax = plt.gca()

        for tx, ty, tcenter in zip(
                targets[0, tgt_slice],
                targets[1, tgt_slice],
                target_assoc.target_to_center[tgt_slice]):
            if tcenter >= 0:
                ax.add_artist(
                    plt.Line2D(
                        (tx, centers[0, tcenter]),
                        (ty, centers[1, tcenter]),
                    ))

        ax.set_aspect("equal")
        plt.show()

    if visualize:
        visualize_curve_and_assoc()

    # Checks that the targets match with centers on the appropriate side and
    # within the allowable distance.
    def check_close_targets(centers, targets, true_side,
                            target_to_center, target_to_side_result,
                            tgt_slice):
        targets_have_centers = (target_to_center >= 0).all()
        assert targets_have_centers

        assert (target_to_side_result == true_side).all()

        TOL = 1e-3
        dists = la.norm((targets.T - centers.T[target_to_center]), axis=1)
        assert (dists <= (1 + TOL) * expansion_radii[target_to_center]).all()

    # Center side order = -1, 1, -1, 1, ...
    target_to_center_side = 2 * (target_assoc.target_to_center % 2) - 1

    # interior surface
    check_close_targets(
        centers, surf_targets, -1,
        target_assoc.target_to_center[surf_int_slice],
        target_to_center_side[surf_int_slice],
        surf_int_slice)

    # exterior surface
    check_close_targets(
        centers, surf_targets, +1,
        target_assoc.target_to_center[surf_ext_slice],
        target_to_center_side[surf_ext_slice],
        surf_ext_slice)

    # interior volume
    check_close_targets(
        centers, int_targets, -1,
        target_assoc.target_to_center[vol_int_slice],
        target_to_center_side[vol_int_slice],
        vol_int_slice)

    # exterior volume
    check_close_targets(
        centers, ext_targets, +1,
        target_assoc.target_to_center[vol_ext_slice],
        target_to_center_side[vol_ext_slice],
        vol_ext_slice)

    # Checks that far targets are not assigned a center.
    assert (target_assoc.target_to_center[far_slice] == -1).all()

    # }}}
def test_pyfmmlib_fmm(ctx_getter, dims):
    """Check a pyfmmlib-backed Helmholtz FMM against pyfmmlib's direct sum.

    Sources and (shifted) targets are drawn from normal distributions, a
    tree and traversal are built on the device, and the FMM potential is
    compared with the direct reference in the relative l2 norm.

    :arg ctx_getter: callable returning the OpenCL context to use.
    :arg dims: spatial dimension; 2 selects the 2D reference routine,
        anything else the 3D one.
    """
    from pytest import importorskip

    logging.basicConfig(level=logging.INFO)
    importorskip("pyfmmlib")

    cl_ctx = ctx_getter()
    queue = cl.CommandQueue(cl_ctx)

    nsources, ntargets = 3000, 1000
    dtype = np.float64
    helmholtz_k = 2

    # {{{ particles

    sources = p_normal(queue, nsources, dims, dtype, seed=15)

    # Targets are offset by 2 along the first axis.
    target_offset = np.array([2, 0, 0])[:dims]
    targets = p_normal(queue, ntargets, dims, dtype, seed=18) + target_offset

    sources_host = particle_array_to_host(sources)
    targets_host = particle_array_to_host(targets)

    # }}}

    # {{{ tree and traversal

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(cl_ctx)
    tree, _ = build_tree(
            queue, sources, targets=targets,
            max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    build_traversal = FMMTraversalBuilder(cl_ctx)
    dev_trav, _ = build_traversal(queue, tree, debug=True)
    trav = dev_trav.get(queue=queue)

    # }}}

    from pyopencl.clrandom import PhiloxGenerator
    weight_rng = PhiloxGenerator(queue.context, seed=20)
    weights = weight_rng.uniform(queue, nsources, dtype=np.float64).get()
    # (For debugging, unit weights may be substituted: np.ones(nsources).)

    from boxtree.pyfmmlib_integration import HelmholtzExpansionWrangler
    wrangler = HelmholtzExpansionWrangler(trav.tree, helmholtz_k, nterms=10)

    from boxtree.fmm import drive_fmm
    pot = drive_fmm(trav, wrangler, weights)

    # {{{ direct reference

    logger.info("computing direct (reference) result")

    # Arguments shared by the 2D and 3D direct-evaluation routines.
    direct_kwargs = {
            "sources": sources_host.T,
            "charge": weights,
            "targets": targets_host.T,
            "zk": helmholtz_k,
            }

    if dims == 2:
        from pyfmmlib import hpotgrad2dall_vec
        ref_pot, _, _ = hpotgrad2dall_vec(
                ifgrad=False, ifhess=False, **direct_kwargs)
    else:
        from pyfmmlib import hpotfld3dall_vec
        ref_pot, _ = hpotfld3dall_vec(iffld=False, **direct_kwargs)

    # }}}

    abs_err = la.norm(pot - ref_pot)
    rel_err = abs_err / la.norm(ref_pot)
    logger.info("relative l2 error: %g" % rel_err)
    assert rel_err < 1e-5
def test_sumpy_fmm_exclude_self(ctx_getter):
    """Check the sumpy FMM with ``exclude_self=True`` against direct P2P.

    The same particles act as sources and targets; ``target_to_source``
    maps every target to the source with the same index. The FMM result is
    compared to a direct P2P evaluation in the relative l2 norm.

    :arg ctx_getter: callable returning the OpenCL context to use.
    """
    logging.basicConfig(level=logging.INFO)

    cl_ctx = ctx_getter()
    queue = cl.CommandQueue(cl_ctx)

    nsources = 500
    dtype = np.float64

    from boxtree.tools import (
            make_normal_particle_array as p_normal)

    knl = LaplaceKernel(2)
    out_kernels = [knl]
    mpole_expn_class = VolumeTaylorMultipoleExpansion
    local_expn_class = VolumeTaylorLocalExpansion
    order = 10

    sources = p_normal(queue, nsources, knl.dim, dtype, seed=15)

    # {{{ tree and traversal

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(cl_ctx)
    tree, _ = build_tree(
            queue, sources, max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    build_traversal = FMMTraversalBuilder(cl_ctx)
    trav, _ = build_traversal(queue, tree, debug=True)

    # }}}

    from pyopencl.clrandom import PhiloxGenerator
    weight_rng = PhiloxGenerator(cl_ctx)
    weights = weight_rng.uniform(queue, nsources, dtype=np.float64)

    # Target i coincides with source i.
    self_extra_kwargs = {
            "target_to_source": np.arange(tree.ntargets, dtype=np.int32),
            }

    def fmm_level_to_order(kernel, kernel_args, tree, lev):
        # Same expansion order on every level.
        return order

    from functools import partial
    from sumpy.fmm import SumpyExpansionWranglerCodeContainer
    code_container = SumpyExpansionWranglerCodeContainer(
            cl_ctx,
            partial(mpole_expn_class, knl),
            partial(local_expn_class, knl),
            out_kernels,
            exclude_self=True)

    wrangler = code_container.get_wrangler(
            queue, tree, dtype,
            fmm_level_to_order=fmm_level_to_order,
            self_extra_kwargs=self_extra_kwargs)

    from boxtree.fmm import drive_fmm
    pot, = drive_fmm(trav, wrangler, weights)

    # {{{ direct reference

    from sumpy import P2P
    direct_eval = P2P(cl_ctx, out_kernels, exclude_self=True)
    _evt, (ref_pot, ) = direct_eval(
            queue, sources, sources, (weights, ), **self_extra_kwargs)

    # }}}

    pot = pot.get()
    ref_pot = ref_pot.get()

    abs_err = la.norm(pot - ref_pot)
    rel_err = abs_err / la.norm(ref_pot)
    logger.info("order %d -> relative l2 error: %g" % (order, rel_err))

    assert np.isclose(rel_err, 0, atol=1e-7)
def test_extent_tree(ctx_getter, dims, do_plot=False):
    """Build a tree from particles with extent and verify its structure.

    Checks, in order: that the tree's sorted sources/targets/radii are
    permutations of the inputs as described by the tree's id arrays, that
    every box lies within the bounding box, that cumulative particle counts
    are consistent with child counts, and that each particle (including its
    radius) fits in its box up to the stick-out distance. Finally, point
    sources are generated around each source and linked into the tree.

    :arg ctx_getter: callable returning the OpenCL context to use.
    :arg dims: spatial dimension.
    :arg do_plot: accepted but not used by this function.
    """
    logging.basicConfig(level=logging.INFO)

    cl_ctx = ctx_getter()
    queue = cl.CommandQueue(cl_ctx)

    nsources = 100000
    ntargets = 200000
    dtype = np.float64
    npoint_sources_per_source = 16

    sources = make_normal_particle_array(queue, nsources, dims, dtype,
            seed=12)
    targets = make_normal_particle_array(queue, ntargets, dims, dtype,
            seed=19)

    from pyopencl.clrandom import PhiloxGenerator
    radius_rng = PhiloxGenerator(queue.context, seed=13)
    # Radii are 2**u with u drawn uniformly between -10 and 0.
    source_radii = 2**radius_rng.uniform(
            queue, nsources, dtype=dtype, a=-10, b=0)
    target_radii = 2**radius_rng.uniform(
            queue, ntargets, dtype=dtype, a=-10, b=0)

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(cl_ctx)

    queue.finish()
    dev_tree, _ = build_tree(
            queue, sources, targets=targets,
            source_radii=source_radii, target_radii=target_radii,
            max_particles_in_box=10, debug=True)

    logger.info("transfer tree, check orderings")

    tree = dev_tree.get(queue=queue)

    sorted_sources = np.array(list(tree.sources))
    sorted_targets = np.array(list(tree.targets))
    sorted_source_radii = tree.source_radii
    sorted_target_radii = tree.target_radii

    unsorted_sources = np.array([axis.get() for axis in sources])
    unsorted_targets = np.array([axis.get() for axis in targets])
    unsorted_source_radii = source_radii.get()
    unsorted_target_radii = target_radii.get()

    source_perm = tree.user_source_ids
    assert (sorted_sources == unsorted_sources[:, source_perm]).all()
    assert (sorted_source_radii == unsorted_source_radii[source_perm]).all()

    # {{{ test box structure, stick-out criterion

    logger.info("test box structure, stick-out criterion")

    # Invert sorted_target_ids to obtain, for each tree-order target, its
    # index in the user's ordering.
    user_target_ids = np.empty(tree.ntargets, dtype=np.intp)
    user_target_ids[tree.sorted_target_ids] = np.arange(
            tree.ntargets, dtype=np.intp)
    if ntargets:
        assert (sorted_targets
                == unsorted_targets[:, user_target_ids]).all()
        assert (sorted_target_radii
                == unsorted_target_radii[user_target_ids]).all()

    all_good_so_far = True

    # {{{ check sources, targets

    particle_kinds = [
            ("source",
                tree.box_source_starts, tree.box_source_counts_cumul,
                sorted_sources, sorted_source_radii),
            ("target",
                tree.box_target_starts, tree.box_target_counts_cumul,
                sorted_targets, sorted_target_radii),
            ]

    for ibox in range(tree.nboxes):
        extent_low, extent_high = tree.get_box_extent(ibox)

        box_radius = np.max(extent_high-extent_low) * 0.5
        stick_out_dist = tree.stick_out_factor * box_radius

        # The box must lie within the bounding box, up to a small tolerance.
        assert (extent_low
                >= tree.bounding_box[0] - 1e-12*tree.root_extent).all(), ibox
        assert (extent_high
                <= tree.bounding_box[1] + 1e-12*tree.root_extent).all(), ibox

        # Child id 0 marks "no child" in box_child_ids.
        child_ids = tree.box_child_ids[:, ibox]
        live_children = child_ids[child_ids != 0]

        assert (tree.box_source_counts_nonchild[ibox]
                + np.sum(tree.box_source_counts_cumul[live_children])
                == tree.box_source_counts_cumul[ibox])
        assert (tree.box_target_counts_nonchild[ibox]
                + np.sum(tree.box_target_counts_cumul[live_children])
                == tree.box_target_counts_cumul[ibox])

        for what, starts, counts, points, radii in particle_kinds:
            first = starts[ibox]
            in_box = slice(first, first+counts[ibox])
            check_particles = points[:, in_box]
            check_radii = radii[in_box]

            below_upper = (
                    check_particles + check_radii
                    < extent_high[:, np.newaxis] + stick_out_dist)
            above_lower = (
                    extent_low[:, np.newaxis] - stick_out_dist
                    <= check_particles - check_radii)
            good = (below_upper & above_lower).all(axis=0)

            all_good_here = good.all()

            if not all_good_here:
                print("BAD BOX %s %d level %d"
                        % (what, ibox, tree.box_levels[ibox]))

            all_good_so_far = all_good_so_far and all_good_here
            assert all_good_here

    # }}}

    assert all_good_so_far

    # }}}

    # {{{ create, link point sources

    logger.info("creating point sources")

    np.random.seed(20)

    from pytools.obj_array import make_obj_array

    # Scatter point sources around each source, each coordinate offset by
    # at most the source's radius.
    radii_column = unsorted_source_radii[:, np.newaxis]
    point_source_axes = []
    for iaxis in range(dims):
        offsets = radii_column * np.random.uniform(
                -1, 1, size=(nsources, npoint_sources_per_source))
        point_source_axes.append(
                cl.array.to_device(
                    queue,
                    unsorted_sources[iaxis][:, np.newaxis] + offsets))
    point_sources = make_obj_array(point_source_axes)

    point_source_starts = cl.array.arange(
            queue,
            0, (nsources+1)*npoint_sources_per_source,
            npoint_sources_per_source,
            dtype=tree.particle_id_dtype)

    from boxtree.tree import link_point_sources
    dev_tree = link_point_sources(
            queue, dev_tree, point_source_starts, point_sources,
            debug=True)

    # }}}
def demo_cost_model():
    """Calibrate the FMM cost model on small runs and predict a larger one.

    For each problem size a tree and traversal are built and an FMM is run
    with timing enabled. The cost model is calibrated on all but the last
    case, then used to predict the per-stage time of the last case; the
    measured and predicted times are logged for each stage.

    :raises NotImplementedError: if process time is not supported.
    """
    if not SUPPORTS_PROCESS_TIME:
        raise NotImplementedError(
            "Currently this script uses process time which only works on Python>=3.3"
        )

    # Imports and builders do not depend on the problem size, so they are
    # set up once instead of on every loop iteration.
    from boxtree import TreeBuilder
    from boxtree.cost import FMMCostModel
    from boxtree.cost import make_pde_aware_translation_cost_model
    from boxtree.fmm import drive_fmm
    from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler
    from boxtree.tools import make_normal_particle_array as p_normal
    from boxtree.traversal import FMMTraversalBuilder
    from pyopencl.clrandom import PhiloxGenerator

    nsources_list = [1000, 2000, 3000, 4000, 5000]
    ntargets_list = [1000, 2000, 3000, 4000, 5000]
    dims = 3
    dtype = np.float64

    # Single source of truth for the timing field used both for
    # calibration and for the final report.
    time_field_name = "process_elapsed"

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    tb = TreeBuilder(ctx)
    tg = FMMTraversalBuilder(ctx, well_sep_is_n_away=2)

    traversals = []
    traversals_dev = []
    level_to_orders = []
    timing_results = []

    def fmm_level_to_nterms(tree, ilevel):
        return 10

    for nsources, ntargets in zip(nsources_list, ntargets_list):
        # {{{ Generate sources, targets and target_radii

        sources = p_normal(queue, nsources, dims, dtype, seed=15)
        targets = p_normal(queue, ntargets, dims, dtype, seed=18)

        # Deliberately re-created per case so that each case draws from the
        # start of the seed-22 stream, as before.
        rng = PhiloxGenerator(queue.context, seed=22)
        target_radii = rng.uniform(
            queue, ntargets, a=0, b=0.05, dtype=dtype
        ).get()

        # }}}

        # {{{ Generate tree and traversal

        tree, _ = tb(
            queue, sources, targets=targets, target_radii=target_radii,
            stick_out_factor=0.15, max_particles_in_box=30, debug=True
        )

        trav_dev, _ = tg(queue, tree, debug=True)
        trav = trav_dev.get(queue=queue)

        traversals.append(trav)
        traversals_dev.append(trav_dev)

        # }}}

        wrangler = FMMLibExpansionWrangler(trav.tree, 0, fmm_level_to_nterms)
        level_to_orders.append(wrangler.level_nterms)

        timing_data = {}
        src_weights = np.random.rand(tree.nsources).astype(tree.coord_dtype)
        drive_fmm(trav, wrangler, src_weights, timing_data=timing_data)

        timing_results.append(timing_data)

    cost_model = FMMCostModel(make_pde_aware_translation_cost_model)

    # Model every case except the last one with unit calibration
    # parameters; the last case is held out for prediction.
    model_results = []
    for icase in range(len(traversals) - 1):
        traversal = traversals_dev[icase]
        model_results.append(
            cost_model.cost_per_stage(
                queue, traversal, level_to_orders[icase],
                FMMCostModel.get_unit_calibration_params(),
            )
        )

    queue.finish()

    params = cost_model.estimate_calibration_params(
        model_results, timing_results[:-1], time_field_name=time_field_name
    )

    predicted_time = cost_model.cost_per_stage(
        queue, traversals_dev[-1], level_to_orders[-1], params,
    )
    queue.finish()

    for field in ["form_multipoles", "eval_direct", "multipole_to_local",
                  "eval_multipoles", "form_locals", "eval_locals",
                  "coarsen_multipoles", "refine_locals"]:
        measured = timing_results[-1][field][time_field_name]

        # A stage may register zero elapsed time; the relative error is
        # undefined then, so report NaN instead of dividing by zero.
        if measured:
            pred_err = (measured - predicted_time[field]) / measured
        else:
            pred_err = float("nan")

        logger.info("actual/predicted time for %s: %.3g/%.3g -> %g %% error",
                    field,
                    measured,
                    predicted_time[field],
                    abs(100 * pred_err))
def test_fmm_completeness(ctx_getter, dims, nsources_req, ntargets_req,
        who_has_extent, source_gen, target_gen, filter_kind):
    """Tests whether the built FMM traversal structures and driver completely
    capture all interactions.

    The FMM is driven with a "constant one" expansion wrangler, and the
    resulting potential at every (retained) target is compared against the
    sum of all source weights.

    :arg ctx_getter: callable returning the OpenCL context to use.
    :arg dims: spatial dimension passed to the particle generators.
    :arg nsources_req: requested number of sources; the actual count is
        taken from the generated array.
    :arg ntargets_req: requested number of targets, or *None* to use the
        sources as targets.
    :arg who_has_extent: string; sources get radii if it contains ``"s"``,
        targets get radii if it contains ``"t"``.
    :arg source_gen: callable ``(queue, n, dims, dtype, seed=...)`` producing
        the source particle array.
    :arg target_gen: like *source_gen*, for targets.
    :arg filter_kind: falsy for no target filtering, or ``"user"`` /
        ``"tree"`` to filter targets in user or tree order.
    :raises ValueError: for any other truthy *filter_kind*.
    """

    sources_have_extent = "s" in who_has_extent
    targets_have_extent = "t" in who_has_extent

    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    dtype = np.float64

    # Particle generation may raise ImportError; the test is skipped then.
    try:
        sources = source_gen(queue, nsources_req, dims, dtype, seed=15)
        # The generator decides the actual particle count.
        nsources = len(sources[0])

        if ntargets_req is None:
            # This says "same as sources" to the tree builder.
            targets = None
            ntargets = ntargets_req
        else:
            targets = target_gen(queue, ntargets_req, dims, dtype, seed=16)
            ntargets = len(targets[0])
    except ImportError:
        pytest.skip("loo.py not available, but needed for particle array "
                "generation")

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=12)
    # Radii, where requested, are 2**u with u uniform between -10 and 0.
    if sources_have_extent:
        source_radii = 2**rng.uniform(queue, nsources, dtype=dtype,
                a=-10, b=0)
    else:
        source_radii = None

    if targets_have_extent:
        target_radii = 2**rng.uniform(queue, ntargets, dtype=dtype,
                a=-10, b=0)
    else:
        target_radii = None

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, targets=targets,
            max_particles_in_box=30,
            source_radii=source_radii, target_radii=target_radii,
            debug=True)
    # Disabled debugging aid: plot the tree.
    if 0:
        tree.get().plot()
        import matplotlib.pyplot as pt
        pt.show()

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)
    # Merge the close lists only when the traversal actually has them.
    if trav.sep_close_smaller_starts is not None:
        trav = trav.merge_close_lists(queue)

    # NOTE(review): weights come from the unseeded global NumPy RNG, so they
    # differ from run to run.
    weights = np.random.randn(nsources)
    #weights = np.ones(nsources)
    weights_sum = np.sum(weights)

    host_trav = trav.get(queue=queue)
    host_tree = host_trav.tree

    if filter_kind:
        # One flag per target (per source when targets are the sources);
        # targets with a positive flag are the ones compared below.
        flags = rng.uniform(queue, ntargets or nsources, np.int32, a=0, b=2) \
                .astype(np.int8)
        if filter_kind == "user":
            from boxtree.tree import filter_target_lists_in_user_order
            filtered_targets = filter_target_lists_in_user_order(
                    queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInUserOrder(
                    host_tree, filtered_targets.get(queue=queue))
        elif filter_kind == "tree":
            from boxtree.tree import filter_target_lists_in_tree_order
            filtered_targets = filter_target_lists_in_tree_order(
                    queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInTreeOrder(
                    host_tree, filtered_targets.get(queue=queue))
        else:
            raise ValueError("unsupported value of 'filter_kind'")
    else:
        wrangler = ConstantOneExpansionWrangler(host_tree)

    if ntargets is None and not filter_kind:
        # This check only works for targets == sources.
        assert (wrangler.reorder_potentials(
                wrangler.reorder_sources(weights)) == weights).all()

    from boxtree.fmm import drive_fmm
    pot = drive_fmm(host_trav, wrangler, weights)

    # {{{ build, evaluate matrix (and identify missing interactions)

    # Disabled debugging aid: builds the interaction matrix one unit source
    # vector at a time, then plots zero entries and the boxes they belong to.
    if 0:
        mat = np.zeros((ntargets, nsources), dtype)
        from pytools import ProgressBar

        # Quieten logging while the FMM is driven once per source.
        logging.getLogger().setLevel(logging.WARNING)

        pb = ProgressBar("matrix", nsources)
        for i in range(nsources):
            unit_vec = np.zeros(nsources, dtype=dtype)
            unit_vec[i] = 1
            mat[:, i] = drive_fmm(host_trav, wrangler, unit_vec)
            pb.progress()
        pb.finished()

        logging.getLogger().setLevel(logging.INFO)

        import matplotlib.pyplot as pt

        if 1:
            pt.spy(mat)
            pt.show()

        # Zero matrix entries are taken to be missing interactions.
        missing_tgts, missing_srcs = np.where(mat == 0)

        if 1 and len(missing_tgts):
            from boxtree.visualization import TreePlotter
            plotter = TreePlotter(host_tree)
            plotter.draw_tree(fill=False, edgecolor="black")
            plotter.draw_box_numbers()
            plotter.set_bounding_box()

            tree_order_missing_tgts = \
                    host_tree.indices_to_tree_target_order(missing_tgts)
            tree_order_missing_srcs = \
                    host_tree.indices_to_tree_source_order(missing_srcs)

            src_boxes = [
                    host_tree.find_box_nr_for_source(i)
                    for i in tree_order_missing_srcs]
            tgt_boxes = [
                    host_tree.find_box_nr_for_target(i)
                    for i in tree_order_missing_tgts]
            print(src_boxes)
            print(tgt_boxes)

            pt.plot(
                    host_tree.targets[0][tree_order_missing_tgts],
                    host_tree.targets[1][tree_order_missing_tgts],
                    "rv")
            pt.plot(
                    host_tree.sources[0][tree_order_missing_srcs],
                    host_tree.sources[1][tree_order_missing_srcs],
                    "go")
            pt.gca().set_aspect("equal")

            pt.show()

    # }}}

    if filter_kind:
        # Compare only at the targets whose flag is positive.
        pot = pot[flags.get() > 0]

    # Every compared potential is expected to equal the sum of all weights.
    rel_err = la.norm((pot - weights_sum) / nsources)
    good = rel_err < 1e-8
    # Disabled debugging aid: plot the pointwise error.
    if 0 and not good:
        import matplotlib.pyplot as pt
        pt.plot(pot-weights_sum)
        pt.show()

    # Disabled debugging aid: plot the retained targets and mark bad ones.
    if 0 and not good:
        import matplotlib.pyplot as pt
        filt_targets = [
                host_tree.targets[0][flags.get() > 0],
                host_tree.targets[1][flags.get() > 0],
                ]
        host_tree.plot()
        bad = np.abs(pot - weights_sum) >= 1e-3
        bad_targets = [
                filt_targets[0][bad],
                filt_targets[1][bad],
                ]
        print(bad_targets[0].shape)
        pt.plot(filt_targets[0], filt_targets[1], "x")
        pt.plot(bad_targets[0], bad_targets[1], "v")
        pt.show()

    assert good
def test_extent_tree(ctx_getter, dims, extent_norm, do_plot=False):
    """Build a tree from particles with extent and verify its structure.

    Checks, in order: that the tree's sorted sources/targets/radii are
    permutations of the inputs as described by the tree's id arrays, that
    particle counts are consistent across the box hierarchy, that every box
    lies within the bounding box, and that each particle (including its
    radius) fits in its box according to *extent_norm*. Finally, point
    sources are generated around each source and linked into the tree.

    :arg ctx_getter: callable returning the OpenCL context to use.
    :arg dims: spatial dimension.
    :arg extent_norm: ``"linf"`` or ``"l2"``.
    :arg do_plot: if true, plot the particles and the tree.
    :raises ValueError: if *extent_norm* is neither ``"linf"`` nor ``"l2"``.
    """
    logging.basicConfig(level=logging.INFO)

    cl_ctx = ctx_getter()
    queue = cl.CommandQueue(cl_ctx)

    nsources = 100000
    ntargets = 200000
    dtype = np.float64
    npoint_sources_per_source = 16

    sources = make_normal_particle_array(queue, nsources, dims, dtype,
            seed=12)
    targets = make_normal_particle_array(queue, ntargets, dims, dtype,
            seed=19)

    # Weight 1 for the first nsources entries, 0 for the remaining ones.
    refine_weights = cl.array.zeros(queue, nsources+ntargets, np.int32)
    refine_weights[:nsources] = 1

    from pyopencl.clrandom import PhiloxGenerator
    radius_rng = PhiloxGenerator(queue.context, seed=13)
    # Radii are 2**u with u drawn uniformly between -10 and 0.
    source_radii = 2**radius_rng.uniform(
            queue, nsources, dtype=dtype, a=-10, b=0)
    target_radii = 2**radius_rng.uniform(
            queue, ntargets, dtype=dtype, a=-10, b=0)

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(cl_ctx)

    queue.finish()
    dev_tree, _ = build_tree(
            queue, sources, targets=targets,
            source_radii=source_radii,
            target_radii=target_radii,
            extent_norm=extent_norm,

            refine_weights=refine_weights,
            max_leaf_refine_weight=20,
            # (max_particles_in_box=10 is the disabled alternative.)

            # Set artificially small, to exercise the reallocation code.
            nboxes_guess=10,

            debug=True,
            stick_out_factor=0)

    logger.info("transfer tree, check orderings")

    tree = dev_tree.get(queue=queue)

    if do_plot:
        import matplotlib.pyplot as pt
        from boxtree.visualization import TreePlotter

        pt.plot(sources[0].get(), sources[1].get(), "rx")
        pt.plot(targets[0].get(), targets[1].get(), "g+")

        tree_plotter = TreePlotter(tree)
        tree_plotter.draw_tree(fill=False, edgecolor="black", zorder=10)
        tree_plotter.draw_box_numbers()
        tree_plotter.set_bounding_box()

        pt.gca().set_aspect("equal", "datalim")
        pt.show()

    sorted_sources = np.array(list(tree.sources))
    sorted_targets = np.array(list(tree.targets))
    sorted_source_radii = tree.source_radii
    sorted_target_radii = tree.target_radii

    unsorted_sources = np.array([axis.get() for axis in sources])
    unsorted_targets = np.array([axis.get() for axis in targets])
    unsorted_source_radii = source_radii.get()
    unsorted_target_radii = target_radii.get()

    source_perm = tree.user_source_ids
    assert (sorted_sources == unsorted_sources[:, source_perm]).all()
    assert (sorted_source_radii == unsorted_source_radii[source_perm]).all()

    # {{{ test box structure, stick-out criterion

    logger.info("test box structure, stick-out criterion")

    # Invert sorted_target_ids to obtain, for each tree-order target, its
    # index in the user's ordering.
    user_target_ids = np.empty(tree.ntargets, dtype=np.intp)
    user_target_ids[tree.sorted_target_ids] = np.arange(
            tree.ntargets, dtype=np.intp)
    if ntargets:
        assert (sorted_targets
                == unsorted_targets[:, user_target_ids]).all()
        assert (sorted_target_radii
                == unsorted_target_radii[user_target_ids]).all()

    all_good_so_far = True

    # {{{ check sources, targets

    assert np.sum(tree.box_source_counts_nonchild) == nsources
    assert np.sum(tree.box_target_counts_nonchild) == ntargets

    # Cumulative target count == own targets + cumulative counts of children.
    for ibox in range(tree.nboxes):
        kid_sum = 0
        for ichild_box in tree.box_child_ids[:, ibox]:
            # Child id 0 marks "no child".
            if ichild_box != 0:
                kid_sum += tree.box_target_counts_cumul[ichild_box]

        expected_cumul = tree.box_target_counts_nonchild[ibox] + kid_sum
        assert tree.box_target_counts_cumul[ibox] == expected_cumul, ibox

    for ibox in range(tree.nboxes):
        extent_low, extent_high = tree.get_box_extent(ibox)

        # The box must lie within the bounding box, up to a small tolerance.
        assert (extent_low
                >= tree.bounding_box[0] - 1e-12*tree.root_extent).all(), ibox
        assert (extent_high
                <= tree.bounding_box[1] + 1e-12*tree.root_extent).all(), ibox

        child_ids = tree.box_child_ids[:, ibox]
        live_children = child_ids[child_ids != 0]

        assert (tree.box_source_counts_nonchild[ibox]
                + np.sum(tree.box_source_counts_cumul[live_children])
                == tree.box_source_counts_cumul[ibox])
        assert (tree.box_target_counts_nonchild[ibox]
                + np.sum(tree.box_target_counts_cumul[live_children])
                == tree.box_target_counts_cumul[ibox])

    del live_children
    del child_ids

    particle_kinds = [
            ("source",
                tree.box_source_starts, tree.box_source_counts_cumul,
                sorted_sources, sorted_source_radii),
            ("target",
                tree.box_target_starts, tree.box_target_counts_cumul,
                sorted_targets, sorted_target_radii),
            ]

    for ibox in range(tree.nboxes):
        # Box geometry derived from the box's level and center.
        lev = int(tree.box_levels[ibox])
        box_radius = 0.5 * tree.root_extent / (1 << lev)
        box_center = tree.box_centers[:, ibox]
        extent_low = box_center - box_radius
        extent_high = box_center + box_radius

        stick_out_dist = tree.stick_out_factor * box_radius
        radius_with_stickout = (1 + tree.stick_out_factor) * box_radius

        for what, starts, counts, points, radii in particle_kinds:
            first = starts[ibox]
            in_box = slice(first, first+counts[ibox])
            check_particles = points[:, in_box]
            check_radii = radii[in_box]

            if extent_norm == "linf":
                below_upper = (
                        check_particles + check_radii
                        < extent_high[:, np.newaxis] + stick_out_dist)
                above_lower = (
                        extent_low[:, np.newaxis] - stick_out_dist
                        <= check_particles - check_radii)
                good = (below_upper & above_lower).all(axis=0)

            elif extent_norm == "l2":
                center_offsets = check_particles - box_center.reshape(-1, 1)
                center_dists = np.sqrt(np.sum(center_offsets**2, axis=0))

                good = (
                        (center_dists + check_radii)**2
                        < dims * radius_with_stickout**2)

            else:
                raise ValueError("unexpected value of extent_norm")

            all_good_here = good.all()

            if not all_good_here:
                print("BAD BOX %s %d level %d"
                        % (what, ibox, tree.box_levels[ibox]))

            all_good_so_far = all_good_so_far and all_good_here
            assert all_good_here

    # }}}

    assert all_good_so_far

    # }}}

    # {{{ create, link point sources

    logger.info("creating point sources")

    np.random.seed(20)

    from pytools.obj_array import make_obj_array

    # Scatter point sources around each source, each coordinate offset by
    # at most the source's radius.
    radii_column = unsorted_source_radii[:, np.newaxis]
    point_source_axes = []
    for iaxis in range(dims):
        offsets = radii_column * np.random.uniform(
                -1, 1, size=(nsources, npoint_sources_per_source))
        point_source_axes.append(
                cl.array.to_device(
                    queue,
                    unsorted_sources[iaxis][:, np.newaxis] + offsets))
    point_sources = make_obj_array(point_source_axes)

    point_source_starts = cl.array.arange(
            queue,
            0, (nsources+1)*npoint_sources_per_source,
            npoint_sources_per_source,
            dtype=tree.particle_id_dtype)

    from boxtree.tree import link_point_sources
    dev_tree = link_point_sources(
            queue, dev_tree, point_source_starts, point_sources,
            debug=True)

    # }}}
def test_pyfmmlib_fmm(ctx_getter, dims, use_dipoles, helmholtz_k):
    """Check the pyfmmlib-backed FMM against direct evaluation.

    The FMM potential is compared, in the relative infinity norm, with
    pyfmmlib's direct summation routine and, when sumpy can be imported,
    with a sumpy P2P evaluation.

    :arg ctx_getter: callable returning the OpenCL context to use.
    :arg dims: spatial dimension (3 selects the "fld" routines, otherwise
        the "grad" routines are used).
    :arg use_dipoles: whether sources are dipoles with random directions.
    :arg helmholtz_k: Helmholtz parameter; a falsy value means no ``zk`` is
        passed to fmmlib and sumpy uses the Laplace kernel.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 3000
    ntargets = 1000
    dtype = np.float64

    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    # Targets are offset by 2 along the first axis.
    targets = (
            p_normal(queue, ntargets, dims, dtype, seed=18)
            + np.array([2, 0, 0])[:dims])

    sources_host = particle_array_to_host(sources)
    targets_host = particle_array_to_host(targets)

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, targets=targets,
            max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)

    trav = trav.get(queue=queue)

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=20)

    weights = rng.uniform(queue, nsources, dtype=np.float64).get()
    # (For debugging, unit weights may be substituted: np.ones(nsources).)

    if use_dipoles:
        np.random.seed(13)
        dipole_vec = np.random.randn(dims, nsources)
    else:
        dipole_vec = None

    if dims == 2 and helmholtz_k == 0:
        base_nterms = 20
    else:
        base_nterms = 10

    def fmm_level_to_nterms(tree, lev):
        result = base_nterms

        if lev < 3 and helmholtz_k:
            # exercise order-varies-by-level capability
            result += 5

        if use_dipoles:
            result += 1

        return result

    from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler
    wrangler = FMMLibExpansionWrangler(
            trav.tree, helmholtz_k,
            fmm_level_to_nterms=fmm_level_to_nterms,
            dipole_vec=dipole_vec)

    from boxtree.fmm import drive_fmm

    timing_data = {}
    pot = drive_fmm(trav, wrangler, weights, timing_data=timing_data)
    print(timing_data)
    # The driver is expected to have recorded some timing information.
    assert timing_data

    # {{{ ref fmmlib computation

    logger.info("computing direct (reference) result")

    import pyfmmlib
    # Routine name: <eqn letter>pot{fld|grad}<dims>dall[_dp]_vec.
    fmmlib_routine = getattr(
            pyfmmlib,
            "%spot%s%ddall%s_vec" % (
                wrangler.eqn_letter,
                "fld" if dims == 3 else "grad",
                dims,
                "_dp" if use_dipoles else ""))

    # Only the potential is needed; disable derivative outputs.
    kwargs = {}
    if dims == 3:
        kwargs["iffld"] = False
    else:
        kwargs["ifgrad"] = False
        kwargs["ifhess"] = False

    if use_dipoles:
        if helmholtz_k == 0 and dims == 2:
            # Here the dipole direction is folded into a complex-valued
            # dipole strength and no separate "dipvec" is passed.
            kwargs["dipstr"] = -weights * (dipole_vec[0] + 1j * dipole_vec[1])
        else:
            kwargs["dipstr"] = weights
            kwargs["dipvec"] = dipole_vec
    else:
        kwargs["charge"] = weights
    if helmholtz_k:
        kwargs["zk"] = helmholtz_k

    ref_pot = wrangler.finalize_potentials(
            fmmlib_routine(
                sources=sources_host.T, targets=targets_host.T,
                **kwargs)[0])

    rel_err = la.norm(pot - ref_pot, np.inf) / la.norm(ref_pot, np.inf)
    # The norm above is the infinity norm, so label it as such.
    logger.info("relative linf error vs fmmlib direct: %g", rel_err)
    assert rel_err < 1e-5, rel_err

    # }}}

    # {{{ check against sumpy

    try:
        import sumpy  # noqa
    except ImportError:
        have_sumpy = False
        from warnings import warn
        warn("sumpy unavailable: cannot compute independent reference "
                "values for pyfmmlib")
    else:
        have_sumpy = True

    if have_sumpy:
        from sumpy.kernel import (
                LaplaceKernel, HelmholtzKernel, DirectionalSourceDerivative)
        from sumpy.p2p import P2P

        sumpy_extra_kwargs = {}
        if helmholtz_k:
            knl = HelmholtzKernel(dims)
            sumpy_extra_kwargs["k"] = helmholtz_k
        else:
            knl = LaplaceKernel(dims)

        if use_dipoles:
            knl = DirectionalSourceDerivative(knl)
            sumpy_extra_kwargs["src_derivative_dir"] = dipole_vec

        p2p = P2P(ctx, [knl], exclude_self=False)

        _evt, (sumpy_ref_pot, ) = p2p(
                queue, targets, sources, [weights],
                out_host=True, **sumpy_extra_kwargs)

        sumpy_rel_err = (
                la.norm(pot - sumpy_ref_pot, np.inf)
                / la.norm(sumpy_ref_pot, np.inf))

        logger.info("relative linf error vs sumpy direct: %g", sumpy_rel_err)
        assert sumpy_rel_err < 1e-5, sumpy_rel_err

    # }}}
def test_extent_tree(ctx_factory, dims, extent_norm, do_plot=False):
    """Build a tree from particles with extent and verify its structure.

    Checks, in order: that the tree's sorted sources/targets/radii are
    permutations of the inputs as described by the tree's id arrays, that
    particle counts are consistent across the box hierarchy, that every box
    lies within the bounding box, and that each particle (including its
    radius) fits in its box according to *extent_norm*. Finally, point
    sources are generated around each source and linked into the tree.

    :arg ctx_factory: callable returning the OpenCL context to use.
    :arg dims: spatial dimension.
    :arg extent_norm: ``"linf"`` or ``"l2"``.
    :arg do_plot: if true, plot the particles and the tree.
    :raises ValueError: if *extent_norm* is neither ``"linf"`` nor ``"l2"``.
    """
    logging.basicConfig(level=logging.INFO)

    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)

    nsources = 100000
    ntargets = 200000
    dtype = np.float64
    npoint_sources_per_source = 16

    sources = make_normal_particle_array(queue, nsources, dims, dtype,
                                         seed=12)
    targets = make_normal_particle_array(queue, ntargets, dims, dtype,
                                         seed=19)

    # Weight 1 for the first nsources entries, 0 for the remaining ones.
    refine_weights = cl.array.zeros(queue, nsources + ntargets, np.int32)
    refine_weights[:nsources] = 1

    from pyopencl.clrandom import PhiloxGenerator
    radius_rng = PhiloxGenerator(queue.context, seed=13)
    # Radii are 2**u with u drawn uniformly between -10 and 0.
    source_radii = 2**radius_rng.uniform(
        queue, nsources, dtype=dtype, a=-10, b=0)
    target_radii = 2**radius_rng.uniform(
        queue, ntargets, dtype=dtype, a=-10, b=0)

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(cl_ctx)

    queue.finish()
    dev_tree, _ = build_tree(
        queue,
        sources,
        targets=targets,
        source_radii=source_radii,
        target_radii=target_radii,
        extent_norm=extent_norm,
        refine_weights=refine_weights,
        max_leaf_refine_weight=20,
        # (max_particles_in_box=10 is the disabled alternative.)

        # Set artificially small, to exercise the reallocation code.
        nboxes_guess=10,
        debug=True,
        stick_out_factor=0)

    logger.info("transfer tree, check orderings")

    tree = dev_tree.get(queue=queue)

    if do_plot:
        import matplotlib.pyplot as pt
        from boxtree.visualization import TreePlotter

        pt.plot(sources[0].get(), sources[1].get(), "rx")
        pt.plot(targets[0].get(), targets[1].get(), "g+")

        tree_plotter = TreePlotter(tree)
        tree_plotter.draw_tree(fill=False, edgecolor="black", zorder=10)
        tree_plotter.draw_box_numbers()
        tree_plotter.set_bounding_box()

        pt.gca().set_aspect("equal", "datalim")
        pt.show()

    sorted_sources = np.array(list(tree.sources))
    sorted_targets = np.array(list(tree.targets))
    sorted_source_radii = tree.source_radii
    sorted_target_radii = tree.target_radii

    unsorted_sources = np.array([axis.get() for axis in sources])
    unsorted_targets = np.array([axis.get() for axis in targets])
    unsorted_source_radii = source_radii.get()
    unsorted_target_radii = target_radii.get()

    source_perm = tree.user_source_ids
    assert (sorted_sources == unsorted_sources[:, source_perm]).all()
    assert (sorted_source_radii == unsorted_source_radii[source_perm]).all()

    # {{{ test box structure, stick-out criterion

    logger.info("test box structure, stick-out criterion")

    # Invert sorted_target_ids to obtain, for each tree-order target, its
    # index in the user's ordering.
    user_target_ids = np.empty(tree.ntargets, dtype=np.intp)
    user_target_ids[tree.sorted_target_ids] = np.arange(tree.ntargets,
                                                        dtype=np.intp)
    if ntargets:
        assert (sorted_targets
                == unsorted_targets[:, user_target_ids]).all()
        assert (sorted_target_radii
                == unsorted_target_radii[user_target_ids]).all()

    all_good_so_far = True

    # {{{ check sources, targets

    assert np.sum(tree.box_source_counts_nonchild) == nsources
    assert np.sum(tree.box_target_counts_nonchild) == ntargets

    # Cumulative target count == own targets + cumulative counts of children.
    for ibox in range(tree.nboxes):
        kid_sum = 0
        for ichild_box in tree.box_child_ids[:, ibox]:
            # Child id 0 marks "no child".
            if ichild_box != 0:
                kid_sum += tree.box_target_counts_cumul[ichild_box]

        expected_cumul = tree.box_target_counts_nonchild[ibox] + kid_sum
        assert tree.box_target_counts_cumul[ibox] == expected_cumul, ibox

    for ibox in range(tree.nboxes):
        extent_low, extent_high = tree.get_box_extent(ibox)

        # The box must lie within the bounding box, up to a small tolerance.
        assert (extent_low
                >= tree.bounding_box[0] - 1e-12 * tree.root_extent
                ).all(), ibox
        assert (extent_high
                <= tree.bounding_box[1] + 1e-12 * tree.root_extent
                ).all(), ibox

        child_ids = tree.box_child_ids[:, ibox]
        live_children = child_ids[child_ids != 0]

        assert (tree.box_source_counts_nonchild[ibox]
                + np.sum(tree.box_source_counts_cumul[live_children])
                == tree.box_source_counts_cumul[ibox])
        assert (tree.box_target_counts_nonchild[ibox]
                + np.sum(tree.box_target_counts_cumul[live_children])
                == tree.box_target_counts_cumul[ibox])

    del live_children
    del child_ids

    particle_kinds = [
        ("source", tree.box_source_starts, tree.box_source_counts_cumul,
         sorted_sources, sorted_source_radii),
        ("target", tree.box_target_starts, tree.box_target_counts_cumul,
         sorted_targets, sorted_target_radii),
    ]

    for ibox in range(tree.nboxes):
        # Box geometry derived from the box's level and center.
        lev = int(tree.box_levels[ibox])
        box_radius = 0.5 * tree.root_extent / (1 << lev)
        box_center = tree.box_centers[:, ibox]
        extent_low = box_center - box_radius
        extent_high = box_center + box_radius

        stick_out_dist = tree.stick_out_factor * box_radius
        radius_with_stickout = (1 + tree.stick_out_factor) * box_radius

        for what, starts, counts, points, radii in particle_kinds:
            first = starts[ibox]
            in_box = slice(first, first + counts[ibox])
            check_particles = points[:, in_box]
            check_radii = radii[in_box]

            if extent_norm == "linf":
                below_upper = (
                    check_particles + check_radii
                    < extent_high[:, np.newaxis] + stick_out_dist)
                above_lower = (
                    extent_low[:, np.newaxis] - stick_out_dist
                    <= check_particles - check_radii)
                good = (below_upper & above_lower).all(axis=0)

            elif extent_norm == "l2":
                center_offsets = check_particles - box_center.reshape(-1, 1)
                center_dists = np.sqrt(np.sum(center_offsets**2, axis=0))

                good = ((center_dists + check_radii)**2
                        < dims * radius_with_stickout**2)

            else:
                raise ValueError("unexpected value of extent_norm")

            all_good_here = good.all()

            if not all_good_here:
                print("BAD BOX %s %d level %d" %
                      (what, ibox, tree.box_levels[ibox]))

            all_good_so_far = all_good_so_far and all_good_here
            assert all_good_here

    # }}}

    assert all_good_so_far

    # }}}

    # {{{ create, link point sources

    logger.info("creating point sources")

    np.random.seed(20)

    from pytools.obj_array import make_obj_array

    # Scatter point sources around each source, each coordinate offset by
    # at most the source's radius.
    radii_column = unsorted_source_radii[:, np.newaxis]
    point_source_axes = []
    for iaxis in range(dims):
        offsets = radii_column * np.random.uniform(
            -1, 1, size=(nsources, npoint_sources_per_source))
        point_source_axes.append(
            cl.array.to_device(
                queue, unsorted_sources[iaxis][:, np.newaxis] + offsets))
    point_sources = make_obj_array(point_source_axes)

    point_source_starts = cl.array.arange(
        queue,
        0, (nsources + 1) * npoint_sources_per_source,
        npoint_sources_per_source,
        dtype=tree.particle_id_dtype)

    from boxtree.tree import link_point_sources
    dev_tree = link_point_sources(queue,
                                  dev_tree,
                                  point_source_starts,
                                  point_sources,
                                  debug=True)

    # }}}
def plot_traversal(ctx_getter, do_plot=False):
    """Build a tree and an FMM traversal for random particles and plot a few
    interaction lists.

    The box tree is drawn first. Then, for five randomly chosen boxes (drawn
    in red), the boxes on one of their traversal lists are drawn in yellow.
    Which list is shown is selected by the hand-edited ``if 0:``/``elif 1:``
    chain below; as written, the "separated smaller (list 3)" branch runs and
    the "separated bigger (list 4)" branch after it is never reached.

    :arg ctx_getter: callable returning the OpenCL context to use.
    :arg do_plot: currently unused (only referenced by commented-out code);
        kept so existing callers keep working.
    """
    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    #for dims in [2, 3]:
    for dims in [2]:
        nparticles = 10**4
        dtype = np.float64

        from pyopencl.clrandom import PhiloxGenerator
        rng = PhiloxGenerator(queue.context, seed=15)

        from pytools.obj_array import make_obj_array
        particles = make_obj_array([
            rng.normal(queue, nparticles, dtype=dtype)
            for i in range(dims)])

        # if do_plot:
        #     pt.plot(particles[0].get(), particles[1].get(), "x")

        from boxtree import TreeBuilder
        tb = TreeBuilder(ctx)

        queue.finish()
        # Both builders hand back a pair whose first entry is the actual
        # result (this is how every other caller in this file unpacks them),
        # so unpack instead of treating the pair as the tree/traversal.
        tree, _ = tb(queue, particles, max_particles_in_box=30, debug=True)

        from boxtree.traversal import FMMTraversalBuilder
        tg = FMMTraversalBuilder(ctx)
        trav, _ = tg(queue, tree)

        # Plotting and the list slicing below operate on host-side copies.
        tree = tree.get(queue=queue)
        trav = trav.get(queue=queue)

        from boxtree.visualization import TreePlotter
        plotter = TreePlotter(tree)
        plotter.draw_tree(fill=False, edgecolor="black")
        #plotter.draw_box_numbers()
        plotter.set_bounding_box()

        from random import randrange, seed
        # Fixed seed so the same boxes are picked on every run.
        seed(7)

        # {{{ generic box drawing helper

        def draw_some_box_lists(starts, lists, key_to_box=None, count=5):
            """Highlight *count* random boxes (red) and their list entries
            (yellow).

            *starts*/*lists* form a CSR-style pair: entries for key ``k`` are
            ``lists[starts[k]:starts[k+1]]``. If *key_to_box* is given, keys
            index into it to obtain the box number; otherwise the key is the
            box number itself. Boxes with an empty list are skipped and do
            not count toward *count*.
            """
            actual_count = 0
            while actual_count < count:
                if key_to_box is not None:
                    key = randrange(len(key_to_box))
                    ibox = key_to_box[key]
                else:
                    key = ibox = randrange(tree.nboxes)

                start, end = starts[key:key + 2]
                if start == end:
                    continue

                #print ibox, start, end, lists[start:end]
                for jbox in lists[start:end]:
                    plotter.draw_box(jbox, facecolor='yellow')

                plotter.draw_box(ibox, facecolor='red')

                actual_count += 1

        # }}}

        # Manual selector: only the first branch with a truthy condition runs.
        if 0:
            # colleagues
            draw_some_box_lists(
                trav.colleagues_starts,
                trav.colleagues_lists)
        elif 0:
            # near neighbors ("list 1")
            draw_some_box_lists(
                trav.neighbor_leaves_starts,
                trav.neighbor_leaves_lists,
                key_to_box=trav.source_boxes)
        elif 0:
            # well-separated siblings (list 2)
            draw_some_box_lists(
                trav.sep_siblings_starts,
                trav.sep_siblings_lists)
        elif 1:
            # separated smaller (list 3)
            draw_some_box_lists(
                trav.sep_smaller_starts,
                trav.sep_smaller_lists,
                key_to_box=trav.source_boxes)
        elif 1:
            # separated bigger (list 4)
            draw_some_box_lists(
                trav.sep_bigger_starts,
                trav.sep_bigger_lists)

        import matplotlib.pyplot as pt
        pt.show()
def test_sumpy_fmm(ctx_getter, knl, local_expn_class, mpole_expn_class):
    """Check that the sumpy-backed FMM converges to the direct P2P sum.

    Sources and (slightly shifted) targets are drawn from a normal
    distribution, a tree and traversal are built, and for each expansion
    order the FMM potential is compared against the direct point-to-point
    evaluation. The relative max-norm errors are fed to a
    ``PConvergenceVerifier``, which is invoked at the end.

    :arg ctx_getter: callable returning the OpenCL context to use.
    :arg knl: kernel instance; ``knl.dim`` gives the spatial dimension.
    :arg local_expn_class: local expansion class, called as
        ``local_expn_class(knl, ...)`` via :func:`functools.partial`.
    :arg mpole_expn_class: multipole expansion class, used the same way.
    """
    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 1000
    ntargets = 300
    dtype = np.float64

    from boxtree.tools import (
        make_normal_particle_array as p_normal)

    sources = p_normal(queue, nsources, knl.dim, dtype, seed=15)
    if 1:
        # Shift the targets along the first axis so they do not share the
        # sources' distribution center.
        offset = np.zeros(knl.dim)
        offset[0] = 0.1

        targets = (
            p_normal(queue, ntargets, knl.dim, dtype, seed=18)
            + offset)

        del offset
    else:
        from sumpy.visualization import FieldPlotter
        fp = FieldPlotter(np.array([0.5, 0]), extent=3, npoints=200)
        from pytools.obj_array import make_obj_array
        targets = make_obj_array(
            [fp.points[i] for i in range(knl.dim)])

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, targets=targets,
                 max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)

    # {{{ plot tree

    if 0:
        host_tree = tree.get()
        host_trav = trav.get()

        if 1:
            print("src_box", host_tree.find_box_nr_for_source(403))
            print("tgt_box", host_tree.find_box_nr_for_target(28))
            print(list(host_trav.target_or_target_parent_boxes).index(37))
            print(host_trav.get_box_list("sep_bigger", 22))

        from boxtree.visualization import TreePlotter
        plotter = TreePlotter(host_tree)
        plotter.draw_tree(fill=False, edgecolor="black", zorder=10)
        plotter.set_bounding_box()
        plotter.draw_box_numbers()

        import matplotlib.pyplot as pt
        pt.show()

    # }}}

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(ctx, seed=44)
    weights = rng.uniform(queue, nsources, dtype=np.float64)

    from pytools.convergence import PConvergenceVerifier

    pconv_verifier = PConvergenceVerifier()

    # Kernel-specific extra arguments, result dtype and expansion orders.
    extra_kwargs = {}
    dtype = np.float64
    order_values = [1, 2, 3]
    if isinstance(knl, HelmholtzKernel):
        extra_kwargs["k"] = 0.05
        dtype = np.complex128

        if knl.dim == 3:
            order_values = [1, 2]
        elif knl.dim == 2 and issubclass(local_expn_class, H2DLocalExpansion):
            order_values = [10, 12]

    elif isinstance(knl, YukawaKernel):
        extra_kwargs["lam"] = 2
        dtype = np.complex128

        if knl.dim == 3:
            order_values = [1, 2]
        elif knl.dim == 2 and issubclass(local_expn_class, Y2DLocalExpansion):
            order_values = [10, 12]

    from functools import partial
    from sumpy.fmm import SumpyExpansionWranglerCodeContainer
    from boxtree.fmm import drive_fmm
    from sumpy import P2P

    out_kernels = [knl]

    # The direct sum does not depend on the expansion order, so evaluate it
    # once up front instead of once per order.
    logger.info("computing direct (reference) result")
    p2p = P2P(ctx, out_kernels, exclude_self=False)
    _evt, (ref_pot,) = p2p(queue, targets, sources, (weights,),
                           **extra_kwargs)
    ref_pot = ref_pot.get()
    ref_norm = la.norm(ref_pot, np.inf)

    for order in order_values:
        wcc = SumpyExpansionWranglerCodeContainer(
            ctx,
            partial(mpole_expn_class, knl),
            partial(local_expn_class, knl),
            out_kernels)
        wrangler = wcc.get_wrangler(
            queue, tree, dtype,
            fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order,
            kernel_extra_kwargs=extra_kwargs)

        pot, = drive_fmm(trav, wrangler, weights)
        pot = pot.get()

        # The error is measured in the max norm, and the message says so.
        rel_err = la.norm(pot - ref_pot, np.inf) / ref_norm
        logger.info("order %d -> relative linf error: %g", order, rel_err)

        pconv_verifier.add_data_point(order, rel_err)

    print(pconv_verifier)
    pconv_verifier()
def test_estimate_calibration_params(ctx_factory):
    """Estimate cost-model calibration parameters from timed FMM runs.

    Four problems of increasing size are set up and run through
    ``drive_fmm`` with an ``FMMLibExpansionWrangler`` while collecting timing
    data. All but the last case are then fed to both the Python and the
    OpenCL cost model; each must return ``np.float64`` calibration
    parameters, and when process time is supported the two parameter sets
    must be equal.

    :arg ctx_factory: callable returning the OpenCL context to use.
    """
    from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler
    from boxtree.tools import make_normal_particle_array as p_normal
    from pyopencl.clrandom import PhiloxGenerator
    from boxtree import TreeBuilder
    from boxtree.traversal import FMMTraversalBuilder
    from boxtree.fmm import drive_fmm

    # (nsources, ntargets) per test case
    problem_sizes = [(1000, 1000), (2000, 2000), (3000, 3000), (4000, 4000)]
    dims = 3
    dtype = np.float64

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    traversals = []
    traversals_dev = []
    level_to_orders = []
    timing_results = []

    def fmm_level_to_nterms(tree, ilevel):
        # Same number of terms on every level.
        return 10

    for nsources, ntargets in problem_sizes:
        # {{{ Generate sources, targets and target_radii

        sources = p_normal(queue, nsources, dims, dtype, seed=15)
        targets = p_normal(queue, ntargets, dims, dtype, seed=18)

        radius_rng = PhiloxGenerator(queue.context, seed=22)
        target_radii = radius_rng.uniform(
            queue, ntargets, a=0, b=0.05, dtype=dtype).get()

        # }}}

        # {{{ Generate tree and traversal

        tree_builder = TreeBuilder(ctx)
        tree, _ = tree_builder(
            queue, sources, targets=targets, target_radii=target_radii,
            stick_out_factor=0.15, max_particles_in_box=30, debug=True)

        trav_builder = FMMTraversalBuilder(ctx, well_sep_is_n_away=2)
        trav_dev, _ = trav_builder(queue, tree, debug=True)
        trav = trav_dev.get(queue=queue)

        traversals.append(trav)
        traversals_dev.append(trav_dev)

        # }}}

        wrangler = FMMLibExpansionWrangler(trav.tree, 0, fmm_level_to_nterms)
        level_to_orders.append(wrangler.level_nterms)

        case_timing = {}
        src_weights = np.random.rand(tree.nsources).astype(tree.coord_dtype)
        drive_fmm(trav, wrangler, (src_weights,), timing_data=case_timing)

        timing_results.append(case_timing)

    time_field_name = (
        "process_elapsed" if SUPPORTS_PROCESS_TIME else "wall_elapsed")

    param_names = ["c_p2m", "c_m2m", "c_p2p", "c_m2l",
                   "c_m2p", "c_p2l", "c_l2l", "c_l2p"]

    def check_params_sanity(params):
        for name in param_names:
            assert isinstance(params[name], np.float64)

    def check_params_equal(params_a, params_b):
        for name in param_names:
            assert params_a[name] == params_b[name]

    # {{{ Python cost model (last case is left out)

    python_cost_model = _PythonFMMCostModel(
        make_pde_aware_translation_cost_model)

    python_model_results = [
        python_cost_model.cost_per_stage(
            queue, host_trav, orders,
            _PythonFMMCostModel.get_unit_calibration_params(),
        )
        for host_trav, orders in zip(traversals[:-1], level_to_orders[:-1])]

    python_params = python_cost_model.estimate_calibration_params(
        python_model_results, timing_results[:-1],
        time_field_name=time_field_name
    )

    check_params_sanity(python_params)

    # }}}

    # {{{ OpenCL cost model (last case is left out)

    cl_cost_model = FMMCostModel(make_pde_aware_translation_cost_model)

    cl_model_results = [
        cl_cost_model.cost_per_stage(
            queue, dev_trav, orders,
            FMMCostModel.get_unit_calibration_params(),
        )
        for dev_trav, orders in zip(traversals_dev[:-1], level_to_orders[:-1])]

    cl_params = cl_cost_model.estimate_calibration_params(
        cl_model_results, timing_results[:-1],
        time_field_name=time_field_name
    )

    check_params_sanity(cl_params)

    # }}}

    if SUPPORTS_PROCESS_TIME:
        check_params_equal(cl_params, python_params)
def test_sumpy_fmm(ctx_getter, knl, local_expn_class, mpole_expn_class):
    """Check that the sumpy-backed FMM converges to the direct P2P sum.

    Sources and (slightly shifted) targets are drawn from a normal
    distribution, a tree and traversal are built, and for each expansion
    order the FMM potential is compared against the direct point-to-point
    evaluation. The relative max-norm errors are fed to a
    ``PConvergenceVerifier``, which is invoked at the end.

    :arg ctx_getter: callable returning the OpenCL context to use.
    :arg knl: kernel instance; ``knl.dim`` gives the spatial dimension.
    :arg local_expn_class: local expansion class, called as
        ``local_expn_class(knl, ...)`` via :func:`functools.partial`.
    :arg mpole_expn_class: multipole expansion class, used the same way.
    """
    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 1000
    ntargets = 300
    dtype = np.float64

    from boxtree.tools import (
        make_normal_particle_array as p_normal)

    sources = p_normal(queue, nsources, knl.dim, dtype, seed=15)
    if 1:
        # Shift the targets along the first axis so they do not share the
        # sources' distribution center.
        offset = np.zeros(knl.dim)
        offset[0] = 0.1

        targets = (
            p_normal(queue, ntargets, knl.dim, dtype, seed=18)
            + offset)

        del offset
    else:
        from sumpy.visualization import FieldPlotter
        fp = FieldPlotter(np.array([0.5, 0]), extent=3, npoints=200)
        from pytools.obj_array import make_obj_array
        targets = make_obj_array(
            [fp.points[i] for i in range(knl.dim)])

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, targets=targets,
                 max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)

    # {{{ plot tree

    if 0:
        host_tree = tree.get()
        host_trav = trav.get()

        if 1:
            print("src_box", host_tree.find_box_nr_for_source(403))
            print("tgt_box", host_tree.find_box_nr_for_target(28))
            print(list(host_trav.target_or_target_parent_boxes).index(37))
            print(host_trav.get_box_list("sep_bigger", 22))

        from boxtree.visualization import TreePlotter
        plotter = TreePlotter(host_tree)
        plotter.draw_tree(fill=False, edgecolor="black", zorder=10)
        plotter.set_bounding_box()
        plotter.draw_box_numbers()

        import matplotlib.pyplot as pt
        pt.show()

    # }}}

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(ctx, seed=44)
    weights = rng.uniform(queue, nsources, dtype=np.float64)

    from pytools.convergence import PConvergenceVerifier

    pconv_verifier = PConvergenceVerifier()

    # Kernel-specific extra arguments, result dtype and expansion orders.
    extra_kwargs = {}
    dtype = np.float64
    order_values = [1, 2, 3]
    if isinstance(knl, HelmholtzKernel):
        extra_kwargs["k"] = 0.05
        dtype = np.complex128

        if knl.dim == 3:
            order_values = [1, 2]
        elif knl.dim == 2 and issubclass(local_expn_class, H2DLocalExpansion):
            order_values = [10, 12]

    elif isinstance(knl, YukawaKernel):
        extra_kwargs["lam"] = 2
        dtype = np.complex128

        if knl.dim == 3:
            order_values = [1, 2]
        elif knl.dim == 2 and issubclass(local_expn_class, Y2DLocalExpansion):
            order_values = [10, 12]

    from functools import partial
    from sumpy.fmm import SumpyExpansionWranglerCodeContainer
    from boxtree.fmm import drive_fmm
    from sumpy import P2P

    out_kernels = [knl]

    # The direct sum does not depend on the expansion order, so evaluate it
    # once up front instead of once per order.
    logger.info("computing direct (reference) result")
    p2p = P2P(ctx, out_kernels, exclude_self=False)
    _evt, (ref_pot,) = p2p(queue, targets, sources, (weights,),
                           **extra_kwargs)
    ref_pot = ref_pot.get()
    ref_norm = la.norm(ref_pot, np.inf)

    for order in order_values:
        wcc = SumpyExpansionWranglerCodeContainer(
            ctx,
            partial(mpole_expn_class, knl),
            partial(local_expn_class, knl),
            out_kernels)
        wrangler = wcc.get_wrangler(
            queue, tree, dtype,
            fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order,
            kernel_extra_kwargs=extra_kwargs)

        pot, = drive_fmm(trav, wrangler, weights)
        pot = pot.get()

        # The error is measured in the max norm, and the message says so.
        rel_err = la.norm(pot - ref_pot, np.inf) / ref_norm
        logger.info("order %d -> relative linf error: %g", order, rel_err)

        pconv_verifier.add_data_point(order, rel_err)

    print(pconv_verifier)
    pconv_verifier()
def test_cost_model_op_counts_agree_with_constantone_wrangler(
        ctx_factory, nsources, ntargets, dims, dtype):
    """Compare cost-model operation counts with those recorded by an FMM run.

    An FMM is driven with ``ConstantOneExpansionWrangler`` while collecting
    ``timing_data``; the ``"ops_elapsed"`` entry of every stage must equal
    the per-stage value returned by ``FMMCostModel.cost_per_stage`` using
    ``OpCountingTranslationCostModel``. The sum over all stages must also
    equal the aggregated per-box cost plus the modeled
    ``"coarsen_multipoles"`` and ``"refine_locals"`` stages.

    :arg ctx_factory: callable returning the OpenCL context to use.
    :arg nsources: number of source particles.
    :arg ntargets: number of target particles.
    :arg dims: spatial dimension of the particle arrays.
    :arg dtype: dtype passed to the particle and radius generators.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    from boxtree.tools import make_normal_particle_array as p_normal
    sources = p_normal(queue, nsources, dims, dtype, seed=16)
    targets = p_normal(queue, ntargets, dims, dtype, seed=19)

    from pyopencl.clrandom import PhiloxGenerator
    radius_rng = PhiloxGenerator(queue.context, seed=20)
    target_radii = radius_rng.uniform(
        queue, ntargets, a=0, b=0.04, dtype=dtype).get()

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(ctx)
    tree, _ = tree_builder(
        queue, sources, targets=targets, target_radii=target_radii,
        stick_out_factor=0.15, max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    trav_builder = FMMTraversalBuilder(ctx, well_sep_is_n_away=2)
    trav_dev, _ = trav_builder(queue, tree, debug=True)
    trav = trav_dev.get(queue=queue)

    from boxtree.tools import ConstantOneExpansionWrangler
    wrangler = ConstantOneExpansionWrangler(trav.tree)

    timing_data = {}
    from boxtree.fmm import drive_fmm
    src_weights = np.random.rand(tree.nsources).astype(tree.coord_dtype)
    drive_fmm(trav, wrangler, (src_weights,), timing_data=timing_data)

    cost_model = FMMCostModel(
        translation_cost_model_factory=OpCountingTranslationCostModel
    )

    # Order 1 on every level.
    level_to_order = np.array([1 for _level in range(tree.nlevels)])

    modeled_time = cost_model.cost_per_stage(
        queue, trav_dev, level_to_order,
        FMMCostModel.get_unit_calibration_params(),
    )

    # Collect every stage whose recorded count differs from the model so the
    # assertion message lists all of them at once.
    mismatches = [
        (stage, stage_timing["ops_elapsed"], modeled_time[stage])
        for stage, stage_timing in timing_data.items()
        if stage_timing["ops_elapsed"] != modeled_time[stage]]

    assert not mismatches, "\n".join(str(s) for s in mismatches)

    # {{{ Test per-box cost

    total_cost = sum(
        (stage_timing["ops_elapsed"]
         for stage_timing in timing_data.values()),
        0.0)

    per_box_cost = cost_model.cost_per_box(
        queue, trav_dev, level_to_order,
        FMMCostModel.get_unit_calibration_params(),
    )
    total_aggregate_cost = cost_model.aggregate_over_boxes(per_box_cost)

    expected_total = (
        total_aggregate_cost
        + modeled_time["coarsen_multipoles"]
        + modeled_time["refine_locals"]
    )
    assert total_cost == expected_total

    # }}}
def test_sumpy_fmm_exclude_self(ctx_getter):
    """Check the sumpy FMM against direct P2P when self-interactions are
    excluded.

    A 2D Laplace kernel with volume Taylor expansions of order 10 is
    evaluated at the source points themselves. Both the FMM wrangler and the
    reference ``P2P`` are built with ``exclude_self=True`` and receive the
    same ``target_to_source`` array (``arange(ntargets)``). The relative
    2-norm error must be zero to within ``atol=1e-7``.

    :arg ctx_getter: callable returning the OpenCL context to use.
    """
    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 500
    dtype = np.float64

    from boxtree.tools import (
        make_normal_particle_array as p_normal)

    knl = LaplaceKernel(2)
    local_expn_class = VolumeTaylorLocalExpansion
    mpole_expn_class = VolumeTaylorMultipoleExpansion
    order = 10

    sources = p_normal(queue, nsources, knl.dim, dtype, seed=15)

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources,
                 max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)

    from pyopencl.clrandom import PhiloxGenerator
    # Seeded like the other generators in this file so that a failure can be
    # reproduced with the same weights.
    rng = PhiloxGenerator(ctx, seed=44)
    weights = rng.uniform(queue, nsources, dtype=np.float64)

    target_to_source = np.arange(tree.ntargets, dtype=np.int32)
    self_extra_kwargs = {"target_to_source": target_to_source}

    out_kernels = [knl]

    from functools import partial

    from sumpy.fmm import SumpyExpansionWranglerCodeContainer
    wcc = SumpyExpansionWranglerCodeContainer(
        ctx,
        partial(mpole_expn_class, knl),
        partial(local_expn_class, knl),
        out_kernels,
        exclude_self=True)

    wrangler = wcc.get_wrangler(
        queue, tree, dtype,
        fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order,
        self_extra_kwargs=self_extra_kwargs)

    from boxtree.fmm import drive_fmm

    pot, = drive_fmm(trav, wrangler, weights)

    from sumpy import P2P
    p2p = P2P(ctx, out_kernels, exclude_self=True)
    _evt, (ref_pot,) = p2p(queue, sources, sources, (weights,),
                           **self_extra_kwargs)

    pot = pot.get()
    ref_pot = ref_pot.get()

    rel_err = la.norm(pot - ref_pot) / la.norm(ref_pot)
    logger.info("order %d -> relative l2 error: %g", order, rel_err)

    assert np.isclose(rel_err, 0, atol=1e-7)
def test_compare_cl_and_py_cost_model(ctx_factory, nsources, ntargets, dims,
                                      dtype):
    """Check the OpenCL cost model stage by stage against the Python one.

    One tree and traversal are built; then, for each cost-model stage, the
    result of ``FMMCostModel`` (given the device traversal) is compared with
    the result of ``_PythonFMMCostModel`` (given the host traversal). The
    wall-clock time of each call is logged.

    :arg ctx_factory: callable returning the OpenCL context to use.
    :arg nsources: number of source particles.
    :arg ntargets: number of target particles.
    :arg dims: spatial dimension.
    :arg dtype: dtype passed to the particle and radius generators.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    # {{{ Generate sources, targets and target_radii

    from boxtree.tools import make_normal_particle_array as p_normal
    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    targets = p_normal(queue, ntargets, dims, dtype, seed=18)

    from pyopencl.clrandom import PhiloxGenerator
    radius_rng = PhiloxGenerator(queue.context, seed=22)
    target_radii = radius_rng.uniform(
        queue, ntargets, a=0, b=0.05, dtype=dtype).get()

    # }}}

    # {{{ Generate tree and traversal

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(ctx)
    tree, _ = tree_builder(
        queue, sources, targets=targets, target_radii=target_radii,
        stick_out_factor=0.15, max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    trav_builder = FMMTraversalBuilder(ctx, well_sep_is_n_away=2)
    trav_dev, _ = trav_builder(queue, tree, debug=True)
    trav = trav_dev.get(queue=queue)

    # }}}

    # {{{ Construct cost models

    cl_cost_model = FMMCostModel(None)
    python_cost_model = _PythonFMMCostModel(None)

    nlevels = trav.tree.nlevels

    constant_one_params = cl_cost_model.get_unit_calibration_params().copy()
    for lev in range(nlevels):
        constant_one_params["p_fmm_lev%d" % lev] = 10

    xlat_cost = make_pde_aware_translation_cost_model(dims, nlevels)

    def level_costs(cost_expr_for, nentries):
        # Evaluate the symbolic cost for indices 0..nentries-1 into a
        # float64 array.
        return np.array(
            [evaluate(cost_expr_for(idx), context=constant_one_params)
             for idx in range(nentries)],
            dtype=np.float64)

    def elapsed_since(tic):
        return str(time.time() - tic)

    # }}}

    # {{{ Test process_form_multipoles

    p2m_cost = level_costs(xlat_cost.p2m, nlevels)
    p2m_cost_dev = cl.array.to_device(queue, p2m_cost)

    queue.finish()
    tic = time.time()
    cl_form_multipoles = cl_cost_model.process_form_multipoles(
        queue, trav_dev, p2m_cost_dev)
    queue.finish()
    logger.info("OpenCL time for process_form_multipoles: {0}".format(
        elapsed_since(tic)))

    tic = time.time()
    python_form_multipoles = python_cost_model.process_form_multipoles(
        queue, trav, p2m_cost)
    logger.info("Python time for process_form_multipoles: {0}".format(
        elapsed_since(tic)))

    assert np.array_equal(cl_form_multipoles.get(), python_form_multipoles)

    # }}}

    # {{{ Test process_coarsen_multipoles

    # Entry i is the cost of translating from level i + 1 to level i.
    m2m_cost = level_costs(
        lambda tgt_lev: xlat_cost.m2m(tgt_lev + 1, tgt_lev), nlevels - 1)
    m2m_cost_dev = cl.array.to_device(queue, m2m_cost)

    queue.finish()
    tic = time.time()
    cl_coarsen_multipoles = cl_cost_model.process_coarsen_multipoles(
        queue, trav_dev, m2m_cost_dev)
    queue.finish()
    logger.info("OpenCL time for coarsen_multipoles: {0}".format(
        elapsed_since(tic)))

    tic = time.time()
    python_coarsen_multipoles = python_cost_model.process_coarsen_multipoles(
        queue, trav, m2m_cost)
    logger.info("Python time for coarsen_multipoles: {0}".format(
        elapsed_since(tic)))

    assert cl_coarsen_multipoles == python_coarsen_multipoles

    # }}}

    # {{{ Test process_direct

    queue.finish()
    tic = time.time()
    cl_ndirect_sources_per_target_box = (
        cl_cost_model.get_ndirect_sources_per_target_box(queue, trav_dev))
    cl_direct = cl_cost_model.process_direct(
        queue, trav_dev, cl_ndirect_sources_per_target_box, 5.0)
    queue.finish()
    logger.info("OpenCL time for process_direct: {0}".format(
        elapsed_since(tic)))

    tic = time.time()
    python_ndirect_sources_per_target_box = (
        python_cost_model.get_ndirect_sources_per_target_box(queue, trav))
    python_direct = python_cost_model.process_direct(
        queue, trav, python_ndirect_sources_per_target_box, 5.0)
    logger.info("Python time for process_direct: {0}".format(
        elapsed_since(tic)))

    assert np.array_equal(cl_direct.get(), python_direct)

    # }}}

    # {{{ Test aggregate_over_boxes

    tic = time.time()
    cl_direct_aggregate = cl_cost_model.aggregate_over_boxes(cl_direct)
    queue.finish()
    logger.info("OpenCL time for aggregate_over_boxes: {0}".format(
        elapsed_since(tic)))

    tic = time.time()
    python_direct_aggregate = python_cost_model.aggregate_over_boxes(
        python_direct)
    logger.info("Python time for aggregate_over_boxes: {0}".format(
        elapsed_since(tic)))

    assert cl_direct_aggregate == python_direct_aggregate

    # }}}

    # {{{ Test process_list2

    m2l_cost = level_costs(lambda lev: xlat_cost.m2l(lev, lev), nlevels)
    m2l_cost_dev = cl.array.to_device(queue, m2l_cost)

    queue.finish()
    tic = time.time()
    cl_m2l_cost = cl_cost_model.process_list2(queue, trav_dev, m2l_cost_dev)
    queue.finish()
    logger.info("OpenCL time for process_list2: {0}".format(
        elapsed_since(tic)))

    tic = time.time()
    python_m2l_cost = python_cost_model.process_list2(queue, trav, m2l_cost)
    logger.info("Python time for process_list2: {0}".format(
        elapsed_since(tic)))

    assert np.array_equal(cl_m2l_cost.get(), python_m2l_cost)

    # }}}

    # {{{ Test process_list 3

    m2p_cost = level_costs(xlat_cost.m2p, nlevels)
    m2p_cost_dev = cl.array.to_device(queue, m2p_cost)

    queue.finish()
    tic = time.time()
    cl_m2p_cost = cl_cost_model.process_list3(queue, trav_dev, m2p_cost_dev)
    queue.finish()
    logger.info("OpenCL time for process_list3: {0}".format(
        elapsed_since(tic)))

    tic = time.time()
    python_m2p_cost = python_cost_model.process_list3(queue, trav, m2p_cost)
    logger.info("Python time for process_list3: {0}".format(
        elapsed_since(tic)))

    assert np.array_equal(cl_m2p_cost.get(), python_m2p_cost)

    # }}}

    # {{{ Test process_list4

    p2l_cost = level_costs(xlat_cost.p2l, nlevels)
    p2l_cost_dev = cl.array.to_device(queue, p2l_cost)

    queue.finish()
    tic = time.time()
    cl_p2l_cost = cl_cost_model.process_list4(queue, trav_dev, p2l_cost_dev)
    queue.finish()
    logger.info("OpenCL time for process_list4: {0}".format(
        elapsed_since(tic)))

    tic = time.time()
    python_p2l_cost = python_cost_model.process_list4(queue, trav, p2l_cost)
    logger.info("Python time for process_list4: {0}".format(
        elapsed_since(tic)))

    assert np.array_equal(cl_p2l_cost.get(), python_p2l_cost)

    # }}}

    # {{{ Test process_refine_locals

    # Entry i is the cost of translating from level i to level i + 1.
    l2l_cost = level_costs(
        lambda src_lev: xlat_cost.l2l(src_lev, src_lev + 1), nlevels - 1)
    l2l_cost_dev = cl.array.to_device(queue, l2l_cost)

    queue.finish()
    tic = time.time()
    cl_refine_locals_cost = cl_cost_model.process_refine_locals(
        queue, trav_dev, l2l_cost_dev)
    queue.finish()
    logger.info("OpenCL time for refine_locals: {0}".format(
        elapsed_since(tic)))

    tic = time.time()
    python_refine_locals_cost = python_cost_model.process_refine_locals(
        queue, trav, l2l_cost)
    logger.info("Python time for refine_locals: {0}".format(
        elapsed_since(tic)))

    assert cl_refine_locals_cost == python_refine_locals_cost

    # }}}

    # {{{ Test process_eval_locals

    l2p_cost = level_costs(xlat_cost.l2p, nlevels)
    l2p_cost_dev = cl.array.to_device(queue, l2p_cost)

    queue.finish()
    tic = time.time()
    cl_l2p_cost = cl_cost_model.process_eval_locals(
        queue, trav_dev, l2p_cost_dev)
    queue.finish()
    logger.info("OpenCL time for process_eval_locals: {0}".format(
        elapsed_since(tic)))

    tic = time.time()
    python_l2p_cost = python_cost_model.process_eval_locals(
        queue, trav, l2p_cost)
    logger.info("Python time for process_eval_locals: {0}".format(
        elapsed_since(tic)))

    assert np.array_equal(cl_l2p_cost.get(), python_l2p_cost)

    # }}}
def plot_traversal(ctx_getter, do_plot=False):
    """Build a tree and an FMM traversal for random particles and plot a few
    interaction lists.

    The box tree is drawn first. Then, for five randomly chosen boxes (drawn
    in red), the boxes on one of their traversal lists are drawn in yellow.
    Which list is shown is selected by the hand-edited ``if 0:``/``elif 1:``
    chain below; as written, the "separated smaller (list 3)" branch runs and
    the "separated bigger (list 4)" branch after it is never reached.

    :arg ctx_getter: callable returning the OpenCL context to use.
    :arg do_plot: currently unused (only referenced by commented-out code);
        kept so existing callers keep working.
    """
    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    #for dims in [2, 3]:
    for dims in [2]:
        nparticles = 10**4
        dtype = np.float64

        from pyopencl.clrandom import PhiloxGenerator
        rng = PhiloxGenerator(queue.context, seed=15)

        from pytools.obj_array import make_obj_array
        particles = make_obj_array([
            rng.normal(queue, nparticles, dtype=dtype)
            for i in range(dims)])

        # if do_plot:
        #     pt.plot(particles[0].get(), particles[1].get(), "x")

        from boxtree import TreeBuilder
        tb = TreeBuilder(ctx)

        queue.finish()
        # Both builders hand back a pair whose first entry is the actual
        # result (this is how every other caller in this file unpacks them),
        # so unpack instead of treating the pair as the tree/traversal.
        tree, _ = tb(queue, particles, max_particles_in_box=30, debug=True)

        from boxtree.traversal import FMMTraversalBuilder
        tg = FMMTraversalBuilder(ctx)
        trav, _ = tg(queue, tree)

        # Plotting and the list slicing below operate on host-side copies.
        tree = tree.get(queue=queue)
        trav = trav.get(queue=queue)

        from boxtree.visualization import TreePlotter
        plotter = TreePlotter(tree)
        plotter.draw_tree(fill=False, edgecolor="black")
        #plotter.draw_box_numbers()
        plotter.set_bounding_box()

        from random import randrange, seed
        # Fixed seed so the same boxes are picked on every run.
        seed(7)

        # {{{ generic box drawing helper

        def draw_some_box_lists(starts, lists, key_to_box=None, count=5):
            """Highlight *count* random boxes (red) and their list entries
            (yellow).

            *starts*/*lists* form a CSR-style pair: entries for key ``k`` are
            ``lists[starts[k]:starts[k+1]]``. If *key_to_box* is given, keys
            index into it to obtain the box number; otherwise the key is the
            box number itself. Boxes with an empty list are skipped and do
            not count toward *count*.
            """
            actual_count = 0
            while actual_count < count:
                if key_to_box is not None:
                    key = randrange(len(key_to_box))
                    ibox = key_to_box[key]
                else:
                    key = ibox = randrange(tree.nboxes)

                start, end = starts[key:key + 2]
                if start == end:
                    continue

                #print ibox, start, end, lists[start:end]
                for jbox in lists[start:end]:
                    plotter.draw_box(jbox, facecolor='yellow')

                plotter.draw_box(ibox, facecolor='red')

                actual_count += 1

        # }}}

        # Manual selector: only the first branch with a truthy condition runs.
        if 0:
            # colleagues
            draw_some_box_lists(
                trav.colleagues_starts,
                trav.colleagues_lists)
        elif 0:
            # near neighbors ("list 1")
            draw_some_box_lists(
                trav.neighbor_leaves_starts,
                trav.neighbor_leaves_lists,
                key_to_box=trav.source_boxes)
        elif 0:
            # well-separated siblings (list 2)
            draw_some_box_lists(
                trav.sep_siblings_starts,
                trav.sep_siblings_lists)
        elif 1:
            # separated smaller (list 3)
            draw_some_box_lists(
                trav.sep_smaller_starts,
                trav.sep_smaller_lists,
                key_to_box=trav.source_boxes)
        elif 1:
            # separated bigger (list 4)
            draw_some_box_lists(
                trav.sep_bigger_starts,
                trav.sep_bigger_lists)

        import matplotlib.pyplot as pt
        pt.show()
def test_from_sep_siblings_rotation_classes(ctx_factory, well_sep_is_n_away):
    """Check the rotation classes assigned to ``from_sep_siblings`` entries.

    A 3D tree and traversal are built, and ``RotationClassesBuilder`` is run
    on them. For every entry of ``from_sep_siblings`` the angle between the
    source-to-target translation vector and the z axis is computed from the
    box centers and compared with the angle stored for that entry's rotation
    class.

    :arg ctx_factory: callable returning the OpenCL context to use.
    :arg well_sep_is_n_away: forwarded to ``FMMTraversalBuilder``.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    dims = 3
    nparticles = 10**4
    dtype = np.float64

    # {{{ build tree

    from pyopencl.clrandom import PhiloxGenerator
    from pytools.obj_array import make_obj_array
    from boxtree import TreeBuilder

    rng = PhiloxGenerator(queue.context, seed=15)
    particles = make_obj_array(
        [rng.normal(queue, nparticles, dtype=dtype) for _ in range(dims)])

    tree_builder = TreeBuilder(ctx)

    queue.finish()
    tree, _ = tree_builder(
        queue, particles, max_particles_in_box=30, debug=True)

    # }}}

    # {{{ build traversal

    from boxtree.traversal import FMMTraversalBuilder
    from boxtree.rotation_classes import RotationClassesBuilder

    trav_builder = FMMTraversalBuilder(
        ctx, well_sep_is_n_away=well_sep_is_n_away)
    trav, _ = trav_builder(queue, tree)

    rot_builder = RotationClassesBuilder(ctx)
    result, _ = rot_builder(queue, trav, tree)

    rot_classes = result.from_sep_siblings_rotation_classes.get(queue)
    rot_angles = result.from_sep_siblings_rotation_class_to_angle.get(queue)

    tree = tree.get(queue=queue)
    trav = trav.get(queue=queue)

    # One row per box.
    centers = tree.box_centers.T

    # }}}

    # For each entry of from_sep_siblings, compute the source-target
    # translation direction as a vector, and check that the from_sep_siblings
    # rotation class in the traversal corresponds to the angle with the
    # z-axis of the translation direction.

    list_starts = trav.from_sep_siblings_starts
    last_axis = dims - 1

    for tgt_ibox, begin, stop in zip(
            trav.target_or_target_parent_boxes,
            list_starts, list_starts[1:]):
        src_boxes = trav.from_sep_siblings_lists[begin:stop]
        entry_rot_classes = rot_classes[begin:stop]

        translation_vecs = centers[tgt_ibox] - centers[src_boxes]

        # Polar angle: in-plane length against the component along the
        # last axis.
        in_plane_len = la.norm(translation_vecs[:, :last_axis], axis=1)
        theta = np.arctan2(in_plane_len, translation_vecs[:, last_axis])

        expected_angles = rot_angles[entry_rot_classes]

        assert np.allclose(theta, expected_angles, atol=1e-13, rtol=1e-13)