def __init__(self, ctx_getter=cl.create_some_context, enable_extents=False):
    """Build a tree and traversal for a large 2D point cloud and keep host
    copies of both on the instance.

    :arg ctx_getter: callable taking no arguments that returns the OpenCL
        context to run on.
    :arg enable_extents: if true, random target radii are drawn. The
        ``target_radii`` keyword of the tree build is commented out below,
        so the radii are not passed to the tree builder.
    """
    cl_ctx = ctx_getter()
    queue = cl.CommandQueue(cl_ctx)

    from pyopencl.characterize import has_struct_arg_count_bug
    if has_struct_arg_count_bug(queue.device):
        pytest.xfail(
            "won't work on devices with the struct arg count issue")

    logging.basicConfig(level=logging.INFO)

    ndims = 2
    real_dtype = np.float32
    nsources = 9000000
    ntargets = 9000000

    from boxtree.fmm import drive_fmm  # noqa: F401

    sources = p_normal(queue, nsources, ndims, real_dtype, seed=15)
    # Generated but not handed to the tree builder (see the call below).
    targets = p_normal(  # noqa: F841
        queue, ntargets, ndims, real_dtype, seed=15)

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=12)
    target_radii = (  # noqa: F841
        2**rng.uniform(queue, ntargets, dtype=real_dtype, a=-10, b=0)
        if enable_extents
        else None)

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(cl_ctx)
    dev_tree, _ = tree_builder(
        queue, sources,
        #targets=targets,
        max_particles_in_box=30,
        #target_radii=target_radii,
        #stick_out_factor=0.25,
        debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    trav_builder = FMMTraversalBuilder(cl_ctx)
    dev_trav, _ = trav_builder(queue, dev_tree, debug=True)

    # Unit weights: one per source.
    weights = np.ones(nsources)
    weights_sum = np.sum(weights)

    host_trav = dev_trav.get(queue=queue)
    host_tree = host_trav.tree

    self.tree = host_tree
    self.trav = host_trav
    self.input = [host_tree, weights, weights_sum, host_trav]
    self.pot = None
def test_interaction_list_particle_count_thresholding(ctx_getter, enable_extents):
    """Check that the constant-one FMM still sums all weights when the
    traversal is built with a source-count threshold.

    The traversal is built with ``_from_sep_smaller_min_nsources_cumul`` set
    to one more than ``max_particles_in_box``. With unit weights, every
    target potential must equal the sum of the weights.

    :arg ctx_getter: callable returning the OpenCL context.
    :arg enable_extents: if true, targets get random radii.
    """
    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    logging.basicConfig(level=logging.INFO)

    dims = 2
    nsources = 1000
    ntargets = 1000
    # ``np.float`` was only an alias of the builtin ``float`` (double
    # precision) and has been removed from NumPy; ``np.float64`` is the
    # same dtype.
    dtype = np.float64

    max_particles_in_box = 30
    # Ensure that we have underfilled boxes.
    from_sep_smaller_min_nsources_cumul = 1 + max_particles_in_box

    from boxtree.fmm import drive_fmm
    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    targets = p_normal(queue, ntargets, dims, dtype, seed=15)

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=12)

    if enable_extents:
        target_radii = 2**rng.uniform(queue, ntargets, dtype=dtype, a=-10, b=0)
    else:
        target_radii = None

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, targets=targets,
            max_particles_in_box=max_particles_in_box,
            target_radii=target_radii,
            debug=True, stick_out_factor=0.25)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(
        queue, tree, debug=True,
        _from_sep_smaller_min_nsources_cumul=from_sep_smaller_min_nsources_cumul
    )

    weights = np.ones(nsources)
    weights_sum = np.sum(weights)

    host_trav = trav.get(queue=queue)
    host_tree = host_trav.tree

    wrangler = ConstantOneExpansionWrangler(host_tree)

    pot = drive_fmm(host_trav, wrangler, weights)

    # Every target must have seen every (unit-weight) source exactly once.
    assert (pot == weights_sum).all()
def test_plot_traversal(ctx_factory, well_sep_is_n_away=1, plot=False):
    """Draw a tree and the interaction lists of one fixed box.

    Skipped when matplotlib cannot be imported.

    :arg ctx_factory: callable returning the OpenCL context.
    :arg well_sep_is_n_away: passed to the traversal builder; also selects
        which box number is drawn (380 for 1, 320 for 2, none otherwise).
    :arg plot: if true, hide the axis ticks and show the figure.
    """
    pytest.importorskip("matplotlib")
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)

    #for dims in [2, 3]:
    for dims in [2]:
        npoints = 10**4
        coord_dtype = np.float64

        from pyopencl.clrandom import PhiloxGenerator
        rng = PhiloxGenerator(queue.context, seed=15)

        from pytools.obj_array import make_obj_array
        coords = [
            rng.normal(queue, npoints, dtype=coord_dtype)
            for _ in range(dims)]
        particles = make_obj_array(coords)

        from boxtree import TreeBuilder
        tree_builder = TreeBuilder(cl_ctx)

        queue.finish()
        dev_tree, _ = tree_builder(
            queue, particles, max_particles_in_box=30, debug=True)

        from boxtree.traversal import FMMTraversalBuilder
        trav_builder = FMMTraversalBuilder(
            cl_ctx, well_sep_is_n_away=well_sep_is_n_away)
        dev_trav, _ = trav_builder(queue, dev_tree)

        # Plotting works on host copies.
        tree = dev_tree.get(queue=queue)
        trav = dev_trav.get(queue=queue)

        from boxtree.visualization import TreePlotter
        plotter = TreePlotter(tree)
        plotter.draw_tree(fill=False, edgecolor="black")
        plotter.set_bounding_box()

        from random import randrange, seed  # noqa
        seed(7)

        from boxtree.visualization import draw_box_lists

        # Box numbers are hard-coded per separation setting.
        #draw_box_lists(randrange(tree.nboxes))
        if well_sep_is_n_away == 1:
            draw_box_lists(plotter, trav, 380)
        elif well_sep_is_n_away == 2:
            draw_box_lists(plotter, trav, 320)

        if plot:
            import matplotlib.pyplot as pt
            pt.gca().set_xticks([])
            pt.gca().set_yticks([])
            pt.show()
def test_non_adaptive_particle_tree(ctx_getter, dtype, dims, do_plot=False):
    """Run ``run_build_test`` on 10**4 particles with ``non_adaptive=True``."""
    cl_ctx = ctx_getter()
    cmd_queue = cl.CommandQueue(cl_ctx)

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(cl_ctx)

    nparticles = 10**4
    build_options = {"do_plot": do_plot, "non_adaptive": True}
    run_build_test(
        tree_builder, cmd_queue, dims, dtype, nparticles, **build_options)
def test_vanilla_particle_tree(ctx_getter, dtype, dims, do_plot=False):
    """Run ``run_build_test`` on 10**5 particles with default build options."""
    cl_ctx = ctx_getter()
    cmd_queue = cl.CommandQueue(cl_ctx)

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(cl_ctx)

    nparticles = 10**5
    run_build_test(
        tree_builder, cmd_queue, dims, dtype, nparticles, do_plot=do_plot)
def test_particle_tree_with_reallocations(ctx_getter, dtype, dims, do_plot=False):
    """Run ``run_build_test`` on 10**5 particles with ``nboxes_guess=5``."""
    cl_ctx = ctx_getter()
    cmd_queue = cl.CommandQueue(cl_ctx)

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(cl_ctx)

    nparticles = 10**5
    build_options = {"do_plot": do_plot, "nboxes_guess": 5}
    run_build_test(
        tree_builder, cmd_queue, dims, dtype, nparticles, **build_options)
def test_fmm_float32(ctx_getter=cl.create_some_context, enable_extents=True):
    """Run the constant-one FMM on 3,000,000 single-precision 2D sources and
    targets, print the wall-clock time of the FMM drive, and check that every
    potential equals the sum of the (unit) weights.

    :arg ctx_getter: callable returning the OpenCL context.
    :arg enable_extents: if true, targets get random radii.
    """
    from time import time

    cl_ctx = ctx_getter()
    queue = cl.CommandQueue(cl_ctx)

    from pyopencl.characterize import has_struct_arg_count_bug
    if has_struct_arg_count_bug(queue.device):
        pytest.xfail("won't work on devices with the struct arg count issue")

    logging.basicConfig(level=logging.INFO)

    ndims = 2
    real_dtype = np.float32
    nsources = 3000000
    ntargets = 3000000

    from boxtree.fmm import drive_fmm
    sources = p_normal(queue, nsources, ndims, real_dtype, seed=15)
    targets = p_normal(queue, ntargets, ndims, real_dtype, seed=15)

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=12)
    target_radii = (
        2**rng.uniform(queue, ntargets, dtype=real_dtype, a=-10, b=0)
        if enable_extents
        else None)

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(cl_ctx)
    dev_tree, _ = tree_builder(
        queue, sources,
        targets=targets,
        max_particles_in_box=30,
        target_radii=target_radii,
        stick_out_factor=0.25,
        debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    trav_builder = FMMTraversalBuilder(cl_ctx)
    dev_trav, _ = trav_builder(queue, dev_tree, debug=True)

    weights = np.ones(nsources)
    expected_pot = np.sum(weights)

    host_trav = dev_trav.get(queue=queue)
    host_tree = host_trav.tree

    wrangler = ConstantOneExpansionWrangler(host_tree)

    # Time only the FMM drive itself.
    t_start = time()
    pot = drive_fmm(host_trav, wrangler, weights)
    print(time() - t_start)

    assert (pot == expected_pot).all()
def test_unpruned_particle_tree(ctx_getter, dtype, dims, do_plot=False):
    """Run ``run_build_test`` on 10**5 particles with ``skip_prune=True``."""
    cl_ctx = ctx_getter()
    cmd_queue = cl.CommandQueue(cl_ctx)

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(cl_ctx)

    # test unpruned tree build
    nparticles = 10**5
    build_options = {"do_plot": do_plot, "skip_prune": True}
    run_build_test(
        tree_builder, cmd_queue, dims, dtype, nparticles, **build_options)
def test_particle_tree_with_many_empty_leaves(
        ctx_getter, dtype, dims, do_plot=False):
    """Run ``run_build_test`` on 10**5 particles with
    ``max_particles_in_box=5``.
    """
    cl_ctx = ctx_getter()
    cmd_queue = cl.CommandQueue(cl_ctx)

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(cl_ctx)

    nparticles = 10**5
    build_options = {"do_plot": do_plot, "max_particles_in_box": 5}
    run_build_test(
        tree_builder, cmd_queue, dims, dtype, nparticles, **build_options)
def test_leaves_to_balls_query(ctx_factory, dims, do_plot=False):
    """Compare the leaves-to-balls lookup with a brute-force host check.

    For every box without children, the balls listed for that box must be
    exactly those whose l-infinity distance to the box center, minus the
    ball radius, is below the box radius.

    :arg ctx_factory: callable returning the OpenCL context.
    :arg dims: number of spatial dimensions.
    :arg do_plot: if true, plot the first two particle coordinates.
    """
    logging.basicConfig(level=logging.INFO)

    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)

    npoints = 10**5
    coord_dtype = np.float64

    particles = make_normal_particle_array(queue, npoints, dims, coord_dtype)

    if do_plot:
        import matplotlib.pyplot as pt
        pt.plot(particles[0].get(), particles[1].get(), "x")

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(cl_ctx)

    queue.finish()
    dev_tree, _ = tree_builder(
        queue, particles, max_particles_in_box=30, debug=True)

    nballs = 10**4
    dev_centers = make_normal_particle_array(queue, nballs, dims, coord_dtype)
    dev_radii = cl.array.empty(queue, nballs, coord_dtype).fill(0.1)

    from boxtree.area_query import LeavesToBallsLookupBuilder
    lookup_builder = LeavesToBallsLookupBuilder(cl_ctx)

    dev_lookup, _ = lookup_builder(queue, dev_tree, dev_centers, dev_radii)

    # get data to host for test
    tree = dev_tree.get(queue=queue)
    lookup = dev_lookup.get(queue=queue)
    # One row per ball, one column per axis.
    centers = np.array([axis.get() for axis in dev_centers]).T
    radii = dev_radii.get()

    assert len(lookup.balls_near_box_starts) == tree.nboxes + 1

    from boxtree import box_flags_enum

    for ibox in range(tree.nboxes):
        has_children = tree.box_flags[ibox] & box_flags_enum.HAS_CHILDREN
        # We only want leaves here.
        if not has_children:
            center = tree.box_centers[:, ibox]
            lower, upper = tree.get_box_extent(ibox)
            half_width = 0.5 * (upper - lower)[0]

            linf_dists = np.max(np.abs(centers - center), axis=-1)
            expected, = np.where(linf_dists - radii < half_width)

            first, last = lookup.balls_near_box_starts[ibox:ibox + 2]
            found = lookup.balls_near_box_lists[first:last]
            assert sorted(found) == sorted(expected)
def test_same_tree_with_zero_weight_particles(ctx_factory, dims):
    """Build trees in which sources have refine weight 1 and targets 0.

    One tree is built per stick-out factor and its box count is printed.
    The function makes no assertions; the trees are only collected for the
    plotting code at the end, which is disabled.

    :arg ctx_factory: callable returning the OpenCL context.
    :arg dims: number of spatial dimensions.
    """
    logging.basicConfig(level=logging.INFO)

    ntargets_values = [300, 400, 500]
    stick_out_factors = [0, 0.1, 0.3, 1]
    nsources = 20

    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(cl_ctx)

    trees = []

    for stick_out_factor in stick_out_factors:
        for ntargets in [40]:
            # Sources are reproducible (fixed seed) ...
            np.random.seed(10)
            host_sources = np.random.rand(dims, nsources)**2
            host_sources[:, 0] = -0.1
            host_sources[:, 1] = 1.1

            # ... while targets and radii are drawn after re-seeding
            # without an explicit seed.
            np.random.seed()
            max_ntargets = max(ntargets_values)
            host_targets = np.random.rand(
                dims, max_ntargets)[:, :ntargets].copy()
            target_radii = np.random.rand(max_ntargets)[:ntargets]

            sources = cl.array.to_device(queue, host_sources)
            targets = cl.array.to_device(queue, host_targets)

            # Weight 1 for each source, 0 for each target.
            refine_weights = cl.array.empty(
                queue, nsources + ntargets, np.int32)
            refine_weights[:nsources] = 1
            refine_weights[nsources:] = 0

            dev_tree, _ = tree_builder(
                queue, sources,
                targets=targets,
                target_radii=target_radii,
                stick_out_factor=stick_out_factor,
                max_leaf_refine_weight=10,
                refine_weights=refine_weights,
                debug=True)
            tree = dev_tree.get(queue=queue)
            trees.append(tree)

            print("TREE:", tree.nboxes)

    # Disabled debugging aid: plot every tree that was built.
    if 0:
        import matplotlib.pyplot as plt
        for tree in trees:
            plt.figure()
            tree.plot()

        plt.show()
def partition_by_nodes(actx, discr,
        tree_kind="adaptive-level-restricted", max_nodes_in_box=None):
    """Generate ranges of nodes. The partition is created at the lowest
    level of granularity, i.e. nodes. This results in balanced ranges of
    points, but will split elements across different ranges.

    :arg discr: a :class:`meshmode.discretization.Discretization`.
    :arg tree_kind: if not *None*, it is passed to :class:`boxtree.TreeBuilder`
        and each leaf box of the resulting tree becomes one range. If *None*,
        the nodes are cut into consecutive ranges of *max_nodes_in_box*
        entries.
    :arg max_nodes_in_box: passed to :class:`boxtree.TreeBuilder`; defaults
        to 32.

    :return: a :class:`sumpy.tools.BlockIndexRanges`.
    """

    if max_nodes_in_box is None:
        # FIXME: this is just an arbitrary value
        max_nodes_in_box = 32

    if tree_kind is not None:
        from boxtree import box_flags_enum
        from boxtree import TreeBuilder

        builder = TreeBuilder(actx.context)

        from meshmode.dof_array import flatten, thaw
        tree, _ = builder(actx.queue,
                flatten(thaw(actx, discr.nodes())),
                max_particles_in_box=max_nodes_in_box,
                kind=tree_kind)

        tree = tree.get(actx.queue)
        # Boxes without the HAS_CHILDREN flag are the leaves.
        leaf_boxes, = (tree.box_flags
                       & box_flags_enum.HAS_CHILDREN == 0).nonzero()

        # ``np.object`` was an alias of the builtin ``object`` and has been
        # removed from NumPy; the builtin gives the same dtype.
        indices = np.empty(len(leaf_boxes), dtype=object)
        for i, ibox in enumerate(leaf_boxes):
            box_start = tree.box_source_starts[ibox]
            box_end = box_start + tree.box_source_counts_cumul[ibox]
            indices[i] = tree.user_source_ids[box_start:box_end]

        # Range boundaries are the running sum of the per-leaf sizes.
        ranges = actx.from_numpy(
                np.cumsum([0] + [box.shape[0] for box in indices])
                )
        indices = actx.from_numpy(np.hstack(indices))
    else:
        # ``np.int`` was an alias of the builtin ``int`` and has been
        # removed from NumPy; the builtin gives the same dtype.
        indices = actx.from_numpy(np.arange(0, discr.ndofs, dtype=int))
        ranges = actx.from_numpy(np.arange(
            0,
            discr.ndofs + 1,
            max_nodes_in_box, dtype=int))

    assert ranges[-1] == discr.ndofs

    return BlockIndexRanges(actx.context,
        actx.freeze(indices), actx.freeze(ranges))
def partition_by_nodes(
        actx: PyOpenCLArrayContext, discr: Discretization, *,
        tree_kind: Optional[str] = "adaptive-level-restricted",
        max_particles_in_box: Optional[int] = None) -> BlockIndexRanges:
    """Partition the nodes of *discr* into index ranges.

    The partition works on individual nodes, so elements may be split
    across different ranges.

    :arg tree_kind: if not *None*, it is passed to
        :class:`boxtree.TreeBuilder` and each leaf box of the tree gives one
        block. If *None*, the nodes are split linearly into at least two
        blocks.
    :arg max_particles_in_box: passed to :class:`boxtree.TreeBuilder`;
        defaults to 32.
    """

    if max_particles_in_box is None:
        # FIXME: this is just an arbitrary value
        max_particles_in_box = 32

    if tree_kind is None:
        # NOTE(review): this raises only when dim == 1 and ambient_dim != 2,
        # which does not obviously match the message below -- confirm the
        # intended condition.
        if discr.ambient_dim != 2 and discr.dim == 1:
            raise ValueError("only curves are supported for 'tree_kind=None'")

        ndofs = discr.ndofs
        nblocks = max(ndofs // max_particles_in_box, 2)
        indices = np.arange(0, ndofs, dtype=np.int64)
        ranges = np.linspace(0, ndofs, nblocks + 1, dtype=np.int64)
        assert ranges[-1] == ndofs
    else:
        from boxtree import box_flags_enum
        from boxtree import TreeBuilder

        tree_builder = TreeBuilder(actx.context)

        from arraycontext import thaw
        nodes = flatten(thaw(discr.nodes(), actx), actx, leaf_class=DOFArray)
        dev_tree, _ = tree_builder(
            actx.queue,
            particles=nodes,
            max_particles_in_box=max_particles_in_box,
            kind=tree_kind)

        tree = dev_tree.get(actx.queue)
        # Boxes without the HAS_CHILDREN flag are the leaves.
        is_leaf = tree.box_flags & box_flags_enum.HAS_CHILDREN == 0
        leaf_boxes, = is_leaf.nonzero()

        # One index array per leaf; no explicit ranges in this branch.
        ranges = None
        indices = np.empty(len(leaf_boxes), dtype=object)
        for iblock, ibox in enumerate(leaf_boxes):
            first = tree.box_source_starts[ibox]
            last = first + tree.box_source_counts_cumul[ibox]
            indices[iblock] = tree.user_source_ids[first:last]

    from pytential.linalg import make_block_index_from_array
    return make_block_index_from_array(indices, ranges=ranges)
def partition_by_nodes(discr, use_tree=True, max_nodes_in_box=None):
    """Generate ranges of nodes. The partition is created at the lowest
    level of granularity, i.e. nodes. This results in balanced ranges of
    points, but will split elements across different ranges.

    :arg discr: a :class:`meshmode.discretization.Discretization`.
    :arg use_tree: if ``True``, node partitions are generated using a
        :class:`boxtree.TreeBuilder`, which leads to geometrically close
        points to belong to the same partition. If ``False``, a simple linear
        partition is constructed.
    :arg max_nodes_in_box: passed to :class:`boxtree.TreeBuilder`; defaults
        to 32.

    :return: a :class:`sumpy.tools.BlockIndexRanges`.
    """

    if max_nodes_in_box is None:
        # FIXME: this is just an arbitrary value
        max_nodes_in_box = 32

    with cl.CommandQueue(discr.cl_context) as queue:
        if use_tree:
            from boxtree import box_flags_enum
            from boxtree import TreeBuilder

            builder = TreeBuilder(discr.cl_context)

            tree, _ = builder(queue, discr.nodes(),
                max_particles_in_box=max_nodes_in_box)

            tree = tree.get(queue)
            # Boxes without the HAS_CHILDREN flag are the leaves.
            leaf_boxes, = (tree.box_flags
                           & box_flags_enum.HAS_CHILDREN == 0).nonzero()

            # ``np.object`` was an alias of the builtin ``object`` and has
            # been removed from NumPy; the builtin gives the same dtype.
            indices = np.empty(len(leaf_boxes), dtype=object)
            for i, ibox in enumerate(leaf_boxes):
                box_start = tree.box_source_starts[ibox]
                box_end = box_start + tree.box_source_counts_cumul[ibox]
                indices[i] = tree.user_source_ids[box_start:box_end]

            # Range boundaries are the running sum of the per-leaf sizes.
            ranges = to_device(
                queue, np.cumsum([0] + [box.shape[0] for box in indices]))
            indices = to_device(queue, np.hstack(indices))
        else:
            # ``np.int`` was an alias of the builtin ``int`` and has been
            # removed from NumPy; the builtin gives the same dtype.
            indices = cl.array.arange(queue, 0, discr.nnodes,
                                      dtype=int)
            # The step is nnodes // max_nodes_in_box, unchanged from the
            # original code.
            ranges = cl.array.arange(queue, 0, discr.nnodes + 1,
                                     discr.nnodes // max_nodes_in_box,
                                     dtype=int)
        assert ranges[-1] == discr.nnodes

        return BlockIndexRanges(discr.cl_context,
                                indices.with_queue(None),
                                ranges.with_queue(None))
def test_sumpy_fmm_timing_data_collection(ctx_factory):
    """Check that ``drive_fmm`` fills the ``timing_data`` dict it is given.

    Uses an order-1 2D Laplace FMM on 500 sources and a command queue created
    with profiling enabled.

    :arg ctx_factory: callable returning the OpenCL context.
    """
    logging.basicConfig(level=logging.INFO)

    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(
        cl_ctx,
        properties=cl.command_queue_properties.PROFILING_ENABLE)

    nsources = 500
    real_dtype = np.float64

    from boxtree.tools import (
        make_normal_particle_array as p_normal)

    kernel = LaplaceKernel(2)
    local_expn_class = VolumeTaylorLocalExpansion
    mpole_expn_class = VolumeTaylorMultipoleExpansion
    order = 1

    sources = p_normal(queue, nsources, kernel.dim, real_dtype, seed=15)

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(cl_ctx)
    tree, _ = tree_builder(
        queue, sources, max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    trav_builder = FMMTraversalBuilder(cl_ctx)
    trav, _ = trav_builder(queue, tree, debug=True)

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(cl_ctx)
    weights = rng.uniform(queue, nsources, dtype=np.float64)

    out_kernels = [kernel]

    from functools import partial

    from sumpy.fmm import SumpyExpansionWranglerCodeContainer
    code_container = SumpyExpansionWranglerCodeContainer(
        cl_ctx,
        partial(mpole_expn_class, kernel),
        partial(local_expn_class, kernel),
        out_kernels)

    def level_to_order(kernel, kernel_args, tree, lev):
        # Same expansion order on every level.
        return order

    wrangler = code_container.get_wrangler(
        queue, tree, real_dtype, fmm_level_to_order=level_to_order)

    from boxtree.fmm import drive_fmm

    timing_data = {}
    pot, = drive_fmm(trav, wrangler, (weights,), timing_data=timing_data)
    print(timing_data)
    assert timing_data
def test_area_query_elwise(ctx_factory, dims, do_plot=False):
    """Generate and run an elementwise area-query kernel once.

    The test makes no assertions on the result: it only checks that the
    kernel can be generated from the template, launched over all balls, and
    waited on.

    :arg ctx_factory: callable returning the OpenCL context.
    :arg dims: number of spatial dimensions.
    :arg do_plot: if true, plot the first two particle coordinates.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    nparticles = 10**5
    dtype = np.float64

    particles = make_normal_particle_array(queue, nparticles, dims, dtype)

    if do_plot:
        import matplotlib.pyplot as pt
        pt.plot(particles[0].get(), particles[1].get(), "x")

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    queue.finish()
    tree, _ = tb(queue, particles, max_particles_in_box=30, debug=True)

    nballs = 10**4
    ball_centers = make_normal_particle_array(queue, nballs, dims, dtype)
    # All balls share the radius 0.1.
    ball_radii = cl.array.empty(queue, nballs, dtype).fill(0.1)

    from boxtree.area_query import (AreaQueryElementwiseTemplate, PeerListFinder)

    # The "%for"/"%endfor" markers look like template control lines
    # (presumably Mako -- TODO confirm), so each is kept on a line of its own.
    template = AreaQueryElementwiseTemplate(extra_args="""
            coord_t *ball_radii,
            %for ax in AXIS_NAMES[:dimensions]:
                coord_t *ball_${ax},
            %endfor
        """, ball_center_and_radius_expr="""
            %for ax in AXIS_NAMES[:dimensions]:
                ${ball_center}.${ax} = ball_${ax}[${i}];
            %endfor
            ${ball_radius} = ball_radii[${i}];
        """, leaf_found_op="")

    peer_lists, evt = PeerListFinder(ctx)(queue, tree)

    kernel = template.generate(ctx, dims, tree.coord_dtype, tree.box_id_dtype, peer_lists.peer_list_starts.dtype, tree.nlevels)

    # Launch over every ball, after the peer-list event has completed.
    evt = kernel(*template.unwrap_args(tree, peer_lists, ball_radii, *ball_centers), queue=queue, wait_for=[evt], range=slice(len(ball_radii)))

    cl.wait_for_events([evt])
def test_max_levels_error(ctx_factory):
    """Check that building a tree from 11 coincident points with
    ``max_particles_in_box=10`` raises ``MaxLevelsExceeded``.

    :arg ctx_factory: callable returning the OpenCL context.
    """
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(cl_ctx)

    logging.basicConfig(level=logging.INFO)

    # Two coordinate arrays of zeros: all 11 points sit at the origin.
    ndims = 2
    sources = [cl.array.zeros(queue, 11, float) for _ in range(ndims)]

    from boxtree.tree_build import MaxLevelsExceeded
    with pytest.raises(MaxLevelsExceeded):
        _tree, _ = tree_builder(
            queue, sources, max_particles_in_box=10, debug=True)
def test_vanilla_particle_tree(ctx_factory, dtype, dims, do_plot=False):
    """Run ``run_build_test`` on 10**5 particles with
    ``max_particles_in_box=30``.
    """
    cl_ctx = ctx_factory()
    cmd_queue = cl.CommandQueue(cl_ctx)

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(cl_ctx)

    nparticles = 10**5
    build_options = {"max_particles_in_box": 30, "do_plot": do_plot}
    run_build_test(
        tree_builder, cmd_queue, dims, dtype, nparticles, **build_options)
def test_two_level_particle_tree(ctx_getter, dtype, dims, do_plot=False):
    """Run ``run_build_test`` on 50 particles with
    ``max_particles_in_box=30``.
    """
    cl_ctx = ctx_getter()
    cmd_queue = cl.CommandQueue(cl_ctx)

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(cl_ctx)

    nparticles = 50
    build_options = {"max_particles_in_box": 30, "do_plot": do_plot}
    run_build_test(
        tree_builder, cmd_queue, dims, dtype, nparticles, **build_options)
def test_non_adaptive_particle_tree(ctx_factory, dtype, dims, do_plot=False):
    """Run ``run_build_test`` on 10**4 particles with
    ``max_particles_in_box=30`` and ``kind="non-adaptive"``.
    """
    cl_ctx = ctx_factory()
    cmd_queue = cl.CommandQueue(cl_ctx)

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(cl_ctx)

    nparticles = 10**4
    build_options = {
        "max_particles_in_box": 30,
        "do_plot": do_plot,
        "kind": "non-adaptive",
    }
    run_build_test(
        tree_builder, cmd_queue, dims, dtype, nparticles, **build_options)
def test_level_to_order_lookup(ctx_getter, lookup_func, extra_args):
    """Check that *lookup_func* returns one value per tree level and that
    the values never increase with the level.

    Skipped when pyfmmlib cannot be imported.

    :arg ctx_getter: callable returning the OpenCL context.
    :arg lookup_func: called as ``lookup_func(tree, *extra_args, epsilon)``.
    :arg extra_args: tuple of arguments placed before ``epsilon``.
    """
    pytest.importorskip("pyfmmlib")
    cl_ctx = ctx_getter()
    queue = cl.CommandQueue(cl_ctx)

    nsources = 500
    real_dtype = np.float64
    epsilon = 1e-9

    from boxtree.tools import (
        make_normal_particle_array as p_normal)

    sources = p_normal(queue, nsources, 2, real_dtype, seed=15)

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(cl_ctx)
    tree, _ = tree_builder(
        queue, sources, max_particles_in_box=30, debug=True)

    lookup_args = extra_args + (epsilon, )
    levels = lookup_func(tree, *lookup_args)

    assert len(levels) == tree.nlevels
    # Should be monotonically nonincreasing.
    assert all(
        levels[lev] >= levels[lev + 1]
        for lev in range(tree.nlevels - 1))
def __call__(self, queue=None):
    """Build the tree and traversal and wrap them in a
    ``BoxFMMGeometryData``.

    The points returned by ``self._get_q_points`` are used as both sources
    and targets of an ``"adaptive-level-restricted"`` tree.

    :arg queue: command queue to use; if *None*, one is created on
        ``self.cl_context``.
    """
    if queue is None:
        queue = cl.CommandQueue(self.cl_context)

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(self.cl_context)

    q_points = self._get_q_points(queue)

    # Box capacity: n_q_points_per_cell**dim * 2**dim - 1.
    points_per_cell = self.n_q_points_per_cell**self.dim
    box_capacity = points_per_cell * (2**self.dim) - 1

    tree, _ = tree_builder(
        queue,
        particles=q_points,
        targets=q_points,
        bbox=self._bbox,
        max_particles_in_box=box_capacity,
        kind="adaptive-level-restricted")

    from boxtree.traversal import FMMTraversalBuilder
    trav_builder = FMMTraversalBuilder(self.cl_context)
    trav, _ = trav_builder(queue, tree)

    q_weights = self._get_q_weights(queue)
    return BoxFMMGeometryData(
        self.cl_context, q_points, q_weights, tree, trav)
def test_area_query_balls_outside_bbox(ctx_factory, dims, do_plot=False):
    """
    The input to the area query includes balls whose centers are not within
    the tree bounding box: centers are drawn uniformly from a range reaching
    one unit beyond the bounding box on either side.
    """
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)

    npoints = 10**4
    coord_dtype = np.float64

    particles = make_normal_particle_array(queue, npoints, dims, coord_dtype)

    if do_plot:
        import matplotlib.pyplot as pt
        pt.plot(particles[0].get(), particles[1].get(), "x")

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(cl_ctx)

    queue.finish()
    tree, _ = tree_builder(
        queue, particles, max_particles_in_box=30, debug=True)

    nballs = 10**4
    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(cl_ctx, seed=13)

    # The same scalar range is used for every axis.
    lower = tree.bounding_box[0].min() - 1
    upper = tree.bounding_box[1].max() + 1

    from pytools.obj_array import make_obj_array
    center_axes = [
        rng.uniform(queue, nballs, dtype=coord_dtype, a=lower, b=upper)
        for _ in range(dims)]
    ball_centers = make_obj_array(center_axes)
    ball_radii = cl.array.empty(queue, nballs, coord_dtype).fill(0.1)

    run_area_query_test(cl_ctx, queue, tree, ball_centers, ball_radii)
def test_explicit_refine_weights_particle_tree(ctx_factory, dtype, dims,
        do_plot=False):
    """Run ``run_build_test`` on 10**5 particles with random ``int32``
    refine weights (drawn with ``a=1, b=10``) and
    ``max_leaf_refine_weight=100``.
    """
    cl_ctx = ctx_factory()
    cmd_queue = cl.CommandQueue(cl_ctx)

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(cl_ctx)

    nparticles = 10**5

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(cl_ctx, seed=10)
    refine_weights = rng.uniform(
        cmd_queue, nparticles, dtype=np.int32, a=1, b=10)

    build_options = {
        "refine_weights": refine_weights,
        "max_leaf_refine_weight": 100,
        "do_plot": do_plot,
    }
    run_build_test(
        tree_builder, cmd_queue, dims, dtype, nparticles, **build_options)
def test_area_query(ctx_factory, dims, do_plot=False):
    """Run ``run_area_query_test`` on a tree over 10**5 particles with 10**4
    balls of radius 0.1.

    :arg ctx_factory: callable returning the OpenCL context.
    :arg dims: number of spatial dimensions.
    :arg do_plot: if true, plot the first two particle coordinates.
    """
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)

    npoints = 10**5
    coord_dtype = np.float64

    particles = make_normal_particle_array(queue, npoints, dims, coord_dtype)

    if do_plot:
        import matplotlib.pyplot as pt
        pt.plot(particles[0].get(), particles[1].get(), "x")

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(cl_ctx)

    queue.finish()
    tree, _ = tree_builder(
        queue, particles, max_particles_in_box=30, debug=True)

    nballs = 10**4
    ball_centers = make_normal_particle_array(
        queue, nballs, dims, coord_dtype)
    ball_radii = cl.array.empty(queue, nballs, coord_dtype).fill(0.1)

    run_area_query_test(cl_ctx, queue, tree, ball_centers, ball_radii)
def test_sumpy_fmm(ctx_getter, knl, local_expn_class, mpole_expn_class):
    """Check convergence of the sumpy FMM against direct (P2P) evaluation.

    For each expansion order in ``order_values`` the FMM potential is
    compared with a direct evaluation, and the relative error is fed to a
    ``PConvergenceVerifier`` which is invoked at the end.

    :arg ctx_getter: callable returning the OpenCL context.
    :arg knl: kernel object; its ``dim`` attribute sets the dimension.
    :arg local_expn_class: called as ``local_expn_class(knl)`` via
        ``functools.partial``.
    :arg mpole_expn_class: called as ``mpole_expn_class(knl)`` via
        ``functools.partial``.
    """
    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 1000
    ntargets = 300
    dtype = np.float64

    from boxtree.tools import (make_normal_particle_array as p_normal)

    sources = p_normal(queue, nsources, knl.dim, dtype, seed=15)
    if 1:
        # Shift the targets by 0.1 along the first axis.
        offset = np.zeros(knl.dim)
        offset[0] = 0.1

        targets = (p_normal(queue, ntargets, knl.dim, dtype, seed=18) + offset)

        del offset
    else:
        # Disabled alternative: use the points of a FieldPlotter as targets.
        from sumpy.visualization import FieldPlotter
        fp = FieldPlotter(np.array([0.5, 0]), extent=3, npoints=200)
        from pytools.obj_array import make_obj_array
        targets = make_obj_array([fp.points[i] for i in range(knl.dim)])

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, targets=targets, max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)

    # {{{ plot tree

    # Disabled debugging aid.
    if 0:
        host_tree = tree.get()
        host_trav = trav.get()

        if 1:
            print("src_box", host_tree.find_box_nr_for_source(403))
            print("tgt_box", host_tree.find_box_nr_for_target(28))
            print(list(host_trav.target_or_target_parent_boxes).index(37))
            print(host_trav.get_box_list("sep_bigger", 22))

        from boxtree.visualization import TreePlotter
        plotter = TreePlotter(host_tree)
        plotter.draw_tree(fill=False, edgecolor="black", zorder=10)
        plotter.set_bounding_box()
        plotter.draw_box_numbers()

        import matplotlib.pyplot as pt
        pt.show()

    # }}}

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(ctx, seed=44)
    weights = rng.uniform(queue, nsources, dtype=np.float64)

    logger.info("computing direct (reference) result")

    from pytools.convergence import PConvergenceVerifier

    pconv_verifier = PConvergenceVerifier()

    # Defaults; the Helmholtz and Yukawa branches below override them with a
    # complex dtype, a kernel parameter, and other expansion orders.
    extra_kwargs = {}
    dtype = np.float64
    order_values = [1, 2, 3]
    if isinstance(knl, HelmholtzKernel):
        extra_kwargs["k"] = 0.05
        dtype = np.complex128

        if knl.dim == 3:
            order_values = [1, 2]
        elif knl.dim == 2 and issubclass(local_expn_class, H2DLocalExpansion):
            order_values = [10, 12]

    elif isinstance(knl, YukawaKernel):
        extra_kwargs["lam"] = 2
        dtype = np.complex128

        if knl.dim == 3:
            order_values = [1, 2]
        elif knl.dim == 2 and issubclass(local_expn_class, Y2DLocalExpansion):
            order_values = [10, 12]

    from functools import partial
    for order in order_values:
        out_kernels = [knl]

        from sumpy.fmm import SumpyExpansionWranglerCodeContainer
        wcc = SumpyExpansionWranglerCodeContainer(
            ctx, partial(mpole_expn_class, knl), partial(local_expn_class, knl), out_kernels)
        # The lambda returns the current ``order`` for every level.
        wrangler = wcc.get_wrangler(
            queue, tree, dtype, fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order, kernel_extra_kwargs=extra_kwargs)

        from boxtree.fmm import drive_fmm

        pot, = drive_fmm(trav, wrangler, weights)

        # Direct evaluation of the same kernel as the reference.
        from sumpy import P2P
        p2p = P2P(ctx, out_kernels, exclude_self=False)
        evt, (ref_pot, ) = p2p(queue, targets, sources, (weights, ), **extra_kwargs)

        pot = pot.get()
        ref_pot = ref_pot.get()

        # NOTE: the error is measured in the infinity norm, although the log
        # message below labels it "l2".
        rel_err = la.norm(pot - ref_pot, np.inf) / la.norm(ref_pot, np.inf)
        logger.info("order %d -> relative l2 error: %g" % (order, rel_err))

        pconv_verifier.add_data_point(order, rel_err)

    print(pconv_verifier)
    pconv_verifier()
def test_sumpy_fmm_exclude_self(ctx_getter):
    """Check the sumpy FMM with ``exclude_self=True`` against direct
    evaluation with the same setting.

    Uses an order-10 2D Laplace FMM on 500 points that act as both sources
    and targets. The relative l2 error must be zero to within ``1e-7``.

    :arg ctx_getter: callable returning the OpenCL context.
    """
    logging.basicConfig(level=logging.INFO)

    cl_ctx = ctx_getter()
    queue = cl.CommandQueue(cl_ctx)

    nsources = 500
    real_dtype = np.float64

    from boxtree.tools import (
        make_normal_particle_array as p_normal)

    kernel = LaplaceKernel(2)
    local_expn_class = VolumeTaylorLocalExpansion
    mpole_expn_class = VolumeTaylorMultipoleExpansion
    order = 10

    sources = p_normal(queue, nsources, kernel.dim, real_dtype, seed=15)

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(cl_ctx)
    tree, _ = tree_builder(
        queue, sources, max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    trav_builder = FMMTraversalBuilder(cl_ctx)
    trav, _ = trav_builder(queue, tree, debug=True)

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(cl_ctx)
    weights = rng.uniform(queue, nsources, dtype=np.float64)

    # Target i maps to source i.
    target_to_source = np.arange(tree.ntargets, dtype=np.int32)
    self_extra_kwargs = {"target_to_source": target_to_source}

    out_kernels = [kernel]

    from functools import partial

    from sumpy.fmm import SumpyExpansionWranglerCodeContainer
    code_container = SumpyExpansionWranglerCodeContainer(
        cl_ctx,
        partial(mpole_expn_class, kernel),
        partial(local_expn_class, kernel),
        out_kernels,
        exclude_self=True)

    def level_to_order(kernel, kernel_args, tree, lev):
        # Same expansion order on every level.
        return order

    wrangler = code_container.get_wrangler(
        queue, tree, real_dtype,
        fmm_level_to_order=level_to_order,
        self_extra_kwargs=self_extra_kwargs)

    from boxtree.fmm import drive_fmm

    pot, = drive_fmm(trav, wrangler, weights)

    from sumpy import P2P
    direct = P2P(cl_ctx, out_kernels, exclude_self=True)
    evt, (ref_pot, ) = direct(
        queue, sources, sources, (weights, ), **self_extra_kwargs)

    pot = pot.get()
    ref_pot = ref_pot.get()

    rel_err = la.norm(pot - ref_pot) / la.norm(ref_pot)
    logger.info("order %d -> relative l2 error: %g" % (order, rel_err))

    assert np.isclose(rel_err, 0, atol=1e-7)
def build_tree(self):
    """Return a new ``boxtree.TreeBuilder`` bound to ``self.cl_context``.

    Note that this returns the builder object; no tree is built here.
    """
    from boxtree import TreeBuilder

    tree_builder = TreeBuilder(self.cl_context)
    return tree_builder
def test_pyfmmlib_fmm(ctx_getter, dims, use_dipoles, helmholtz_k):
    """Check the pyfmmlib-backed FMM against direct evaluation.

    The FMM potential is compared with a direct pyfmmlib routine and, if
    sumpy can be imported, with sumpy's P2P evaluation as well. Both relative
    errors must be below ``1e-5``. Skipped when pyfmmlib cannot be imported.

    :arg ctx_getter: callable returning the OpenCL context.
    :arg dims: number of spatial dimensions.
    :arg use_dipoles: if true, random dipole vectors are attached to the
        sources.
    :arg helmholtz_k: passed to the wrangler; a nonzero value selects the
        Helmholtz branches below.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 3000
    ntargets = 1000
    dtype = np.float64

    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    # Targets are shifted by 2 along the first axis.
    targets = (p_normal(queue, ntargets, dims, dtype, seed=18) + np.array([2, 0, 0])[:dims])

    sources_host = particle_array_to_host(sources)
    targets_host = particle_array_to_host(targets)

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, targets=targets, max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)

    trav = trav.get(queue=queue)

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=20)

    weights = rng.uniform(queue, nsources, dtype=np.float64).get()
    #weights = np.ones(nsources)

    if use_dipoles:
        np.random.seed(13)
        dipole_vec = np.random.randn(dims, nsources)
    else:
        dipole_vec = None

    if dims == 2 and helmholtz_k == 0:
        base_nterms = 20
    else:
        base_nterms = 10

    def fmm_level_to_nterms(tree, lev):
        """Return the number of expansion terms to use on level *lev*."""
        result = base_nterms

        if lev < 3 and helmholtz_k:
            # exercise order-varies-by-level capability
            result += 5

        if use_dipoles:
            result += 1

        return result

    from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler
    wrangler = FMMLibExpansionWrangler(trav.tree, helmholtz_k, fmm_level_to_nterms=fmm_level_to_nterms, dipole_vec=dipole_vec)

    from boxtree.fmm import drive_fmm

    timing_data = {}
    pot = drive_fmm(trav, wrangler, weights, timing_data=timing_data)
    print(timing_data)
    # drive_fmm must have recorded some timing information.
    assert timing_data

    # {{{ ref fmmlib computation

    logger.info("computing direct (reference) result")

    import pyfmmlib
    # The routine name is assembled from the wrangler's equation letter, the
    # dimension, and whether dipoles are used.
    fmmlib_routine = getattr(
        pyfmmlib, "%spot%s%ddall%s_vec" % (wrangler.eqn_letter, "fld" if dims == 3 else "grad", dims, "_dp" if use_dipoles else ""))

    kwargs = {}
    if dims == 3:
        kwargs["iffld"] = False
    else:
        kwargs["ifgrad"] = False
        kwargs["ifhess"] = False

    if use_dipoles:
        if helmholtz_k == 0 and dims == 2:
            # In this case the dipole direction is folded into a complex
            # strength and no separate "dipvec" is passed.
            kwargs["dipstr"] = -weights * (dipole_vec[0] + 1j * dipole_vec[1])
        else:
            kwargs["dipstr"] = weights
            kwargs["dipvec"] = dipole_vec
    else:
        kwargs["charge"] = weights
    if helmholtz_k:
        kwargs["zk"] = helmholtz_k

    ref_pot = wrangler.finalize_potentials(
        fmmlib_routine(sources=sources_host.T, targets=targets_host.T, **kwargs)[0])

    # NOTE: the error is measured in the infinity norm, although the log
    # message below labels it "l2".
    rel_err = la.norm(pot - ref_pot, np.inf) / la.norm(ref_pot, np.inf)
    logger.info("relative l2 error vs fmmlib direct: %g" % rel_err)
    assert rel_err < 1e-5, rel_err

    # }}}

    # {{{ check against sumpy

    # sumpy is optional: without it only a warning is issued and the check
    # below is not run.
    try:
        import sumpy # noqa
    except ImportError:
        have_sumpy = False
        from warnings import warn
        warn("sumpy unavailable: cannot compute independent reference " "values for pyfmmlib")
    else:
        have_sumpy = True

    if have_sumpy:
        from sumpy.kernel import (LaplaceKernel, HelmholtzKernel, DirectionalSourceDerivative)
        from sumpy.p2p import P2P

        sumpy_extra_kwargs = {}
        if helmholtz_k:
            knl = HelmholtzKernel(dims)
            sumpy_extra_kwargs["k"] = helmholtz_k
        else:
            knl = LaplaceKernel(dims)

        if use_dipoles:
            knl = DirectionalSourceDerivative(knl)
            sumpy_extra_kwargs["src_derivative_dir"] = dipole_vec

        p2p = P2P(ctx, [knl], exclude_self=False)

        evt, (sumpy_ref_pot, ) = p2p(queue, targets, sources, [weights], out_host=True, **sumpy_extra_kwargs)

        # Same infinity-norm error measure as for the fmmlib reference.
        sumpy_rel_err = (la.norm(pot - sumpy_ref_pot, np.inf) / la.norm(sumpy_ref_pot, np.inf))

        logger.info("relative l2 error vs sumpy direct: %g" % sumpy_rel_err)
        assert sumpy_rel_err < 1e-5, sumpy_rel_err

    # }}}
def test_fmm_completeness(ctx_getter, dims, nsources_req, ntargets_req,
        who_has_extent, source_gen, target_gen, filter_kind,
        well_sep_is_n_away, extent_norm, from_sep_smaller_crit):
    """Verify that the traversal lists and the FMM driver together account
    for every source/target interaction.

    All sources carry weight one, and the potential returned by the driver
    at each (retained) target is compared against the sum of all weights.

    :arg ctx_getter: callable that returns the OpenCL context to use.
    :arg dims: number of dimensions, handed to the particle generators.
    :arg nsources_req: source count requested from *source_gen*.
    :arg ntargets_req: target count requested from *target_gen*, or *None*
        to let the tree builder use the sources as targets.
    :arg who_has_extent: contains ``"s"`` and/or ``"t"`` to give sources
        and/or targets random radii; if it is non-empty, the close lists of
        the traversal are merged before use.
    :arg source_gen: callable ``(queue, n, dims, dtype, seed=...)`` producing
        the source particle arrays.
    :arg target_gen: like *source_gen*, for the targets.
    :arg filter_kind: a false value for no target filtering, or ``"user"`` /
        ``"tree"`` to filter the target lists in user or tree order.
    :arg well_sep_is_n_away: forwarded to ``FMMTraversalBuilder``.
    :arg extent_norm: forwarded to the tree builder.
    :arg from_sep_smaller_crit: forwarded to ``FMMTraversalBuilder``.
    """
    extent_on_sources = "s" in who_has_extent
    extent_on_targets = "t" in who_has_extent

    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    dtype = np.float64

    # {{{ generate particles

    try:
        sources = source_gen(queue, nsources_req, dims, dtype, seed=15)
        nsources = len(sources[0])

        if ntargets_req is not None:
            targets = target_gen(queue, ntargets_req, dims, dtype, seed=16)
            ntargets = len(targets[0])
        else:
            # Passing no targets says "same as sources" to the tree builder.
            targets = None
            ntargets = None
    except ImportError:
        pytest.skip("loo.py not available, but needed for particle array "
                "generation")

    # }}}

    # {{{ particle extents

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=12)

    def random_radii(count):
        # 2**u with u drawn by rng.uniform using a=-10, b=0
        return 2**rng.uniform(queue, count, dtype=dtype, a=-10, b=0)

    # Source radii are drawn before target radii; keep that order so the
    # random stream is consumed identically.
    source_radii = random_radii(nsources) if extent_on_sources else None
    target_radii = random_radii(ntargets) if extent_on_targets else None

    # }}}

    # {{{ tree and traversal

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(ctx)

    tree, _ = build_tree(
            queue, sources,
            targets=targets,
            max_particles_in_box=30,
            source_radii=source_radii,
            target_radii=target_radii,
            debug=True,
            stick_out_factor=0.25,
            extent_norm=extent_norm)

    if 0:
        tree.get().plot()
        import matplotlib.pyplot as pt
        pt.show()

    from boxtree.traversal import FMMTraversalBuilder
    build_traversal = FMMTraversalBuilder(
            ctx,
            well_sep_is_n_away=well_sep_is_n_away,
            from_sep_smaller_crit=from_sep_smaller_crit)
    trav, _ = build_traversal(queue, tree, debug=True)

    if who_has_extent:
        # Keep the unmerged traversal around for the debug plots below.
        unmerged_trav = trav
        trav = trav.merge_close_lists(queue)

    #weights = np.random.randn(nsources)
    weights = np.ones(nsources)
    weights_sum = weights.sum()

    host_trav = trav.get(queue=queue)
    host_tree = host_trav.tree

    if who_has_extent:
        unmerged_host_trav = unmerged_trav.get(queue=queue)

    # }}}

    # {{{ target filtering and wrangler setup

    from boxtree.tree import ParticleListFilter
    particle_filter = ParticleListFilter(ctx)

    # One flag per target; targets coincide with sources if ntargets is None.
    nflags = ntargets or nsources

    if not filter_kind:
        wrangler = ConstantOneExpansionWrangler(host_tree)
        flags = cl.array.empty(queue, nflags, dtype=np.int8)
        flags.fill(1)
    else:
        flags = rng.uniform(queue, nflags, np.int32, a=0, b=2) \
                .astype(np.int8)

        if filter_kind == "user":
            filter_target_lists = \
                    particle_filter.filter_target_lists_in_user_order
            wrangler_cls = \
                    ConstantOneExpansionWranglerWithFilteredTargetsInUserOrder
        elif filter_kind == "tree":
            filter_target_lists = \
                    particle_filter.filter_target_lists_in_tree_order
            wrangler_cls = \
                    ConstantOneExpansionWranglerWithFilteredTargetsInTreeOrder
        else:
            raise ValueError("unsupported value of 'filter_kind'")

        filtered_targets = filter_target_lists(queue, tree, flags)
        wrangler = wrangler_cls(host_tree, filtered_targets.get(queue=queue))

    # }}}

    if ntargets is None and not filter_kind:
        # This check only works for targets == sources.
        round_trip = wrangler.reorder_potentials(
                wrangler.reorder_sources(weights))
        assert (round_trip == weights).all()

    from boxtree.fmm import drive_fmm
    pot = drive_fmm(host_trav, wrangler, weights)

    if filter_kind:
        # Only the targets that passed the filter are compared.
        pot = pot[flags.get() > 0]

    rel_err = la.norm((pot - weights_sum) / nsources)
    good = rel_err < 1e-8

    # {{{ build, evaluate matrix (and identify incorrect interactions)

    if 0 and not good:
        mat = np.zeros((ntargets, nsources), dtype)
        from pytools import ProgressBar

        root_logger = logging.getLogger()
        root_logger.setLevel(logging.WARNING)

        pb = ProgressBar("matrix", nsources)
        for isrc in range(nsources):
            unit_vec = np.zeros(nsources, dtype=dtype)
            unit_vec[isrc] = 1
            mat[:, isrc] = drive_fmm(host_trav, wrangler, unit_vec)
            pb.progress()
        pb.finished()

        root_logger.setLevel(logging.INFO)

        import matplotlib.pyplot as pt

        if 0:
            pt.imshow(mat)
            pt.colorbar()
            pt.show()

        incorrect_tgts, incorrect_srcs = np.where(mat != 1)

        if 1 and len(incorrect_tgts):
            from boxtree.visualization import TreePlotter
            plotter = TreePlotter(host_tree)
            plotter.draw_tree(fill=False, edgecolor="black")
            plotter.draw_box_numbers()
            plotter.set_bounding_box()

            bad_tgts_tree_order = \
                    host_tree.indices_to_tree_target_order(incorrect_tgts)
            bad_srcs_tree_order = \
                    host_tree.indices_to_tree_source_order(incorrect_srcs)

            src_boxes = [
                    host_tree.find_box_nr_for_source(isrc)
                    for isrc in bad_srcs_tree_order]
            tgt_boxes = [
                    host_tree.find_box_nr_for_target(itgt)
                    for itgt in bad_tgts_tree_order]
            print(src_boxes)
            print(tgt_boxes)

            # plot all sources/targets
            if 0:
                pt.plot(
                        host_tree.targets[0],
                        host_tree.targets[1],
                        "v", alpha=0.9)
                pt.plot(
                        host_tree.sources[0],
                        host_tree.sources[1],
                        "gx", alpha=0.9)

            # plot offending sources/targets
            if 0:
                pt.plot(
                        host_tree.targets[0][bad_tgts_tree_order],
                        host_tree.targets[1][bad_tgts_tree_order],
                        "rv")
                pt.plot(
                        host_tree.sources[0][bad_srcs_tree_order],
                        host_tree.sources[1][bad_srcs_tree_order],
                        "go")
            pt.gca().set_aspect("equal")

            from boxtree.visualization import draw_box_lists
            draw_box_lists(
                    plotter,
                    unmerged_host_trav if who_has_extent else host_trav,
                    22)
            # from boxtree.visualization import draw_same_level_non_well_sep_boxes
            # draw_same_level_non_well_sep_boxes(plotter, host_trav, 2)

            pt.show()

    # }}}

    if 0 and not good:
        import matplotlib.pyplot as pt
        pt.plot(pot - weights_sum)
        pt.show()

    if 0 and not good:
        import matplotlib.pyplot as pt
        filt_targets = [
                host_tree.targets[0][flags.get() > 0],
                host_tree.targets[1][flags.get() > 0],
                ]
        host_tree.plot()
        bad = np.abs(pot - weights_sum) >= 1e-3
        bad_targets = [
                filt_targets[0][bad],
                filt_targets[1][bad],
                ]
        print(bad_targets[0].shape)
        pt.plot(filt_targets[0], filt_targets[1], "x")
        pt.plot(bad_targets[0], bad_targets[1], "v")
        pt.show()

    assert good