def test_random(ctx_factory):
    """Check the value ranges of numbers produced by ``RanluxGenerator``.

    For several array sizes, and for ``float32`` (plus ``float64`` when
    ``has_double_support`` reports it for the first device), an array is
    filled with ``fill_uniform`` using the default bounds and using
    ``a=4, b=7``, and every entry is checked to lie strictly inside the
    requested interval.  A normal draw is issued as well, but its values are
    not inspected.  Finally ``int32`` uniform draws with ``a=200, b=300`` are
    checked against ``200 <= x < 300``.

    :arg ctx_factory: zero-argument callable returning the OpenCL context to
        run against.
    """
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import RanluxGenerator

    if has_double_support(context.devices[0]):
        dtypes = [np.float32, np.float64]
    else:
        dtypes = [np.float32]

    gen = RanluxGenerator(queue, 5120)

    for ary_size in [300, 301, 302, 303, 10007]:
        for dtype in dtypes:
            ran = cl_array.zeros(queue, ary_size, dtype)
            gen.fill_uniform(ran)
            # Copy to the host once; both bound checks use the same data.
            ran_host = ran.get()
            assert (0 < ran_host).all()
            assert (ran_host < 1).all()

            gen.synchronize(queue)

            ran = cl_array.zeros(queue, ary_size, dtype)
            gen.fill_uniform(ran, a=4, b=7)
            ran_host = ran.get()
            assert (4 < ran_host).all()
            assert (ran_host < 7).all()

            # Only exercised for successful execution; values are unchecked.
            ran = gen.normal(queue, (10007,), dtype, mu=4, sigma=3)

    dtypes = [np.int32]
    for dtype in dtypes:
        ran = gen.uniform(queue, (10000007,), dtype, a=200, b=300)
        # A single transfer of this large array instead of one per assertion.
        ran_host = ran.get()
        assert (200 <= ran_host).all()
        assert (ran_host < 300).all()
def test_random_int_in_range(ctx_factory, dtype):
    """Check that integer uniform draws respect the requested bounds.

    Draws 10000007 values of type *dtype* with ``a=200, b=300`` from a
    ``RanluxGenerator`` and asserts ``200 <= x < 300`` for all of them.  The
    test is marked as an expected failure for ``np.int64`` when the first
    device's platform vendor string starts with ``"Advanced Micro"``.

    :arg ctx_factory: zero-argument callable returning the OpenCL context to
        run against.
    :arg dtype: integer dtype handed to ``gen.uniform``.
    """
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import RanluxGenerator
    gen = RanluxGenerator(queue, 5120)

    if (dtype == np.int64
            and context.devices[0].platform.vendor.startswith("Advanced Micro")):
        pytest.xfail("AMD miscompiles 64-bit RNG math")

    ran = gen.uniform(queue, (10000007,), dtype, a=200, b=300)

    # Transfer the (large) array to the host once and reuse it for both
    # bound checks rather than copying it for every assertion.
    ran_host = ran.get()
    assert (200 <= ran_host).all()
    assert (ran_host < 300).all()
def test_sort(ctx_factory, scan_kernel):
    """Check ``RadixSort`` against ``numpy.sort`` and print throughput figures.

    For each size in ``scan_test_counts[:-1]``, ``int32`` keys drawn with
    ``a=0, b=2**16`` are sorted on the device using ``key_bits=16`` and the
    result is compared element-wise with the host-side sort.  The test is
    skipped when ``mako`` cannot be imported.

    :arg ctx_factory: zero-argument callable returning the OpenCL context to
        run against.
    :arg scan_kernel: forwarded as the *scan_kernel* argument of
        ``RadixSort``.  Sizes of 2000 and above are skipped when
        ``isinstance(scan_kernel, GenericDebugScanKernel)`` holds.
    """
    from pytest import importorskip
    importorskip("mako")

    context = ctx_factory()
    queue = cl.CommandQueue(context)

    dtype = np.int32

    from pyopencl.algorithm import RadixSort
    sort = RadixSort(context, "int *ary", key_expr="ary[i]",
            sort_arg_names=["ary"], scan_kernel=scan_kernel)

    from pyopencl.clrandom import RanluxGenerator
    rng = RanluxGenerator(queue, seed=15)

    # Interval timing needs a high-resolution clock: with a coarse wall clock
    # a small host-side sort can measure as 0.0 seconds, and the throughput
    # report below would then fail with ZeroDivisionError.
    from timeit import default_timer as timer

    # intermediate arrays for largest size cause out-of-memory on low-end GPUs
    for n in scan_test_counts[:-1]:
        if n >= 2000 and isinstance(scan_kernel, GenericDebugScanKernel):
            continue

        print(n)

        print(" rng")
        a_dev = rng.uniform(queue, (n,), dtype=dtype, a=0, b=2**16)
        a = a_dev.get()

        dev_start = timer()
        print(" device")
        (a_dev_sorted,), _evt = sort(a_dev, key_bits=16)
        # Wait for the device so that the measured interval covers the sort.
        queue.finish()
        dev_end = timer()

        print(" numpy")
        a_sorted = np.sort(a)
        numpy_end = timer()

        numpy_elapsed = numpy_end - dev_end
        dev_elapsed = dev_end - dev_start
        print(" dev: %.2f MKeys/s numpy: %.2f MKeys/s ratio: %.2fx" % (
            1e-6 * n / dev_elapsed,
            1e-6 * n / numpy_elapsed,
            numpy_elapsed / dev_elapsed))

        assert (a_dev_sorted.get() == a_sorted).all()
def test_sort(ctx_factory):
    """Check ``RadixSort`` against ``numpy.sort`` and print throughput figures.

    For each size in ``scan_test_counts[:-1]``, ``int32`` keys drawn with
    ``a=0, b=2 ** 16`` are sorted on the device using ``key_bits=16`` and the
    result is compared element-wise with the host-side sort.  The test is
    skipped when ``mako`` cannot be imported.

    :arg ctx_factory: zero-argument callable returning the OpenCL context to
        run against.
    """
    from pytest import importorskip
    importorskip("mako")

    context = ctx_factory()
    queue = cl.CommandQueue(context)

    dtype = np.int32

    from pyopencl.algorithm import RadixSort
    sort = RadixSort(context, "int *ary", key_expr="ary[i]", sort_arg_names=["ary"])

    from pyopencl.clrandom import RanluxGenerator
    rng = RanluxGenerator(queue, seed=15)

    # Interval timing needs a high-resolution clock: with a coarse wall clock
    # a small host-side sort can measure as 0.0 seconds, and the throughput
    # report below would then fail with ZeroDivisionError.
    from timeit import default_timer as timer

    # intermediate arrays for largest size cause out-of-memory on low-end GPUs
    for n in scan_test_counts[:-1]:
        print(n)

        print(" rng")
        a_dev = rng.uniform(queue, (n,), dtype=dtype, a=0, b=2 ** 16)
        a = a_dev.get()

        dev_start = timer()
        print(" device")
        (a_dev_sorted,), _evt = sort(a_dev, key_bits=16)
        # Wait for the device so that the measured interval covers the sort.
        queue.finish()
        dev_end = timer()

        print(" numpy")
        a_sorted = np.sort(a)
        numpy_end = timer()

        numpy_elapsed = numpy_end - dev_end
        dev_elapsed = dev_end - dev_start
        print(
            " dev: %.2f MKeys/s numpy: %.2f MKeys/s ratio: %.2fx"
            % (1e-6 * n / dev_elapsed, 1e-6 * n / numpy_elapsed, numpy_elapsed / dev_elapsed)
        )

        assert (a_dev_sorted.get() == a_sorted).all()
def test_sort(ctx_factory):
    """Check ``RadixSort`` against ``numpy.sort`` and print throughput figures.

    For each size in ``scan_test_counts``, ``int32`` keys drawn with
    ``a=0, b=2**16`` are sorted on the device using ``key_bits=16`` and the
    result is compared element-wise with the host-side sort.

    :arg ctx_factory: zero-argument callable returning the OpenCL context to
        run against.
    """
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    dtype = np.int32

    from pyopencl.algorithm import RadixSort
    sort = RadixSort(context, "int *ary", key_expr="ary[i]",
            sort_arg_names=["ary"])

    from pyopencl.clrandom import RanluxGenerator
    rng = RanluxGenerator(queue, seed=15)

    # Interval timing needs a high-resolution clock: with a coarse wall clock
    # a small host-side sort can measure as 0.0 seconds, and the throughput
    # report below would then fail with ZeroDivisionError.
    from timeit import default_timer as timer

    for n in scan_test_counts:
        print(n)

        print(" rng")
        a_dev = rng.uniform(queue, (n,), dtype=dtype, a=0, b=2**16)
        a = a_dev.get()

        dev_start = timer()
        print(" device")
        a_dev_sorted, = sort(a_dev, key_bits=16)
        # Wait for the device so that the measured interval covers the sort.
        queue.finish()
        dev_end = timer()

        print(" numpy")
        a_sorted = np.sort(a)
        numpy_end = timer()

        numpy_elapsed = numpy_end-dev_end
        dev_elapsed = dev_end-dev_start
        print(" dev: %.2f MKeys/s numpy: %.2f MKeys/s ratio: %.2fx" % (
            1e-6*n/dev_elapsed, 1e-6*n/numpy_elapsed, numpy_elapsed/dev_elapsed))

        assert (a_dev_sorted.get() == a_sorted).all()
def test_pyfmmlib_fmm(ctx_getter):
    """Compare an FMM evaluation driven through the pyfmmlib wrangler with
    pyfmmlib's direct evaluation.

    3000 sources and 1000 targets (the latter shifted by ``[2, 0]``) are
    generated in two dimensions, a tree and traversal are built, and
    ``drive_fmm`` is run with a ``Helmholtz2DExpansionWrangler``
    (``helmholtz_k = 2``, ``nterms=10``).  The relative l2 difference to the
    result of ``hpotgrad2dall_vec`` must stay below ``1e-5``.  The test is
    skipped when ``pyfmmlib`` cannot be imported.

    :arg ctx_getter: zero-argument callable returning the OpenCL context to
        run against.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    cl_ctx = ctx_getter()
    queue = cl.CommandQueue(cl_ctx)

    # Problem parameters.
    nsources = 3000
    ntargets = 1000
    dims = 2
    dtype = np.float64
    helmholtz_k = 2

    # Particle clouds; the targets are moved away from the sources.
    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    unshifted_targets = p_normal(queue, ntargets, dims, dtype, seed=18)
    targets = unshifted_targets + np.array([2, 0])

    sources_host = particle_array_to_host(sources)
    targets_host = particle_array_to_host(targets)

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(cl_ctx)
    tree, _ = build_tree(
            queue, sources, targets=targets,
            max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    build_traversal = FMMTraversalBuilder(cl_ctx)
    dev_trav, _ = build_traversal(queue, tree, debug=True)
    trav = dev_trav.get(queue=queue)

    from pyopencl.clrandom import RanluxGenerator
    rng = RanluxGenerator(queue, seed=20)
    dev_weights = rng.uniform(queue, nsources, dtype=np.float64)
    weights = dev_weights.get()

    logger.info("computing direct (reference) result")

    from pyfmmlib import hpotgrad2dall_vec
    ref_pot, _, _ = hpotgrad2dall_vec(
            ifgrad=False,
            ifhess=False,
            sources=sources_host.T,
            charge=weights,
            targets=targets_host.T,
            zk=helmholtz_k)

    from boxtree.pyfmmlib_integration import Helmholtz2DExpansionWrangler
    wrangler = Helmholtz2DExpansionWrangler(trav.tree, helmholtz_k, nterms=10)

    from boxtree.fmm import drive_fmm
    pot = drive_fmm(trav, wrangler, weights)

    abs_err = la.norm(pot - ref_pot)
    rel_err = abs_err / la.norm(ref_pot)
    logger.info("relative l2 error: %g" % rel_err)
    assert rel_err < 1e-5
def test_fmm_completeness(
    ctx_getter, dims, nsources_req, ntargets_req, who_has_extent,
    source_gen, target_gen, filter_kind
):
    """Tests whether the built FMM traversal structures and driver completely
    capture all interactions.

    Sources receive random weights (``np.random.randn``), the FMM is driven
    with one of the ``ConstantOneExpansionWrangler*`` classes, and the test
    asserts that the norm of ``(pot - sum(weights)) / nsources`` stays below
    ``1e-8``.

    :arg ctx_getter: zero-argument callable returning the OpenCL context.
    :arg dims: passed on to *source_gen* and *target_gen*.
    :arg nsources_req: particle count requested from *source_gen*; the count
        actually used is ``len(sources[0])``.
    :arg ntargets_req: particle count requested from *target_gen*, or *None*
        to pass ``targets=None`` to the tree builder.
    :arg who_has_extent: container checked for ``"s"`` (sources get radii)
        and ``"t"`` (targets get radii).
    :arg source_gen: callable ``(queue, n, dims, dtype, seed=...)`` producing
        the sources.
    :arg target_gen: like *source_gen*, for the targets.
    :arg filter_kind: a false value for no target filtering, or ``"user"`` /
        ``"tree"``; any other true value raises :exc:`ValueError`.
    """
    sources_have_extent = "s" in who_has_extent
    targets_have_extent = "t" in who_has_extent

    logging.basicConfig(level=logging.INFO)

    cl_ctx = ctx_getter()
    queue = cl.CommandQueue(cl_ctx)

    dtype = np.float64

    try:
        sources = source_gen(queue, nsources_req, dims, dtype, seed=15)
        nsources = len(sources[0])

        if ntargets_req is not None:
            targets = target_gen(queue, ntargets_req, dims, dtype, seed=16)
            ntargets = len(targets[0])
        else:
            # This says "same as sources" to the tree builder.
            targets = None
            ntargets = None
    except ImportError:
        pytest.skip("loo.py not available, but needed for particle array " "generation")

    from pyopencl.clrandom import RanluxGenerator
    rng = RanluxGenerator(queue, seed=13)

    def draw_radii(count):
        # Radii are two raised to a uniform draw with a=-10, b=0.
        return 2 ** rng.uniform(queue, count, dtype=dtype, a=-10, b=0)

    source_radii = draw_radii(nsources) if sources_have_extent else None
    target_radii = draw_radii(ntargets) if targets_have_extent else None

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(cl_ctx)

    tree, _ = build_tree(
        queue,
        sources,
        targets=targets,
        max_particles_in_box=30,
        source_radii=source_radii,
        target_radii=target_radii,
        debug=True,
    )

    # Disabled debugging aid: plot the tree.
    if 0:
        tree.get().plot()

        import matplotlib.pyplot as pt
        pt.show()

    from boxtree.traversal import FMMTraversalBuilder
    build_traversal = FMMTraversalBuilder(cl_ctx)
    trav, _ = build_traversal(queue, tree, debug=True)
    if trav.sep_close_smaller_starts is not None:
        trav = trav.merge_close_lists(queue)

    weights = np.random.randn(nsources)
    weights_sum = np.sum(weights)

    host_trav = trav.get(queue=queue)
    host_tree = host_trav.tree

    if not filter_kind:
        wrangler = ConstantOneExpansionWrangler(host_tree)
    else:
        flags = rng.uniform(queue, ntargets or nsources, np.int32, a=0, b=2).astype(np.int8)

        if filter_kind == "user":
            from boxtree.tree import filter_target_lists_in_user_order
            filtered_targets = filter_target_lists_in_user_order(queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInUserOrder(
                host_tree, filtered_targets.get(queue=queue)
            )
        elif filter_kind == "tree":
            from boxtree.tree import filter_target_lists_in_tree_order
            filtered_targets = filter_target_lists_in_tree_order(queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInTreeOrder(
                host_tree, filtered_targets.get(queue=queue)
            )
        else:
            raise ValueError("unsupported value of 'filter_kind'")

    if ntargets is None and not filter_kind:
        # This check only works for targets == sources.
        round_trip = wrangler.reorder_potentials(wrangler.reorder_sources(weights))
        assert (round_trip == weights).all()

    from boxtree.fmm import drive_fmm
    pot = drive_fmm(host_trav, wrangler, weights)

    # {{{ build, evaluate matrix (and identify missing interactions)

    # Disabled debugging aid.
    if 0:
        mat = np.zeros((ntargets, nsources), dtype)
        from pytools import ProgressBar

        root_logger = logging.getLogger()
        root_logger.setLevel(logging.WARNING)

        progress = ProgressBar("matrix", nsources)
        for isrc in range(nsources):
            unit_vec = np.zeros(nsources, dtype=dtype)
            unit_vec[isrc] = 1
            mat[:, isrc] = drive_fmm(host_trav, wrangler, unit_vec)
            progress.progress()
        progress.finished()

        logging.getLogger().setLevel(logging.INFO)

        import matplotlib.pyplot as pt

        if 1:
            pt.spy(mat)
            pt.show()

        missing_tgts, missing_srcs = np.where(mat == 0)

        if 1 and len(missing_tgts):
            from boxtree.visualization import TreePlotter

            plotter = TreePlotter(host_tree)
            plotter.draw_tree(fill=False, edgecolor="black")
            plotter.draw_box_numbers()
            plotter.set_bounding_box()

            tree_order_missing_tgts = host_tree.indices_to_tree_target_order(missing_tgts)
            tree_order_missing_srcs = host_tree.indices_to_tree_source_order(missing_srcs)

            src_boxes = [
                host_tree.find_box_nr_for_source(isrc)
                for isrc in tree_order_missing_srcs
            ]
            tgt_boxes = [
                host_tree.find_box_nr_for_target(itgt)
                for itgt in tree_order_missing_tgts
            ]
            print(src_boxes)
            print(tgt_boxes)

            pt.plot(
                host_tree.targets[0][tree_order_missing_tgts],
                host_tree.targets[1][tree_order_missing_tgts],
                "rv",
            )
            pt.plot(
                host_tree.sources[0][tree_order_missing_srcs],
                host_tree.sources[1][tree_order_missing_srcs],
                "go",
            )
            pt.gca().set_aspect("equal")

            pt.show()

    # }}}

    if filter_kind:
        pot = pot[flags.get() > 0]

    rel_err = la.norm((pot - weights_sum) / nsources)
    good = rel_err < 1e-8

    # Disabled debugging aid: plot the pointwise deviation.
    if 0 and not good:
        import matplotlib.pyplot as pt
        pt.plot(pot - weights_sum)
        pt.show()

    # Disabled debugging aid: mark the targets with large deviation.
    if 0 and not good:
        import matplotlib.pyplot as pt
        filt_targets = [
            host_tree.targets[0][flags.get() > 0],
            host_tree.targets[1][flags.get() > 0],
        ]
        host_tree.plot()
        bad = np.abs(pot - weights_sum) >= 1e-3
        bad_targets = [filt_targets[0][bad], filt_targets[1][bad]]
        print(bad_targets[0].shape)
        pt.plot(filt_targets[0], filt_targets[1], "x")
        pt.plot(bad_targets[0], bad_targets[1], "v")
        pt.show()

    assert good
def test_pyfmmlib_fmm(ctx_getter):
    """Check the pyfmmlib-backed FMM against pyfmmlib's direct evaluation.

    Builds a tree and traversal for 3000 sources and 1000 targets in two
    dimensions (targets shifted by ``[2, 0]``), evaluates potentials with
    ``drive_fmm`` and a ``Helmholtz2DExpansionWrangler`` (``nterms=10``,
    ``helmholtz_k = 2``), and requires the relative l2 difference to the
    output of ``hpotgrad2dall_vec`` to be below ``1e-5``.  Skipped when
    ``pyfmmlib`` cannot be imported.

    :arg ctx_getter: zero-argument callable returning the OpenCL context to
        run against.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    context = ctx_getter()
    queue = cl.CommandQueue(context)

    # Problem parameters.
    nsources = 3000
    ntargets = 1000
    dims = 2
    dtype = np.float64
    helmholtz_k = 2

    # Particle clouds; the targets are moved away from the sources.
    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    targets_at_origin = p_normal(queue, ntargets, dims, dtype, seed=18)
    targets = targets_at_origin + np.array([2, 0])

    sources_host = particle_array_to_host(sources)
    targets_host = particle_array_to_host(targets)

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(context)
    tree, _ = tree_builder(
            queue, sources, targets=targets,
            max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    trav_builder = FMMTraversalBuilder(context)
    device_trav, _ = trav_builder(queue, tree, debug=True)
    trav = device_trav.get(queue=queue)

    from pyopencl.clrandom import RanluxGenerator
    rng = RanluxGenerator(queue, seed=20)
    weights_dev = rng.uniform(queue, nsources, dtype=np.float64)
    weights = weights_dev.get()

    logger.info("computing direct (reference) result")

    from pyfmmlib import hpotgrad2dall_vec
    ref_pot, _, _ = hpotgrad2dall_vec(
            ifgrad=False, ifhess=False,
            sources=sources_host.T,
            charge=weights,
            targets=targets_host.T,
            zk=helmholtz_k)

    from boxtree.pyfmmlib_integration import Helmholtz2DExpansionWrangler
    wrangler = Helmholtz2DExpansionWrangler(trav.tree, helmholtz_k, nterms=10)

    from boxtree.fmm import drive_fmm
    pot = drive_fmm(trav, wrangler, weights)

    difference_norm = la.norm(pot - ref_pot)
    rel_err = difference_norm / la.norm(ref_pot)
    logger.info("relative l2 error: %g" % rel_err)
    assert rel_err < 1e-5
def test_fmm_completeness(ctx_getter, dims, nsources_req, ntargets_req,
         who_has_extent, source_gen, target_gen, filter_kind):
    """Tests whether the built FMM traversal structures and driver completely
    capture all interactions.

    Sources receive random weights (``np.random.randn``), the FMM is driven
    with one of the ``ConstantOneExpansionWrangler*`` classes, and the test
    asserts that the norm of ``(pot - sum(weights)) / nsources`` stays below
    ``1e-8``.

    :arg ctx_getter: zero-argument callable returning the OpenCL context.
    :arg dims: passed on to *source_gen* and *target_gen*.
    :arg nsources_req: particle count requested from *source_gen*; the count
        actually used is ``len(sources[0])``.
    :arg ntargets_req: particle count requested from *target_gen*, or *None*
        to pass ``targets=None`` to the tree builder.
    :arg who_has_extent: container checked for ``"s"`` (sources get radii)
        and ``"t"`` (targets get radii).
    :arg source_gen: callable ``(queue, n, dims, dtype, seed=...)`` producing
        the sources.
    :arg target_gen: like *source_gen*, for the targets.
    :arg filter_kind: a false value for no target filtering, or ``"user"`` /
        ``"tree"``; any other true value raises :exc:`ValueError`.
    """
    sources_have_extent = "s" in who_has_extent
    targets_have_extent = "t" in who_has_extent

    logging.basicConfig(level=logging.INFO)

    context = ctx_getter()
    queue = cl.CommandQueue(context)

    dtype = np.float64

    try:
        sources = source_gen(queue, nsources_req, dims, dtype, seed=15)
        nsources = len(sources[0])

        if ntargets_req is not None:
            targets = target_gen(queue, ntargets_req, dims, dtype, seed=16)
            ntargets = len(targets[0])
        else:
            # This says "same as sources" to the tree builder.
            targets = None
            ntargets = None
    except ImportError:
        pytest.skip("loo.py not available, but needed for particle array "
                "generation")

    from pyopencl.clrandom import RanluxGenerator
    rng = RanluxGenerator(queue, seed=13)

    def make_radii(nparticles):
        # Radii are two raised to a uniform draw with a=-10, b=0.
        return 2**rng.uniform(queue, nparticles, dtype=dtype, a=-10, b=0)

    source_radii = make_radii(nsources) if sources_have_extent else None
    target_radii = make_radii(ntargets) if targets_have_extent else None

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(context)

    tree, _ = tree_builder(
            queue, sources, targets=targets,
            max_particles_in_box=30,
            source_radii=source_radii, target_radii=target_radii,
            debug=True)

    # Disabled debugging aid: plot the tree.
    if 0:
        tree.get().plot()

        import matplotlib.pyplot as pt
        pt.show()

    from boxtree.traversal import FMMTraversalBuilder
    trav_builder = FMMTraversalBuilder(context)
    trav, _ = trav_builder(queue, tree, debug=True)
    if trav.sep_close_smaller_starts is not None:
        trav = trav.merge_close_lists(queue)

    weights = np.random.randn(nsources)
    weights_sum = np.sum(weights)

    host_trav = trav.get(queue=queue)
    host_tree = host_trav.tree

    if not filter_kind:
        wrangler = ConstantOneExpansionWrangler(host_tree)
    else:
        int_flags = rng.uniform(queue, ntargets or nsources, np.int32, a=0, b=2)
        flags = int_flags.astype(np.int8)

        if filter_kind == "user":
            from boxtree.tree import filter_target_lists_in_user_order
            filtered_targets = filter_target_lists_in_user_order(
                    queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInUserOrder(
                    host_tree, filtered_targets.get(queue=queue))
        elif filter_kind == "tree":
            from boxtree.tree import filter_target_lists_in_tree_order
            filtered_targets = filter_target_lists_in_tree_order(
                    queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInTreeOrder(
                    host_tree, filtered_targets.get(queue=queue))
        else:
            raise ValueError("unsupported value of 'filter_kind'")

    if ntargets is None and not filter_kind:
        # This check only works for targets == sources.
        reordered_back = wrangler.reorder_potentials(
                wrangler.reorder_sources(weights))
        assert (reordered_back == weights).all()

    from boxtree.fmm import drive_fmm
    pot = drive_fmm(host_trav, wrangler, weights)

    # {{{ build, evaluate matrix (and identify missing interactions)

    # Disabled debugging aid.
    if 0:
        mat = np.zeros((ntargets, nsources), dtype)
        from pytools import ProgressBar

        logging.getLogger().setLevel(logging.WARNING)

        bar = ProgressBar("matrix", nsources)
        for icol in range(nsources):
            unit_vec = np.zeros(nsources, dtype=dtype)
            unit_vec[icol] = 1
            mat[:, icol] = drive_fmm(host_trav, wrangler, unit_vec)
            bar.progress()
        bar.finished()

        logging.getLogger().setLevel(logging.INFO)

        import matplotlib.pyplot as pt

        if 1:
            pt.spy(mat)
            pt.show()

        missing_tgts, missing_srcs = np.where(mat == 0)

        if 1 and len(missing_tgts):
            from boxtree.visualization import TreePlotter

            plotter = TreePlotter(host_tree)
            plotter.draw_tree(fill=False, edgecolor="black")
            plotter.draw_box_numbers()
            plotter.set_bounding_box()

            tree_order_missing_tgts = (
                    host_tree.indices_to_tree_target_order(missing_tgts))
            tree_order_missing_srcs = (
                    host_tree.indices_to_tree_source_order(missing_srcs))

            src_boxes = [
                    host_tree.find_box_nr_for_source(isrc)
                    for isrc in tree_order_missing_srcs]
            tgt_boxes = [
                    host_tree.find_box_nr_for_target(itgt)
                    for itgt in tree_order_missing_tgts]
            print(src_boxes)
            print(tgt_boxes)

            pt.plot(
                    host_tree.targets[0][tree_order_missing_tgts],
                    host_tree.targets[1][tree_order_missing_tgts],
                    "rv")
            pt.plot(
                    host_tree.sources[0][tree_order_missing_srcs],
                    host_tree.sources[1][tree_order_missing_srcs],
                    "go")
            pt.gca().set_aspect("equal")

            pt.show()

    # }}}

    if filter_kind:
        pot = pot[flags.get() > 0]

    rel_err = la.norm((pot - weights_sum) / nsources)
    good = rel_err < 1e-8

    # Disabled debugging aid: plot the pointwise deviation.
    if 0 and not good:
        import matplotlib.pyplot as pt
        pt.plot(pot - weights_sum)
        pt.show()

    # Disabled debugging aid: mark the targets with large deviation.
    if 0 and not good:
        import matplotlib.pyplot as pt
        filt_targets = [
                host_tree.targets[0][flags.get() > 0],
                host_tree.targets[1][flags.get() > 0],
                ]
        host_tree.plot()
        bad = np.abs(pot - weights_sum) >= 1e-3
        bad_targets = [
                filt_targets[0][bad],
                filt_targets[1][bad],
                ]
        print(bad_targets[0].shape)
        pt.plot(filt_targets[0], filt_targets[1], "x")
        pt.plot(bad_targets[0], bad_targets[1], "v")
        pt.show()

    assert good
def test_extent_tree(ctx_getter, dims, do_plot=False):
    """Build a tree over particles with radii and check its structure.

    100000 sources and 200000 targets with random radii are handed to
    ``TreeBuilder``.  The test then checks, on the host copy of the tree:

    * the tree's sources/source radii equal the input arrays indexed by
      ``tree.user_source_ids``, and likewise for targets via the inverse of
      ``tree.sorted_target_ids``;
    * every box lies within ``tree.bounding_box`` (up to a tolerance scaled
      by ``tree.root_extent``);
    * per-box cumulative particle counts equal the non-child count plus the
      cumulative counts of the box's children;
    * each particle, extended by its radius, stays within its box extent
      enlarged by ``tree.stick_out_factor * box_radius``.

    Finally, random point sources are generated around each source and linked
    into the device tree with ``link_point_sources``.

    :arg ctx_getter: zero-argument callable returning the OpenCL context.
    :arg dims: number of dimensions passed to the particle generator; also
        the number of point-source coordinate arrays created.
    :arg do_plot: not referenced in the visible body.
    """
    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 100000
    ntargets = 200000
    dtype = np.float64
    npoint_sources_per_source = 16

    sources = make_normal_particle_array(queue, nsources, dims, dtype, seed=12)
    targets = make_normal_particle_array(queue, ntargets, dims, dtype, seed=19)

    from pyopencl.clrandom import RanluxGenerator
    rng = RanluxGenerator(queue, seed=13)
    # Radii are two raised to a uniform draw with a=-10, b=0.
    source_radii = 2**rng.uniform(queue, nsources, dtype=dtype, a=-10, b=0)
    target_radii = 2**rng.uniform(queue, ntargets, dtype=dtype, a=-10, b=0)

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    queue.finish()
    dev_tree, _ = tb(queue, sources, targets=targets, source_radii=source_radii, target_radii=target_radii, max_particles_in_box=10, debug=True)

    logger.info("transfer tree, check orderings")

    # Host-side copy of the tree; all checks below run on host data.
    tree = dev_tree.get(queue=queue)

    sorted_sources = np.array(list(tree.sources))
    sorted_targets = np.array(list(tree.targets))
    sorted_source_radii = tree.source_radii
    sorted_target_radii = tree.target_radii

    # Input particles and radii as handed to the builder, copied to the host.
    unsorted_sources = np.array([pi.get() for pi in sources])
    unsorted_targets = np.array([pi.get() for pi in targets])
    unsorted_source_radii = source_radii.get()
    unsorted_target_radii = target_radii.get()

    assert (sorted_sources == unsorted_sources[:, tree.user_source_ids]).all()
    assert (sorted_source_radii == unsorted_source_radii[tree.user_source_ids]).all()

    # {{{ test box structure, stick-out criterion

    logger.info("test box structure, stick-out criterion")

    # Invert the index map tree.sorted_target_ids so that the targets can be
    # compared the same way as the sources above.
    user_target_ids = np.empty(tree.ntargets, dtype=np.intp)
    user_target_ids[tree.sorted_target_ids] = np.arange(tree.ntargets, dtype=np.intp)
    if ntargets:
        assert (sorted_targets == unsorted_targets[:, user_target_ids]).all()
        assert (sorted_target_radii == unsorted_target_radii[user_target_ids]).all()

    all_good_so_far = True

    # {{{ check sources, targets

    for ibox in range(tree.nboxes):
        extent_low, extent_high = tree.get_box_extent(ibox)

        # Half of the largest side length of the box.
        box_radius = np.max(extent_high-extent_low) * 0.5
        stick_out_dist = tree.stick_out_factor * box_radius

        # The box must lie inside the bounding box, up to a small tolerance.
        assert (extent_low >= tree.bounding_box[0] - 1e-12*tree.root_extent).all(), ibox
        assert (extent_high <= tree.bounding_box[1] + 1e-12*tree.root_extent).all(), ibox

        box_children = tree.box_child_ids[:, ibox]
        # NOTE(review): presumably a child id of 0 means "no child" -- confirm
        # against the tree data structure documentation.
        existing_children = box_children[box_children != 0]

        # Own (non-child) particles plus the children's cumulative counts
        # must add up to this box's cumulative count.
        assert (tree.box_source_counts_nonchild[ibox] + np.sum(tree.box_source_counts_cumul[existing_children]) == tree.box_source_counts_cumul[ibox])
        assert (tree.box_target_counts_nonchild[ibox] + np.sum(tree.box_target_counts_cumul[existing_children]) == tree.box_target_counts_cumul[ibox])

        for what, starts, counts, points, radii in [
                ("source", tree.box_source_starts, tree.box_source_counts_cumul, sorted_sources, sorted_source_radii),
                ("target", tree.box_target_starts, tree.box_target_counts_cumul, sorted_targets, sorted_target_radii),
                ]:
            # Particles of this box occupy a contiguous slice.
            bstart = starts[ibox]
            bslice = slice(bstart, bstart+counts[ibox])
            check_particles = points[:, bslice]
            check_radii = radii[bslice]

            # Stick-out criterion: in every axis, the particle extended by its
            # radius must stay within the box enlarged by stick_out_dist.
            good = (
                    (check_particles + check_radii < extent_high[:, np.newaxis] + stick_out_dist)
                    &
                    (extent_low[:, np.newaxis] - stick_out_dist <= check_particles - check_radii)
                    ).all(axis=0)

            all_good_here = good.all()

            if not all_good_here:
                print("BAD BOX %s %d level %d" % (what, ibox, tree.box_levels[ibox]))

            all_good_so_far = all_good_so_far and all_good_here
            assert all_good_here

    # }}}

    assert all_good_so_far

    # }}}

    # {{{ create, link point sources

    logger.info("creating point sources")

    # Fixed seed so that the host-side random offsets are reproducible.
    np.random.seed(20)

    from pytools.obj_array import make_obj_array
    # Per axis: each source coordinate plus its radius times uniform draws
    # from [-1, 1), npoint_sources_per_source draws per source.
    point_sources = make_obj_array([
            cl.array.to_device(queue, unsorted_sources[i][:, np.newaxis] + unsorted_source_radii[:, np.newaxis] * np.random.uniform( -1, 1, size=(nsources, npoint_sources_per_source)) )
            for i in range(dims)])

    # Start offsets 0, k, 2k, ..., nsources*k with
    # k = npoint_sources_per_source (nsources+1 entries in total).
    point_source_starts = cl.array.arange(queue, 0, (nsources+1)*npoint_sources_per_source, npoint_sources_per_source, dtype=tree.particle_id_dtype)

    from boxtree.tree import link_point_sources
    dev_tree = link_point_sources(queue, dev_tree, point_source_starts, point_sources, debug=True)