Beispiel #1
0
def test_area_query_balls_outside_bbox(ctx_getter, dims, do_plot=False):
    """Exercise the area query with balls that may lie outside the tree.

    Ball centers are sampled uniformly from an interval reaching one unit
    beyond the smallest and the largest bounding box coordinate, so the query
    input includes centers that are not within the tree bounding box.
    """
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    real_dtype = np.float64
    particle_count = 10**4

    particles = make_normal_particle_array(
        queue, particle_count, dims, real_dtype)

    if do_plot:
        import matplotlib.pyplot as pt
        pt.plot(particles[0].get(), particles[1].get(), "x")

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(context)

    queue.finish()
    tree, _ = build_tree(queue, particles, max_particles_in_box=30, debug=True)

    from pyopencl.clrandom import PhiloxGenerator
    from pytools.obj_array import make_obj_array

    ball_count = 10**4
    generator = PhiloxGenerator(context, seed=13)

    # Widen the sampling interval by one unit on either side of the bbox.
    lower = tree.bounding_box[0].min() - 1
    upper = tree.bounding_box[1].max() + 1

    center_components = []
    for _ in range(dims):
        center_components.append(
            generator.uniform(
                queue, ball_count, dtype=real_dtype, a=lower, b=upper))
    ball_centers = make_obj_array(center_components)

    # Every ball gets the same radius.
    ball_radii = cl.array.empty(queue, ball_count, real_dtype).fill(0.1)

    run_area_query_test(context, queue, tree, ball_centers, ball_radii)
Beispiel #2
0
def make_normal_particle_array(queue, nparticles, dims, dtype, seed=15):
    """Return an object array holding *dims* arrays of normal samples.

    Each component contains *nparticles* values of type *dtype*, drawn with
    ``normal`` from a Philox generator that is created on the context of
    *queue* and seeded with *seed*.
    """
    from pyopencl.clrandom import PhiloxGenerator

    generator = PhiloxGenerator(queue.context, seed=seed)

    components = []
    for _ in range(dims):
        components.append(generator.normal(queue, nparticles, dtype=dtype))

    return make_obj_array(components)
Beispiel #3
0
    def __init_particle(self):
        """Allocate the particle arrays and run the ``rt_init_particles`` kernel.

        ``x_gpu`` is allocated uninitialized with ``dim * np`` entries,
        ``v_gpu`` is filled with normal samples (mu=0, sigma=1) of the same
        length, and ``t_gpu`` holds ``np`` zeros. The kernel is launched over
        ``np`` work items on the ``x_gpu`` and ``v_gpu`` buffers and waited on.
        """
        print("Info- init particles")
        rng = PhiloxGenerator(self.ocl_ctx)

        # x_gpu is only allocated here, not filled; its buffer is handed to
        # the kernel below.
        self.x_gpu = cl_array.empty(
            self.ocl_queue, self.dim * self.np, dtype=self.dtype)

        # Velocities: one normal sample per particle and dimension.
        self.v_gpu = rng.normal(
            self.ocl_queue, (self.np * self.dim), self.dtype, mu=0, sigma=1)

        # Times: one zero per particle.
        self.t_gpu = cl_array.zeros(self.ocl_queue, self.np, dtype=self.dtype)

        init_event = self.ocl_prg.rt_init_particles(
            self.ocl_queue, (self.np, ), None,
            self.x_gpu.data, self.v_gpu.data)
        init_event.wait()
Beispiel #4
0
def make_normal_particle_array(queue, nparticles, dims, dtype, seed=15):
    """Build an object array of *dims* coordinate arrays of normal samples.

    A Philox generator is created on ``queue.context`` with the given *seed*;
    every coordinate array has *nparticles* entries of type *dtype*.
    """
    from pyopencl.clrandom import PhiloxGenerator

    philox = PhiloxGenerator(queue.context, seed=seed)

    def draw_component():
        return philox.normal(queue, nparticles, dtype=dtype)

    coordinates = [draw_component() for _ in range(dims)]
    return make_obj_array(coordinates)
Beispiel #5
0
    def __init__(self,
                 ctx_getter=cl.create_some_context,
                 enable_extents=False):
        """Build a 2D source tree and FMM traversal and keep host copies.

        A context is obtained from *ctx_getter*, 9,000,000 float32 sources are
        generated with ``p_normal``, and a tree plus traversal are built from
        them. The host-side tree and traversal are stored as ``self.tree`` and
        ``self.trav``; ``self.input`` bundles them with unit weights and their
        sum, and ``self.pot`` starts out as ``None``.

        Targets (and, when *enable_extents* is true, target radii) are
        generated as well, but they are not passed to the tree builder.
        """
        context = ctx_getter()
        queue = cl.CommandQueue(context)

        from pyopencl.characterize import has_struct_arg_count_bug
        if has_struct_arg_count_bug(queue.device):
            pytest.xfail(
                "won't work on devices with the struct arg count issue")

        logging.basicConfig(level=logging.INFO)

        dims = 2
        dtype = np.float32
        nsources = 9000000
        ntargets = 9000000

        from boxtree.fmm import drive_fmm  # noqa: F401
        sources = p_normal(queue, nsources, dims, dtype, seed=15)
        targets = p_normal(queue, ntargets, dims, dtype, seed=15)  # noqa: F841

        from pyopencl.clrandom import PhiloxGenerator
        generator = PhiloxGenerator(queue.context, seed=12)

        target_radii = None  # noqa: F841
        if enable_extents:
            exponents = generator.uniform(
                queue, ntargets, dtype=dtype, a=-10, b=0)
            target_radii = 2**exponents  # noqa: F841

        from boxtree import TreeBuilder
        build_tree = TreeBuilder(context)

        # Only the sources go into the tree; targets, target_radii and
        # stick_out_factor are not passed.
        tree, _ = build_tree(
            queue, sources, max_particles_in_box=30, debug=True)

        from boxtree.traversal import FMMTraversalBuilder
        build_traversal = FMMTraversalBuilder(context)
        device_trav, _ = build_traversal(queue, tree, debug=True)

        weights = np.ones(nsources)
        weights_sum = np.sum(weights)

        host_trav = device_trav.get(queue=queue)
        host_tree = host_trav.tree

        self.tree = host_tree
        self.trav = host_trav
        self.input = [host_tree, weights, weights_sum, host_trav]
        self.pot = None
Beispiel #6
0
def test_interaction_list_particle_count_thresholding(ctx_getter,
                                                      enable_extents):
    """Check the constant-one FMM with a cumulative source count threshold.

    A 2D tree with 1000 sources and 1000 targets is built, and the traversal
    is built with ``_from_sep_smaller_min_nsources_cumul`` set to one more
    than ``max_particles_in_box``. With unit weights, every potential
    returned by ``drive_fmm`` must equal the sum of the weights.

    :arg ctx_getter: callable returning the OpenCL context to use.
    :arg enable_extents: if true, targets get random radii ``2**u`` with
        ``u`` drawn via ``rng.uniform(..., a=-10, b=0)``.
    """
    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    logging.basicConfig(level=logging.INFO)

    dims = 2
    nsources = 1000
    ntargets = 1000
    # np.float was merely an alias for the builtin float (double precision)
    # and no longer exists in current NumPy; name the dtype explicitly.
    dtype = np.float64

    max_particles_in_box = 30
    # Ensure that we have underfilled boxes.
    from_sep_smaller_min_nsources_cumul = 1 + max_particles_in_box

    from boxtree.fmm import drive_fmm
    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    targets = p_normal(queue, ntargets, dims, dtype, seed=15)

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=12)

    if enable_extents:
        target_radii = 2**rng.uniform(queue, ntargets, dtype=dtype, a=-10, b=0)
    else:
        target_radii = None

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue,
                 sources,
                 targets=targets,
                 max_particles_in_box=max_particles_in_box,
                 target_radii=target_radii,
                 debug=True,
                 stick_out_factor=0.25)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(
        queue,
        tree,
        debug=True,
        _from_sep_smaller_min_nsources_cumul=from_sep_smaller_min_nsources_cumul
    )

    # Unit weights: each target's potential should be the weight total.
    weights = np.ones(nsources)
    weights_sum = np.sum(weights)

    host_trav = trav.get(queue=queue)
    host_tree = host_trav.tree

    wrangler = ConstantOneExpansionWrangler(host_tree)

    pot = drive_fmm(host_trav, wrangler, weights)

    assert (pot == weights_sum).all()
Beispiel #7
0
def test_plot_traversal(ctx_factory, well_sep_is_n_away=1, plot=False):
    """Draw a 2D tree and the interaction lists of one fixed box.

    The test is skipped when matplotlib cannot be imported. Box 380 is drawn
    when *well_sep_is_n_away* is 1 and box 320 when it is 2; for any other
    value no box lists are drawn. The figure is only shown if *plot* is true.
    """
    pytest.importorskip("matplotlib")
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    # Only the two-dimensional case is run.
    for dims in (2,):
        particle_count = 10**4
        real_dtype = np.float64

        from pyopencl.clrandom import PhiloxGenerator
        generator = PhiloxGenerator(queue.context, seed=15)

        from pytools.obj_array import make_obj_array
        coordinates = []
        for _ in range(dims):
            coordinates.append(
                generator.normal(queue, particle_count, dtype=real_dtype))
        particles = make_obj_array(coordinates)

        from boxtree import TreeBuilder
        build_tree = TreeBuilder(context)

        queue.finish()
        tree, _ = build_tree(
            queue, particles, max_particles_in_box=30, debug=True)

        from boxtree.traversal import FMMTraversalBuilder
        build_traversal = FMMTraversalBuilder(
            context, well_sep_is_n_away=well_sep_is_n_away)
        trav, _ = build_traversal(queue, tree)

        # Move both structures to the host for plotting.
        tree = tree.get(queue=queue)
        trav = trav.get(queue=queue)

        from boxtree.visualization import TreePlotter
        plotter = TreePlotter(tree)
        plotter.draw_tree(fill=False, edgecolor="black")
        plotter.set_bounding_box()

        from random import seed as seed_rng
        seed_rng(7)

        from boxtree.visualization import draw_box_lists

        # Pick the box whose lists get drawn, depending on the separation.
        if well_sep_is_n_away == 1:
            box_nr = 380
        elif well_sep_is_n_away == 2:
            box_nr = 320
        else:
            box_nr = None

        if box_nr is not None:
            draw_box_lists(plotter, trav, box_nr)

        if not plot:
            continue

        import matplotlib.pyplot as pt
        axes = pt.gca()
        axes.set_xticks([])
        pt.gca().set_yticks([])

        pt.show()
Beispiel #8
0
def test_pyfmmlib_fmm(ctx_getter):
    """Compare the fmmlib-backed 2D Helmholtz FMM with a direct evaluation.

    The test is skipped when ``pyfmmlib`` is unavailable. 3000 sources and
    1000 targets (shifted by ``[2, 0]``) are generated, the reference
    potential is computed with ``hpotgrad2dall_vec``, and the relative l2
    error of the FMM result has to be below ``1e-5``.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    context = ctx_getter()
    queue = cl.CommandQueue(context)

    nsources = 3000
    ntargets = 1000
    dims = 2
    dtype = np.float64
    helmholtz_k = 2

    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    target_shift = np.array([2, 0])
    targets = p_normal(queue, ntargets, dims, dtype, seed=18) + target_shift

    sources_host = particle_array_to_host(sources)
    targets_host = particle_array_to_host(targets)

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(context)
    tree, _ = build_tree(
        queue, sources, targets=targets,
        max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    build_traversal = FMMTraversalBuilder(context)
    device_trav, _ = build_traversal(queue, tree, debug=True)
    trav = device_trav.get(queue=queue)

    from pyopencl.clrandom import PhiloxGenerator
    generator = PhiloxGenerator(queue.context, seed=20)

    # Random charges, brought to the host right away.
    weights = generator.uniform(queue, nsources, dtype=np.float64).get()

    logger.info("computing direct (reference) result")

    from pyfmmlib import hpotgrad2dall_vec
    reference = hpotgrad2dall_vec(
        ifgrad=False, ifhess=False,
        sources=sources_host.T, charge=weights,
        targets=targets_host.T, zk=helmholtz_k)
    ref_pot, _, _ = reference

    from boxtree.pyfmmlib_integration import Helmholtz2DExpansionWrangler
    wrangler = Helmholtz2DExpansionWrangler(trav.tree, helmholtz_k, nterms=10)

    from boxtree.fmm import drive_fmm
    pot = drive_fmm(trav, wrangler, weights)

    error_norm = la.norm(pot - ref_pot)
    rel_err = error_norm / la.norm(ref_pot)
    logger.info("relative l2 error: %g" % rel_err)
    assert rel_err < 1e-5
Beispiel #9
0
def test_fmm_float32(ctx_getter=cl.create_some_context, enable_extents=True):
    """Run the constant-one FMM on 3,000,000 float32 particles and time it.

    A 2D tree with targets (and, if *enable_extents* is true, random target
    radii) is built, the wall time of ``drive_fmm`` is printed, and every
    potential has to equal the sum of the unit weights. The test is
    xfail-ed on devices with the struct arg count issue.
    """
    from time import time

    context = ctx_getter()
    queue = cl.CommandQueue(context)

    from pyopencl.characterize import has_struct_arg_count_bug
    if has_struct_arg_count_bug(queue.device):
        pytest.xfail("won't work on devices with the struct arg count issue")

    logging.basicConfig(level=logging.INFO)

    dims = 2
    dtype = np.float32
    nsources = 3000000
    ntargets = 3000000

    from boxtree.fmm import drive_fmm
    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    targets = p_normal(queue, ntargets, dims, dtype, seed=15)

    from pyopencl.clrandom import PhiloxGenerator
    generator = PhiloxGenerator(queue.context, seed=12)

    target_radii = None
    if enable_extents:
        exponents = generator.uniform(
            queue, ntargets, dtype=dtype, a=-10, b=0)
        target_radii = 2**exponents

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(context)
    tree, _ = build_tree(
        queue,
        sources,
        targets=targets,
        max_particles_in_box=30,
        target_radii=target_radii,
        stick_out_factor=0.25,
        debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    build_traversal = FMMTraversalBuilder(context)
    device_trav, _ = build_traversal(queue, tree, debug=True)

    weights = np.ones(nsources)
    weights_sum = np.sum(weights)

    host_trav = device_trav.get(queue=queue)
    wrangler = ConstantOneExpansionWrangler(host_trav.tree)

    # Time only the FMM driver itself.
    started = time()
    pot = drive_fmm(host_trav, wrangler, weights)
    elapsed = time() - started
    print(elapsed)

    assert (pot == weights_sum).all()
Beispiel #10
0
def test_sumpy_fmm_timing_data_collection(ctx_getter):
    """Check that ``drive_fmm`` fills in *timing_data* for a sumpy wrangler.

    A profiling-enabled queue is used to build a tree and traversal for 500
    sources of a 2D Laplace kernel with order-1 volume Taylor expansions.
    After the FMM run the timing dictionary is printed and must be non-empty.
    """
    logging.basicConfig(level=logging.INFO)

    context = ctx_getter()
    profiling = cl.command_queue_properties.PROFILING_ENABLE
    queue = cl.CommandQueue(context, properties=profiling)

    nsources = 500
    dtype = np.float64

    from boxtree.tools import make_normal_particle_array as p_normal

    knl = LaplaceKernel(2)
    local_expn_class = VolumeTaylorLocalExpansion
    mpole_expn_class = VolumeTaylorMultipoleExpansion
    order = 1

    sources = p_normal(queue, nsources, knl.dim, dtype, seed=15)

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(context)
    tree, _ = build_tree(queue, sources, max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    build_traversal = FMMTraversalBuilder(context)
    trav, _ = build_traversal(queue, tree, debug=True)

    from pyopencl.clrandom import PhiloxGenerator
    generator = PhiloxGenerator(context)
    weights = generator.uniform(queue, nsources, dtype=np.float64)

    out_kernels = [knl]

    from functools import partial

    from sumpy.fmm import SumpyExpansionWranglerCodeContainer
    code_container = SumpyExpansionWranglerCodeContainer(
        context,
        partial(mpole_expn_class, knl),
        partial(local_expn_class, knl),
        out_kernels)

    def level_to_order(kernel, kernel_args, tree, lev):
        # The same expansion order is used on every level.
        return order

    wrangler = code_container.get_wrangler(
        queue, tree, dtype, fmm_level_to_order=level_to_order)

    from boxtree.fmm import drive_fmm

    timing_data = {}
    pot, = drive_fmm(trav, wrangler, weights, timing_data=timing_data)
    print(timing_data)
    assert timing_data
Beispiel #11
0
def test_sumpy_fmm_timing_data_collection(ctx_factory):
    """Check that ``drive_fmm`` fills in *timing_data* for a sumpy wrangler.

    A profiling-enabled queue is used to build a tree and traversal for 500
    sources of a 2D Laplace kernel with order-1 volume Taylor expansions.
    The weights are passed to ``drive_fmm`` as a one-element tuple; afterwards
    the timing dictionary is printed and must be non-empty.
    """
    logging.basicConfig(level=logging.INFO)

    context = ctx_factory()
    profiling = cl.command_queue_properties.PROFILING_ENABLE
    queue = cl.CommandQueue(context, properties=profiling)

    nsources = 500
    dtype = np.float64

    from boxtree.tools import make_normal_particle_array as p_normal

    knl = LaplaceKernel(2)
    local_expn_class = VolumeTaylorLocalExpansion
    mpole_expn_class = VolumeTaylorMultipoleExpansion
    order = 1

    sources = p_normal(queue, nsources, knl.dim, dtype, seed=15)

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(context)
    tree, _ = build_tree(queue, sources, max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    build_traversal = FMMTraversalBuilder(context)
    trav, _ = build_traversal(queue, tree, debug=True)

    from pyopencl.clrandom import PhiloxGenerator
    generator = PhiloxGenerator(context)
    weights = generator.uniform(queue, nsources, dtype=np.float64)

    out_kernels = [knl]

    from functools import partial

    from sumpy.fmm import SumpyExpansionWranglerCodeContainer
    code_container = SumpyExpansionWranglerCodeContainer(
        context,
        partial(mpole_expn_class, knl),
        partial(local_expn_class, knl),
        out_kernels)

    def level_to_order(kernel, kernel_args, tree, lev):
        # The same expansion order is used on every level.
        return order

    wrangler = code_container.get_wrangler(
        queue, tree, dtype, fmm_level_to_order=level_to_order)

    from boxtree.fmm import drive_fmm

    timing_data = {}
    weight_vectors = (weights,)
    pot, = drive_fmm(trav, wrangler, weight_vectors, timing_data=timing_data)
    print(timing_data)
    assert timing_data
Beispiel #12
0
def plot_traversal(ctx_getter, do_plot=False, well_sep_is_n_away=1):
    """Build a 2D tree and traversal, draw the lists of box 320, and show it.

    *well_sep_is_n_away* is forwarded to ``FMMTraversalBuilder``.

    NOTE(review): *do_plot* is never read in this function; the figure is
    always shown via ``pt.show()``.
    """
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    # Only the two-dimensional case is run.
    for dims in (2,):
        particle_count = 10**4
        real_dtype = np.float64

        from pyopencl.clrandom import PhiloxGenerator
        generator = PhiloxGenerator(queue.context, seed=15)

        from pytools.obj_array import make_obj_array
        coordinates = []
        for _ in range(dims):
            coordinates.append(
                generator.normal(queue, particle_count, dtype=real_dtype))
        particles = make_obj_array(coordinates)

        from boxtree import TreeBuilder
        build_tree = TreeBuilder(context)

        queue.finish()
        tree, _ = build_tree(
            queue, particles, max_particles_in_box=30, debug=True)

        from boxtree.traversal import FMMTraversalBuilder
        build_traversal = FMMTraversalBuilder(
            context, well_sep_is_n_away=well_sep_is_n_away)
        trav, _ = build_traversal(queue, tree)

        # Move both structures to the host for plotting.
        tree = tree.get(queue=queue)
        trav = trav.get(queue=queue)

        from boxtree.visualization import TreePlotter
        plotter = TreePlotter(tree)
        plotter.draw_tree(fill=False, edgecolor="black")
        plotter.set_bounding_box()

        from random import seed as seed_rng
        seed_rng(7)

        from boxtree.visualization import draw_box_lists
        shown_box = 320
        draw_box_lists(plotter, trav, shown_box)

        import matplotlib.pyplot as pt
        pt.show()
Beispiel #13
0
    def __push_particle(self):
        """Launch the ``rt_push_particles`` kernel with fresh uniform samples.

        A new Philox generator produces an ``(np, 4)`` array of uniform
        samples, which is passed to the kernel ahead of the ``x_gpu``,
        ``v_gpu`` and ``t_gpu`` buffers. The kernel runs over ``np`` work
        items and the call blocks until it has finished.
        """
        rng = PhiloxGenerator(self.ocl_ctx)
        samples = rng.uniform(self.ocl_queue, (self.np, 4), dtype=self.dtype)

        push_kernel = self.ocl_prg.rt_push_particles
        push_event = push_kernel(
            self.ocl_queue, (self.np, ), None,
            samples.data, self.x_gpu.data, self.v_gpu.data, self.t_gpu.data)
        push_event.wait()
Beispiel #14
0
def test_explicit_refine_weights_particle_tree(ctx_getter, dtype, dims,
            do_plot=False):
    """Run the tree build test with explicitly supplied refine weights.

    The int32 weights for 10**5 particles are drawn via
    ``rng.uniform(..., a=1, b=10)`` and passed to ``run_build_test`` together
    with ``max_leaf_refine_weight=100``.
    """
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    from boxtree import TreeBuilder
    builder = TreeBuilder(context)

    nparticles = 10**5

    from pyopencl.clrandom import PhiloxGenerator
    generator = PhiloxGenerator(context, seed=10)
    refine_weights = generator.uniform(
        queue, nparticles, dtype=np.int32, a=1, b=10)

    build_options = {
        "refine_weights": refine_weights,
        "max_leaf_refine_weight": 100,
        "do_plot": do_plot,
    }
    run_build_test(builder, queue, dims, dtype, nparticles, **build_options)
Beispiel #15
0
def test_area_query_balls_outside_bbox(ctx_factory, dims, do_plot=False):
    """Exercise the area query with balls that may lie outside the tree.

    Ball centers are sampled uniformly from an interval reaching one unit
    beyond the smallest and the largest bounding box coordinate, so the query
    input includes centers that are not within the tree bounding box.
    """
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    real_dtype = np.float64
    particle_count = 10**4

    particles = make_normal_particle_array(
        queue, particle_count, dims, real_dtype)

    if do_plot:
        import matplotlib.pyplot as pt
        pt.plot(particles[0].get(), particles[1].get(), "x")

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(context)

    queue.finish()
    tree, _ = build_tree(queue, particles, max_particles_in_box=30, debug=True)

    from pyopencl.clrandom import PhiloxGenerator
    from pytools.obj_array import make_obj_array

    ball_count = 10**4
    generator = PhiloxGenerator(context, seed=13)

    # Widen the sampling interval by one unit on either side of the bbox.
    lower = tree.bounding_box[0].min() - 1
    upper = tree.bounding_box[1].max() + 1

    def draw_center_component():
        return generator.uniform(
            queue, ball_count, dtype=real_dtype, a=lower, b=upper)

    ball_centers = make_obj_array(
        [draw_center_component() for _ in range(dims)])

    # Every ball gets the same radius.
    ball_radii = cl.array.empty(queue, ball_count, real_dtype).fill(0.1)

    run_area_query_test(context, queue, tree, ball_centers, ball_radii)
Beispiel #16
0
def test_explicit_refine_weights_particle_tree(ctx_factory,
                                               dtype,
                                               dims,
                                               do_plot=False):
    """Run the tree build test with explicitly supplied refine weights.

    The int32 weights for 10**5 particles are drawn via
    ``rng.uniform(..., a=1, b=10)`` and passed to ``run_build_test`` together
    with ``max_leaf_refine_weight=100``.
    """
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from boxtree import TreeBuilder
    builder = TreeBuilder(context)

    nparticles = 10**5

    from pyopencl.clrandom import PhiloxGenerator
    generator = PhiloxGenerator(context, seed=10)
    refine_weights = generator.uniform(
        queue, nparticles, dtype=np.int32, a=1, b=10)

    run_build_test(
        builder, queue, dims, dtype, nparticles,
        refine_weights=refine_weights,
        max_leaf_refine_weight=100,
        do_plot=do_plot)
Beispiel #17
0
def test_fmm_completeness(ctx_getter, dims, nsources_req, ntargets_req,
                          who_has_extent, source_gen, target_gen, filter_kind,
                          well_sep_is_n_away, extent_norm,
                          from_sep_smaller_crit):
    """Tests whether the built FMM traversal structures and driver completely
    capture all interactions.

    Every source carries a weight of one, so the potential computed at each
    (retained) target is compared against the sum of all weights; the test
    passes when the scaled l2 norm of the difference is below ``1e-8``.

    :arg ctx_getter: callable returning the OpenCL context to use.
    :arg dims: passed to *source_gen*/*target_gen* as the dimension argument.
    :arg nsources_req: number of sources requested from *source_gen*.
    :arg ntargets_req: number of targets requested from *target_gen*, or
        *None* to pass ``targets=None`` to the tree builder.
    :arg who_has_extent: string; sources get radii if it contains ``"s"``,
        targets get radii if it contains ``"t"``.
    :arg source_gen: particle generator called as
        ``source_gen(queue, nsources_req, dims, dtype, seed=15)``.
    :arg target_gen: particle generator called as
        ``target_gen(queue, ntargets_req, dims, dtype, seed=16)``.
    :arg filter_kind: falsy for no target filtering, otherwise ``"user"`` or
        ``"tree"``; any other truthy value raises :exc:`ValueError`.
    :arg well_sep_is_n_away: forwarded to ``FMMTraversalBuilder``.
    :arg extent_norm: forwarded to the tree builder.
    :arg from_sep_smaller_crit: forwarded to ``FMMTraversalBuilder``.
    """

    sources_have_extent = "s" in who_has_extent
    targets_have_extent = "t" in who_has_extent

    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    dtype = np.float64

    # The generators may raise ImportError; in that case the test is skipped.
    try:
        sources = source_gen(queue, nsources_req, dims, dtype, seed=15)
        # The generator decides the actual particle count.
        nsources = len(sources[0])

        if ntargets_req is None:
            # This says "same as sources" to the tree builder.
            targets = None
            ntargets = ntargets_req
        else:
            targets = target_gen(queue, ntargets_req, dims, dtype, seed=16)
            ntargets = len(targets[0])
    except ImportError:
        pytest.skip("loo.py not available, but needed for particle array "
                    "generation")

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=12)
    # Radii are 2**u, with u drawn via rng.uniform(..., a=-10, b=0).
    if sources_have_extent:
        source_radii = 2**rng.uniform(queue, nsources, dtype=dtype, a=-10, b=0)
    else:
        source_radii = None

    if targets_have_extent:
        target_radii = 2**rng.uniform(queue, ntargets, dtype=dtype, a=-10, b=0)
    else:
        target_radii = None

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue,
                 sources,
                 targets=targets,
                 max_particles_in_box=30,
                 source_radii=source_radii,
                 target_radii=target_radii,
                 debug=True,
                 stick_out_factor=0.25,
                 extent_norm=extent_norm)
    # Disabled debugging aid: plot the tree.
    if 0:
        tree.get().plot()
        import matplotlib.pyplot as pt
        pt.show()

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx,
                                 well_sep_is_n_away=well_sep_is_n_away,
                                 from_sep_smaller_crit=from_sep_smaller_crit)
    trav, _ = tbuild(queue, tree, debug=True)

    if who_has_extent:
        # Keep the un-merged traversal; it is only used by the (disabled)
        # debugging plot further down.
        pre_merge_trav = trav
        trav = trav.merge_close_lists(queue)

    #weights = np.random.randn(nsources)
    weights = np.ones(nsources)
    weights_sum = np.sum(weights)

    host_trav = trav.get(queue=queue)
    host_tree = host_trav.tree

    if who_has_extent:
        pre_merge_host_trav = pre_merge_trav.get(queue=queue)

    from boxtree.tree import ParticleListFilter
    plfilt = ParticleListFilter(ctx)

    if filter_kind:
        # Random int8 flags (drawn with a=0, b=2); only potentials whose flag
        # is > 0 are compared below.
        flags = rng.uniform(queue, ntargets or nsources, np.int32, a=0, b=2) \
                .astype(np.int8)
        if filter_kind == "user":
            filtered_targets = plfilt.filter_target_lists_in_user_order(
                queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInUserOrder(
                host_tree, filtered_targets.get(queue=queue))
        elif filter_kind == "tree":
            filtered_targets = plfilt.filter_target_lists_in_tree_order(
                queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInTreeOrder(
                host_tree, filtered_targets.get(queue=queue))
        else:
            raise ValueError("unsupported value of 'filter_kind'")
    else:
        wrangler = ConstantOneExpansionWrangler(host_tree)
        # No filtering: all flags are set to 1.
        flags = cl.array.empty(queue, ntargets or nsources, dtype=np.int8)
        flags.fill(1)

    if ntargets is None and not filter_kind:
        # This check only works for targets == sources.
        assert (wrangler.reorder_potentials(
            wrangler.reorder_sources(weights)) == weights).all()

    from boxtree.fmm import drive_fmm
    pot = drive_fmm(host_trav, wrangler, weights)

    if filter_kind:
        pot = pot[flags.get() > 0]

    # With unit weights each potential should equal weights_sum.
    rel_err = la.norm((pot - weights_sum) / nsources)
    good = rel_err < 1e-8

    # {{{ build, evaluate matrix (and identify incorrect interactions)

    # Disabled debugging aid: apply the FMM to every unit vector to assemble
    # the full interaction matrix and locate entries that are not 1.
    if 0 and not good:
        mat = np.zeros((ntargets, nsources), dtype)
        from pytools import ProgressBar

        logging.getLogger().setLevel(logging.WARNING)

        pb = ProgressBar("matrix", nsources)
        for i in range(nsources):
            unit_vec = np.zeros(nsources, dtype=dtype)
            unit_vec[i] = 1
            mat[:, i] = drive_fmm(host_trav, wrangler, unit_vec)
            pb.progress()
        pb.finished()

        logging.getLogger().setLevel(logging.INFO)

        import matplotlib.pyplot as pt

        if 0:
            pt.imshow(mat)
            pt.colorbar()
            pt.show()

        incorrect_tgts, incorrect_srcs = np.where(mat != 1)

        if 1 and len(incorrect_tgts):
            from boxtree.visualization import TreePlotter
            plotter = TreePlotter(host_tree)
            plotter.draw_tree(fill=False, edgecolor="black")
            plotter.draw_box_numbers()
            plotter.set_bounding_box()

            tree_order_incorrect_tgts = \
                    host_tree.indices_to_tree_target_order(incorrect_tgts)
            tree_order_incorrect_srcs = \
                    host_tree.indices_to_tree_source_order(incorrect_srcs)

            src_boxes = [
                host_tree.find_box_nr_for_source(i)
                for i in tree_order_incorrect_srcs
            ]
            tgt_boxes = [
                host_tree.find_box_nr_for_target(i)
                for i in tree_order_incorrect_tgts
            ]
            print(src_boxes)
            print(tgt_boxes)

            # plot all sources/targets
            if 0:
                pt.plot(host_tree.targets[0],
                        host_tree.targets[1],
                        "v",
                        alpha=0.9)
                pt.plot(host_tree.sources[0],
                        host_tree.sources[1],
                        "gx",
                        alpha=0.9)

            # plot offending sources/targets
            if 0:
                pt.plot(host_tree.targets[0][tree_order_incorrect_tgts],
                        host_tree.targets[1][tree_order_incorrect_tgts], "rv")
                pt.plot(host_tree.sources[0][tree_order_incorrect_srcs],
                        host_tree.sources[1][tree_order_incorrect_srcs], "go")
            pt.gca().set_aspect("equal")

            from boxtree.visualization import draw_box_lists
            draw_box_lists(
                plotter, pre_merge_host_trav if who_has_extent else host_trav,
                22)
            # from boxtree.visualization import draw_same_level_non_well_sep_boxes
            # draw_same_level_non_well_sep_boxes(plotter, host_trav, 2)

            pt.show()

    # }}}

    # Disabled debugging aid: plot the per-target deviation.
    if 0 and not good:
        import matplotlib.pyplot as pt
        pt.plot(pot - weights_sum)
        pt.show()

    # Disabled debugging aid: mark targets whose deviation is at least 1e-3.
    if 0 and not good:
        import matplotlib.pyplot as pt
        filt_targets = [
            host_tree.targets[0][flags.get() > 0],
            host_tree.targets[1][flags.get() > 0],
        ]
        host_tree.plot()
        bad = np.abs(pot - weights_sum) >= 1e-3
        bad_targets = [
            filt_targets[0][bad],
            filt_targets[1][bad],
        ]
        print(bad_targets[0].shape)
        pt.plot(filt_targets[0], filt_targets[1], "x")
        pt.plot(bad_targets[0], bad_targets[1], "v")
        pt.show()

    assert good
Beispiel #18
0
def test_fmm_with_optimized_3d_m2l(ctx_factory, nsrcntgts, helmholtz_k,
                                   well_sep_is_n_away):
    """Compare fmmlib potentials with and without precomputed rotation data.

    Two ``FMMLibExpansionWrangler`` instances are built on the same 3D
    traversal, one of them with ``FMMLibRotationData``. Both are run through
    ``drive_fmm``; their multipole-to-local timings are printed when
    available, and the resulting potentials must agree to ``1e-13``
    (absolute and relative). The test is skipped when ``pyfmmlib`` is
    unavailable.

    NOTE(review): *well_sep_is_n_away* is accepted but never read in this
    function; the traversal builder is created without it. Confirm whether
    it should be forwarded.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    dims = 3

    context = ctx_factory()
    queue = cl.CommandQueue(context)

    # Half of the requested particles become sources, half targets.
    nsources = ntargets = nsrcntgts // 2
    dtype = np.float64

    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    target_shift = np.array([2, 0, 0])[:dims]
    targets = p_normal(queue, ntargets, dims, dtype, seed=18) + target_shift

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(context)
    tree, _ = build_tree(
        queue, sources, targets=targets,
        max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    build_traversal = FMMTraversalBuilder(context)
    device_trav, _ = build_traversal(queue, tree, debug=True)
    trav = device_trav.get(queue=queue)

    from pyopencl.clrandom import PhiloxGenerator
    generator = PhiloxGenerator(queue.context, seed=20)
    weights = generator.uniform(queue, nsources, dtype=np.float64).get()

    base_nterms = 10

    def fmm_level_to_nterms(tree, lev):
        # exercise order-varies-by-level capability
        if lev < 3 and helmholtz_k:
            return base_nterms + 5

        return base_nterms

    from boxtree.pyfmmlib_integration import (FMMLibExpansionWrangler,
                                              FMMLibRotationData)

    baseline_wrangler = FMMLibExpansionWrangler(
        trav.tree, helmholtz_k, fmm_level_to_nterms=fmm_level_to_nterms)

    optimized_wrangler = FMMLibExpansionWrangler(
        trav.tree,
        helmholtz_k,
        fmm_level_to_nterms=fmm_level_to_nterms,
        rotation_data=FMMLibRotationData(queue, trav))

    from boxtree.fmm import drive_fmm

    baseline_timing_data = {}
    baseline_pot = drive_fmm(
        trav, baseline_wrangler, (weights, ),
        timing_data=baseline_timing_data)

    optimized_timing_data = {}
    optimized_pot = drive_fmm(
        trav, optimized_wrangler, (weights, ),
        timing_data=optimized_timing_data)

    # Report the M2L timings; a value of None is not printed.
    reports = (
        ("Baseline M2L time : %#.4g s", baseline_timing_data),
        ("Optimized M2L time: %#.4g s", optimized_timing_data),
    )
    for template, collected in reports:
        elapsed = collected["multipole_to_local"]["process_elapsed"]
        if elapsed is not None:
            print(template % elapsed)

    assert np.allclose(baseline_pot, optimized_pot, atol=1e-13, rtol=1e-13)
Beispiel #19
0
def test_target_association(ctx_factory,
                            curve_name,
                            curve_f,
                            nelements,
                            visualize=False):
    """Check that targets near a curve get QBX centers on the correct side.

    A QBX layer potential source is built on the curve given by *curve_f*.
    Targets are then placed on the surface, at random distances inside the
    close-target tunnel on both sides of it, and far away from it.  After
    running ``associate_targets_to_qbx_centers``, the test asserts that every
    close target has a center on the expected side within (slightly more
    than) one expansion radius, and that far targets have no center.

    :arg ctx_factory: callable returning an OpenCL context.
    :arg curve_name: not used in the body (presumably a label used for
        test parametrization -- confirm against the caller).
    :arg curve_f: curve parametrization handed to ``make_curve_mesh``.
    :arg nelements: number of mesh elements along the curve.
    :arg visualize: if true, plot the curve, the centers and the
        target/center pairings with matplotlib.
    """
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue)

    # {{{ generate lpot source

    order = 16

    # Make the curve mesh.
    mesh = make_curve_mesh(curve_f, np.linspace(0, 1, nelements + 1), order)

    from meshmode.discretization import Discretization
    from meshmode.discretization.poly_element import \
            InterpolatoryQuadratureSimplexGroupFactory
    factory = InterpolatoryQuadratureSimplexGroupFactory(order)
    discr = Discretization(actx, mesh, factory)

    lpot_source = QBXLayerPotentialSource(
        discr,
        qbx_order=order,  # not used in target association
        fine_order=order)
    places = GeometryCollection(lpot_source)

    # }}}

    # {{{ generate targets

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(cl_ctx, seed=RNG_SEED)

    dd = places.auto_source.to_stage1()
    centers = dof_array_to_numpy(
        actx,
        bind(
            places,
            sym.interleaved_expansion_centers(lpot_source.ambient_dim,
                                              dofdesc=dd))(actx))

    density_discr = places.get_discretization(dd.geometry)

    # Random factors in [0.01, 1.0) scaling the tunnel radius per node.
    # np.float64 replaces the removed alias np.float (same dtype).
    noise = actx.to_numpy(
        rng.uniform(queue, density_discr.ndofs,
                    dtype=np.float64, a=0.01, b=1.0))

    tunnel_radius = dof_array_to_numpy(
        actx,
        bind(
            places,
            sym._close_target_tunnel_radii(lpot_source.ambient_dim,
                                           dofdesc=dd))(actx))

    def targets_from_sources(sign, dist, dim=2):
        # Offset every source node by *dist* along its normal; *sign*
        # selects the side of the curve.
        # The builtin `object` replaces the removed alias np.object.
        nodes = dof_array_to_numpy(
            actx,
            bind(places, sym.nodes(dim,
                                   dofdesc=dd))(actx).as_vector(object))
        normals = dof_array_to_numpy(
            actx,
            bind(places, sym.normal(dim,
                                    dofdesc=dd))(actx).as_vector(object))
        return actx.from_numpy(nodes + normals * sign * dist)

    from pytential.target import PointsTarget
    int_targets = PointsTarget(targets_from_sources(-1, noise * tunnel_radius))
    ext_targets = PointsTarget(targets_from_sources(+1, noise * tunnel_radius))
    far_targets = PointsTarget(
        targets_from_sources(+1, FAR_TARGET_DIST_FROM_SOURCE))

    # Create target discretizations.
    target_discrs = (
        # On-surface targets, interior
        (density_discr, -1),
        # On-surface targets, exterior
        (density_discr, +1),
        # Interior close targets
        (int_targets, -2),
        # Exterior close targets
        (ext_targets, +2),
        # Far targets, should not need centers
        (far_targets, 0),
    )

    # Cumulative sizes give the end index of each target group in the
    # concatenated target numbering used by the association result.
    sizes = np.cumsum([discr.ndofs for discr, _ in target_discrs])

    (
        surf_int_slice,
        surf_ext_slice,
        vol_int_slice,
        vol_ext_slice,
        far_slice,
    ) = [slice(start, end) for start, end in zip(np.r_[0, sizes], sizes)]

    # }}}

    # {{{ run target associator and check

    from pytential.qbx.target_assoc import (TargetAssociationCodeContainer,
                                            associate_targets_to_qbx_centers)

    from pytential.qbx.utils import TreeCodeContainer
    code_container = TargetAssociationCodeContainer(actx,
                                                    TreeCodeContainer(actx))

    target_assoc = (associate_targets_to_qbx_centers(
        places,
        places.auto_source,
        code_container.get_wrangler(actx),
        target_discrs,
        target_association_tolerance=1e-10).get(queue=queue))

    expansion_radii = dof_array_to_numpy(
        actx,
        bind(
            places,
            sym.expansion_radii(lpot_source.ambient_dim,
                                granularity=sym.GRANULARITY_CENTER))(actx))
    from meshmode.dof_array import thaw
    surf_targets = dof_array_to_numpy(actx, thaw(actx, density_discr.nodes()))
    # From here on the names refer to host coordinate arrays, not to the
    # PointsTarget objects created above.
    int_targets = actx.to_numpy(int_targets.nodes())
    ext_targets = actx.to_numpy(ext_targets.nodes())

    def visualize_curve_and_assoc():
        import matplotlib.pyplot as plt
        from meshmode.mesh.visualization import draw_curve

        draw_curve(density_discr.mesh)

        # NOTE(review): int_targets is paired with surf_int_slice here, so
        # the centers drawn are those assigned to the interior *surface*
        # targets rather than to the interior volume targets -- confirm
        # whether vol_int_slice was intended.
        targets = int_targets
        tgt_slice = surf_int_slice

        plt.plot(centers[0], centers[1], "+", color="orange")
        ax = plt.gca()

        for tx, ty, tcenter in zip(targets[0, tgt_slice], targets[1,
                                                                  tgt_slice],
                                   target_assoc.target_to_center[tgt_slice]):
            if tcenter >= 0:
                ax.add_artist(
                    plt.Line2D(
                        (tx, centers[0, tcenter]),
                        (ty, centers[1, tcenter]),
                    ))

        ax.set_aspect("equal")
        plt.show()

    if visualize:
        visualize_curve_and_assoc()

    # Checks that the targets match with centers on the appropriate side and
    # within the allowable distance.
    def check_close_targets(centers, targets, true_side, target_to_center,
                            target_to_side_result, tgt_slice):
        # tgt_slice is accepted but not used in the body.
        targets_have_centers = (target_to_center >= 0).all()
        assert targets_have_centers

        assert (target_to_side_result == true_side).all()

        TOL = 1e-3
        dists = la.norm((targets.T - centers.T[target_to_center]), axis=1)
        assert (dists <= (1 + TOL) * expansion_radii[target_to_center]).all()

    # Center side order = -1, 1, -1, 1, ...
    target_to_center_side = 2 * (target_assoc.target_to_center % 2) - 1

    # interior surface
    check_close_targets(centers, surf_targets, -1,
                        target_assoc.target_to_center[surf_int_slice],
                        target_to_center_side[surf_int_slice], surf_int_slice)

    # exterior surface
    check_close_targets(centers, surf_targets, +1,
                        target_assoc.target_to_center[surf_ext_slice],
                        target_to_center_side[surf_ext_slice], surf_ext_slice)

    # interior volume
    check_close_targets(centers, int_targets, -1,
                        target_assoc.target_to_center[vol_int_slice],
                        target_to_center_side[vol_int_slice], vol_int_slice)

    # exterior volume
    check_close_targets(centers, ext_targets, +1,
                        target_assoc.target_to_center[vol_ext_slice],
                        target_to_center_side[vol_ext_slice], vol_ext_slice)

    # Checks that far targets are not assigned a center.
    assert (target_assoc.target_to_center[far_slice] == -1).all()
Beispiel #20
0
def test_target_association(ctx_getter, curve_name, curve_f, nelements,
        visualize=False):
    """Check that targets near a curve get QBX centers on the correct side.

    A refined QBX layer potential source is built on the curve given by
    *curve_f*.  Targets are placed on the surface, at random distances inside
    the close-target tunnel on both sides of it, and far away from it.  After
    running ``associate_targets_to_qbx_centers``, the test asserts that every
    close target has a center on the expected side within (slightly more
    than) one expansion radius, and that far targets have no center.

    :arg ctx_getter: callable returning an OpenCL context.
    :arg curve_name: not used in the body (presumably a label used for
        test parametrization -- confirm against the caller).
    :arg curve_f: curve parametrization handed to ``make_curve_mesh``.
    :arg nelements: number of mesh elements along the curve.
    :arg visualize: if true, plot the curve, the centers and the
        target/center pairings with matplotlib.
    """
    cl_ctx = ctx_getter()
    queue = cl.CommandQueue(cl_ctx)

    # {{{ generate lpot source

    order = 16

    # Make the curve mesh.
    mesh = make_curve_mesh(curve_f, np.linspace(0, 1, nelements+1), order)

    from meshmode.discretization import Discretization
    from meshmode.discretization.poly_element import \
            InterpolatoryQuadratureSimplexGroupFactory
    factory = InterpolatoryQuadratureSimplexGroupFactory(order)

    discr = Discretization(cl_ctx, mesh, factory)

    lpot_source, conn = QBXLayerPotentialSource(discr,
            qbx_order=order,  # not used in target association
            fine_order=order).with_refinement()
    del discr

    from pytential.qbx.utils import get_interleaved_centers
    centers = np.array([ax.get(queue)
            for ax in get_interleaved_centers(queue, lpot_source)])

    # }}}

    # {{{ generate targets

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(cl_ctx, seed=RNG_SEED)
    nsources = lpot_source.density_discr.nnodes
    # Random factors in [0.01, 1.0) scaling the tunnel radius per node.
    # np.float64 replaces the removed alias np.float (same dtype).
    noise = rng.uniform(queue, nsources, dtype=np.float64, a=0.01, b=1.0)
    tunnel_radius = \
            lpot_source._close_target_tunnel_radius("nsources").with_queue(queue)

    def targets_from_sources(sign, dist):
        # Offset every source node by *dist* along its normal; *sign*
        # selects the side of the curve.
        from pytential import sym, bind
        dim = 2
        nodes = bind(lpot_source.density_discr, sym.nodes(dim))(queue)
        normals = bind(lpot_source.density_discr, sym.normal(dim))(queue)
        # The builtin `object` replaces the removed alias np.object.
        return (nodes + normals * sign * dist).as_vector(object)

    from pytential.target import PointsTarget

    int_targets = PointsTarget(targets_from_sources(-1, noise * tunnel_radius))
    ext_targets = PointsTarget(targets_from_sources(+1, noise * tunnel_radius))
    far_targets = PointsTarget(targets_from_sources(+1, FAR_TARGET_DIST_FROM_SOURCE))

    # Create target discretizations.
    target_discrs = (
        # On-surface targets, interior
        (lpot_source.density_discr, -1),
        # On-surface targets, exterior
        (lpot_source.density_discr, +1),
        # Interior close targets
        (int_targets, -2),
        # Exterior close targets
        (ext_targets, +2),
        # Far targets, should not need centers
        (far_targets, 0),
    )

    # Cumulative sizes give the end index of each target group in the
    # concatenated target numbering used by the association result.
    sizes = np.cumsum([discr.nnodes for discr, _ in target_discrs])

    (surf_int_slice,
     surf_ext_slice,
     vol_int_slice,
     vol_ext_slice,
     far_slice,
     ) = [slice(start, end) for start, end in zip(np.r_[0, sizes], sizes)]

    # }}}

    # {{{ run target associator and check

    from pytential.qbx.target_assoc import (
            TargetAssociationCodeContainer, associate_targets_to_qbx_centers)

    from pytential.qbx.utils import TreeCodeContainer

    code_container = TargetAssociationCodeContainer(
            cl_ctx, TreeCodeContainer(cl_ctx))

    target_assoc = (associate_targets_to_qbx_centers(
            lpot_source,
            code_container.get_wrangler(queue),
            target_discrs,
            target_association_tolerance=1e-10)
        .get(queue=queue))

    expansion_radii = lpot_source._expansion_radii("ncenters").get(queue)

    surf_targets = np.array(
            [axis.get(queue) for axis in lpot_source.density_discr.nodes()])
    # From here on the names refer to host coordinate arrays, not to the
    # PointsTarget objects created above.
    int_targets = np.array([axis.get(queue) for axis in int_targets.nodes()])
    ext_targets = np.array([axis.get(queue) for axis in ext_targets.nodes()])

    def visualize_curve_and_assoc():
        import matplotlib.pyplot as plt
        from meshmode.mesh.visualization import draw_curve

        draw_curve(lpot_source.density_discr.mesh)

        # NOTE(review): int_targets is paired with surf_int_slice here, so
        # the centers drawn are those assigned to the interior *surface*
        # targets rather than to the interior volume targets -- confirm
        # whether vol_int_slice was intended.
        targets = int_targets
        tgt_slice = surf_int_slice

        plt.plot(centers[0], centers[1], "+", color="orange")
        ax = plt.gca()

        for tx, ty, tcenter in zip(
                targets[0, tgt_slice],
                targets[1, tgt_slice],
                target_assoc.target_to_center[tgt_slice]):
            if tcenter >= 0:
                ax.add_artist(
                        plt.Line2D(
                            (tx, centers[0, tcenter]),
                            (ty, centers[1, tcenter]),
                            ))

        ax.set_aspect("equal")
        plt.show()

    if visualize:
        visualize_curve_and_assoc()

    # Checks that the targets match with centers on the appropriate side and
    # within the allowable distance.
    def check_close_targets(centers, targets, true_side,
                            target_to_center, target_to_side_result,
                            tgt_slice):
        # tgt_slice is accepted but not used in the body.
        targets_have_centers = (target_to_center >= 0).all()
        assert targets_have_centers

        assert (target_to_side_result == true_side).all()

        TOL = 1e-3
        dists = la.norm((targets.T - centers.T[target_to_center]), axis=1)
        assert (dists <= (1 + TOL) * expansion_radii[target_to_center]).all()

    # Center side order = -1, 1, -1, 1, ...
    target_to_center_side = 2 * (target_assoc.target_to_center % 2) - 1

    # interior surface
    check_close_targets(
        centers, surf_targets, -1,
        target_assoc.target_to_center[surf_int_slice],
        target_to_center_side[surf_int_slice],
        surf_int_slice)

    # exterior surface
    check_close_targets(
        centers, surf_targets, +1,
        target_assoc.target_to_center[surf_ext_slice],
        target_to_center_side[surf_ext_slice],
        surf_ext_slice)

    # interior volume
    check_close_targets(
        centers, int_targets, -1,
        target_assoc.target_to_center[vol_int_slice],
        target_to_center_side[vol_int_slice],
        vol_int_slice)

    # exterior volume
    check_close_targets(
        centers, ext_targets, +1,
        target_assoc.target_to_center[vol_ext_slice],
        target_to_center_side[vol_ext_slice],
        vol_ext_slice)

    # Checks that far targets are not assigned a center.
    assert (target_assoc.target_to_center[far_slice] == -1).all()
Beispiel #21
0
def test_pyfmmlib_fmm(ctx_getter, dims):
    """Compare a pyfmmlib-backed Helmholtz FMM with direct summation.

    Sources and targets are drawn from normal distributions (the targets
    shifted by 2 along the first axis), an FMM is driven through
    ``HelmholtzExpansionWrangler`` and the result is checked against
    pyfmmlib's direct evaluation routine for the given dimension.  The test
    is skipped if pyfmmlib cannot be imported.

    :arg ctx_getter: callable returning an OpenCL context.
    :arg dims: spatial dimension; 2 selects the 2D direct routine, any
        other value the 3D one.
    """
    logging.basicConfig(level=logging.INFO)

    # Skip (rather than fail) when the optional dependency is missing.
    from pytest import importorskip
    importorskip("pyfmmlib")

    cl_context = ctx_getter()
    queue = cl.CommandQueue(cl_context)

    nsources, ntargets = 3000, 1000
    dtype = np.float64
    helmholtz_k = 2

    # {{{ particles

    target_shift = np.array([2, 0, 0])[:dims]
    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    targets = p_normal(queue, ntargets, dims, dtype, seed=18) + target_shift

    sources_host = particle_array_to_host(sources)
    targets_host = particle_array_to_host(targets)

    # }}}

    # {{{ tree and traversal

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(cl_context)
    tree, _ = build_tree(
        queue, sources,
        targets=targets, max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    build_traversal = FMMTraversalBuilder(cl_context)
    device_trav, _ = build_traversal(queue, tree, debug=True)

    # The pyfmmlib wrangler works on the host copy of the traversal.
    trav = device_trav.get(queue=queue)

    # }}}

    from pyopencl.clrandom import PhiloxGenerator
    weight_rng = PhiloxGenerator(queue.context, seed=20)
    weights = weight_rng.uniform(queue, nsources, dtype=np.float64).get()

    from boxtree.pyfmmlib_integration import HelmholtzExpansionWrangler
    wrangler = HelmholtzExpansionWrangler(trav.tree, helmholtz_k, nterms=10)

    from boxtree.fmm import drive_fmm
    pot = drive_fmm(trav, wrangler, weights)

    logger.info("computing direct (reference) result")

    # Arguments shared by the 2D and 3D direct evaluation routines.
    direct_args = dict(
        sources=sources_host.T,
        charge=weights,
        targets=targets_host.T,
        zk=helmholtz_k)

    if dims != 2:
        from pyfmmlib import hpotfld3dall_vec
        ref_pot, _ = hpotfld3dall_vec(iffld=False, **direct_args)
    else:
        from pyfmmlib import hpotgrad2dall_vec
        ref_pot, _, _ = hpotgrad2dall_vec(
            ifgrad=False, ifhess=False, **direct_args)

    error_norm = la.norm(pot - ref_pot)
    rel_err = error_norm / la.norm(ref_pot)
    logger.info("relative l2 error: %g" % rel_err)
    assert rel_err < 1e-5
Beispiel #22
0
def test_sumpy_fmm_exclude_self(ctx_getter):
    """Check the sumpy FMM with ``exclude_self=True`` against direct P2P.

    Sources double as targets.  Both the FMM and the reference P2P
    evaluation are told, via ``target_to_source``, which source coincides
    with each target so that the self-interaction is left out.

    :arg ctx_getter: callable returning an OpenCL context.
    """
    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 500
    dtype = np.float64

    from boxtree.tools import (make_normal_particle_array as p_normal)

    knl = LaplaceKernel(2)
    local_expn_class = VolumeTaylorLocalExpansion
    mpole_expn_class = VolumeTaylorMultipoleExpansion
    order = 10

    sources = p_normal(queue, nsources, knl.dim, dtype, seed=15)

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)

    from pyopencl.clrandom import PhiloxGenerator
    # Seeded so that the weights, and hence the measured error, are
    # reproducible from run to run.
    rng = PhiloxGenerator(ctx, seed=20)
    weights = rng.uniform(queue, nsources, dtype=np.float64)

    # Target i coincides with source i.
    target_to_source = np.arange(tree.ntargets, dtype=np.int32)
    self_extra_kwargs = {"target_to_source": target_to_source}

    out_kernels = [knl]

    from functools import partial

    from sumpy.fmm import SumpyExpansionWranglerCodeContainer
    wcc = SumpyExpansionWranglerCodeContainer(ctx,
                                              partial(mpole_expn_class, knl),
                                              partial(local_expn_class, knl),
                                              out_kernels,
                                              exclude_self=True)

    wrangler = wcc.get_wrangler(
        queue,
        tree,
        dtype,
        fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order,
        self_extra_kwargs=self_extra_kwargs)

    from boxtree.fmm import drive_fmm

    pot, = drive_fmm(trav, wrangler, weights)

    # Reference: direct point-to-point evaluation, also without self terms.
    from sumpy import P2P
    p2p = P2P(ctx, out_kernels, exclude_self=True)
    _, (ref_pot, ) = p2p(queue, sources, sources, (weights, ),
                         **self_extra_kwargs)

    pot = pot.get()
    ref_pot = ref_pot.get()

    rel_err = la.norm(pot - ref_pot) / la.norm(ref_pot)
    logger.info("order %d -> relative l2 error: %g", order, rel_err)

    assert np.isclose(rel_err, 0, atol=1e-7)
Beispiel #23
0
def test_extent_tree(ctx_getter, dims, do_plot=False):
    """Build a tree from particles with extent and verify its structure.

    Sources and targets are normally distributed and carry radii of the form
    ``2**u`` with ``u`` drawn uniformly between -10 and 0.  After building
    the tree, the test checks the particle orderings, the per-box particle
    counts, and that every particle (including its radius) lies within its
    box extent enlarged by the stick-out distance.  Finally, a cloud of point
    sources is generated around every source and linked into the tree.

    :arg ctx_getter: callable returning an OpenCL context.
    :arg dims: number of spatial dimensions.
    :arg do_plot: not used in the body.
    """
    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 100000
    ntargets = 200000
    dtype = np.float64
    npoint_sources_per_source = 16

    sources = make_normal_particle_array(queue, nsources, dims, dtype,
            seed=12)
    targets = make_normal_particle_array(queue, ntargets, dims, dtype,
            seed=19)

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=13)
    # Both radii arrays come from the same generator, so the order of these
    # two draws determines their values.
    source_radii = 2**rng.uniform(queue, nsources, dtype=dtype,
            a=-10, b=0)
    target_radii = 2**rng.uniform(queue, ntargets, dtype=dtype,
            a=-10, b=0)

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    queue.finish()
    dev_tree, _ = tb(queue, sources, targets=targets,
            source_radii=source_radii, target_radii=target_radii,
            max_particles_in_box=10, debug=True)

    logger.info("transfer tree, check orderings")

    # Host copy of the tree; dev_tree is kept for link_point_sources below.
    tree = dev_tree.get(queue=queue)

    sorted_sources = np.array(list(tree.sources))
    sorted_targets = np.array(list(tree.targets))
    sorted_source_radii = tree.source_radii
    sorted_target_radii = tree.target_radii

    unsorted_sources = np.array([pi.get() for pi in sources])
    unsorted_targets = np.array([pi.get() for pi in targets])
    unsorted_source_radii = source_radii.get()
    unsorted_target_radii = target_radii.get()
    # Tree-ordered sources must be the user-ordered ones permuted by
    # user_source_ids.
    assert (sorted_sources
            == unsorted_sources[:, tree.user_source_ids]).all()
    assert (sorted_source_radii
            == unsorted_source_radii[tree.user_source_ids]).all()

    # {{{ test box structure, stick-out criterion

    logger.info("test box structure, stick-out criterion")

    # Invert the sorted_target_ids permutation to obtain, for each
    # tree-ordered target, its index in user order.
    user_target_ids = np.empty(tree.ntargets, dtype=np.intp)
    user_target_ids[tree.sorted_target_ids] = np.arange(tree.ntargets, dtype=np.intp)
    if ntargets:
        assert (sorted_targets
                == unsorted_targets[:, user_target_ids]).all()
        assert (sorted_target_radii
                == unsorted_target_radii[user_target_ids]).all()

    all_good_so_far = True

    # {{{ check sources, targets

    for ibox in range(tree.nboxes):
        extent_low, extent_high = tree.get_box_extent(ibox)

        box_radius = np.max(extent_high-extent_low) * 0.5
        stick_out_dist = tree.stick_out_factor * box_radius

        # Every box must lie inside the root bounding box, up to a small
        # tolerance relative to the root extent.
        assert (extent_low >=
                tree.bounding_box[0] - 1e-12*tree.root_extent).all(), ibox
        assert (extent_high <=
                tree.bounding_box[1] + 1e-12*tree.root_extent).all(), ibox

        box_children = tree.box_child_ids[:, ibox]
        # Entries equal to 0 are filtered out (presumably 0 marks "no child"
        # -- confirm against the tree documentation).
        existing_children = box_children[box_children != 0]

        # Particles owned directly by the box plus those of all children
        # must add up to the box's cumulative count.
        assert (tree.box_source_counts_nonchild[ibox]
                + np.sum(tree.box_source_counts_cumul[existing_children])
                == tree.box_source_counts_cumul[ibox])
        assert (tree.box_target_counts_nonchild[ibox]
                + np.sum(tree.box_target_counts_cumul[existing_children])
                == tree.box_target_counts_cumul[ibox])

        for what, starts, counts, points, radii in [
                ("source", tree.box_source_starts, tree.box_source_counts_cumul,
                    sorted_sources, sorted_source_radii),
                ("target", tree.box_target_starts, tree.box_target_counts_cumul,
                    sorted_targets, sorted_target_radii),
                ]:
            bstart = starts[ibox]
            bslice = slice(bstart, bstart+counts[ibox])
            check_particles = points[:, bslice]
            check_radii = radii[bslice]

            # Stick-out criterion: along every axis, the particle center
            # plus/minus its radius must stay within the box extent
            # enlarged by stick_out_dist.
            good = (
                    (check_particles + check_radii
                        < extent_high[:, np.newaxis] + stick_out_dist)
                    &
                    (extent_low[:, np.newaxis] - stick_out_dist
                        <= check_particles - check_radii)
                    ).all(axis=0)

            all_good_here = good.all()

            if not all_good_here:
                print("BAD BOX %s %d level %d" % (what, ibox, tree.box_levels[ibox]))

            all_good_so_far = all_good_so_far and all_good_here
            # This assert stops at the first bad box, so all_good_so_far
            # can never be false when the final assert is reached.
            assert all_good_here

    # }}}

    assert all_good_so_far

    # }}}

    # {{{ create, link point sources

    logger.info("creating point sources")

    # Seed numpy's global generator so the point source offsets drawn below
    # are reproducible.
    np.random.seed(20)

    from pytools.obj_array import make_obj_array
    # For each axis: an (nsources, npoint_sources_per_source) array holding
    # the source coordinate plus a random offset scaled by the source radius.
    point_sources = make_obj_array([
            cl.array.to_device(queue,
                unsorted_sources[i][:, np.newaxis]
                + unsorted_source_radii[:, np.newaxis]
                * np.random.uniform(
                    -1, 1, size=(nsources, npoint_sources_per_source))
                 )
            for i in range(dims)])

    # Start offsets in steps of npoint_sources_per_source, one per source
    # plus a final end marker (nsources+1 entries in total).
    point_source_starts = cl.array.arange(queue,
            0, (nsources+1)*npoint_sources_per_source, npoint_sources_per_source,
            dtype=tree.particle_id_dtype)

    from boxtree.tree import link_point_sources
    dev_tree = link_point_sources(queue, dev_tree,
            point_source_starts, point_sources,
            debug=True)
Beispiel #24
0
def demo_cost_model():
    """Calibrate the FMM cost model on small problems and report its accuracy.

    A pyfmmlib-backed FMM is run for each (nsources, ntargets) pair while
    timing data is recorded.  All cases but the last are used to estimate
    the calibration parameters; the cost of the last case is then predicted
    and the measured/predicted time of each FMM stage is logged.

    :raises NotImplementedError: if ``SUPPORTS_PROCESS_TIME`` is false.
    """
    if not SUPPORTS_PROCESS_TIME:
        raise NotImplementedError(
            "Currently this script uses process time which only works on Python>=3.3"
        )

    from boxtree import TreeBuilder
    from boxtree.cost import FMMCostModel
    from boxtree.cost import make_pde_aware_translation_cost_model
    from boxtree.fmm import drive_fmm
    from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler
    from boxtree.tools import make_normal_particle_array as p_normal
    from boxtree.traversal import FMMTraversalBuilder
    from pyopencl.clrandom import PhiloxGenerator

    # Timer used both for calibration and for the final report, so the two
    # cannot drift apart.
    time_field_name = "process_elapsed"

    nsources_list = [1000, 2000, 3000, 4000, 5000]
    ntargets_list = [1000, 2000, 3000, 4000, 5000]
    dims = 3
    dtype = np.float64

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    # The builders do not depend on the problem size; create them once.
    tree_builder = TreeBuilder(ctx)
    trav_builder = FMMTraversalBuilder(ctx, well_sep_is_n_away=2)

    traversals_dev = []
    level_to_orders = []
    timing_results = []

    def fmm_level_to_nterms(tree, ilevel):
        # Same expansion order on every level.
        return 10

    for nsources, ntargets in zip(nsources_list, ntargets_list):
        # {{{ Generate sources, targets and target_radii

        sources = p_normal(queue, nsources, dims, dtype, seed=15)
        targets = p_normal(queue, ntargets, dims, dtype, seed=18)

        # Deliberately re-created with the same seed for every case, so
        # each case draws from the start of the same random stream.
        rng = PhiloxGenerator(queue.context, seed=22)
        target_radii = rng.uniform(queue, ntargets, a=0, b=0.05,
                                   dtype=dtype).get()

        # }}}

        # {{{ Generate tree and traversal

        tree, _ = tree_builder(queue,
                               sources,
                               targets=targets,
                               target_radii=target_radii,
                               stick_out_factor=0.15,
                               max_particles_in_box=30,
                               debug=True)

        trav_dev, _ = trav_builder(queue, tree, debug=True)
        # The wrangler and the FMM driver use the host copy; the cost model
        # below is given the device traversal.
        trav = trav_dev.get(queue=queue)
        traversals_dev.append(trav_dev)

        # }}}

        wrangler = FMMLibExpansionWrangler(trav.tree, 0, fmm_level_to_nterms)
        level_to_orders.append(wrangler.level_nterms)

        timing_data = {}
        src_weights = np.random.rand(tree.nsources).astype(tree.coord_dtype)
        drive_fmm(trav, wrangler, src_weights, timing_data=timing_data)

        timing_results.append(timing_data)

    cost_model = FMMCostModel(make_pde_aware_translation_cost_model)

    # Model every case except the last with unit calibration parameters;
    # the last case is held back for the prediction below.
    model_results = [
        cost_model.cost_per_stage(
            queue,
            trav_dev,
            orders,
            FMMCostModel.get_unit_calibration_params(),
        )
        for trav_dev, orders in zip(traversals_dev[:-1], level_to_orders[:-1])
    ]
    queue.finish()

    params = cost_model.estimate_calibration_params(
        model_results, timing_results[:-1], time_field_name=time_field_name)

    predicted_time = cost_model.cost_per_stage(
        queue,
        traversals_dev[-1],
        level_to_orders[-1],
        params,
    )
    queue.finish()

    for field in [
            "form_multipoles", "eval_direct", "multipole_to_local",
            "eval_multipoles", "form_locals", "eval_locals",
            "coarsen_multipoles", "refine_locals"
    ]:
        measured = timing_results[-1][field][time_field_name]
        pred_err = ((measured - predicted_time[field]) / measured)
        logger.info("actual/predicted time for %s: %.3g/%.3g -> %g %% error",
                    field, measured, predicted_time[field],
                    abs(100 * pred_err))
Beispiel #25
0
def test_fmm_completeness(ctx_getter, dims, nsources_req, ntargets_req,
         who_has_extent, source_gen, target_gen, filter_kind):
    """Tests whether the built FMM traversal structures and driver completely
    capture all interactions.

    The FMM is driven with one of the ``ConstantOneExpansionWrangler``
    variants and the resulting potential at every (retained) target is
    compared against the sum of all source weights.

    :arg ctx_getter: callable returning an OpenCL context.
    :arg dims: number of spatial dimensions.
    :arg nsources_req: number of sources requested from *source_gen*.
    :arg ntargets_req: number of targets requested from *target_gen*, or
        *None* to let the tree builder use the sources as targets.
    :arg who_has_extent: string; sources get radii if it contains ``"s"``,
        targets get radii if it contains ``"t"``.
    :arg source_gen: callable
        ``(queue, nparticles, dims, dtype, seed=...)`` producing sources.
    :arg target_gen: like *source_gen*, for targets.
    :arg filter_kind: falsy for no target filtering, otherwise ``"user"`` or
        ``"tree"`` to filter targets in user or tree order.
    """

    sources_have_extent = "s" in who_has_extent
    targets_have_extent = "t" in who_has_extent

    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    dtype = np.float64

    try:
        sources = source_gen(queue, nsources_req, dims, dtype, seed=15)
        # The generator decides the actual particle count, which may differ
        # from the requested one.
        nsources = len(sources[0])

        if ntargets_req is None:
            # This says "same as sources" to the tree builder.
            targets = None
            ntargets = ntargets_req
        else:
            targets = target_gen(queue, ntargets_req, dims, dtype, seed=16)
            ntargets = len(targets[0])
    except ImportError:
        pytest.skip("loo.py not available, but needed for particle array "
                "generation")

    from pyopencl.clrandom import PhiloxGenerator
    # One generator serves the radii and, further down, the filter flags, so
    # the order of the draws determines their values.
    rng = PhiloxGenerator(queue.context, seed=12)
    if sources_have_extent:
        source_radii = 2**rng.uniform(queue, nsources, dtype=dtype,
                a=-10, b=0)
    else:
        source_radii = None

    if targets_have_extent:
        target_radii = 2**rng.uniform(queue, ntargets, dtype=dtype,
                a=-10, b=0)
    else:
        target_radii = None

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, targets=targets,
            max_particles_in_box=30,
            source_radii=source_radii, target_radii=target_radii,
            debug=True)
    # Disabled debugging aid: plot the tree.
    if 0:
        tree.get().plot()
        import matplotlib.pyplot as pt
        pt.show()

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)
    if trav.sep_close_smaller_starts is not None:
        trav = trav.merge_close_lists(queue)

    # NOTE(review): drawn from numpy's global generator without seeding in
    # this function, so the weights differ between runs unless a caller or
    # fixture seeds it -- confirm.
    weights = np.random.randn(nsources)
    #weights = np.ones(nsources)
    weights_sum = np.sum(weights)

    # The wranglers below operate on host copies of traversal and tree.
    host_trav = trav.get(queue=queue)
    host_tree = host_trav.tree

    if filter_kind:
        # Random per-target flags; targets with a positive flag are kept
        # (see the `pot[flags.get() > 0]` selection near the end).  With
        # targets=None there is one flag per source.
        flags = rng.uniform(queue, ntargets or nsources, np.int32, a=0, b=2) \
                .astype(np.int8)
        if filter_kind == "user":
            from boxtree.tree import filter_target_lists_in_user_order
            filtered_targets = filter_target_lists_in_user_order(queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInUserOrder(
                    host_tree, filtered_targets.get(queue=queue))
        elif filter_kind == "tree":
            from boxtree.tree import filter_target_lists_in_tree_order
            filtered_targets = filter_target_lists_in_tree_order(queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInTreeOrder(
                    host_tree, filtered_targets.get(queue=queue))
        else:
            raise ValueError("unsupported value of 'filter_kind'")
    else:
        wrangler = ConstantOneExpansionWrangler(host_tree)

    if ntargets is None and not filter_kind:
        # This check only works for targets == sources.
        assert (wrangler.reorder_potentials(
                wrangler.reorder_sources(weights)) == weights).all()

    from boxtree.fmm import drive_fmm
    pot = drive_fmm(host_trav, wrangler, weights)

    # {{{ build, evaluate matrix (and identify missing interactions)

    # Disabled debugging aid: assemble the full interaction matrix one unit
    # vector at a time and plot the source/target pairs whose entry is zero.
    if 0:
        mat = np.zeros((ntargets, nsources), dtype)
        from pytools import ProgressBar

        logging.getLogger().setLevel(logging.WARNING)

        pb = ProgressBar("matrix", nsources)
        for i in range(nsources):
            unit_vec = np.zeros(nsources, dtype=dtype)
            unit_vec[i] = 1
            mat[:, i] = drive_fmm(host_trav, wrangler, unit_vec)
            pb.progress()
        pb.finished()

        logging.getLogger().setLevel(logging.INFO)

        import matplotlib.pyplot as pt

        if 1:
            pt.spy(mat)
            pt.show()

        missing_tgts, missing_srcs = np.where(mat == 0)

        if 1 and len(missing_tgts):
            from boxtree.visualization import TreePlotter
            plotter = TreePlotter(host_tree)
            plotter.draw_tree(fill=False, edgecolor="black")
            plotter.draw_box_numbers()
            plotter.set_bounding_box()

            tree_order_missing_tgts = \
                    host_tree.indices_to_tree_target_order(missing_tgts)
            tree_order_missing_srcs = \
                    host_tree.indices_to_tree_source_order(missing_srcs)

            src_boxes = [
                    host_tree.find_box_nr_for_source(i)
                    for i in tree_order_missing_srcs]
            tgt_boxes = [
                    host_tree.find_box_nr_for_target(i)
                    for i in tree_order_missing_tgts]
            print(src_boxes)
            print(tgt_boxes)

            pt.plot(
                    host_tree.targets[0][tree_order_missing_tgts],
                    host_tree.targets[1][tree_order_missing_tgts],
                    "rv")
            pt.plot(
                    host_tree.sources[0][tree_order_missing_srcs],
                    host_tree.sources[1][tree_order_missing_srcs],
                    "go")
            pt.gca().set_aspect("equal")

            pt.show()

    # }}}

    if filter_kind:
        # Only compare the potential at targets that passed the filter.
        pot = pot[flags.get() > 0]

    # Every retained target is expected to see the sum of all weights; the
    # deviation is scaled by the number of sources before taking the norm.
    rel_err = la.norm((pot - weights_sum) / nsources)
    good = rel_err < 1e-8
    # Disabled debugging aid: plot the deviation at each target.
    if 0 and not good:
        import matplotlib.pyplot as pt
        pt.plot(pot-weights_sum)
        pt.show()

    # Disabled debugging aid: plot the filtered targets and mark the bad
    # ones.  It reads `flags`, which is only defined when filter_kind is set.
    if 0 and not good:
        import matplotlib.pyplot as pt
        filt_targets = [
                host_tree.targets[0][flags.get() > 0],
                host_tree.targets[1][flags.get() > 0],
                ]
        host_tree.plot()
        bad = np.abs(pot - weights_sum) >= 1e-3
        bad_targets = [
                filt_targets[0][bad],
                filt_targets[1][bad],
                ]
        print(bad_targets[0].shape)
        pt.plot(filt_targets[0], filt_targets[1], "x")
        pt.plot(bad_targets[0], bad_targets[1], "v")
        pt.show()

    assert good
Beispiel #26
0
def test_extent_tree(ctx_getter, dims, extent_norm, do_plot=False):
    """Build a tree over particles with extent and check its invariants.

    Normally distributed sources and targets are given radii of the form
    ``2**u`` with ``u`` drawn uniformly between -10 and 0.  The tree is built
    with ``stick_out_factor=0`` and a deliberately small ``nboxes_guess``.
    Afterwards the particle orderings, the per-box particle counts, the box
    extents and the containment of every particle (including its radius) in
    its box are asserted.  Finally, point sources are attached to the
    sources via ``link_point_sources``.

    :arg ctx_getter: zero-argument callable returning the OpenCL context.
    :arg dims: number of coordinate axes of the particles.
    :arg extent_norm: ``"linf"`` or ``"l2"``; passed to the tree builder and
        used to select the containment check, which raises
        :exc:`ValueError` for any other value.
    :arg do_plot: if true, show particles and tree boxes with matplotlib.
    """
    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 100000
    ntargets = 200000
    dtype = np.float64
    npoint_sources_per_source = 16

    sources = make_normal_particle_array(
            queue, nsources, dims, dtype, seed=12)
    targets = make_normal_particle_array(
            queue, ntargets, dims, dtype, seed=19)

    # Weight 1 for the first nsources entries, 0 for the remaining ntargets.
    refine_weights = cl.array.zeros(queue, nsources+ntargets, np.int32)
    refine_weights[:nsources] = 1

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=13)
    source_radii = 2**rng.uniform(queue, nsources, dtype=dtype, a=-10, b=0)
    target_radii = 2**rng.uniform(queue, ntargets, dtype=dtype, a=-10, b=0)

    from boxtree import TreeBuilder
    builder = TreeBuilder(ctx)

    build_options = {
            "targets": targets,
            "source_radii": source_radii,
            "target_radii": target_radii,
            "extent_norm": extent_norm,
            "refine_weights": refine_weights,
            "max_leaf_refine_weight": 20,
            # Set artificially small, to exercise the reallocation code.
            "nboxes_guess": 10,
            "debug": True,
            "stick_out_factor": 0,
            }

    queue.finish()
    dev_tree, _ = builder(queue, sources, **build_options)

    logger.info("transfer tree, check orderings")

    tree = dev_tree.get(queue=queue)

    if do_plot:
        import matplotlib.pyplot as pt
        pt.plot(sources[0].get(), sources[1].get(), "rx")
        pt.plot(targets[0].get(), targets[1].get(), "g+")

        from boxtree.visualization import TreePlotter
        plotter = TreePlotter(tree)
        plotter.draw_tree(fill=False, edgecolor="black", zorder=10)
        plotter.draw_box_numbers()
        plotter.set_bounding_box()

        pt.gca().set_aspect("equal", "datalim")
        pt.show()

    # Particle data in tree order (host side).
    src_sorted = np.array(list(tree.sources))
    tgt_sorted = np.array(list(tree.targets))
    src_radii_sorted = tree.source_radii
    tgt_radii_sorted = tree.target_radii

    # Particle data in the order it was handed to the builder.
    src_user = np.array([axis.get() for axis in sources])
    tgt_user = np.array([axis.get() for axis in targets])
    src_radii_user = source_radii.get()
    tgt_radii_user = target_radii.get()

    src_perm = tree.user_source_ids
    assert (src_sorted == src_user[:, src_perm]).all()
    assert (src_radii_sorted == src_radii_user[src_perm]).all()

    # {{{ test box structure, stick-out criterion

    logger.info("test box structure, stick-out criterion")

    # Invert sorted_target_ids to obtain the tree-order -> user-order map.
    user_target_ids = np.empty(tree.ntargets, dtype=np.intp)
    user_target_ids[tree.sorted_target_ids] = np.arange(
            tree.ntargets, dtype=np.intp)
    if ntargets:
        assert (tgt_sorted == tgt_user[:, user_target_ids]).all()
        assert (tgt_radii_sorted == tgt_radii_user[user_target_ids]).all()

    all_good_so_far = True

    # {{{ check sources, targets

    assert np.sum(tree.box_source_counts_nonchild) == nsources
    assert np.sum(tree.box_target_counts_nonchild) == ntargets

    # Cumulative target count = own (non-child) count + children's counts.
    for ibox in range(tree.nboxes):
        kid_sum = 0
        for ichild_box in tree.box_child_ids[:, ibox]:
            if ichild_box != 0:
                kid_sum += tree.box_target_counts_cumul[ichild_box]

        expected_cumul = tree.box_target_counts_nonchild[ibox] + kid_sum
        assert tree.box_target_counts_cumul[ibox] == expected_cumul, ibox

    bbox_slack = 1e-12*tree.root_extent
    for ibox in range(tree.nboxes):
        box_low, box_high = tree.get_box_extent(ibox)

        # Every box must lie inside the (slightly padded) bounding box.
        assert (box_low >= tree.bounding_box[0] - bbox_slack).all(), ibox
        assert (box_high <= tree.bounding_box[1] + bbox_slack).all(), ibox

        child_slots = tree.box_child_ids[:, ibox]
        # A child id of 0 marks an unused child slot.
        live_children = child_slots[child_slots != 0]

        src_in_children = np.sum(tree.box_source_counts_cumul[live_children])
        assert (tree.box_source_counts_nonchild[ibox] + src_in_children
                == tree.box_source_counts_cumul[ibox])

        tgt_in_children = np.sum(tree.box_target_counts_cumul[live_children])
        assert (tree.box_target_counts_nonchild[ibox] + tgt_in_children
                == tree.box_target_counts_cumul[ibox])

    del live_children
    del child_slots

    particle_kinds = (
            ("source", tree.box_source_starts, tree.box_source_counts_cumul,
                src_sorted, src_radii_sorted),
            ("target", tree.box_target_starts, tree.box_target_counts_cumul,
                tgt_sorted, tgt_radii_sorted),
            )

    for ibox in range(tree.nboxes):
        lev = int(tree.box_levels[ibox])
        box_radius = 0.5 * tree.root_extent / (1 << lev)
        box_center = tree.box_centers[:, ibox]
        extent_low = box_center - box_radius
        extent_high = box_center + box_radius

        stick_out_dist = tree.stick_out_factor * box_radius
        radius_with_stickout = (1 + tree.stick_out_factor) * box_radius

        for what, starts, counts, points, radii in particle_kinds:
            first = starts[ibox]
            in_box = slice(first, first+counts[ibox])
            check_particles = points[:, in_box]
            check_radii = radii[in_box]

            if extent_norm == "linf":
                # Per-axis check of the particle's bounding square/cube.
                below_upper = (
                        check_particles + check_radii
                        < extent_high[:, np.newaxis] + stick_out_dist)
                above_lower = (
                        extent_low[:, np.newaxis] - stick_out_dist
                        <= check_particles - check_radii)
                good = (below_upper & above_lower).all(axis=0)

            elif extent_norm == "l2":
                offsets = check_particles - box_center.reshape(-1, 1)
                center_dists = np.sqrt(np.sum(offsets**2, axis=0))

                good = (
                        (center_dists + check_radii)**2
                        < dims * radius_with_stickout**2)

            else:
                raise ValueError("unexpected value of extent_norm")

            all_good_here = good.all()

            if not all_good_here:
                print("BAD BOX %s %d level %d"
                        % (what, ibox, tree.box_levels[ibox]))

            all_good_so_far = all_good_so_far and all_good_here
            assert all_good_here

    # }}}

    assert all_good_so_far

    # }}}

    # {{{ create, link point sources

    logger.info("creating point sources")

    np.random.seed(20)

    from pytools.obj_array import make_obj_array

    # One (nsources, npoint_sources_per_source) array per axis: each source
    # center is displaced by up to its own radius along that axis.
    point_source_axes = []
    for iaxis in range(dims):
        displacement = np.random.uniform(
                -1, 1, size=(nsources, npoint_sources_per_source))
        host_axis = (
                src_user[iaxis][:, np.newaxis]
                + src_radii_user[:, np.newaxis] * displacement)
        point_source_axes.append(cl.array.to_device(queue, host_axis))

    point_sources = make_obj_array(point_source_axes)

    point_source_starts = cl.array.arange(queue,
            0, (nsources+1)*npoint_sources_per_source, npoint_sources_per_source,
            dtype=tree.particle_id_dtype)

    from boxtree.tree import link_point_sources
    dev_tree = link_point_sources(queue, dev_tree,
            point_source_starts, point_sources,
            debug=True)
Beispiel #27
0
def test_pyfmmlib_fmm(ctx_getter, dims, use_dipoles, helmholtz_k):
    """Check an FMM driven through pyfmmlib against direct evaluation.

    The potential computed by ``drive_fmm`` with an
    ``FMMLibExpansionWrangler`` is compared, in the relative infinity norm,
    with pyfmmlib's direct summation routine and, if sumpy can be imported,
    with sumpy's ``P2P``.  The test is skipped when pyfmmlib is missing.

    :arg ctx_getter: zero-argument callable returning the OpenCL context.
    :arg dims: number of coordinate axes (selects the 2D or 3D routine).
    :arg use_dipoles: if true, sources are dipoles with random direction
        vectors instead of charges.
    :arg helmholtz_k: Helmholtz parameter; a false-y value (0) selects the
        Laplace kernel on the sumpy side.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 3000
    ntargets = 1000
    dtype = np.float64

    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    # Shift the targets by 2 along the first axis.
    targets = (p_normal(queue, ntargets, dims, dtype, seed=18) +
               np.array([2, 0, 0])[:dims])

    sources_host = particle_array_to_host(sources)
    targets_host = particle_array_to_host(targets)

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue,
                 sources,
                 targets=targets,
                 max_particles_in_box=30,
                 debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)

    trav = trav.get(queue=queue)

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=20)

    weights = rng.uniform(queue, nsources, dtype=np.float64).get()
    #weights = np.ones(nsources)

    if use_dipoles:
        np.random.seed(13)
        dipole_vec = np.random.randn(dims, nsources)
    else:
        dipole_vec = None

    if dims == 2 and helmholtz_k == 0:
        base_nterms = 20
    else:
        base_nterms = 10

    def fmm_level_to_nterms(tree, lev):
        """Return the expansion order to use on tree level *lev*."""
        result = base_nterms

        if lev < 3 and helmholtz_k:
            # exercise order-varies-by-level capability
            result += 5

        if use_dipoles:
            result += 1

        return result

    from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler
    wrangler = FMMLibExpansionWrangler(trav.tree,
                                       helmholtz_k,
                                       fmm_level_to_nterms=fmm_level_to_nterms,
                                       dipole_vec=dipole_vec)

    from boxtree.fmm import drive_fmm

    timing_data = {}
    pot = drive_fmm(trav, wrangler, weights, timing_data=timing_data)
    print(timing_data)
    # drive_fmm is expected to have filled in the timing dictionary.
    assert timing_data

    # {{{ ref fmmlib computation

    logger.info("computing direct (reference) result")

    import pyfmmlib
    # Routine name is assembled from equation letter, dimension and source
    # kind, e.g. "<letter>potgrad2dall_vec" or "<letter>potfld3dall_dp_vec".
    fmmlib_routine = getattr(
        pyfmmlib, "%spot%s%ddall%s_vec" %
        (wrangler.eqn_letter, "fld" if dims == 3 else "grad", dims,
         "_dp" if use_dipoles else ""))

    # Only the potential is needed; switch off derivative outputs.
    kwargs = {}
    if dims == 3:
        kwargs["iffld"] = False
    else:
        kwargs["ifgrad"] = False
        kwargs["ifhess"] = False

    if use_dipoles:
        if helmholtz_k == 0 and dims == 2:
            # NOTE(review): the 2D Laplace routine is given a single complex
            # dipole strength built from the direction vector -- confirm the
            # sign convention against pyfmmlib.
            kwargs["dipstr"] = -weights * (dipole_vec[0] + 1j * dipole_vec[1])
        else:
            kwargs["dipstr"] = weights
            kwargs["dipvec"] = dipole_vec
    else:
        kwargs["charge"] = weights
    if helmholtz_k:
        kwargs["zk"] = helmholtz_k

    ref_pot = wrangler.finalize_potentials(
        fmmlib_routine(sources=sources_host.T,
                       targets=targets_host.T,
                       **kwargs)[0])

    # The error is measured in the infinity norm, so label it as such.
    rel_err = la.norm(pot - ref_pot, np.inf) / la.norm(ref_pot, np.inf)
    logger.info("relative linf error vs fmmlib direct: %g", rel_err)
    assert rel_err < 1e-5, rel_err

    # }}}

    # {{{ check against sumpy

    try:
        import sumpy  # noqa
    except ImportError:
        have_sumpy = False
        from warnings import warn
        warn("sumpy unavailable: cannot compute independent reference "
             "values for pyfmmlib")
    else:
        have_sumpy = True

    if have_sumpy:
        from sumpy.kernel import (LaplaceKernel, HelmholtzKernel,
                                  DirectionalSourceDerivative)
        from sumpy.p2p import P2P

        sumpy_extra_kwargs = {}
        if helmholtz_k:
            knl = HelmholtzKernel(dims)
            sumpy_extra_kwargs["k"] = helmholtz_k
        else:
            knl = LaplaceKernel(dims)

        if use_dipoles:
            knl = DirectionalSourceDerivative(knl)
            sumpy_extra_kwargs["src_derivative_dir"] = dipole_vec

        p2p = P2P(ctx, [knl], exclude_self=False)

        evt, (sumpy_ref_pot, ) = p2p(queue,
                                     targets,
                                     sources, [weights],
                                     out_host=True,
                                     **sumpy_extra_kwargs)

        sumpy_rel_err = (la.norm(pot - sumpy_ref_pot, np.inf) /
                         la.norm(sumpy_ref_pot, np.inf))

        logger.info("relative linf error vs sumpy direct: %g", sumpy_rel_err)
        assert sumpy_rel_err < 1e-5, sumpy_rel_err
Beispiel #28
0
def test_extent_tree(ctx_factory, dims, extent_norm, do_plot=False):
    """Verify the structure of a tree built from particles with extent.

    Sources and targets (normally distributed, radii ``2**u`` with ``u``
    uniform between -10 and 0) are sorted into a tree with
    ``stick_out_factor=0``.  The test then asserts that the tree-ordered
    particle data matches the user-ordered data, that per-box counts add up,
    that boxes lie within the bounding box, and that each particle together
    with its radius fits into its box under *extent_norm*.  It ends by
    linking point sources to the tree.

    :arg ctx_factory: zero-argument callable returning the OpenCL context.
    :arg dims: number of coordinate axes of the particles.
    :arg extent_norm: ``"linf"`` or ``"l2"``; the containment check raises
        :exc:`ValueError` for anything else.
    :arg do_plot: if true, show particles and tree boxes with matplotlib.
    """
    logging.basicConfig(level=logging.INFO)

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    nsources = 100000
    ntargets = 200000
    dtype = np.float64
    npoint_sources_per_source = 16

    sources = make_normal_particle_array(
        queue, nsources, dims, dtype, seed=12)
    targets = make_normal_particle_array(
        queue, ntargets, dims, dtype, seed=19)

    # First nsources weights are 1, the trailing ntargets weights stay 0.
    refine_weights = cl.array.zeros(queue, nsources + ntargets, np.int32)
    refine_weights[:nsources] = 1

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=13)
    source_radii = 2**rng.uniform(queue, nsources, dtype=dtype, a=-10, b=0)
    target_radii = 2**rng.uniform(queue, ntargets, dtype=dtype, a=-10, b=0)

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    queue.finish()
    dev_tree, _ = tb(
        queue, sources,
        targets=targets,
        source_radii=source_radii, target_radii=target_radii,
        extent_norm=extent_norm,
        refine_weights=refine_weights, max_leaf_refine_weight=20,
        # Set artificially small, to exercise the reallocation code.
        nboxes_guess=10,
        debug=True,
        stick_out_factor=0)

    logger.info("transfer tree, check orderings")

    tree = dev_tree.get(queue=queue)

    if do_plot:
        import matplotlib.pyplot as pt
        pt.plot(sources[0].get(), sources[1].get(), "rx")
        pt.plot(targets[0].get(), targets[1].get(), "g+")

        from boxtree.visualization import TreePlotter
        plotter = TreePlotter(tree)
        plotter.draw_tree(fill=False, edgecolor="black", zorder=10)
        plotter.draw_box_numbers()
        plotter.set_bounding_box()

        pt.gca().set_aspect("equal", "datalim")
        pt.show()

    # Tree-ordered data as stored on the host tree.
    tree_sources = np.array(list(tree.sources))
    tree_targets = np.array(list(tree.targets))
    tree_source_radii = tree.source_radii
    tree_target_radii = tree.target_radii

    # User-ordered data, copied back from the device.
    user_sources = np.array([coord.get() for coord in sources])
    user_targets = np.array([coord.get() for coord in targets])
    user_source_radii = source_radii.get()
    user_target_radii = target_radii.get()

    sources_match = tree_sources == user_sources[:, tree.user_source_ids]
    assert sources_match.all()
    source_radii_match = (
        tree_source_radii == user_source_radii[tree.user_source_ids])
    assert source_radii_match.all()

    # {{{ test box structure, stick-out criterion

    logger.info("test box structure, stick-out criterion")

    # Build the inverse permutation of sorted_target_ids.
    user_target_ids = np.empty(tree.ntargets, dtype=np.intp)
    user_target_ids[tree.sorted_target_ids] = np.arange(
        tree.ntargets, dtype=np.intp)
    if ntargets:
        targets_match = tree_targets == user_targets[:, user_target_ids]
        assert targets_match.all()
        target_radii_match = (
            tree_target_radii == user_target_radii[user_target_ids])
        assert target_radii_match.all()

    all_good_so_far = True

    # {{{ check sources, targets

    assert np.sum(tree.box_source_counts_nonchild) == nsources
    assert np.sum(tree.box_target_counts_nonchild) == ntargets

    for ibox in range(tree.nboxes):
        # Child id 0 denotes an absent child and is left out of the sum.
        kid_sum = sum(
            tree.box_target_counts_cumul[ikid]
            for ikid in tree.box_child_ids[:, ibox]
            if ikid != 0)
        own_plus_kids = tree.box_target_counts_nonchild[ibox] + kid_sum
        assert tree.box_target_counts_cumul[ibox] == own_plus_kids, ibox

    for ibox in range(tree.nboxes):
        lower_corner, upper_corner = tree.get_box_extent(ibox)

        bbox_lower_limit = tree.bounding_box[0] - 1e-12 * tree.root_extent
        assert (lower_corner >= bbox_lower_limit).all(), ibox
        bbox_upper_limit = tree.bounding_box[1] + 1e-12 * tree.root_extent
        assert (upper_corner <= bbox_upper_limit).all(), ibox

        kids = tree.box_child_ids[:, ibox]
        present_kids = kids[kids != 0]

        source_total = (
            tree.box_source_counts_nonchild[ibox]
            + np.sum(tree.box_source_counts_cumul[present_kids]))
        assert source_total == tree.box_source_counts_cumul[ibox]

        target_total = (
            tree.box_target_counts_nonchild[ibox]
            + np.sum(tree.box_target_counts_cumul[present_kids]))
        assert target_total == tree.box_target_counts_cumul[ibox]

    del present_kids
    del kids

    for ibox in range(tree.nboxes):
        lev = int(tree.box_levels[ibox])
        box_radius = 0.5 * tree.root_extent / (1 << lev)
        box_center = tree.box_centers[:, ibox]
        extent_low = box_center - box_radius
        extent_high = box_center + box_radius

        stick_out_dist = tree.stick_out_factor * box_radius
        radius_with_stickout = (1 + tree.stick_out_factor) * box_radius

        per_kind = [
            ("source", tree.box_source_starts, tree.box_source_counts_cumul,
             tree_sources, tree_source_radii),
            ("target", tree.box_target_starts, tree.box_target_counts_cumul,
             tree_targets, tree_target_radii),
        ]
        for what, starts, counts, points, radii in per_kind:
            begin = starts[ibox]
            members = slice(begin, begin + counts[ibox])
            check_particles = points[:, members]
            check_radii = radii[members]

            if extent_norm == "linf":
                high_side = check_particles + check_radii
                low_side = check_particles - check_radii
                good = (
                    (high_side < extent_high[:, np.newaxis] + stick_out_dist)
                    &  # noqa: W504
                    (extent_low[:, np.newaxis] - stick_out_dist <= low_side)
                ).all(axis=0)

            elif extent_norm == "l2":
                squared_offsets = (
                    check_particles - box_center.reshape(-1, 1))**2
                center_dists = np.sqrt(np.sum(squared_offsets, axis=0))

                good = ((center_dists + check_radii)**2 <
                        dims * radius_with_stickout**2)

            else:
                raise ValueError("unexpected value of extent_norm")

            all_good_here = good.all()

            if not all_good_here:
                print("BAD BOX %s %d level %d" %
                      (what, ibox, tree.box_levels[ibox]))

            all_good_so_far = all_good_so_far and all_good_here
            assert all_good_here

    # }}}

    assert all_good_so_far

    # }}}

    # {{{ create, link point sources

    logger.info("creating point sources")

    np.random.seed(20)

    from pytools.obj_array import make_obj_array

    def make_point_source_axis(iaxis):
        """Scatter point sources around each source center along one axis."""
        spread = np.random.uniform(
            -1, 1, size=(nsources, npoint_sources_per_source))
        return cl.array.to_device(
            queue,
            user_sources[iaxis][:, np.newaxis]
            + user_source_radii[:, np.newaxis] * spread)

    point_sources = make_obj_array(
        [make_point_source_axis(iaxis) for iaxis in range(dims)])

    total_point_sources = (nsources + 1) * npoint_sources_per_source
    point_source_starts = cl.array.arange(
        queue, 0, total_point_sources, npoint_sources_per_source,
        dtype=tree.particle_id_dtype)

    from boxtree.tree import link_point_sources
    dev_tree = link_point_sources(
        queue, dev_tree, point_source_starts, point_sources, debug=True)
Beispiel #29
0
def plot_traversal(ctx_getter, do_plot=False):
    """Plot a tree together with a few randomly chosen interaction lists.

    A 2D tree is built over normally distributed particles, an FMM traversal
    is generated for it, and for five randomly picked boxes the members of
    one interaction list are drawn in yellow with the box itself in red.
    Which list is drawn is selected by the ``if 0/elif 1`` switches below.

    :arg ctx_getter: zero-argument callable returning the OpenCL context.
    :arg do_plot: currently unused; the figure is always shown.
    """
    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    #for dims in [2, 3]:
    for dims in [2]:
        nparticles = 10**4
        dtype = np.float64

        from pyopencl.clrandom import PhiloxGenerator
        rng = PhiloxGenerator(queue.context, seed=15)

        from pytools.obj_array import make_obj_array
        particles = make_obj_array(
            [rng.normal(queue, nparticles, dtype=dtype) for i in range(dims)])

        # if do_plot:
        #     pt.plot(particles[0].get(), particles[1].get(), "x")

        from boxtree import TreeBuilder
        tb = TreeBuilder(ctx)

        queue.finish()
        # The builders are unpacked as (result, extra) pairs everywhere else
        # in this file; binding the whole pair to one name would pass a
        # tuple on to the traversal builder and the plotter.
        dev_tree, _ = tb(queue, particles, max_particles_in_box=30, debug=True)

        from boxtree.traversal import FMMTraversalBuilder
        tg = FMMTraversalBuilder(ctx)
        dev_trav, _ = tg(queue, dev_tree)

        # Plotting and list indexing below work on host-side copies.
        tree = dev_tree.get(queue=queue)
        trav = dev_trav.get(queue=queue)

        from boxtree.visualization import TreePlotter
        plotter = TreePlotter(tree)
        plotter.draw_tree(fill=False, edgecolor="black")
        #plotter.draw_box_numbers()
        plotter.set_bounding_box()

        from random import randrange, seed
        seed(7)

        # {{{ generic box drawing helper

        def draw_some_box_lists(starts, lists, key_to_box=None, count=5):
            """Draw *count* randomly chosen non-empty lists.

            *starts*/*lists* are indexed CSR-style: list ``key`` occupies
            ``lists[starts[key]:starts[key+1]]``.  If *key_to_box* is given,
            it maps the list index to a box number; otherwise the list index
            is the box number itself.
            """
            actual_count = 0
            while actual_count < count:
                if key_to_box is not None:
                    key = randrange(len(key_to_box))
                    ibox = key_to_box[key]
                else:
                    key = ibox = randrange(tree.nboxes)

                start, end = starts[key:key + 2]
                if start == end:
                    # Empty list: pick another box.
                    continue

                #print ibox, start, end, lists[start:end]
                for jbox in lists[start:end]:
                    plotter.draw_box(jbox, facecolor='yellow')

                plotter.draw_box(ibox, facecolor='red')

                actual_count += 1

        # }}}

        # NOTE(review): the traversal attribute names below could not be
        # checked against the traversal class from here -- confirm they
        # match the installed boxtree version.
        if 0:
            # colleagues
            draw_some_box_lists(trav.colleagues_starts, trav.colleagues_lists)
        elif 0:
            # near neighbors ("list 1")
            draw_some_box_lists(trav.neighbor_leaves_starts,
                                trav.neighbor_leaves_lists,
                                key_to_box=trav.source_boxes)
        elif 0:
            # well-separated siblings (list 2)
            draw_some_box_lists(trav.sep_siblings_starts,
                                trav.sep_siblings_lists)
        elif 1:
            # separated smaller (list 3)
            draw_some_box_lists(trav.sep_smaller_starts,
                                trav.sep_smaller_lists,
                                key_to_box=trav.source_boxes)
        elif 1:
            # separated bigger (list 4)
            # Not reached while the branch above is switched on.
            draw_some_box_lists(trav.sep_bigger_starts, trav.sep_bigger_lists)

        import matplotlib.pyplot as pt
        pt.show()
Beispiel #30
0
def test_sumpy_fmm(ctx_getter, knl, local_expn_class, mpole_expn_class):
    """Check convergence of a sumpy-driven FMM against direct summation.

    For each expansion order in a kernel-dependent list, the FMM potential
    from ``drive_fmm`` is compared with sumpy's ``P2P`` result in the
    relative infinity norm, and the (order, error) pairs are handed to a
    ``PConvergenceVerifier``, which is invoked at the end.

    :arg ctx_getter: zero-argument callable returning the OpenCL context.
    :arg knl: the kernel under test; ``knl.dim`` sets the dimension.
    :arg local_expn_class: local expansion class, applied to *knl*.
    :arg mpole_expn_class: multipole expansion class, applied to *knl*.
    """
    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 1000
    ntargets = 300
    dtype = np.float64

    from boxtree.tools import (make_normal_particle_array as p_normal)

    sources = p_normal(queue, nsources, knl.dim, dtype, seed=15)
    if 1:
        # Targets are a second particle cloud shifted by 0.1 along axis 0.
        offset = np.zeros(knl.dim)
        offset[0] = 0.1

        targets = (p_normal(queue, ntargets, knl.dim, dtype, seed=18) + offset)

        del offset
    else:
        # Disabled alternative: targets on a regular plotting grid.
        from sumpy.visualization import FieldPlotter
        fp = FieldPlotter(np.array([0.5, 0]), extent=3, npoints=200)
        from pytools.obj_array import make_obj_array
        targets = make_obj_array([fp.points[i] for i in range(knl.dim)])

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue,
                 sources,
                 targets=targets,
                 max_particles_in_box=30,
                 debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)

    # {{{ plot tree

    # Debugging aid, switched off by default.
    if 0:
        host_tree = tree.get()
        host_trav = trav.get()

        if 1:
            print("src_box", host_tree.find_box_nr_for_source(403))
            print("tgt_box", host_tree.find_box_nr_for_target(28))
            print(list(host_trav.target_or_target_parent_boxes).index(37))
            print(host_trav.get_box_list("sep_bigger", 22))

        from boxtree.visualization import TreePlotter
        plotter = TreePlotter(host_tree)
        plotter.draw_tree(fill=False, edgecolor="black", zorder=10)
        plotter.set_bounding_box()
        plotter.draw_box_numbers()

        import matplotlib.pyplot as pt
        pt.show()

    # }}}

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(ctx, seed=44)
    weights = rng.uniform(queue, nsources, dtype=np.float64)

    logger.info("computing direct (reference) result")

    from pytools.convergence import PConvergenceVerifier

    pconv_verifier = PConvergenceVerifier()

    # Kernel-specific parameters, result dtype and expansion orders.
    extra_kwargs = {}
    dtype = np.float64
    order_values = [1, 2, 3]
    if isinstance(knl, HelmholtzKernel):
        extra_kwargs["k"] = 0.05
        dtype = np.complex128

        if knl.dim == 3:
            order_values = [1, 2]
        elif knl.dim == 2 and issubclass(local_expn_class, H2DLocalExpansion):
            order_values = [10, 12]

    elif isinstance(knl, YukawaKernel):
        extra_kwargs["lam"] = 2
        dtype = np.complex128

        if knl.dim == 3:
            order_values = [1, 2]
        elif knl.dim == 2 and issubclass(local_expn_class, Y2DLocalExpansion):
            order_values = [10, 12]

    from functools import partial
    for order in order_values:
        out_kernels = [knl]

        from sumpy.fmm import SumpyExpansionWranglerCodeContainer
        wcc = SumpyExpansionWranglerCodeContainer(
            ctx, partial(mpole_expn_class, knl),
            partial(local_expn_class, knl), out_kernels)
        # Use the same expansion order on every level.
        wrangler = wcc.get_wrangler(
            queue,
            tree,
            dtype,
            fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order,
            kernel_extra_kwargs=extra_kwargs)

        from boxtree.fmm import drive_fmm

        pot, = drive_fmm(trav, wrangler, weights)

        from sumpy import P2P
        p2p = P2P(ctx, out_kernels, exclude_self=False)
        evt, (ref_pot, ) = p2p(queue, targets, sources, (weights, ),
                               **extra_kwargs)

        pot = pot.get()
        ref_pot = ref_pot.get()

        # The error is measured in the infinity norm, so label it as such.
        rel_err = la.norm(pot - ref_pot, np.inf) / la.norm(ref_pot, np.inf)
        logger.info("order %d -> relative linf error: %g", order, rel_err)

        pconv_verifier.add_data_point(order, rel_err)

    print(pconv_verifier)
    pconv_verifier()
Beispiel #31
0
def test_estimate_calibration_params(ctx_factory):
    """Estimate cost-model calibration parameters from timed FMM runs.

    Four problem sizes are run through a pyfmmlib-driven FMM while timing
    data is collected.  All but the last case are then fed to both the
    Python and the OpenCL cost model to estimate calibration parameters.
    Each estimate must consist of ``np.float64`` values, and when process
    time is supported both models must agree exactly.

    :arg ctx_factory: zero-argument callable returning the OpenCL context.
    """
    from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler
    from boxtree.tools import make_normal_particle_array as p_normal
    from pyopencl.clrandom import PhiloxGenerator
    from boxtree import TreeBuilder
    from boxtree.traversal import FMMTraversalBuilder
    from boxtree.fmm import drive_fmm

    nsources_list = [1000, 2000, 3000, 4000]
    ntargets_list = [1000, 2000, 3000, 4000]
    dims = 3
    dtype = np.float64

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    traversals = []
    traversals_dev = []
    level_to_orders = []
    timing_results = []

    def fmm_level_to_nterms(tree, ilevel):
        """Use expansion order 10 on every level."""
        return 10

    for nsources, ntargets in zip(nsources_list, ntargets_list):
        # {{{ Generate sources, targets and target_radii

        sources = p_normal(queue, nsources, dims, dtype, seed=15)
        targets = p_normal(queue, ntargets, dims, dtype, seed=18)

        rng = PhiloxGenerator(queue.context, seed=22)
        device_radii = rng.uniform(queue, ntargets, a=0, b=0.05, dtype=dtype)
        target_radii = device_radii.get()

        # }}}

        # {{{ Generate tree and traversal

        tb = TreeBuilder(ctx)
        tree, _ = tb(
            queue, sources,
            targets=targets,
            target_radii=target_radii,
            stick_out_factor=0.15,
            max_particles_in_box=30,
            debug=True,
        )

        tg = FMMTraversalBuilder(ctx, well_sep_is_n_away=2)
        trav_dev, _ = tg(queue, tree, debug=True)
        trav = trav_dev.get(queue=queue)

        traversals.append(trav)
        traversals_dev.append(trav_dev)

        # }}}

        wrangler = FMMLibExpansionWrangler(trav.tree, 0, fmm_level_to_nterms)
        level_to_orders.append(wrangler.level_nterms)

        # Run the FMM only for its timings; the potential is discarded.
        timing_data = {}
        src_weights = np.random.rand(tree.nsources).astype(tree.coord_dtype)
        drive_fmm(trav, wrangler, (src_weights,), timing_data=timing_data)

        timing_results.append(timing_data)

    time_field_name = (
        "process_elapsed" if SUPPORTS_PROCESS_TIME else "wall_elapsed")

    # Calibration parameters every estimate must provide.
    param_names = ("c_p2m", "c_m2m", "c_p2p", "c_m2l", "c_m2p", "c_p2l",
                   "c_l2l", "c_l2p")

    def check_params_sanity(params):
        """Assert that each calibration parameter is an ``np.float64``."""
        for name in param_names:
            assert isinstance(params[name], np.float64)

    def check_params_equal(params_a, params_b):
        """Assert that two parameter sets agree exactly, name by name."""
        for name in param_names:
            assert params_a[name] == params_b[name]

    # The last case is left out of the calibration input.
    calibration_timings = timing_results[:-1]

    python_cost_model = _PythonFMMCostModel(make_pde_aware_translation_cost_model)

    python_model_results = [
        python_cost_model.cost_per_stage(
            queue, traversal, level_to_order,
            _PythonFMMCostModel.get_unit_calibration_params(),
        )
        for traversal, level_to_order
        in zip(traversals[:-1], level_to_orders[:-1])
    ]

    python_params = python_cost_model.estimate_calibration_params(
        python_model_results, calibration_timings,
        time_field_name=time_field_name,
    )

    check_params_sanity(python_params)

    cl_cost_model = FMMCostModel(make_pde_aware_translation_cost_model)

    cl_model_results = [
        cl_cost_model.cost_per_stage(
            queue, traversal, level_to_order,
            FMMCostModel.get_unit_calibration_params(),
        )
        for traversal, level_to_order
        in zip(traversals_dev[:-1], level_to_orders[:-1])
    ]

    cl_params = cl_cost_model.estimate_calibration_params(
        cl_model_results, calibration_timings,
        time_field_name=time_field_name,
    )

    check_params_sanity(cl_params)

    if SUPPORTS_PROCESS_TIME:
        check_params_equal(cl_params, python_params)
Beispiel #32
0
def test_sumpy_fmm(ctx_getter, knl, local_expn_class, mpole_expn_class):
    """Check p-convergence of the sumpy-driven FMM against direct summation.

    For every expansion order in a kernel-dependent list, the potential
    returned by ``drive_fmm`` is compared with a ``P2P`` reference
    evaluation. The relative error, measured in the infinity norm, is fed
    to a ``PConvergenceVerifier`` which is invoked at the end.

    :arg ctx_getter: callable returning the OpenCL context to run on.
    :arg knl: kernel instance; ``knl.dim`` determines the number of
        coordinate arrays generated for sources and targets.
    :arg local_expn_class: local expansion class, bound to *knl* via
        :func:`functools.partial`.
    :arg mpole_expn_class: multipole expansion class, bound the same way.
    """
    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 1000
    ntargets = 300
    dtype = np.float64

    from boxtree.tools import (
            make_normal_particle_array as p_normal)

    sources = p_normal(queue, nsources, knl.dim, dtype, seed=15)
    if 1:
        # Default path: targets from a second normal sample, shifted by 0.1
        # along the first coordinate axis.
        offset = np.zeros(knl.dim)
        offset[0] = 0.1

        targets = (
                p_normal(queue, ntargets, knl.dim, dtype, seed=18)
                + offset)

        del offset
    else:
        # Manual toggle: take the targets from a FieldPlotter point grid.
        from sumpy.visualization import FieldPlotter
        fp = FieldPlotter(np.array([0.5, 0]), extent=3, npoints=200)
        from pytools.obj_array import make_obj_array
        targets = make_obj_array(
                [fp.points[i] for i in range(knl.dim)])

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, targets=targets,
            max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)

    # {{{ plot tree

    # Debugging aid, switched off by default.
    if 0:
        host_tree = tree.get()
        host_trav = trav.get()

        if 1:
            print("src_box", host_tree.find_box_nr_for_source(403))
            print("tgt_box", host_tree.find_box_nr_for_target(28))
            print(list(host_trav.target_or_target_parent_boxes).index(37))
            print(host_trav.get_box_list("sep_bigger", 22))

        from boxtree.visualization import TreePlotter
        plotter = TreePlotter(host_tree)
        plotter.draw_tree(fill=False, edgecolor="black", zorder=10)
        plotter.set_bounding_box()
        plotter.draw_box_numbers()

        import matplotlib.pyplot as pt
        pt.show()

    # }}}

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(ctx, seed=44)
    weights = rng.uniform(queue, nsources, dtype=np.float64)

    logger.info("computing direct (reference) result")

    from pytools.convergence import PConvergenceVerifier

    pconv_verifier = PConvergenceVerifier()

    # 'dtype' is still np.float64 here; it is switched to complex128 below
    # for the Helmholtz and Yukawa kernels before being handed to the
    # wrangler.
    extra_kwargs = {}
    order_values = [1, 2, 3]
    if isinstance(knl, HelmholtzKernel):
        extra_kwargs["k"] = 0.05
        dtype = np.complex128

        if knl.dim == 3:
            order_values = [1, 2]
        elif knl.dim == 2 and issubclass(local_expn_class, H2DLocalExpansion):
            order_values = [10, 12]

    elif isinstance(knl, YukawaKernel):
        extra_kwargs["lam"] = 2
        dtype = np.complex128

        if knl.dim == 3:
            order_values = [1, 2]
        elif knl.dim == 2 and issubclass(local_expn_class, Y2DLocalExpansion):
            order_values = [10, 12]

    # Loop-invariant imports, hoisted out of the per-order loop.
    from functools import partial
    from sumpy.fmm import SumpyExpansionWranglerCodeContainer
    from boxtree.fmm import drive_fmm
    from sumpy import P2P

    for order in order_values:
        out_kernels = [knl]

        wcc = SumpyExpansionWranglerCodeContainer(
                ctx,
                partial(mpole_expn_class, knl),
                partial(local_expn_class, knl),
                out_kernels)
        # The lambda deliberately captures the current 'order'; it is only
        # used by this iteration's wrangler.
        wrangler = wcc.get_wrangler(queue, tree, dtype,
                fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order,
                kernel_extra_kwargs=extra_kwargs)

        pot, = drive_fmm(trav, wrangler, weights)

        p2p = P2P(ctx, out_kernels, exclude_self=False)
        _, (ref_pot,) = p2p(queue, targets, sources, (weights,),
                **extra_kwargs)

        pot = pot.get()
        ref_pot = ref_pot.get()

        # The error is measured in the infinity norm, so the log line is
        # labelled accordingly.
        rel_err = la.norm(pot - ref_pot, np.inf) / la.norm(ref_pot, np.inf)
        logger.info("order %d -> relative linf error: %g", order, rel_err)

        pconv_verifier.add_data_point(order, rel_err)

    print(pconv_verifier)
    pconv_verifier()
Beispiel #33
0
def test_cost_model_op_counts_agree_with_constantone_wrangler(
        ctx_factory, nsources, ntargets, dims, dtype):
    """Compare modeled per-stage costs with a ConstantOne wrangler's op counts.

    An FMM is driven with ``ConstantOneExpansionWrangler`` while collecting
    ``timing_data``. The ``"ops_elapsed"`` entry of every recorded stage must
    equal the value ``FMMCostModel.cost_per_stage`` returns for that stage
    when built with ``OpCountingTranslationCostModel`` and unit calibration
    parameters. The summed per-box cost plus the ``"coarsen_multipoles"``
    and ``"refine_locals"`` stage costs must equal the total of all
    ``"ops_elapsed"`` values.

    :arg ctx_factory: callable returning the OpenCL context to run on.
    :arg nsources: number of source particles.
    :arg ntargets: number of target particles.
    :arg dims: number of coordinate arrays per particle set.
    :arg dtype: dtype used for coordinates and target radii.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    # {{{ particles and target radii

    from boxtree.tools import make_normal_particle_array as p_normal
    sources = p_normal(queue, nsources, dims, dtype, seed=16)
    targets = p_normal(queue, ntargets, dims, dtype, seed=19)

    from pyopencl.clrandom import PhiloxGenerator
    radii_rng = PhiloxGenerator(queue.context, seed=20)
    target_radii = radii_rng.uniform(
        queue, ntargets, a=0, b=0.04, dtype=dtype
    ).get()

    # }}}

    # {{{ tree and traversal

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(ctx)
    tree, _ = tree_builder(
        queue, sources, targets=targets, target_radii=target_radii,
        stick_out_factor=0.15, max_particles_in_box=30, debug=True
    )

    from boxtree.traversal import FMMTraversalBuilder
    trav_builder = FMMTraversalBuilder(ctx, well_sep_is_n_away=2)
    trav_dev, _ = trav_builder(queue, tree, debug=True)
    trav = trav_dev.get(queue=queue)

    # }}}

    # {{{ run the FMM and record per-stage operation counts

    from boxtree.tools import ConstantOneExpansionWrangler
    from boxtree.fmm import drive_fmm

    wrangler = ConstantOneExpansionWrangler(trav.tree)
    timing_data = {}
    src_weights = np.random.rand(tree.nsources).astype(tree.coord_dtype)
    drive_fmm(trav, wrangler, (src_weights,), timing_data=timing_data)

    # }}}

    # {{{ Test per-stage cost

    cost_model = FMMCostModel(
        translation_cost_model_factory=OpCountingTranslationCostModel
    )

    # Order 1 on every level.
    level_to_order = np.array([1 for _level in range(tree.nlevels)])

    modeled_time = cost_model.cost_per_stage(
        queue, trav_dev, level_to_order,
        FMMCostModel.get_unit_calibration_params(),
    )

    mismatches = [
        (stage, timing_data[stage]["ops_elapsed"], modeled_time[stage])
        for stage in timing_data
        if timing_data[stage]["ops_elapsed"] != modeled_time[stage]
    ]

    assert not mismatches, "\n".join(str(s) for s in mismatches)

    # }}}

    # {{{ Test per-box cost

    total_cost = sum(
        (timing_data[stage]["ops_elapsed"] for stage in timing_data), 0.0)

    per_box_cost = cost_model.cost_per_box(
        queue, trav_dev, level_to_order,
        FMMCostModel.get_unit_calibration_params(),
    )
    total_aggregate_cost = cost_model.aggregate_over_boxes(per_box_cost)

    # The two stage costs are added to the per-box aggregate before
    # comparing it with the overall total.
    expected_total = (
            total_aggregate_cost
            + modeled_time["coarsen_multipoles"]
            + modeled_time["refine_locals"]
    )
    assert total_cost == expected_total

    # }}}
Beispiel #34
0
def test_sumpy_fmm_exclude_self(ctx_getter):
    """Check the sumpy FMM with ``exclude_self=True`` against direct P2P.

    A 2D Laplace kernel with volume Taylor expansions of order 10 is
    evaluated with the sources doubling as targets. The FMM potential is
    compared with a ``P2P(..., exclude_self=True)`` reference; the relative
    2-norm error must be within ``1e-7`` of zero.

    :arg ctx_getter: callable returning the OpenCL context to run on.
    """
    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 500
    dtype = np.float64

    from boxtree.tools import (
            make_normal_particle_array as p_normal)

    knl = LaplaceKernel(2)
    local_expn_class = VolumeTaylorLocalExpansion
    mpole_expn_class = VolumeTaylorMultipoleExpansion
    order = 10

    sources = p_normal(queue, nsources, knl.dim, dtype, seed=15)

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    # No separate targets are passed: the tree is built from the sources only.
    tree, _ = tb(queue, sources,
            max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)

    # Seed the generator explicitly so that the weights, and therefore the
    # measured error, are reproducible from run to run, as in the other
    # tests of this file.
    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(ctx, seed=44)
    weights = rng.uniform(queue, nsources, dtype=np.float64)

    # Identity index map of length tree.ntargets, passed to both the
    # wrangler and the reference P2P evaluation.
    target_to_source = np.arange(tree.ntargets, dtype=np.int32)
    self_extra_kwargs = {"target_to_source": target_to_source}

    out_kernels = [knl]

    from functools import partial

    from sumpy.fmm import SumpyExpansionWranglerCodeContainer
    wcc = SumpyExpansionWranglerCodeContainer(
            ctx,
            partial(mpole_expn_class, knl),
            partial(local_expn_class, knl),
            out_kernels,
            exclude_self=True)

    wrangler = wcc.get_wrangler(queue, tree, dtype,
            fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order,
            self_extra_kwargs=self_extra_kwargs)

    from boxtree.fmm import drive_fmm

    pot, = drive_fmm(trav, wrangler, weights)

    from sumpy import P2P
    p2p = P2P(ctx, out_kernels, exclude_self=True)
    _, (ref_pot,) = p2p(queue, sources, sources, (weights,),
            **self_extra_kwargs)

    pot = pot.get()
    ref_pot = ref_pot.get()

    rel_err = la.norm(pot - ref_pot) / la.norm(ref_pot)
    logger.info("order %d -> relative l2 error: %g", order, rel_err)

    assert np.isclose(rel_err, 0, atol=1e-7)
Beispiel #35
0
def test_compare_cl_and_py_cost_model(ctx_factory, nsources, ntargets, dims, dtype):
    """Check that the OpenCL and the pure-Python cost models agree.

    Each ``process_*`` stage (and ``aggregate_over_boxes``) is run once with
    ``FMMCostModel`` on the device traversal and once with
    ``_PythonFMMCostModel`` on the host traversal. The two results must be
    equal, and the wall-clock time of each run is logged.

    :arg ctx_factory: callable returning the OpenCL context to run on.
    :arg nsources: number of source particles.
    :arg ntargets: number of target particles.
    :arg dims: number of coordinate arrays per particle set.
    :arg dtype: dtype used for coordinates and target radii.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    # {{{ Generate sources, targets and target_radii

    from boxtree.tools import make_normal_particle_array as p_normal
    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    targets = p_normal(queue, ntargets, dims, dtype, seed=18)

    from pyopencl.clrandom import PhiloxGenerator
    radii_rng = PhiloxGenerator(queue.context, seed=22)
    target_radii = radii_rng.uniform(
        queue, ntargets, a=0, b=0.05, dtype=dtype
    ).get()

    # }}}

    # {{{ Generate tree and traversal

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(ctx)
    tree, _ = tree_builder(
        queue, sources, targets=targets, target_radii=target_radii,
        stick_out_factor=0.15, max_particles_in_box=30, debug=True
    )

    from boxtree.traversal import FMMTraversalBuilder
    trav_builder = FMMTraversalBuilder(ctx, well_sep_is_n_away=2)
    trav_dev, _ = trav_builder(queue, tree, debug=True)
    trav = trav_dev.get(queue=queue)

    # }}}

    # {{{ Construct cost models

    cl_cost_model = FMMCostModel(None)
    python_cost_model = _PythonFMMCostModel(None)

    # Unit calibration parameters plus an order of 10 on every level.
    constant_one_params = cl_cost_model.get_unit_calibration_params().copy()
    for ilevel in range(trav.tree.nlevels):
        constant_one_params["p_fmm_lev%d" % ilevel] = 10

    xlat_cost = make_pde_aware_translation_cost_model(dims, trav.tree.nlevels)

    nlevels = trav.tree.nlevels

    # }}}

    # {{{ Local helpers

    def tabulate_costs(nentries, expr_at):
        # Evaluate the cost expression 'expr_at(i)' for i in range(nentries).
        costs = np.zeros(nentries, dtype=np.float64)
        for idx in range(nentries):
            costs[idx] = evaluate(expr_at(idx), context=constant_one_params)
        return costs

    def timed_on_device(stage, sync_first=True):
        # Run 'stage', finishing the queue afterwards (and, unless disabled,
        # beforehand); return its result and the elapsed seconds as a string.
        if sync_first:
            queue.finish()
        began = time.time()
        outcome = stage()
        queue.finish()
        return outcome, str(time.time() - began)

    def timed_on_host(stage):
        # Run 'stage'; return its result and the elapsed seconds as a string.
        began = time.time()
        outcome = stage()
        return outcome, str(time.time() - began)

    # }}}

    # {{{ Test process_form_multipoles

    p2m_cost = tabulate_costs(nlevels, xlat_cost.p2m)
    p2m_cost_dev = cl.array.to_device(queue, p2m_cost)

    cl_form_multipoles, cl_secs = timed_on_device(
        lambda: cl_cost_model.process_form_multipoles(
            queue, trav_dev, p2m_cost_dev))
    logger.info("OpenCL time for process_form_multipoles: {0}".format(cl_secs))

    python_form_multipoles, py_secs = timed_on_host(
        lambda: python_cost_model.process_form_multipoles(
            queue, trav, p2m_cost))
    logger.info("Python time for process_form_multipoles: {0}".format(py_secs))

    assert np.array_equal(cl_form_multipoles.get(), python_form_multipoles)

    # }}}

    # {{{ Test process_coarsen_multipoles

    m2m_cost = tabulate_costs(
        nlevels - 1, lambda tgt_level: xlat_cost.m2m(tgt_level + 1, tgt_level))
    m2m_cost_dev = cl.array.to_device(queue, m2m_cost)

    cl_coarsen_multipoles, cl_secs = timed_on_device(
        lambda: cl_cost_model.process_coarsen_multipoles(
            queue, trav_dev, m2m_cost_dev))
    logger.info("OpenCL time for coarsen_multipoles: {0}".format(cl_secs))

    python_coarsen_multipoles, py_secs = timed_on_host(
        lambda: python_cost_model.process_coarsen_multipoles(
            queue, trav, m2m_cost))
    logger.info("Python time for coarsen_multipoles: {0}".format(py_secs))

    assert cl_coarsen_multipoles == python_coarsen_multipoles

    # }}}

    # {{{ Test process_direct

    # The timed region covers both the per-box source count and the direct
    # cost computation.
    def cl_direct_stage():
        ndirect = cl_cost_model.get_ndirect_sources_per_target_box(
            queue, trav_dev)
        return cl_cost_model.process_direct(queue, trav_dev, ndirect, 5.0)

    def python_direct_stage():
        ndirect = python_cost_model.get_ndirect_sources_per_target_box(
            queue, trav)
        return python_cost_model.process_direct(queue, trav, ndirect, 5.0)

    cl_direct, cl_secs = timed_on_device(cl_direct_stage)
    logger.info("OpenCL time for process_direct: {0}".format(cl_secs))

    python_direct, py_secs = timed_on_host(python_direct_stage)
    logger.info("Python time for process_direct: {0}".format(py_secs))

    assert np.array_equal(cl_direct.get(), python_direct)

    # }}}

    # {{{ Test aggregate_over_boxes

    # No queue.finish() before the timer starts in this section.
    cl_direct_aggregate, cl_secs = timed_on_device(
        lambda: cl_cost_model.aggregate_over_boxes(cl_direct),
        sync_first=False)
    logger.info("OpenCL time for aggregate_over_boxes: {0}".format(cl_secs))

    python_direct_aggregate, py_secs = timed_on_host(
        lambda: python_cost_model.aggregate_over_boxes(python_direct))
    logger.info("Python time for aggregate_over_boxes: {0}".format(py_secs))

    assert cl_direct_aggregate == python_direct_aggregate

    # }}}

    # {{{ Test process_list2

    m2l_cost = tabulate_costs(nlevels, lambda lev: xlat_cost.m2l(lev, lev))
    m2l_cost_dev = cl.array.to_device(queue, m2l_cost)

    cl_m2l_cost, cl_secs = timed_on_device(
        lambda: cl_cost_model.process_list2(queue, trav_dev, m2l_cost_dev))
    logger.info("OpenCL time for process_list2: {0}".format(cl_secs))

    python_m2l_cost, py_secs = timed_on_host(
        lambda: python_cost_model.process_list2(queue, trav, m2l_cost))
    logger.info("Python time for process_list2: {0}".format(py_secs))

    assert np.array_equal(cl_m2l_cost.get(), python_m2l_cost)

    # }}}

    # {{{ Test process_list 3

    m2p_cost = tabulate_costs(nlevels, xlat_cost.m2p)
    m2p_cost_dev = cl.array.to_device(queue, m2p_cost)

    cl_m2p_cost, cl_secs = timed_on_device(
        lambda: cl_cost_model.process_list3(queue, trav_dev, m2p_cost_dev))
    logger.info("OpenCL time for process_list3: {0}".format(cl_secs))

    python_m2p_cost, py_secs = timed_on_host(
        lambda: python_cost_model.process_list3(queue, trav, m2p_cost))
    logger.info("Python time for process_list3: {0}".format(py_secs))

    assert np.array_equal(cl_m2p_cost.get(), python_m2p_cost)

    # }}}

    # {{{ Test process_list4

    p2l_cost = tabulate_costs(nlevels, xlat_cost.p2l)
    p2l_cost_dev = cl.array.to_device(queue, p2l_cost)

    cl_p2l_cost, cl_secs = timed_on_device(
        lambda: cl_cost_model.process_list4(queue, trav_dev, p2l_cost_dev))
    logger.info("OpenCL time for process_list4: {0}".format(cl_secs))

    python_p2l_cost, py_secs = timed_on_host(
        lambda: python_cost_model.process_list4(queue, trav, p2l_cost))
    logger.info("Python time for process_list4: {0}".format(py_secs))

    assert np.array_equal(cl_p2l_cost.get(), python_p2l_cost)

    # }}}

    # {{{ Test process_refine_locals

    l2l_cost = tabulate_costs(
        nlevels - 1, lambda lev: xlat_cost.l2l(lev, lev + 1))
    l2l_cost_dev = cl.array.to_device(queue, l2l_cost)

    cl_refine_locals_cost, cl_secs = timed_on_device(
        lambda: cl_cost_model.process_refine_locals(
            queue, trav_dev, l2l_cost_dev))
    logger.info("OpenCL time for refine_locals: {0}".format(cl_secs))

    python_refine_locals_cost, py_secs = timed_on_host(
        lambda: python_cost_model.process_refine_locals(
            queue, trav, l2l_cost))
    logger.info("Python time for refine_locals: {0}".format(py_secs))

    assert cl_refine_locals_cost == python_refine_locals_cost

    # }}}

    # {{{ Test process_eval_locals

    l2p_cost = tabulate_costs(nlevels, xlat_cost.l2p)
    l2p_cost_dev = cl.array.to_device(queue, l2p_cost)

    cl_l2p_cost, cl_secs = timed_on_device(
        lambda: cl_cost_model.process_eval_locals(
            queue, trav_dev, l2p_cost_dev))
    logger.info("OpenCL time for process_eval_locals: {0}".format(cl_secs))

    python_l2p_cost, py_secs = timed_on_host(
        lambda: python_cost_model.process_eval_locals(queue, trav, l2p_cost))
    logger.info("Python time for process_eval_locals: {0}".format(py_secs))

    assert np.array_equal(cl_l2p_cost.get(), python_l2p_cost)

    # }}}
Beispiel #36
0
def plot_traversal(ctx_getter, do_plot=False):
    """Build a 2D tree and traversal and plot some of its interaction lists.

    For a handful of randomly chosen boxes (drawn in red), the boxes in one
    of the traversal's interaction lists are highlighted in yellow on top of
    the tree plot, which is then shown with matplotlib. Which list is drawn
    is selected by editing the ``if 0 / elif 1`` chain near the end.

    :arg ctx_getter: callable returning the OpenCL context to run on.
    :arg do_plot: accepted but not referenced by the function body.
    """
    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    # Only the 2D case is active; [2, 3] was the other variant considered.
    for dims in [2]:
        nparticles = 10**4
        dtype = np.float64

        from pyopencl.clrandom import PhiloxGenerator
        particle_rng = PhiloxGenerator(queue.context, seed=15)

        from pytools.obj_array import make_obj_array
        particles = make_obj_array([
            particle_rng.normal(queue, nparticles, dtype=dtype)
            for _axis in range(dims)])

        from boxtree import TreeBuilder
        tree_builder = TreeBuilder(ctx)

        queue.finish()
        # NOTE(review): other functions in this file unpack the builder
        # results as 'tree, _ = tb(...)' and 'trav, _ = tg(...)'. This code
        # uses the return values directly and refers to list names such as
        # 'sep_siblings_*' -- presumably written against a different API
        # version; confirm before relying on it.
        tree = tree_builder(queue, particles, max_particles_in_box=30, debug=True)

        from boxtree.traversal import FMMTraversalBuilder
        trav_builder = FMMTraversalBuilder(ctx)
        trav = trav_builder(queue, tree).get()

        from boxtree.visualization import TreePlotter
        plotter = TreePlotter(tree)
        plotter.draw_tree(fill=False, edgecolor="black")
        plotter.set_bounding_box()

        from random import randrange, seed
        # Fixed seed so the same boxes are picked on every run.
        seed(7)

        # {{{ generic box drawing helper

        def draw_some_box_lists(starts, lists, key_to_box=None,
                count=5):
            # Draw 'count' randomly chosen boxes with non-empty lists.
            # 'starts'/'lists' are indexed CSR-style by 'key'; when
            # 'key_to_box' is given it translates the key to a box number,
            # otherwise the key is the box number itself.
            drawn = 0
            while drawn < count:
                if key_to_box is None:
                    key = ibox = randrange(tree.nboxes)
                else:
                    key = randrange(len(key_to_box))
                    ibox = key_to_box[key]

                first, past_last = starts[key:key+2]
                if first != past_last:
                    for jbox in lists[first:past_last]:
                        plotter.draw_box(jbox, facecolor='yellow')

                    plotter.draw_box(ibox, facecolor='red')

                    drawn += 1

        # }}}

        # Manual selector: the first branch with a true condition is drawn,
        # so later branches are skipped even if they also read 'elif 1'.
        if 0:
            # colleagues
            draw_some_box_lists(
                    trav.colleagues_starts,
                    trav.colleagues_lists)
        elif 0:
            # near neighbors ("list 1")
            draw_some_box_lists(
                    trav.neighbor_leaves_starts,
                    trav.neighbor_leaves_lists,
                    key_to_box=trav.source_boxes)
        elif 0:
            # well-separated siblings (list 2)
            draw_some_box_lists(
                    trav.sep_siblings_starts,
                    trav.sep_siblings_lists)
        elif 1:
            # separated smaller (list 3)
            draw_some_box_lists(
                    trav.sep_smaller_starts,
                    trav.sep_smaller_lists,
                    key_to_box=trav.source_boxes)
        elif 1:
            # separated bigger (list 4)
            draw_some_box_lists(
                    trav.sep_bigger_starts,
                    trav.sep_bigger_lists)

        import matplotlib.pyplot as pt
        pt.show()
Beispiel #37
0
def test_from_sep_siblings_rotation_classes(ctx_factory, well_sep_is_n_away):
    """Check rotation classes of the ``from_sep_siblings`` list in 3D.

    For every entry of ``from_sep_siblings``, the angle between the z-axis
    and the source-to-target translation vector is computed from the box
    centers and compared with the angle that ``RotationClassesBuilder``
    associates with that entry's rotation class.

    :arg ctx_factory: callable returning the OpenCL context to run on.
    :arg well_sep_is_n_away: forwarded to ``FMMTraversalBuilder``.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    dims = 3
    nparticles = 10**4
    dtype = np.float64

    # {{{ build tree

    from pyopencl.clrandom import PhiloxGenerator
    from pytools.obj_array import make_obj_array

    particle_rng = PhiloxGenerator(queue.context, seed=15)
    particles = make_obj_array([
        particle_rng.normal(queue, nparticles, dtype=dtype)
        for _axis in range(dims)])

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(ctx)

    queue.finish()
    tree, _ = tree_builder(queue, particles, max_particles_in_box=30, debug=True)

    # }}}

    # {{{ build traversal

    from boxtree.traversal import FMMTraversalBuilder
    from boxtree.rotation_classes import RotationClassesBuilder

    trav_builder = FMMTraversalBuilder(ctx, well_sep_is_n_away=well_sep_is_n_away)
    trav, _ = trav_builder(queue, tree)

    rot_builder = RotationClassesBuilder(ctx)
    result, _ = rot_builder(queue, trav, tree)

    rot_classes = result.from_sep_siblings_rotation_classes.get(queue)
    rot_angles = result.from_sep_siblings_rotation_class_to_angle.get(queue)

    # Host copies for the NumPy-based checks below.
    tree = tree.get(queue=queue)
    trav = trav.get(queue=queue)

    # Transposed so that a box number indexes the first axis.
    centers = tree.box_centers.T

    # }}}

    # For each entry of from_sep_siblings, compute the source-target translation
    # direction as a vector, and check that the from_sep_siblings rotation class
    # in the traversal corresponds to the angle with the z-axis of the
    # translation direction.

    list_starts = trav.from_sep_siblings_starts
    list_entries = trav.from_sep_siblings_lists
    last_axis = dims - 1

    for itgt_box, tgt_ibox in enumerate(trav.target_or_target_parent_boxes):
        first, past_last = list_starts[itgt_box:itgt_box+2]
        entry_range = slice(first, past_last)

        src_boxes = list_entries[entry_range]
        translation_vecs = centers[tgt_ibox] - centers[src_boxes]

        # Angle with the last axis: arctan2(|in-plane part|, axial part).
        in_plane_len = la.norm(translation_vecs[:, :last_axis], axis=1)
        axial_part = translation_vecs[:, last_axis]
        theta = np.arctan2(in_plane_len, axial_part)

        expected_angles = rot_angles[rot_classes[entry_range]]

        assert np.allclose(theta, expected_angles, atol=1e-13, rtol=1e-13)