def refine_locals(
    self,
    level_start_target_or_target_parent_box_nrs,
    target_or_target_parent_boxes,
    local_exps,
):
    """Forward the call unchanged to ``FMMLibExpansionWrangler.refine_locals``.

    All arguments are passed through positionally and the delegate's return
    value is returned as-is.
    """
    base_impl = FMMLibExpansionWrangler.refine_locals
    return base_impl(
        self,
        level_start_target_or_target_parent_box_nrs,
        target_or_target_parent_boxes,
        local_exps,
    )
def form_locals(
    self,
    level_start_target_or_target_parent_box_nrs,
    target_or_target_parent_boxes,
    starts,
    lists,
    src_weights,
):
    """Forward the call unchanged to ``FMMLibExpansionWrangler.form_locals``.

    All arguments are passed through positionally and the delegate's return
    value is returned as-is.
    """
    base_impl = FMMLibExpansionWrangler.form_locals
    return base_impl(
        self,
        level_start_target_or_target_parent_box_nrs,
        target_or_target_parent_boxes,
        starts,
        lists,
        src_weights,
    )
def multipole_to_local(
    self,
    level_start_target_box_nrs,
    target_boxes,
    src_box_starts,
    src_box_lists,
    mpole_exps,
):
    """Forward the call unchanged to
    ``FMMLibExpansionWrangler.multipole_to_local``.

    All arguments are passed through positionally and the delegate's return
    value is returned as-is.
    """
    base_impl = FMMLibExpansionWrangler.multipole_to_local
    return base_impl(
        self,
        level_start_target_box_nrs,
        target_boxes,
        src_box_starts,
        src_box_lists,
        mpole_exps,
    )
def demo_cost_model():
    """Calibrate the FMM cost model on small cases and predict the largest one.

    Five problems of increasing size are run through ``drive_fmm`` with an
    ``FMMLibExpansionWrangler`` while collecting timing data. The cost model
    is calibrated on all but the last problem, then used to predict the
    per-stage time of the last one; measured and predicted values are logged.

    Raises:
        NotImplementedError: if ``SUPPORTS_PROCESS_TIME`` is false.
    """
    if not SUPPORTS_PROCESS_TIME:
        raise NotImplementedError(
            "Currently this script uses process time which only works on Python>=3.3"
        )

    from boxtree import TreeBuilder
    from boxtree.fmm import drive_fmm
    from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler
    from boxtree.tools import make_normal_particle_array as p_normal
    from boxtree.traversal import FMMTraversalBuilder
    from pyopencl.clrandom import PhiloxGenerator

    source_counts = [1000, 2000, 3000, 4000, 5000]
    target_counts = [1000, 2000, 3000, 4000, 5000]
    dims = 3
    dtype = np.float64

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    traversals = []
    traversals_dev = []
    level_to_orders = []
    timing_results = []

    def fmm_level_to_nterms(tree, ilevel):
        # Same expansion order on every level.
        return 10

    for nsources, ntargets in zip(source_counts, target_counts):
        # {{{ Generate sources, targets and target_radii

        sources = p_normal(queue, nsources, dims, dtype, seed=15)
        targets = p_normal(queue, ntargets, dims, dtype, seed=18)

        radius_rng = PhiloxGenerator(queue.context, seed=22)
        target_radii = radius_rng.uniform(
            queue, ntargets, a=0, b=0.05, dtype=dtype
        ).get()

        # }}}

        # {{{ Generate tree and traversal

        build_tree = TreeBuilder(ctx)
        tree, _ = build_tree(
            queue,
            sources,
            targets=targets,
            target_radii=target_radii,
            stick_out_factor=0.15,
            max_particles_in_box=30,
            debug=True,
        )

        build_traversal = FMMTraversalBuilder(ctx, well_sep_is_n_away=2)
        trav_dev, _ = build_traversal(queue, tree, debug=True)
        trav = trav_dev.get(queue=queue)

        traversals.append(trav)
        traversals_dev.append(trav_dev)

        # }}}

        wrangler = FMMLibExpansionWrangler(trav.tree, 0, fmm_level_to_nterms)
        level_to_orders.append(wrangler.level_nterms)

        case_timing = {}
        src_weights = np.random.rand(tree.nsources).astype(tree.coord_dtype)
        drive_fmm(trav, wrangler, src_weights, timing_data=case_timing)
        timing_results.append(case_timing)

    time_field_name = "process_elapsed"

    from boxtree.cost import FMMCostModel
    from boxtree.cost import make_pde_aware_translation_cost_model

    cost_model = FMMCostModel(make_pde_aware_translation_cost_model)

    # Model every case except the last one with unit calibration parameters.
    n_calibration_cases = len(traversals) - 1
    model_results = [
        cost_model.cost_per_stage(
            queue,
            traversals_dev[icase],
            level_to_orders[icase],
            FMMCostModel.get_unit_calibration_params(),
        )
        for icase in range(n_calibration_cases)
    ]
    queue.finish()

    params = cost_model.estimate_calibration_params(
        model_results, timing_results[:-1], time_field_name=time_field_name
    )

    # Predict the held-out (last) case with the calibrated parameters.
    predicted_time = cost_model.cost_per_stage(
        queue,
        traversals_dev[-1],
        level_to_orders[-1],
        params,
    )
    queue.finish()

    stages = [
        "form_multipoles",
        "eval_direct",
        "multipole_to_local",
        "eval_multipoles",
        "form_locals",
        "eval_locals",
        "coarsen_multipoles",
        "refine_locals",
    ]
    held_out_timing = timing_results[-1]
    for field in stages:
        measured = held_out_timing[field][time_field_name]
        predicted = predicted_time[field]
        relative_error = (measured - predicted) / measured
        logger.info(
            "actual/predicted time for %s: %.3g/%.3g -> %g %% error",
            field,
            measured,
            predicted,
            abs(100 * relative_error),
        )
def coarsen_multipoles(
    self, level_start_source_parent_box_nrs, source_parent_boxes, mpoles
):
    """Forward the call unchanged to
    ``FMMLibExpansionWrangler.coarsen_multipoles``.

    All arguments are passed through positionally and the delegate's return
    value is returned as-is.
    """
    base_impl = FMMLibExpansionWrangler.coarsen_multipoles
    return base_impl(
        self, level_start_source_parent_box_nrs, source_parent_boxes, mpoles
    )
def form_multipoles(self, level_start_source_box_nrs, source_boxes, src_weights):
    """Forward the call unchanged to
    ``FMMLibExpansionWrangler.form_multipoles``.

    All arguments are passed through positionally and the delegate's return
    value is returned as-is.
    """
    base_impl = FMMLibExpansionWrangler.form_multipoles
    return base_impl(self, level_start_source_box_nrs, source_boxes, src_weights)
def finalize_potentials(self, potentials):
    """Return ``FMMLibExpansionWrangler.finalize_potentials`` applied to
    *potentials*.
    """
    base_impl = FMMLibExpansionWrangler.finalize_potentials
    return base_impl(self, potentials)
def reorder_potentials(self, potentials):
    """Return ``FMMLibExpansionWrangler.reorder_potentials`` applied to
    *potentials*.
    """
    base_impl = FMMLibExpansionWrangler.reorder_potentials
    return base_impl(self, potentials)
def eval_multipoles(
    self, target_boxes_by_source_level, source_boxes_by_level, mpole_exps
):
    """Forward the call unchanged to
    ``FMMLibExpansionWrangler.eval_multipoles``.

    All arguments are passed through positionally and the delegate's return
    value is returned as-is.
    """
    base_impl = FMMLibExpansionWrangler.eval_multipoles
    return base_impl(
        self, target_boxes_by_source_level, source_boxes_by_level, mpole_exps
    )
def test_fmm_with_optimized_3d_m2l(ctx_factory, nsrcntgts, helmholtz_k,
                                   well_sep_is_n_away):
    """Check that 3D M2L using rotation data matches the baseline M2L.

    Two ``FMMLibExpansionWrangler`` instances are driven over the same
    traversal and weights: one plain, one constructed with
    ``FMMLibRotationData``. Their potentials must agree to ``1e-13``.
    The M2L process times of both runs are printed when available.

    Args:
        ctx_factory: callable returning the OpenCL context to use.
        nsrcntgts: total particle count; half become sources, half targets.
        helmholtz_k: Helmholtz parameter handed to the wranglers; a truthy
            value also raises the expansion order on levels below 3.
        well_sep_is_n_away: separation parameter forwarded to
            ``FMMTraversalBuilder``.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    dims = 3

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    nsources = ntargets = nsrcntgts // 2
    dtype = np.float64

    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    # Shift the target cloud along the first axis.
    targets = (
        p_normal(queue, ntargets, dims, dtype, seed=18)
        + np.array([2, 0, 0])[:dims])

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, targets=targets,
                 max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    # The parameter was previously accepted but ignored, so every
    # parametrized value of it exercised the same default traversal.
    tbuild = FMMTraversalBuilder(ctx, well_sep_is_n_away=well_sep_is_n_away)
    trav, _ = tbuild(queue, tree, debug=True)

    trav = trav.get(queue=queue)

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=20)

    weights = rng.uniform(queue, nsources, dtype=np.float64).get()

    base_nterms = 10

    def fmm_level_to_nterms(tree, lev):
        result = base_nterms

        if lev < 3 and helmholtz_k:
            # exercise order-varies-by-level capability
            result += 5

        return result

    from boxtree.pyfmmlib_integration import (
        FMMLibExpansionWrangler, FMMLibRotationData)

    baseline_wrangler = FMMLibExpansionWrangler(
        trav.tree, helmholtz_k,
        fmm_level_to_nterms=fmm_level_to_nterms)

    optimized_wrangler = FMMLibExpansionWrangler(
        trav.tree, helmholtz_k,
        fmm_level_to_nterms=fmm_level_to_nterms,
        rotation_data=FMMLibRotationData(queue, trav))

    from boxtree.fmm import drive_fmm

    baseline_timing_data = {}
    baseline_pot = drive_fmm(
        trav, baseline_wrangler, (weights, ),
        timing_data=baseline_timing_data)

    optimized_timing_data = {}
    optimized_pot = drive_fmm(
        trav, optimized_wrangler, (weights, ),
        timing_data=optimized_timing_data)

    # A timing entry may be None; only print values that are present.
    baseline_time = baseline_timing_data["multipole_to_local"]["process_elapsed"]
    if baseline_time is not None:
        print("Baseline M2L time : %#.4g s" % baseline_time)

    opt_time = optimized_timing_data["multipole_to_local"]["process_elapsed"]
    if opt_time is not None:
        print("Optimized M2L time: %#.4g s" % opt_time)

    assert np.allclose(baseline_pot, optimized_pot, atol=1e-13, rtol=1e-13)
def local_expansion_zeros(self):
    """Return the result of ``FMMLibExpansionWrangler.local_expansion_zeros``."""
    base_impl = FMMLibExpansionWrangler.local_expansion_zeros
    return base_impl(self)
def multipole_expansion_zeros(self):
    """Return the result of
    ``FMMLibExpansionWrangler.multipole_expansion_zeros``.
    """
    base_impl = FMMLibExpansionWrangler.multipole_expansion_zeros
    return base_impl(self)
def __init__(self, code_container, queue, tree, near_field_table, dtype,
             fmm_level_to_order, quad_order, potential_kind=1,
             source_extra_kwargs=None, kernel_extra_kwargs=None,
             self_extra_kwargs=None, list1_extra_kwargs=None,
             *args, **kwargs):
    """Set up the wrangler: digest kernels, validate tables, init the base.

    Args:
        code_container: object exposing ``out_kernels``; stored as
            ``self.code``.
        queue: command queue; used for ``tree.get``, for fetching dipole
            vectors, and for building rotation data.
        tree: object whose ``get(queue)`` result is stored as ``self.tree``.
        near_field_table: a ``NearFieldInteractionTable``, a list of them
            (both require exactly one output kernel), or a dict keyed by
            ``repr(kernel)`` whose values are a table or a list of tables.
            In the dict case, single tables are wrapped into one-element
            lists in place, and the dict itself is kept as
            ``self.near_field_table``.
        dtype: stored as ``self.dtype``.
        fmm_level_to_order: callable invoked as
            ``fmm_level_to_order(kernel, kernel_args, tree, level)``.
        quad_order: every table's ``quad_order`` must equal this.
        potential_kind: stored as ``self.potential_kind``.
        source_extra_kwargs: mapping; must contain the direction-vector
            name when the kernels are directional source derivatives.
        kernel_extra_kwargs: mapping; must contain the Helmholtz parameter
            name when the kernels are Helmholtz kernels.
        self_extra_kwargs: accepted; not used by this method.
        list1_extra_kwargs: stored as ``self.list1_extra_kwargs``.
        **kwargs: if ``traversal`` is present, rotation data is built
            from it; otherwise a warning is logged.

    Raises:
        ValueError: for unsupported kernels, or kernels that disagree on
            source derivative or Helmholtz parameter.
        RuntimeError: for missing, unbuilt, unrecognized, incompatible or
            insufficient near-field tables.
        TypeError: if ``fmm_level_to_order`` is not callable, or if a
            Helmholtz parameter is needed but ``kernel_extra_kwargs`` is
            ``None``.
    """
    self.code = code_container
    self.queue = queue

    tree = tree.get(queue)
    self.tree = tree

    self.dtype = dtype
    self.quad_order = quad_order
    self.potential_kind = potential_kind

    # {{{ digest out_kernels

    ifgrad = False
    outputs = []
    source_deriv_names = []
    k_names = []

    for out_knl in self.code.out_kernels:
        if self.is_supported_helmknl(out_knl):
            outputs.append(())
            no_target_deriv_knl = out_knl
        elif (isinstance(out_knl, AxisTargetDerivative)
                and self.is_supported_helmknl(out_knl.inner_kernel)):
            outputs.append((out_knl.axis,))
            ifgrad = True
            no_target_deriv_knl = out_knl.inner_kernel
        else:
            raise ValueError(
                "only the 2/3D Laplace and Helmholtz kernel "
                "and their derivatives are supported")

        source_deriv_names.append(
            no_target_deriv_knl.dir_vec_name
            if isinstance(no_target_deriv_knl, DirectionalSourceDerivative)
            else None)

        base_knl = out_knl.get_base_kernel()
        k_names.append(
            base_knl.helmholtz_k_name
            if isinstance(base_knl, HelmholtzKernel)
            else None)

    self.outputs = outputs

    from pytools import is_single_valued

    if not is_single_valued(source_deriv_names):
        raise ValueError("not all kernels passed are the same in "
                         "whether they represent a source derivative")
    source_deriv_name = source_deriv_names[0]

    if not is_single_valued(k_names):
        raise ValueError("not all kernels passed have the same "
                         "Helmholtz parameter")
    k_name = k_names[0]

    if k_name is None:
        helmholtz_k = 0
    elif kernel_extra_kwargs is None:
        # Subscripting None would raise an opaque TypeError here; keep the
        # exception type but say what is actually missing.
        raise TypeError(
            "kernel_extra_kwargs must be given and contain %r "
            "for Helmholtz kernels" % k_name)
    else:
        helmholtz_k = kernel_extra_kwargs[k_name]

    # }}}

    # {{{ table setup
    # TODO put this part into the interface class

    self.near_field_table = {}
    if isinstance(near_field_table, list):
        # list of tables for a single out kernel
        assert len(self.code.out_kernels) == 1
        self.near_field_table[repr(self.code.out_kernels[0])] = near_field_table
        self.n_tables = len(near_field_table)

    elif isinstance(near_field_table, NearFieldInteractionTable):
        # single table
        assert len(self.code.out_kernels) == 1
        self.near_field_table[repr(self.code.out_kernels[0])] = [
            near_field_table
        ]
        self.n_tables = 1

    elif isinstance(near_field_table, dict):
        # dictionary of lists of tables
        self.n_tables = {}
        for out_knl in self.code.out_kernels:
            knl_key = repr(out_knl)
            if knl_key not in near_field_table:
                raise RuntimeError(
                    "Missing nearfield table for %s." % knl_key)
            if isinstance(near_field_table[knl_key],
                          NearFieldInteractionTable):
                # Normalize in place: the passed-in dict is kept below.
                near_field_table[knl_key] = [near_field_table[knl_key]]
            else:
                assert isinstance(near_field_table[knl_key], list)

            self.n_tables[knl_key] = len(near_field_table[knl_key])

        self.near_field_table = near_field_table

    else:
        raise RuntimeError("Table type unrecognized.")

    # TODO: make all parameters table-specific (allow using inhomogeneous tables)
    kname = repr(self.code.out_kernels[0])
    self.root_table_source_box_extent = (
        self.near_field_table[kname][0].source_box_extent)
    table_starting_level = np.round(
        np.log(self.tree.root_extent / self.root_table_source_box_extent)
        / np.log(2)
    )

    # The extra checks below only apply when a plain (non-dict) collection
    # of more than one table was given. This does not change inside the
    # loops, so evaluate it once.
    has_table_hierarchy = (
        not isinstance(self.n_tables, dict) and self.n_tables > 1)

    for out_knl in self.code.out_kernels:
        kname = repr(out_knl)
        knl_tables = self.near_field_table[kname]

        for lev, table in enumerate(knl_tables):
            assert table.quad_order == self.quad_order

            if not table.is_built:
                raise RuntimeError(
                    "Near field interaction table needs to be built "
                    "prior to being used"
                )

            table_root_extent = table.source_box_extent * 2 ** lev
            assert (
                abs(self.root_table_source_box_extent - table_root_extent)
                < 1e-15
            )

            # If the kernel cannot be scaled,
            # - tree_root_extent must be integral times of table_root_extent
            # - n_tables must be sufficient
            if has_table_hierarchy:
                n_fit = int(self.tree.root_extent / table_root_extent)
                if not (
                    abs(n_fit * table_root_extent - self.tree.root_extent)
                    < 1e-15
                ):
                    raise RuntimeError(
                        "Incompatible list of tables: the "
                        "source_box_extent of the root table must "
                        "divide the bounding box's extent by an integer."
                    )

        if has_table_hierarchy:
            # this checks that the boxes at the highest level are covered;
            # it does not depend on the individual table, so it runs once
            # per kernel
            if not tree.nlevels <= len(knl_tables) + table_starting_level:
                raise RuntimeError(
                    "Insufficient list of tables: the "
                    "finest level mesh cells at level "
                    + str(tree.nlevels)
                    + " are not covered."
                )

        # the check that the boxes at the coarsest level are covered is
        # deferred until trav.target_boxes is passed when invoking
        # eval_direct

    if source_extra_kwargs is None:
        source_extra_kwargs = {}

    if kernel_extra_kwargs is None:
        kernel_extra_kwargs = {}

    if self_extra_kwargs is None:
        self_extra_kwargs = {}

    if list1_extra_kwargs is None:
        list1_extra_kwargs = {}

    self.list1_extra_kwargs = list1_extra_kwargs

    # }}} End table setup

    if not callable(fmm_level_to_order):
        raise TypeError("fmm_level_to_order not passed")

    dipole_vec = None
    if source_deriv_name is not None:
        dipole_vec = np.array(
            [d_i.get(queue=queue)
             for d_i in source_extra_kwargs[source_deriv_name]],
            order="F")

    def inner_fmm_level_to_nterms(tree, level):
        # Adapt the (kernel, kernel_args, tree, level) callable to the
        # (tree, level) signature handed to the base class.
        if helmholtz_k == 0:
            return fmm_level_to_order(
                LaplaceKernel(tree.dimensions),
                frozenset(), tree, level)
        else:
            return fmm_level_to_order(
                HelmholtzKernel(tree.dimensions),
                frozenset([("k", helmholtz_k)]), tree, level)

    rotation_data = None
    if 'traversal' in kwargs:
        # add rotation data if traversal is passed as a keyword argument
        from boxtree.pyfmmlib_integration import FMMLibRotationData
        rotation_data = FMMLibRotationData(self.queue, kwargs['traversal'])
    else:
        logger.warning("Rotation data is not utilized since traversal is "
                       "not known to FPNDFMMLibExpansionWrangler.")

    FMMLibExpansionWrangler.__init__(
        self, tree,
        helmholtz_k=helmholtz_k,
        dipole_vec=dipole_vec,
        dipoles_already_reordered=True,
        fmm_level_to_nterms=inner_fmm_level_to_nterms,
        rotation_data=rotation_data,
        ifgrad=ifgrad)
def eval_direct_p2p(
    self, target_boxes, source_box_starts, source_box_lists, src_weights
):
    """Forward the call unchanged to ``FMMLibExpansionWrangler.eval_direct``.

    All arguments are passed through positionally and the delegate's return
    value is returned as-is.
    """
    base_impl = FMMLibExpansionWrangler.eval_direct
    return base_impl(
        self, target_boxes, source_box_starts, source_box_lists, src_weights
    )
def eval_locals(self, level_start_target_box_nrs, target_boxes, local_exps):
    """Forward the call unchanged to ``FMMLibExpansionWrangler.eval_locals``.

    All arguments are passed through positionally and the delegate's return
    value is returned as-is.
    """
    base_impl = FMMLibExpansionWrangler.eval_locals
    return base_impl(self, level_start_target_box_nrs, target_boxes, local_exps)
def test_estimate_calibration_params(ctx_factory):
    """Check calibration-parameter estimation for both cost model classes.

    Four problems are timed with ``drive_fmm``. Both ``_PythonFMMCostModel``
    (on host traversals) and ``FMMCostModel`` (on device traversals) are
    calibrated on all but the last problem. Every estimated parameter must be
    an ``np.float64``, and when process time is supported both models must
    produce equal parameters.
    """
    from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler
    from boxtree import TreeBuilder
    from boxtree.fmm import drive_fmm
    from boxtree.tools import make_normal_particle_array as p_normal
    from boxtree.traversal import FMMTraversalBuilder
    from pyopencl.clrandom import PhiloxGenerator

    source_counts = [1000, 2000, 3000, 4000]
    target_counts = [1000, 2000, 3000, 4000]
    dims = 3
    dtype = np.float64

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    traversals = []
    traversals_dev = []
    level_to_orders = []
    timing_results = []

    def fmm_level_to_nterms(tree, ilevel):
        # Same expansion order on every level.
        return 10

    for nsources, ntargets in zip(source_counts, target_counts):
        # {{{ Generate sources, targets and target_radii

        sources = p_normal(queue, nsources, dims, dtype, seed=15)
        targets = p_normal(queue, ntargets, dims, dtype, seed=18)

        radius_rng = PhiloxGenerator(queue.context, seed=22)
        target_radii = radius_rng.uniform(
            queue, ntargets, a=0, b=0.05, dtype=dtype
        ).get()

        # }}}

        # {{{ Generate tree and traversal

        build_tree = TreeBuilder(ctx)
        tree, _ = build_tree(
            queue,
            sources,
            targets=targets,
            target_radii=target_radii,
            stick_out_factor=0.15,
            max_particles_in_box=30,
            debug=True,
        )

        build_traversal = FMMTraversalBuilder(ctx, well_sep_is_n_away=2)
        trav_dev, _ = build_traversal(queue, tree, debug=True)
        trav = trav_dev.get(queue=queue)

        traversals.append(trav)
        traversals_dev.append(trav_dev)

        # }}}

        wrangler = FMMLibExpansionWrangler(trav.tree, 0, fmm_level_to_nterms)
        level_to_orders.append(wrangler.level_nterms)

        case_timing = {}
        src_weights = np.random.rand(tree.nsources).astype(tree.coord_dtype)
        drive_fmm(trav, wrangler, (src_weights,), timing_data=case_timing)
        timing_results.append(case_timing)

    time_field_name = (
        "process_elapsed" if SUPPORTS_PROCESS_TIME else "wall_elapsed"
    )

    calibration_names = (
        "c_p2m", "c_m2m", "c_p2p", "c_m2l",
        "c_m2p", "c_p2l", "c_l2l", "c_l2p",
    )

    def check_params_sanity(test_params):
        for name in calibration_names:
            assert isinstance(test_params[name], np.float64)

    def check_params_equal(test_params1, test_params2):
        for name in calibration_names:
            assert test_params1[name] == test_params2[name]

    # The last case is held out of calibration for both models.
    calibration_orders = level_to_orders[:-1]
    calibration_timings = timing_results[:-1]

    python_cost_model = _PythonFMMCostModel(make_pde_aware_translation_cost_model)
    python_model_results = [
        python_cost_model.cost_per_stage(
            queue,
            traversal,
            level_to_order,
            _PythonFMMCostModel.get_unit_calibration_params(),
        )
        for traversal, level_to_order in zip(traversals[:-1], calibration_orders)
    ]
    python_params = python_cost_model.estimate_calibration_params(
        python_model_results, calibration_timings,
        time_field_name=time_field_name
    )
    check_params_sanity(python_params)

    cl_cost_model = FMMCostModel(make_pde_aware_translation_cost_model)
    cl_model_results = [
        cl_cost_model.cost_per_stage(
            queue,
            traversal,
            level_to_order,
            FMMCostModel.get_unit_calibration_params(),
        )
        for traversal, level_to_order in zip(
            traversals_dev[:-1], calibration_orders)
    ]
    cl_params = cl_cost_model.estimate_calibration_params(
        cl_model_results, calibration_timings,
        time_field_name=time_field_name
    )
    check_params_sanity(cl_params)

    if SUPPORTS_PROCESS_TIME:
        check_params_equal(cl_params, python_params)
def test_pyfmmlib_numerical_stability(ctx_factory, dims, helmholtz_k, order):
    """Run the pyfmmlib-backed FMM on a deep tree and compare it to a
    direct reference.

    The result must contain no NaNs, and its relative max-norm error against
    ``get_fmmlib_ref_pot`` must stay below ``(1/2)**(1 + order)`` in 2D or
    ``(3/4)**(1 + order)`` otherwise.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    from boxtree import TreeBuilder
    from boxtree.fmm import drive_fmm
    from boxtree.pyfmmlib_integration import (
        FMMLibExpansionWrangler, FMMLibRotationData)
    from boxtree.traversal import FMMTraversalBuilder

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    nsources = 30
    dtype = np.float64

    # The input particles are arranged with geometrically increasing/decreasing
    # spacing along a line, to build a deep tree that stress-tests the
    # translations.
    half_line = np.array([2**-i for i in range(nsources // 2)], dtype=dtype)
    particle_line = np.hstack([half_line, 3 - half_line])
    zero = np.zeros(nsources, dtype=dtype)

    sources = np.vstack([particle_line, zero, zero])[:dims]
    targets = sources * (1 + 1e-3)

    build_tree = TreeBuilder(ctx)
    tree, _ = build_tree(
        queue, sources, targets=targets,
        max_particles_in_box=2, debug=True)

    assert tree.nlevels >= 15

    build_traversal = FMMTraversalBuilder(ctx)
    trav_dev, _ = build_traversal(queue, tree, debug=True)
    trav = trav_dev.get(queue=queue)

    weights = np.ones_like(sources[0])

    def fmm_level_to_nterms(tree, lev):
        return order

    rotation_data = FMMLibRotationData(queue, trav)
    wrangler = FMMLibExpansionWrangler(
        trav.tree, helmholtz_k,
        fmm_level_to_nterms=fmm_level_to_nterms,
        rotation_data=rotation_data)

    pot = drive_fmm(trav, wrangler, (weights, ))
    assert not np.isnan(pot).any()

    # {{{ ref fmmlib computation

    logger.info("computing direct (reference) result")

    ref_pot = get_fmmlib_ref_pot(wrangler, weights, sources, targets,
                                 helmholtz_k)

    abs_err_norm = la.norm(pot - ref_pot, np.inf)
    rel_err = abs_err_norm / la.norm(ref_pot, np.inf)
    logger.info("relative l2 error vs fmmlib direct: %g", rel_err)

    bound_base = (1 / 2) if dims == 2 else (3 / 4)
    error_bound = bound_base**(1 + order)

    assert rel_err < error_bound, rel_err

    # }}}
def output_zeros(self):
    """Return the result of ``FMMLibExpansionWrangler.output_zeros``."""
    base_impl = FMMLibExpansionWrangler.output_zeros
    return base_impl(self)
def test_pyfmmlib_fmm(ctx_getter, dims, use_dipoles, helmholtz_k):
    """Compare the pyfmmlib-backed FMM against direct evaluations.

    The FMM potential is checked (relative max-norm error below ``1e-5``)
    against pyfmmlib's direct routine and, when sumpy can be imported,
    against sumpy's ``P2P``. If sumpy is missing a warning is issued and
    that comparison is skipped.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 3000
    ntargets = 1000
    dtype = np.float64

    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    # Shift the target cloud along the first axis.
    target_offset = np.array([2, 0, 0])[:dims]
    targets = p_normal(queue, ntargets, dims, dtype, seed=18) + target_offset

    sources_host = particle_array_to_host(sources)
    targets_host = particle_array_to_host(targets)

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(ctx)
    tree, _ = build_tree(
        queue, sources, targets=targets,
        max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    build_traversal = FMMTraversalBuilder(ctx)
    trav_dev, _ = build_traversal(queue, tree, debug=True)
    trav = trav_dev.get(queue=queue)

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=20)
    weights = rng.uniform(queue, nsources, dtype=np.float64).get()

    dipole_vec = None
    if use_dipoles:
        np.random.seed(13)
        dipole_vec = np.random.randn(dims, nsources)

    base_nterms = 20 if (dims == 2 and helmholtz_k == 0) else 10

    def fmm_level_to_nterms(tree, lev):
        # exercise order-varies-by-level capability
        level_bonus = 5 if (lev < 3 and helmholtz_k) else 0
        dipole_bonus = 1 if use_dipoles else 0
        return base_nterms + level_bonus + dipole_bonus

    from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler
    wrangler = FMMLibExpansionWrangler(
        trav.tree, helmholtz_k,
        fmm_level_to_nterms=fmm_level_to_nterms,
        dipole_vec=dipole_vec)

    from boxtree.fmm import drive_fmm

    timing_data = {}
    pot = drive_fmm(trav, wrangler, weights, timing_data=timing_data)
    print(timing_data)
    assert timing_data

    # {{{ ref fmmlib computation

    logger.info("computing direct (reference) result")

    import pyfmmlib
    routine_name = "%spot%s%ddall%s_vec" % (
        wrangler.eqn_letter,
        "fld" if dims == 3 else "grad",
        dims,
        "_dp" if use_dipoles else "")
    fmmlib_routine = getattr(pyfmmlib, routine_name)

    if dims == 3:
        ref_kwargs = {"iffld": False}
    else:
        ref_kwargs = {"ifgrad": False, "ifhess": False}

    if not use_dipoles:
        ref_kwargs["charge"] = weights
    elif helmholtz_k == 0 and dims == 2:
        ref_kwargs["dipstr"] = -weights * (dipole_vec[0] + 1j * dipole_vec[1])
    else:
        ref_kwargs["dipstr"] = weights
        ref_kwargs["dipvec"] = dipole_vec

    if helmholtz_k:
        ref_kwargs["zk"] = helmholtz_k

    direct_result = fmmlib_routine(
        sources=sources_host.T, targets=targets_host.T, **ref_kwargs)
    ref_pot = wrangler.finalize_potentials(direct_result[0])

    rel_err = la.norm(pot - ref_pot, np.inf) / la.norm(ref_pot, np.inf)
    logger.info("relative l2 error vs fmmlib direct: %g", rel_err)
    assert rel_err < 1e-5, rel_err

    # }}}

    # {{{ check against sumpy

    try:
        import sumpy  # noqa
    except ImportError:
        from warnings import warn
        warn("sumpy unavailable: cannot compute independent reference "
             "values for pyfmmlib")
        # Nothing else to check without sumpy.
        return

    from sumpy.kernel import (
        LaplaceKernel, HelmholtzKernel, DirectionalSourceDerivative)
    from sumpy.p2p import P2P

    sumpy_extra_kwargs = {}
    if helmholtz_k:
        knl = HelmholtzKernel(dims)
        sumpy_extra_kwargs["k"] = helmholtz_k
    else:
        knl = LaplaceKernel(dims)

    if use_dipoles:
        knl = DirectionalSourceDerivative(knl)
        sumpy_extra_kwargs["src_derivative_dir"] = dipole_vec

    p2p = P2P(ctx, [knl], exclude_self=False)

    evt, (sumpy_ref_pot, ) = p2p(
        queue, targets, sources, [weights],
        out_host=True, **sumpy_extra_kwargs)

    sumpy_rel_err = (
        la.norm(pot - sumpy_ref_pot, np.inf)
        / la.norm(sumpy_ref_pot, np.inf))

    logger.info("relative l2 error vs sumpy direct: %g", sumpy_rel_err)
    assert sumpy_rel_err < 1e-5, sumpy_rel_err

    # }}}
def reorder_sources(self, source_array):
    """Return ``FMMLibExpansionWrangler.reorder_sources`` applied to
    *source_array*.
    """
    base_impl = FMMLibExpansionWrangler.reorder_sources
    return base_impl(self, source_array)