def drive_fmm(wrangler, strengths, geo_data, kernel, kernel_arguments):
    """Adapter that forwards to the QBX FMM driver.

    The *geo_data*, *kernel*, and *kernel_arguments* parameters exist only
    to satisfy the caller's expected signature; they are unused here.

    :returns: a tuple ``(potentials, timing_data)``, where *timing_data*
        is a dict populated by the driver, or *None* when timing was not
        requested.
    """
    # Accepted for interface compatibility only.
    del geo_data, kernel, kernel_arguments

    from pytential.qbx.fmm import drive_fmm

    # NOTE(review): *return_timing_data* is a free variable, presumably
    # captured from an enclosing scope not visible in this chunk.
    timing_data = {} if return_timing_data else None

    return drive_fmm(wrangler, strengths, timing_data), timing_data
def test_cost_model_correctness(ctx_factory, dim, off_surface,
        use_target_specific_qbx):
    """Check that computed cost matches that of a constant-one FMM."""
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue)

    # Count operations symbolically instead of estimating wall time.
    cost_model = QBXCostModel(
            translation_cost_model_factory=OpCountingTranslationCostModel)

    lpot_source = get_lpot_source(actx, dim).copy(
            cost_model=cost_model,
            _use_target_specific_qbx=use_target_specific_qbx)

    # Construct targets.
    if off_surface:
        from pytential.target import PointsTarget
        from boxtree.tools import make_uniform_particle_array
        ntargets = 10 ** 3
        # FIX: the np.float alias was deprecated in NumPy 1.20 and removed
        # in 1.24; it aliased builtin float (i.e. float64), so np.float64
        # is the behavior-identical replacement.
        targets = PointsTarget(
                make_uniform_particle_array(queue, ntargets, dim, np.float64))
        target_discrs_and_qbx_sides = ((targets, 0),)
        qbx_forced_limit = None
    else:
        targets = lpot_source.density_discr
        target_discrs_and_qbx_sides = ((targets, 1),)
        qbx_forced_limit = 1

    places = GeometryCollection((lpot_source, targets))
    source_dd = places.auto_source
    density_discr = places.get_discretization(source_dd.geometry)

    # Construct bound op, run cost model.
    sigma_sym = sym.var("sigma")
    k_sym = LaplaceKernel(lpot_source.ambient_dim)
    sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=qbx_forced_limit)

    op_S = bind(places, sym_op_S)
    sigma = get_density(actx, density_discr)

    from pytools import one
    modeled_time, _ = op_S.cost_per_stage("constant_one", sigma=sigma)
    # There is a single source geometry, hence a single cost entry.
    modeled_time = one(modeled_time.values())

    # Run FMM with ConstantOneWrangler. This can't be done with pytential's
    # high-level interface, so call the FMM driver directly.
    from pytential.qbx.fmm import drive_fmm
    geo_data = lpot_source.qbx_fmm_geometry_data(
            places, source_dd.geometry,
            target_discrs_and_qbx_sides=target_discrs_and_qbx_sides)
    wrangler = ConstantOneQBXExpansionWrangler(
            queue, geo_data, use_target_specific_qbx)

    quad_stage2_density_discr = places.get_discretization(
            source_dd.geometry, sym.QBX_SOURCE_QUAD_STAGE2)
    ndofs = quad_stage2_density_discr.ndofs
    src_weights = np.ones(ndofs)

    timing_data = {}
    # The driver output includes QBX centers first; strip them to keep only
    # actual target potentials.
    potential = drive_fmm(wrangler, (src_weights,), timing_data,
            traversal=wrangler.trav)[0][geo_data.ncenters:]

    # Check constant one wrangler for correctness.
    assert (potential == ndofs).all()

    # Check that the cost model matches the timing data returned by the
    # constant one wrangler.
    mismatches = []
    for stage in timing_data:
        if stage not in modeled_time:
            # Stages the model does not cover must have done no work.
            assert timing_data[stage]["ops_elapsed"] == 0
        else:
            if timing_data[stage]["ops_elapsed"] != modeled_time[stage]:
                mismatches.append(
                        (stage, timing_data[stage]["ops_elapsed"],
                            modeled_time[stage]))

    assert not mismatches, "\n".join(str(s) for s in mismatches)

    # {{{ Test per-box cost

    total_cost = 0.0
    for stage in timing_data:
        total_cost += timing_data[stage]["ops_elapsed"]

    per_box_cost, _ = op_S.cost_per_box("constant_one", sigma=sigma)
    print(per_box_cost)  # diagnostic output on failure
    per_box_cost = one(per_box_cost.values())

    total_aggregate_cost = cost_model.aggregate_over_boxes(per_box_cost)
    # coarsen_multipoles and refine_locals are per-level (not per-box)
    # stages, so they are added separately here.
    assert total_cost == (
            total_aggregate_cost
            + modeled_time["coarsen_multipoles"]
            + modeled_time["refine_locals"])

    # }}}
def exec_layer_potential_insn_fmm(self, queue, insn, bound_expr, evaluate):
    """Execute a layer-potential instruction using the QBX FMM.

    :arg queue: a PyOpenCL command queue used for all device work.
    :arg insn: the instruction; its ``outputs``, ``density``, ``kernels``
        and ``kernel_arguments`` attributes are read here.
    :arg bound_expr: provides ``places`` for target-name lookup.
    :arg evaluate: callable mapping symbolic expressions to arrays.
    :returns: a tuple ``(result, [])`` where *result* is a list of
        ``(output_name, potential_array)`` pairs.
    """
    # {{{ build list of unique target discretizations used

    # map (name, qbx_side) to number in list
    tgt_name_and_side_to_number = {}
    # list of tuples (discr, qbx_side)
    target_discrs_and_qbx_sides = []

    for o in insn.outputs:
        key = (o.target_name, o.qbx_forced_limit)
        # Deduplicate: several outputs may share a (target, side) pair.
        if key not in tgt_name_and_side_to_number:
            tgt_name_and_side_to_number[key] = \
                    len(target_discrs_and_qbx_sides)

            target_discr = bound_expr.places[o.target_name]
            if isinstance(target_discr, LayerPotentialSource):
                # A source used as a target: evaluate on its density discr.
                target_discr = target_discr.density_discr

            target_discrs_and_qbx_sides.append(
                    (target_discr, o.qbx_forced_limit))

    target_discrs_and_qbx_sides = tuple(target_discrs_and_qbx_sides)

    # }}}

    geo_data = self.qbx_fmm_geometry_data(target_discrs_and_qbx_sides)

    # FIXME Exert more positive control over geo_data attribute lifetimes using
    # geo_data.<method>.clear_cache(geo_data).

    # FIXME Synthesize "bad centers" around corners and edges that have
    # inadequate QBX coverage.

    # FIXME don't compute *all* output kernels on all targets--respect that
    # some target discretizations may only be asking for derivatives (e.g.)

    # Quadrature-weighted density: the FMM source strengths.
    strengths = (evaluate(insn.density).with_queue(queue)
            * self.weights_and_area_elements())

    # {{{ get expansion wrangler

    base_kernel = None
    out_kernels = []

    from sumpy.kernel import AxisTargetDerivativeRemover
    for knl in insn.kernels:
        # All output kernels must share one underlying base kernel
        # (they may differ only by target derivatives).
        candidate_base_kernel = AxisTargetDerivativeRemover()(knl)

        if base_kernel is None:
            base_kernel = candidate_base_kernel
        else:
            assert base_kernel == candidate_base_kernel

    out_kernels = tuple(knl for knl in insn.kernels)

    if base_kernel.is_complex_valued or strengths.dtype.kind == "c":
        value_dtype = self.complex_dtype
    else:
        value_dtype = self.real_dtype

    # {{{ build extra_kwargs dictionaries

    # This contains things like the Helmholtz parameter k or
    # the normal directions for double layers.

    def reorder_sources(source_array):
        # Per-source kernel arguments are given in user order; reindex
        # them into the tree's point-source order.
        if isinstance(source_array, cl.array.Array):
            return (source_array
                    .with_queue(queue)
                    [geo_data.tree().user_point_source_ids]
                    .with_queue(None))
        else:
            # Scalars need no reordering.
            return source_array

    kernel_extra_kwargs = {}
    source_extra_kwargs = {}

    from sumpy.tools import gather_arguments, gather_source_arguments
    from pytools.obj_array import with_object_array_or_scalar
    for func, var_dict in [
            (gather_arguments, kernel_extra_kwargs),
            (gather_source_arguments, source_extra_kwargs),
            ]:
        for arg in func(out_kernels):
            var_dict[arg.name] = with_object_array_or_scalar(
                    reorder_sources,
                    evaluate(insn.kernel_arguments[arg.name]))

    # }}}

    wrangler = self.expansion_wrangler_code_container(
            base_kernel, out_kernels).get_wrangler(
                    queue, geo_data, value_dtype,
                    source_extra_kwargs=source_extra_kwargs,
                    kernel_extra_kwargs=kernel_extra_kwargs)

    # }}}

    #geo_data.plot()

    # This code path only supports global QBX; bail out otherwise.
    if len(geo_data.global_qbx_centers()) != geo_data.center_info().ncenters:
        raise NotImplementedError("geometry has centers requiring local QBX")

    from pytential.qbx.geometry import target_state
    if (geo_data.user_target_to_center().with_queue(queue)
            == target_state.FAILED).get().any():
        raise RuntimeError("geometry has failed targets")

    # {{{ execute global QBX

    from pytential.qbx.fmm import drive_fmm
    all_potentials_on_every_tgt = drive_fmm(wrangler, strengths)

    # }}}

    result = []

    for o in insn.outputs:
        # Slice each output's targets out of the concatenated potential
        # array using the discretization start offsets.
        tgt_side_number = tgt_name_and_side_to_number[
                o.target_name, o.qbx_forced_limit]
        tgt_slice = slice(*geo_data.target_info().target_discr_starts[
                tgt_side_number:tgt_side_number+2])

        result.append(
                (o.name,
                    all_potentials_on_every_tgt[o.kernel_index][tgt_slice]))

    return result, []
def test_cost_model_correctness(ctx_getter, dim, off_surface,
        use_target_specific_qbx):
    """Check that computed cost matches that of a constant-one FMM."""
    cl_ctx = ctx_getter()
    queue = cl.CommandQueue(cl_ctx)

    # Count operations symbolically instead of estimating wall time.
    cost_model = (CostModel(
            translation_cost_model_factory=OpCountingTranslationCostModel))

    lpot_source = get_lpot_source(queue, dim).copy(
            cost_model=cost_model,
            _use_target_specific_qbx=use_target_specific_qbx)

    # Construct targets.
    if off_surface:
        from pytential.target import PointsTarget
        from boxtree.tools import make_uniform_particle_array
        ntargets = 10**3
        # FIX: the np.float alias was deprecated in NumPy 1.20 and removed
        # in 1.24; it aliased builtin float (i.e. float64), so np.float64
        # is the behavior-identical replacement.
        targets = PointsTarget(
                make_uniform_particle_array(queue, ntargets, dim, np.float64))
        target_discrs_and_qbx_sides = ((targets, 0), )
        qbx_forced_limit = None
    else:
        targets = lpot_source.density_discr
        target_discrs_and_qbx_sides = ((targets, 1), )
        qbx_forced_limit = 1

    # Construct bound op, run cost model.
    sigma_sym = sym.var("sigma")
    k_sym = LaplaceKernel(lpot_source.ambient_dim)
    sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=qbx_forced_limit)

    op_S = bind((lpot_source, targets), sym_op_S)
    sigma = get_density(queue, lpot_source)

    from pytools import one
    # There is a single source geometry, hence a single cost entry.
    cost_S = one(op_S.get_modeled_cost(queue, sigma=sigma).values())

    # Run FMM with ConstantOneWrangler. This can't be done with pytential's
    # high-level interface, so call the FMM driver directly.
    from pytential.qbx.fmm import drive_fmm
    geo_data = lpot_source.qbx_fmm_geometry_data(
            target_discrs_and_qbx_sides=target_discrs_and_qbx_sides)
    wrangler = ConstantOneQBXExpansionWrangler(queue, geo_data,
            use_target_specific_qbx)
    nnodes = lpot_source.quad_stage2_density_discr.nnodes
    src_weights = np.ones(nnodes)

    timing_data = {}
    # The driver output includes QBX centers first; strip them to keep only
    # actual target potentials.
    potential = drive_fmm(wrangler, src_weights, timing_data,
            traversal=wrangler.trav)[0][geo_data.ncenters:]

    # Check constant one wrangler for correctness.
    assert (potential == nnodes).all()

    modeled_time = cost_S.get_predicted_times(merge_close_lists=True)

    # Check that the cost model matches the timing data returned by the
    # constant one wrangler.
    mismatches = []
    for stage in timing_data:
        if timing_data[stage]["ops_elapsed"] != modeled_time[stage]:
            mismatches.append(
                    (stage, timing_data[stage]["ops_elapsed"],
                        modeled_time[stage]))

    assert not mismatches, "\n".join(str(s) for s in mismatches)
def exec_compute_potential_insn_fmm(self, queue, insn, bound_expr, evaluate):
    """Execute a compute-potential instruction using the QBX FMM.

    :arg queue: a PyOpenCL command queue used for all device work.
    :arg insn: the instruction; its ``outputs``, ``density``, ``kernels``
        and ``kernel_arguments`` attributes are read here.
    :arg bound_expr: provides ``places`` for target-name lookup.
    :arg evaluate: callable mapping symbolic expressions to arrays.
    :returns: a list of ``(output_name, potential_array)`` pairs.
    """
    # {{{ build list of unique target discretizations used

    # map (name, qbx_side) to number in list
    tgt_name_and_side_to_number = {}
    # list of tuples (discr, qbx_side)
    target_discrs_and_qbx_sides = []

    for o in insn.outputs:
        key = (o.target_name, o.qbx_forced_limit)
        # Deduplicate: several outputs may share a (target, side) pair.
        if key not in tgt_name_and_side_to_number:
            tgt_name_and_side_to_number[key] = \
                    len(target_discrs_and_qbx_sides)

            target_discr = bound_expr.places[o.target_name]
            if isinstance(target_discr, LayerPotentialSourceBase):
                # A source used as a target: evaluate on its density discr.
                target_discr = target_discr.density_discr

            qbx_forced_limit = o.qbx_forced_limit
            if qbx_forced_limit is None:
                # Geometry code uses side 0 to mean "no forced side".
                qbx_forced_limit = 0

            target_discrs_and_qbx_sides.append(
                    (target_discr, qbx_forced_limit))

    target_discrs_and_qbx_sides = tuple(target_discrs_and_qbx_sides)

    # }}}

    geo_data = self.qbx_fmm_geometry_data(target_discrs_and_qbx_sides)

    # geo_data.plot()

    # FIXME Exert more positive control over geo_data attribute lifetimes using
    # geo_data.<method>.clear_cache(geo_data).

    # FIXME Synthesize "bad centers" around corners and edges that have
    # inadequate QBX coverage.

    # FIXME don't compute *all* output kernels on all targets--respect that
    # some target discretizations may only be asking for derivatives (e.g.)

    # Quadrature-weighted density: the FMM source strengths.
    strengths = (evaluate(insn.density).with_queue(queue)
            * self.weights_and_area_elements())

    out_kernels = tuple(knl for knl in insn.kernels)
    fmm_kernel = self.get_fmm_kernel(out_kernels)
    output_and_expansion_dtype = (
            self.get_fmm_output_and_expansion_dtype(fmm_kernel, strengths))
    # Extra kwargs carry e.g. the Helmholtz parameter or double-layer
    # normals, reordered into tree source order.
    kernel_extra_kwargs, source_extra_kwargs = (
            self.get_fmm_expansion_wrangler_extra_kwargs(
                queue, out_kernels, geo_data.tree().user_source_ids,
                insn.kernel_arguments, evaluate))

    wrangler = self.expansion_wrangler_code_container(
            fmm_kernel, out_kernels).get_wrangler(
                queue, geo_data, output_and_expansion_dtype,
                self.qbx_order,
                self.fmm_level_to_order,
                source_extra_kwargs=source_extra_kwargs,
                kernel_extra_kwargs=kernel_extra_kwargs)

    from pytential.qbx.geometry import target_state
    if (geo_data.user_target_to_center().with_queue(queue)
            == target_state.FAILED).any().get():
        raise RuntimeError("geometry has failed targets")

    # {{{ performance data hook

    # An inspector may examine the geometry data and veto the FMM, in
    # which case all outputs are reported as zero.
    if self.geometry_data_inspector is not None:
        perform_fmm = self.geometry_data_inspector(insn, bound_expr, geo_data)
        if not perform_fmm:
            return [(o.name, 0) for o in insn.outputs]

    # }}}

    # {{{ execute global QBX

    from pytential.qbx.fmm import drive_fmm
    all_potentials_on_every_tgt = drive_fmm(wrangler, strengths)

    # }}}

    result = []

    for o in insn.outputs:
        # Slice each output's targets out of the concatenated potential
        # array using the discretization start offsets.
        tgt_side_number = tgt_name_and_side_to_number[
                o.target_name, o.qbx_forced_limit]
        tgt_slice = slice(*geo_data.target_info().target_discr_starts[
                tgt_side_number:tgt_side_number+2])

        result.append(
                (o.name,
                    all_potentials_on_every_tgt[o.kernel_index][tgt_slice]))

    return result