def calibrate_cost_model(ctx):
    queue = cl.CommandQueue(ctx)

    from pytential.qbx.cost import CostModel, estimate_calibration_params
    cost_model = CostModel()

    model_results = []
    timing_results = []

    for lpot_source in training_geometries(queue):
        lpot_source = lpot_source.copy(cost_model=cost_model)
        bound_op = get_bound_op(lpot_source)
        sigma = get_test_density(queue, lpot_source)

        cost_S = bound_op.get_modeled_cost(queue, sigma=sigma)

        # Warm-up run.
        bound_op.eval(queue, {"sigma": sigma})

        for _ in range(RUNS):
            timing_data = {}
            bound_op.eval(queue, {"sigma": sigma}, timing_data=timing_data)

            model_results.append(one(cost_S.values()))
            timing_results.append(one(timing_data.values()))

    calibration_params = (
            estimate_calibration_params(model_results, timing_results))

    return cost_model.with_calibration_params(calibration_params)
def test_cost_model(ctx_factory, dim, use_target_specific_qbx):
    """Test that cost model gathering can execute successfully."""
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue)

    lpot_source = get_lpot_source(actx, dim).copy(
            _use_target_specific_qbx=use_target_specific_qbx,
            cost_model=CostModel())
    places = GeometryCollection(lpot_source)

    density_discr = places.get_discretization(places.auto_source.geometry)
    sigma = get_density(actx, density_discr)

    sigma_sym = sym.var("sigma")
    k_sym = LaplaceKernel(lpot_source.ambient_dim)

    sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1)
    op_S = bind(places, sym_op_S)
    cost_S = op_S.get_modeled_cost(actx, sigma=sigma)
    assert len(cost_S) == 1

    sym_op_S_plus_D = (
            sym.S(k_sym, sigma_sym, qbx_forced_limit=+1)
            + sym.D(k_sym, sigma_sym, qbx_forced_limit="avg"))
    op_S_plus_D = bind(places, sym_op_S_plus_D)
    cost_S_plus_D = op_S_plus_D.get_modeled_cost(actx, sigma=sigma)
    assert len(cost_S_plus_D) == 2
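# `get_modeled_cost` returns one cost entry per FMM invocation in the bound
# operator, which is what the two length assertions above check. A minimal,
# hedged inspection sketch, using only accessors already exercised elsewhere
# in this file (`params`, `get_predicted_times`):

# Each value carries a parameter dict and per-stage cost predictions; the
# keys identify the originating FMM invocation.
for insn, cost in cost_S_plus_D.items():
    print(insn)
    print(sorted(cost.params))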
def calibrate_cost_model(ctx):
    queue = cl.CommandQueue(ctx)
    actx = PyOpenCLArrayContext(queue)

    from pytential.qbx.cost import CostModel, estimate_calibration_params
    cost_model = CostModel()

    model_results = []
    timing_results = []

    for lpot_source in training_geometries(actx):
        lpot_source = lpot_source.copy(cost_model=cost_model)

        from pytential import GeometryCollection
        places = GeometryCollection(lpot_source)
        density_discr = places.get_discretization(places.auto_source.geometry)

        bound_op = get_bound_op(places)
        sigma = get_test_density(actx, density_discr)

        cost_S = bound_op.get_modeled_cost(actx, sigma=sigma)

        # Warm-up run.
        bound_op.eval({"sigma": sigma}, array_context=actx)

        for _ in range(RUNS):
            timing_data = {}
            bound_op.eval({"sigma": sigma},
                    array_context=actx, timing_data=timing_data)

            model_results.append(one(cost_S.values()))
            timing_results.append(one(timing_data.values()))

    calibration_params = (
            estimate_calibration_params(model_results, timing_results))

    return cost_model.with_calibration_params(calibration_params)
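# Once calibrated, the returned model can be attached to a fresh geometry to
# obtain wall-time predictions. A hedged usage sketch: `make_test_geometry`
# is a hypothetical helper in the style of `training_geometries` above; the
# other names are the ones used by the surrounding code.

queue = cl.CommandQueue(ctx)
actx = PyOpenCLArrayContext(queue)

calibrated_model = calibrate_cost_model(ctx)

# `make_test_geometry` is hypothetical: any source not used for training.
lpot_source = make_test_geometry(actx).copy(cost_model=calibrated_model)
places = GeometryCollection(lpot_source)
density_discr = places.get_discretization(places.auto_source.geometry)

bound_op = get_bound_op(places)
sigma = get_test_density(actx, density_discr)

# Per-stage wall-time estimates for the modeled FMM stages.
cost = one(bound_op.get_modeled_cost(actx, sigma=sigma).values())
print(cost.get_predicted_times(merge_close_lists=True))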
def test_cost_model_order_varying_by_level(ctx_factory):
    """For an FMM order that varies by level, check that the modeled cost
    differs from the constant-order case. The varying-level case should
    have the larger cost.
    """
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue)

    # {{{ constant level to order

    def level_to_order_constant(kernel, kernel_args, tree, level):
        return 1

    lpot_source = get_lpot_source(actx, 2).copy(
            cost_model=CostModel(
                calibration_params=CONSTANT_ONE_PARAMS),
            fmm_level_to_order=level_to_order_constant)
    places = GeometryCollection(lpot_source)

    density_discr = places.get_discretization(places.auto_source.geometry)
    sigma_sym = sym.var("sigma")

    k_sym = LaplaceKernel(2)
    sym_op = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1)

    sigma = get_density(actx, density_discr)

    cost_constant = one(
            bind(places, sym_op)
            .get_modeled_cost(actx, sigma=sigma).values())

    # }}}

    # {{{ varying level to order

    varying_order_params = cost_constant.params.copy()

    nlevels = cost_constant.params["nlevels"]
    for level in range(nlevels):
        varying_order_params["p_fmm_lev%d" % level] = nlevels - level

    cost_varying = cost_constant.with_params(varying_order_params)

    # }}}

    assert (
            sum(cost_varying.get_predicted_times().values())
            > sum(cost_constant.get_predicted_times().values()))
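# The "p_fmm_lev%d" entries manipulated above are ordinary keys in the cost
# result's parameter dict, so the same `with_params` mechanism supports other
# what-if experiments without re-running the FMM. A hedged one-off in the
# style of the test:

# Double every per-level FMM order and compare the total predicted time
# against the constant-order baseline.
doubled_params = cost_constant.params.copy()
for level in range(cost_constant.params["nlevels"]):
    doubled_params["p_fmm_lev%d" % level] *= 2

cost_doubled = cost_constant.with_params(doubled_params)
print(sum(cost_doubled.get_predicted_times().values()))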
def inspect_geo_data(insn, bound_expr, geo_data):
    del bound_expr
    from pytential.qbx.cost import CostModel

    cost_model = CostModel(
            translation_cost_model_factory=GigaQBXPaperTranslationCostModel,
            calibration_params=CONSTANT_ONE_PARAMS)

    kernel = lpot_source.get_fmm_kernel(insn.kernels)
    kernel_arguments = insn.kernel_arguments

    result = cost_model(geo_data, kernel, kernel_arguments)
    inspect_geo_data_result.append(result)

    return False
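# `inspect_geo_data` closes over `lpot_source` and `inspect_geo_data_result`
# from an enclosing scope, and returning False asks the driver to skip the
# actual FMM evaluation once the geometry data has been inspected. A hedged
# sketch of how such an inspector is typically wired up, assuming the
# queue-based helpers used by the older tests in this file:

inspect_geo_data_result = []

# Pass the inspector via the geometry_data_inspector constructor argument
# (see the __init__ below); it fires on each potential evaluation.
lpot_source = get_lpot_source(queue, dim).copy(
        geometry_data_inspector=inspect_geo_data)

sigma = get_density(queue, lpot_source)
sym_op = sym.S(LaplaceKernel(lpot_source.ambient_dim), sym.var("sigma"),
        qbx_forced_limit=+1)
bind(lpot_source, sym_op).eval(queue, {"sigma": sigma})

# One modeled-cost result was appended per FMM invocation.
cost, = inspect_geo_data_result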
def test_cost_model(ctx_getter, dim, use_target_specific_qbx):
    """Test that cost model gathering can execute successfully."""
    cl_ctx = ctx_getter()
    queue = cl.CommandQueue(cl_ctx)

    lpot_source = (
            get_lpot_source(queue, dim).copy(
                _use_target_specific_qbx=use_target_specific_qbx,
                cost_model=CostModel()))

    sigma = get_density(queue, lpot_source)

    sigma_sym = sym.var("sigma")
    k_sym = LaplaceKernel(lpot_source.ambient_dim)

    sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1)
    op_S = bind(lpot_source, sym_op_S)
    cost_S = op_S.get_modeled_cost(queue, sigma=sigma)
    assert len(cost_S) == 1

    sym_op_S_plus_D = (
            sym.S(k_sym, sigma_sym, qbx_forced_limit=+1)
            + sym.D(k_sym, sigma_sym))
    op_S_plus_D = bind(lpot_source, sym_op_S_plus_D)
    cost_S_plus_D = op_S_plus_D.get_modeled_cost(queue, sigma=sigma)
    assert len(cost_S_plus_D) == 2
def __init__(
        self,
        density_discr,
        fine_order,
        qbx_order=None,
        fmm_order=None,
        fmm_level_to_order=None,
        to_refined_connection=None,
        expansion_factory=None,
        target_association_tolerance=_not_provided,

        # begin experimental arguments
        # FIXME default debug=False once everything has matured
        debug=True,
        _refined_for_global_qbx=False,
        _expansions_in_tree_have_extent=True,
        _expansion_stick_out_factor=0.5,
        _well_sep_is_n_away=2,
        _max_leaf_refine_weight=None,
        _box_extent_norm=None,
        _from_sep_smaller_crit=None,
        _from_sep_smaller_min_nsources_cumul=None,
        _tree_kind="adaptive",
        _use_target_specific_qbx=None,
        geometry_data_inspector=None,
        cost_model=None,
        fmm_backend="sumpy",
        target_stick_out_factor=_not_provided):
    """
    :arg fine_order: The total degree to which the (upsampled)
        underlying quadrature is exact.
    :arg to_refined_connection: A connection used for resampling from
        *density_discr* to the fine density discretization. It is assumed
        that the fine density discretization given by
        *to_refined_connection.to_discr* is *not* already upsampled. May
        be *None*.
    :arg fmm_order: `False` for direct calculation. May not be given if
        *fmm_level_to_order* is given.
    :arg fmm_level_to_order: A function that takes arguments of
        *(kernel, kernel_args, tree, level)* and returns the expansion
        order to be used on a given *level* of *tree* with *kernel*, where
        *kernel* is the :class:`sumpy.kernel.Kernel` being evaluated, and
        *kernel_args* is a set of *(key, value)* tuples with evaluated
        kernel arguments. May not be given if *fmm_order* is given.

    Experimental arguments without a promise of forward compatibility:

    :arg _use_target_specific_qbx: Whether to use target-specific
        acceleration by default if possible. *None* means "use if
        possible".
    :arg cost_model: Either *None* or an instance of
        :class:`~pytential.qbx.cost.CostModel`, used for gathering modeled
        costs (experimental)
    """

    # {{{ argument processing

    if fine_order is None:
        raise ValueError("fine_order must be provided.")

    if qbx_order is None:
        raise ValueError("qbx_order must be provided.")

    if target_stick_out_factor is not _not_provided:
        from warnings import warn
        warn("target_stick_out_factor has been renamed to "
                "target_association_tolerance. "
                "Using target_stick_out_factor is deprecated "
                "and will stop working in 2018.",
                DeprecationWarning, stacklevel=2)

        if target_association_tolerance is not _not_provided:
            raise TypeError(
                    "May not pass both target_association_tolerance and "
                    "target_stick_out_factor.")

        target_association_tolerance = target_stick_out_factor

    del target_stick_out_factor

    if target_association_tolerance is _not_provided:
        target_association_tolerance = float(
                np.finfo(density_discr.real_dtype).eps) * 1e3

    if fmm_order is not None and fmm_level_to_order is not None:
        raise TypeError(
                "may not specify both fmm_order and fmm_level_to_order")

    if _box_extent_norm is None:
        _box_extent_norm = "l2"

    if _from_sep_smaller_crit is None:
        # This seems to win no matter what the box extent norm is
        # https://gitlab.tiker.net/papers/2017-qbx-fmm-3d/issues/10
        _from_sep_smaller_crit = "precise_linf"

    if fmm_level_to_order is None:
        if fmm_order is False:
            fmm_level_to_order = False
        else:
            def fmm_level_to_order(kernel, kernel_args, tree, level):  # noqa pylint:disable=function-redefined
                return fmm_order

    if _max_leaf_refine_weight is None:
        if density_discr.ambient_dim == 2:
            # FIXME: This should be verified now that l^2 is the default.
            _max_leaf_refine_weight = 64
        elif density_discr.ambient_dim == 3:
            # For static_linf/linf: https://gitlab.tiker.net/papers/2017-qbx-fmm-3d/issues/8#note_25009  # noqa
            # For static_l2/l2: https://gitlab.tiker.net/papers/2017-qbx-fmm-3d/issues/12  # noqa
            _max_leaf_refine_weight = 512
        else:
            # Just guessing...
            _max_leaf_refine_weight = 64

    if _from_sep_smaller_min_nsources_cumul is None:
        # See here for the comment thread that led to these defaults:
        # https://gitlab.tiker.net/inducer/boxtree/merge_requests/28#note_18661
        if density_discr.dim == 1:
            _from_sep_smaller_min_nsources_cumul = 15
        else:
            _from_sep_smaller_min_nsources_cumul = 30

    # }}}

    LayerPotentialSourceBase.__init__(self, density_discr)

    self.fine_order = fine_order
    self.qbx_order = qbx_order
    self.fmm_level_to_order = fmm_level_to_order

    assert target_association_tolerance is not None

    self.target_association_tolerance = target_association_tolerance
    self.fmm_backend = fmm_backend

    # Default values are lazily provided if these are None
    self._to_refined_connection = to_refined_connection

    if expansion_factory is None:
        from sumpy.expansion import DefaultExpansionFactory
        expansion_factory = DefaultExpansionFactory()
    self.expansion_factory = expansion_factory

    self.debug = debug
    self._refined_for_global_qbx = _refined_for_global_qbx
    self._expansions_in_tree_have_extent = \
            _expansions_in_tree_have_extent
    self._expansion_stick_out_factor = _expansion_stick_out_factor
    self._well_sep_is_n_away = _well_sep_is_n_away
    self._max_leaf_refine_weight = _max_leaf_refine_weight
    self._box_extent_norm = _box_extent_norm
    self._from_sep_smaller_crit = _from_sep_smaller_crit
    self._from_sep_smaller_min_nsources_cumul = \
            _from_sep_smaller_min_nsources_cumul
    self._tree_kind = _tree_kind
    self._use_target_specific_qbx = _use_target_specific_qbx

    self.geometry_data_inspector = geometry_data_inspector

    if cost_model is None:
        from pytential.qbx.cost import CostModel
        cost_model = CostModel()
    self.cost_model = cost_model
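# Following the docstring above, a hedged construction example with a
# per-level FMM order. `density_discr` and `target_order` are assumed to
# exist; note that fmm_order and fmm_level_to_order are mutually exclusive,
# so only the latter is passed here.

from pytential.qbx import QBXLayerPotentialSource


def level_to_order(kernel, kernel_args, tree, level):
    # Use higher expansion orders near the root, lower near the leaves.
    return max(tree.nlevels - level, 3)


qbx = QBXLayerPotentialSource(
        density_discr,
        fine_order=4*target_order,
        qbx_order=target_order,
        fmm_level_to_order=level_to_order)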
def test_cost_model_correctness(ctx_getter, dim, off_surface,
        use_target_specific_qbx):
    """Check that computed cost matches that of a constant-one FMM."""
    cl_ctx = ctx_getter()
    queue = cl.CommandQueue(cl_ctx)

    cost_model = CostModel(
            translation_cost_model_factory=OpCountingTranslationCostModel)

    lpot_source = get_lpot_source(queue, dim).copy(
            cost_model=cost_model,
            _use_target_specific_qbx=use_target_specific_qbx)

    # Construct targets.
    if off_surface:
        from pytential.target import PointsTarget
        from boxtree.tools import make_uniform_particle_array
        ntargets = 10**3
        targets = PointsTarget(
                make_uniform_particle_array(queue, ntargets, dim, np.float64))
        target_discrs_and_qbx_sides = ((targets, 0),)
        qbx_forced_limit = None
    else:
        targets = lpot_source.density_discr
        target_discrs_and_qbx_sides = ((targets, 1),)
        qbx_forced_limit = 1

    # Construct bound op, run cost model.
    sigma_sym = sym.var("sigma")
    k_sym = LaplaceKernel(lpot_source.ambient_dim)
    sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=qbx_forced_limit)

    op_S = bind((lpot_source, targets), sym_op_S)
    sigma = get_density(queue, lpot_source)

    from pytools import one
    cost_S = one(op_S.get_modeled_cost(queue, sigma=sigma).values())

    # Run FMM with ConstantOneWrangler. This can't be done with pytential's
    # high-level interface, so call the FMM driver directly.
    from pytential.qbx.fmm import drive_fmm
    geo_data = lpot_source.qbx_fmm_geometry_data(
            target_discrs_and_qbx_sides=target_discrs_and_qbx_sides)

    wrangler = ConstantOneQBXExpansionWrangler(
            queue, geo_data, use_target_specific_qbx)
    nnodes = lpot_source.quad_stage2_density_discr.nnodes
    src_weights = np.ones(nnodes)

    timing_data = {}
    potential = drive_fmm(wrangler, src_weights, timing_data,
            traversal=wrangler.trav)[0][geo_data.ncenters:]

    # Check constant one wrangler for correctness.
    assert (potential == nnodes).all()

    modeled_time = cost_S.get_predicted_times(merge_close_lists=True)

    # Check that the cost model matches the timing data returned by the
    # constant one wrangler.
    mismatches = []
    for stage in timing_data:
        if timing_data[stage]["ops_elapsed"] != modeled_time[stage]:
            mismatches.append(
                    (stage, timing_data[stage]["ops_elapsed"],
                        modeled_time[stage]))

    assert not mismatches, "\n".join(str(s) for s in mismatches)