def test_estimate_calibration_params(ctx_factory): from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler nsources_list = [1000, 2000, 3000, 4000] ntargets_list = [1000, 2000, 3000, 4000] dims = 3 dtype = np.float64 ctx = ctx_factory() queue = cl.CommandQueue(ctx) traversals = [] traversals_dev = [] level_to_orders = [] timing_results = [] def fmm_level_to_nterms(tree, ilevel): return 10 for nsources, ntargets in zip(nsources_list, ntargets_list): # {{{ Generate sources, targets and target_radii from boxtree.tools import make_normal_particle_array as p_normal sources = p_normal(queue, nsources, dims, dtype, seed=15) targets = p_normal(queue, ntargets, dims, dtype, seed=18) from pyopencl.clrandom import PhiloxGenerator rng = PhiloxGenerator(queue.context, seed=22) target_radii = rng.uniform( queue, ntargets, a=0, b=0.05, dtype=dtype ).get() # }}} # {{{ Generate tree and traversal from boxtree import TreeBuilder tb = TreeBuilder(ctx) tree, _ = tb( queue, sources, targets=targets, target_radii=target_radii, stick_out_factor=0.15, max_particles_in_box=30, debug=True ) from boxtree.traversal import FMMTraversalBuilder tg = FMMTraversalBuilder(ctx, well_sep_is_n_away=2) trav_dev, _ = tg(queue, tree, debug=True) trav = trav_dev.get(queue=queue) traversals.append(trav) traversals_dev.append(trav_dev) # }}} wrangler = FMMLibExpansionWrangler(trav.tree, 0, fmm_level_to_nterms) level_to_orders.append(wrangler.level_nterms) timing_data = {} from boxtree.fmm import drive_fmm src_weights = np.random.rand(tree.nsources).astype(tree.coord_dtype) drive_fmm(trav, wrangler, (src_weights,), timing_data=timing_data) timing_results.append(timing_data) if SUPPORTS_PROCESS_TIME: time_field_name = "process_elapsed" else: time_field_name = "wall_elapsed" def test_params_sanity(test_params): param_names = ["c_p2m", "c_m2m", "c_p2p", "c_m2l", "c_m2p", "c_p2l", "c_l2l", "c_l2p"] for name in param_names: assert isinstance(test_params[name], np.float64) def test_params_equal(test_params1, test_params2): param_names = ["c_p2m", "c_m2m", "c_p2p", "c_m2l", "c_m2p", "c_p2l", "c_l2l", "c_l2p"] for name in param_names: assert test_params1[name] == test_params2[name] python_cost_model = _PythonFMMCostModel(make_pde_aware_translation_cost_model) python_model_results = [] for icase in range(len(traversals)-1): traversal = traversals[icase] level_to_order = level_to_orders[icase] python_model_results.append(python_cost_model.cost_per_stage( queue, traversal, level_to_order, _PythonFMMCostModel.get_unit_calibration_params(), )) python_params = python_cost_model.estimate_calibration_params( python_model_results, timing_results[:-1], time_field_name=time_field_name ) test_params_sanity(python_params) cl_cost_model = FMMCostModel(make_pde_aware_translation_cost_model) cl_model_results = [] for icase in range(len(traversals_dev)-1): traversal = traversals_dev[icase] level_to_order = level_to_orders[icase] cl_model_results.append(cl_cost_model.cost_per_stage( queue, traversal, level_to_order, FMMCostModel.get_unit_calibration_params(), )) cl_params = cl_cost_model.estimate_calibration_params( cl_model_results, timing_results[:-1], time_field_name=time_field_name ) test_params_sanity(cl_params) if SUPPORTS_PROCESS_TIME: test_params_equal(cl_params, python_params)
def demo_cost_model(): if not SUPPORTS_PROCESS_TIME: raise NotImplementedError( "Currently this script uses process time which only works on Python>=3.3" ) from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler nsources_list = [1000, 2000, 3000, 4000, 5000] ntargets_list = [1000, 2000, 3000, 4000, 5000] dims = 3 dtype = np.float64 ctx = cl.create_some_context() queue = cl.CommandQueue(ctx) traversals = [] traversals_dev = [] level_to_orders = [] timing_results = [] def fmm_level_to_nterms(tree, ilevel): return 10 for nsources, ntargets in zip(nsources_list, ntargets_list): # {{{ Generate sources, targets and target_radii from boxtree.tools import make_normal_particle_array as p_normal sources = p_normal(queue, nsources, dims, dtype, seed=15) targets = p_normal(queue, ntargets, dims, dtype, seed=18) from pyopencl.clrandom import PhiloxGenerator rng = PhiloxGenerator(queue.context, seed=22) target_radii = rng.uniform(queue, ntargets, a=0, b=0.05, dtype=dtype).get() # }}} # {{{ Generate tree and traversal from boxtree import TreeBuilder tb = TreeBuilder(ctx) tree, _ = tb(queue, sources, targets=targets, target_radii=target_radii, stick_out_factor=0.15, max_particles_in_box=30, debug=True) from boxtree.traversal import FMMTraversalBuilder tg = FMMTraversalBuilder(ctx, well_sep_is_n_away=2) trav_dev, _ = tg(queue, tree, debug=True) trav = trav_dev.get(queue=queue) traversals.append(trav) traversals_dev.append(trav_dev) # }}} wrangler = FMMLibExpansionWrangler(trav.tree, 0, fmm_level_to_nterms) level_to_orders.append(wrangler.level_nterms) timing_data = {} from boxtree.fmm import drive_fmm src_weights = np.random.rand(tree.nsources).astype(tree.coord_dtype) drive_fmm(trav, wrangler, src_weights, timing_data=timing_data) timing_results.append(timing_data) time_field_name = "process_elapsed" from boxtree.cost import FMMCostModel from boxtree.cost import make_pde_aware_translation_cost_model cost_model = FMMCostModel(make_pde_aware_translation_cost_model) model_results = [] for icase in range(len(traversals) - 1): traversal = traversals_dev[icase] model_results.append( cost_model.cost_per_stage( queue, traversal, level_to_orders[icase], FMMCostModel.get_unit_calibration_params(), )) queue.finish() params = cost_model.estimate_calibration_params( model_results, timing_results[:-1], time_field_name=time_field_name) predicted_time = cost_model.cost_per_stage( queue, traversals_dev[-1], level_to_orders[-1], params, ) queue.finish() for field in [ "form_multipoles", "eval_direct", "multipole_to_local", "eval_multipoles", "form_locals", "eval_locals", "coarsen_multipoles", "refine_locals" ]: measured = timing_results[-1][field]["process_elapsed"] pred_err = ((measured - predicted_time[field]) / measured) logger.info("actual/predicted time for %s: %.3g/%.3g -> %g %% error", field, measured, predicted_time[field], abs(100 * pred_err))