def get_lpot_cost(queue, geometry_getter, kind):
    """Return cost data for a single-layer Laplace potential evaluation.

    :arg queue: a :class:`pyopencl.CommandQueue`.
    :arg geometry_getter: a callable that, given *queue*, returns a layer
        potential source.
    :arg kind: ``"actual"`` to return measured timing data from a real
        evaluation, or ``"model"`` to return the modeled performance.
    :raises ValueError: for any other *kind*.
    """
    lpot_source = geometry_getter(queue)

    from pytential import sym, bind
    sigma_sym = sym.var("sigma")

    from sumpy.kernel import LaplaceKernel
    k_sym = LaplaceKernel(lpot_source.ambient_dim)
    op = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1)

    bound_op = bind(lpot_source, op)

    density_discr = lpot_source.density_discr
    nodes = density_discr.nodes().with_queue(queue)
    sigma = cl.clmath.sin(10 * nodes[0])

    from pytools import one

    if kind == "actual":
        timing_data = {}
        result = bound_op.eval(
                queue, {"sigma": sigma}, timing_data=timing_data)
        assert not np.isnan(result.get(queue)).any()
        result = one(timing_data.values())
    elif kind == "model":
        perf_results = bound_op.get_modeled_performance(queue, sigma=sigma)
        result = one(perf_results.values())
    else:
        # Previously an unrecognized *kind* fell through and raised a
        # confusing NameError on *result*; fail loudly instead.
        raise ValueError("unknown kind: '%s'" % kind)

    return result
def problem_stats(order=3):
    """Write element and per-element DOF counts for the 2D and 3D wave
    operator discretizations to ``grudge-problem-stats.txt``."""
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(
        queue,
        allocator=cl_tools.MemoryPool(cl_tools.ImmediateAllocator(queue))
    )

    from pytools import one

    with open_output_file("grudge-problem-stats.txt") as outf:
        # Same statistics for both the 2D and the 3D problem.
        for dims in (2, 3):
            _, dg_discr = get_wave_op_with_discr(
                actx, dims=dims, order=order)
            print(f"Number of {dims}D elements:",
                  dg_discr.mesh.nelements, file=outf)

            vol_discr = dg_discr.discr_from_dd("vol")
            dofs = {grp.nunit_dofs for grp in vol_discr.groups}
            # *one* asserts all groups share the same per-element DOF count.
            print(f"Number of DOFs per {dims}D element:",
                  one(dofs), file=outf)

    logger.info("Wrote '%s'", outf.name)
def calibrate_cost_model(ctx):
    """Fit cost-model calibration parameters from timed evaluations over
    the training geometries and return the calibrated model."""
    queue = cl.CommandQueue(ctx)

    from pytential.qbx.cost import CostModel, estimate_calibration_params
    cost_model = CostModel()

    model_results = []
    timing_results = []

    for geometry in training_geometries(queue):
        geometry = geometry.copy(cost_model=cost_model)
        bound_op = get_bound_op(geometry)
        sigma = get_test_density(queue, geometry)

        modeled_cost = bound_op.get_modeled_cost(queue, sigma=sigma)

        # Warm-up run.
        bound_op.eval(queue, {"sigma": sigma})

        for _ in range(RUNS):
            timing = {}
            bound_op.eval(queue, {"sigma": sigma}, timing_data=timing)

            # One (model, measurement) pair per timed run.
            model_results.append(one(modeled_cost.values()))
            timing_results.append(one(timing.values()))

    params = estimate_calibration_params(model_results, timing_results)
    return cost_model.with_calibration_params(params)
def test_cost_model(ctx, cost_model):
    """Print modeled vs. measured per-stage times for each test geometry."""
    queue = cl.CommandQueue(ctx)

    for geometry in test_geometries(queue):
        geometry = geometry.copy(cost_model=cost_model)
        bound_op = get_bound_op(geometry)
        sigma = get_test_density(queue, geometry)

        modeled_cost = bound_op.get_modeled_cost(queue, sigma=sigma)
        model_result = one(modeled_cost.values()).get_predicted_times(
                merge_close_lists=True)

        # Warm-up run.
        bound_op.eval(queue, {"sigma": sigma})

        per_run_timings = []
        for _ in range(RUNS):
            timing = {}
            bound_op.eval(queue, {"sigma": sigma}, timing_data=timing)
            per_run_timings.append(one(timing.values()))

        # Average the measured process time per stage over all runs.
        timing_result = {
            stage: sum(t[stage]["process_elapsed"]
                       for t in per_run_timings) / RUNS
            for stage in model_result}

        print("=" * 20)
        for stage in model_result:
            print("stage: ", stage)
            print("actual: ", timing_result[stage])
            print("predicted: ", model_result[stage])
        print("=" * 20)
def test_cost_model_order_varying_by_level(ctx_factory):
    """For FMM order varying by level, this checks to ensure that the costs
    are different. The varying-level case should have larger cost.
    """
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue)

    # {{{ constant level to order

    def level_to_order_constant(kernel, kernel_args, tree, level):
        return 1

    lpot_source = get_lpot_source(actx, 2).copy(
        cost_model=QBXCostModel(),
        fmm_level_to_order=level_to_order_constant)
    places = GeometryCollection(lpot_source)

    density_discr = places.get_discretization(places.auto_source.geometry)
    sigma_sym = sym.var("sigma")
    k_sym = LaplaceKernel(2)
    sym_op = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1)

    sigma = get_density(actx, density_discr)

    cost_constant, metadata = bind(places, sym_op).cost_per_stage(
        "constant_one", sigma=sigma)

    cost_constant = one(cost_constant.values())
    metadata = one(metadata.values())

    # }}}

    # {{{ varying level to order

    def level_to_order_varying(kernel, kernel_args, tree, level):
        # Highest order at the root, decreasing toward the leaves.
        return metadata["nlevels"] - level

    lpot_source = get_lpot_source(actx, 2).copy(
        cost_model=QBXCostModel(),
        fmm_level_to_order=level_to_order_varying)
    places = GeometryCollection(lpot_source)

    density_discr = places.get_discretization(places.auto_source.geometry)

    sigma = get_density(actx, density_discr)

    # Bind against *places* for consistency with the constant-order case
    # above; binding the raw lpot_source would silently wrap it in a fresh
    # GeometryCollection distinct from the one sigma was built against.
    cost_varying, _ = bind(places, sym_op).cost_per_stage(
        "constant_one", sigma=sigma)

    cost_varying = one(cost_varying.values())

    # }}}

    assert sum(cost_varying.values()) > sum(cost_constant.values())
def test_cost_model(ctx, calibration_params):
    """Tabulate modeled vs. measured per-stage times for each test
    geometry using the given calibration parameters."""
    queue = cl.CommandQueue(ctx)
    actx = PyOpenCLArrayContext(queue, force_device_scalars=True)

    cost_model = QBXCostModel()

    for geometry in test_geometries(actx):
        geometry = geometry.copy(cost_model=cost_model)

        from pytential import GeometryCollection
        places = GeometryCollection(geometry)
        density_discr = places.get_discretization(places.auto_source.geometry)

        bound_op = get_bound_op(places)
        sigma = get_test_density(actx, density_discr)

        cost_S, _ = bound_op.cost_per_stage(calibration_params, sigma=sigma)
        model_result = one(cost_S.values())

        # Warm-up run.
        bound_op.eval({"sigma": sigma}, array_context=actx)

        per_run_timings = []
        for _ in range(RUNS):
            timing = {}
            bound_op.eval({"sigma": sigma}, array_context=actx,
                          timing_data=timing)
            per_run_timings.append(one(timing.values()))

        # Average the measured process time per stage over all runs.
        timing_result = {
            stage: sum(t[stage]["process_elapsed"]
                       for t in per_run_timings) / RUNS
            for stage in model_result}

        from pytools import Table
        table = Table()
        table.add_row(["stage", "actual (s)", "predicted (s)"])
        for stage in model_result:
            table.add_row([
                stage,
                f"{timing_result[stage]:.2f}",
                f"{model_result[stage]:.2f}",
            ])
        print(table)
def get_lpot_cost(which, helmholtz_k, geometry_getter, lpot_kwargs, kind):
    """Return cost data for a layer potential evaluation.

    Parameters:
        which: "D" or "S"
        helmholtz_k: Helmholtz parameter; 0 selects the Laplace kernel
        geometry_getter: callable returning a layer potential source
        lpot_kwargs: keyword arguments forwarded to *geometry_getter*
        kind: "actual" or "model"

    Raises:
        ValueError: if *which* or *kind* is not one of the values above.
    """
    context = cl.create_some_context(interactive=False)
    queue = cl.CommandQueue(context)

    lpot_source = geometry_getter(queue, lpot_kwargs)

    from sumpy.kernel import LaplaceKernel, HelmholtzKernel
    sigma_sym = sym.var("sigma")
    if helmholtz_k == 0:
        k_sym = LaplaceKernel(lpot_source.ambient_dim)
        kernel_kwargs = {}
    else:
        k_sym = HelmholtzKernel(lpot_source.ambient_dim, "k")
        kernel_kwargs = {"k": helmholtz_k}

    if which == "S":
        op = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1, **kernel_kwargs)
    elif which == "D":
        op = sym.D(k_sym, sigma_sym, qbx_forced_limit="avg", **kernel_kwargs)
    else:
        raise ValueError("unknown lpot symbol: '%s'" % which)

    bound_op = bind(lpot_source, op)

    density_discr = lpot_source.density_discr
    nodes = density_discr.nodes().with_queue(queue)
    sigma = cl.clmath.sin(10 * nodes[0])

    if kind == "actual":
        timing_data = {}
        result = bound_op.eval(
                queue, {"sigma": sigma}, timing_data=timing_data)
        assert not np.isnan(result.get(queue)).any()
        result = one(timing_data.values())
    elif kind == "model":
        perf_results = bound_op.get_modeled_performance(queue, sigma=sigma)
        result = one(perf_results.values())
    else:
        # Mirror the validation done for *which* above; previously an
        # unknown *kind* raised a confusing NameError on *result*.
        raise ValueError("unknown kind: '%s'" % kind)

    return result
def test_cost_model(ctx, cost_model):
    """Tabulate modeled vs. measured per-stage times for each test
    geometry."""
    queue = cl.CommandQueue(ctx)

    for geometry in test_geometries(queue):
        geometry = geometry.copy(cost_model=cost_model)
        bound_op = get_bound_op(geometry)
        sigma = get_test_density(queue, geometry)

        modeled_cost = bound_op.get_modeled_cost(queue, sigma=sigma)
        model_result = one(modeled_cost.values()).get_predicted_times(
                merge_close_lists=True)

        # Warm-up run.
        bound_op.eval(queue, {"sigma": sigma})

        per_run_timings = []
        for _ in range(RUNS):
            timing = {}
            bound_op.eval(queue, {"sigma": sigma}, timing_data=timing)
            per_run_timings.append(one(timing.values()))

        # Average the measured process time per stage over all runs.
        timing_result = {
            stage: sum(t[stage]["process_elapsed"]
                       for t in per_run_timings) / RUNS
            for stage in model_result}

        from pytools import Table
        table = Table()
        table.add_row(["stage", "actual (s)", "predicted (s)"])
        for stage in model_result:
            table.add_row([
                stage,
                "%.2f" % timing_result[stage],
                "%.2f" % model_result[stage],
            ])
        print(table)
def calibrate_cost_model(ctx):
    """Fit cost-model calibration parameters from timed evaluations over
    the training geometries and return the calibrated model."""
    queue = cl.CommandQueue(ctx)
    actx = PyOpenCLArrayContext(queue)

    from pytential.qbx.cost import CostModel, estimate_calibration_params
    cost_model = CostModel()

    model_results = []
    timing_results = []

    for geometry in training_geometries(actx):
        geometry = geometry.copy(cost_model=cost_model)

        from pytential import GeometryCollection
        places = GeometryCollection(geometry)
        density_discr = places.get_discretization(places.auto_source.geometry)

        bound_op = get_bound_op(places)
        sigma = get_test_density(actx, density_discr)

        modeled_cost = bound_op.get_modeled_cost(actx, sigma=sigma)

        # Warm-up run.
        bound_op.eval({"sigma": sigma}, array_context=actx)

        for _ in range(RUNS):
            timing = {}
            bound_op.eval({"sigma": sigma}, array_context=actx,
                          timing_data=timing)

            # One (model, measurement) pair per timed run.
            model_results.append(one(modeled_cost.values()))
            timing_results.append(one(timing.values()))

    params = estimate_calibration_params(model_results, timing_results)
    return cost_model.with_calibration_params(params)
def problem_stats(order=3):
    """Write element and per-element DOF counts for the 2D and 3D strong
    wave operator discretizations to ``grudge-problem-stats.txt``."""
    cl_ctx = cl.create_some_context()

    from pytools import one

    with open_output_file("grudge-problem-stats.txt") as outf:
        # Same statistics for both the 2D and the 3D problem.
        for dims in (2, 3):
            _, dg_discr = get_strong_wave_op_with_discr_direct(
                cl_ctx, dims=dims, order=order)
            print(f"Number of {dims}D elements:",
                  dg_discr.mesh.nelements, file=outf)

            vol_discr = dg_discr.discr_from_dd("vol")
            dofs = {grp.nunit_nodes for grp in vol_discr.groups}
            # *one* asserts all groups share the same per-element DOF count.
            print(f"Number of DOFs per {dims}D element:",
                  one(dofs), file=outf)

    logger.info("Wrote '%s'", outf.name)
def test_cost_model_order_varying_by_level(ctx_factory):
    """For FMM order varying by level, this checks to ensure that the costs
    are different. The varying-level case should have larger cost.
    """
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue)

    # {{{ constant level to order

    def level_to_order_constant(kernel, kernel_args, tree, level):
        return 1

    lpot_source = get_lpot_source(actx, 2).copy(
        cost_model=CostModel(calibration_params=CONSTANT_ONE_PARAMS),
        fmm_level_to_order=level_to_order_constant)
    places = GeometryCollection(lpot_source)

    density_discr = places.get_discretization(places.auto_source.geometry)
    sigma_sym = sym.var("sigma")
    k_sym = LaplaceKernel(2)
    sym_op = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1)

    sigma = get_density(actx, density_discr)
    cost_constant = one(
        bind(places, sym_op).get_modeled_cost(actx, sigma=sigma).values())

    # }}}

    # {{{ varying level to order

    # Reuse the constant-order result, overriding only the per-level FMM
    # orders: highest at the root, decreasing toward the leaves.
    nlevels = cost_constant.params["nlevels"]
    varying_params = cost_constant.params.copy()
    varying_params.update(
        ("p_fmm_lev%d" % lev, nlevels - lev) for lev in range(nlevels))
    cost_varying = cost_constant.with_params(varying_params)

    # }}}

    assert (
        sum(cost_varying.get_predicted_times().values())
        > sum(cost_constant.get_predicted_times().values()))
def test_cost_model_metadata_gathering(ctx_factory):
    """Test that the cost model correctly gathers metadata."""
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue)

    from sumpy.expansion.level_to_order import SimpleExpansionOrderFinder
    order_finder = SimpleExpansionOrderFinder(tol=1e-5)

    lpot_source = get_lpot_source(actx, 2).copy(
        fmm_level_to_order=order_finder)
    places = GeometryCollection(lpot_source)

    density_discr = places.get_discretization(places.auto_source.geometry)
    sigma = get_density(actx, density_discr)

    sigma_sym = sym.var("sigma")
    k_sym = HelmholtzKernel(2, "k")
    k = 2

    sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1, k=sym.var("k"))
    op_S = bind(places, sym_op_S)

    # Only the metadata is of interest here; discard the cost itself.
    _, metadata = op_S.cost_per_stage(
        "constant_one", sigma=sigma, k=k, return_metadata=True)
    metadata = one(metadata.values())

    geo_data = lpot_source.qbx_fmm_geometry_data(
        places, places.auto_source,
        target_discrs_and_qbx_sides=((density_discr, 1),))

    tree = geo_data.tree()

    # Gathered metadata must match the actual tree/geometry quantities.
    assert metadata["p_qbx"] == QBX_ORDER
    assert metadata["nlevels"] == tree.nlevels
    assert metadata["nsources"] == tree.nsources
    assert metadata["ntargets"] == tree.ntargets
    assert metadata["ncenters"] == geo_data.ncenters

    for level in range(tree.nlevels):
        assert (
            metadata["p_fmm_lev%d" % level]
            == order_finder(k_sym, {"k": 2}, tree, level))
def test_cost_model_metadata_gathering(ctx_getter):
    """Test that the cost model correctly gathers metadata."""
    cl_ctx = ctx_getter()
    queue = cl.CommandQueue(cl_ctx)

    from sumpy.expansion.level_to_order import SimpleExpansionOrderFinder
    order_finder = SimpleExpansionOrderFinder(tol=1e-5)

    lpot_source = get_lpot_source(queue, 2).copy(
        fmm_level_to_order=order_finder)

    sigma = get_density(queue, lpot_source)

    sigma_sym = sym.var("sigma")
    k_sym = HelmholtzKernel(2, "k")
    k = 2

    sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1, k=sym.var("k"))
    op_S = bind(lpot_source, sym_op_S)

    cost_S = one(op_S.get_modeled_cost(queue, sigma=sigma, k=k).values())

    geo_data = lpot_source.qbx_fmm_geometry_data(
        target_discrs_and_qbx_sides=((lpot_source.density_discr, 1),))

    tree = geo_data.tree()

    # Gathered parameters must match the actual tree/geometry quantities.
    assert cost_S.params["p_qbx"] == QBX_ORDER
    assert cost_S.params["nlevels"] == tree.nlevels
    assert cost_S.params["nsources"] == tree.nsources
    assert cost_S.params["ntargets"] == tree.ntargets
    assert cost_S.params["ncenters"] == geo_data.ncenters

    for level in range(tree.nlevels):
        assert (
            cost_S.params["p_fmm_lev%d" % level]
            == order_finder(k_sym, {"k": 2}, tree, level))
def test_cost_model_correctness(ctx_factory, dim, off_surface,
        use_target_specific_qbx):
    """Check that computed cost matches that of a constant-one FMM."""
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue)

    cost_model = QBXCostModel(
        translation_cost_model_factory=OpCountingTranslationCostModel)

    lpot_source = get_lpot_source(actx, dim).copy(
        cost_model=cost_model,
        _use_target_specific_qbx=use_target_specific_qbx)

    # Construct targets.
    if off_surface:
        from pytential.target import PointsTarget
        from boxtree.tools import make_uniform_particle_array
        ntargets = 10 ** 3
        # np.float was removed in NumPy 1.24; np.float64 is the equivalent
        # concrete dtype (np.float aliased the builtin float).
        targets = PointsTarget(
            make_uniform_particle_array(queue, ntargets, dim, np.float64))
        target_discrs_and_qbx_sides = ((targets, 0),)
        qbx_forced_limit = None
    else:
        targets = lpot_source.density_discr
        target_discrs_and_qbx_sides = ((targets, 1),)
        qbx_forced_limit = 1

    places = GeometryCollection((lpot_source, targets))
    source_dd = places.auto_source
    density_discr = places.get_discretization(source_dd.geometry)

    # Construct bound op, run cost model.
    sigma_sym = sym.var("sigma")
    k_sym = LaplaceKernel(lpot_source.ambient_dim)
    sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=qbx_forced_limit)

    op_S = bind(places, sym_op_S)
    sigma = get_density(actx, density_discr)

    from pytools import one
    modeled_time, _ = op_S.cost_per_stage("constant_one", sigma=sigma)
    modeled_time = one(modeled_time.values())

    # Run FMM with ConstantOneWrangler. This can't be done with pytential's
    # high-level interface, so call the FMM driver directly.
    from pytential.qbx.fmm import drive_fmm
    geo_data = lpot_source.qbx_fmm_geometry_data(
        places, source_dd.geometry,
        target_discrs_and_qbx_sides=target_discrs_and_qbx_sides)

    wrangler = ConstantOneQBXExpansionWrangler(
        queue, geo_data, use_target_specific_qbx)

    quad_stage2_density_discr = places.get_discretization(
        source_dd.geometry, sym.QBX_SOURCE_QUAD_STAGE2)
    ndofs = quad_stage2_density_discr.ndofs
    src_weights = np.ones(ndofs)

    timing_data = {}
    potential = drive_fmm(wrangler, (src_weights,), timing_data,
        traversal=wrangler.trav)[0][geo_data.ncenters:]

    # Check constant one wrangler for correctness.
    assert (potential == ndofs).all()

    # Check that the cost model matches the timing data returned by the
    # constant one wrangler.
    mismatches = []
    for stage in timing_data:
        if stage not in modeled_time:
            # Stages not in the model must have contributed no work.
            assert timing_data[stage]["ops_elapsed"] == 0
        else:
            if timing_data[stage]["ops_elapsed"] != modeled_time[stage]:
                mismatches.append(
                    (stage, timing_data[stage]["ops_elapsed"],
                     modeled_time[stage]))

    assert not mismatches, "\n".join(str(s) for s in mismatches)

    # {{{ Test per-box cost

    total_cost = 0.0
    for stage in timing_data:
        total_cost += timing_data[stage]["ops_elapsed"]

    per_box_cost, _ = op_S.cost_per_box("constant_one", sigma=sigma)
    print(per_box_cost)
    per_box_cost = one(per_box_cost.values())

    total_aggregate_cost = cost_model.aggregate_over_boxes(per_box_cost)
    # coarsen_multipoles and refine_locals are modeled per stage only, not
    # per box, so they are added back in explicitly.
    assert total_cost == (
        total_aggregate_cost
        + modeled_time["coarsen_multipoles"]
        + modeled_time["refine_locals"]
    )

    # }}}
def test_cost_model_correctness(ctx_getter, dim, off_surface,
        use_target_specific_qbx):
    """Check that computed cost matches that of a constant-one FMM."""
    cl_ctx = ctx_getter()
    queue = cl.CommandQueue(cl_ctx)

    cost_model = CostModel(
        translation_cost_model_factory=OpCountingTranslationCostModel)

    lpot_source = get_lpot_source(queue, dim).copy(
        cost_model=cost_model,
        _use_target_specific_qbx=use_target_specific_qbx)

    # Construct targets.
    if off_surface:
        from pytential.target import PointsTarget
        from boxtree.tools import make_uniform_particle_array
        ntargets = 10**3
        # np.float was removed in NumPy 1.24; np.float64 is the equivalent
        # concrete dtype (np.float aliased the builtin float).
        targets = PointsTarget(
            make_uniform_particle_array(queue, ntargets, dim, np.float64))
        target_discrs_and_qbx_sides = ((targets, 0),)
        qbx_forced_limit = None
    else:
        targets = lpot_source.density_discr
        target_discrs_and_qbx_sides = ((targets, 1),)
        qbx_forced_limit = 1

    # Construct bound op, run cost model.
    sigma_sym = sym.var("sigma")
    k_sym = LaplaceKernel(lpot_source.ambient_dim)
    sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=qbx_forced_limit)

    op_S = bind((lpot_source, targets), sym_op_S)
    sigma = get_density(queue, lpot_source)

    from pytools import one
    cost_S = one(op_S.get_modeled_cost(queue, sigma=sigma).values())

    # Run FMM with ConstantOneWrangler. This can't be done with pytential's
    # high-level interface, so call the FMM driver directly.
    from pytential.qbx.fmm import drive_fmm
    geo_data = lpot_source.qbx_fmm_geometry_data(
        target_discrs_and_qbx_sides=target_discrs_and_qbx_sides)

    wrangler = ConstantOneQBXExpansionWrangler(
        queue, geo_data, use_target_specific_qbx)

    nnodes = lpot_source.quad_stage2_density_discr.nnodes
    src_weights = np.ones(nnodes)

    timing_data = {}
    potential = drive_fmm(wrangler, src_weights, timing_data,
        traversal=wrangler.trav)[0][geo_data.ncenters:]

    # Check constant one wrangler for correctness.
    assert (potential == nnodes).all()

    modeled_time = cost_S.get_predicted_times(merge_close_lists=True)

    # Check that the cost model matches the timing data returned by the
    # constant one wrangler.
    mismatches = []
    for stage in timing_data:
        if timing_data[stage]["ops_elapsed"] != modeled_time[stage]:
            mismatches.append(
                (stage, timing_data[stage]["ops_elapsed"],
                 modeled_time[stage]))

    assert not mismatches, "\n".join(str(s) for s in mismatches)
def find_missing_node(face_vertex_node_indices):
    # Exactly one vertex node is absent from this face; *one* asserts that
    # and returns it.
    missing = vertex_node_indices - set(face_vertex_node_indices)
    return unit_nodes[one(missing)]
def find_missing_node(face_vertex_node_indices):
    # Exactly one vertex node is absent from this face; *one* asserts that
    # and returns it.
    missing = vertex_node_indices - set(face_vertex_node_indices)
    return unit_nodes[one(missing)]