def find_associativity():
    # toolchain and MODULE_CODE are assumed to be defined at module level.
    FUNC_CODE = """
        int go(unsigned array_size, unsigned stride, unsigned steps)
        {
            /* char array: one byte per element (the original sizeof(int)
               factor overallocated by 4x) */
            char *ary = (char *) malloc(array_size);

            unsigned p = 0;
            for (unsigned i = 0; i < steps; ++i)
            {
                ary[p]++;
                p += stride;
                if (p >= array_size)
                    p = 0;
            }

            int result = 0;
            for (unsigned i = 0; i < array_size; ++i)
                result += ary[i];
            free(ary);
            return result;
        }
        """

    from codepy.jit import extension_from_string
    cmod = extension_from_string(toolchain, "module", MODULE_CODE % FUNC_CODE)

    from time import time

    result = {}
    steps = 2**20

    from pytools import ProgressBar
    meg_range = range(1, 25)
    stride_range = range(1, 640)
    pb = ProgressBar("bench", len(meg_range) * len(stride_range))
    for array_megs in meg_range:
        for stride in stride_range:
            start = time()
            cmod.go(array_megs << 20, stride, steps)
            stop = time()
            elapsed = stop - start

            # each step reads and writes one byte of the char array
            gb_transferred = 2 * steps / 1e9
            bandwidth = gb_transferred / elapsed

            result[array_megs, stride] = bandwidth
            pb.progress()
    pb.finished()

    from pickle import dump
    with open("assoc_result.dat", "wb") as outf:
        dump(result, outf)
    with open("assoc.c", "w") as outf:
        outf.write(FUNC_CODE)
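# A minimal plotting sketch for the data written above (not part of the
# original benchmark): load "assoc_result.dat" and plot bandwidth over
# stride, one curve per array size. Cache-associativity effects show up as
# bandwidth drops at power-of-two strides.
def plot_associativity_results():
    from pickle import load
    import matplotlib.pyplot as pt

    with open("assoc_result.dat", "rb") as inf:
        result = load(inf)

    for array_megs in [1, 4, 16]:  # arbitrary subset of the measured sizes
        strides = sorted(s for (m, s) in result if m == array_megs)
        pt.plot(strides, [result[array_megs, s] for s in strides],
                label="%d MB" % array_megs)
    pt.xlabel("stride (bytes)")
    pt.ylabel("effective bandwidth (GB/s)")
    pt.legend()
    pt.show()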
def build_matrix(op, dtype=None, shape=None):
    """Build a dense matrix representation of the linear operator *op*
    by applying it to unit vectors, one column at a time.
    """
    dtype = dtype or op.dtype
    shape = shape or op.shape
    rows, cols = shape

    from pytools import ProgressBar
    pb = ProgressBar("matrix", cols)

    mat = np.zeros(shape, dtype)

    try:
        matvec_method = op.matvec
    except AttributeError:
        matvec_method = op.__call__

    for i in range(cols):
        unit_vec = np.zeros(cols, dtype=dtype)
        unit_vec[i] = 1
        mat[:, i] = matvec_method(unit_vec)
        pb.progress()

    pb.finished()

    return mat
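# Hypothetical usage sketch for build_matrix (the operator below is made up
# for illustration): anything exposing .matvec, or that is itself callable,
# together with .dtype and .shape, can be densified column by column.
class _ShiftOperator:
    """Cyclically shift a vector by one entry."""
    dtype = np.float64
    shape = (4, 4)

    def matvec(self, x):
        return np.roll(x, 1)

# build_matrix(_ShiftOperator()) yields the 4x4 cyclic permutation matrix.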
def test_fmm_completeness(ctx_getter, dims, nsources_req, ntargets_req,
        who_has_extent, source_gen, target_gen, filter_kind,
        well_sep_is_n_away, extent_norm, from_sep_smaller_crit):
    """Tests whether the built FMM traversal structures and driver completely
    capture all interactions.
    """

    sources_have_extent = "s" in who_has_extent
    targets_have_extent = "t" in who_has_extent

    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    dtype = np.float64

    try:
        sources = source_gen(queue, nsources_req, dims, dtype, seed=15)
        nsources = len(sources[0])

        if ntargets_req is None:
            # This says "same as sources" to the tree builder.
            targets = None
            ntargets = ntargets_req
        else:
            targets = target_gen(queue, ntargets_req, dims, dtype, seed=16)
            ntargets = len(targets[0])
    except ImportError:
        pytest.skip("loo.py not available, but needed for particle array "
                "generation")

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=12)

    if sources_have_extent:
        source_radii = 2**rng.uniform(queue, nsources, dtype=dtype,
                a=-10, b=0)
    else:
        source_radii = None

    if targets_have_extent:
        target_radii = 2**rng.uniform(queue, ntargets, dtype=dtype,
                a=-10, b=0)
    else:
        target_radii = None

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, targets=targets,
            max_particles_in_box=30,
            source_radii=source_radii, target_radii=target_radii,
            debug=True, stick_out_factor=0.25, extent_norm=extent_norm)

    if 0:
        tree.get().plot()
        import matplotlib.pyplot as pt
        pt.show()

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx,
            well_sep_is_n_away=well_sep_is_n_away,
            from_sep_smaller_crit=from_sep_smaller_crit)
    trav, _ = tbuild(queue, tree, debug=True)

    if who_has_extent:
        pre_merge_trav = trav
        trav = trav.merge_close_lists(queue)

    # weights = np.random.randn(nsources)
    weights = np.ones(nsources)
    weights_sum = np.sum(weights)

    host_trav = trav.get(queue=queue)
    host_tree = host_trav.tree

    if who_has_extent:
        pre_merge_host_trav = pre_merge_trav.get(queue=queue)

    from boxtree.tree import ParticleListFilter
    plfilt = ParticleListFilter(ctx)

    if filter_kind:
        flags = rng.uniform(queue, ntargets or nsources, np.int32, a=0, b=2) \
                .astype(np.int8)
        if filter_kind == "user":
            filtered_targets = plfilt.filter_target_lists_in_user_order(
                    queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInUserOrder(
                    host_tree, filtered_targets.get(queue=queue))
        elif filter_kind == "tree":
            filtered_targets = plfilt.filter_target_lists_in_tree_order(
                    queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInTreeOrder(
                    host_tree, filtered_targets.get(queue=queue))
        else:
            raise ValueError("unsupported value of 'filter_kind'")
    else:
        wrangler = ConstantOneExpansionWrangler(host_tree)
        flags = cl.array.empty(queue, ntargets or nsources, dtype=np.int8)
        flags.fill(1)

    if ntargets is None and not filter_kind:
        # This check only works for targets == sources.
        assert (wrangler.reorder_potentials(
                wrangler.reorder_sources(weights)) == weights).all()

    from boxtree.fmm import drive_fmm
    pot = drive_fmm(host_trav, wrangler, weights)

    if filter_kind:
        pot = pot[flags.get() > 0]

    rel_err = la.norm((pot - weights_sum) / nsources)
    good = rel_err < 1e-8

    # {{{ build, evaluate matrix (and identify incorrect interactions)

    if 0 and not good:
        mat = np.zeros((ntargets, nsources), dtype)
        from pytools import ProgressBar
        logging.getLogger().setLevel(logging.WARNING)
        pb = ProgressBar("matrix", nsources)
        for i in range(nsources):
            unit_vec = np.zeros(nsources, dtype=dtype)
            unit_vec[i] = 1
            mat[:, i] = drive_fmm(host_trav, wrangler, unit_vec)
            pb.progress()
        pb.finished()
        logging.getLogger().setLevel(logging.INFO)

        import matplotlib.pyplot as pt

        if 0:
            pt.imshow(mat)
            pt.colorbar()
            pt.show()

        incorrect_tgts, incorrect_srcs = np.where(mat != 1)

        if 1 and len(incorrect_tgts):
            from boxtree.visualization import TreePlotter
            plotter = TreePlotter(host_tree)
            plotter.draw_tree(fill=False, edgecolor="black")
            plotter.draw_box_numbers()
            plotter.set_bounding_box()

            tree_order_incorrect_tgts = \
                    host_tree.indices_to_tree_target_order(incorrect_tgts)
            tree_order_incorrect_srcs = \
                    host_tree.indices_to_tree_source_order(incorrect_srcs)

            src_boxes = [
                    host_tree.find_box_nr_for_source(i)
                    for i in tree_order_incorrect_srcs]
            tgt_boxes = [
                    host_tree.find_box_nr_for_target(i)
                    for i in tree_order_incorrect_tgts]
            print(src_boxes)
            print(tgt_boxes)

            # plot all sources/targets
            if 0:
                pt.plot(host_tree.targets[0], host_tree.targets[1],
                        "v", alpha=0.9)
                pt.plot(host_tree.sources[0], host_tree.sources[1],
                        "gx", alpha=0.9)

            # plot offending sources/targets
            if 0:
                pt.plot(host_tree.targets[0][tree_order_incorrect_tgts],
                        host_tree.targets[1][tree_order_incorrect_tgts],
                        "rv")
                pt.plot(host_tree.sources[0][tree_order_incorrect_srcs],
                        host_tree.sources[1][tree_order_incorrect_srcs],
                        "go")

            pt.gca().set_aspect("equal")

            from boxtree.visualization import draw_box_lists
            draw_box_lists(
                    plotter,
                    pre_merge_host_trav if who_has_extent else host_trav,
                    22)
            # from boxtree.visualization import draw_same_level_non_well_sep_boxes
            # draw_same_level_non_well_sep_boxes(plotter, host_trav, 2)

            pt.show()

    # }}}

    if 0 and not good:
        import matplotlib.pyplot as pt
        pt.plot(pot - weights_sum)
        pt.show()

    if 0 and not good:
        import matplotlib.pyplot as pt
        filt_targets = [
                host_tree.targets[0][flags.get() > 0],
                host_tree.targets[1][flags.get() > 0],
                ]
        host_tree.plot()
        bad = np.abs(pot - weights_sum) >= 1e-3
        bad_targets = [
                filt_targets[0][bad],
                filt_targets[1][bad],
                ]
        print(bad_targets[0].shape)
        pt.plot(filt_targets[0], filt_targets[1], "x")
        pt.plot(bad_targets[0], bad_targets[1], "v")
        pt.show()

    assert good
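# Why constant-one weights suffice for the completeness test above: with a
# kernel that is identically 1 and unit source weights, every target's
# potential equals the number of sources, so any missed or double-counted
# source/target pair shows up as an entry different from nsources. A
# boxtree-independent sketch of the same invariant:
def _constant_one_invariant():
    import numpy as np
    nsources, ntargets = 100, 80
    weights = np.ones(nsources)
    # direct "interaction matrix" of the constant-one kernel: all ones
    mat = np.ones((ntargets, nsources))
    pot = mat @ weights
    assert np.allclose(pot, weights.sum())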
def __init__(self, quad_order, method="gauss-legendre", dim=2,
        kernel_func=None, kernel_type=None, sumpy_kernel=None,
        build_method=None, source_box_extent=1, dtype=np.float64,
        inverse_droste=False, progress_bar=True, **kwargs):
    """The source box is [0, source_box_extent]^dim.

    :arg kernel_type: determines how the kernel is scaled w.r.t. box size.
    :arg build_method: can be "Transform" or "DrosteSum".
    :arg inverse_droste: True if computing with the fractional Laplacian
        kernel.
    """
    self.quad_order = quad_order
    self.dim = dim
    self.dtype = dtype
    self.inverse_droste = inverse_droste

    assert source_box_extent > 0
    self.source_box_extent = source_box_extent

    self.center = np.ones(self.dim) * 0.5 * self.source_box_extent

    self.build_method = build_method

    if dim == 1:
        if build_method == "Transform":
            raise NotImplementedError("Use build_method=DrosteSum for 1d")
        self.kernel_func = kernel_func
        self.kernel_type = kernel_type
        self.integral_knl = sumpy_kernel
    elif dim == 2:
        # A constant kernel can be used for fun/testing
        if kernel_func is None:
            kernel_func = constant_one
            kernel_type = "const"
            # for DrosteSum, kernel_func is unused
            if build_method == "Transform":
                logger.warning("setting kernel_func to be constant.")

        # Kernel function differs from OpenCL's kernels
        self.kernel_func = kernel_func
        self.kernel_type = kernel_type
        self.integral_knl = sumpy_kernel

        if build_method == "DrosteSum":
            assert sumpy_kernel is not None
    elif dim == 3:
        if build_method == "Transform":
            raise NotImplementedError("Use build_method=DrosteSum for 3d")
        self.kernel_func = kernel_func
        self.kernel_type = kernel_type
        self.integral_knl = sumpy_kernel
    else:
        raise NotImplementedError

    # number of quad points per box,
    # equal to the number of modes per box
    self.n_q_points = self.quad_order**dim

    # Normalizers for polynomial modes.
    # Needed only when we want to rescale log type kernels.
    self.mode_normalizers = np.zeros(self.n_q_points, dtype=self.dtype)

    # Exterior normalizers for hypersingular kernels
    self.kernel_exterior_normalizers = np.zeros(
            self.n_q_points, dtype=self.dtype)

    # number of (source_mode, target_point) pairs between two boxes
    self.n_pairs = self.n_q_points**2

    # possible interaction cases
    self.interaction_case_vecs, self.case_encode, self.case_indices = \
            gallery.generate_list1_gallery(self.dim)
    self.n_cases = len(self.interaction_case_vecs)

    if method == "gauss-legendre":
        # quad points in [-1, 1]
        import volumential.meshgen as mg
        queue = kwargs.get("queue")
        q_points, _, _ = mg.make_uniform_cubic_grid(
                degree=quad_order, level=1, dim=self.dim, queue=queue)

        # map to the source box
        mapped_q_points = np.array([
            0.5 * self.source_box_extent * (qp + np.ones(self.dim))
            for qp in q_points])

        # sort in dictionary order, preserving only the leading digits
        # to prevent floating point errors from polluting the ordering
        q_points_ordering = sorted(
                range(len(mapped_q_points)),
                key=lambda i: list(np.floor(mapped_q_points[i] * 10000)))
        self.q_points = mapped_q_points[q_points_ordering]
    else:
        raise NotImplementedError

    self.data = np.empty(self.n_pairs * self.n_cases, dtype=self.dtype)
    self.data.fill(np.nan)

    total_evals = len(self.data) + self.n_q_points

    if progress_bar:
        from pytools import ProgressBar
        self.pb = ProgressBar("Building table:", total_evals)
    else:
        self.pb = None

    self.is_built = False
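# Standalone sketch of the quadrature-point ordering used above: sorting by
# the truncated coordinates (floor of coordinate * 10000) makes the
# dictionary order robust against floating-point noise, since sorted() is
# stable for keys that compare equal. The points below are made up.
def _ordering_example():
    import numpy as np
    pts = np.array([
        [0.5, 0.25 + 1e-13],   # noise-level perturbation of the next point
        [0.5, 0.25],
        [0.1, 0.9],
        ])
    order = sorted(range(len(pts)),
                   key=lambda i: list(np.floor(pts[i] * 10000)))
    # [0.1, 0.9] sorts first; the two nearly identical points keep their
    # original relative order instead of flipping unpredictably
    print(pts[order])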
def optimize_plan(opt_name, plan_generator, target_func, maximize,
        debug_flags=set(), occupancy_slack=0.5, log_filename=None):
    plans = [p for p in plan_generator() if p.invalid_reason() is None]

    debug = "cuda_%s_plan" % opt_name in debug_flags
    show_progress = ("cuda_plan_no_progress" not in debug_flags) and not debug

    if "cuda_plan_log" not in debug_flags:
        log_filename = None

    if not plans:
        raise RuntimeError("no valid CUDA execution plans found")

    if set(["cuda_no_plan", "cuda_no_plan_"+opt_name]) & debug_flags:
        from pytools import argmax2
        return argmax2((plan, plan.occupancy_record().occupancy)
                for plan in plans), 0

    max_occup = max(plan.occupancy_record().occupancy for plan in plans)
    desired_occup = occupancy_slack * max_occup

    if log_filename is not None:
        from pytools import single_valued
        feature_columns = single_valued(p.feature_columns() for p in plans)
        feature_names = [fc.split()[0] for fc in feature_columns]

        try:
            import sqlite3 as sqlite
        except ImportError:
            from pysqlite2 import dbapi2 as sqlite

        db_conn = sqlite.connect("plan-%s.dat" % log_filename)

        try:
            db_conn.execute("""
                create table data (
                    id integer primary key autoincrement,
                    %s,
                    value real)""" % ", ".join(feature_columns))
        except sqlite.OperationalError:
            # table exists already
            pass

    if show_progress:
        from pytools import ProgressBar
        pbar = ProgressBar("plan "+opt_name, len(plans))

    try:
        plan_values = []

        for p in plans:
            if show_progress:
                pbar.progress()

            if p.occupancy_record().occupancy >= desired_occup - 1e-10:
                if debug:
                    print("<---- trying %s:" % p)
                value = target_func(p)
                if isinstance(value, tuple):
                    extra_info = value[1:]
                    value = value[0]
                else:
                    extra_info = None

                if value is not None:
                    if debug:
                        print("----> yielded %g" % value)
                    plan_values.append(((len(plan_values), p), value))

                    if log_filename is not None:
                        db_conn.execute(
                                "insert into data (%s,value) values (%s)"
                                % (", ".join(feature_names),
                                    ",".join(["?"]*(1+len(feature_names)))),
                                p.features(*extra_info)+(value,))
    finally:
        if show_progress:
            pbar.finished()

    if log_filename is not None:
        db_conn.commit()

    from pytools import argmax2, argmin2
    if maximize:
        num_plan, plan = argmax2(plan_values)
    else:
        num_plan, plan = argmin2(plan_values)

    plan_value = plan_values[num_plan][1]

    if debug:
        print("----------------------------------------------")
        print("chosen: %s" % plan)
        print("value: %g" % plan_value)
        print("----------------------------------------------")

    return plan, plan_value
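# Standalone sketch of the plan-selection strategy above (all names and
# numbers made up): candidates outside the occupancy slack band are never
# benchmarked; among the survivors, the measured value decides.
def _plan_selection_example():
    # (name, occupancy, measured runtime in ms)
    candidates = [
        ("planA", 0.75, 1.9),
        ("planB", 0.30, 1.2),
        ("planC", 0.70, 1.5),
        ]
    occupancy_slack = 0.5
    max_occup = max(occ for _, occ, _ in candidates)
    desired_occup = occupancy_slack * max_occup
    viable = [c for c in candidates if c[1] >= desired_occup - 1e-10]
    # planB is pruned (0.30 < 0.375) even though it would have timed fastest
    name, _, _ = min(viable, key=lambda c: c[2])
    print(name)  # planC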
def dump_couch_to_sqlite(couch_db, outfile, scan_max=None):
    import sqlite3 as sqlite

    # {{{ scan for types

    column_type_dict = {}

    from pytools import ProgressBar
    pb = ProgressBar("scan (pass 1/2)", len(couch_db))
    scan_count = 0
    for doc in generate_all_docs(couch_db):
        if "type" in doc and doc["type"] == "job":
            for k, v in six.iteritems(doc):
                new_type = type(v)
                if (k in column_type_dict
                        and column_type_dict[k] != new_type
                        and v is not None):
                    old_type = column_type_dict[k]
                    if set([old_type, new_type]) == set([float, int]):
                        new_type = float
                    else:
                        raise RuntimeError(
                                "ambiguous types for '%s': %s, %s"
                                % (k, new_type, old_type))
                column_type_dict[k] = new_type

        scan_count += 1
        if scan_max is not None and scan_count >= scan_max:
            break

        pb.progress()
    pb.finished()

    # }}}

    del column_type_dict["type"]

    column_types = []
    for name, tp in six.iteritems(column_type_dict):
        column_types.append((name, tp))

    def get_sql_type(tp):
        if tp in (str, six.text_type):
            return "text"
        elif issubclass(tp, list):
            return "text"
        elif issubclass(tp, int):
            return "integer"
        elif issubclass(tp, (float, numpy.floating)):
            return "real"
        else:
            raise TypeError("No SQL type for %s" % tp)

    create_stmt = ("create table data (%s)"
            % ",".join("%s %s" % (name, get_sql_type(tp))
                for name, tp in column_types))

    db_conn = sqlite.connect(outfile, timeout=30)
    db_conn.execute(create_stmt)
    db_conn.commit()

    insert_stmt = "insert into data values (%s)" % (
            ",".join(["?"] * len(column_types)))

    pb = ProgressBar("fill (pass 2/2)", len(couch_db))
    for doc in generate_all_docs(couch_db):
        # Pass 1 only scanned "job" documents, so skip the rest here, too,
        # instead of inserting all-NULL rows for them.
        if "type" in doc and doc["type"] == "job":
            data = [None] * len(column_types)
            for i, (col_name, col_tp) in enumerate(column_types):
                try:
                    if isinstance(doc[col_name], list):
                        data[i] = str(doc[col_name])
                    else:
                        data[i] = doc[col_name]
                except KeyError:
                    print("doc %s had no field %s" % (doc["_id"], col_name))

            db_conn.execute(insert_stmt, data)

        pb.progress()
    pb.finished()

    db_conn.commit()
    db_conn.close()
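# Minimal sketch of the pass-1 type resolution above, independent of CouchDB:
# a column seen as both int and float widens to float; any other conflict
# raises. The sample documents are made up.
def _resolve_column_types(docs):
    column_type_dict = {}
    for doc in docs:
        for k, v in doc.items():
            new_type = type(v)
            old_type = column_type_dict.get(k)
            if old_type is not None and old_type != new_type and v is not None:
                if {old_type, new_type} == {float, int}:
                    new_type = float
                else:
                    raise RuntimeError("ambiguous types for %r" % k)
            column_type_dict[k] = new_type
    return column_type_dict

# _resolve_column_types([{"t": 1}, {"t": 2.5}])  ->  {"t": <class 'float'>}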