def find_associativity():
    """Benchmark effective memory bandwidth over a grid of array sizes and
    strides, to reveal cache-associativity effects.

    JIT-compiles the embedded C kernel via :mod:`codepy`, times ``go()`` for
    every (array size in MiB, stride) combination, and writes:

    * ``assoc_result.dat`` -- pickled dict mapping ``(array_megs, stride)``
      to measured bandwidth in GB/s,
    * ``assoc.c`` -- the C kernel source, for reference.

    Relies on module-level ``toolchain``, ``MODULE_CODE`` and ``time`` being
    defined elsewhere in this file.
    """
    FUNC_CODE = """
        int go(unsigned array_size, unsigned stride, unsigned steps)
        {
          char *ary = (char *) malloc(sizeof(int) * array_size);

          unsigned p = 0;
          for (unsigned i = 0; i < steps; ++i)
          {
            ary[p] ++;
            p += stride;
            if (p >= array_size)
              p = 0;
          }

          int result = 0;
          for (unsigned i = 0; i < array_size; ++i)
            result += ary[i];
          free(ary);
          return result;
        }
        """

    from codepy.jit import extension_from_string
    cmod = extension_from_string(toolchain, "module", MODULE_CODE % FUNC_CODE)

    result = {}

    steps = 2**20

    from pytools import ProgressBar
    meg_range = range(1, 25)
    stride_range = range(1, 640)
    pb = ProgressBar("bench", len(meg_range)*len(stride_range))
    for array_megs in meg_range:
        for stride in stride_range:
            start = time()
            cmod.go(array_megs << 20, stride, steps)
            stop = time()
            elapsed = stop-start

            # One byte is read and one written per step (the C kernel walks a
            # char array), hence the factor 2.
            gb_transferred = 2*steps/1e9
            bandwidth = gb_transferred/elapsed

            result[array_megs, stride] = bandwidth

            pb.progress()

    from cPickle import dump
    # "wb" and context managers: pickle streams are binary data, and the
    # original code leaked both file handles by never closing them.
    with open("assoc_result.dat", "wb") as outf:
        dump(result, outf)
    with open("assoc.c", "w") as srcf:
        srcf.write(FUNC_CODE)
def find_associativity():
    """Benchmark effective memory bandwidth for a grid of array sizes and
    strides and record the results.

    JIT-compiles the embedded C kernel, times ``go()`` for each
    (array size in MiB, stride) pair, pickles the bandwidth dict to
    ``assoc_result.dat`` and writes the kernel source to ``assoc.c``.

    NOTE(review): depends on module-level ``toolchain``, ``MODULE_CODE``
    and ``time`` defined elsewhere in this file; ``cPickle`` marks this as
    Python-2 code.
    """
    # C kernel: strides through a char array, incrementing one byte per step,
    # then sums the array so the work cannot be optimized away.
    FUNC_CODE = """
        int go(unsigned array_size, unsigned stride, unsigned steps)
        {
          char *ary = (char *) malloc(sizeof(int) * array_size);

          unsigned p = 0;
          for (unsigned i = 0; i < steps; ++i)
          {
            ary[p] ++;
            p += stride;
            if (p >= array_size)
              p = 0;
          }

          int result = 0;
          for (unsigned i = 0; i < array_size; ++i)
            result += ary[i];
          free(ary);
          return result;
        }
        """

    from codepy.jit import extension_from_string
    cmod = extension_from_string(toolchain, "module", MODULE_CODE % FUNC_CODE)

    # maps (array size in MiB, stride) -> measured bandwidth in GB/s
    result = {}

    steps = 2**20

    from pytools import ProgressBar
    meg_range = range(1, 25)
    stride_range = range(1, 640)
    pb = ProgressBar("bench", len(meg_range) * len(stride_range))
    for array_megs in meg_range:
        for stride in stride_range:
            start = time()
            # array_megs << 20 converts MiB to bytes/elements.
            cmod.go(array_megs << 20, stride, steps)
            stop = time()
            elapsed = stop - start

            # 2 for rw, 4 for sizeof(int)
            # NOTE(review): the comment above mentions a factor 4 for
            # sizeof(int), but the code multiplies by 2 only and the C kernel
            # touches char-sized (1-byte) elements -- confirm intended units.
            gb_transferred = 2 * steps / 1e9
            bandwidth = gb_transferred / elapsed

            result[array_megs, stride] = bandwidth

            pb.progress()

    from cPickle import dump
    # NOTE(review): both file handles are leaked (never closed), and the
    # pickle file is opened in text mode.
    dump(result, open("assoc_result.dat", "w"))
    open("assoc.c", "w").write(FUNC_CODE)
def build_matrix(op, dtype=None, shape=None):
    """Assemble a dense matrix for the linear operator *op* by applying it
    to each canonical unit vector in turn.

    :arg op: object exposing either a ``matvec`` method or ``__call__``,
        and (when *dtype*/*shape* are not given) ``dtype``/``shape``
        attributes.
    :arg dtype: element type of the result; falls back to ``op.dtype``.
    :arg shape: ``(rows, cols)`` of the result; falls back to ``op.shape``.
    :returns: a dense :class:`numpy.ndarray` of the given shape.
    """
    if not dtype:
        dtype = op.dtype
    if not shape:
        shape = op.shape

    from pytools import ProgressBar

    _, ncolumns = shape
    progress = ProgressBar("matrix", ncolumns)
    dense_mat = np.zeros(shape, dtype)

    # Prefer an explicit .matvec; otherwise apply the operator directly.
    apply_op = getattr(op, "matvec", None)
    if apply_op is None:
        apply_op = op.__call__

    for col in range(ncolumns):
        basis_vec = np.zeros(ncolumns, dtype=dtype)
        basis_vec[col] = 1
        dense_mat[:, col] = apply_op(basis_vec)
        progress.progress()

    progress.finished()
    return dense_mat
def dump_couch_to_sqlite(couch_db, outfile, scan_max=None): import sqlite3 as sqlite # {{{ scan for types column_type_dict = {} from pytools import ProgressBar pb = ProgressBar("scan (pass 1/2)", len(couch_db)) scan_count = 0 for doc in generate_all_docs(couch_db): if "type" in doc and doc["type"] == "job": for k, v in doc.iteritems(): new_type = type(v) if k in column_type_dict and column_type_dict[k] != new_type and v is not None: old_type = column_type_dict[k] if set([old_type, new_type]) == set([float, int]): new_type = float else: raise RuntimeError("ambiguous types for '%s': %s, %s" % (k, new_type, old_type)) column_type_dict[k] = new_type scan_count += 1 if scan_max is not None and scan_count >= scan_max: break pb.progress() pb.finished() # }}} del column_type_dict["type"] column_types = [] for name, tp in column_type_dict.iteritems(): column_types.append((name, tp)) def get_sql_type(tp): if tp in (str, unicode): return "text" elif issubclass(tp, list): return "text" elif issubclass(tp, int): return "integer" elif issubclass(tp, (float, numpy.floating)): return "real" else: raise TypeError("No SQL type for %s" % tp) create_stmt = "create table data (%s)" % ",".join("%s %s" % (name, get_sql_type(tp)) for name, tp in column_types) db_conn = sqlite.connect(outfile, timeout=30) db_conn.execute(create_stmt) db_conn.commit() insert_stmt = "insert into data values (%s)" % (",".join(["?"] * len(column_types))) pb = ProgressBar("fill (pass 2/2)", len(couch_db)) for doc in generate_all_docs(couch_db): data = [None] * len(column_types) for i, (col_name, col_tp) in enumerate(column_types): if "type" in doc and doc["type"] == "job": try: if isinstance(doc[col_name], list): data[i] = str(doc[col_name]) else: data[i] = doc[col_name] except KeyError: print "doc %s had no field %s" % (doc["_id"], col_name) db_conn.execute(insert_stmt, data) pb.progress() pb.finished() db_conn.commit() db_conn.close()
def test_fmm_completeness(ctx_getter, dims, nsources_req, ntargets_req,
        who_has_extent, source_gen, target_gen, filter_kind,
        well_sep_is_n_away, extent_norm, from_sep_smaller_crit):
    """Tests whether the built FMM traversal structures and driver completely
    capture all interactions.

    Strategy: run a constant-one FMM with unit source weights; every target
    must then receive exactly the sum of all weights, so any deviation
    indicates a missed or duplicated interaction.
    """
    sources_have_extent = "s" in who_has_extent
    targets_have_extent = "t" in who_has_extent

    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    dtype = np.float64

    try:
        sources = source_gen(queue, nsources_req, dims, dtype, seed=15)
        nsources = len(sources[0])

        if ntargets_req is None:
            # This says "same as sources" to the tree builder.
            targets = None
            ntargets = ntargets_req
        else:
            targets = target_gen(queue, ntargets_req, dims, dtype, seed=16)
            ntargets = len(targets[0])
    except ImportError:
        pytest.skip("loo.py not available, but needed for particle array "
                "generation")

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=12)
    if sources_have_extent:
        # Radii span many orders of magnitude: 2**u with u in [-10, 0).
        source_radii = 2**rng.uniform(queue, nsources, dtype=dtype,
                a=-10, b=0)
    else:
        source_radii = None

    if targets_have_extent:
        target_radii = 2**rng.uniform(queue, ntargets, dtype=dtype,
                a=-10, b=0)
    else:
        target_radii = None

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, targets=targets,
            max_particles_in_box=30,
            source_radii=source_radii, target_radii=target_radii,
            debug=True, stick_out_factor=0.25, extent_norm=extent_norm)
    if 0:
        # Disabled debugging aid: visualize the built tree.
        tree.get().plot()
        import matplotlib.pyplot as pt
        pt.show()

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx,
            well_sep_is_n_away=well_sep_is_n_away,
            from_sep_smaller_crit=from_sep_smaller_crit)
    trav, _ = tbuild(queue, tree, debug=True)

    if who_has_extent:
        # Keep the unmerged traversal around for list visualization below.
        pre_merge_trav = trav
        trav = trav.merge_close_lists(queue)

    #weights = np.random.randn(nsources)
    weights = np.ones(nsources)
    weights_sum = np.sum(weights)

    host_trav = trav.get(queue=queue)
    host_tree = host_trav.tree

    if who_has_extent:
        pre_merge_host_trav = pre_merge_trav.get(queue=queue)

    from boxtree.tree import ParticleListFilter
    plfilt = ParticleListFilter(ctx)

    if filter_kind:
        # Random 0/1 flags select the subset of targets kept by the filter.
        flags = rng.uniform(queue, ntargets or nsources, np.int32, a=0, b=2) \
                .astype(np.int8)
        if filter_kind == "user":
            filtered_targets = plfilt.filter_target_lists_in_user_order(
                    queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInUserOrder(
                    host_tree, filtered_targets.get(queue=queue))
        elif filter_kind == "tree":
            filtered_targets = plfilt.filter_target_lists_in_tree_order(
                    queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInTreeOrder(
                    host_tree, filtered_targets.get(queue=queue))
        else:
            raise ValueError("unsupported value of 'filter_kind'")
    else:
        wrangler = ConstantOneExpansionWrangler(host_tree)
        # All-ones flags make the later flag-based slicing a no-op.
        flags = cl.array.empty(queue, ntargets or nsources, dtype=np.int8)
        flags.fill(1)

    if ntargets is None and not filter_kind:
        # This check only works for targets == sources.
        assert (wrangler.reorder_potentials(
                wrangler.reorder_sources(weights)) == weights).all()

    from boxtree.fmm import drive_fmm
    pot = drive_fmm(host_trav, wrangler, weights)

    if filter_kind:
        pot = pot[flags.get() > 0]

    # With unit weights, every potential should equal weights_sum exactly
    # (up to round-off).
    rel_err = la.norm((pot - weights_sum) / nsources)
    good = rel_err < 1e-8

    # {{{ build, evaluate matrix (and identify incorrect interactions)

    if 0 and not good:
        # Disabled debugging aid: build the full interaction matrix one unit
        # vector at a time and locate entries that are not exactly 1.
        mat = np.zeros((ntargets, nsources), dtype)
        from pytools import ProgressBar
        logging.getLogger().setLevel(logging.WARNING)
        pb = ProgressBar("matrix", nsources)
        for i in range(nsources):
            unit_vec = np.zeros(nsources, dtype=dtype)
            unit_vec[i] = 1
            mat[:, i] = drive_fmm(host_trav, wrangler, unit_vec)
            pb.progress()
        pb.finished()
        logging.getLogger().setLevel(logging.INFO)

        import matplotlib.pyplot as pt

        if 0:
            pt.imshow(mat)
            pt.colorbar()
            pt.show()

        incorrect_tgts, incorrect_srcs = np.where(mat != 1)

        if 1 and len(incorrect_tgts):
            from boxtree.visualization import TreePlotter
            plotter = TreePlotter(host_tree)
            plotter.draw_tree(fill=False, edgecolor="black")
            plotter.draw_box_numbers()
            plotter.set_bounding_box()

            tree_order_incorrect_tgts = \
                    host_tree.indices_to_tree_target_order(incorrect_tgts)
            tree_order_incorrect_srcs = \
                    host_tree.indices_to_tree_source_order(incorrect_srcs)

            src_boxes = [
                host_tree.find_box_nr_for_source(i)
                for i in tree_order_incorrect_srcs
            ]
            tgt_boxes = [
                host_tree.find_box_nr_for_target(i)
                for i in tree_order_incorrect_tgts
            ]
            print(src_boxes)
            print(tgt_boxes)

            # plot all sources/targets
            if 0:
                pt.plot(host_tree.targets[0], host_tree.targets[1], "v",
                        alpha=0.9)
                pt.plot(host_tree.sources[0], host_tree.sources[1], "gx",
                        alpha=0.9)

            # plot offending sources/targets
            if 0:
                pt.plot(host_tree.targets[0][tree_order_incorrect_tgts],
                        host_tree.targets[1][tree_order_incorrect_tgts], "rv")
                pt.plot(host_tree.sources[0][tree_order_incorrect_srcs],
                        host_tree.sources[1][tree_order_incorrect_srcs], "go")
            pt.gca().set_aspect("equal")

            from boxtree.visualization import draw_box_lists
            draw_box_lists(
                plotter,
                pre_merge_host_trav if who_has_extent else host_trav,
                22)
            # from boxtree.visualization import draw_same_level_non_well_sep_boxes
            # draw_same_level_non_well_sep_boxes(plotter, host_trav, 2)

            pt.show()

    # }}}

    if 0 and not good:
        import matplotlib.pyplot as pt
        pt.plot(pot - weights_sum)
        pt.show()

    if 0 and not good:
        import matplotlib.pyplot as pt
        filt_targets = [
            host_tree.targets[0][flags.get() > 0],
            host_tree.targets[1][flags.get() > 0],
        ]
        host_tree.plot()
        bad = np.abs(pot - weights_sum) >= 1e-3
        bad_targets = [
            filt_targets[0][bad],
            filt_targets[1][bad],
        ]
        print(bad_targets[0].shape)
        pt.plot(filt_targets[0], filt_targets[1], "x")
        pt.plot(bad_targets[0], bad_targets[1], "v")
        pt.show()

    assert good
def test_fmm_completeness(
    ctx_getter, dims, nsources_req, ntargets_req, who_has_extent,
    source_gen, target_gen, filter_kind
):
    """Tests whether the built FMM traversal structures and driver completely
    capture all interactions.

    Strategy: run a constant-one FMM; every target must receive exactly the
    sum of all source weights, so any deviation indicates a missed or
    duplicated interaction.
    """
    sources_have_extent = "s" in who_has_extent
    targets_have_extent = "t" in who_has_extent

    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    dtype = np.float64

    try:
        sources = source_gen(queue, nsources_req, dims, dtype, seed=15)
        nsources = len(sources[0])

        if ntargets_req is None:
            # This says "same as sources" to the tree builder.
            targets = None
            ntargets = ntargets_req
        else:
            targets = target_gen(queue, ntargets_req, dims, dtype, seed=16)
            ntargets = len(targets[0])
    except ImportError:
        pytest.skip("loo.py not available, but needed for particle array "
                "generation")

    from pyopencl.clrandom import RanluxGenerator
    rng = RanluxGenerator(queue, seed=13)
    if sources_have_extent:
        # Radii span many orders of magnitude: 2**u with u in [-10, 0).
        source_radii = 2 ** rng.uniform(queue, nsources, dtype=dtype,
                a=-10, b=0)
    else:
        source_radii = None

    if targets_have_extent:
        target_radii = 2 ** rng.uniform(queue, ntargets, dtype=dtype,
                a=-10, b=0)
    else:
        target_radii = None

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(
        queue, sources, targets=targets,
        max_particles_in_box=30,
        source_radii=source_radii, target_radii=target_radii,
        debug=True,
    )
    if 0:
        # Disabled debugging aid: visualize the built tree.
        tree.get().plot()
        import matplotlib.pyplot as pt
        pt.show()

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)

    # Merge "close" lists if the traversal produced them (extent case).
    if trav.sep_close_smaller_starts is not None:
        trav = trav.merge_close_lists(queue)

    weights = np.random.randn(nsources)
    # weights = np.ones(nsources)
    weights_sum = np.sum(weights)

    host_trav = trav.get(queue=queue)
    host_tree = host_trav.tree

    if filter_kind:
        # Random 0/1 flags select the subset of targets kept by the filter.
        flags = rng.uniform(queue, ntargets or nsources, np.int32,
                a=0, b=2).astype(np.int8)
        if filter_kind == "user":
            from boxtree.tree import filter_target_lists_in_user_order
            filtered_targets = filter_target_lists_in_user_order(queue, tree,
                    flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInUserOrder(
                host_tree, filtered_targets.get(queue=queue)
            )
        elif filter_kind == "tree":
            from boxtree.tree import filter_target_lists_in_tree_order
            filtered_targets = filter_target_lists_in_tree_order(queue, tree,
                    flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInTreeOrder(
                host_tree, filtered_targets.get(queue=queue)
            )
        else:
            raise ValueError("unsupported value of 'filter_kind'")
    else:
        wrangler = ConstantOneExpansionWrangler(host_tree)

    if ntargets is None and not filter_kind:
        # This check only works for targets == sources.
        assert (wrangler.reorder_potentials(
                wrangler.reorder_sources(weights)) == weights).all()

    from boxtree.fmm import drive_fmm
    pot = drive_fmm(host_trav, wrangler, weights)

    # {{{ build, evaluate matrix (and identify missing interactions)

    if 0:
        # Disabled debugging aid: build the full interaction matrix one unit
        # vector at a time and locate entries that are exactly 0 (missed
        # source/target pairs).
        mat = np.zeros((ntargets, nsources), dtype)
        from pytools import ProgressBar
        logging.getLogger().setLevel(logging.WARNING)
        pb = ProgressBar("matrix", nsources)
        for i in range(nsources):
            unit_vec = np.zeros(nsources, dtype=dtype)
            unit_vec[i] = 1
            mat[:, i] = drive_fmm(host_trav, wrangler, unit_vec)
            pb.progress()
        pb.finished()
        logging.getLogger().setLevel(logging.INFO)

        import matplotlib.pyplot as pt
        if 1:
            pt.spy(mat)
            pt.show()

        missing_tgts, missing_srcs = np.where(mat == 0)

        if 1 and len(missing_tgts):
            from boxtree.visualization import TreePlotter
            plotter = TreePlotter(host_tree)
            plotter.draw_tree(fill=False, edgecolor="black")
            plotter.draw_box_numbers()
            plotter.set_bounding_box()

            tree_order_missing_tgts = \
                    host_tree.indices_to_tree_target_order(missing_tgts)
            tree_order_missing_srcs = \
                    host_tree.indices_to_tree_source_order(missing_srcs)

            src_boxes = [host_tree.find_box_nr_for_source(i)
                    for i in tree_order_missing_srcs]
            tgt_boxes = [host_tree.find_box_nr_for_target(i)
                    for i in tree_order_missing_tgts]
            print(src_boxes)
            print(tgt_boxes)

            pt.plot(host_tree.targets[0][tree_order_missing_tgts],
                    host_tree.targets[1][tree_order_missing_tgts], "rv")
            pt.plot(host_tree.sources[0][tree_order_missing_srcs],
                    host_tree.sources[1][tree_order_missing_srcs], "go")
            pt.gca().set_aspect("equal")
            pt.show()

    # }}}

    if filter_kind:
        pot = pot[flags.get() > 0]

    # Every kept target's potential should equal weights_sum (up to
    # round-off).
    rel_err = la.norm((pot - weights_sum) / nsources)
    good = rel_err < 1e-8

    if 0 and not good:
        import matplotlib.pyplot as pt
        pt.plot(pot - weights_sum)
        pt.show()

    if 0 and not good:
        import matplotlib.pyplot as pt
        filt_targets = [host_tree.targets[0][flags.get() > 0],
                host_tree.targets[1][flags.get() > 0]]
        host_tree.plot()
        bad = np.abs(pot - weights_sum) >= 1e-3
        bad_targets = [filt_targets[0][bad], filt_targets[1][bad]]
        print(bad_targets[0].shape)
        pt.plot(filt_targets[0], filt_targets[1], "x")
        pt.plot(bad_targets[0], bad_targets[1], "v")
        pt.show()

    assert good
def __init__(self, quad_order, method="gauss-legendre", dim=2,
        kernel_func=None, kernel_type=None, sumpy_kernel=None,
        build_method=None, source_box_extent=1, dtype=np.float64,
        inverse_droste=False, progress_bar=True, **kwargs):
    """
    kernel_type determines how the kernel is scaled w.r.t. box size.
    build_method can be "Transform" or "DrosteSum".
    The source box is [0, source_box_extent]^dim

    :arg inverse_droste True if computing with the fractional Laplacian
        kernel.

    Additional notes:

    :arg quad_order: number of quadrature points per dimension.
    :arg method: only "gauss-legendre" is implemented.
    :arg kwargs: may carry a ``queue`` used by the mesh generator.
    """
    self.quad_order = quad_order
    self.dim = dim
    self.dtype = dtype
    self.inverse_droste = inverse_droste

    assert source_box_extent > 0
    self.source_box_extent = source_box_extent

    # Geometric center of the source box.
    self.center = np.ones(self.dim) * 0.5 * self.source_box_extent

    self.build_method = build_method

    if dim == 1:
        if build_method == "Transform":
            raise NotImplementedError("Use build_method=DrosteSum for 1d")
        self.kernel_func = kernel_func
        self.kernel_type = kernel_type
        self.integral_knl = sumpy_kernel
    elif dim == 2:
        # Constant kernel can be used for fun/testing
        if kernel_func is None:
            kernel_func = constant_one
            kernel_type = "const"
            # for DrosteSum kernel_func is unused
            if build_method == "Transform":
                logger.warning("setting kernel_func to be constant.")

        # Kernel function differs from OpenCL's kernels
        self.kernel_func = kernel_func
        self.kernel_type = kernel_type
        self.integral_knl = sumpy_kernel

        # DrosteSum evaluates via the sumpy kernel, which must be supplied.
        if build_method == "DrosteSum":
            assert sumpy_kernel is not None
    elif dim == 3:
        if build_method == "Transform":
            raise NotImplementedError("Use build_method=DrosteSum for 3d")
        self.kernel_func = kernel_func
        self.kernel_type = kernel_type
        self.integral_knl = sumpy_kernel
    else:
        raise NotImplementedError

    # number of quad points per box
    # equals to the number of modes per box
    self.n_q_points = self.quad_order**dim

    # Normalizers for polynomial modes
    # Needed only when we want to rescale log type kernels
    self.mode_normalizers = np.zeros(self.n_q_points, dtype=self.dtype)

    # Exterior normalizers for hypersingular kernels
    self.kernel_exterior_normalizers = np.zeros(
        self.n_q_points, dtype=self.dtype)

    # number of (source_mode, target_point) pairs between two boxes
    self.n_pairs = self.n_q_points**2

    # possible interaction cases
    self.interaction_case_vecs, self.case_encode, self.case_indices = \
        gallery.generate_list1_gallery(self.dim)
    self.n_cases = len(self.interaction_case_vecs)

    if method == "gauss-legendre":
        # quad points in [-1,1]
        import volumential.meshgen as mg

        if 'queue' in kwargs:
            queue = kwargs['queue']
        else:
            queue = None

        q_points, _, _ = mg.make_uniform_cubic_grid(degree=quad_order,
                level=1, dim=self.dim, queue=queue)

        # map to source box
        mapped_q_points = np.array([
            0.5 * self.source_box_extent * (qp + np.ones(self.dim))
            for qp in q_points
        ])
        # sort in dictionary order, preserve only the leading
        # digits to prevent floating point errors from polluting
        # the ordering.
        q_points_ordering = sorted(
            range(len(mapped_q_points)),
            key=lambda i: list(np.floor(mapped_q_points[i] * 10000)),
        )
        self.q_points = mapped_q_points[q_points_ordering]
    else:
        raise NotImplementedError

    # Table entries, NaN-filled until built; one entry per
    # (interaction case, source mode, target point).
    self.data = np.empty(self.n_pairs * self.n_cases, dtype=self.dtype)
    self.data.fill(np.nan)

    # Total work items for the progress bar: all table entries plus one
    # normalizer per mode.
    total_evals = len(self.data) + self.n_q_points

    if progress_bar:
        from pytools import ProgressBar
        self.pb = ProgressBar("Building table:", total_evals)
    else:
        self.pb = None

    self.is_built = False
class NearFieldInteractionTable(object): """Class for a near-field interaction table. A near-field interaction table stores precomputed singular integrals on template boxes and supports transforms to actual boxes on lookup. The query process is done through scaling the entries based on actual box sized. Orientations are ordered counter-clockwise. A template box is one of [0,1]^dim """ # {{{ constructor def __init__(self, quad_order, method="gauss-legendre", dim=2, kernel_func=None, kernel_type=None, sumpy_kernel=None, build_method=None, source_box_extent=1, dtype=np.float64, inverse_droste=False, progress_bar=True, **kwargs): """ kernel_type determines how the kernel is scaled w.r.t. box size. build_method can be "Transform" or "DrosteSum". The source box is [0, source_box_extent]^dim :arg inverse_droste True if computing with the fractional Laplacian kernel. """ self.quad_order = quad_order self.dim = dim self.dtype = dtype self.inverse_droste = inverse_droste assert source_box_extent > 0 self.source_box_extent = source_box_extent self.center = np.ones(self.dim) * 0.5 * self.source_box_extent self.build_method = build_method if dim == 1: if build_method == "Transform": raise NotImplementedError("Use build_method=DrosteSum for 1d") self.kernel_func = kernel_func self.kernel_type = kernel_type self.integral_knl = sumpy_kernel elif dim == 2: # Constant kernel can be used for fun/testing if kernel_func is None: kernel_func = constant_one kernel_type = "const" # for DrosteSum kernel_func is unused if build_method == "Transform": logger.warning("setting kernel_func to be constant.") # Kernel function differs from OpenCL's kernels self.kernel_func = kernel_func self.kernel_type = kernel_type self.integral_knl = sumpy_kernel if build_method == "DrosteSum": assert sumpy_kernel is not None elif dim == 3: if build_method == "Transform": raise NotImplementedError("Use build_method=DrosteSum for 3d") self.kernel_func = kernel_func self.kernel_type = kernel_type 
self.integral_knl = sumpy_kernel else: raise NotImplementedError # number of quad points per box # equals to the number of modes per box self.n_q_points = self.quad_order**dim # Normalizers for polynomial modes # Needed only when we want to rescale log type kernels self.mode_normalizers = np.zeros(self.n_q_points, dtype=self.dtype) # Exterior normalizers for hypersingular kernels self.kernel_exterior_normalizers = np.zeros(self.n_q_points, dtype=self.dtype) # number of (source_mode, target_point) pairs between two boxes self.n_pairs = self.n_q_points**2 # possible interaction cases self.interaction_case_vecs, self.case_encode, self.case_indices = \ gallery.generate_list1_gallery(self.dim) self.n_cases = len(self.interaction_case_vecs) if method == "gauss-legendre": # quad points in [-1,1] import volumential.meshgen as mg if 'queue' in kwargs: queue = kwargs['queue'] else: queue = None q_points, _, _ = mg.make_uniform_cubic_grid(degree=quad_order, level=1, dim=self.dim, queue=queue) # map to source box mapped_q_points = np.array([ 0.5 * self.source_box_extent * (qp + np.ones(self.dim)) for qp in q_points ]) # sort in dictionary order, preserve only the leading # digits to prevent floating point errors from polluting # the ordering. 
q_points_ordering = sorted( range(len(mapped_q_points)), key=lambda i: list(np.floor(mapped_q_points[i] * 10000)), ) self.q_points = mapped_q_points[q_points_ordering] else: raise NotImplementedError self.data = np.empty(self.n_pairs * self.n_cases, dtype=self.dtype) self.data.fill(np.nan) total_evals = len(self.data) + self.n_q_points if progress_bar: from pytools import ProgressBar self.pb = ProgressBar("Building table:", total_evals) else: self.pb = None self.is_built = False # }}} End constructor # {{{ encode to table index def get_entry_index(self, source_mode_index, target_point_index, case_id): assert source_mode_index >= 0 and source_mode_index < self.n_q_points assert target_point_index >= 0 and target_point_index < self.n_q_points pair_id = source_mode_index * self.n_q_points + target_point_index return case_id * self.n_pairs + pair_id # }}} End encode to table index # {{{ decode table index to entry info def decode_index(self, entry_id): """This is the inverse function of get_entry_index() """ index_info = dict() case_id = entry_id // self.n_pairs pair_id = entry_id % self.n_pairs source_mode_index = pair_id // self.n_q_points target_point_index = pair_id % self.n_q_points index_info["case_index"] = case_id index_info["source_mode_index"] = source_mode_index index_info["target_point_index"] = target_point_index return index_info # }}} End decode table index to entry info # {{{ basis modes in the template box def unwrap_mode_index(self, mode_index): # NOTE: these two lines should be changed # in accordance with the mesh generator # to get correct xi (1d grid) if self.dim == 1: idx = [mode_index] elif self.dim == 2: idx = [mode_index // self.quad_order, mode_index % self.quad_order] elif self.dim == 3: idx = [ mode_index // (self.quad_order**2), mode_index % (self.quad_order**2) // self.quad_order, mode_index % (self.quad_order**2) % self.quad_order, ] return idx def get_template_mode(self, mode_index): assert mode_index >= 0 and mode_index < 
self.n_q_points """ template modes are defined on an l_infty circle. """ idx = self.unwrap_mode_index(mode_index) xi = (np.array( [p[self.dim - 1] for p in self.q_points[:self.quad_order]]) / self.source_box_extent) assert len(xi) == self.quad_order yi = [] for d in range(self.dim): yi.append(np.zeros(self.quad_order, dtype=self.dtype)) yi[d][idx[d]] = 1 axis_interp = [Interpolator(xi, yi[d]) for d in range(self.dim)] def mode(*coords): assert len(coords) == self.dim if isinstance(coords[0], (int, float, complex)): fvals = np.ones(1) else: fvals = np.ones(np.array(coords[0]).shape) for d, coord in zip(range(self.dim), coords): fvals = np.multiply(fvals, axis_interp[d](np.array(coord))) return fvals return mode def get_mode(self, mode_index): """ normal modes are deined on the source box """ assert mode_index >= 0 and mode_index < self.n_q_points idx = self.unwrap_mode_index(mode_index) xi = np.array( [p[self.dim - 1] for p in self.q_points[:self.quad_order]]) assert len(xi) == self.quad_order yi = [] for d in range(self.dim): yi.append(np.zeros(self.quad_order, dtype=self.dtype)) yi[d][idx[d]] = 1 axis_interp = [Interpolator(xi, yi[d]) for d in range(self.dim)] def mode(*coords): assert len(coords) == self.dim if isinstance(coords[0], (int, float, complex)): fvals = np.ones(1) else: fvals = np.ones(np.array(coords[0]).shape) for d, coord in zip(range(self.dim), coords): fvals = np.multiply(fvals, axis_interp[d](np.array(coord))) return fvals return mode def get_mode_cheb_coeffs(self, mode_index, cheb_order): """ Cheb coeffs of a mode. The projection process is performed on [0,1]^dim. 
""" import scipy.special as sps cheby_nodes, _, cheby_weights = \ sps.chebyt(cheb_order).weights.T # pylint: disable=E1136,E0633 window = [0, 1] cheby_nodes = cheby_nodes * (window[1] - window[0]) / 2 + np.mean(window) cheby_weights = cheby_weights * (window[1] - window[0]) / 2 mode = self.get_template_mode(mode_index) grid = np.meshgrid(*[cheby_nodes for d in range(self.dim)], indexing='ij') mvals = mode(*grid) from numpy.polynomial.chebyshev import Chebyshev coef_scale = 2 * np.ones(cheb_order) / cheb_order coef_scale[0] /= 2 basis_1d = np.array([ Chebyshev(coef=_orthonormal(cheb_order, i), domain=window)(cheby_nodes) for i in range(cheb_order) ]) from itertools import product if self.dim == 1: basis_set = basis_1d elif self.dim == 2: basis_set = np.array([ b1.reshape([cheb_order, 1]) * b2.reshape([1, cheb_order]) for b1, b2 in product(*[basis_1d for d in range(self.dim)]) ]) elif self.dim == 3: basis_set = np.array([ b1.reshape([cheb_order, 1, 1]) * b2.reshape([1, cheb_order, 1]) * b3.reshape([1, 1, cheb_order]) for b1, b2, b3 in product(*[basis_1d for d in range(self.dim)]) ]) mode_cheb_coeffs = np.array([ np.sum(mvals * basis) for basis in basis_set ]) * _self_tp(coef_scale, self.dim).reshape(-1) # purge small coeffs whose magnitude are less than 8 times machine epsilon mode_cheb_coeffs[np.abs(mode_cheb_coeffs) < 8 * np.finfo(mode_cheb_coeffs.dtype).eps] = 0 return mode_cheb_coeffs # }}} End basis modes in the template box # {{{ build table via transform def get_symmetry_transform(self, source_mode_index): """Apply proper transforms to map source mode to a reduced region Returns: - a transform that can be applied on the interaction case vectors connection box centers. - a transform that can be applied to the mode/point indices. 
""" # mat = np.diag(np.ones(self.dim)) # q_points must be sorted in (ascending) dictionary order k = np.zeros(self.dim) resid = source_mode_index for d in range(-1, -1 - self.dim, -1): k[d] = resid % self.quad_order resid = resid // self.quad_order s1 = np.sign((self.quad_order - 0.5) / 2 - k) for d in range(self.dim): if s1[d] < 0: k[d] = self.quad_order - 1 - k[d] s2 = sorted(range(len(k)), key=lambda i: abs(k[i])) def symmetry_transform(vec): nv = np.array(vec) * s1 return nv[s2] def qpoint_index_transform(index): k = np.zeros(self.dim, dtype=int) resid = index for d in range(-1, -1 - self.dim, -1): k[d] = resid % self.quad_order resid = resid // self.quad_order assert resid == 0 for d in range(self.dim): if s1[d] < 0: k[d] = self.quad_order - 1 - k[d] k = k[s2] new_id = 0 for d in range(self.dim): new_id = new_id * int(self.quad_order) + k[d] return new_id return (symmetry_transform, qpoint_index_transform) def find_target_point(self, target_point_index, case_index): """Apply proper transforms to find the target point's coordinate. Only translations and scalings are allowed in this step, avoiding the indices of quad points to be messed up. 
""" assert target_point_index >= 0 and target_point_index < self.n_q_points # rescale to source box with size 1x1 vec = (np.array(self.interaction_case_vecs[case_index]) / 4.0 * self.source_box_extent) new_cntr = (np.ones(self.dim, dtype=self.dtype) * 0.5 * self.source_box_extent + vec) if int(max(abs(np.array( self.interaction_case_vecs[case_index])))) == 0: new_size = 1 else: new_size = (max([ abs(cvc) - 2 for cvc in self.interaction_case_vecs[case_index] ]) / 2) # print(vec, new_cntr, new_size) return new_cntr + new_size * (self.q_points[target_point_index] - self.center) def lookup_by_symmetry(self, entry_id): """Loop up table entry that is mapped to a region where: - k_i <= q/2 in all direction i - k_i's are sorted in ascending order Returns the mapped entry_id """ entry_info = self.decode_index(entry_id) # source_mode = self.get_mode( # entry_info[ # "source_mode_index"]) # target_point = self.find_target_point( # target_point_index= # entry_info[ # "target_point_index"], # case_index=entry_info[ # "case_index"]) vec_map, qp_map = self.get_symmetry_transform( entry_info["source_mode_index"]) # mapped (canonical) case_id case_vec = self.interaction_case_vecs[entry_info["case_index"]] cc_vec = vec_map(case_vec) cc_id = self.case_indices[self.case_encode(cc_vec)] cs_id = qp_map(entry_info["source_mode_index"]) ct_id = qp_map(entry_info["target_point_index"]) centry_id = self.get_entry_index(cs_id, ct_id, cc_id) return entry_id, centry_id def compute_table_entry(self, entry_id): """Compute one entry in the table indexed by self.data[entry_id] Input kernel function should be centered at origin. 
""" entry_info = self.decode_index(entry_id) source_mode = self.get_mode(entry_info["source_mode_index"]) target_point = self.find_target_point( target_point_index=entry_info["target_point_index"], case_index=entry_info["case_index"], ) # print(entry_info, target_point) # source_point = ( # self.q_points[entry_info[ # "source_mode_index"]]) # print(source_mode(source_point[0], source_point[1])) if self.dim == 2: def integrand(x, y): return source_mode(x, y) * self.kernel_func( x - target_point[0], y - target_point[1]) integral, error = squad.box_quad( func=integrand, a=0, b=self.source_box_extent, c=0, d=self.source_box_extent, singular_point=target_point, # tol=1e-10, # rtol=1e-10, # miniter=300, maxiter=301, ) else: raise NotImplementedError return (entry_id, integral) def compute_nmlz(self, mode_id): mode_func = self.get_mode(mode_id) nmlz, err = squad.qquad( func=mode_func, a=0, b=self.source_box_extent, c=0, d=self.source_box_extent, tol=1., rtol=1., minitero=25, miniteri=25, maxitero=100, maxiteri=100, ) # FIXME: cannot pickle logger if err > 1e-15: logger.debug("Normalizer %d quad error is %e" % (mode_id, err)) return (mode_id, nmlz) def build_normalizer_table(self, pool=None, pb=None): """ Build normalizers, used for log-scaled kernels, currently only supported in 2D. """ assert self.dim == 2 if 0: # FIXME: make everything needed for compute_nmlz picklable if pool is None: from multiprocessing import Pool pool = Pool(processes=None) for mode_id, nmlz in pool.imap_unordered( self.compute_nmlz, [i for i in range(self.n_q_points)]): self.mode_normalizers[mode_id] = nmlz if pb is not None: pb.progress(1) else: for mode_id in range(self.n_q_points): _, nmlz = self.compute_nmlz(mode_id) self.mode_normalizers[mode_id] = nmlz if pb is not None: pb.progress(1) def build_table_via_transform(self): """ Build the full data table using transforms to remove the singularity. 
""" assert self.dim == 2 if 0: # FIXME: make everything needed for compute_nmlz picklable # multiprocessing cannot handle member functions from multiprocessing import Pool pool = Pool(processes=None) else: pool = None if self.pb is not None: self.pb.draw() self.build_normalizer_table(pool, pb=self.pb) self.has_normalizers = True # First compute entries that are invariant under # symmetry lookup invariant_entry_ids = [ i for i in range(len(self.data)) if self.lookup_by_symmetry(i) == (i, i) ] if 0: # multiprocess disabled to remove dependency on dill/multiprocess for entry_id, entry_val in pool.imap_unordered( self.compute_table_entry, invariant_entry_ids): self.data[entry_id] = entry_val if self.pb is not None: self.pb.progress(1) else: for entry_id in invariant_entry_ids: _, entry_val = self.compute_table_entry(entry_id) self.data[entry_id] = entry_val if self.pb is not None: self.pb.progress(1) if 0: # Then complete the table via symmetry lookup for entry_id, centry_id in pool.imap_unordered( self.lookup_by_symmetry, [i for i in range(len(self.data))]): assert not np.isnan(self.data[centry_id]) if centry_id == entry_id: continue self.data[entry_id] = self.data[centry_id] if self.pb is not None: self.pb.progress(1) else: for entry_id in range(len(self.data)): _, centry_id = self.lookup_by_symmetry(entry_id) assert not np.isnan(self.data[centry_id]) if centry_id == entry_id: continue self.data[entry_id] = self.data[centry_id] if self.pb is not None: self.pb.progress(1) if self.pb is not None: self.pb.finished() for entry in self.data: assert not np.isnan(entry) self.is_built = True # }}} End build table via transform # {{{ build table via adding up a Droste of bricks def get_droste_table_builder(self, n_brick_quad_points, special_radial_brick_quadrature, nradial_brick_quad_points, use_symmetry=False, knl_symmetry_tags=None): if self.inverse_droste: from volumential.droste import InverseDrosteReduced drf = InverseDrosteReduced( self.integral_knl, self.quad_order, 
self.interaction_case_vecs, n_brick_quad_points, knl_symmetry_tags, auto_windowing=False, special_radial_quadrature=special_radial_brick_quadrature, nradial_quad_points=nradial_brick_quad_points) else: if not use_symmetry: from volumential.droste import DrosteFull drf = DrosteFull( self.integral_knl, self.quad_order, self.interaction_case_vecs, n_brick_quad_points, special_radial_quadrature=special_radial_brick_quadrature, nradial_quad_points=nradial_brick_quad_points) else: from volumential.droste import DrosteReduced drf = DrosteReduced( self.integral_knl, self.quad_order, self.interaction_case_vecs, n_brick_quad_points, knl_symmetry_tags, special_radial_quadrature=special_radial_brick_quadrature, nradial_quad_points=nradial_brick_quad_points) return drf def build_table_via_droste_bricks(self, n_brick_quad_points=50, alpha=0, cl_ctx=None, queue=None, adaptive_level=True, adaptive_quadrature=True, use_symmetry=False, **kwargs): if queue is None: import pyopencl as cl cl_ctx = cl.create_some_context(interactive=True) queue = cl.CommandQueue(cl_ctx) assert alpha >= 0 and alpha < 1 if "nlevels" in kwargs: nlev = kwargs.pop("nlevels") else: nlev = 1 if "special_radial_brick_quadrature" in kwargs: special_radial_brick_quadrature = kwargs.pop( "special_radial_brick_quadrature") nradial_brick_quad_points = kwargs.pop("nradial_brick_quad_points") else: special_radial_brick_quadrature = False nradial_brick_quad_points = None if use_symmetry: if "knl_symmetry_tags" in kwargs: knl_symmetry_tags = kwargs["knl_symmetry_tags"] else: # Maximum symmetry by default logger.warn( "use_symmetry is set to True, but knl_symmetry_tags is not " "set. Using the default maximum symmetry. (Using maximum " "symmetry for some kernels (e.g. 
derivatives of " "LaplaceKernel will yield incorrect results).") knl_symmetry_tags = None # extra_kernel_kwargs = {} # if "extra_kernel_kwargs" in kwargs: # extra_kernel_kwargs = kwargs["extra_kernel_kwargs"] cheb_coefs = [ self.get_mode_cheb_coeffs(mid, self.quad_order) for mid in range(self.n_q_points) ] # compute an initial table drf = self.get_droste_table_builder(n_brick_quad_points, special_radial_brick_quadrature, nradial_brick_quad_points, use_symmetry, knl_symmetry_tags) data0 = drf( queue, source_box_extent=self.source_box_extent, alpha=alpha, nlevels=nlev, # extra_kernel_kwargs=extra_kernel_kwargs, cheb_coefs=cheb_coefs, **kwargs) # {{{ adaptively determine number of levels resid = -1 missing_measure = 1 if adaptive_level: table_tol = np.finfo(self.dtype).eps * 256 # 5e-14 for float64 logger.warn("Searching for nlevels since adaptive_level=True") while True: missing_measure = (alpha**nlev * self.source_box_extent)**self.dim if missing_measure < np.finfo(self.dtype).eps * 128: logger.warn("Adaptive level refinement terminated " "at %d since missing measure is minuscule " "(%e)" % (nlev, missing_measure)) break nlev = nlev + 1 data1 = drf( queue, source_box_extent=self.source_box_extent, alpha=alpha, nlevels=nlev, # extra_kernel_kwargs=extra_kernel_kwargs, cheb_coefs=cheb_coefs, **kwargs) resid = np.max(np.abs(data1 - data0)) / np.max(np.abs(data1)) data0 = data1 if abs(resid) < table_tol: logger.warn("Adaptive level refinement " "converged at level %d with residual %e" % (nlev - 1, resid)) break if np.isnan(resid): logger.warn("Adaptive level refinement terminated " "at %d before converging due to NaNs" % nlev) break if resid >= table_tol: logger.warn("Adaptive level refinement failed to converge.") logger.warn(f"Residual at level {nlev} equals to {resid}") # }}} End adaptively determine number of levels # {{{ adaptively determine brick quad order if adaptive_quadrature: table_tol = np.finfo(self.dtype).eps * 256 # 5e-14 for float64 logger.warn( 
"Searching for n_brick_quad_points since " "adaptive_quadrature=True. Note that if you are using " "special radial quadrature, the radial order will also be " "adaptively refined.") max_n_quad_pts = 1000 resid = np.inf while True: n_brick_quad_points += max(int(n_brick_quad_points * 0.2), 3) if special_radial_brick_quadrature: nradial_brick_quad_points += max( int(nradial_brick_quad_points * 0.2), 3) logger.warn( f"Trying n_brick_quad_points = {n_brick_quad_points}, " f"nradial_brick_quad_points = {nradial_brick_quad_points}, " f"resid = {resid}") else: logger.warn( f"Trying n_brick_quad_points = {n_brick_quad_points}, " f"resid = {resid}") if n_brick_quad_points > max_n_quad_pts: logger.warn("Adaptive quadrature refinement terminated " "since order %d exceeds the max order " "allowed (%d)" % (n_brick_quad_points - 1, max_n_quad_pts - 1)) break drf = self.get_droste_table_builder( n_brick_quad_points, special_radial_brick_quadrature, nradial_brick_quad_points, use_symmetry, knl_symmetry_tags) data1 = drf( queue, source_box_extent=self.source_box_extent, alpha=alpha, nlevels=nlev, n_brick_quad_points=n_brick_quad_points, # extra_kernel_kwargs=extra_kernel_kwargs, cheb_coefs=cheb_coefs, **kwargs) resid_prev = resid resid = np.max(np.abs(data1 - data0)) / np.max(np.abs(data1)) data0 = data1 if resid < table_tol: logger.warn("Adaptive quadrature " "converged at order %d with residual %e" % (n_brick_quad_points - 1, resid)) break if resid > resid_prev: logger.warn("Non-monotonic residual, breaking..") break if np.isnan(resid): logger.warn("Adaptive quadrature terminated " "at %d before converging due to NaNs" % nlev) break if resid >= table_tol: logger.warn("Adaptive quadrature failed to converge.") logger.warn(f"Residual at order {n_brick_quad_points} " f"equals to {resid}") if resid < 0: logger.warn("Failed to perform quadrature order refinement.") # }}} End adaptively determine brick quad order self.data = data0 # {{{ (only for 2D) compute normalizers # NOTE: 
normalizers are for log kernels and not needed in 3D if self.dim == 2: self.build_normalizer_table() self.has_normalizers = True else: self.has_normalizers = False if self.inverse_droste: assert cl_ctx self.build_kernel_exterior_normalizer_table( cl_ctx, queue, **kwargs) # }}} End Compute normalizers self.is_built = True # }}} End build table via adding up a Droste of bricks # {{{ build table (driver) def build_table(self, cl_ctx=None, queue=None, **kwargs): method = self.build_method if method == "Transform": logger.info("Building table with transform method") self.build_table_via_transform() elif method == "DrosteSum": logger.info("Building table with Droste method") self.build_table_via_droste_bricks(cl_ctx=cl_ctx, queue=queue, **kwargs) else: raise NotImplementedError() # }}} End build table (driver) # {{{ build kernel exterior normalizer table def build_kernel_exterior_normalizer_table(self, cl_ctx, queue, pool=None, ncpus=None, mesh_order=5, quad_order=10, mesh_size=0.03, remove_tmp_files=True, **kwargs): r"""Build the kernel exterior normalizer table for fractional Laplacians. An exterior normalizer for kernel :math:`G(r)` and target :math:`x` is defined as .. math:: \int_{B^c} G(\lVert x - y \rVert) dy where :math:`B` is the source box :math:`[0, source_box_extent]^dim`. 
""" logger.warn("this method is currently under construction.") if not self.inverse_droste: raise ValueError() if ncpus is None: import multiprocessing ncpus = multiprocessing.cpu_count() if pool is None: from multiprocessing import Pool pool = Pool(ncpus) def fl_scaling(k, s): # scaling constant from scipy.special import gamma return (2**(2 * s) * s * gamma(s + k / 2)) / (np.pi**(k / 2) * gamma(1 - s)) # Directly compute and return in 1D if self.dim == 1: s = self.integral_knl.s targets = np.array(self.q_points).reshape(-1) r1 = targets r2 = self.source_box_extent - targets self.kernel_exterior_normalizers = 1 / (2 * s) * ( 1 / r1**(2 * s) + 1 / r2**(2 * s)) * fl_scaling(k=self.dim, s=s) return from meshmode.array_context import PyOpenCLArrayContext from meshmode.dof_array import thaw, flatten from meshmode.mesh.io import read_gmsh from meshmode.discretization import Discretization from meshmode.discretization.poly_element import \ PolynomialWarpAndBlendGroupFactory # {{{ gmsh processing import gmsh gmsh.initialize() gmsh.option.setNumber("General.Terminal", 1) # meshmode does not support other versions gmsh.option.setNumber("Mesh.MshFileVersion", 2) gmsh.option.setNumber("Mesh.CharacteristicLengthMax", mesh_size) gmsh.option.setNumber("Mesh.ElementOrder", mesh_order) if mesh_order > 1: gmsh.option.setNumber("Mesh.CharacteristicLengthFromCurvature", 1) # radius of source box hs = self.source_box_extent / 2 # radius of bouding sphere r = hs * np.sqrt(self.dim) logger.debug("r_inner = %f, r_outer = %f" % (hs, r)) if self.dim == 2: tag_box = gmsh.model.occ.addRectangle(x=0, y=0, z=0, dx=2 * hs, dy=2 * hs, tag=-1) elif self.dim == 3: tag_box = gmsh.model.occ.addBox(x=0, y=0, z=0, dx=2 * hs, dy=2 * hs, dz=2 * hs, tag=-1) else: raise NotImplementedError() if self.dim == 2: tag_ball = gmsh.model.occ.addDisk(xc=hs, yc=hs, zc=0, rx=r, ry=r, tag=-1) elif self.dim == 3: tag_sphere = gmsh.model.occ.addSphere(xc=hs, yc=hs, zc=hs, radius=r, tag=-1) tag_ball = 
gmsh.model.occ.addVolume([tag_sphere], tag=-1) else: raise NotImplementedError() dimtags_ints, dimtags_map_ints = gmsh.model.occ.cut( objectDimTags=[(self.dim, tag_ball)], toolDimTags=[(self.dim, tag_box)], tag=-1, removeObject=True, removeTool=True) gmsh.model.occ.synchronize() gmsh.model.mesh.generate(self.dim) from tempfile import mkdtemp from os.path import join temp_dir = mkdtemp(prefix="tmp_volumential_nft") msh_filename = join(temp_dir, 'chinese_lucky_coin.msh') gmsh.write(msh_filename) gmsh.finalize() mesh = read_gmsh(msh_filename) if remove_tmp_files: import shutil shutil.rmtree(temp_dir) # }}} End gmsh processing arr_ctx = PyOpenCLArrayContext(queue) discr = Discretization( arr_ctx, mesh, PolynomialWarpAndBlendGroupFactory(order=quad_order)) from pytential import bind, sym # {{{ optional checks if 1: if self.dim == 2: arerr = np.abs((np.pi * r**2 - (2 * hs)**2) - bind(discr, sym.integral(self.dim, self.dim, 1)) (queue)) / (np.pi * r**2 - (2 * hs)**2) if arerr > 1e-12: log_to = logger.warn else: log_to = logger.debug log_to("the numerical error when computing the measure of a " "unit ball is %e" % arerr) elif self.dim == 3: arerr = np.abs((4 / 3 * np.pi * r**3 - (2 * hs)**3) - bind(discr, sym.integral(self.dim, self.dim, 1)) (queue)) / (4 / 3 * np.pi * r**3 - (2 * hs)**3) if arerr > 1e-12: log_to = logger.warn else: log_to = logger.debug logger.warn( "The numerical error when computing the measure of a " "unit ball is %e" % arerr) # }}} End optional checks # {{{ kernel evaluation # TODO: take advantage of symmetry if this is too slow from volumential.droste import InverseDrosteReduced # only for getting kernel evaluation related stuff drf = InverseDrosteReduced(self.integral_knl, self.quad_order, self.interaction_case_vecs, n_brick_quad_points=0, knl_symmetry_tags=[], auto_windowing=False) # uses "dist[dim]", assigned to "knl_val" knl_insns = drf.get_sumpy_kernel_insns() eval_kernel_insns = [ insn.copy(within_inames=insn.within_inames | 
frozenset(["iqpt"])) for insn in knl_insns ] from sumpy.symbolic import SympyToPymbolicMapper sympy_conv = SympyToPymbolicMapper() scaling_assignment = lp.Assignment( id=None, assignee="knl_scaling", expression=sympy_conv( self.integral_knl.get_global_scaling_const()), temp_var_type=lp.Optional(), ) extra_kernel_kwarg_types = () if "extra_kernel_kwarg_types" in kwargs: extra_kernel_kwarg_types = kwargs["extra_kernel_kwarg_types"] lpknl = lp.make_kernel( # NOQA "{ [iqpt, iaxis]: 0<=iqpt<n_q_points and 0<=iaxis<dim }", [ """ for iqpt for iaxis <> dist[iaxis] = (quad_points[iaxis, iqpt] - target_point[iaxis]) end end """ ] + eval_kernel_insns + [scaling_assignment] + [ """ for iqpt result[iqpt] = knl_val * knl_scaling end """ ], [ lp.ValueArg("dim, n_q_points", np.int32), lp.GlobalArg("quad_points", np.float64, "dim, n_q_points"), lp.GlobalArg("target_point", np.float64, "dim") ] + list(extra_kernel_kwarg_types) + [ "...", ], name="eval_kernel_lucky_coin", lang_version=(2018, 2), ) lpknl = lp.fix_parameters(lpknl, dim=self.dim) lpknl = lp.set_options(lpknl, write_cl=False) lpknl = lp.set_options(lpknl, return_dict=True) # }}} End kernel evaluation node_coords = flatten(thaw(arr_ctx, discr.nodes())) nodes = cl.array.to_device( queue, np.vstack([crd.get() for crd in node_coords])) int_vals = [] for target in self.q_points: evt, res = lpknl(queue, quad_points=nodes, target_point=target) knl_vals = res['result'] integ = bind( discr, sym.integral(self.dim, self.dim, sym.var("integrand")))(queue, integrand=knl_vals) queue.finish() int_vals.append(integ) int_vals_coins = np.array(int_vals) int_vals_inf = np.zeros(self.n_q_points) # {{{ integrate over the exterior of the ball if self.dim == 2: def rho_0(theta, target, radius): rho_x = np.linalg.norm(target, ord=2) return (-1 * rho_x * np.cos(theta) + np.sqrt(radius**2 - rho_x**2 * (np.sin(theta)**2))) def ext_inf_integrand(theta, s, target, radius): _rho_0 = rho_0(theta, target=target, radius=radius) return _rho_0**(-2 * s) 
def compute_ext_inf_integral(target, s, radius): # target: target point # s: fractional order # radius: radius of the circle import scipy.integrate as sint val, _ = sint.quadrature(partial(ext_inf_integrand, s=s, target=target, radius=radius), a=0, b=2 * np.pi) return val * (1 / (2 * s)) * fl_scaling(k=self.dim, s=s) if 1: # optional test target = [0, 0] s = 0.5 radius = 1 scaling = fl_scaling(k=self.dim, s=s) val = compute_ext_inf_integral(target, s, radius) test_err = np.abs(val - radius**(-2 * s) * 2 * np.pi * (1 / (2 * s)) * scaling) / (radius**(-2 * s) * 2 * np.pi * (1 / (2 * s)) * scaling) if test_err > 1e-12: logger.warn("Error evaluating at origin = %f" % test_err) for tid, target in enumerate(self.q_points): # The formula assumes that the source box is centered at origin int_vals_inf[tid] = compute_ext_inf_integral( target=target - hs, s=self.integral_knl.s, radius=r) elif self.dim == 3: # FIXME raise NotImplementedError("3D not yet implemented.") else: raise NotImplementedError("Unsupported dimension") # }}} End integrate over the exterior of the ball self.kernel_exterior_normalizers = int_vals_coins + int_vals_inf return # }}} End kernel exterior normalizer table # {{{ query table and transform to actual box def get_potential_scaler(self, entry_id, source_box_size=1, kernel_type=None, kernel_power=None): """Returns a helper function to rescale the table entry based on source_box's actual size (edge length). 
""" assert source_box_size > 0 a = source_box_size if kernel_type is None: kernel_type = self.kernel_type if kernel_type is None: raise NotImplementedError( "Specify kernel type before performing scaling queries") if kernel_type == "log": assert kernel_power is None source_mode_index = self.decode_index( entry_id)["source_mode_index"] displacement = (a ** 2) * np.log(a) \ * self.mode_normalizers[source_mode_index] scaling = a**2 elif kernel_type == "const": displacement = 0 scaling = 1 elif kernel_type == "inv_power": assert kernel_power is not None displacement = 0 scaling = source_box_size**(2 + kernel_power) elif kernel_type == "rigid": # TODO: add assertion for source box size displacement = 0 scaling = 1 else: raise NotImplementedError("Unsupported kernel type") def scaler(pot_val): return pot_val * scaling + displacement return scaler
def optimize_plan(opt_name, plan_generator, target_func, maximize,
                  debug_flags=frozenset(), occupancy_slack=0.5,
                  log_filename=None):
    """Benchmark candidate CUDA execution plans and return the best one.

    :param opt_name: short name of the optimization, used in debug-flag
        names and progress-bar labels.
    :param plan_generator: callable returning an iterable of candidate
        plans; plans with a non-None ``invalid_reason()`` are discarded.
    :param target_func: callable evaluating a plan; returns a scalar
        value, a tuple ``(value, *extra_info)``, or None to skip the plan.
    :param maximize: if true, pick the plan with the largest value,
        otherwise the smallest.
    :param debug_flags: set of flag strings controlling verbosity,
        logging, and shortcut behavior.
    :param occupancy_slack: only plans whose occupancy is at least
        ``occupancy_slack * max_occupancy`` are benchmarked.
    :param log_filename: if set (and "cuda_plan_log" is in debug_flags),
        record each measurement into ``plan-<log_filename>.dat``.
    :returns: tuple ``(best_plan, best_value)``.
    :raises RuntimeError: if no valid plans are generated.
    """
    plans = [p for p in plan_generator() if p.invalid_reason() is None]

    debug = "cuda_%s_plan" % opt_name in debug_flags
    show_progress = ("cuda_plan_no_progress" not in debug_flags) and not debug

    if "cuda_plan_log" not in debug_flags:
        log_filename = None

    if not plans:
        raise RuntimeError("no valid CUDA execution plans found")

    # shortcut: skip benchmarking, just take the highest-occupancy plan
    if {"cuda_no_plan", "cuda_no_plan_" + opt_name} & debug_flags:
        from pytools import argmax2
        return argmax2((plan, plan.occupancy_record().occupancy)
                       for plan in plans), 0

    max_occup = max(plan.occupancy_record().occupancy for plan in plans)
    desired_occup = occupancy_slack * max_occup

    if log_filename is not None:
        from pytools import single_valued
        feature_columns = single_valued(p.feature_columns() for p in plans)
        feature_names = [fc.split()[0] for fc in feature_columns]

        try:
            import sqlite3 as sqlite
        except ImportError:
            from pysqlite2 import dbapi2 as sqlite

        db_conn = sqlite.connect("plan-%s.dat" % log_filename)

        try:
            db_conn.execute("""
                  create table data (
                    id integer primary key autoincrement,
                    %s,
                    value real)""" % ", ".join(feature_columns))
        except sqlite.OperationalError:
            # table already exists from an earlier run; just append
            pass

    if show_progress:
        from pytools import ProgressBar
        pbar = ProgressBar("plan " + opt_name, len(plans))
    try:
        plan_values = []
        for p in plans:
            if show_progress:
                pbar.progress()

            # only benchmark plans close enough to the best occupancy
            if p.occupancy_record().occupancy >= desired_occup - 1e-10:
                if debug:
                    print("<---- trying %s:" % p)
                value = target_func(p)
                if isinstance(value, tuple):
                    extra_info = value[1:]
                    value = value[0]
                else:
                    extra_info = None

                if value is not None:
                    if debug:
                        print("----> yielded %g" % value)
                    plan_values.append(((len(plan_values), p), value))

                    if log_filename is not None:
                        db_conn.execute(
                            "insert into data (%s,value) values (%s)"
                            % (", ".join(feature_names),
                               ",".join(["?"] * (1 + len(feature_names)))),
                            p.features(*extra_info) + (value,))
    finally:
        if show_progress:
            pbar.finished()

    if log_filename is not None:
        db_conn.commit()

    from pytools import argmax2, argmin2
    if maximize:
        num_plan, plan = argmax2(plan_values)
    else:
        num_plan, plan = argmin2(plan_values)

    plan_value = plan_values[num_plan][1]

    if debug:
        print("----------------------------------------------")
        print("chosen: %s" % plan)
        print("value: %g" % plan_value)
        print("----------------------------------------------")

    return plan, plan_value
def dump_couch_to_sqlite(couch_db, outfile, scan_max=None):
    """Flatten all documents of type "job" in *couch_db* into SQLite.

    Pass 1 scans documents to infer a column type for every key seen in
    job documents; pass 2 creates table ``data`` in *outfile* and inserts
    one row per job document.

    :param couch_db: CouchDB database handle; must support ``len()`` and
        iteration via :func:`generate_all_docs`.
    :param outfile: file name of the SQLite database to create.
    :param scan_max: if not None, stop the type scan after this many job
        documents (the fill pass still processes every document).
    :raises RuntimeError: if a key has conflicting (non int/float) types
        across documents.
    :raises TypeError: if a scanned value's type has no SQL equivalent.
    """
    import sqlite3 as sqlite

    # {{{ scan for types

    column_type_dict = {}

    from pytools import ProgressBar
    pb = ProgressBar("scan (pass 1/2)", len(couch_db))

    scan_count = 0
    for doc in generate_all_docs(couch_db):
        if "type" in doc and doc["type"] == "job":
            for k, v in doc.items():
                if v is None:
                    # a null value carries no type information; previously a
                    # None here would clobber an already-detected column type
                    continue

                new_type = type(v)
                if k in column_type_dict and column_type_dict[k] != new_type:
                    old_type = column_type_dict[k]
                    if {old_type, new_type} == {float, int}:
                        # ints and floats mix fine; widen to float
                        new_type = float
                    else:
                        raise RuntimeError(
                                "ambiguous types for '%s': %s, %s"
                                % (k, new_type, old_type))

                column_type_dict[k] = new_type

            scan_count += 1
            if scan_max is not None and scan_count >= scan_max:
                break

        pb.progress()
    pb.finished()

    # }}}

    # the "type" marker itself does not become a column; tolerate its
    # absence (e.g. when no job documents were scanned)
    column_type_dict.pop("type", None)

    column_types = list(column_type_dict.items())

    def get_sql_type(tp):
        # map a scanned Python type to a SQLite column type
        if issubclass(tp, str):
            return "text"
        elif issubclass(tp, list):
            return "text"
        elif issubclass(tp, int):
            return "integer"
        elif issubclass(tp, (float, numpy.floating)):
            return "real"
        else:
            raise TypeError("No SQL type for %s" % tp)

    create_stmt = ("create table data (%s)"
                   % ",".join("%s %s" % (name, get_sql_type(tp))
                              for name, tp in column_types))

    db_conn = sqlite.connect(outfile, timeout=30)
    db_conn.execute(create_stmt)
    db_conn.commit()

    # one "?" placeholder per column; values are bound, not interpolated
    insert_stmt = "insert into data values (%s)" % (",".join(
        ["?"] * len(column_types)))

    pb = ProgressBar("fill (pass 2/2)", len(couch_db))
    for doc in generate_all_docs(couch_db):
        # only job documents become rows; previously non-job documents
        # were inserted as all-NULL rows
        if "type" in doc and doc["type"] == "job":
            data = [None] * len(column_types)
            for i, (col_name, col_tp) in enumerate(column_types):
                try:
                    if isinstance(doc[col_name], list):
                        # lists are stored as their string representation
                        data[i] = str(doc[col_name])
                    else:
                        data[i] = doc[col_name]
                except KeyError:
                    print("doc %s had no field %s" % (doc["_id"], col_name))

            db_conn.execute(insert_stmt, data)

        pb.progress()
    pb.finished()

    db_conn.commit()
    db_conn.close()