def _make_cross_face_batches(
        queue, vol_discr, bdry_discr,
        i_tgt_grp, i_src_grp, i_face_tgt,
        adj_grp,
        vbc_tgt_grp_face_batch, src_grp_el_lookup):
    """Yield interpolation batches mapping source boundary elements onto the
    target boundary elements that sit on face *i_face_tgt*.

    For every target node, the reference ("unit") coordinates on the matching
    source element are found by inverting the source element's face map with
    Gauss-Newton. Elements whose resulting unit nodes agree (to within a
    tolerance of ``1e4`` machine epsilons) are then collected into one
    :class:`InterpolationBatch` each.

    This is a generator: no work is performed until it is iterated.

    :arg queue: command queue used for device-to-host transfers (``.get``)
        and for uploading the index arrays of each batch.
    :arg vol_discr: volume discretization; only used by the disabled
        debugging visualization.
    :arg bdry_discr: boundary discretization providing the nodes, the
        element groups and the mesh for both source and target group.
    :arg i_tgt_grp: index of the target group in ``bdry_discr.groups``.
    :arg i_src_grp: index of the source group in ``bdry_discr.groups``.
    :arg i_face_tgt: face number; entries of *adj_grp* whose
        ``element_faces`` equal this number are processed.
    :arg adj_grp: adjacency group providing ``elements``, ``element_faces``,
        ``neighbors`` and ``neighbor_faces``.
    :arg vbc_tgt_grp_face_batch: batch providing ``from_element_indices``
        and ``to_element_indices`` as device arrays.
    :arg src_grp_el_lookup: array indexed by ``[volume element, face]``
        giving the matching source boundary element index.
    :returns: a generator of :class:`InterpolationBatch` instances.
    :raises RuntimeError: if Gauss-Newton does not reach the tolerance
        within the iteration limit.
    """

    # {{{ index wrangling

    # Assert that the adjacency group and the restriction interpolation
    # batch have the same element ordering.

    adj_grp_tgt_flags = adj_grp.element_faces == i_face_tgt

    assert (
            np.array_equal(
                adj_grp.elements[adj_grp_tgt_flags],
                vbc_tgt_grp_face_batch.from_element_indices
                .get(queue=queue)))

    # find to_element_indices

    to_bdry_element_indices = (
            vbc_tgt_grp_face_batch.to_element_indices
            .get(queue=queue))

    # find from_element_indices

    from_vol_element_indices = adj_grp.neighbors[adj_grp_tgt_flags]
    from_element_faces = adj_grp.neighbor_faces[adj_grp_tgt_flags]

    from_bdry_element_indices = src_grp_el_lookup[
            from_vol_element_indices, from_element_faces]

    # }}}

    # {{{ visualization (for debugging)

    if 0:
        print("TVE", adj_grp.elements[adj_grp_tgt_flags])
        print("TBE", to_bdry_element_indices)
        print("FVE", from_vol_element_indices)
        from meshmode.mesh.visualization import draw_2d_mesh
        import matplotlib.pyplot as pt
        draw_2d_mesh(vol_discr.mesh, draw_element_numbers=True,
                set_bounding_box=True,
                draw_vertex_numbers=False,
                draw_face_numbers=True,
                fill=None)
        pt.figure()

        draw_2d_mesh(bdry_discr.mesh, draw_element_numbers=True,
                set_bounding_box=True,
                draw_vertex_numbers=False,
                draw_face_numbers=True,
                fill=None)

        pt.show()

    # }}}

    # {{{ invert face map (using Gauss-Newton)

    to_bdry_nodes = (
            # FIXME: This should view-then-transfer (but PyOpenCL doesn't do
            # non-contiguous transfers for now).
            bdry_discr.groups[i_tgt_grp].view(
                bdry_discr.nodes().get(queue=queue))
            [:, to_bdry_element_indices])

    tol = 1e4 * np.finfo(to_bdry_nodes.dtype).eps

    from_mesh_grp = bdry_discr.mesh.groups[i_src_grp]
    from_grp = bdry_discr.groups[i_src_grp]

    dim = from_grp.dim
    ambient_dim, nelements, nto_unit_nodes = to_bdry_nodes.shape

    # Start every node at the mean of the reference element's vertices.
    initial_guess = np.mean(from_mesh_grp.vertex_unit_coordinates(), axis=0)
    from_unit_nodes = np.empty((dim, nelements, nto_unit_nodes))
    from_unit_nodes[:] = initial_guess.reshape(-1, 1, 1)

    import modepy as mp
    from_vdm = mp.vandermonde(from_grp.basis(), from_grp.unit_nodes)
    from_inv_t_vdm = la.inv(from_vdm.T)
    from_nfuncs = len(from_grp.basis())

    # (ambient_dim, nelements, nfrom_unit_nodes)
    from_bdry_nodes = (
            # FIXME: This should view-then-transfer (but PyOpenCL doesn't do
            # non-contiguous transfers for now).
            bdry_discr.groups[i_src_grp].view(
                bdry_discr.nodes().get(queue=queue))
            [:, from_bdry_element_indices])

    def apply_map(unit_nodes):
        # unit_nodes: (dim, nelements, nto_unit_nodes)

        # basis_at_unit_nodes
        basis_at_unit_nodes = np.empty(
                (from_nfuncs, nelements, nto_unit_nodes))

        for i, f in enumerate(from_grp.basis()):
            basis_at_unit_nodes[i] = (
                    f(unit_nodes.reshape(dim, -1))
                    .reshape(nelements, nto_unit_nodes))

        intp_coeffs = np.einsum("fj,jet->fet",
                from_inv_t_vdm, basis_at_unit_nodes)

        # If we're interpolating 1, we had better get 1 back.
        one_deviation = np.abs(np.sum(intp_coeffs, axis=0) - 1)
        assert (one_deviation < tol).all(), np.max(one_deviation)

        return np.einsum("fet,aef->aet", intp_coeffs, from_bdry_nodes)

    def get_map_jacobian(unit_nodes):
        # unit_nodes: (dim, nelements, nto_unit_nodes)

        # basis_at_unit_nodes
        dbasis_at_unit_nodes = np.empty(
                (dim, from_nfuncs, nelements, nto_unit_nodes))

        for i, df in enumerate(from_grp.grad_basis()):
            df_result = df(unit_nodes.reshape(dim, -1))

            for rst_axis, df_r in enumerate(df_result):
                dbasis_at_unit_nodes[rst_axis, i] = (
                        df_r.reshape(nelements, nto_unit_nodes))

        dintp_coeffs = np.einsum("fj,rjet->rfet",
                from_inv_t_vdm, dbasis_at_unit_nodes)

        return np.einsum("rfet,aef->raet", dintp_coeffs, from_bdry_nodes)

    # {{{ test map applier and jacobian

    if 0:
        u = from_unit_nodes
        f = apply_map(u)
        for h in [1e-1, 1e-2]:
            du = h*np.random.randn(*u.shape)

            f_2 = apply_map(u+du)

            jf = get_map_jacobian(u)

            f2_2 = f + np.einsum("raet,ret->aet", jf, du)

            print(h, la.norm((f_2-f2_2).ravel()))

    # }}}

    # {{{ visualize initial guess

    if 0:
        import matplotlib.pyplot as pt
        guess = apply_map(from_unit_nodes)
        goals = to_bdry_nodes

        from meshmode.discretization.visualization import draw_curve
        draw_curve(bdry_discr)

        pt.plot(guess[0].reshape(-1), guess[1].reshape(-1), "or")
        pt.plot(goals[0].reshape(-1), goals[1].reshape(-1), "og")
        pt.plot(from_bdry_nodes[0].reshape(-1), from_bdry_nodes[1].reshape(-1),
                "o", color="purple")
        pt.show()

    # }}}

    logger.info("make_opposite_face_connection: begin gauss-newton")

    niter = 0
    while True:
        resid = apply_map(from_unit_nodes) - to_bdry_nodes

        df = get_map_jacobian(from_unit_nodes)

        # For the 1D/2D accelerated versions, we'll use the normal
        # equations and Cramer's rule. If you're looking for high-end
        # numerics, look no further than meshmode.

        if dim == 1:
            # A is df.T
            ata = np.einsum("iket,jket->ijet", df, df)
            atb = np.einsum("iket,ket->iet", df, resid)

            df_inv_resid = atb / ata[0, 0]

        elif dim == 2:
            # A is df.T
            ata = np.einsum("iket,jket->ijet", df, df)
            atb = np.einsum("iket,ket->iet", df, resid)

            det = ata[0, 0]*ata[1, 1] - ata[0, 1]*ata[1, 0]

            df_inv_resid = np.empty_like(from_unit_nodes)
            df_inv_resid[0] = 1/det * (ata[1, 1] * atb[0] - ata[1, 0]*atb[1])
            df_inv_resid[1] = 1/det * (-ata[0, 1] * atb[0] + ata[0, 0]*atb[1])

        else:
            # The boundary of a 3D mesh is 2D, so that's the
            # highest-dimensional case we genuinely care about.
            #
            # This stinks, performance-wise, because it's not vectorized.
            # But we'll only hit it for boundaries of 4+D meshes, in which
            # case... good luck. :)
            df_inv_resid = np.empty_like(from_unit_nodes)
            for e in range(nelements):
                for t in range(nto_unit_nodes):
                    df_inv_resid[:, e, t], _, _, _ = \
                            la.lstsq(df[:, :, e, t].T, resid[:, e, t])

        from_unit_nodes = from_unit_nodes - df_inv_resid

        # NOTE: 'resid' was evaluated at the previous iterate, so the
        # convergence check lags the update by one step.
        max_resid = np.max(np.abs(resid))
        logger.debug("gauss-newton residual: %g", max_resid)

        if max_resid < tol:
            logger.info("make_opposite_face_connection: gauss-newton: done, "
                    "final residual: %g", max_resid)
            break

        niter += 1
        if niter > 10:
            raise RuntimeError("Gauss-Newton (for finding opposite-face reference "
                    "coordinates) did not converge")

    # }}}

    # {{{ find groups of from_unit_nodes

    from meshmode.discretization.connection import InterpolationBatch

    def to_dev(ary):
        return cl.array.to_device(queue, ary, array_queue=None)

    # The plain builtin 'bool' is used because the 'np.bool' alias was
    # removed from NumPy.
    done_elements = np.zeros(nelements, dtype=bool)
    while True:
        todo_elements, = np.where(~done_elements)
        if not len(todo_elements):
            return

        # The first remaining element serves as the template; it is at
        # distance zero from itself, so each pass retires at least one
        # element.
        template_unit_nodes = from_unit_nodes[:, todo_elements[0], :]

        unit_node_dist = np.max(
                np.max(
                    np.abs(
                        from_unit_nodes[:, todo_elements, :]
                        - template_unit_nodes.reshape(dim, 1, -1)),
                    axis=2),
                axis=0)

        close_els = todo_elements[unit_node_dist < tol]
        done_elements[close_els] = True

        yield InterpolationBatch(
                from_group_index=i_src_grp,
                from_element_indices=to_dev(from_bdry_element_indices[close_els]),
                to_element_indices=to_dev(to_bdry_element_indices[close_els]),
                result_unit_nodes=template_unit_nodes,
                to_element_face=None)

    # }}}
def test_perf_data_gathering(ctx_getter, n_arms=5):
    """Evaluate a single-layer potential on a refined starfish curve while
    a geometry-data inspector collects performance data.

    The inspector returns *False*, which (per the original comment) means
    the actual FMM does not need to run.

    :arg ctx_getter: callable returning the OpenCL context to use.
    :arg n_arms: number of arms of the starfish; the curve is sampled at
        ``n_arms * 30`` parameter values.
    """
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    # prevent cache 'splosion
    from sympy.core.cache import clear_cache
    clear_cache()

    from sumpy.kernel import LaplaceKernel
    from meshmode.discretization import Discretization
    from meshmode.discretization.poly_element import (
            InterpolatoryQuadratureSimplexGroupFactory)
    from pytential.qbx import QBXLayerPotentialSource

    target_order = 8

    # {{{ geometry and symbolic operator

    curve_mesh = make_curve_mesh(
            NArmedStarfish(n_arms, 0.8),
            np.linspace(0, 1, n_arms * 30),
            target_order)

    density_sym = sym.var("sigma")

    # The kernel doesn't really matter here
    kernel = LaplaceKernel(curve_mesh.ambient_dim)
    layer_pot_op = sym.S(kernel, density_sym, qbx_forced_limit=+1)

    coarse_discr = Discretization(
            queue.context, curve_mesh,
            InterpolatoryQuadratureSimplexGroupFactory(target_order))

    # }}}

    # {{{ performance data collection hook

    gathered = []

    def inspect_geo_data(insn, bound_expr, geo_data):
        from pytential.qbx.fmm import assemble_performance_data
        gathered.append(
                assemble_performance_data(geo_data, uses_pde_expansions=True))

        # no need to do the actual FMM
        return False

    # }}}

    unrefined_source = QBXLayerPotentialSource(
            coarse_discr, 4*target_order,
            # qbx order and fmm order don't really matter
            10, fmm_order=10,
            _expansions_in_tree_have_extent=True,
            _expansion_stick_out_factor=0.5,
            geometry_data_inspector=inspect_geo_data,
            target_association_tolerance=1e-10,
            )

    refined_source, _ = unrefined_source.with_refinement()
    refined_discr = refined_source.density_discr

    if 0:
        from meshmode.discretization.visualization import draw_curve
        draw_curve(refined_discr)
        import matplotlib.pyplot as plt
        plt.show()

    discr_nodes = refined_discr.nodes().with_queue(queue)
    density = cl.clmath.sin(10 * discr_nodes[0])

    bound_op = bind(refined_source, layer_pot_op)
    bound_op(queue, sigma=density)
def plot(self, draw_circles=False, draw_center_numbers=False,
        highlight_centers=None):
    """Plot most of the information contained in a
    :class:`QBXFMMGeometryData` object, for debugging.

    The current matplotlib figure is cleared, drawn into and then saved as
    ``geodata-stage2-nelem<N>.pdf``, where ``<N>`` is the number of
    elements of the stage-2 density discretization's mesh.

    :arg draw_circles: If true, draw a dotted circle with the expansion
        radius around every center.
    :arg draw_center_numbers: If true, label every center with its index.
    :arg highlight_centers: If not *None*, an object with which
        the array of centers can be indexed to find the highlighted
        centers.
    :raises ValueError: if the geometry is not two-dimensional.

    .. note::

        This only works for two-dimensional geometries.
    """

    from pytential import sym
    import matplotlib.pyplot as pt
    pt.clf()

    dims = self.tree().targets.shape[0]
    if dims != 2:
        raise ValueError("only 2-dimensional geometry info can be plotted")

    with cl.CommandQueue(self.cl_context) as queue:
        stage2_density_discr = self.places.get_discretization(
                self.source_dd.geometry, sym.QBX_SOURCE_STAGE2)
        quad_stage2_density_discr = self.places.get_discretization(
                self.source_dd.geometry, sym.QBX_SOURCE_QUAD_STAGE2)

        from meshmode.discretization.visualization import draw_curve
        draw_curve(quad_stage2_density_discr)

        global_flags = self.global_qbx_flags().get(queue=queue)

        tree = self.tree().get(queue=queue)
        from boxtree.visualization import TreePlotter
        tp = TreePlotter(tree)
        tp.draw_tree()

        # {{{ draw centers and circles

        centers = self.flat_centers()
        centers = [
                centers[0].get(queue),
                centers[1].get(queue)]
        pt.plot(centers[0][global_flags == 0],
                centers[1][global_flags == 0], "oc",
                label="centers needing local qbx")

        if highlight_centers is not None:
            pt.plot(centers[0][highlight_centers],
                    centers[1][highlight_centers], "oc",
                    label="highlighted centers",
                    markersize=15)

        ax = pt.gca()

        if draw_circles:
            radii = self.flat_expansion_radii().get(queue)
            for cx, cy, r in zip(centers[0], centers[1], radii):
                ax.add_artist(
                        pt.Circle((cx, cy), r, fill=False, ls="dotted", lw=1))

        if draw_center_numbers:
            # Only two sequences are zipped here, so unpack exactly two
            # coordinates per center.
            for icenter, (cx, cy) in enumerate(zip(centers[0], centers[1])):
                pt.text(cx, cy,
                        str(icenter), fontsize=8,
                        ha="left", va="center",
                        bbox=dict(facecolor="white", alpha=0.5, lw=0))

        # }}}

        # {{{ draw target-to-center arrows

        ttc = self.user_target_to_center().get(queue)
        tinfo = self.target_info()
        targets = tinfo.targets.get(queue)

        pt.plot(targets[0], targets[1], "+")
        pt.plot(
                targets[0][ttc == target_state.FAILED],
                targets[1][ttc == target_state.FAILED],
                "dr", markersize=15, label="failed targets")

        for itarget in np.where(ttc == target_state.FAILED)[0]:
            pt.text(
                    targets[0][itarget],
                    targets[1][itarget],
                    str(itarget), fontsize=8,
                    ha="left", va="center",
                    bbox=dict(facecolor="white", alpha=0.5, lw=0))

        tccount = 0
        checked = 0
        # The first 'ncenters' targets are skipped; only the remaining
        # ones get an arrow to their assigned center.
        for tx, ty, tcenter in zip(
                targets[0][self.ncenters:],
                targets[1][self.ncenters:],
                ttc[self.ncenters:]):
            checked += 1
            if tcenter >= 0:
                tccount += 1
                ax.add_artist(
                        pt.Line2D(
                            (tx, centers[0][tcenter]),
                            (ty, centers[1][tcenter]),
                            ))

        logger.info("found a center for %d/%d targets", tccount, checked)

        # }}}

        pt.gca().set_aspect("equal")
        #pt.legend()
        pt.savefig(
                "geodata-stage2-nelem%d.pdf"
                % stage2_density_discr.mesh.nelements)
def plot(self, draw_circles=False, draw_center_numbers=False,
        highlight_centers=None):
    """Plot most of the information contained in a
    :class:`QBXFMMGeometryData` object, for debugging.

    The current matplotlib figure is cleared, drawn into and then saved as
    ``geodata-stage2-nelem<N>.pdf``, where ``<N>`` is the number of
    elements of the layer potential source's stage-2 density
    discretization mesh.

    :arg draw_circles: If true, draw a dotted circle with the expansion
        radius around every center.
    :arg draw_center_numbers: If true, label every center with its index.
    :arg highlight_centers: If not *None*, an object with which
        the array of centers can be indexed to find the highlighted
        centers.
    :raises ValueError: if the geometry is not two-dimensional.

    .. note::

        This only works for two-dimensional geometries.
    """

    import matplotlib.pyplot as pt
    pt.clf()

    dims = self.tree().targets.shape[0]
    if dims != 2:
        raise ValueError("only 2-dimensional geometry info can be plotted")

    with cl.CommandQueue(self.cl_context) as queue:
        from meshmode.discretization.visualization import draw_curve
        draw_curve(self.lpot_source.quad_stage2_density_discr)

        global_flags = self.global_qbx_flags().get(queue=queue)

        tree = self.tree().get(queue=queue)
        from boxtree.visualization import TreePlotter
        tp = TreePlotter(tree)
        tp.draw_tree()

        # {{{ draw centers and circles

        centers = self.centers()
        centers = [
                centers[0].get(queue),
                centers[1].get(queue)]
        pt.plot(centers[0][global_flags == 0],
                centers[1][global_flags == 0], "oc",
                label="centers needing local qbx")

        if highlight_centers is not None:
            pt.plot(centers[0][highlight_centers],
                    centers[1][highlight_centers], "oc",
                    label="highlighted centers",
                    markersize=15)

        ax = pt.gca()

        if draw_circles:
            radii = self.expansion_radii().get(queue)
            for cx, cy, r in zip(centers[0], centers[1], radii):
                ax.add_artist(
                        pt.Circle((cx, cy), r, fill=False, ls="dotted", lw=1))

        if draw_center_numbers:
            # Only two sequences are zipped here, so unpack exactly two
            # coordinates per center.
            for icenter, (cx, cy) in enumerate(zip(centers[0], centers[1])):
                pt.text(cx, cy,
                        str(icenter), fontsize=8,
                        ha="left", va="center",
                        bbox=dict(facecolor='white', alpha=0.5, lw=0))

        # }}}

        # {{{ draw target-to-center arrows

        ttc = self.user_target_to_center().get(queue)
        tinfo = self.target_info()
        targets = tinfo.targets.get(queue)

        pt.plot(targets[0], targets[1], "+")
        pt.plot(
                targets[0][ttc == target_state.FAILED],
                targets[1][ttc == target_state.FAILED],
                "dr", markersize=15, label="failed targets")

        for itarget in np.where(ttc == target_state.FAILED)[0]:
            pt.text(
                    targets[0][itarget],
                    targets[1][itarget],
                    str(itarget), fontsize=8,
                    ha="left", va="center",
                    bbox=dict(facecolor='white', alpha=0.5, lw=0))

        tccount = 0
        checked = 0
        # The first 'ncenters' targets are skipped; only the remaining
        # ones get an arrow to their assigned center.
        for tx, ty, tcenter in zip(
                targets[0][self.ncenters:],
                targets[1][self.ncenters:],
                ttc[self.ncenters:]):
            checked += 1
            if tcenter >= 0:
                tccount += 1
                ax.add_artist(
                        pt.Line2D(
                            (tx, centers[0][tcenter]),
                            (ty, centers[1][tcenter]),
                            ))

        print("found a center for %d/%d targets" % (tccount, checked))

        # }}}

        pt.gca().set_aspect("equal")
        #pt.legend()
        pt.savefig(
                "geodata-stage2-nelem%d.pdf"
                % self.lpot_source.stage2_density_discr.mesh.nelements)
def _make_cross_face_batches(queue,
        tgt_bdry_discr, src_bdry_discr,
        i_tgt_grp, i_src_grp,
        tgt_bdry_element_indices, src_bdry_element_indices):
    """Yield interpolation batches mapping the given source boundary
    elements onto the given target boundary elements.

    For every target node, the reference ("unit") coordinates on the matching
    source element are found by inverting the source element's map with
    Gauss-Newton. Elements whose resulting unit nodes agree (to within a
    tolerance of ``1e4`` machine epsilons) are then collected into one
    :class:`InterpolationBatch` each.

    This is a generator: no work is performed until it is iterated.

    :arg queue: command queue used for device-to-host transfers (``.get``)
        and for uploading the index arrays of each batch.
    :arg tgt_bdry_discr: discretization providing the target nodes.
    :arg src_bdry_discr: discretization providing the source nodes, the
        source element group and its mesh group.
    :arg i_tgt_grp: index of the target group in ``tgt_bdry_discr.groups``.
    :arg i_src_grp: index of the source group in ``src_bdry_discr.groups``.
    :arg tgt_bdry_element_indices: indices selecting the target elements.
    :arg src_bdry_element_indices: indices selecting the source elements;
        the selected node arrays of source and target are asserted to have
        the same shape.
    :returns: a generator of :class:`InterpolationBatch` instances.
    :raises RuntimeError: if Gauss-Newton does not reach the tolerance
        within the iteration limit.
    """

    # FIXME: This should view-then-transfer
    # (but PyOpenCL doesn't do non-contiguous transfers for now).
    tgt_bdry_nodes = (
            tgt_bdry_discr.groups[i_tgt_grp].view(
                tgt_bdry_discr.nodes().get(queue=queue))
            [:, tgt_bdry_element_indices])

    # FIXME: This should view-then-transfer
    # (but PyOpenCL doesn't do non-contiguous transfers for now).
    src_bdry_nodes = (
            src_bdry_discr.groups[i_src_grp].view(
                src_bdry_discr.nodes().get(queue=queue))
            [:, src_bdry_element_indices])

    tol = 1e4 * np.finfo(tgt_bdry_nodes.dtype).eps

    src_mesh_grp = src_bdry_discr.mesh.groups[i_src_grp]
    src_grp = src_bdry_discr.groups[i_src_grp]

    dim = src_grp.dim
    ambient_dim, nelements, ntgt_unit_nodes = tgt_bdry_nodes.shape

    assert tgt_bdry_nodes.shape == src_bdry_nodes.shape

    # {{{ invert face map (using Gauss-Newton)

    # Start every node at the mean of the reference element's vertices.
    initial_guess = np.mean(src_mesh_grp.vertex_unit_coordinates(), axis=0)
    src_unit_nodes = np.empty((dim, nelements, ntgt_unit_nodes))
    src_unit_nodes[:] = initial_guess.reshape(-1, 1, 1)

    import modepy as mp
    vdm = mp.vandermonde(src_grp.basis(), src_grp.unit_nodes)
    inv_t_vdm = la.inv(vdm.T)
    nsrc_funcs = len(src_grp.basis())

    def apply_map(unit_nodes):
        # unit_nodes: (dim, nelements, ntgt_unit_nodes)

        # basis_at_unit_nodes
        basis_at_unit_nodes = np.empty(
                (nsrc_funcs, nelements, ntgt_unit_nodes))

        for i, f in enumerate(src_grp.basis()):
            basis_at_unit_nodes[i] = (
                    f(unit_nodes.reshape(dim, -1))
                    .reshape(nelements, ntgt_unit_nodes))

        intp_coeffs = np.einsum("fj,jet->fet", inv_t_vdm, basis_at_unit_nodes)

        # If we're interpolating 1, we had better get 1 back.
        one_deviation = np.abs(np.sum(intp_coeffs, axis=0) - 1)
        assert (one_deviation < tol).all(), np.max(one_deviation)

        return np.einsum("fet,aef->aet", intp_coeffs, src_bdry_nodes)

    def get_map_jacobian(unit_nodes):
        # unit_nodes: (dim, nelements, ntgt_unit_nodes)

        # basis_at_unit_nodes
        dbasis_at_unit_nodes = np.empty(
                (dim, nsrc_funcs, nelements, ntgt_unit_nodes))

        for i, df in enumerate(src_grp.grad_basis()):
            df_result = df(unit_nodes.reshape(dim, -1))

            for rst_axis, df_r in enumerate(df_result):
                dbasis_at_unit_nodes[rst_axis, i] = (
                        df_r.reshape(nelements, ntgt_unit_nodes))

        dintp_coeffs = np.einsum("fj,rjet->rfet",
                inv_t_vdm, dbasis_at_unit_nodes)

        return np.einsum("rfet,aef->raet", dintp_coeffs, src_bdry_nodes)

    # {{{ test map applier and jacobian

    if 0:
        u = src_unit_nodes
        f = apply_map(u)
        for h in [1e-1, 1e-2]:
            du = h * np.random.randn(*u.shape)

            f_2 = apply_map(u + du)

            jf = get_map_jacobian(u)

            f2_2 = f + np.einsum("raet,ret->aet", jf, du)

            print(h, la.norm((f_2 - f2_2).ravel()))

    # }}}

    # {{{ visualize initial guess

    if 0:
        import matplotlib.pyplot as pt
        guess = apply_map(src_unit_nodes)
        goals = tgt_bdry_nodes

        from meshmode.discretization.visualization import draw_curve
        pt.figure(0)
        draw_curve(tgt_bdry_discr)
        pt.figure(1)
        draw_curve(src_bdry_discr)
        pt.figure(2)

        pt.plot(guess[0].reshape(-1), guess[1].reshape(-1), "or")
        pt.plot(goals[0].reshape(-1), goals[1].reshape(-1), "og")
        pt.plot(src_bdry_nodes[0].reshape(-1),
                src_bdry_nodes[1].reshape(-1), "xb")
        pt.show()

    # }}}

    logger.info("make_opposite_face_connection: begin gauss-newton")

    niter = 0
    while True:
        resid = apply_map(src_unit_nodes) - tgt_bdry_nodes

        df = get_map_jacobian(src_unit_nodes)

        # For the 1D/2D accelerated versions, we'll use the normal
        # equations and Cramer's rule. If you're looking for high-end
        # numerics, look no further than meshmode.

        if dim == 1:
            # A is df.T
            ata = np.einsum("iket,jket->ijet", df, df)
            atb = np.einsum("iket,ket->iet", df, resid)

            df_inv_resid = atb / ata[0, 0]

        elif dim == 2:
            # A is df.T
            ata = np.einsum("iket,jket->ijet", df, df)
            atb = np.einsum("iket,ket->iet", df, resid)

            det = ata[0, 0] * ata[1, 1] - ata[0, 1] * ata[1, 0]

            df_inv_resid = np.empty_like(src_unit_nodes)
            df_inv_resid[0] = 1 / det * (
                    ata[1, 1] * atb[0] - ata[1, 0] * atb[1])
            df_inv_resid[1] = 1 / det * (
                    -ata[0, 1] * atb[0] + ata[0, 0] * atb[1])

        else:
            # The boundary of a 3D mesh is 2D, so that's the
            # highest-dimensional case we genuinely care about.
            #
            # This stinks, performance-wise, because it's not vectorized.
            # But we'll only hit it for boundaries of 4+D meshes, in which
            # case... good luck. :)
            df_inv_resid = np.empty_like(src_unit_nodes)
            for e in range(nelements):
                for t in range(ntgt_unit_nodes):
                    df_inv_resid[:, e, t], _, _, _ = \
                            la.lstsq(df[:, :, e, t].T, resid[:, e, t])

        src_unit_nodes = src_unit_nodes - df_inv_resid

        # {{{ visualize next guess

        if 0:
            import matplotlib.pyplot as pt
            guess = apply_map(src_unit_nodes)
            goals = tgt_bdry_nodes

            pt.plot(guess[0].reshape(-1), guess[1].reshape(-1), "rx")
            pt.plot(goals[0].reshape(-1), goals[1].reshape(-1), "go")
            pt.show()

        # }}}

        # NOTE: 'resid' was evaluated at the previous iterate, so the
        # convergence check lags the update by one step.
        max_resid = np.max(np.abs(resid))
        logger.debug("gauss-newton residual: %g", max_resid)

        if max_resid < tol:
            logger.info("make_opposite_face_connection: gauss-newton: done, "
                    "final residual: %g", max_resid)
            break

        niter += 1
        if niter > 10:
            raise RuntimeError(
                    "Gauss-Newton (for finding opposite-face reference "
                    "coordinates) did not converge")

    # }}}

    # {{{ find groups of src_unit_nodes

    from meshmode.discretization.connection.direct import InterpolationBatch

    def to_dev(ary):
        return cl.array.to_device(queue, ary, array_queue=None)

    # The plain builtin 'bool' is used because the 'np.bool' alias was
    # removed from NumPy.
    done_elements = np.zeros(nelements, dtype=bool)
    while True:
        todo_elements, = np.where(~done_elements)
        if not len(todo_elements):
            return

        # The first remaining element serves as the template; it is at
        # distance zero from itself, so each pass retires at least one
        # element.
        template_unit_nodes = src_unit_nodes[:, todo_elements[0], :]

        unit_node_dist = np.max(
                np.max(
                    np.abs(
                        src_unit_nodes[:, todo_elements, :]
                        - template_unit_nodes.reshape(dim, 1, -1)),
                    axis=2),
                axis=0)

        close_els = todo_elements[unit_node_dist < tol]
        done_elements[close_els] = True

        yield InterpolationBatch(
                from_group_index=i_src_grp,
                from_element_indices=to_dev(src_bdry_element_indices[close_els]),
                to_element_indices=to_dev(tgt_bdry_element_indices[close_els]),
                result_unit_nodes=template_unit_nodes,
                to_element_face=None)

    # }}}