def _set_up_distributed_communication(self, mpi_communicator, queue):
    """Set up boundary communication with all connected remote parts.

    :arg mpi_communicator: an ``mpi4py``-style communicator, or *None*.
        Must be non-*None* as soon as the volume mesh has remote
        neighbors.
    :arg queue: presumably a :class:`pyopencl.CommandQueue` used by the
        boundary setup helpers — TODO confirm against
        ``MPIBoundaryCommSetupHelper``'s signature.
    :returns: a dict mapping each connected remote part number to the
        completed boundary connection for that part.
    :raises RuntimeError: if the mesh is distributed but no communicator
        was supplied.
    """
    from_dd = sym.DOFDesc("vol", sym.QTAG_NONE)

    from meshmode.distributed import get_connected_partitions
    connected_parts = get_connected_partitions(self._volume_discr.mesh)

    if mpi_communicator is None and connected_parts:
        raise RuntimeError("must supply an MPI communicator when using a "
                "distributed mesh")

    grp_factory = self.group_factory_for_quadrature_tag(sym.QTAG_NONE)

    setup_helpers = {}
    boundary_connections = {}

    from meshmode.distributed import MPIBoundaryCommSetupHelper
    for i_remote_part in connected_parts:
        conn = self.connection_from_dds(
                from_dd,
                sym.DOFDesc(sym.BTAG_PARTITION(i_remote_part), sym.QTAG_NONE))
        setup_helper = setup_helpers[i_remote_part] = MPIBoundaryCommSetupHelper(
                mpi_communicator, queue, conn, i_remote_part, grp_factory)

        # Post all sends before waiting on any completion below, so that
        # the exchanges can overlap across remote parts.
        setup_helper.post_sends()

    # FIX: was six.iteritems(setup_helpers) — this is a Python-3 codebase
    # (the sibling implementation iterates dicts natively), so use the
    # built-in dict iteration and drop the py2-compat shim.
    for i_remote_part, setup_helper in setup_helpers.items():
        boundary_connections[i_remote_part] = setup_helper.complete_setup()

    return boundary_connections
def _set_up_distributed_communication(self, mpi_communicator, array_context):
    """Establish remote-to-local boundary connections for all connected parts.

    :arg mpi_communicator: an ``mpi4py``-style communicator, or *None*;
        required (non-*None*) whenever the volume mesh has remote neighbors.
    :arg array_context: the array context handed to the boundary setup helper.
    :returns: a dict mapping each connected remote part number to its
        completed boundary connection (empty for a serial mesh).
    :raises RuntimeError: if the mesh is distributed but *mpi_communicator*
        is *None*.
    """
    from meshmode.distributed import get_connected_partitions

    connected_parts = get_connected_partitions(self._volume_discr.mesh)
    if not connected_parts:
        # Serial mesh: nothing to exchange.
        return {}

    if mpi_communicator is None:
        raise RuntimeError("must supply an MPI communicator when using a "
                "distributed mesh")

    vol_dd = DOFDesc("vol", DISCR_TAG_BASE)
    bdry_grp_factory = self.group_factory_for_discretization_tag(DISCR_TAG_BASE)

    # Restriction of the volume discretization onto each inter-part boundary.
    local_boundary_connections = {
        part: self.connection_from_dds(
            vol_dd, DOFDesc(BTAG_PARTITION(part), DISCR_TAG_BASE))
        for part in connected_parts}

    from meshmode.distributed import MPIBoundaryCommSetupHelper

    boundary_connections = {}
    with MPIBoundaryCommSetupHelper(mpi_communicator, array_context,
            local_boundary_connections,
            bdry_grp_factory) as bdry_setup_helper:
        while True:
            # complete_some() hands back whichever exchanges have finished;
            # an empty result signals that all parts are done.
            finished = bdry_setup_helper.complete_some()
            if not finished:
                break
            boundary_connections.update(finished)

    return boundary_connections
def test_partition_interpolation(actx_factory, dim, mesh_pars, num_parts,
        num_groups, part_method):
    """Check round-trip interpolation across partition boundaries.

    For each mesh parameter in *mesh_pars*: partition a warped rectangle
    mesh into *num_parts* parts, restrict each pair of connected parts to
    their shared boundary, build partition connections in both directions,
    and verify that a smooth function sent local -> remote -> local comes
    back (nearly) unchanged.
    """
    # Fixed seed so the "random" partitioning strategy is reproducible.
    np.random.seed(42)
    group_factory = PolynomialWarpAndBlendGroupFactory
    actx = actx_factory()

    order = 4

    def f(x):
        # Smooth but oscillatory test function exercised on boundary nodes.
        return 10. * actx.np.sin(50. * x)

    for n in mesh_pars:
        from meshmode.mesh.generation import generate_warped_rect_mesh
        base_mesh = generate_warped_rect_mesh(dim, order=order, n=n)

        if num_groups > 1:
            from meshmode.mesh.processing import split_mesh_groups
            # Group every Nth element
            element_flags = np.arange(
                    base_mesh.nelements,
                    dtype=base_mesh.element_id_dtype) % num_groups
            mesh = split_mesh_groups(base_mesh, element_flags)
        else:
            mesh = base_mesh

        # Element-to-part assignment: uniformly random, or graph-based
        # via pymetis (skipped if pymetis is unavailable).
        if part_method == "random":
            part_per_element = np.random.randint(num_parts, size=mesh.nelements)
        else:
            pytest.importorskip("pymetis")

            from meshmode.distributed import get_partition_by_pymetis
            part_per_element = get_partition_by_pymetis(
                mesh, num_parts, connectivity=part_method)

        from meshmode.mesh.processing import partition_mesh
        # partition_mesh returns a (part_mesh, part_to_global) pair;
        # only the mesh is needed here.
        part_meshes = [
            partition_mesh(mesh, part_per_element, i)[0]
            for i in range(num_parts)]

        # Collect all ordered (local, remote) pairs of parts that share
        # at least one boundary face.
        connected_parts = set()
        for i_local_part, part_mesh in enumerate(part_meshes):
            from meshmode.distributed import get_connected_partitions
            neighbors = get_connected_partitions(part_mesh)
            for i_remote_part in neighbors:
                connected_parts.add((i_local_part, i_remote_part))

        from meshmode.discretization import Discretization
        vol_discrs = [
            Discretization(actx, part_meshes[i], group_factory(order))
            for i in range(num_parts)]

        from meshmode.mesh import BTAG_PARTITION
        from meshmode.discretization.connection import (
                make_face_restriction, make_partition_connection,
                check_connection)

        for i_local_part, i_remote_part in connected_parts:
            # Mark faces within local_mesh that are connected to remote_mesh
            local_bdry_conn = make_face_restriction(
                    actx,
                    vol_discrs[i_local_part],
                    group_factory(order),
                    BTAG_PARTITION(i_remote_part))

            # Mark faces within remote_mesh that are connected to local_mesh
            remote_bdry_conn = make_face_restriction(
                    actx,
                    vol_discrs[i_remote_part],
                    group_factory(order),
                    BTAG_PARTITION(i_local_part))

            # The two restrictions must see the same shared interface.
            bdry_nelements = sum(
                    grp.nelements for grp in local_bdry_conn.to_discr.groups)
            remote_bdry_nelements = sum(
                    grp.nelements for grp in remote_bdry_conn.to_discr.groups)
            assert bdry_nelements == remote_bdry_nelements, \
                    "partitions do not have the same number of connected elements"

            local_bdry = local_bdry_conn.to_discr
            remote_bdry = remote_bdry_conn.to_discr

            from meshmode.distributed import make_remote_group_infos
            # Connect from remote mesh to local mesh.
            remote_to_local_conn = make_partition_connection(
                    actx,
                    local_bdry_conn=local_bdry_conn,
                    i_local_part=i_local_part,
                    remote_bdry_discr=remote_bdry,
                    remote_group_infos=make_remote_group_infos(
                        actx, remote_bdry_conn))

            # Connect from local mesh to remote mesh
            local_to_remote_conn = make_partition_connection(
                    actx,
                    local_bdry_conn=remote_bdry_conn,
                    i_local_part=i_remote_part,
                    remote_bdry_discr=local_bdry,
                    remote_group_infos=make_remote_group_infos(
                        actx, local_bdry_conn))

            check_connection(actx, remote_to_local_conn)
            check_connection(actx, local_to_remote_conn)

            # Round-trip the test function across the interface and back.
            true_local_points = f(thaw(actx, local_bdry.nodes()[0]))
            remote_points = local_to_remote_conn(true_local_points)
            local_points = remote_to_local_conn(remote_points)

            err = actx.np.linalg.norm(true_local_points - local_points, np.inf)

            # Can't currently expect exact results due to limitations of
            # interpolation "snapping" in DirectDiscretizationConnection's
            # _resample_point_pick_indices
            assert err < 1e-11
def _test_mpi_boundary_swap(dim, order, num_groups):
    """Exercise MPI boundary setup and data transfer on a distributed mesh.

    Runs on every rank of ``MPI.COMM_WORLD``: the manager rank generates
    and partitions a warped rectangle mesh, all ranks receive their part,
    build boundary restrictions toward each connected remote part, complete
    the boundary communication setup, and finally run ``_test_data_transfer``.
    """
    from meshmode.distributed import MPIMeshDistributor, MPIBoundaryCommSetupHelper

    from mpi4py import MPI
    mpi_comm = MPI.COMM_WORLD
    i_local_part = mpi_comm.Get_rank()
    num_parts = mpi_comm.Get_size()

    mesh_dist = MPIMeshDistributor(mpi_comm)

    # NOTE: "mananger" is the upstream API's spelling.
    if mesh_dist.is_mananger_rank():
        # Manager rank: build, (optionally) merge, partition, and scatter
        # the mesh. Fixed seed keeps the random partition reproducible.
        np.random.seed(42)
        from meshmode.mesh.generation import generate_warped_rect_mesh
        meshes = [generate_warped_rect_mesh(dim, order=order, n=4)
                  for _ in range(num_groups)]

        if num_groups > 1:
            from meshmode.mesh.processing import merge_disjoint_meshes
            mesh = merge_disjoint_meshes(meshes)
        else:
            mesh = meshes[0]

        part_per_element = np.random.randint(num_parts, size=mesh.nelements)

        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts)
    else:
        local_mesh = mesh_dist.receive_mesh_part()

    group_factory = PolynomialWarpAndBlendGroupFactory(order)

    from meshmode.array_context import PyOpenCLArrayContext
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue)

    from meshmode.discretization import Discretization
    vol_discr = Discretization(actx, local_mesh, group_factory)

    from meshmode.distributed import get_connected_partitions
    connected_parts = get_connected_partitions(local_mesh)
    # A part must never list itself as a remote neighbor.
    assert i_local_part not in connected_parts

    bdry_setup_helpers = {}
    local_bdry_conns = {}

    from meshmode.discretization.connection import make_face_restriction
    from meshmode.mesh import BTAG_PARTITION
    for i_remote_part in connected_parts:
        # Restrict the local volume onto the face boundary shared with
        # this remote part.
        local_bdry_conns[i_remote_part] = make_face_restriction(
                actx, vol_discr, group_factory, BTAG_PARTITION(i_remote_part))

        setup_helper = bdry_setup_helpers[i_remote_part] = \
                MPIBoundaryCommSetupHelper(
                        mpi_comm, actx, local_bdry_conns[i_remote_part],
                        i_remote_part, bdry_grp_factory=group_factory)

        # Post all sends before polling for completions below.
        setup_helper.post_sends()

    remote_to_local_bdry_conns = {}
    from meshmode.discretization.connection import check_connection
    # Poll the helpers until every exchange has completed; finished
    # helpers are popped from the dict.
    while bdry_setup_helpers:
        for i_remote_part, setup_helper in bdry_setup_helpers.items():
            if setup_helper.is_setup_ready():
                assert bdry_setup_helpers.pop(i_remote_part) is setup_helper
                conn = setup_helper.complete_setup()
                check_connection(actx, conn)
                remote_to_local_bdry_conns[i_remote_part] = conn
                # Break out of the inner loop: the dict was just mutated,
                # so its iterator is no longer valid.
                break

        # FIXME: Not ideal, busy-waits

    _test_data_transfer(mpi_comm,
                        actx,
                        local_bdry_conns,
                        remote_to_local_bdry_conns,
                        connected_parts)

    logger.debug("Rank %d exiting", i_local_part)
def test_partition_mesh(mesh_size, num_parts, num_groups, dim,
        scramble_partitions):
    """Verify that mesh partitioning preserves counts and adjacency.

    Builds *num_groups* disjoint regular rectangle meshes, merges and
    partitions them, then checks: element counts and ``BTAG_ALL`` boundary
    counts sum to the originals; inter-part adjacency data is mutually
    consistent between neighboring parts and agrees with the unpartitioned
    mesh; and the number of ``BTAG_PARTITION`` tags matches the connectivity.
    """
    # Fixed seed so scrambled partitions are reproducible.
    np.random.seed(42)
    n = (mesh_size,) * dim
    from meshmode.mesh.generation import generate_regular_rect_mesh
    # Disjoint unit boxes offset by i so merging produces multiple groups.
    meshes = [generate_regular_rect_mesh(a=(0 + i,) * dim, b=(1 + i,) * dim,
              n=n) for i in range(num_groups)]

    from meshmode.mesh.processing import merge_disjoint_meshes
    mesh = merge_disjoint_meshes(meshes)

    if scramble_partitions:
        part_per_element = np.random.randint(num_parts, size=mesh.nelements)
    else:
        pytest.importorskip("pymetis")
        from meshmode.distributed import get_partition_by_pymetis
        part_per_element = get_partition_by_pymetis(mesh, num_parts)

    from meshmode.mesh.processing import partition_mesh
    # TODO: The same part_per_element array must be used to partition each mesh.
    # Maybe the interface should be changed to guarantee this.
    # Each entry is a (part_mesh, part_to_global) pair.
    new_meshes = [
        partition_mesh(mesh, part_per_element, i) for i in range(num_parts)]

    assert mesh.nelements == np.sum(
        [new_meshes[i][0].nelements for i in range(num_parts)]), \
        "part_mesh has the wrong number of elements"

    assert count_tags(mesh, BTAG_ALL) == np.sum(
        [count_tags(new_meshes[i][0], BTAG_ALL) for i in range(num_parts)]), \
        "part_mesh has the wrong number of BTAG_ALL boundaries"

    # All ordered (local, remote) pairs of parts that share a boundary.
    connected_parts = set()
    for i_local_part, (part_mesh, _) in enumerate(new_meshes):
        from meshmode.distributed import get_connected_partitions
        neighbors = get_connected_partitions(part_mesh)
        for i_remote_part in neighbors:
            connected_parts.add((i_local_part, i_remote_part))

    from meshmode.mesh import BTAG_PARTITION, InterPartitionAdjacencyGroup
    from meshmode.mesh.processing import find_group_indices
    # num_tags[p] counts, across all parts, faces whose neighbor lives
    # in part p; compared against BTAG_PARTITION counts at the end.
    num_tags = np.zeros((num_parts,))

    # Map (part, group, element, face) -> row index within that group's
    # inter-part adjacency arrays, for O(1) reverse lookups below.
    index_lookup_table = dict()
    for ipart, (m, _) in enumerate(new_meshes):
        for igrp in range(len(m.groups)):
            adj = m.facial_adjacency_groups[igrp][None]
            if not isinstance(adj, InterPartitionAdjacencyGroup):
                # This group is not connected to another partition.
                continue
            for i, (elem, face) in enumerate(
                    zip(adj.elements, adj.element_faces)):
                index_lookup_table[ipart, igrp, elem, face] = i

    for part_num in range(num_parts):
        part, part_to_global = new_meshes[part_num]
        for grp_num in range(len(part.groups)):
            adj = part.facial_adjacency_groups[grp_num][None]
            # Boundary-tag neighbors are encoded as negated tag bitfields,
            # so negating them must yield non-negative values.
            tags = -part.facial_adjacency_groups[grp_num][None].neighbors
            assert np.all(tags >= 0)
            if not isinstance(adj, InterPartitionAdjacencyGroup):
                # This group is not connected to another partition.
                continue
            elem_base = part.groups[grp_num].element_nr_base
            for idx in range(len(adj.elements)):
                if adj.partition_neighbors[idx] == -1:
                    # Face has no remote neighbor (true domain boundary).
                    continue
                elem = adj.elements[idx]
                face = adj.element_faces[idx]
                n_part_num = adj.neighbor_partitions[idx]
                n_meshwide_elem = adj.partition_neighbors[idx]
                n_face = adj.neighbor_faces[idx]
                num_tags[n_part_num] += 1
                n_part, n_part_to_global = new_meshes[n_part_num]
                # Hack: find_igrps expects a numpy.ndarray and returns
                # a numpy.ndarray.  But if a single integer is fed
                # into find_igrps, an integer is returned.
                n_grp_num = int(
                    find_group_indices(n_part.groups, n_meshwide_elem))
                n_adj = n_part.facial_adjacency_groups[n_grp_num][None]
                n_elem_base = n_part.groups[n_grp_num].element_nr_base
                n_elem = n_meshwide_elem - n_elem_base
                n_idx = index_lookup_table[n_part_num, n_grp_num, n_elem,
                                           n_face]
                # The neighbor part's adjacency entry must point straight
                # back at this (part, element, face).
                assert (part_num == n_adj.neighbor_partitions[n_idx]
                        and elem + elem_base == n_adj.partition_neighbors[n_idx]
                        and face == n_adj.neighbor_faces[n_idx]),\
                        "InterPartitionAdjacencyGroup is not consistent"
                _, n_part_to_global = new_meshes[n_part_num]
                # Translate both elements back to the unpartitioned mesh's
                # mesh-wide numbering.
                p_meshwide_elem = part_to_global[elem + elem_base]
                p_meshwide_n_elem = n_part_to_global[n_elem + n_elem_base]

                p_grp_num = find_group_indices(mesh.groups, p_meshwide_elem)
                p_n_grp_num = find_group_indices(mesh.groups, p_meshwide_n_elem)

                p_elem_base = mesh.groups[p_grp_num].element_nr_base
                p_n_elem_base = mesh.groups[p_n_grp_num].element_nr_base
                p_elem = p_meshwide_elem - p_elem_base
                p_n_elem = p_meshwide_n_elem - p_n_elem_base

                # The unpartitioned mesh must record the same neighbor
                # element and neighbor face for this element/face pair.
                f_groups = mesh.facial_adjacency_groups[p_grp_num]
                for p_bnd_adj in f_groups.values():
                    for idx in range(len(p_bnd_adj.elements)):
                        if (p_elem == p_bnd_adj.elements[idx]
                                and face == p_bnd_adj.element_faces[idx]):
                            assert p_n_elem == p_bnd_adj.neighbors[idx],\
                                    "Tag does not give correct neighbor"
                            assert n_face == p_bnd_adj.neighbor_faces[idx],\
                                    "Tag does not give correct neighbor"

    # Cross-check the per-part neighbor counts against the number of
    # BTAG_PARTITION-tagged boundary faces.
    for i_remote_part in range(num_parts):
        tag_sum = 0
        for i_local_part, (mesh, _) in enumerate(new_meshes):
            if (i_local_part, i_remote_part) in connected_parts:
                tag_sum += count_tags(mesh, BTAG_PARTITION(i_remote_part))
        assert num_tags[i_remote_part] == tag_sum,\
                "part_mesh has the wrong number of BTAG_PARTITION boundaries"
def connected_ranks(dcoll: DiscretizationCollection):
    """Return the parts connected to *dcoll*'s volume mesh."""
    from meshmode.distributed import get_connected_partitions

    volume_mesh = dcoll._volume_discr.mesh
    return get_connected_partitions(volume_mesh)
def _test_mpi_boundary_swap(dim, order, num_groups):
    """Exercise context-manager-based MPI boundary setup and data transfer.

    Runs on every rank of ``MPI.COMM_WORLD``: the manager rank builds and
    partitions a warped rectangle mesh, each rank receives its part,
    validates connectivity, completes the boundary communication setup via
    ``MPIBoundaryCommSetupHelper``, and runs ``_test_data_transfer``.
    """
    from meshmode.distributed import MPIMeshDistributor, MPIBoundaryCommSetupHelper

    from mpi4py import MPI
    mpi_comm = MPI.COMM_WORLD
    i_local_part = mpi_comm.Get_rank()
    num_parts = mpi_comm.Get_size()

    mesh_dist = MPIMeshDistributor(mpi_comm)

    # NOTE: "mananger" is the upstream API's spelling.
    if mesh_dist.is_mananger_rank():
        # Manager rank: build, (optionally) merge, partition, and scatter
        # the mesh. Fixed seed keeps the random partition reproducible.
        np.random.seed(42)
        from meshmode.mesh.generation import generate_warped_rect_mesh
        meshes = [generate_warped_rect_mesh(dim, order=order, nelements_side=4)
                  for _ in range(num_groups)]

        if num_groups > 1:
            from meshmode.mesh.processing import merge_disjoint_meshes
            mesh = merge_disjoint_meshes(meshes)
        else:
            mesh = meshes[0]

        part_per_element = np.random.randint(num_parts, size=mesh.nelements)

        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts)
    else:
        local_mesh = mesh_dist.receive_mesh_part()

    group_factory = PolynomialWarpAndBlendGroupFactory(order)

    from arraycontext import PyOpenCLArrayContext
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue)

    from meshmode.discretization import Discretization
    vol_discr = Discretization(actx, local_mesh, group_factory)

    from meshmode.distributed import get_connected_partitions
    connected_parts = get_connected_partitions(local_mesh)

    # Check that the connectivity makes sense before doing any communication
    _test_connected_parts(mpi_comm, connected_parts)

    from meshmode.discretization.connection import make_face_restriction
    from meshmode.mesh import BTAG_PARTITION
    # One boundary restriction per connected remote part.
    local_bdry_conns = {}
    for i_remote_part in connected_parts:
        local_bdry_conns[i_remote_part] = make_face_restriction(
                actx, vol_discr, group_factory, BTAG_PARTITION(i_remote_part))

    remote_to_local_bdry_conns = {}
    # The helper manages sends/receives for all parts at once; the context
    # manager scopes the communication.
    with MPIBoundaryCommSetupHelper(mpi_comm, actx, local_bdry_conns,
            bdry_grp_factory=group_factory) as bdry_setup_helper:
        from meshmode.discretization.connection import check_connection
        while True:
            # complete_some() returns the exchanges finished so far;
            # an empty result means all are done.
            conns = bdry_setup_helper.complete_some()
            if not conns:
                break
            for i_remote_part, conn in conns.items():
                check_connection(actx, conn)
                remote_to_local_bdry_conns[i_remote_part] = conn

    _test_data_transfer(mpi_comm,
                        actx,
                        local_bdry_conns,
                        remote_to_local_bdry_conns,
                        connected_parts)

    logger.debug("Rank %d exiting", i_local_part)
def process_sym_operator(discrwb, sym_operator, post_bind_mapper=None,
        dumper=lambda name, sym_operator: None):
    """Run the symbolic-operator preprocessing pipeline prior to binding.

    Applies grudge's mapper passes in a fixed order: operator binding,
    error checking, face-swap ID assignment, cross-rank consistency
    broadcast, optional *post_bind_mapper*, empty-flux removal, constant
    folding, quadrature checking, constant-to-numpy conversion,
    global-to-reference mapping, distributed lowering (if the mesh has
    connected parts), and inverse-mass contraction.

    :arg discrwb: the discretization collection supplying mesh, dtypes,
        and (optionally) an MPI communicator.
    :arg sym_operator: the symbolic operator expression to process.
    :arg post_bind_mapper: optional extra mapper applied right after the
        cross-rank broadcast.
    :arg dumper: callback ``(stage_name, sym_operator)`` invoked before
        each pass, for debugging/dumping; defaults to a no-op.
    :returns: the fully processed symbolic operator.
    :raises ValueError: if ranks disagree on the operator being bound.
    """
    orig_sym_operator = sym_operator
    import grudge.symbolic.mappers as mappers

    dumper("before-bind", sym_operator)
    sym_operator = mappers.OperatorBinder()(sym_operator)

    mappers.ErrorChecker(discrwb.mesh)(sym_operator)

    sym_operator = \
            mappers.OppositeInteriorFaceSwapUniqueIDAssigner()(sym_operator)

    # {{{ broadcast root rank's sym_operator

    # also make sure all ranks had same orig_sym_operator

    if discrwb.mpi_communicator is not None:
        (mgmt_rank_orig_sym_operator, mgmt_rank_sym_operator) = \
                discrwb.mpi_communicator.bcast(
                    (orig_sym_operator, sym_operator),
                    discrwb.get_management_rank_index())

        from pytools.obj_array import is_equal as is_oa_equal
        if not is_oa_equal(mgmt_rank_orig_sym_operator, orig_sym_operator):
            raise ValueError("rank %d received a different symbolic "
                    "operator to bind from rank %d"
                    % (discrwb.mpi_communicator.Get_rank(),
                        discrwb.get_management_rank_index()))

        # Use the management rank's processed operator so all ranks agree.
        sym_operator = mgmt_rank_sym_operator

    # }}}

    if post_bind_mapper is not None:
        dumper("before-postbind", sym_operator)
        sym_operator = post_bind_mapper(sym_operator)

    dumper("before-empty-flux-killer", sym_operator)
    sym_operator = mappers.EmptyFluxKiller(discrwb.mesh)(sym_operator)

    dumper("before-cfold", sym_operator)
    sym_operator = mappers.CommutativeConstantFoldingMapper()(sym_operator)

    dumper("before-qcheck", sym_operator)
    sym_operator = mappers.QuadratureCheckerAndRemover(
            discrwb.quad_tag_to_group_factory)(sym_operator)

    # Work around https://github.com/numpy/numpy/issues/9438
    #
    # The idea is that we need 1j as an expression to survive
    # until code generation time. If it is evaluated and combined
    # with other constants, we will need to determine its size
    # (as np.complex64/128) within the expression. But because
    # of the above numpy bug, sized numbers are not likely to survive
    # expression building--so that's why we step in here to fix that.

    dumper("before-csize", sym_operator)
    sym_operator = mappers.ConstantToNumpyConversionMapper(
            real_type=discrwb.real_dtype.type,
            complex_type=discrwb.complex_dtype.type,
            )(sym_operator)

    dumper("before-global-to-reference", sym_operator)
    sym_operator = mappers.GlobalToReferenceMapper(
            discrwb.ambient_dim)(sym_operator)

    dumper("before-distributed", sym_operator)

    volume_mesh = discrwb.discr_from_dd("vol").mesh
    from meshmode.distributed import get_connected_partitions
    connected_parts = get_connected_partitions(volume_mesh)

    # Only lower to distributed form when there is actual inter-part
    # connectivity.
    if connected_parts:
        sym_operator = mappers.DistributedMapper(connected_parts)(sym_operator)

    dumper("before-imass", sym_operator)
    sym_operator = mappers.InverseMassContractor()(sym_operator)

    dumper("before-cfold-2", sym_operator)
    sym_operator = mappers.CommutativeConstantFoldingMapper()(sym_operator)

    # FIXME: Reenable derivative joiner
    # dumper("before-derivative-join", sym_operator)
    # sym_operator = mappers.DerivativeJoiner()(sym_operator)

    dumper("process-finished", sym_operator)

    return sym_operator
def test_partition_interpolation(ctx_factory, dim, mesh_pars, num_parts,
        num_groups, part_method):
    """Check round-trip interpolation across partition boundaries.

    Older, queue-based variant: gathers adjacency groups and connection
    batches by hand and feeds them to ``make_partition_connection``, then
    verifies that a smooth function sent local -> remote -> local returns
    (nearly) unchanged.
    """
    # Fixed seed so the "random" partitioning strategy is reproducible.
    np.random.seed(42)
    group_factory = PolynomialWarpAndBlendGroupFactory
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue)

    order = 4

    def f(x):
        # Smooth but oscillatory test function exercised on boundary nodes.
        return 10.*actx.np.sin(50.*x)

    for n in mesh_pars:
        from meshmode.mesh.generation import generate_warped_rect_mesh
        base_mesh = generate_warped_rect_mesh(dim, order=order, n=n)

        if num_groups > 1:
            from meshmode.mesh.processing import split_mesh_groups
            # Group every Nth element
            element_flags = np.arange(base_mesh.nelements,
                        dtype=base_mesh.element_id_dtype) % num_groups
            mesh = split_mesh_groups(base_mesh, element_flags)
        else:
            mesh = base_mesh

        # Element-to-part assignment: uniformly random, or graph-based
        # via pymetis (skipped if pymetis is unavailable).
        if part_method == "random":
            part_per_element = np.random.randint(num_parts, size=mesh.nelements)
        else:
            pytest.importorskip('pymetis')

            from meshmode.distributed import get_partition_by_pymetis
            part_per_element = get_partition_by_pymetis(mesh, num_parts,
                    connectivity=part_method)

        from meshmode.mesh.processing import partition_mesh
        part_meshes = [
            partition_mesh(mesh, part_per_element, i)[0]
            for i in range(num_parts)]

        # All ordered (local, remote) pairs of parts sharing a boundary.
        connected_parts = set()
        for i_local_part, part_mesh in enumerate(part_meshes):
            from meshmode.distributed import get_connected_partitions
            neighbors = get_connected_partitions(part_mesh)
            for i_remote_part in neighbors:
                connected_parts.add((i_local_part, i_remote_part))

        from meshmode.discretization import Discretization
        vol_discrs = [Discretization(actx, part_meshes[i], group_factory(order))
                        for i in range(num_parts)]

        from meshmode.mesh import BTAG_PARTITION
        from meshmode.discretization.connection import (make_face_restriction,
                make_partition_connection, check_connection)

        for i_local_part, i_remote_part in connected_parts:
            # Mark faces within local_mesh that are connected to remote_mesh
            local_bdry_conn = make_face_restriction(actx,
                    vol_discrs[i_local_part],
                    group_factory(order),
                    BTAG_PARTITION(i_remote_part))

            # Mark faces within remote_mesh that are connected to local_mesh
            remote_bdry_conn = make_face_restriction(actx,
                    vol_discrs[i_remote_part],
                    group_factory(order),
                    BTAG_PARTITION(i_local_part))

            # Both restrictions must see the same shared interface.
            bdry_nelements = sum(
                    grp.nelements for grp in local_bdry_conn.to_discr.groups)
            remote_bdry_nelements = sum(
                    grp.nelements for grp in remote_bdry_conn.to_discr.groups)
            assert bdry_nelements == remote_bdry_nelements, \
                    "partitions do not have the same number of connected elements"

            # Gather just enough information for the connection
            local_bdry = local_bdry_conn.to_discr
            local_mesh = part_meshes[i_local_part]
            local_adj_groups = [local_mesh.facial_adjacency_groups[i][None]
                                for i in range(len(local_mesh.groups))]
            local_batches = [local_bdry_conn.groups[i].batches
                                for i in range(len(local_mesh.groups))]
            local_from_elem_faces = [[batch.to_element_face
                                        for batch in grp_batches]
                                        for grp_batches in local_batches]
            # .get() pulls device arrays onto the host via the CL queue.
            local_from_elem_indices = [[batch.to_element_indices.get(queue=queue)
                                            for batch in grp_batches]
                                            for grp_batches in local_batches]

            remote_bdry = remote_bdry_conn.to_discr
            remote_mesh = part_meshes[i_remote_part]
            remote_adj_groups = [remote_mesh.facial_adjacency_groups[i][None]
                                for i in range(len(remote_mesh.groups))]
            remote_batches = [remote_bdry_conn.groups[i].batches
                                for i in range(len(remote_mesh.groups))]
            remote_from_elem_faces = [[batch.to_element_face
                                        for batch in grp_batches]
                                        for grp_batches in remote_batches]
            remote_from_elem_indices = [[batch.to_element_indices.get(queue=queue)
                                            for batch in grp_batches]
                                            for grp_batches in remote_batches]

            # Connect from remote_mesh to local_mesh
            remote_to_local_conn = make_partition_connection(
                    actx, local_bdry_conn, i_local_part, remote_bdry,
                    remote_adj_groups, remote_from_elem_faces,
                    remote_from_elem_indices)

            # Connect from local mesh to remote mesh
            local_to_remote_conn = make_partition_connection(
                    actx, remote_bdry_conn, i_remote_part, local_bdry,
                    local_adj_groups, local_from_elem_faces,
                    local_from_elem_indices)

            check_connection(actx, remote_to_local_conn)
            check_connection(actx, local_to_remote_conn)

            # Round-trip the test function across the interface and back.
            true_local_points = f(thaw(actx, local_bdry.nodes()[0]))
            remote_points = local_to_remote_conn(true_local_points)
            local_points = remote_to_local_conn(remote_points)

            err = flat_norm(true_local_points - local_points, np.inf)

            # Can't currently expect exact results due to limitations of
            # interpolation 'snapping' in DirectDiscretizationConnection's
            # _resample_point_pick_indices
            assert err < 1e-11
def connected_ranks(self):
    """Return the parts connected to this collection's volume mesh."""
    from meshmode.distributed import get_connected_partitions

    mesh = self._volume_discr.mesh
    return get_connected_partitions(mesh)