예제 #1
0
    def __call__(self, queue, tree, wait_for=None):
        """
        Run the peer list finder kernel on *tree* and wrap its output.

        :arg queue: a :class:`pyopencl.CommandQueue`
        :arg tree: a :class:`boxtree.Tree`.
        :arg wait_for: may either be *None* or a list of :class:`pyopencl.Event`
            instances for whose completion this command waits before starting
            execution.
        :returns: a tuple *(pl, event)*, where *pl* is an instance of
            :class:`PeerListLookup`, and *event* is a :class:`pyopencl.Event`
            for dependency management.
        """
        from pytools import div_ceil

        # Round up level count--this gets included in the kernel as
        # a stack bound. Rounding avoids too many kernel versions.
        max_levels = div_ceil(tree.nlevels, 10) * 10

        peer_list_finder_kernel = self.get_peer_list_finder_kernel(
            tree.dimensions, tree.coord_dtype, tree.box_id_dtype, max_levels)

        # Use the process logger as a context manager so that it is always
        # finalized, even when the kernel invocation raises.
        with ProcessLogger(logger, "find peer lists"):
            result, evt = peer_list_finder_kernel(
                    queue, tree.nboxes,
                    tree.box_centers.data, tree.root_extent,
                    tree.box_levels.data, tree.aligned_nboxes,
                    tree.box_child_ids.data, tree.box_flags.data,
                    wait_for=wait_for)

        # The lookup is returned detached from any queue.
        return PeerListLookup(
                tree=tree,
                peer_list_starts=result["peers"].starts,
                peer_lists=result["peers"].lists).with_queue(None), evt
예제 #2
0
def check_variable_access_ordered(kernel):
    """Verify that variable accesses in *kernel* are ordered.

    Between each write to a variable and every other access to that
    variable there must be either:

    * a direct/indirect dependency edge, or
    * an explicit statement that no ordering is necessary (expressed
      through a bi-directional :attr:`loopy.InstructionBase.no_sync_with`)

    The kernel option ``enforce_variable_access_ordered`` selects the mode:
    ``"no_check"`` skips the check, a true value lets a violation propagate
    as an exception, and a false value downgrades it to a kernel warning.

    :raises LoopyError: if the option holds an unsupported value.
    """
    mode = kernel.options.enforce_variable_access_ordered

    if mode not in ["no_check", True, False]:
        raise LoopyError("invalid value for option "
                         "'enforce_variable_access_ordered': %s" % mode)

    if mode == "no_check":
        return

    from pytools import ProcessLogger
    description = "%s: check variable access ordered" % kernel.name
    with ProcessLogger(logger, description):
        if mode:
            # Strict mode: let VariableAccessNotOrdered reach the caller.
            _check_variable_access_ordered_inner(kernel)
            return

        # Lenient mode: report the violation as a warning instead.
        from loopy.diagnostic import VariableAccessNotOrdered
        try:
            _check_variable_access_ordered_inner(kernel)
        except VariableAccessNotOrdered as exc:
            from loopy.diagnostic import warn_with_kernel
            warn_with_kernel(kernel, "variable_access_ordered", str(exc))
예제 #3
0
def test_processlogger():
    """Exercise :class:`pytools.ProcessLogger` as a context manager.

    The wrapped sleep (0.3 s) exceeds the configured
    ``long_threshold_seconds`` (0.01 s).
    """
    from time import sleep
    from pytools import ProcessLogger

    logging.basicConfig(level=logging.INFO)

    with ProcessLogger(logger, "testing the process logger",
                       long_threshold_seconds=0.01):
        sleep(0.3)
예제 #4
0
    def __call__(self, queue, tree, wait_for=None):
        """
        Run the peer list finder kernel on *tree* and wrap its output.

        :arg queue: a :class:`pyopencl.CommandQueue`
        :arg tree: a :class:`boxtree.Tree`.
        :arg wait_for: may either be *None* or a list of :class:`pyopencl.Event`
            instances for whose completion this command waits before starting
            execution.
        :returns: a tuple *(pl, event)*, where *pl* is an instance of
            :class:`PeerListLookup`, and *event* is a :class:`pyopencl.Event`
            for dependency management.
        """
        from pytools import div_ceil

        # Round up level count--this gets included in the kernel as
        # a stack bound. Rounding avoids too many kernel versions.
        max_levels = div_ceil(tree.nlevels, 10) * 10

        peer_list_finder_kernel = self.get_peer_list_finder_kernel(
            tree.dimensions, tree.coord_dtype, tree.box_id_dtype, max_levels)

        # Use the process logger as a context manager so that it is always
        # finalized, even when the kernel invocation raises.
        with ProcessLogger(logger, "find peer lists"):
            result, evt = peer_list_finder_kernel(queue,
                                                  tree.nboxes,
                                                  tree.box_centers.data,
                                                  tree.root_extent,
                                                  tree.box_levels,
                                                  tree.aligned_nboxes,
                                                  tree.box_child_ids.data,
                                                  tree.box_flags,
                                                  wait_for=wait_for)

        # The lookup is returned detached from any queue.
        return PeerListLookup(
            tree=tree,
            peer_list_starts=result["peers"].starts,
            peer_lists=result["peers"].lists).with_queue(None), evt
예제 #5
0
    def refine(self, density_discr, refiner, refine_flags, factory, debug):
        """
        Refine the underlying mesh and discretization.

        :arg density_discr: the discretization handed to
            ``make_refinement_connection``.
        :arg refiner: object whose ``refine`` method is called with the
            boolean flags.
        :arg refine_flags: per-element refinement flags; a
            :class:`pyopencl.array.Array` is first copied to the host using
            ``self.queue``.
        :arg factory: forwarded to ``make_refinement_connection``.
        :arg debug: not used by this method.
        :returns: the connection produced by ``make_refinement_connection``.
        """
        if isinstance(refine_flags, cl.array.Array):
            refine_flags = refine_flags.get(self.queue)
        # The ``np.bool`` alias was deprecated in NumPy 1.20 and removed in
        # 1.24; the builtin ``bool`` selects the same dtype on every version.
        refine_flags = refine_flags.astype(bool)

        with ProcessLogger(logger, "refine mesh"):
            refiner.refine(refine_flags)
            from meshmode.discretization.connection import make_refinement_connection
            conn = make_refinement_connection(refiner, density_discr, factory)

        return conn
예제 #6
0
    def refine(self, density_discr, refiner, refine_flags, factory, debug):
        """
        Refine the underlying mesh and discretization.

        Flags that are arrays of the array context are converted to numpy
        first, then cast to ``bool`` before being passed to
        ``refiner.refine``.

        :returns: the connection built by ``make_refinement_connection``.
        """
        actx = self.array_context

        flags = refine_flags
        if isinstance(flags, actx.array_types):
            flags = actx.to_numpy(flags)
        flags = flags.astype(bool)

        with ProcessLogger(logger, "refine mesh"):
            refiner.refine(flags)
            from meshmode.discretization.connection import make_refinement_connection
            return make_refinement_connection(
                actx, refiner, density_discr, factory)
예제 #7
0
def import_firedrake_mesh(fdrake_mesh, cells_to_use=None,
                          normals=None, no_normals_warn=None):
    """
    Create a :class:`meshmode.mesh.Mesh`
    from a `firedrake.mesh.MeshGeometry`
    with the same cells/elements, vertices, nodes,
    mesh order, and facial adjacency.

    The vertex and node coordinates will be the same, as well
    as the cell/element ordering. However, :mod:`firedrake`
    does not require elements to be positively oriented,
    so any negative elements are flipped
    as in :func:`meshmode.mesh.processing.flip_simplex_element_group`.

    The flipped cells/elements are identified by the returned
    *firedrake_orient* array.

    :arg fdrake_mesh: `firedrake.mesh.MeshGeometry`.
        This mesh **must** be in a space of ambient dimension
        1, 2, or 3 and have co-dimension of 0 or 1.
        It must use a simplex as a reference element.

        In the case of a 2-dimensional mesh embedded in 3-space,
        the method ``fdrake_mesh.init_cell_orientations`` must
        have been called.

        In the case of a 1-dimensional mesh embedded in 2-space,
        see parameters *normals* and *no_normals_warn*.

        Finally, the ``coordinates`` attribute must have a function
        space whose *finat_element* associates a degree
        of freedom with each vertex. In particular,
        this means that the vertices of the mesh must have well-defined
        coordinates.
        For those unfamiliar with :mod:`firedrake`, you can
        verify this by looking at

        .. code-block:: python

            coords_fspace = fdrake_mesh.coordinates.function_space()
            vertex_entity_dofs = coords_fspace.finat_element.entity_dofs()[0]
            for entity, dof_list in vertex_entity_dofs.items():
                assert len(dof_list) > 0

    :arg cells_to_use: *cells_to_use* is primarily intended for use
        internally by :func:`~meshmode.interop.firedrake.connection.\
build_connection_from_firedrake`.
        *cells_to_use* must be either

        1. *None*, in which case this argument is ignored, or
        2. a numpy array of unique firedrake cell indexes.

        In case (2.),
        only cells whose index appears in *cells_to_use* are included
        in the resultant mesh, and their index in *cells_to_use*
        becomes the element index in the resultant mesh element group.
        Any faces or vertices which do not touch a cell in
        *cells_to_use* are also ignored.
        Note that in this latter case, some faces that are not
        boundaries in *fdrake_mesh* may become boundaries in the
        returned mesh. These "induced" boundaries are marked with
        :class:`~meshmode.mesh.BTAG_INDUCED_BOUNDARY`
        instead of :class:`~meshmode.mesh.BTAG_ALL`.

    :arg normals: **Only** used if *fdrake_mesh* is a 1-surface
        embedded in 2-space. In this case,

            - If *None* then
              all elements are assumed to be positively oriented.
            - Else, should be a list/array whose *i*\\ th entry
              is the normal for the *i*\\ th element (*i*\\ th
              in *mesh.coordinate.function_space()*'s
              *cell_node_list*)

    :arg no_normals_warn: If *True*, raises a warning
        if *fdrake_mesh* is a 1-surface embedded in 2-space
        and *normals* is *None*. Note that the default in this
        signature is *None*; the value is forwarded unchanged to the
        orientation computation.

    :return: A tuple *(meshmode mesh, firedrake_orient)*.
         ``firedrake_orient < 0`` is *True* for any negatively
         oriented firedrake cell (which was flipped by meshmode)
         and False for any positively oriented firedrake cell
         (which was not flipped by meshmode).

    :raises TypeError: if *fdrake_mesh* is not a
        `firedrake.mesh.MeshGeometry` or *cells_to_use* is neither
        *None* nor a :class:`numpy.ndarray`.
    """
    # Type validation
    from firedrake.mesh import MeshGeometry
    if not isinstance(fdrake_mesh, MeshGeometry):
        raise TypeError("'fdrake_mesh' must be an instance of "
                        "firedrake.mesh.MeshGeometry, "
                        "not '%s'." % type(fdrake_mesh))
    if cells_to_use is not None:
        if not isinstance(cells_to_use, np.ndarray):
            raise TypeError("'cells_to_use' must be a np.ndarray or "
                            "*None*")
        assert len(cells_to_use.shape) == 1
        assert np.size(np.unique(cells_to_use)) == np.size(cells_to_use), \
            ":arg:`cells_to_use` must have unique entries"
        assert np.all(np.logical_and(cells_to_use >= 0,
                                     cells_to_use < fdrake_mesh.num_cells()))
    assert fdrake_mesh.ufl_cell().is_simplex(), "Mesh must use simplex cells"
    gdim = fdrake_mesh.geometric_dimension()
    tdim = fdrake_mesh.topological_dimension()
    assert gdim in [1, 2, 3], "Mesh must be in space of ambient dim 1, 2, or 3"
    assert gdim - tdim in [0, 1], "Mesh co-dimension must be 0 or 1"
    # firedrake meshes are not guaranteed to be fully instantiated until
    # the .init() method is called. In particular, the coordinates function
    # may not be accessible if we do not call init(). If the mesh has
    # already been initialized, nothing will change. For more details
    # on why we need a second initialization, see
    # this pull request:
    # https://github.com/firedrakeproject/firedrake/pull/627
    # which details how Firedrake implements a mesh's coordinates
    # as a function on that very same mesh
    fdrake_mesh.init()

    # Get all the nodal information we can from the topology
    bdy_tags = _get_firedrake_boundary_tags(
        fdrake_mesh, tag_induced_boundary=cells_to_use is not None)

    with ProcessLogger(logger, "Retrieving vertex indices and computing "
                       "NodalAdjacency from firedrake mesh"):
        vertex_indices, nodal_adjacency = \
            _get_firedrake_nodal_info(fdrake_mesh, cells_to_use=cells_to_use)

        # If only using some cells, vertices may need new indices as many
        # will be removed
        if cells_to_use is not None:
            vert_ndx_new2old = np.unique(vertex_indices.flatten())
            vert_ndx_old2new = dict(zip(vert_ndx_new2old,
                                        np.arange(np.size(vert_ndx_new2old),
                                                  dtype=vertex_indices.dtype)))
            vertex_indices = \
                np.vectorize(vert_ndx_old2new.__getitem__)(vertex_indices)

    with ProcessLogger(logger, "Building (possibly) unflipped "
                       "SimplexElementGroup from firedrake unit nodes/nodes"):

        # Grab the mesh reference element and cell dimension
        coord_finat_elt = fdrake_mesh.coordinates.function_space().finat_element
        cell_dim = fdrake_mesh.cell_dimension()

        # Get finat unit nodes and map them onto the meshmode reference simplex
        finat_unit_nodes = get_finat_element_unit_nodes(coord_finat_elt)
        fd_ref_to_mm = get_affine_reference_simplex_mapping(cell_dim, True)
        finat_unit_nodes = fd_ref_to_mm(finat_unit_nodes)

        # Now grab the nodes
        coords = fdrake_mesh.coordinates
        cell_node_list = coords.function_space().cell_node_list
        if cells_to_use is not None:
            cell_node_list = cell_node_list[cells_to_use]
        nodes = np.real(coords.dat.data[cell_node_list])
        # Add extra dim in 1D for shape (nelements, nunit_nodes, dim)
        if tdim == 1:
            nodes = np.reshape(nodes, nodes.shape + (1,))
        # Transpose nodes to have shape (dim, nelements, nunit_nodes)
        nodes = np.transpose(nodes, (2, 0, 1))

        # make a group (possibly with some elements that need to be flipped)
        unflipped_group = SimplexElementGroup(coord_finat_elt.degree,
                                              vertex_indices,
                                              nodes,
                                              dim=cell_dim,
                                              unit_nodes=finat_unit_nodes)

    # Next get the vertices (we'll need these for the orientations)
    with ProcessLogger(logger, "Obtaining vertex coordinates"):
        coord_finat = fdrake_mesh.coordinates.function_space().finat_element
        # unit_vertex_indices are the element-local indices of the nodes
        # which coincide with the vertices, i.e. for element *i*,
        # vertex 0's coordinates would be nodes[i][unit_vertex_indices[0]].
        # This assumes each vertex has some node which coincides with it...
        # which is normally fine to assume for firedrake meshes.
        unit_vertex_indices = []
        # iterate through the dofs associated to each vertex on the
        # reference element
        for _, dofs in sorted(coord_finat.entity_dofs()[0].items()):
            assert len(dofs) == 1, \
                "The function space of the mesh coordinates must have" \
                " exactly one degree of freedom associated with" \
                " each vertex in order to determine vertex coordinates"
            dof, = dofs
            unit_vertex_indices.append(dof)

        # Now get the vertex coordinates as *(dim, nvertices)*-shaped array
        if cells_to_use is not None:
            nvertices = np.size(vert_ndx_new2old)
        else:
            nvertices = fdrake_mesh.num_vertices()
        # Uninitialized storage; each column is filled the first time its
        # vertex is encountered in the loop below.
        vertices = np.empty((gdim, nvertices), dtype=nodes.dtype)
        recorded_verts = set()
        for icell, cell_vertex_indices in enumerate(vertex_indices):
            for local_vert_id, global_vert_id in enumerate(cell_vertex_indices):
                if global_vert_id not in recorded_verts:
                    recorded_verts.add(global_vert_id)
                    local_node_nr = unit_vertex_indices[local_vert_id]
                    vertices[:, global_vert_id] = nodes[:, icell, local_node_nr]

    # Use the vertices to compute the orientations and flip the group
    with ProcessLogger(logger, "Computing cell orientations"):
        orient = _get_firedrake_orientations(fdrake_mesh,
                                             unflipped_group,
                                             vertices,
                                             cells_to_use=cells_to_use,
                                             normals=normals,
                                             no_normals_warn=no_normals_warn)

    with ProcessLogger(logger, "Flipping group"):
        from meshmode.mesh.processing import flip_simplex_element_group
        group = flip_simplex_element_group(vertices, unflipped_group, orient < 0)

    # Now, any flipped element had its 0 vertex and 1 vertex exchanged.
    # This changes the local facet nr, so we need to create and then
    # fix our facial adjacency groups. To do that, we need to figure
    # out which local facet numbers switched.
    face_vertex_indices = group.face_vertex_indices()
    # face indices of the faces not containing vertex 0 and not
    # containing vertex 1, respectively
    no_zero_face_ndx, no_one_face_ndx = None, None
    for iface, face in enumerate(face_vertex_indices):
        if 0 not in face:
            no_zero_face_ndx = iface
        elif 1 not in face:
            no_one_face_ndx = iface

    with ProcessLogger(logger, "Building (possibly) unflipped "
                       "FacialAdjacencyGroups"):
        unflipped_facial_adjacency_groups = \
            _get_firedrake_facial_adjacency_groups(fdrake_mesh,
                                                   cells_to_use=cells_to_use)

    # applied below to take elements and element_faces
    # (or neighbors and neighbor_faces) and flip in any faces that need to
    # be flipped.
    def flip_local_face_indices(faces, elements):
        faces = np.copy(faces)
        neg_elements = np.full(elements.shape, False)
        # To handle neighbor case, we only need to flip at elements
        # who have a neighbor, i.e. where neighbors is not a negative
        # bitmask of bdy tags
        neg_elements[elements >= 0] = (orient[elements[elements >= 0]] < 0)
        # Both masks are computed before either assignment so that the
        # swap below does not see partially updated face numbers.
        no_zero = np.logical_and(neg_elements, faces == no_zero_face_ndx)
        no_one = np.logical_and(neg_elements, faces == no_one_face_ndx)
        faces[no_zero], faces[no_one] = no_one_face_ndx, no_zero_face_ndx
        return faces

    # Create new facial adjacency groups that have been flipped
    with ProcessLogger(logger, "Flipping FacialAdjacencyGroups"):
        facial_adjacency_groups = []
        for igroup, fagrps in enumerate(unflipped_facial_adjacency_groups):
            facial_adjacency_groups.append({})
            for ineighbor_group, fagrp in fagrps.items():
                new_element_faces = flip_local_face_indices(fagrp.element_faces,
                                                            fagrp.elements)
                new_neighbor_faces = flip_local_face_indices(fagrp.neighbor_faces,
                                                             fagrp.neighbors)
                new_fagrp = FacialAdjacencyGroup(igroup=igroup,
                                                 ineighbor_group=ineighbor_group,
                                                 elements=fagrp.elements,
                                                 element_faces=new_element_faces,
                                                 neighbors=fagrp.neighbors,
                                                 neighbor_faces=new_neighbor_faces)
                facial_adjacency_groups[igroup][ineighbor_group] = new_fagrp

    return (Mesh(vertices, [group],
                 boundary_tags=bdy_tags,
                 nodal_adjacency=nodal_adjacency,
                 facial_adjacency_groups=facial_adjacency_groups),
            orient)
예제 #8
0
def export_mesh_to_firedrake(mesh, group_nr=None, comm=None):
    r"""
    Create a firedrake mesh corresponding to one
    :class:`~meshmode.mesh.Mesh`'s
    :class:`~meshmode.mesh.SimplexElementGroup`.

    :param mesh: A :class:`~meshmode.mesh.Mesh` to convert with
        at least one :class:`~meshmode.mesh.SimplexElementGroup`.
        'mesh.is_conforming' must evaluate to *True*.
        'mesh' must have vertices supplied, i.e.
        'mesh.vertices' must not be *None*.
    :param group_nr: The group number to be converted into a firedrake
        mesh. The corresponding group must be of type
        :class:`~meshmode.mesh.SimplexElementGroup`. If *None* and
        *mesh* has only one group, that group is used. Otherwise,
        a *ValueError* is raised.
    :param comm: The communicator to build the dmplex mesh on.
        If *None*, ``pyop2.mpi.COMM_WORLD`` is used.

    :return: A tuple *(fdrake_mesh, fdrake_cell_ordering, perm2cell)*
        where

        * *fdrake_mesh* is a :mod:`firedrake`
          `firedrake.mesh.MeshGeometry` corresponding to
          *mesh*
        * *fdrake_cell_ordering* is a numpy array such that the *i*\ th
          element in *mesh* (i.e. the *i*\ th element in
          *mesh.groups[group_nr].vertex_indices*) corresponds to the
          *fdrake_cell_ordering[i]*\ th :mod:`firedrake` cell
        * *perm2cell* is a dictionary, mapping tuples to
          1-D numpy arrays of meshmode element indices.
          Each meshmode element index
          appears in exactly one of these arrays. The corresponding
          tuple describes how firedrake reordered the local vertex
          indices on that cell. In particular, if *c*
          is in the list *perm2cell[p]* for a tuple *p*, then
          the *p[i]*\ th local vertex of the *fdrake_cell_ordering[c]*\ th
          firedrake cell corresponds to the *i*\ th local vertex
          of the *c*\ th meshmode element.

    :raises TypeError: if *mesh* is not a :class:`~meshmode.mesh.Mesh`,
        *group_nr* is not an :class:`int`, or the selected group is not a
        :class:`~meshmode.mesh.SimplexElementGroup`.
    :raises ValueError: if *group_nr* is *None* while *mesh* does not have
        exactly one group, *group_nr* is out of range, *mesh* has no
        vertices, or *mesh* is not conforming.

    .. warning::
        Currently, no custom boundary tags are exported along with the mesh.
        :mod:`firedrake` seems to only allow one marker on each facet, whereas
        :mod:`meshmode` allows many.
    """
    if not isinstance(mesh, Mesh):
        raise TypeError("'mesh' must be of type meshmode.mesh.Mesh,"
                        " not '%s'." % type(mesh))
    if group_nr is None:
        if len(mesh.groups) != 1:
            raise ValueError("'group_nr' is *None* but 'mesh' has "
                             "more than one group.")
        group_nr = 0
    if not isinstance(group_nr, int):
        raise TypeError("Expecting 'group_nr' to be of type int, not "
                        f"'{type(group_nr)}'")
    if group_nr < 0 or group_nr >= len(mesh.groups):
        raise ValueError("'group_nr' is an invalid group index:"
                         f" '{group_nr}' fails to satisfy "
                         f"0 <= {group_nr} < {len(mesh.groups)}")
    if not isinstance(mesh.groups[group_nr], SimplexElementGroup):
        raise TypeError("Expecting 'mesh.groups[group_nr]' to be of type "
                        "meshmode.mesh.SimplexElementGroup, not "
                        f"'{type(mesh.groups[group_nr])}'")
    if mesh.vertices is None:
        raise ValueError("'mesh' has no vertices "
                         "('mesh.vertices' is *None*)")
    if not mesh.is_conforming:
        raise ValueError(f"'mesh.is_conforming' is {mesh.is_conforming} "
                         "instead of *True*. Converting non-conforming "
                         "meshes to Firedrake is not supported")

    # Get the vertices and vertex indices of the requested group
    with ProcessLogger(logger, "Obtaining vertices from selected group"):
        group = mesh.groups[group_nr]
        # Only the vertices referenced by this group are exported; they are
        # renumbered contiguously in sorted order of their meshmode index.
        fd2mm_indices = np.unique(group.vertex_indices.flatten())
        coords = mesh.vertices[:, fd2mm_indices].T
        mm2fd_indices = dict(zip(fd2mm_indices, np.arange(np.size(fd2mm_indices))))
        cells = np.vectorize(mm2fd_indices.__getitem__)(group.vertex_indices)

    # Get a dmplex object and then a mesh topology
    with ProcessLogger(logger, "Building dmplex object and MeshTopology"):
        if comm is None:
            from pyop2.mpi import COMM_WORLD
            comm = COMM_WORLD
        # FIXME : not sure how to get around the private accesses
        import firedrake.mesh as fd_mesh
        plex = fd_mesh._from_cell_list(group.dim, cells, coords, comm)
        # Nb : One might be tempted to pass reorder=False and thereby save some
        #      hassle in exchange for forcing firedrake to have slightly
        #      less efficient caching. Unfortunately, that only prevents
        #      the cells from being reordered, and does not prevent the
        #      vertices from being (locally) reordered on each cell...
        #      the tl;dr is we don't actually save any hassle
        top = fd_mesh.Mesh(plex, dim=mesh.ambient_dim)  # mesh topology
        top.init()

    # Get new element ordering:
    with ProcessLogger(logger, "Determining permutations applied"
                       " to local vertex numbers"):
        c_start, c_end = top._topology_dm.getHeightStratum(0)
        cell_index_mm2fd = np.vectorize(top._cell_numbering.getOffset)(
            np.arange(c_start, c_end))
        v_start, v_end = top._topology_dm.getDepthStratum(0)

        # Firedrake goes crazy reordering local vertex numbers,
        # we've got to work to figure out what changes they made.
        #
        # *perm2cells* will map permutations of local vertex numbers to
        #              the list of all the meshmode cells
        #              which firedrake reordered according to that permutation
        #
        #              Permutations on *n* vertices are stored as a tuple
        #              containing all of the integers *0*, *1*, *2*, ..., *n-1*
        #              exactly once. A permutation *p*
        #              represents relabeling the *i*\ th local vertex
        #              of a meshmode element as the *p[i]*\ th local vertex
        #              in the corresponding firedrake cell.
        #
        #              *perm2cells[p]* is a list of all the meshmode element indices
        #              for which *p* represents the reordering applied by firedrake
        perm2cells = {}
        for mm_cell_id, dmp_ids in enumerate(top.cell_closure[cell_index_mm2fd]):
            # look at order of vertices in firedrake cell
            vert_dmp_ids = \
                dmp_ids[np.logical_and(v_start <= dmp_ids, dmp_ids < v_end)]
            fdrake_order = vert_dmp_ids - v_start
            # get original order
            mm_order = mesh.groups[group_nr].vertex_indices[mm_cell_id]
            # want permutation p so that mm_order[p] = fdrake_order
            # To do so, look at permutations acting by composition.
            #
            # mm_order \circ argsort(mm_order) =
            #     fdrake_order \circ argsort(fdrake_order)
            # so
            # mm_order \circ argsort(mm_order) \circ inv(argsort(fdrake_order))
            #  = fdrake_order
            #
            # argsort acts as an inverse, so the desired permutation is:
            perm = tuple(np.argsort(mm_order)[np.argsort(np.argsort(fdrake_order))])
            perm2cells.setdefault(perm, []).append(mm_cell_id)

        # Make perm2cells map to numpy arrays instead of lists
        perm2cells = {perm: np.array(cell_ids)
                      for perm, cell_ids in perm2cells.items()}

    # Now make a coordinates function
    with ProcessLogger(logger, "Building firedrake function "
                       "space for mesh coordinates"):
        from firedrake import VectorFunctionSpace, Function
        coords_fspace = VectorFunctionSpace(top, "CG", group.order,
                                            dim=mesh.ambient_dim)
        coords = Function(coords_fspace)

    # get firedrake unit nodes and map onto meshmode reference element
    fd_ref_cell_to_mm = get_affine_reference_simplex_mapping(group.dim, True)
    fd_unit_nodes = get_finat_element_unit_nodes(coords_fspace.finat_element)
    fd_unit_nodes = fd_ref_cell_to_mm(fd_unit_nodes)

    basis = simplex_best_available_basis(group.dim, group.order)
    resampling_mat = resampling_matrix(basis,
                                       new_nodes=fd_unit_nodes,
                                       old_nodes=group.unit_nodes)
    # Store the meshmode data resampled to firedrake unit nodes
    # (but still in meshmode order)
    resampled_group_nodes = np.matmul(group.nodes, resampling_mat.T)

    # Now put the nodes in the right local order
    # nodes is shaped *(ambient dim, nelements, nunit nodes)*
    with ProcessLogger(logger, "Storing meshmode mesh coordinates"
                       " in firedrake nodal order"):
        from meshmode.mesh.processing import get_simplex_element_flip_matrix
        for perm, perm_cells in perm2cells.items():
            flip_mat = get_simplex_element_flip_matrix(group.order,
                                                       fd_unit_nodes,
                                                       perm)
            # The flip matrix is rounded to integer entries before use.
            flip_mat = np.rint(flip_mat).astype(np.int32)
            resampled_group_nodes[:, perm_cells, :] = \
                np.matmul(resampled_group_nodes[:, perm_cells, :], flip_mat.T)

    # store resampled data in right cell ordering
    with ProcessLogger(logger, "resampling mesh coordinates to "
                       "firedrake unit nodes"):
        reordered_cell_node_list = coords_fspace.cell_node_list[cell_index_mm2fd]
        coords.dat.data[reordered_cell_node_list, :] = \
            resampled_group_nodes.transpose((1, 2, 0))

    return fd_mesh.Mesh(coords), cell_index_mm2fd, perm2cells
예제 #9
0
    def __call__(self,
                 queue,
                 tree,
                 ball_centers,
                 ball_radii,
                 peer_lists=None,
                 wait_for=None):
        """
        Build a leaves-to-balls lookup by running an area query and
        inverting its result.

        :arg queue: a :class:`pyopencl.CommandQueue`
        :arg tree: a :class:`boxtree.Tree`.
        :arg ball_centers: an object array of coordinate
            :class:`pyopencl.array.Array` instances.
            Their *dtype* must match *tree*'s
            :attr:`boxtree.Tree.coord_dtype`.
        :arg ball_radii: a
            :class:`pyopencl.array.Array`
            of positive numbers.
            Its *dtype* must match *tree*'s
            :attr:`boxtree.Tree.coord_dtype`.
        :arg peer_lists: may either be *None* or an instance of
            :class:`PeerListLookup` associated with `tree`.
        :arg wait_for: may either be *None* or a list of :class:`pyopencl.Event`
            instances for whose completion this command waits before starting
            execution.
        :returns: a tuple *(lbl, event)*, where *lbl* is an instance of
            :class:`LeavesToBallsLookup`, and *event* is a :class:`pyopencl.Event`
            for dependency management.
        :raises TypeError: if the dtype of *ball_centers* or *ball_radii*
            differs from ``tree.coord_dtype``.
        """

        from pytools import single_valued
        if single_valued(bc.dtype for bc in ball_centers) != tree.coord_dtype:
            raise TypeError("ball_centers dtype must match tree.coord_dtype")
        if ball_radii.dtype != tree.coord_dtype:
            raise TypeError("ball_radii dtype must match tree.coord_dtype")

        # Use the process logger as a context manager so that it is always
        # finalized, even when one of the steps below raises.
        with ProcessLogger(logger, "leaves-to-balls lookup: run area query"):
            area_query, evt = self.area_query_builder(queue, tree, ball_centers,
                                                      ball_radii, peer_lists,
                                                      wait_for)
            # NOTE(review): this list is overwritten below before any call
            # receives it, so the starts expander is launched without an
            # explicit dependency on the area query event -- confirm that
            # this is intended.
            wait_for = [evt]

            logger.debug("leaves-to-balls lookup: expand starts")

            nkeys = tree.nboxes
            nballs_p_1 = len(area_query.leaves_near_ball_starts)
            assert nballs_p_1 == len(ball_radii) + 1

            # We invert the area query in two steps:
            #
            # 1. Turn the area query result into (ball number, box number)
            #    pairs. This is done in the "starts expander kernel."
            #
            # 2. Key-value sort the (ball number, box number) pairs by box
            #    number.

            starts_expander_knl = self.get_starts_expander_kernel(
                tree.box_id_dtype)
            expanded_starts = cl.array.empty(
                queue, len(area_query.leaves_near_ball_lists),
                tree.box_id_dtype)
            evt = starts_expander_knl(
                expanded_starts,
                area_query.leaves_near_ball_starts.with_queue(queue),
                nballs_p_1)
            wait_for = [evt]

            logger.debug("leaves-to-balls lookup: key-value sort")

            balls_near_box_starts, balls_near_box_lists, evt \
                    = self.key_value_sorter(
                            queue,
                            # keys
                            area_query.leaves_near_ball_lists.with_queue(queue),
                            # values
                            expanded_starts,
                            nkeys, starts_dtype=tree.box_id_dtype,
                            wait_for=wait_for)

        # The lookup is returned detached from any queue.
        return LeavesToBallsLookup(
            tree=tree,
            balls_near_box_starts=balls_near_box_starts,
            balls_near_box_lists=balls_near_box_lists).with_queue(None), evt
예제 #10
0
    def __call__(self, queue, tree, ball_centers, ball_radii,
                 peer_lists=None, wait_for=None):
        """Run the space invader query for a set of balls against *tree*.

        :arg queue: a :class:`pyopencl.CommandQueue`
        :arg tree: a :class:`boxtree.Tree`.
        :arg ball_centers: an object array of coordinate
            :class:`pyopencl.array.Array` instances, all sharing *tree*'s
            :attr:`boxtree.Tree.coord_dtype`.
        :arg ball_radii: a :class:`pyopencl.array.Array` of positive numbers
            whose *dtype* is *tree*'s :attr:`boxtree.Tree.coord_dtype`.
        :arg peer_lists: either *None* (in which case the peer lists are
            computed here) or a :class:`PeerListLookup` associated with
            *tree*.
        :arg wait_for: either *None* or a list of :class:`pyopencl.Event`
            instances that must complete before this command starts.
        :returns: a tuple *(sqi, event)*. *sqi* is a
            :class:`pyopencl.array.Array` of shape *(tree.nboxes,)* (see
            :attr:`boxtree.Tree.nboxes`) and dtype
            :attr:`boxtree.Tree.coord_dtype`, indexed by global box index:

            * for a non-leaf box *i*, *sqi[i] = 0*;
            * for a leaf box *i*, *sqi[i]* is the outer space invader
              distance for *i*.

            *event* is a :class:`pyopencl.Event` for dependency management.
        :raises TypeError: if the dtype of *ball_centers* or *ball_radii*
            differs from ``tree.coord_dtype``.
        :raises ValueError: if the peer lists do not have ``tree.nboxes + 1``
            starts.
        """
        from pytools import div_ceil, single_valued

        coord_dtype = tree.coord_dtype
        if single_valued(bc.dtype for bc in ball_centers) != coord_dtype:
            raise TypeError("ball_centers dtype must match tree.coord_dtype")
        if ball_radii.dtype != coord_dtype:
            raise TypeError("ball_radii dtype must match tree.coord_dtype")

        # Round the level count up to a multiple of 10 so that fewer
        # distinct kernels get generated.
        level_bound = 10 * div_ceil(tree.nlevels, 10)

        if peer_lists is None:
            peer_lists, peer_evt = self.peer_list_finder(
                queue, tree, wait_for=wait_for)
            wait_for = [peer_evt]

        if len(peer_lists.peer_list_starts) != tree.nboxes + 1:
            raise ValueError(
                "size of peer lists must match with number of boxes")

        knl = self.get_space_invader_query_kernel(
            tree.dimensions, coord_dtype, tree.box_id_dtype,
            peer_lists.peer_list_starts.dtype, level_bound)

        plog = ProcessLogger(logger, "space invader query")

        # The kernel writes into a float32 array regardless of coord_dtype.
        dists = cl.array.zeros(queue, tree.nboxes, np.float32)

        # The kernel must also wait for the zero-fill of the output array.
        deps = (wait_for or []) + dists.events

        kernel_args = SPACE_INVADER_QUERY_TEMPLATE.unwrap_args(
            tree, peer_lists, ball_radii, dists, *ball_centers)
        evt = knl(*kernel_args,
                  wait_for=deps,
                  queue=queue,
                  range=slice(len(ball_radii)))

        if coord_dtype != np.dtype(np.float32):
            # The kernel output is always float32 due to limited support for
            # atomic operations with float64 in OpenCL; cast the result so
            # that it matches the coordinate dtype.
            dists.finish()
            dists = dists.astype(coord_dtype)
            evt, = dists.events

        plog.done()

        return dists, evt
예제 #11
0
def refine_for_global_qbx(lpot_source,
                          wrangler,
                          group_factory,
                          kernel_length_scale=None,
                          force_stage2_uniform_refinement_rounds=None,
                          scaled_max_curvature_threshold=None,
                          debug=None,
                          maxiter=None,
                          visualize=None,
                          expansion_disturbance_tolerance=None,
                          refiner=None):
    """
    Entry point for calling the refiner.

    Refinement runs in two stages. Stage 1 repeatedly refines
    ``lpot_source.density_discr`` until none of the enabled criteria (kernel
    length scale, scaled max curvature, disturbed expansions) is violated.
    Stage 2 repeatedly refines the stage-2 discretization until the source
    quadrature resolution check passes, followed by an optional number of
    forced uniform refinement rounds. Each stage gives up with a
    :class:`RefinerNotConvergedWarning` after *maxiter* iterations.

    :arg lpot_source: An instance of :class:`QBXLayerPotentialSource`.

    :arg wrangler: An instance of :class:`RefinerWrangler`.

    :arg group_factory: An instance of
        :class:`meshmode.mesh.discretization.ElementGroupFactory`. Used for
        discretizing the coarse refined mesh.

    :arg kernel_length_scale: The kernel length scale, or *None* if not
        applicable. All panels are refined to below this size.

    :arg force_stage2_uniform_refinement_rounds: Number of additional rounds
        in which every stage-2 element is flagged for refinement, performed
        after the stage-2 loop. Defaults to 0.

    :arg scaled_max_curvature_threshold: Threshold for the element-wise
        maximum of the scaled max curvature, or *None* to skip this check.

    :arg debug: Passed on to the wrangler's checks and refinement calls and
        stored on the returned source. Defaults to *True*.

    :arg maxiter: The maximum number of refiner iterations (applied to each
        stage separately). Defaults to 10.

    :arg visualize: If true, a VTK file named
        ``refinement-<criterion>-<iteration>.vtu`` is written each time a
        criterion flags elements for refinement.

    :arg expansion_disturbance_tolerance: Passed to the wrangler's check for
        expansion disks disturbed by sources. Defaults to 0.025.

    :arg refiner: Either *None*, in which case a
        :class:`meshmode.mesh.refinement.RefinerWithoutAdjacency` is created
        for the mesh of ``lpot_source.density_discr``, or an existing refiner
        whose current mesh is that mesh.

    :returns: A tuple ``(lpot_source, *conn*)`` where ``lpot_source`` is the
        refined layer potential source, and ``conn`` is a
        :class:`meshmode.discretization.connection.DiscretizationConnection`
        going from the original mesh to the refined mesh.
    """

    if maxiter is None:
        maxiter = 10

    if debug is None:
        # FIXME: Set debug=False by default once everything works.
        debug = True

    if expansion_disturbance_tolerance is None:
        expansion_disturbance_tolerance = 0.025

    if force_stage2_uniform_refinement_rounds is None:
        force_stage2_uniform_refinement_rounds = 0

    # TODO: Stop doing redundant checks by avoiding panels which no longer need
    # refinement.

    from pytential import bind, sym
    from meshmode.mesh.refinement import RefinerWithoutAdjacency
    from meshmode.discretization.connection import (
        ChainedDiscretizationConnection, make_same_mesh_connection)

    if refiner is not None:
        assert refiner.get_current_mesh() == lpot_source.density_discr.mesh
    else:
        # We may be handed a mesh that's already non-conforming, we don't rely
        # on adjacency, and the no-adjacency refiner is faster.
        refiner = RefinerWithoutAdjacency(lpot_source.density_discr.mesh)

    connections = []

    # {{{ first stage refinement

    def visualize_refinement(niter, stage_nr, stage_name, flags):
        """Write the per-node refinement flags of one iteration to a VTK file.

        Does nothing unless *visualize* was requested by the caller.
        """
        if not visualize:
            return

        if stage_nr == 1:
            discr = lpot_source.density_discr
        elif stage_nr == 2:
            discr = lpot_source.stage2_density_discr
        else:
            raise ValueError("unexpected stage number")

        flags = flags.get()
        logger.info("for stage %s: splitting %d/%d stage-%d elements",
                    stage_name, np.sum(flags), discr.mesh.nelements, stage_nr)

        from meshmode.discretization.visualization import make_visualizer
        vis = make_visualizer(wrangler.queue, discr, 3)

        assert len(flags) == discr.mesh.nelements

        # The 'np.bool' alias was removed from NumPy; the builtin 'bool' is
        # the equivalent dtype and works with every NumPy version.
        flags = flags.astype(bool)
        nodes_flags = np.zeros(discr.nnodes)
        for grp in discr.groups:
            meg = grp.mesh_el_group
            grp_flags = flags[
                meg.element_nr_base:meg.element_nr_base + meg.nelements]
            grp.view(nodes_flags)[grp_flags] = 1

        nodes_flags = cl.array.to_device(wrangler.queue, nodes_flags)
        vis_data = [
            ("refine_flags", nodes_flags),
        ]

        # Disabled debugging aid: flip to a true value to also write out
        # the boundary normals.
        if 0:
            bdry_normals = bind(discr, sym.normal(discr.ambient_dim))(
                wrangler.queue).as_vector(dtype=object)
            vis_data.append(("bdry_normals", bdry_normals), )

        vis.write_vtk_file("refinement-%s-%03d.vtu" % (stage_name, niter),
                           vis_data)

    def warn_max_iterations():
        """Warn that the current stage hit the iteration limit."""
        from warnings import warn
        # Report 'maxiter' rather than len(violated_criteria): the latter
        # accumulates over both stages and would overstate the limit when
        # stage 2 fails to converge.
        warn(
            "QBX layer potential source refiner did not terminate "
            "after %d iterations (the maximum). "
            "You may pass 'visualize=True' to with_refinement() "
            "to see what area of the geometry is causing trouble. "
            "If the issue is disturbance of expansion disks, you may "
            "pass a slightly increased value (currently: %g) for "
            "_expansion_disturbance_tolerance in with_refinement(). "
            "As a last resort, "
            "you may use Python's warning filtering mechanism to "
            "not treat this warning as an error. "
            "The criteria triggering refinement in each iteration "
            "were: %s. " %
            (maxiter, expansion_disturbance_tolerance,
             ", ".join("%d: %s" % (i + 1, vc_text)
                       for i, vc_text in enumerate(violated_criteria))),
            RefinerNotConvergedWarning)

    violated_criteria = []
    iter_violated_criteria = ["start"]

    niter = 0

    while iter_violated_criteria:
        iter_violated_criteria = []
        niter += 1

        if niter > maxiter:
            warn_max_iterations()
            break

        refine_flags = make_empty_refine_flags(wrangler.queue, lpot_source)

        if kernel_length_scale is not None:
            with ProcessLogger(
                    logger,
                    "checking kernel length scale to panel size ratio"):

                quad_resolution = bind(
                    lpot_source,
                    sym._quad_resolution(lpot_source.ambient_dim,
                                         dofdesc=sym.GRANULARITY_ELEMENT))(
                                             wrangler.queue)

                violates_kernel_length_scale = \
                        wrangler.check_element_prop_threshold(
                                element_property=quad_resolution,
                                threshold=kernel_length_scale,
                                refine_flags=refine_flags, debug=debug)

                if violates_kernel_length_scale:
                    iter_violated_criteria.append("kernel length scale")
                    visualize_refinement(niter, 1, "kernel-length-scale",
                                         refine_flags)

        if scaled_max_curvature_threshold is not None:
            with ProcessLogger(logger,
                               "checking scaled max curvature threshold"):
                scaled_max_curv = bind(
                    lpot_source,
                    sym.ElementwiseMax(
                        sym._scaled_max_curvature(lpot_source.ambient_dim),
                        dofdesc=sym.GRANULARITY_ELEMENT))(wrangler.queue)

                violates_scaled_max_curv = \
                        wrangler.check_element_prop_threshold(
                                element_property=scaled_max_curv,
                                threshold=scaled_max_curvature_threshold,
                                refine_flags=refine_flags, debug=debug)

                if violates_scaled_max_curv:
                    iter_violated_criteria.append("curvature")
                    visualize_refinement(niter, 1, "curvature", refine_flags)

        if not iter_violated_criteria:
            # Only start building trees once the simple length-based criteria
            # are happy.

            # Build tree and auxiliary data.
            # FIXME: The tree should not have to be rebuilt at each iteration.
            tree = wrangler.build_tree(lpot_source)
            peer_lists = wrangler.find_peer_lists(tree)

            has_disturbed_expansions = \
                    wrangler.check_expansion_disks_undisturbed_by_sources(
                            lpot_source, tree, peer_lists,
                            expansion_disturbance_tolerance,
                            refine_flags, debug)
            if has_disturbed_expansions:
                iter_violated_criteria.append("disturbed expansions")
                visualize_refinement(niter, 1, "disturbed-expansions",
                                     refine_flags)

            del tree
            del peer_lists

        if iter_violated_criteria:
            violated_criteria.append(" and ".join(iter_violated_criteria))

            conn = wrangler.refine(lpot_source.density_discr, refiner,
                                   refine_flags, group_factory, debug)
            connections.append(conn)
            lpot_source = lpot_source.copy(density_discr=conn.to_discr)

        del refine_flags

    # }}}

    # {{{ second stage refinement

    iter_violated_criteria = ["start"]
    niter = 0
    fine_connections = []

    stage2_density_discr = lpot_source.density_discr

    while iter_violated_criteria:
        iter_violated_criteria = []
        niter += 1

        if niter > maxiter:
            warn_max_iterations()
            break

        # Build tree and auxiliary data.
        # FIXME: The tree should not have to be rebuilt at each iteration.
        tree = wrangler.build_tree(lpot_source, use_stage2_discr=True)
        peer_lists = wrangler.find_peer_lists(tree)
        refine_flags = make_empty_refine_flags(wrangler.queue,
                                               lpot_source,
                                               use_stage2_discr=True)

        has_insufficient_quad_res = \
                wrangler.check_sufficient_source_quadrature_resolution(
                        lpot_source, tree, peer_lists, refine_flags, debug)
        if has_insufficient_quad_res:
            iter_violated_criteria.append("insufficient quadrature resolution")
            visualize_refinement(niter, 2, "quad-resolution", refine_flags)

        if iter_violated_criteria:
            violated_criteria.append(" and ".join(iter_violated_criteria))

            conn = wrangler.refine(stage2_density_discr, refiner, refine_flags,
                                   group_factory, debug)
            stage2_density_discr = conn.to_discr
            fine_connections.append(conn)
            lpot_source = lpot_source.copy(
                to_refined_connection=ChainedDiscretizationConnection(
                    fine_connections))

        del tree
        del refine_flags
        del peer_lists

    # Forced uniform rounds: flag every stage-2 element for refinement.
    for _ in range(force_stage2_uniform_refinement_rounds):
        conn = wrangler.refine(
            stage2_density_discr, refiner,
            np.ones(stage2_density_discr.mesh.nelements, dtype=bool),
            group_factory, debug)
        stage2_density_discr = conn.to_discr
        fine_connections.append(conn)
        lpot_source = lpot_source.copy(
            to_refined_connection=ChainedDiscretizationConnection(
                fine_connections))

    # }}}

    lpot_source = lpot_source.copy(debug=debug, _refined_for_global_qbx=True)

    if len(connections) == 0:
        # No stage-1 refinement took place, so the connection is an identity.
        # FIXME: This is inefficient
        connection = make_same_mesh_connection(lpot_source.density_discr,
                                               lpot_source.density_discr)
    else:
        connection = ChainedDiscretizationConnection(connections)

    return lpot_source, connection
예제 #12
0
    def __call__(self, queue, tree, ball_centers, ball_radii,
                 peer_lists=None, wait_for=None):
        """Run the area query for a set of balls against *tree*.

        :arg queue: a :class:`pyopencl.CommandQueue`
        :arg tree: a :class:`boxtree.Tree`.
        :arg ball_centers: an object array of coordinate
            :class:`pyopencl.array.Array` instances, all sharing *tree*'s
            :attr:`boxtree.Tree.coord_dtype`.
        :arg ball_radii: a :class:`pyopencl.array.Array` of positive numbers
            whose *dtype* is *tree*'s :attr:`boxtree.Tree.coord_dtype`.
        :arg peer_lists: either *None* (in which case the peer lists are
            computed here) or a :class:`PeerListLookup` associated with
            *tree*.
        :arg wait_for: either *None* or a list of :class:`pyopencl.Event`
            instances that must complete before this command starts
            execution.
        :returns: a tuple *(aq, event)*, where *aq* is an instance of
            :class:`AreaQueryResult`, and *event* is a :class:`pyopencl.Event`
            for dependency management.
        :raises TypeError: if the dtype of *ball_centers* or *ball_radii*
            differs from ``tree.coord_dtype``.
        :raises ValueError: if the peer lists do not have ``tree.nboxes + 1``
            starts.
        """
        from pytools import div_ceil, single_valued

        if single_valued(bc.dtype for bc in ball_centers) != tree.coord_dtype:
            raise TypeError("ball_centers dtype must match tree.coord_dtype")
        if ball_radii.dtype != tree.coord_dtype:
            raise TypeError("ball_radii dtype must match tree.coord_dtype")

        # Balls are numbered with the tree's particle id dtype.
        ball_id_dtype = tree.particle_id_dtype

        # Round the level count up to a multiple of 10 so that fewer
        # distinct kernels get generated.
        level_bound = 10 * div_ceil(tree.nlevels, 10)

        if peer_lists is None:
            peer_lists, peer_evt = self.peer_list_finder(
                queue, tree, wait_for=wait_for)
            wait_for = [peer_evt]

        if len(peer_lists.peer_list_starts) != tree.nboxes + 1:
            raise ValueError(
                "size of peer lists must match with number of boxes")

        knl = self.get_area_query_kernel(
            tree.dimensions, tree.coord_dtype, tree.box_id_dtype,
            ball_id_dtype, peer_lists.peer_list_starts.dtype, level_bound)

        plog = ProcessLogger(logger, "area query")

        # Positional kernel arguments: tree data, peer lists, radii, then the
        # lower corner of the bounding box and the ball center coordinates.
        kernel_args = (
            len(ball_radii),
            tree.box_centers.data,
            tree.root_extent,
            tree.box_levels,
            tree.aligned_nboxes,
            tree.box_child_ids.data,
            tree.box_flags,
            peer_lists.peer_list_starts,
            peer_lists.peer_lists,
            ball_radii,
            *tree.bounding_box[0],
            *ball_centers,
        )
        result, evt = knl(queue, *kernel_args, wait_for=wait_for)

        plog.done()

        leaves = result["leaves"]
        lookup = AreaQueryResult(
            tree=tree,
            leaves_near_ball_starts=leaves.starts,
            leaves_near_ball_lists=leaves.lists)

        return lookup.with_queue(None), evt
예제 #13
0
def _refine_qbx_stage1(lpot_source,
                       density_discr,
                       wrangler,
                       group_factory,
                       kernel_length_scale=None,
                       scaled_max_curvature_threshold=None,
                       expansion_disturbance_tolerance=None,
                       maxiter=None,
                       debug=None,
                       visualize=False):
    """Run the stage-1 refinement loop on *density_discr*.

    In each iteration the enabled element-size criteria (kernel length scale,
    scaled max curvature) are checked first; only once those pass is a tree
    built and the disturbed-expansions check run. Flagged elements are
    refined and the loop repeats until no criterion is violated or *maxiter*
    iterations have been exceeded, in which case a warning is issued via
    ``_warn_max_iterations``.

    :arg lpot_source: the layer potential source; if its
        ``_disable_refinement`` attribute is true, no refinement is done.
    :arg density_discr: the discretization to start refining from.
    :arg wrangler: provides the checks, tree building and refinement.
    :arg group_factory: passed to ``wrangler.refine``.
    :arg kernel_length_scale: threshold for the quadrature resolution check,
        or *None* to skip that check.
    :arg scaled_max_curvature_threshold: threshold for the scaled max
        curvature check, or *None* to skip that check.
    :arg expansion_disturbance_tolerance: passed to the disturbed-expansions
        check.
    :arg maxiter: maximum number of iterations; *None* means 10.
    :arg debug: passed to the wrangler's checks and to ``wrangler.refine``.
    :arg visualize: passed to ``_visualize_refinement``.
    :returns: a tuple ``(stage1_density_discr, conn)`` of the refined
        discretization and a ``ChainedDiscretizationConnection`` from
        *density_discr* to it.
    """
    from pytential import bind, sym
    from meshmode.discretization.connection import ChainedDiscretizationConnection
    if lpot_source._disable_refinement:
        return density_discr, ChainedDiscretizationConnection(
            [], from_discr=density_discr)

    if maxiter is None:
        # The signature defaults to None, and 'niter > None' raises a
        # TypeError on Python 3. Fall back to the same limit that
        # refine_for_global_qbx uses.
        maxiter = 10

    from meshmode.mesh.refinement import RefinerWithoutAdjacency
    refiner = RefinerWithoutAdjacency(density_discr.mesh)

    # TODO: Stop doing redundant checks by avoiding panels which no longer need
    # refinement.

    connections = []
    violated_criteria = []
    iter_violated_criteria = ["start"]
    niter = 0

    actx = wrangler.array_context

    stage1_density_discr = density_discr
    while iter_violated_criteria:
        iter_violated_criteria = []
        niter += 1

        if niter > maxiter:
            _warn_max_iterations(violated_criteria,
                                 expansion_disturbance_tolerance)
            break

        refine_flags = make_empty_refine_flags(wrangler.queue,
                                               stage1_density_discr)

        if kernel_length_scale is not None:
            with ProcessLogger(
                    logger,
                    "checking kernel length scale to panel size ratio"):

                quad_resolution = bind(
                    stage1_density_discr,
                    sym._quad_resolution(
                        stage1_density_discr.ambient_dim,
                        dofdesc=sym.GRANULARITY_ELEMENT))(actx)

                violates_kernel_length_scale = \
                        wrangler.check_element_prop_threshold(
                                element_property=quad_resolution,
                                threshold=kernel_length_scale,
                                refine_flags=refine_flags, debug=debug)

                if violates_kernel_length_scale:
                    iter_violated_criteria.append("kernel length scale")
                    _visualize_refinement(actx,
                                          stage1_density_discr,
                                          niter,
                                          1,
                                          "kernel-length-scale",
                                          refine_flags,
                                          visualize=visualize)

        if scaled_max_curvature_threshold is not None:
            with ProcessLogger(logger,
                               "checking scaled max curvature threshold"):
                scaled_max_curv = bind(
                    stage1_density_discr,
                    sym.ElementwiseMax(sym._scaled_max_curvature(
                        stage1_density_discr.ambient_dim),
                                       dofdesc=sym.GRANULARITY_ELEMENT))(actx)

                violates_scaled_max_curv = \
                        wrangler.check_element_prop_threshold(
                                element_property=scaled_max_curv,
                                threshold=scaled_max_curvature_threshold,
                                refine_flags=refine_flags, debug=debug)

                if violates_scaled_max_curv:
                    iter_violated_criteria.append("curvature")
                    # NOTE(review): this call passes wrangler.queue while the
                    # kernel-length-scale call above passes actx as the first
                    # argument -- confirm which one _visualize_refinement
                    # expects.
                    _visualize_refinement(wrangler.queue,
                                          stage1_density_discr,
                                          niter,
                                          1,
                                          "curvature",
                                          refine_flags,
                                          visualize=visualize)

        if not iter_violated_criteria:
            # Only start building trees once the simple length-based criteria
            # are happy.
            places = _make_temporary_collection(
                lpot_source, stage1_density_discr=stage1_density_discr)

            # Build tree and auxiliary data.
            # FIXME: The tree should not have to be rebuilt at each iteration.
            tree = wrangler.build_tree(
                places, sources_list=[places.auto_source.geometry])
            peer_lists = wrangler.find_peer_lists(tree)

            has_disturbed_expansions = \
                    wrangler.check_expansion_disks_undisturbed_by_sources(
                            stage1_density_discr, tree, peer_lists,
                            expansion_disturbance_tolerance,
                            refine_flags, debug)
            if has_disturbed_expansions:
                iter_violated_criteria.append("disturbed expansions")
                # NOTE(review): same first-argument inconsistency as in the
                # curvature branch (wrangler.queue vs. actx) -- confirm.
                _visualize_refinement(wrangler.queue,
                                      stage1_density_discr,
                                      niter,
                                      1,
                                      "disturbed-expansions",
                                      refine_flags,
                                      visualize=visualize)

            del tree
            del peer_lists

        if iter_violated_criteria:
            # Record what triggered this round, then refine flagged elements.
            violated_criteria.append(" and ".join(iter_violated_criteria))

            conn = wrangler.refine(stage1_density_discr, refiner, refine_flags,
                                   group_factory, debug)
            stage1_density_discr = conn.to_discr
            connections.append(conn)

        del refine_flags

    conn = ChainedDiscretizationConnection(connections,
                                           from_discr=density_discr)

    return stage1_density_discr, conn
예제 #14
0
def generate_code_v2(kernel):
    """Generate code for *kernel*, preprocessing and scheduling it if needed.

    The kernel is preprocessed if it is still in its initial state and
    scheduled if it has no schedule yet. If caching is enabled, a previously
    stored result for the resulting kernel is returned directly; otherwise
    the code is generated, preambles are collected, and the result is stored
    in the cache.

    :arg kernel: the kernel to generate code for.
    :returns: a :class:`CodeGenerationResult`
    :raises LoopyError: if the kernel is not in the linearized state after
        the preprocessing and scheduling steps.
    :raises ValueError: if the kernel has an argument that is neither an
        array nor a value argument.
    """

    from loopy.kernel import KernelState
    if kernel.state == KernelState.INITIAL:
        from loopy.preprocess import preprocess_kernel
        kernel = preprocess_kernel(kernel)

    if kernel.schedule is None:
        from loopy.schedule import get_one_scheduled_kernel
        kernel = get_one_scheduled_kernel(kernel)

    if kernel.state != KernelState.LINEARIZED:
        raise LoopyError("cannot generate code for a kernel that has not been "
                         "scheduled")

    # {{{ cache retrieval

    from loopy import CACHING_ENABLED

    if CACHING_ENABLED:
        # Remember the kernel as it is now: it is the cache key both for the
        # lookup here and for the store at the end.
        input_kernel = kernel
        try:
            result = code_gen_cache[input_kernel]
        except KeyError:
            pass
        else:
            logger.debug("%s: code generation cache hit", kernel.name)
            return result

    # }}}

    from loopy.type_inference import infer_unknown_types
    kernel = infer_unknown_types(kernel, expect_completion=True)

    from loopy.check import pre_codegen_checks
    pre_codegen_checks(kernel)

    codegen_plog = ProcessLogger(logger, f"{kernel.name}: generate code")

    # {{{ examine arg list

    from loopy.kernel.data import ValueArg
    from loopy.kernel.array import ArrayBase

    implemented_data_info = []

    # The set of written variables does not change while the arguments are
    # examined, so query it once.
    written_vars = kernel.get_written_variables()

    for arg in kernel.args:
        is_written = arg.name in written_vars
        if isinstance(arg, ArrayBase):
            implemented_data_info.extend(
                arg.decl_info(kernel.target,
                              is_written=is_written,
                              index_dtype=kernel.index_dtype))

        elif isinstance(arg, ValueArg):
            implemented_data_info.append(
                ImplementedDataInfo(target=kernel.target,
                                    name=arg.name,
                                    dtype=arg.dtype,
                                    arg_class=ValueArg,
                                    is_written=is_written))

        else:
            raise ValueError("argument type not understood: '%s'" % type(arg))

    # Complex support is needed as soon as any argument or temporary has a
    # complex dtype.
    allow_complex = any(
        var.dtype.involves_complex()
        for var in kernel.args + list(kernel.temporary_variables.values()))

    # }}}

    # These sets are shared with the code generation state, which fills them
    # in; they are read again below when building the preambles.
    seen_dtypes = set()
    seen_functions = set()
    seen_atomic_dtypes = set()

    initial_implemented_domain = isl.BasicSet.from_params(kernel.assumptions)

    from loopy.codegen.tools import CodegenOperationCacheManager

    codegen_state = CodeGenerationState(
        kernel=kernel,
        implemented_data_info=implemented_data_info,
        implemented_domain=initial_implemented_domain,
        implemented_predicates=frozenset(),
        seen_dtypes=seen_dtypes,
        seen_functions=seen_functions,
        seen_atomic_dtypes=seen_atomic_dtypes,
        var_subst_map={},
        allow_complex=allow_complex,
        var_name_generator=kernel.get_var_name_generator(),
        is_generating_device_code=False,
        gen_program_name=(kernel.target.host_program_name_prefix +
                          kernel.name +
                          kernel.target.host_program_name_suffix),
        schedule_index_end=len(kernel.schedule),
        codegen_cachemanager=CodegenOperationCacheManager.from_kernel(kernel),
    )

    from loopy.codegen.result import generate_host_or_device_program
    codegen_result = generate_host_or_device_program(codegen_state,
                                                     schedule_index=0)

    device_code_str = codegen_result.device_code()

    # NOTE: because the check is wrapped in 'assert', it is skipped entirely
    # when Python runs with optimization (-O) enabled.
    from loopy.check import check_implemented_domains
    assert check_implemented_domains(kernel,
                                     codegen_result.implemented_domains,
                                     device_code_str)

    # {{{ handle preambles

    for idi in codegen_state.implemented_data_info:
        seen_dtypes.add(idi.dtype)

    for tv in kernel.temporary_variables.values():
        for idi in tv.decl_info(kernel.target, index_dtype=kernel.index_dtype):
            seen_dtypes.add(idi.dtype)

    if kernel.all_inames():
        seen_dtypes.add(kernel.index_dtype)

    preambles = kernel.preambles[:]

    preamble_info = PreambleInfo(
        kernel=kernel,
        seen_dtypes=seen_dtypes,
        seen_functions=seen_functions,
        # a set of LoopyTypes (!)
        seen_atomic_dtypes=seen_atomic_dtypes,
        codegen_state=codegen_state)

    preamble_generators = (
        kernel.preamble_generators +
        kernel.target.get_device_ast_builder().preamble_generators())
    for prea_gen in preamble_generators:
        preambles.extend(prea_gen(preamble_info))

    codegen_result = codegen_result.copy(device_preambles=preambles)

    # }}}

    # For faster unpickling in the common case when implemented_domains isn't needed.
    from loopy.tools import LazilyUnpicklingDict
    codegen_result = codegen_result.copy(
        implemented_domains=LazilyUnpicklingDict(
            codegen_result.implemented_domains))

    codegen_plog.done()

    if CACHING_ENABLED:
        code_gen_cache.store_if_not_present(input_kernel, codegen_result)

    return codegen_result
예제 #15
0
def drive_fmm(traversal, expansion_wrangler, src_weights, timing_data=None):
    """Run all stages of a fast multipole calculation, in order.

    This routine is also meant as a template for custom FMMs: its
    `source code <https://github.com/inducer/boxtree/blob/master/boxtree/fmm.py>`_
    may be copied and used as a starting point.

    Still, many common applications (such as point-to-point FMMs) only need
    to supply a suitable *expansion_wrangler* to this routine.

    :arg traversal: A :class:`boxtree.traversal.FMMTraversalInfo` instance.
    :arg expansion_wrangler: An object exhibiting the
        :class:`ExpansionWranglerInterface`.
    :arg src_weights: Source 'density/weights/charges'. They are first put
        through ``expansion_wrangler.reorder_sources`` and the result is
        handed to the individual *expansion_wrangler* stages.
    :arg timing_data: Either *None*, or a :class:`dict` that is updated with
        timing information for the stages of the algorithm (in the form of
        :class:`TimingResult`), if such information is available.

    :returns: the potentials computed by *expansion_wrangler*, after its
        ``reorder_potentials`` and ``finalize_potentials`` steps.
    """
    wrangler = expansion_wrangler

    # Interface guidelines: Attributes of the tree are assumed to be known
    # to the expansion wrangler and should not be passed.

    fmm_proc = ProcessLogger(logger, "fmm")
    recorder = TimingRecorder()

    src_weights = wrangler.reorder_sources(src_weights)

    def eval_direct_for(starts, lists):
        # Run the wrangler's direct evaluation for one interaction list
        # (given as starts/lists) over all target boxes, and record its
        # timing under "eval_direct".
        contribution, future = wrangler.eval_direct(
                traversal.target_boxes, starts, lists, src_weights)
        recorder.add("eval_direct", future)
        return contribution

    # {{{ "Step 2.1:" Construct local multipoles

    multipoles, future = wrangler.form_multipoles(
            traversal.level_start_source_box_nrs,
            traversal.source_boxes,
            src_weights)
    recorder.add("form_multipoles", future)

    # }}}

    # {{{ "Step 2.2:" Propagate multipoles upward

    multipoles, future = wrangler.coarsen_multipoles(
            traversal.level_start_source_parent_box_nrs,
            traversal.source_parent_boxes,
            multipoles)
    recorder.add("coarsen_multipoles", future)

    # the multipole expansions are called Phi in [1]

    # }}}

    # {{{ "Stage 3:" Direct evaluation from neighbor source boxes ("list 1")

    pot = eval_direct_for(
            traversal.neighbor_source_boxes_starts,
            traversal.neighbor_source_boxes_lists)

    # these potentials are called alpha in [1]

    # }}}

    # {{{ "Stage 4:" translate separated siblings' ("list 2") mpoles to local

    local_expns, future = wrangler.multipole_to_local(
            traversal.level_start_target_or_target_parent_box_nrs,
            traversal.target_or_target_parent_boxes,
            traversal.from_sep_siblings_starts,
            traversal.from_sep_siblings_lists,
            multipoles)
    recorder.add("multipole_to_local", future)

    # the local expansions represent both Gamma and Delta in [1]

    # }}}

    # {{{ "Stage 5:" evaluate sep. smaller mpoles ("list 3") at particles

    # (the point of aiming this stage at particles is specifically to keep its
    # contribution *out* of the downward-propagating local expansions)

    from_mpoles, future = wrangler.eval_multipoles(
            traversal.target_boxes_sep_smaller_by_source_level,
            traversal.from_sep_smaller_by_level,
            multipoles)
    recorder.add("eval_multipoles", future)

    pot = pot + from_mpoles

    # these potentials are called beta in [1]

    if traversal.from_sep_close_smaller_starts is not None:
        logger.debug("evaluate separated close smaller interactions directly "
                "('list 3 close')")

        pot = pot + eval_direct_for(
                traversal.from_sep_close_smaller_starts,
                traversal.from_sep_close_smaller_lists)

    # }}}

    # {{{ "Stage 6:" form locals for separated bigger source boxes ("list 4")

    from_bigger, future = wrangler.form_locals(
            traversal.level_start_target_or_target_parent_box_nrs,
            traversal.target_or_target_parent_boxes,
            traversal.from_sep_bigger_starts,
            traversal.from_sep_bigger_lists,
            src_weights)
    recorder.add("form_locals", future)

    local_expns = local_expns + from_bigger

    if traversal.from_sep_close_bigger_starts is not None:
        pot = pot + eval_direct_for(
                traversal.from_sep_close_bigger_starts,
                traversal.from_sep_close_bigger_lists)

    # }}}

    # {{{ "Stage 7:" propagate local expansions downward

    local_expns, future = wrangler.refine_locals(
            traversal.level_start_target_or_target_parent_box_nrs,
            traversal.target_or_target_parent_boxes,
            local_expns)
    recorder.add("refine_locals", future)

    # }}}

    # {{{ "Stage 8:" evaluate locals

    from_locals, future = wrangler.eval_locals(
            traversal.level_start_target_box_nrs,
            traversal.target_boxes,
            local_expns)
    recorder.add("eval_locals", future)

    pot = pot + from_locals

    # }}}

    result = wrangler.finalize_potentials(wrangler.reorder_potentials(pot))

    fmm_proc.done()

    # Only hand out timing information if the caller asked for it.
    if timing_data is not None:
        timing_data.update(recorder.summarize())

    return result
예제 #16
0
파일: fmm.py 프로젝트: inducer/pytential
def drive_fmm(expansion_wrangler, src_weights, timing_data=None):
    """Run all stages of the QBX fast multipole calculation, in order.

    The geometry data (documented upstream as a :class:`QBXFMMGeometryData`
    instance) is not an argument; it is read from
    ``expansion_wrangler.geo_data``, and the traversal and tree are obtained
    from it.

    :arg expansion_wrangler: An object exhibiting the
        :class:`ExpansionWranglerInterface`.
    :arg src_weights: Source 'density/weights/charges'. They are first put
        through ``expansion_wrangler.reorder_sources`` and the result is
        handed to the individual *expansion_wrangler* stages.
    :arg timing_data: Either *None* or a dictionary that is updated with
        the collected timing data.

    :returns: the potentials computed by *expansion_wrangler*.

    See also :func:`boxtree.fmm.drive_fmm`.
    """
    wrangler = expansion_wrangler

    geo_data = wrangler.geo_data
    traversal = geo_data.traversal()
    tree = traversal.tree
    recorder = TimingRecorder()

    # Interface guidelines: Attributes of the tree are assumed to be known
    # to the expansion wrangler and should not be passed.

    fmm_proc = ProcessLogger(logger, "qbx fmm")

    src_weights = wrangler.reorder_sources(src_weights)

    # {{{ construct local multipoles

    multipoles, future = wrangler.form_multipoles(
            traversal.level_start_source_box_nrs,
            traversal.source_boxes,
            src_weights)
    recorder.add("form_multipoles", future)

    # }}}

    # {{{ propagate multipoles upward

    multipoles, future = wrangler.coarsen_multipoles(
            traversal.level_start_source_parent_box_nrs,
            traversal.source_parent_boxes,
            multipoles)
    recorder.add("coarsen_multipoles", future)

    # }}}

    # {{{ direct evaluation from neighbor source boxes ("list 1")

    non_qbx_pot, future = wrangler.eval_direct(
            traversal.target_boxes,
            traversal.neighbor_source_boxes_starts,
            traversal.neighbor_source_boxes_lists,
            src_weights)
    recorder.add("eval_direct", future)

    # }}}

    # {{{ translate separated siblings' ("list 2") mpoles to local

    box_locals, future = wrangler.multipole_to_local(
            traversal.level_start_target_or_target_parent_box_nrs,
            traversal.target_or_target_parent_boxes,
            traversal.from_sep_siblings_starts,
            traversal.from_sep_siblings_lists,
            multipoles)
    recorder.add("multipole_to_local", future)

    # }}}

    # {{{ evaluate sep. smaller mpoles ("list 3") at particles

    # (the point of aiming this stage at particles is specifically to keep its
    # contribution *out* of the downward-propagating local expansions)

    contribution, future = wrangler.eval_multipoles(
            traversal.target_boxes_sep_smaller_by_source_level,
            traversal.from_sep_smaller_by_level,
            multipoles)
    recorder.add("eval_multipoles", future)

    non_qbx_pot = non_qbx_pot + contribution

    # assert that list 3 close has been merged into list 1
    assert traversal.from_sep_close_smaller_starts is None

    # }}}

    # {{{ form locals for separated bigger source boxes ("list 4")

    contribution, future = wrangler.form_locals(
            traversal.level_start_target_or_target_parent_box_nrs,
            traversal.target_or_target_parent_boxes,
            traversal.from_sep_bigger_starts,
            traversal.from_sep_bigger_lists,
            src_weights)
    recorder.add("form_locals", future)

    box_locals = box_locals + contribution

    # assert that list 4 close has been merged into list 1
    assert traversal.from_sep_close_bigger_starts is None

    # }}}

    # {{{ propagate box local expansions downward

    box_locals, future = wrangler.refine_locals(
            traversal.level_start_target_or_target_parent_box_nrs,
            traversal.target_or_target_parent_boxes,
            box_locals)
    recorder.add("refine_locals", future)

    # }}}

    # {{{ evaluate locals

    contribution, future = wrangler.eval_locals(
            traversal.level_start_target_box_nrs,
            traversal.target_boxes,
            box_locals)
    recorder.add("eval_locals", future)

    non_qbx_pot = non_qbx_pot + contribution

    # }}}

    # {{{ wrangle qbx expansions

    # The QBX expansions are accumulated from three contributions, in this
    # order: formed from the sources, translated from the box multipoles,
    # and translated from the box locals.

    qbx_expns, future = wrangler.form_global_qbx_locals(src_weights)
    recorder.add("form_global_qbx_locals", future)

    contribution, future = (
            wrangler.translate_box_multipoles_to_qbx_local(multipoles))
    recorder.add("translate_box_multipoles_to_qbx_local", future)

    qbx_expns = qbx_expns + contribution

    contribution, future = (
            wrangler.translate_box_local_to_qbx_local(box_locals))
    recorder.add("translate_box_local_to_qbx_local", future)

    qbx_expns = qbx_expns + contribution

    qbx_pot, future = wrangler.eval_qbx_expansions(qbx_expns)
    recorder.add("eval_qbx_expansions", future)

    # }}}

    # {{{ reorder potentials

    nqbtl = geo_data.non_qbx_box_target_lists()
    unfiltered_indices = nqbtl.unfiltered_from_filtered_target_indices

    tree_order_pot = wrangler.full_output_zeros()

    # Scatter the non-QBX potentials into the full-size output arrays.
    for full_i, non_qbx_i in zip(tree_order_pot, non_qbx_pot):
        full_i[unfiltered_indices] = non_qbx_i

    tree_order_pot += qbx_pot

    def reorder_then_finalize(pot):
        # "finalize" gives host FMMs (like FMMlib) a chance to turn the
        # potential back into a CL array.
        reordered = pot[tree.sorted_target_ids]
        return wrangler.finalize_potentials(reordered)

    from pytools.obj_array import with_object_array_or_scalar
    result = with_object_array_or_scalar(reorder_then_finalize, tree_order_pot)

    # }}}

    fmm_proc.done()

    # Only hand out timing information if the caller asked for it.
    if timing_data is not None:
        timing_data.update(recorder.summarize())

    return result
예제 #17
0
    def __call__(self, queue, tree, ball_centers, ball_radii, peer_lists=None,
                 wait_for=None):
        """Run the area query kernel for the given balls on *tree*.

        :arg queue: a :class:`pyopencl.CommandQueue`
        :arg tree: a :class:`boxtree.Tree`.
        :arg ball_centers: an object array of coordinate
            :class:`pyopencl.array.Array` instances.
            Their *dtype* must match *tree*'s
            :attr:`boxtree.Tree.coord_dtype`.
        :arg ball_radii: a
            :class:`pyopencl.array.Array`
            of positive numbers.
            Its *dtype* must match *tree*'s
            :attr:`boxtree.Tree.coord_dtype`.
        :arg peer_lists: may either be *None* or an instance of
            :class:`PeerListLookup` associated with `tree`. If *None*,
            the peer lists are computed with ``self.peer_list_finder``.
        :arg wait_for: may either be *None* or a list of :class:`pyopencl.Event`
            instances for whose completion this command waits before starting
            execution.
        :returns: a tuple *(aq, event)*, where *aq* is an instance of
            :class:`AreaQueryResult`, and *event* is a :class:`pyopencl.Event`
            for dependency management.
        :raises TypeError: if the dtype of *ball_centers* or *ball_radii*
            differs from ``tree.coord_dtype``.
        :raises ValueError: if ``peer_lists.peer_list_starts`` does not have
            ``tree.nboxes + 1`` entries.
        """
        from pytools import div_ceil, single_valued

        coord_dtype = tree.coord_dtype
        if single_valued(bc.dtype for bc in ball_centers) != coord_dtype:
            raise TypeError("ball_centers dtype must match tree.coord_dtype")
        if ball_radii.dtype != coord_dtype:
            raise TypeError("ball_radii dtype must match tree.coord_dtype")

        # NOTE(review): the original author marked this choice of id dtype
        # for the balls as uncertain ("?").
        ball_id_dtype = tree.particle_id_dtype

        # Round the level count up to a multiple of ten to avoid
        # generating too many kernels.
        max_levels = 10 * div_ceil(tree.nlevels, 10)

        if peer_lists is None:
            peer_lists, peer_evt = self.peer_list_finder(
                    queue, tree, wait_for=wait_for)
            # From here on, only the peer list computation needs waiting for.
            wait_for = [peer_evt]

        if len(peer_lists.peer_list_starts) != tree.nboxes + 1:
            raise ValueError("size of peer lists must match with number of boxes")

        area_query_kernel = self.get_area_query_kernel(
            tree.dimensions, coord_dtype, tree.box_id_dtype, ball_id_dtype,
            peer_lists.peer_list_starts.dtype, max_levels)

        aq_plog = ProcessLogger(logger, "area query")

        # Trailing positional kernel arguments: the first entry of the tree's
        # bounding box, followed by the ball center coordinate buffers.
        trailing_args = (
                tuple(tree.bounding_box[0])
                + tuple(bc.data for bc in ball_centers))

        result, evt = area_query_kernel(
                queue, len(ball_radii),
                tree.box_centers.data, tree.root_extent,
                tree.box_levels.data, tree.aligned_nboxes,
                tree.box_child_ids.data, tree.box_flags.data,
                peer_lists.peer_list_starts.data,
                peer_lists.peer_lists.data, ball_radii.data,
                *trailing_args,
                wait_for=wait_for)

        aq_plog.done()

        leaves = result["leaves"]
        aq_result = AreaQueryResult(
                tree=tree,
                leaves_near_ball_starts=leaves.starts,
                leaves_near_ball_lists=leaves.lists)

        return aq_result.with_queue(None), evt
예제 #18
0
def as_scalar_pde(pde, vec_idx):
    r"""
    Returns a scalar PDE that is satisfied by the *vec_idx* component
    of *pde*.

    :arg pde: An instance of :class:`LinearPDESystemOperator`
    :arg vec_idx: the index of the vector-valued function that we
                  want as a scalar PDE
    :returns: *pde* itself if every derivative identifier in it already
        refers to component *vec_idx*; otherwise a new
        :class:`LinearPDESystemOperator` built from the first linear
        relationship that is found.
    :raises AssertionError: if no scalar PDE is found for any of the
        derivative orders that are tried (2 through 99).
    """
    indices = set()
    for eq in pde.eqs:
        for deriv_ident in eq:
            indices.add(deriv_ident.vec_idx)

    # this is already a scalar pde
    if len(indices) == 1 and next(iter(indices)) == vec_idx:
        return pde

    # These are only needed for the actual computation, so they are imported
    # after the trivial case above has been handled.
    from sumpy.tools import nullspace
    from pytools import ProcessLogger
    from pytools import (
            generate_nonnegative_integer_tuples_summing_to_at_most
            as gnitstam)

    plog = ProcessLogger(logger, "computing single PDE for multiple PDEs")

    dim = pde.total_dims

    # slowly increase the order of the derivatives that we take of the
    # system of PDEs. Once we reach the order of the scalar PDE, this
    # loop will break
    for order in range(2, 100):
        mis = sorted(gnitstam(order, dim), key=sum)

        pde_mat = []
        coeff_ident_enumerate_dict = {tuple(mi): i for i, mi in enumerate(mis)}
        offset = len(mis)

        # Create a matrix of equations that are derivatives of the
        # original system of PDEs
        for mi in mis:
            for pde_dict in pde.eqs:
                # One block of *offset* columns per component index.
                eq = [0]*(offset*(max(indices)+1))
                for ident, coeff in pde_dict.items():
                    c = tuple(add_mi(ident.mi, mi))
                    if c not in coeff_ident_enumerate_dict:
                        # This derivative of the equation involves a
                        # multi-index outside of *mis*: drop the equation.
                        break
                    idx = offset*ident.vec_idx + coeff_ident_enumerate_dict[c]
                    eq[idx] = coeff
                else:
                    # Only reached if the loop above did not break.
                    pde_mat.append(eq)

        if not pde_mat:
            continue

        # Get the nullspace of the matrix and get the rows related to this
        # vec_idx
        n = nullspace(pde_mat)[offset*vec_idx:offset*(vec_idx+1), :]
        indep_row = find_linear_relationship(n)
        if len(indep_row) > 0:
            # Normalize by the coefficient that belongs to the largest key.
            mult = indep_row[max(indep_row)]
            scalar_eq = {
                    DerivativeIdentifier(mis[k], 0): v / mult
                    for k, v in indep_row.items()}
            plog.done()
            return LinearPDESystemOperator(pde.dim, pmap(scalar_eq))

    plog.done()
    # An explicit raise (rather than ``assert False``) so that the failure
    # is still reported when Python runs with assertions disabled (-O).
    raise AssertionError(
            "no scalar PDE found for derivative orders below 100")
예제 #19
0
파일: fmm.py 프로젝트: inducer/boxtree
def drive_fmm(traversal, expansion_wrangler, src_weights, timing_data=None):
    """Top-level driver routine for a fast multipole calculation.

    In part, this is intended as a template for custom FMMs, in the sense that
    you may copy and paste its
    `source code <https://github.com/inducer/boxtree/blob/master/boxtree/fmm.py>`_
    as a starting point.

    Nonetheless, many common applications (such as point-to-point FMMs) can be
    covered by supplying the right *expansion_wrangler* to this routine.

    :arg traversal: A :class:`boxtree.traversal.FMMTraversalInfo` instance.
    :arg expansion_wrangler: An object exhibiting the
        :class:`ExpansionWranglerInterface`.
    :arg src_weights: Source 'density/weights/charges'. They are first put
        through ``expansion_wrangler.reorder_sources`` and the result is
        handed to the individual *expansion_wrangler* stages.
    :arg timing_data: Either *None*, or a :class:`dict` that is populated with
        timing information for the stages of the algorithm (in the form of
        :class:`TimingResult`), if such information is available.

    :returns: the potentials computed by *expansion_wrangler*, after its
        ``reorder_potentials`` and ``finalize_potentials`` steps.
    """
    wrangler = expansion_wrangler

    # Interface guidelines: Attributes of the tree are assumed to be known
    # to the expansion wrangler and should not be passed.

    # This is the generic FMM driver, so the process is logged as "fmm"
    # (not "qbx fmm", which names the QBX-specific driver).
    fmm_proc = ProcessLogger(logger, "fmm")
    recorder = TimingRecorder()

    src_weights = wrangler.reorder_sources(src_weights)

    # {{{ "Step 2.1:" Construct local multipoles

    mpole_exps, timing_future = wrangler.form_multipoles(
            traversal.level_start_source_box_nrs,
            traversal.source_boxes,
            src_weights)

    recorder.add("form_multipoles", timing_future)

    # }}}

    # {{{ "Step 2.2:" Propagate multipoles upward

    mpole_exps, timing_future = wrangler.coarsen_multipoles(
            traversal.level_start_source_parent_box_nrs,
            traversal.source_parent_boxes,
            mpole_exps)

    recorder.add("coarsen_multipoles", timing_future)

    # mpole_exps is called Phi in [1]

    # }}}

    # {{{ "Stage 3:" Direct evaluation from neighbor source boxes ("list 1")

    potentials, timing_future = wrangler.eval_direct(
            traversal.target_boxes,
            traversal.neighbor_source_boxes_starts,
            traversal.neighbor_source_boxes_lists,
            src_weights)

    recorder.add("eval_direct", timing_future)

    # these potentials are called alpha in [1]

    # }}}

    # {{{ "Stage 4:" translate separated siblings' ("list 2") mpoles to local

    local_exps, timing_future = wrangler.multipole_to_local(
            traversal.level_start_target_or_target_parent_box_nrs,
            traversal.target_or_target_parent_boxes,
            traversal.from_sep_siblings_starts,
            traversal.from_sep_siblings_lists,
            mpole_exps)

    recorder.add("multipole_to_local", timing_future)

    # local_exps represents both Gamma and Delta in [1]

    # }}}

    # {{{ "Stage 5:" evaluate sep. smaller mpoles ("list 3") at particles

    # (the point of aiming this stage at particles is specifically to keep its
    # contribution *out* of the downward-propagating local expansions)

    mpole_result, timing_future = wrangler.eval_multipoles(
            traversal.target_boxes_sep_smaller_by_source_level,
            traversal.from_sep_smaller_by_level,
            mpole_exps)

    recorder.add("eval_multipoles", timing_future)

    potentials = potentials + mpole_result

    # these potentials are called beta in [1]

    # "List 3 close" is only present on some traversals; when it is, its
    # interactions are evaluated directly.
    if traversal.from_sep_close_smaller_starts is not None:
        logger.debug("evaluate separated close smaller interactions directly "
                "('list 3 close')")

        direct_result, timing_future = wrangler.eval_direct(
                traversal.target_boxes,
                traversal.from_sep_close_smaller_starts,
                traversal.from_sep_close_smaller_lists,
                src_weights)

        recorder.add("eval_direct", timing_future)

        potentials = potentials + direct_result

    # }}}

    # {{{ "Stage 6:" form locals for separated bigger source boxes ("list 4")

    local_result, timing_future = wrangler.form_locals(
            traversal.level_start_target_or_target_parent_box_nrs,
            traversal.target_or_target_parent_boxes,
            traversal.from_sep_bigger_starts,
            traversal.from_sep_bigger_lists,
            src_weights)

    recorder.add("form_locals", timing_future)

    local_exps = local_exps + local_result

    # Likewise, the close part of "list 4" is evaluated directly if present.
    if traversal.from_sep_close_bigger_starts is not None:
        direct_result, timing_future = wrangler.eval_direct(
                traversal.target_boxes,
                traversal.from_sep_close_bigger_starts,
                traversal.from_sep_close_bigger_lists,
                src_weights)

        recorder.add("eval_direct", timing_future)

        potentials = potentials + direct_result

    # }}}

    # {{{ "Stage 7:" propagate local_exps downward

    local_exps, timing_future = wrangler.refine_locals(
            traversal.level_start_target_or_target_parent_box_nrs,
            traversal.target_or_target_parent_boxes,
            local_exps)

    recorder.add("refine_locals", timing_future)

    # }}}

    # {{{ "Stage 8:" evaluate locals

    local_result, timing_future = wrangler.eval_locals(
            traversal.level_start_target_box_nrs,
            traversal.target_boxes,
            local_exps)

    recorder.add("eval_locals", timing_future)

    potentials = potentials + local_result

    # }}}

    result = wrangler.reorder_potentials(potentials)

    result = wrangler.finalize_potentials(result)

    fmm_proc.done()

    # Only hand out timing information if the caller asked for it.
    if timing_data is not None:
        timing_data.update(recorder.summarize())

    return result
예제 #20
0
    def __call__(self, queue, tree, ball_centers, ball_radii, peer_lists=None,
                 wait_for=None):
        """Run the space invader query kernel for the given balls on *tree*.

        :arg queue: a :class:`pyopencl.CommandQueue`
        :arg tree: a :class:`boxtree.Tree`.
        :arg ball_centers: an object array of coordinate
            :class:`pyopencl.array.Array` instances.
            Their *dtype* must match *tree*'s
            :attr:`boxtree.Tree.coord_dtype`.
        :arg ball_radii: a
            :class:`pyopencl.array.Array`
            of positive numbers.
            Its *dtype* must match *tree*'s
            :attr:`boxtree.Tree.coord_dtype`.
        :arg peer_lists: may either be *None* or an instance of
            :class:`PeerListLookup` associated with `tree`. If *None*,
            the peer lists are computed with ``self.peer_list_finder``.
        :arg wait_for: may either be *None* or a list of :class:`pyopencl.Event`
            instances for whose completion this command waits before starting
            execution.
        :returns: a tuple *(sqi, event)*, where *sqi* is an instance of
            :class:`pyopencl.array.Array`, and *event* is a :class:`pyopencl.Event`
            for dependency management. The *dtype* of *sqi* is
            *tree*'s :attr:`boxtree.Tree.coord_dtype` and its shape is
            *(tree.nboxes,)* (see :attr:`boxtree.Tree.nboxes`).
            The entries of *sqi* are indexed by the global box index and are
            as follows:

            * if *i* is not the index of a leaf box, *sqi[i] = 0*.
            * if *i* is the index of a leaf box, *sqi[i]* is the
              outer space invader distance for *i*.
        :raises TypeError: if the dtype of *ball_centers* or *ball_radii*
            differs from ``tree.coord_dtype``.
        :raises ValueError: if ``peer_lists.peer_list_starts`` does not have
            ``tree.nboxes + 1`` entries.
        """
        from pytools import div_ceil, single_valued

        if single_valued(bc.dtype for bc in ball_centers) != tree.coord_dtype:
            raise TypeError("ball_centers dtype must match tree.coord_dtype")
        if ball_radii.dtype != tree.coord_dtype:
            raise TypeError("ball_radii dtype must match tree.coord_dtype")

        # Round the level count up to a multiple of ten to avoid
        # generating too many kernels.
        max_levels = 10 * div_ceil(tree.nlevels, 10)

        if peer_lists is None:
            peer_lists, peer_evt = self.peer_list_finder(
                    queue, tree, wait_for=wait_for)
            # From here on, only the peer list computation needs waiting for.
            wait_for = [peer_evt]

        if len(peer_lists.peer_list_starts) != tree.nboxes + 1:
            raise ValueError("size of peer lists must match with number of boxes")

        space_invader_query_kernel = self.get_space_invader_query_kernel(
            tree.dimensions, tree.coord_dtype, tree.box_id_dtype,
            peer_lists.peer_list_starts.dtype, max_levels)

        si_plog = ProcessLogger(logger, "space invader query")

        # The output starts out as zeros; the kernel must also wait for
        # whatever events are attached to this freshly created array.
        dists = cl.array.zeros(queue, tree.nboxes, np.float32)
        wait_for = (wait_for or []) + dists.events

        kernel_args = SPACE_INVADER_QUERY_TEMPLATE.unwrap_args(
                tree, peer_lists, ball_radii, dists, *ball_centers)

        evt = space_invader_query_kernel(
                *kernel_args,
                wait_for=wait_for,
                queue=queue,
                range=slice(len(ball_radii)))

        if tree.coord_dtype != np.dtype(np.float32):
            # The kernel output is always an array of float32 due to limited
            # support for atomic operations with float64 in OpenCL.
            # Here the output is cast to match the coord dtype.
            dists.finish()
            dists = dists.astype(tree.coord_dtype)
            # The cast array is expected to carry exactly one event, which
            # becomes the event returned to the caller.
            (evt,) = dists.events

        si_plog.done()

        return dists, evt
예제 #21
0
    def __call__(self, queue, tree, ball_centers, ball_radii, peer_lists=None,
                 wait_for=None):
        """Build the leaves-to-balls lookup by inverting an area query.

        :arg queue: a :class:`pyopencl.CommandQueue`
        :arg tree: a :class:`boxtree.Tree`.
        :arg ball_centers: an object array of coordinate
            :class:`pyopencl.array.Array` instances.
            Their *dtype* must match *tree*'s
            :attr:`boxtree.Tree.coord_dtype`.
        :arg ball_radii: a
            :class:`pyopencl.array.Array`
            of positive numbers.
            Its *dtype* must match *tree*'s
            :attr:`boxtree.Tree.coord_dtype`.
        :arg peer_lists: may either be *None* or an instance of
            :class:`PeerListLookup` associated with `tree`. It is passed
            on to ``self.area_query_builder``.
        :arg wait_for: may either be *None* or a list of :class:`pyopencl.Event`
            instances for whose completion this command waits before starting
            execution.
        :returns: a tuple *(lbl, event)*, where *lbl* is an instance of
            :class:`LeavesToBallsLookup`, and *event* is a :class:`pyopencl.Event`
            for dependency management.
        :raises TypeError: if the dtype of *ball_centers* or *ball_radii*
            differs from ``tree.coord_dtype``.
        """
        from pytools import single_valued

        coord_dtype = tree.coord_dtype
        if single_valued(bc.dtype for bc in ball_centers) != coord_dtype:
            raise TypeError("ball_centers dtype must match tree.coord_dtype")
        if ball_radii.dtype != coord_dtype:
            raise TypeError("ball_radii dtype must match tree.coord_dtype")

        ltb_plog = ProcessLogger(logger, "leaves-to-balls lookup: run area query")

        area_query, aq_evt = self.area_query_builder(
                queue, tree, ball_centers, ball_radii, peer_lists, wait_for)
        wait_for = [aq_evt]

        logger.debug("leaves-to-balls lookup: expand starts")

        nkeys = tree.nboxes
        nballs_p_1 = len(area_query.leaves_near_ball_starts)
        assert nballs_p_1 == len(ball_radii) + 1

        # The area query is inverted in two steps:
        #
        # 1. The "starts expander kernel" turns the area query result into
        #    (ball number, box number) pairs.
        #
        # 2. A key-value sort orders the (ball number, box number) pairs
        #    by box number.

        box_id_dtype = tree.box_id_dtype
        leaf_lists = area_query.leaves_near_ball_lists

        starts_expander_knl = self.get_starts_expander_kernel(box_id_dtype)
        expanded_starts = cl.array.empty(queue, len(leaf_lists), box_id_dtype)

        # NOTE(review): wait_for (the area query event) is not passed to
        # this call; confirm that the dependency is enforced some other way.
        expander_evt = starts_expander_knl(
                expanded_starts,
                area_query.leaves_near_ball_starts.with_queue(queue),
                nballs_p_1)
        wait_for = [expander_evt]

        logger.debug("leaves-to-balls lookup: key-value sort")

        # Keys are the box numbers from the area query, values are the
        # expanded starts computed above.
        balls_near_box_starts, balls_near_box_lists, evt = self.key_value_sorter(
                queue,
                leaf_lists.with_queue(queue),
                expanded_starts,
                nkeys, starts_dtype=box_id_dtype,
                wait_for=wait_for)

        ltb_plog.done()

        lookup = LeavesToBallsLookup(
                tree=tree,
                balls_near_box_starts=balls_near_box_starts,
                balls_near_box_lists=balls_near_box_lists)

        return lookup.with_queue(None), evt
예제 #22
0
    def __call__(self, queue, balls_to_leaves_lookup=None, wait_for=None):
        """Build the element-to-source lookup.

        :arg queue: a :class:`pyopencl.CommandQueue`
        :arg balls_to_leaves_lookup: either *None*, in which case it is
            computed with ``self.compute_short_lists``, or an object
            providing ``leaves_near_ball_starts`` and
            ``leaves_near_ball_lists``.
        :arg wait_for: either *None* or events to wait for; passed on to
            the first operation that is launched.
        :returns: a tuple *(lookup, event)*, where *lookup* is an instance of
            :class:`ElementsToSourcesLookup` and *event* is the event
            returned by the final key-value sort.
        """
        slk_plog = ProcessLogger(logger,
                                 "element-to-source lookup: run area query")

        if balls_to_leaves_lookup is None:
            balls_to_leaves_lookup, short_lists_evt = \
                self.compute_short_lists(queue, wait_for=wait_for)
            wait_for = [short_lists_evt]

        # -----------------------------------------------------------------
        # Refine the area query using point-in-simplex test

        logger.debug("element-to-source lookup: refine starts")

        element_lookup_kernel = self.get_simplex_lookup_kernel()

        mesh = self.discr.mesh
        tree = self.tree

        vertices_dev = make_obj_array(
            [cl.array.to_device(queue, verts) for verts in mesh.vertices])

        # Per-axis keyword arguments for the kernel, one entry per dimension.
        mesh_vertices_kwargs = {}
        source_points_kwargs = {}
        for iaxis in range(self.dim):
            mesh_vertices_kwargs[f"mesh_vertices_{iaxis}"] = vertices_dev[iaxis]
            source_points_kwargs[f"source_points_{iaxis}"] = tree.sources[iaxis]

        # Every entry of the result starts out as -1.
        # NOTE(review): presumably -1 marks sources that were not matched to
        # any element; confirm against the kernel.
        initial_result = cl.array.zeros(queue, tree.nsources, dtype=np.int32) - 1

        evt, res = element_lookup_kernel(
            queue,
            dim=self.dim,
            nboxes=tree.nboxes,
            nelements=mesh.nelements,
            nsources=tree.nsources,
            result=initial_result,
            mesh_vertex_indices=mesh.groups[0].vertex_indices,
            box_source_starts=tree.box_source_starts,
            box_source_counts_cumul=tree.box_source_counts_cumul,
            leaves_near_ball_starts=(
                balls_to_leaves_lookup.leaves_near_ball_starts),
            leaves_near_ball_lists=(
                balls_to_leaves_lookup.leaves_near_ball_lists),
            wait_for=wait_for,
            **mesh_vertices_kwargs,
            **source_points_kwargs)

        # The kernel is expected to produce exactly one output.
        (source_to_element_lookup,) = res

        wait_for = [evt]

        # -----------------------------------------------------------------
        # Invert the source-to-element lookup by a key-value sort

        logger.debug("element-to-source lookup: key-value sort")

        box_id_dtype = tree.box_id_dtype
        source_numbers = cl.array.arange(
            queue, tree.nsources, dtype=box_id_dtype)

        sources_in_element_starts, sources_in_element_lists, evt = \
            self.key_value_sorter(
                queue,
                keys=source_to_element_lookup,
                values=source_numbers,
                nkeys=mesh.nelements,
                starts_dtype=box_id_dtype,
                wait_for=wait_for)

        slk_plog.done()

        lookup = ElementsToSourcesLookup(
            tree=tree,
            discr=self.discr,
            sources_in_element_starts=sources_in_element_starts,
            sources_in_element_lists=sources_in_element_lists)

        return lookup, evt
예제 #23
0
def drive_fmm(expansion_wrangler, src_weight_vecs, timing_data=None,
        traversal=None):
    """Run the QBX fast multipole evaluation and return the potentials.

    :arg expansion_wrangler: An object exhibiting the
        :class:`boxtree.fmm.ExpansionWranglerInterface`. Its *geo_data*
        attribute supplies the geometry data used by this routine.
    :arg src_weight_vecs: A sequence of source 'density/weights/charges'.
        Every entry is run through *expansion_wrangler.reorder_sources*
        before it is handed to the individual FMM stages.
    :arg timing_data: Either *None* or a dictionary that is updated with
        the summarized per-stage timing data once the FMM has finished.
    :arg traversal: Either *None*, in which case
        ``expansion_wrangler.geo_data.traversal()`` is used, or the
        traversal object to drive the FMM with.

    Returns the potentials computed by *expansion_wrangler*.

    See also :func:`boxtree.fmm.drive_fmm`.
    """
    wrangler = expansion_wrangler
    geo_data = wrangler.geo_data

    trav = geo_data.traversal() if traversal is None else traversal
    tree = trav.tree

    timings = TimingRecorder()

    def record(stage, outcome):
        # Every wrangler stage hands back a (result, timing future) pair:
        # file the future under *stage* and pass the result on.
        value, future = outcome
        timings.add(stage, future)
        return value

    # Interface guidelines: the expansion wrangler is assumed to know the
    # attributes of the tree, so they are never passed to it explicitly.

    plog = ProcessLogger(logger, "qbx fmm")

    weights = [wrangler.reorder_sources(vec) for vec in src_weight_vecs]

    # {{{ construct local multipoles

    multipoles = record(
            "form_multipoles",
            wrangler.form_multipoles(
                trav.level_start_source_box_nrs,
                trav.source_boxes,
                weights))

    # }}}

    # {{{ propagate multipoles upward

    multipoles = record(
            "coarsen_multipoles",
            wrangler.coarsen_multipoles(
                trav.level_start_source_parent_box_nrs,
                trav.source_parent_boxes,
                multipoles))

    # }}}

    # {{{ direct evaluation from neighbor source boxes ("list 1")

    pot_non_qbx = record(
            "eval_direct",
            wrangler.eval_direct(
                trav.target_boxes,
                trav.neighbor_source_boxes_starts,
                trav.neighbor_source_boxes_lists,
                weights))

    # }}}

    # {{{ translate separated siblings' ("list 2") mpoles to local

    box_locals = record(
            "multipole_to_local",
            wrangler.multipole_to_local(
                trav.level_start_target_or_target_parent_box_nrs,
                trav.target_or_target_parent_boxes,
                trav.from_sep_siblings_starts,
                trav.from_sep_siblings_lists,
                multipoles))

    # }}}

    # {{{ evaluate sep. smaller mpoles ("list 3") at particles

    # (this stage targets particles on purpose, so that its contribution
    # stays *out* of the downward-propagating local expansions)

    contribution = record(
            "eval_multipoles",
            wrangler.eval_multipoles(
                trav.target_boxes_sep_smaller_by_source_level,
                trav.from_sep_smaller_by_level,
                multipoles))

    pot_non_qbx = pot_non_qbx + contribution

    # list 3 close must already have been merged into list 1
    assert trav.from_sep_close_smaller_starts is None

    # }}}

    # {{{ form locals for separated bigger source boxes ("list 4")

    contribution = record(
            "form_locals",
            wrangler.form_locals(
                trav.level_start_target_or_target_parent_box_nrs,
                trav.target_or_target_parent_boxes,
                trav.from_sep_bigger_starts,
                trav.from_sep_bigger_lists,
                weights))

    box_locals = box_locals + contribution

    # list 4 close must already have been merged into list 1
    assert trav.from_sep_close_bigger_starts is None

    # }}}

    # {{{ propagate local expansions downward

    box_locals = record(
            "refine_locals",
            wrangler.refine_locals(
                trav.level_start_target_or_target_parent_box_nrs,
                trav.target_or_target_parent_boxes,
                box_locals))

    # }}}

    # {{{ evaluate locals

    contribution = record(
            "eval_locals",
            wrangler.eval_locals(
                trav.level_start_target_box_nrs,
                trav.target_boxes,
                box_locals))

    pot_non_qbx = pot_non_qbx + contribution

    # }}}

    # {{{ wrangle qbx expansions

    # form_global_qbx_locals and eval_target_specific_qbx_locals cover the
    # same interactions (the directly evaluated part of the potentials via
    # the unified List 1).  The wrangler decides which of the two does the
    # work; the output entries of an unused one are zero.

    qbx_locals = record(
            "form_global_qbx_locals",
            wrangler.form_global_qbx_locals(weights))

    contribution = record(
            "translate_box_multipoles_to_qbx_local",
            wrangler.translate_box_multipoles_to_qbx_local(multipoles))

    qbx_locals = qbx_locals + contribution

    contribution = record(
            "translate_box_local_to_qbx_local",
            wrangler.translate_box_local_to_qbx_local(box_locals))

    qbx_locals = qbx_locals + contribution

    pot_qbx = record(
            "eval_qbx_expansions",
            wrangler.eval_qbx_expansions(qbx_locals))

    target_specific, ts_future = (
            wrangler.eval_target_specific_qbx_locals(weights))

    pot_qbx = pot_qbx + target_specific

    timings.add("eval_target_specific_qbx_locals", ts_future)

    # }}}

    # {{{ reorder potentials

    non_qbx_targets = geo_data.non_qbx_box_target_lists()

    tree_ordered = wrangler.full_output_zeros()

    for full_pot, filtered_pot in zip(tree_ordered, pot_non_qbx):
        full_pot[non_qbx_targets.unfiltered_from_filtered_target_indices] = \
                filtered_pot

    tree_ordered += pot_qbx

    def to_user_order_and_finalize(pot):
        # "finalize" lets host FMMs (like FMMlib) turn the potential back
        # into a CL array.
        return wrangler.finalize_potentials(pot[tree.sorted_target_ids])

    from pytools.obj_array import obj_array_vectorize
    result = obj_array_vectorize(to_user_order_and_finalize, tree_ordered)

    # }}}

    plog.done()

    if timing_data is not None:
        timing_data.update(timings.summarize())
    return result
예제 #24
0
    def get_stored_ids_and_unscaled_projection_matrix(self):
        """Split the Taylor coefficients into stored ones and ones recovered
        from the PDE.

        The single equation of :meth:`get_pde_as_diff_op` is solved for the
        multi-index that comes last in the ordering of
        :meth:`get_full_coefficient_identifiers`. Every coefficient whose
        multi-index dominates that one component-wise is then expressed
        through other coefficients instead of being stored.

        If the PDE involves a multi-index that is not among the full
        coefficient identifiers, nothing is compressed and all identifiers
        are returned together with an identity-like operator.

        :returns: a tuple ``(stored_identifiers, op)``, where
            *stored_identifiers* is the list of multi-indices that are kept
            and *op* is a :class:`CSEMatVecOperator` with one row per full
            coefficient identifier and one column per stored identifier.
        """
        from pytools import ProcessLogger
        plog = ProcessLogger(logger, "compute PDE for Taylor coefficients")

        mis = self.get_full_coefficient_identifiers()
        coeff_ident_enumerate_dict = {
            tuple(mi): i
            for (i, mi) in enumerate(mis)
        }

        diff_op = self.get_pde_as_diff_op()
        assert len(diff_op.eqs) == 1
        pde_dict = {k.mi: v for k, v in diff_op.eqs[0].items()}

        if any(ident not in coeff_ident_enumerate_dict for ident in pde_dict):
            # Order of the expansion is less than the order of the PDE.
            # In that case, the compression matrix is the identity matrix
            # and there's nothing to project
            nmis = len(mis)
            from_input_coeffs_by_row = [[(i, 1)] for i in range(nmis)]
            from_output_coeffs_by_row = [[] for _ in range(nmis)]
            op = CSEMatVecOperator(from_input_coeffs_by_row,
                                   from_output_coeffs_by_row, (nmis, nmis))
            # Finish the process logger on this path as well; otherwise it
            # would be left dangling as a never-completed operation.
            plog.done()
            return mis, op

        # Calculate the multi-index that appears last in the PDE in
        # reverse degree lexicographic order (degrevlex).
        max_mi_idx = max(coeff_ident_enumerate_dict[ident]
                         for ident in pde_dict)
        max_mi = mis[max_mi_idx]
        max_mi_coeff = pde_dict[max_mi]
        max_mi_mult = -1 / sym.sympify(max_mi_coeff)

        def is_stored(mi):
            """
            A multi_index mi is not stored if mi >= max_mi
            (component-wise in every dimension).
            """
            return any(mi[d] < max_mi[d] for d in range(self.dim))

        stored_identifiers = []

        # Row i describes full coefficient i: either a copy of one stored
        # coefficient (first list) or a combination of other rows (second
        # list). NOTE(review): the exact meaning of the two lists is defined
        # by CSEMatVecOperator -- confirm against its documentation.
        from_input_coeffs_by_row = []
        from_output_coeffs_by_row = []
        for i, mi in enumerate(mis):
            # If the multi-index is to be stored, keep the projection matrix
            # entry empty
            if is_stored(mi):
                idx = len(stored_identifiers)
                stored_identifiers.append(mi)
                from_input_coeffs_by_row.append([(idx, 1)])
                from_output_coeffs_by_row.append([])
                continue

            # Offset by which the PDE is shifted so that its last term
            # lands exactly on *mi*.
            diff = [mi[d] - max_mi[d] for d in range(self.dim)]

            # eg: u_xx + u_yy + u_zz is represented as
            # [((2, 0, 0), 1), ((0, 2, 0), 1), ((0, 0, 2), 1)]
            assignment = []
            for other_mi, coeff in pde_dict.items():
                j = coeff_ident_enumerate_dict[add_mi(other_mi, diff)]
                if i == j:
                    # Skip the u_zz part here.
                    continue
                # PDE might not have max_mi_coeff = -1, divide by -max_mi_coeff
                # to get a relation of the form, u_zz = - u_xx - u_yy for Laplace 3D.
                assignment.append((j, coeff * max_mi_mult))
            from_input_coeffs_by_row.append([])
            from_output_coeffs_by_row.append(assignment)

        plog.done()

        # *mis* already holds the full identifier list, no need to ask for
        # it a second time.
        logger.debug(
            "number of Taylor coefficients was reduced from {orig} to {red}".
            format(orig=len(mis),
                   red=len(stored_identifiers)))

        shape = (len(mis), len(stored_identifiers))
        op = CSEMatVecOperator(from_input_coeffs_by_row,
                               from_output_coeffs_by_row, shape)
        return stored_identifiers, op
예제 #25
0
def parse_fortran(source,
                  filename="<floopy code>",
                  free_form=None,
                  strict=None,
                  seq_dependencies=None,
                  auto_dependencies=None,
                  target=None):
    """Parse Fortran *source* into a loopy translation unit.

    :arg source: the Fortran source code, handed to ``fparser.api.parse``.
    :arg filename: name used in the progress log message and passed to
        the Fortran-to-loopy translator.
    :arg free_form: passed to fparser as ``isfree``. *None* means *True*.
    :arg strict: passed to fparser as ``isstrict``. *None* means *True*.
    :arg seq_dependencies: forwarded to the translator's ``make_kernels``.
        *None* means *True*.
    :arg auto_dependencies: deprecated alias of *seq_dependencies*; using
        it emits a :class:`DeprecationWarning`.
    :arg target: forwarded to the Fortran-to-loopy translator.
    :raises TypeError: if both *seq_dependencies* and *auto_dependencies*
        are given.
    :raises LoopyError: if fparser returns *None* for *source*.
    :returns: a :class:`loopy.TranslationUnit`
    """

    parse_plog = ProcessLogger(logger, "parsing fortran file '%s'" % filename)

    if seq_dependencies is not None and auto_dependencies is not None:
        raise TypeError(
            "may not specify both seq_dependencies and auto_dependencies")
    if auto_dependencies is not None:
        from warnings import warn
        warn("auto_dependencies is deprecated, use seq_dependencies instead",
             DeprecationWarning,
             stacklevel=2)
        seq_dependencies = auto_dependencies

    if seq_dependencies is None:
        seq_dependencies = True
    if free_form is None:
        free_form = True
    if strict is None:
        strict = True

    import logging
    fparser_logger = logging.getLogger("fparser")
    console_name = "loopy-fparser-console"
    # Install the console handler only once: attaching a fresh handler on
    # every call would repeat each fparser message once per earlier call.
    if not any(hdlr.get_name() == console_name
               for hdlr in fparser_logger.handlers):
        console = logging.StreamHandler()
        console.set_name(console_name)
        console.setLevel(logging.INFO)
        formatter = logging.Formatter(
            "%(name)-12s: %(levelname)-8s %(message)s")
        console.setFormatter(formatter)
        fparser_logger.addHandler(console)

    from fparser import api
    tree = api.parse(source,
                     isfree=free_form,
                     isstrict=strict,
                     analyze=False,
                     ignore_comments=False)

    if tree is None:
        raise LoopyError("Fortran parser was unhappy with source code "
                         "and returned invalid data (Sorry!)")

    from loopy.frontend.fortran.translator import F2LoopyTranslator
    f2loopy = F2LoopyTranslator(filename, target=target)
    f2loopy(tree)

    kernels = f2loopy.make_kernels(seq_dependencies=seq_dependencies)

    from loopy.transform.callable import merge
    prog = merge(kernels)
    all_kernels = [clbl.subkernel for clbl in prog.callables_table.values()]

    for knl in all_kernels:
        # with_kernel returns an updated translation unit instead of
        # modifying *prog* in place, so the result has to be kept.
        prog = prog.with_kernel(_add_assignees_to_calls(knl, all_kernels))

    if len(all_kernels) == 1:
        # guessing in the case of only one function
        prog = prog.with_entrypoints(all_kernels[0].name)

    from loopy.frontend.fortran.translator import specialize_fortran_division
    prog = specialize_fortran_division(prog)

    parse_plog.done()

    return prog