Example #1
File: norms.py  Project: jshipton/firedrake
def errornorm(u, uh, norm_type="L2", degree_rise=3, mesh=None):
    """Compute the error :math:`e = u - u_h` in the specified norm.

    :arg u: a :class:`.Function` containing an "exact" solution
    :arg uh: a :class:`.Function` containing the approximate solution
    :arg norm_type: the type of norm to compute, see :func:`.norm` for
         details of supported norm types.
    :arg degree_rise: increase in polynomial degree to use as the
         approximation space for computing the error.
    :arg mesh: an optional mesh on which to compute the error norm
         (currently ignored).

    This function works by :func:`.project`\ing ``u`` and ``uh`` into
    a space of degree ``degree_rise`` higher than the degree of ``uh``
    and computing the error there.
    """
    urank = len(u.ufl_shape)
    uhrank = len(uh.ufl_shape)

    rank = urank
    if urank != uhrank:
        raise RuntimeError("Mismatching rank between u and uh")

    degree = uh.function_space().ufl_element().degree()
    if isinstance(degree, tuple):
        degree = max(degree) + degree_rise
    else:
        degree += degree_rise

    # The exact solution might be an expression, in which case this test is irrelevant.
    if isinstance(u, function.Function):
        degree_u = u.function_space().ufl_element().degree()
        if degree > degree_u:
            warning("Degree of exact solution less than approximation degree")

    mesh = uh.function_space().mesh()
    if rank == 0:
        V = functionspace.FunctionSpace(mesh, 'DG', degree)
    elif rank == 1:
        V = functionspace.VectorFunctionSpace(mesh,
                                              'DG',
                                              degree,
                                              dim=u.ufl_shape[0])
    else:
        raise RuntimeError(
            "Don't know how to compute error norm for tensor valued functions")

    u_ = projection.project(u, V)
    uh_ = projection.project(uh, V)

    uh_ -= u_

    return norm(uh_, norm_type=norm_type, mesh=mesh)
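A hypothetical usage sketch for this function. The names below (UnitSquareMesh, SpatialCoordinate, interpolate, ...) are Firedrake's public API rather than part of the snippet above, so adjust them to the installed version:

# Hedged sketch: compare a P1 approximation against a higher-degree reference.
from firedrake import (UnitSquareMesh, FunctionSpace, SpatialCoordinate,
                       sin, pi, interpolate, errornorm)

mesh = UnitSquareMesh(8, 8)
x, y = SpatialCoordinate(mesh)
expr = sin(pi * x) * sin(pi * y)

u_exact = interpolate(expr, FunctionSpace(mesh, "CG", 4))  # "exact" reference
u_h = interpolate(expr, FunctionSpace(mesh, "CG", 1))      # approximate solution

# Internally the error is projected into a DG space of degree 1 + degree_rise.
print(errornorm(u_exact, u_h, norm_type="L2", degree_rise=3))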
Example #2
def errornorm(u, uh, norm_type="L2", degree_rise=3, mesh=None):
    """Compute the error :math:`e = u - u_h` in the specified norm.

    :arg u: a :class:`.Function` containing an "exact" solution
    :arg uh: a :class:`.Function` containing the approximate solution
    :arg norm_type: the type of norm to compute, see :func:`.norm` for
         details of supported norm types.
    :arg degree_rise: increase in polynomial degree to use as the
         approximation space for computing the error.
    :arg mesh: an optional mesh on which to compute the error norm
         (currently ignored).

    This function works by :func:`.project`\ing ``u`` and ``uh`` into
    a space of degree ``degree_rise`` higher than the degree of ``uh``
    and computing the error there.
    """
    urank = len(u.ufl_shape)
    uhrank = len(uh.ufl_shape)

    rank = urank
    if urank != uhrank:
        raise RuntimeError("Mismatching rank between u and uh")

    degree = uh.function_space().ufl_element().degree()
    if isinstance(degree, tuple):
        degree = max(degree) + degree_rise
    else:
        degree += degree_rise

    # The exact solution might be an expression, in which case this test is irrelevant.
    if isinstance(u, function.Function):
        degree_u = u.function_space().ufl_element().degree()
        if degree > degree_u:
            warning("Degree of exact solution less than approximation degree")

    mesh = uh.function_space().mesh()
    if rank == 0:
        V = functionspace.FunctionSpace(mesh, 'DG', degree)
    elif rank == 1:
        V = functionspace.VectorFunctionSpace(mesh, 'DG', degree,
                                              dim=u.ufl_shape[0])
    else:
        raise RuntimeError("Don't know how to compute error norm for tensor valued functions")

    u_ = projection.project(u, V)
    uh_ = projection.project(uh, V)

    uh_ -= u_

    return norm(uh_, norm_type=norm_type, mesh=mesh)
Example #3
def _extract_kwargs(**kwargs):
    parameters = kwargs.get('solver_parameters', None)
    if 'parameters' in kwargs:
        warning(RED % "The 'parameters' keyword is deprecated, use 'solver_parameters' instead.")
        parameters = kwargs['parameters']
        if 'solver_parameters' in kwargs:
            warning(RED % "'parameters' and 'solver_parameters' passed, using the latter")
            parameters = kwargs['solver_parameters']

    # Make sure we don't stomp on a dict the user has passed in.
    parameters = parameters.copy() if parameters is not None else {}
    nullspace = kwargs.get('nullspace', None)
    options_prefix = kwargs.get('options_prefix', None)

    return parameters, nullspace, options_prefix
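The final copy() is the important step: it stops later setdefault calls (Example #15 below sets 'pc_type' this way) from mutating the dict the caller passed in. A small plain-Python illustration of the pitfall it avoids:

# Plain Python, no Firedrake needed: why the defensive copy matters.
user_opts = {'ksp_type': 'cg'}

parameters = user_opts.copy()             # what _extract_kwargs does
parameters.setdefault('pc_type', 'jacobi')

print(parameters)   # {'ksp_type': 'cg', 'pc_type': 'jacobi'}
print(user_opts)    # {'ksp_type': 'cg'} -- the caller's dict is untouched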
Example #4
def create_slope_set(op2set, extra_halo, insp_sets=None):
    """
    Convert an OP2 set to a set suitable for the SLOPE Python interface.
    Also check that the halo region is sufficiently deep for tiling.
    """
    SlopeSet = namedtuple('SlopeSet', 'name core boundary nonexec superset')

    partitioning = op2set._partitioning if hasattr(op2set,
                                                   '_partitioning') else None
    if not isinstance(op2set, Subset):
        name = op2set.name
        superset = None
    else:
        name = "%s_ss" % op2set
        superset = op2set.superset.name

    if slope.get_exec_mode() not in ['OMP_MPI', 'ONLY_MPI']:
        core_size = op2set.core_size
        boundary_size = op2set.exec_size - op2set.core_size
        nonexec_size = op2set.total_size - op2set.exec_size
    elif hasattr(op2set, '_deep_size'):
        # Assume [1, ..., N] levels of halo regions
        # Each level is represented by (core, owned, exec, nonexec)
        level_N = op2set._deep_size[-1]
        core_size = level_N[0]
        boundary_size = level_N[2] - core_size
        nonexec_size = level_N[3] - level_N[2]
        if extra_halo and nonexec_size == 0:
            level_E = op2set._deep_size[-2]
            boundary_size = level_E[2] - core_size
            nonexec_size = level_E[3] - level_E[2]
    else:
        warning("Couldn't find deep halos in %s, outcome is undefined." %
                op2set.name)
        core_size = op2set.core_size
        boundary_size = op2set.exec_size - op2set.core_size
        nonexec_size = op2set.total_size - op2set.exec_size

    slope_set = SlopeSet(name, core_size, boundary_size, nonexec_size,
                         superset)
    insp_sets[slope_set] = partitioning

    return slope_set
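In the non-MPI branch the three region sizes are plain differences of the OP2 set's counters (core_size <= exec_size <= total_size). A toy illustration with made-up numbers, no OP2 required:

from collections import namedtuple

SlopeSet = namedtuple('SlopeSet', 'name core boundary nonexec superset')

# Hypothetical counters, as an op2.Set would report them.
core_size, exec_size, total_size = 100, 110, 120

s = SlopeSet('cells',
             core_size,                # core entities
             exec_size - core_size,    # boundary (executed halo) entities
             total_size - exec_size,   # non-executed halo entities
             None)
print(s)  # SlopeSet(name='cells', core=100, boundary=10, nonexec=10, superset=None)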
Example #5
def _extract_kwargs(**kwargs):
    parameters = kwargs.get('solver_parameters', None)
    if 'parameters' in kwargs:
        warning(
            RED %
            "The 'parameters' keyword is deprecated, use 'solver_parameters' instead."
        )
        parameters = kwargs['parameters']
        if 'solver_parameters' in kwargs:
            warning(
                RED %
                "'parameters' and 'solver_parameters' passed, using the latter"
            )
            parameters = kwargs['solver_parameters']

    # Make sure we don't stomp on a dict the user has passed in.
    parameters = parameters.copy() if parameters is not None else {}
    nullspace = kwargs.get('nullspace', None)
    options_prefix = kwargs.get('options_prefix', None)

    return parameters, nullspace, options_prefix
Example #6
def create_slope_set(op2set, extra_halo, insp_sets=None):
    """
    Convert an OP2 set to a set suitable for the SLOPE Python interface.
    Also check that the halo region is sufficiently deep for tiling.
    """
    SlopeSet = namedtuple('SlopeSet', 'name core boundary nonexec superset')

    partitioning = op2set._partitioning if hasattr(op2set, '_partitioning') else None
    if not isinstance(op2set, Subset):
        name = op2set.name
        superset = None
    else:
        name = "%s_ss" % op2set
        superset = op2set.superset.name

    if slope.get_exec_mode() not in ['OMP_MPI', 'ONLY_MPI']:
        core_size = op2set.core_size
        boundary_size = op2set.exec_size - op2set.core_size
        nonexec_size = op2set.total_size - op2set.exec_size
    elif hasattr(op2set, '_deep_size'):
        # Assume [1, ..., N] levels of halo regions
        # Each level is represented by (core, owned, exec, nonexec)
        level_N = op2set._deep_size[-1]
        core_size = level_N[0]
        boundary_size = level_N[2] - core_size
        nonexec_size = level_N[3] - level_N[2]
        if extra_halo and nonexec_size == 0:
            level_E = op2set._deep_size[-2]
            boundary_size = level_E[2] - core_size
            nonexec_size = level_E[3] - level_E[2]
    else:
        warning("Couldn't find deep halos in %s, outcome is undefined." % op2set.name)
        core_size = op2set.core_size
        boundary_size = op2set.exec_size - op2set.core_size
        nonexec_size = op2set.total_size - op2set.exec_size

    slope_set = SlopeSet(name, core_size, boundary_size, nonexec_size, superset)
    insp_sets[slope_set] = partitioning

    return slope_set
Example #7
def increase_stack(asm_opt):
    """"Increase the stack size it the total space occupied by the kernel's local
    arrays is too big."""
    # Assume the size of a C type double is 8 bytes
    double_size = 8
    # Assume the stack size is 1.7 MB (2 MB is usually the limit)
    stack_size = 1.7*1024*1024

    size = 0
    for asm in asm_opt:
        decls = asm.decls.values()
        if decls:
            size += sum([reduce(operator.mul, d.sym.rank) for d in zip(*decls)[0]
                         if d.sym.rank])

    if size*double_size > stack_size:
        # Increase the stack size if the kernel's stack size seems to outreach
        # the space available
        try:
            resource.setrlimit(resource.RLIMIT_STACK, (resource.RLIM_INFINITY,
                                                       resource.RLIM_INFINITY))
        except resource.error:
            warning("Stack may blow up, and could not increase its size.")
            warning("In case of failure, lower COFFEE's licm level to 1.")
Example #8
    def __lshift__(self, arg):
        from pyop2.logger import warning, RED
        warning(RED % "The << syntax is deprecated, use File.write")
        self.write(arg)
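The override simply forwards to File.write after emitting the warning, so the two spellings below do the same thing. A hypothetical usage sketch, assuming an existing Firedrake File ``f_out`` and Function ``u`` (neither is constructed here):

f_out << u        # deprecated spelling; triggers the warning above
f_out.write(u)    # preferred spelling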
Example #9
import numpy as np
import ufl
import ctypes
from ctypes import POINTER, c_int, c_double, c_void_p

import coffee.base as ast

from pyop2 import op2
from pyop2.logger import warning

from firedrake import functionspaceimpl
from firedrake import utils
from firedrake import vector
try:
    import cachetools
except ImportError:
    warning(
        "cachetools not available, expression assembly will be slowed down")
    cachetools = None

__all__ = ['Function', 'PointNotInDomainError']

valuetype = np.float64


class _CFunction(ctypes.Structure):
    """C struct collecting data from a :class:`Function`"""
    _fields_ = [("n_cols", c_int), ("n_layers", c_int),
                ("coords", POINTER(c_double)), ("coords_map", POINTER(c_int)),
                ("f", POINTER(c_double)), ("f_map", POINTER(c_int)),
                ("sidx", c_void_p)]

Example #10
import numpy as np
import ufl

import coffee.base as ast

from pyop2 import op2
from pyop2.logger import warning

from firedrake import expression as expression_t
from firedrake import functionspace
from firedrake import utils
from firedrake import vector

try:
    import cachetools
except ImportError:
    warning("cachetools not available, expression assembly will be slowed down")
    cachetools = None


__all__ = ["Function", "interpolate"]


valuetype = np.float64


def interpolate(expr, V):
    """Interpolate an expression onto a new function in V.

    :arg expr: an :class:`.Expression`.
    :arg V: the :class:`.FunctionSpace` to interpolate into.
Example #11
File: io.py  Project: jychang48/firedrake
    def __lshift__(self, data):
        """It allows file << function syntax for writing data out to disk.

        In the case of parallel, it would also accept (function, timestep)
        tuple as an argument. If only function is given, then the timestep
        will be automatically generated."""
        # If parallel, it needs to keep track of its timestep.
        if MPI.parallel:
            # if statements to keep the consistency of how to update the
            # timestep.
            if isinstance(data, tuple):
                if self._time_step == -1 or not self._generate_time:
                    function = data[0]
                    self._time_step = data[1]
                else:
                    raise TypeError("Expected function, got tuple.")
            else:
                if self._time_step != -1 and not self._generate_time:
                    raise TypeError("Expected tuple, got function.")
                function = data
                self._time_step += 1
                self._generate_time = True
        else:
            function = data

        def is_family1(e, family):
            import ufl.finiteelement.hdivcurl as hc
            if isinstance(e, (hc.HDivElement, hc.HCurlElement)):
                return False
            if e.family() == 'OuterProductElement':
                if e.degree() == (1, 1):
                    if e._A.family() == family \
                       and e._B.family() == family:
                        return True
            elif e.family() == family and e.degree() == 1:
                return True
            return False

        def is_cgN(e):
            import ufl.finiteelement.hdivcurl as hc
            if isinstance(e, (hc.HDivElement, hc.HCurlElement)):
                return False
            if e.family() == 'OuterProductElement':
                if e._A.family() in ('Lagrange', 'Q') \
                   and e._B.family() == 'Lagrange':
                    return True
            elif e.family() in ('Lagrange', 'Q'):
                return True
            return False

        mesh = function.function_space().mesh()
        e = function.function_space().ufl_element()

        if len(e.value_shape()) > 1:
            raise RuntimeError("Can't output tensor valued functions")

        ce = mesh.coordinates.function_space().ufl_element()

        coords_p1 = is_family1(ce, 'Lagrange') or is_family1(ce, 'Q')
        coords_p1dg = is_family1(ce, 'Discontinuous Lagrange') or is_family1(ce, 'DQ')
        coords_cgN = is_cgN(ce)
        function_p1 = is_family1(e, 'Lagrange') or is_family1(e, 'Q')
        function_p1dg = is_family1(e, 'Discontinuous Lagrange') or is_family1(e, 'DQ')
        function_cgN = is_cgN(e)

        project_coords = False
        project_function = False
        discontinuous = False
        # We either output in P1 or P1dg.
        if coords_cgN and function_cgN:
            family = 'CG'
            project_coords = not coords_p1
            project_function = not function_p1
        else:
            family = 'DG'
            project_coords = not coords_p1dg
            project_function = not function_p1dg
            discontinuous = True

        if project_function:
            if len(e.value_shape()) == 0:
                Vo = fs.FunctionSpace(mesh, family, 1)
            elif len(e.value_shape()) == 1:
                Vo = fs.VectorFunctionSpace(mesh, family, 1, dim=e.value_shape()[0])
            else:
                # Never reached
                Vo = None
            if not self._warnings[0]:
                warning(RED % "*** Projecting output function to %s1", family)
                self._warnings[0] = True
            output = projection.project(function, Vo, name=function.name())
        else:
            output = function
            Vo = output.function_space()
        if project_coords:
            Vc = fs.VectorFunctionSpace(mesh, family, 1, dim=mesh._coordinate_fs.dim)
            if not self._warnings[1]:
                warning(RED % "*** Projecting coordinates to %s1", family)
                self._warnings[1] = True
            coordinates = projection.project(mesh.coordinates, Vc, name=mesh.coordinates.name())
        else:
            coordinates = mesh.coordinates
            Vc = coordinates.function_space()

        num_points = Vo.node_count

        layers = mesh.layers - 1 if isinstance(e.cell(), OuterProductCell) else 1
        num_cells = mesh.num_cells() * layers

        if not isinstance(e.cell(), OuterProductCell) and e.cell().cellname() != "quadrilateral":
            connectivity = Vc.cell_node_map().values_with_halo.flatten()
        else:
            # Connectivity of bottom cell in extruded mesh
            base = Vc.cell_node_map().values_with_halo
            if _cells[mesh.ufl_cell()] == hl.VtkQuad:
                # Quad is
                #
                # 1--3
                # |  |
                # 0--2
                #
                # needs to be
                #
                # 3--2
                # |  |
                # 0--1
                base = base[:, [0, 2, 3, 1]]
                points_per_cell = 4
            elif _cells[mesh.ufl_cell()] == hl.VtkWedge:
                # Wedge is
                #
                #    5
                #   /|\
                #  / | \
                # 1----3
                # |  4 |
                # | /\ |
                # |/  \|
                # 0----2
                #
                # needs to be
                #
                #    5
                #   /|\
                #  / | \
                # 3----4
                # |  2 |
                # | /\ |
                # |/  \|
                # 0----1
                #
                base = base[:, [0, 2, 4, 1, 3, 5]]
                points_per_cell = 6
            elif _cells[mesh.ufl_cell()] == hl.VtkHexahedron:
                # Hexahedron is
                #
                #   5----7
                #  /|   /|
                # 4----6 |
                # | 1--|-3
                # |/   |/
                # 0----2
                #
                # needs to be
                #
                #   7----6
                #  /|   /|
                # 4----5 |
                # | 3--|-2
                # |/   |/
                # 0----1
                #
                base = base[:, [0, 2, 3, 1, 4, 6, 7, 5]]
                points_per_cell = 8
            # Repeat up the column
            connectivity_temp = np.repeat(base, layers, axis=0)

            if discontinuous:
                scale = points_per_cell
            else:
                scale = 1
            offsets = np.arange(layers) * scale

            # Add offsets going up the column
            connectivity_temp += np.tile(offsets.reshape(-1, 1), (mesh.num_cells(), 1))

            connectivity = connectivity_temp.flatten()

        if isinstance(output.function_space(), fs.VectorFunctionSpace):
            tmp = output.dat.data_ro_with_halos
            vdata = [None]*3
            if output.dat.dim[0] == 1:
                vdata[0] = tmp.flatten()
            else:
                for i in range(output.dat.dim[0]):
                    vdata[i] = tmp[:, i].flatten()
            for i in range(output.dat.dim[0], 3):
                vdata[i] = np.zeros_like(vdata[0])
            data = tuple(vdata)
            # only for checking large file size
            flat_data = {function.name(): tmp.flatten()}
        else:
            data = output.dat.data_ro_with_halos.flatten()
            flat_data = {function.name(): data}

        coordinates = self._fd_to_evtk_coord(coordinates.dat.data_ro_with_halos)

        cell_types = np.empty(num_cells, dtype="uint8")

        # Assume that all cells are of same shape.
        cell_types[:] = _cells[mesh.ufl_cell()].tid
        p_c = _points_per_cell[mesh.ufl_cell()]

        # This tells which are the last nodes of each cell.
        offsets = np.arange(start=p_c, stop=p_c * (num_cells + 1), step=p_c,
                            dtype='int32')
        large_file_flag = _requiresLargeVTKFileSize("VtkUnstructuredGrid",
                                                    numPoints=num_points,
                                                    numCells=num_cells,
                                                    pointData=flat_data,
                                                    cellData=None)
        new_name = self._filename

        # When the vtu file is part of a parallel run, aggregated by a
        # pvtu file, the output is: filename_timestep_rank.vtu
        if MPI.parallel:
            new_name += "_" + str(self._time_step) + "_" + str(MPI.comm.rank)

        self._writer = hl.VtkFile(
            new_name, hl.VtkUnstructuredGrid, large_file_flag)

        self._writer.openGrid()

        self._writer.openPiece(ncells=num_cells, npoints=num_points)

        # openElement allows the content inside the tag <arg></arg>
        # to be edited.
        self._writer.openElement("Points")
        # addData adds the DataArray in the tag <arg1>
        self._writer.addData("Points", coordinates)

        self._writer.closeElement("Points")
        self._writer.openElement("Cells")
        self._writer.addData("connectivity", connectivity)
        self._writer.addData("offsets", offsets)
        self._writer.addData("types", cell_types)
        self._writer.closeElement("Cells")

        self._writer.openData("Point", scalars=function.name())
        self._writer.addData(function.name(), data)
        self._writer.closeData("Point")
        self._writer.closePiece()
        self._writer.closeGrid()

        # Create the AppendedData
        self._writer.appendData(coordinates)
        self._writer.appendData(connectivity)
        self._writer.appendData(offsets)
        self._writer.appendData(cell_types)
        self._writer.appendData(data)
        self._writer.save()
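The vertex re-orderings in the extruded branch are plain NumPy fancy indexing on the per-cell node map. A standalone check of the quadrilateral case with made-up node numbers:

import numpy as np

# Firedrake's (0, 1, 2, 3) node order -> the VTK quad winding (0, 2, 3, 1).
base = np.array([[10, 11, 12, 13],    # one row per cell
                 [20, 21, 22, 23]])
print(base[:, [0, 2, 3, 1]])
# [[10 12 13 11]
#  [20 22 23 21]]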
Example #12
def fuse(name, loop_chain, **kwargs):
    """Apply fusion (and possibly tiling) to an iterator of :class:`ParLoop`
    objects, which we refer to as ``loop_chain``. Return an iterator of
    :class:`ParLoop` objects, in which some loops may have been fused or tiled.
    If fusion could not be applied, return the unmodified ``loop_chain``.

    .. note::
       At the moment, the following features are not supported, in which
       case the unmodified ``loop_chain`` is returned.

        * mixed ``Datasets`` and ``Maps``;
        * extruded ``Sets``

    .. note::
       Tiling cannot be applied if any of the following conditions holds:

        * a global reduction/write occurs in ``loop_chain``
    """
    # If there is nothing to fuse, just return
    if len(loop_chain) in [0, 1]:
        return loop_chain

    # Are there _LazyMatOp objects (i.e., synch points) preventing fusion?
    remainder = []
    synch_points = [l for l in loop_chain if isinstance(l, _LazyMatOp)]
    if synch_points:
        # Fuse only the sub-sequence before the first synch point
        synch_point = loop_chain.index(synch_points[0])
        remainder, loop_chain = loop_chain[synch_point:], loop_chain[:synch_point]

    # Return if there is nothing to fuse (e.g. only _LazyMatOp objects were present)
    if len(loop_chain) in [0, 1]:
        return loop_chain + remainder

    # Get an inspector for fusing this /loop_chain/. If there's a cache hit,
    # return the fused par loops straight away. Otherwise, try to run an inspection.
    options = {
        'log': kwargs.get('log', False),
        'mode': kwargs.get('mode', 'hard'),
        'ignore_war': kwargs.get('ignore_war', False),
        'use_glb_maps': kwargs.get('use_glb_maps', False),
        'use_prefetch': kwargs.get('use_prefetch', 0),
        'tile_size': kwargs.get('tile_size', 1),
        'seed_loop': kwargs.get('seed_loop', 0),
        'extra_halo': kwargs.get('extra_halo', False),
        'coloring': kwargs.get('coloring', 'default')
    }
    inspector = Inspector(name, loop_chain, **options)
    if inspector._initialized:
        return inspector.schedule(loop_chain) + remainder

    # Otherwise, is the inspection legal ?
    mode = kwargs.get('mode', 'hard')
    force_glb = kwargs.get('force_glb', False)

    # Skip if loops in /loop_chain/ are already /fusion/ objects: this could happen
    # when loops had already been fused in a /loop_chain/ context
    if any(isinstance(l, extended.ParLoop) for l in loop_chain):
        return loop_chain + remainder

    # Global reductions are dangerous for correctness, so avoid fusion unless the
    # user is forcing it
    if not force_glb and any(l._reduced_globals for l in loop_chain):
        return loop_chain + remainder

    # Loop fusion requires modifying kernels, so ASTs must be available
    if not mode == 'only_tile':
        if any(not l.kernel._ast or l.kernel._attached_info['flatblocks'] for l in loop_chain):
            return loop_chain + remainder

    # Mixed still not supported
    if any(a._is_mixed for a in flatten([l.args for l in loop_chain])):
        return loop_chain + remainder

    # Extrusion still not supported
    if any(l.is_layered for l in loop_chain):
        return loop_chain + remainder

    # If tiling is requested, SLOPE must be visible
    if mode in ['tile', 'only_tile'] and not slope:
        warning("Couldn't locate SLOPE. Falling back to plain op2.ParLoops.")
        return loop_chain + remainder

    schedule = inspector.inspect()
    return schedule(loop_chain) + remainder
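A hypothetical invocation sketch: in practice fuse is called by PyOP2's loop_chain machinery on the trace of lazily evaluated ParLoops, and the keyword arguments mirror the options dictionary built above. ``trace`` below stands in for that list and is not constructed here:

# Hypothetical: `trace` is a list of op2.ParLoop objects collected elsewhere.
fused_chain = fuse("my_loop_chain", trace,
                   mode='tile',        # request fusion followed by tiling
                   tile_size=256,
                   extra_halo=False)

# On any unsupported feature (mixed data, extrusion, global reductions,
# missing SLOPE, ...) the original chain comes back unchanged.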
Example #13
    def __lshift__(self, arg):
        from pyop2.logger import warning, RED
        warning(RED % "The << syntax is deprecated, use File.write")
        self.write(arg)
Example #14
File: interface.py  Project: zjvskobe/PyOP2
def fuse(name, loop_chain, **kwargs):
    """Apply fusion (and possibly tiling) to an iterator of :class:`ParLoop`
    objects, which we refer to as ``loop_chain``. Return an iterator of
    :class:`ParLoop` objects, in which some loops may have been fused or tiled.
    If fusion could not be applied, return the unmodified ``loop_chain``.

    .. note::
       At the moment, the following features are not supported, in which
       case the unmodified ``loop_chain`` is returned.

        * mixed ``Datasets`` and ``Maps``;
        * extruded ``Sets``

    .. note::
       Tiling cannot be applied if any of the following conditions holds:

        * a global reduction/write occurs in ``loop_chain``
    """
    # If there is nothing to fuse, just return
    if len(loop_chain) in [0, 1]:
        return loop_chain

    # Are there _LazyMatOp objects (i.e., synch points) preventing fusion?
    remainder = []
    synch_points = [l for l in loop_chain if isinstance(l, _LazyMatOp)]
    if synch_points:
        # Fuse only the sub-sequence before the first synch point
        synch_point = loop_chain.index(synch_points[0])
        remainder, loop_chain = loop_chain[
            synch_point:], loop_chain[:synch_point]

    # Return if there is nothing to fuse (e.g. only _LazyMatOp objects were present)
    if len(loop_chain) in [0, 1]:
        return loop_chain + remainder

    # Get an inspector for fusing this /loop_chain/. If there's a cache hit,
    # return the fused par loops straight away. Otherwise, try to run an inspection.
    options = {
        'log': kwargs.get('log', False),
        'mode': kwargs.get('mode', 'hard'),
        'ignore_war': kwargs.get('ignore_war', False),
        'use_glb_maps': kwargs.get('use_glb_maps', False),
        'use_prefetch': kwargs.get('use_prefetch', 0),
        'tile_size': kwargs.get('tile_size', 1),
        'seed_loop': kwargs.get('seed_loop', 0),
        'extra_halo': kwargs.get('extra_halo', False),
        'coloring': kwargs.get('coloring', 'default')
    }
    inspector = Inspector(name, loop_chain, **options)
    if inspector._initialized:
        return inspector.schedule(loop_chain) + remainder

    # Otherwise, is the inspection legal ?
    mode = kwargs.get('mode', 'hard')
    force_glb = kwargs.get('force_glb', False)

    # Skip if loops in /loop_chain/ are already /fusion/ objects: this could happen
    # when loops had already been fused in a /loop_chain/ context
    if any(isinstance(l, extended.ParLoop) for l in loop_chain):
        return loop_chain + remainder

    # Global reductions are dangerous for correctness, so avoid fusion unless the
    # user is forcing it
    if not force_glb and any(l._reduced_globals for l in loop_chain):
        return loop_chain + remainder

    # Loop fusion requires modifying kernels, so ASTs must be available
    if not mode == 'only_tile':
        if any(not l.kernel._ast or l.kernel._attached_info['flatblocks']
               for l in loop_chain):
            return loop_chain + remainder

    # Mixed still not supported
    if any(a._is_mixed for a in flatten([l.args for l in loop_chain])):
        return loop_chain + remainder

    # Extrusion still not supported
    if any(l.is_layered for l in loop_chain):
        return loop_chain + remainder

    # If tiling is requested, SLOPE must be visible
    if mode in ['tile', 'only_tile'] and not slope:
        warning("Couldn't locate SLOPE. Falling back to plain op2.ParLoops.")
        return loop_chain + remainder

    schedule = inspector.inspect()
    return schedule(loop_chain) + remainder
Example #15
    def __init__(self, *args, **kwargs):
        """
        :arg problem: A :class:`NonlinearVariationalProblem` to solve.
        :kwarg nullspace: an optional :class:`.VectorSpaceBasis` (or
               :class:`.MixedVectorSpaceBasis`) spanning the null
               space of the operator.
        :kwarg solver_parameters: Solver parameters to pass to PETSc.
            This should be a dict mapping PETSc options to values.  For
            example, to set the nonlinear solver type to just use a linear
            solver:

        .. code-block:: python

            {'snes_type': 'ksponly'}

        PETSc flag options should be specified with `bool` values. For example:

        .. code-block:: python

            {'snes_monitor': True}

        .. warning ::

            Since this object contains a circular reference and a
            custom ``__del__`` attribute, you *must* call :meth:`.destroy`
            on it when you are done, otherwise it will never be
            garbage collected.

        """
        assert isinstance(args[0], NonlinearVariationalProblem)
        self._problem = args[0]
        # Build the jacobian with the correct sparsity pattern.  Note
        # that since matrix assembly is lazy this doesn't actually
        # force an additional assembly of the matrix since in
        # form_jacobian we call assemble again which drops this
        # computation on the floor.
        self._jac_tensor = assemble.assemble(self._problem.J_ufl, bcs=self._problem.bcs,
                                             form_compiler_parameters=self._problem.form_compiler_parameters)
        if self._problem.Jp is not None:
            self._jac_ptensor = assemble.assemble(self._problem.Jp, bcs=self._problem.bcs,
                                                  form_compiler_parameters=self._problem.form_compiler_parameters)
        else:
            self._jac_ptensor = self._jac_tensor
        test = self._problem.F_ufl.arguments()[0]
        self._F_tensor = function.Function(test.function_space())
        # Function to hold current guess
        self._x = function.Function(self._problem.u_ufl)
        self._problem.F_ufl = ufl.replace(self._problem.F_ufl, {self._problem.u_ufl:
                                                                self._x})
        self._problem.J_ufl = ufl.replace(self._problem.J_ufl, {self._problem.u_ufl:
                                                                self._x})
        if self._problem.Jp is not None:
            self._problem.Jp = ufl.replace(self._problem.Jp, {self._problem.u_ufl:
                                                              self._x})
        self._jacobian_assembled = False
        self.snes = PETSc.SNES().create()
        self._opt_prefix = 'firedrake_snes_%d_' % NonlinearVariationalSolver._id
        NonlinearVariationalSolver._id += 1
        self.snes.setOptionsPrefix(self._opt_prefix)

        parameters = kwargs.get('solver_parameters', None)
        if 'parameters' in kwargs:
            warning(RED % "The 'parameters' keyword to %s is deprecated, use 'solver_parameters' instead.",
                    self.__class__.__name__)
            parameters = kwargs['parameters']
            if 'solver_parameters' in kwargs:
                warning(RED % "'parameters' and 'solver_parameters' passed to %s, using the latter",
                        self.__class__.__name__)
                parameters = kwargs['solver_parameters']

        # Make sure we don't stomp on a dict the user has passed in.
        parameters = parameters.copy() if parameters is not None else {}
        # Mixed problem, use jacobi pc if user has not supplied one.
        if self._jac_tensor._M.sparsity.shape != (1, 1):
            parameters.setdefault('pc_type', 'jacobi')

        self.parameters = parameters

        ksp = self.snes.getKSP()
        pc = ksp.getPC()
        pmat = self._jac_ptensor._M
        names = [fs.name if fs.name else str(i)
                 for i, fs in enumerate(test.function_space())]

        ises = solving_utils.set_fieldsplits(pmat, pc, names=names)

        with self._F_tensor.dat.vec as v:
            self.snes.setFunction(self.form_function, v)
        self.snes.setJacobian(self.form_jacobian, J=self._jac_tensor._M.handle,
                              P=self._jac_ptensor._M.handle)

        nullspace = kwargs.get('nullspace', None)
        if nullspace is not None:
            self.set_nullspace(nullspace, ises=ises)
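Following the docstring above, a hedged construction sketch: dictionary-valued solver_parameters, bool-valued PETSc flag options, and an explicit destroy() when done. ``problem`` is assumed to be an existing NonlinearVariationalProblem and is not built here:

# Hedged sketch based on the docstring; `problem` is assumed to exist.
solver = NonlinearVariationalSolver(
    problem,
    solver_parameters={'snes_type': 'ksponly',   # effectively one linear solve
                       'snes_monitor': True})    # PETSc flag options take bools
solver.solve()
# Per the docstring's warning, destroy() must be called explicitly, otherwise
# the circular reference keeps the solver alive forever.
solver.destroy()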
Example #16
File: io.py  Project: hbuesing/firedrake
    def __lshift__(self, data):
        """It allows file << function syntax for writing data out to disk.

        In the case of parallel, it would also accept (function, timestep)
        tuple as an argument. If only function is given, then the timestep
        will be automatically generated."""
        # If parallel, it needs to keep track of its timestep.
        if MPI.parallel:
            # if statements to keep the consistency of how to update the
            # timestep.
            if isinstance(data, tuple):
                if self._time_step == -1 or not self._generate_time:
                    function = data[0]
                    self._time_step = data[1]
                else:
                    raise TypeError("Expected function, got tuple.")
            else:
                if self._time_step != -1 and not self._generate_time:
                    raise TypeError("Expected tuple, got function.")
                function = data
                self._time_step += 1
                self._generate_time = True
        else:
            function = data

        def is_family1(e, family):
            import ufl.finiteelement.hdivcurl as hc
            if isinstance(e, (hc.HDiv, hc.HCurl)):
                return False
            if e.family() == 'OuterProductElement':
                if e.degree() == (1, 1):
                    if e._A.family() == family \
                       and e._B.family() == family:
                        return True
            elif e.family() == family and e.degree() == 1:
                return True
            return False

        def is_cgN(e):
            import ufl.finiteelement.hdivcurl as hc
            if isinstance(e, (hc.HDiv, hc.HCurl)):
                return False
            if e.family() == 'OuterProductElement':
                if e._A.family() in ('Lagrange', 'Q') \
                   and e._B.family() == 'Lagrange':
                    return True
            elif e.family() in ('Lagrange', 'Q'):
                return True
            return False

        mesh = function.function_space().mesh()
        e = function.function_space().ufl_element()

        if len(e.value_shape()) > 1:
            raise RuntimeError("Can't output tensor valued functions")

        ce = mesh.coordinates.function_space().ufl_element()

        coords_p1 = is_family1(ce, 'Lagrange') or is_family1(ce, 'Q')
        coords_p1dg = is_family1(ce, 'Discontinuous Lagrange') or is_family1(ce, 'DQ')
        coords_cgN = is_cgN(ce)
        function_p1 = is_family1(e, 'Lagrange') or is_family1(e, 'Q')
        function_p1dg = is_family1(e, 'Discontinuous Lagrange') or is_family1(e, 'DQ')
        function_cgN = is_cgN(e)

        project_coords = False
        project_function = False
        discontinuous = False
        # We either output in P1 or P1dg.
        if coords_cgN and function_cgN:
            family = 'CG'
            project_coords = not coords_p1
            project_function = not function_p1
        else:
            family = 'DG'
            project_coords = not coords_p1dg
            project_function = not function_p1dg
            discontinuous = True

        if project_function:
            if len(e.value_shape()) == 0:
                Vo = fs.FunctionSpace(mesh, family, 1)
            elif len(e.value_shape()) == 1:
                Vo = fs.VectorFunctionSpace(mesh, family, 1, dim=e.value_shape()[0])
            else:
                # Never reached
                Vo = None
            if not self._warnings[0]:
                warning(RED % "*** Projecting output function to %s1", family)
                self._warnings[0] = True
            output = projection.project(function, Vo, name=function.name())
        else:
            output = function
            Vo = output.function_space()
        if project_coords:
            Vc = fs.VectorFunctionSpace(mesh, family, 1, dim=mesh._coordinate_fs.dim)
            if not self._warnings[1]:
                warning(RED % "*** Projecting coordinates to %s1", family)
                self._warnings[1] = True
            coordinates = projection.project(mesh.coordinates, Vc, name=mesh.coordinates.name())
        else:
            coordinates = mesh.coordinates
            Vc = coordinates.function_space()

        num_points = Vo.node_count

        layers = mesh.layers - 1 if isinstance(e.cell(), OuterProductCell) else 1
        num_cells = mesh.num_cells() * layers

        if not isinstance(e.cell(), OuterProductCell) and e.cell().cellname() != "quadrilateral":
            connectivity = Vc.cell_node_map().values_with_halo.flatten()
        else:
            # Connectivity of bottom cell in extruded mesh
            base = Vc.cell_node_map().values_with_halo
            if _cells[mesh.ufl_cell()] == hl.VtkQuad:
                # Quad is
                #
                # 1--3
                # |  |
                # 0--2
                #
                # needs to be
                #
                # 3--2
                # |  |
                # 0--1
                base = base[:, [0, 2, 3, 1]]
                points_per_cell = 4
            elif _cells[mesh.ufl_cell()] == hl.VtkWedge:
                # Wedge is
                #
                #    5
                #   /|\
                #  / | \
                # 1----3
                # |  4 |
                # | /\ |
                # |/  \|
                # 0----2
                #
                # needs to be
                #
                #    5
                #   /|\
                #  / | \
                # 3----4
                # |  2 |
                # | /\ |
                # |/  \|
                # 0----1
                #
                base = base[:, [0, 2, 4, 1, 3, 5]]
                points_per_cell = 6
            elif _cells[mesh.ufl_cell()] == hl.VtkHexahedron:
                # Hexahedron is
                #
                #   5----7
                #  /|   /|
                # 4----6 |
                # | 1--|-3
                # |/   |/
                # 0----2
                #
                # needs to be
                #
                #   7----6
                #  /|   /|
                # 4----5 |
                # | 3--|-2
                # |/   |/
                # 0----1
                #
                base = base[:, [0, 2, 3, 1, 4, 6, 7, 5]]
                points_per_cell = 8
            # Repeat up the column
            connectivity_temp = np.repeat(base, layers, axis=0)

            if discontinuous:
                scale = points_per_cell
            else:
                scale = 1
            offsets = np.arange(layers) * scale

            # Add offsets going up the column
            connectivity_temp += np.tile(offsets.reshape(-1, 1), (mesh.num_cells(), 1))

            connectivity = connectivity_temp.flatten()

        if isinstance(output.function_space(), fs.VectorFunctionSpace):
            tmp = output.dat.data_ro_with_halos
            vdata = [None]*3
            if output.dat.dim[0] == 1:
                vdata[0] = tmp.flatten()
            else:
                for i in range(output.dat.dim[0]):
                    vdata[i] = tmp[:, i].flatten()
            for i in range(output.dat.dim[0], 3):
                vdata[i] = np.zeros_like(vdata[0])
            data = tuple(vdata)
            # only for checking large file size
            flat_data = {function.name(): tmp.flatten()}
        else:
            data = output.dat.data_ro_with_halos.flatten()
            flat_data = {function.name(): data}

        coordinates = self._fd_to_evtk_coord(coordinates.dat.data_ro_with_halos)

        cell_types = np.empty(num_cells, dtype="uint8")

        # Assume that all cells are of same shape.
        cell_types[:] = _cells[mesh.ufl_cell()].tid
        p_c = _points_per_cell[mesh.ufl_cell()]

        # This tells which are the last nodes of each cell.
        offsets = np.arange(start=p_c, stop=p_c * (num_cells + 1), step=p_c,
                            dtype='int32')
        large_file_flag = _requiresLargeVTKFileSize("VtkUnstructuredGrid",
                                                    numPoints=num_points,
                                                    numCells=num_cells,
                                                    pointData=flat_data,
                                                    cellData=None)
        new_name = self._filename

        # When the vtu file is part of a parallel run, aggregated by a
        # pvtu file, the output is: filename_timestep_rank.vtu
        if MPI.parallel:
            new_name += "_" + str(self._time_step) + "_" + str(MPI.comm.rank)

        self._writer = hl.VtkFile(
            new_name, hl.VtkUnstructuredGrid, large_file_flag)

        self._writer.openGrid()

        self._writer.openPiece(ncells=num_cells, npoints=num_points)

        # openElement allows the content inside the tag <arg></arg>
        # to be edited.
        self._writer.openElement("Points")
        # addData adds the DataArray in the tag <arg1>
        self._writer.addData("Points", coordinates)

        self._writer.closeElement("Points")
        self._writer.openElement("Cells")
        self._writer.addData("connectivity", connectivity)
        self._writer.addData("offsets", offsets)
        self._writer.addData("types", cell_types)
        self._writer.closeElement("Cells")

        self._writer.openData("Point", scalars=function.name())
        self._writer.addData(function.name(), data)
        self._writer.closeData("Point")
        self._writer.closePiece()
        self._writer.closeGrid()

        # Create the AppendedData
        self._writer.appendData(coordinates)
        self._writer.appendData(connectivity)
        self._writer.appendData(offsets)
        self._writer.appendData(cell_types)
        self._writer.appendData(data)
        self._writer.save()
Example #17
    def evict(self):
        """Run the cache eviction algorithm. This works out the permitted
cache size and deletes objects until it is achieved. Cache values are
assumed to have a :attr:`value` attribute and eviction occurs in
increasing :attr:`value` order. Currently :attr:`value` is an index of
the assembly operation, so older operations are evicted first.

The cache will be evicted down to 90% of permitted size.

The permitted size is either the explicit
:data:`parameters["assembly_cache"]["max_bytes"]` or it is the amount of
memory per core scaled by :data:`parameters["assembly_cache"]["max_factor"]`
(by default the scale factor is 0.6).

In MPI parallel, the nbytes of each cache entry is set to the maximum
over all processes, while the available memory is set to the
minimum. This produces a conservative caching policy which is
guaranteed to result in the same evictions on each processor.

        """

        if not parameters["assembly_cache"]["eviction"]:
            return

        max_cache_size = min(parameters["assembly_cache"]["max_bytes"] or float("inf"),
                             (memory or float("inf"))
                             * parameters["assembly_cache"]["max_factor"]
                             )

        if max_cache_size == float("inf"):
            if not self.evictwarned:
                warning("No maximum assembly cache size. Install psutil >= 2.0.0 or risk leaking memory!")
                self.evictwarned = True
            return

        cache_size = self.nbytes
        if cache_size < max_cache_size:
            return

        debug("Cache eviction triggered. %s bytes in cache, %s bytes allowed" %
              (cache_size, max_cache_size))

        # Evict down to 90% full.
        bytes_to_evict = cache_size - 0.9 * max_cache_size

        sorted_cache = sorted(self.cache.items(), key=lambda x: x[1][1].value)

        nbytes = lambda x: x[1][1].nbytes

        candidates = []
        while bytes_to_evict > 0:
            next = sorted_cache.pop(0)
            candidates.append(next)
            bytes_to_evict -= nbytes(next)

        for c in reversed(candidates):
            if bytes_to_evict + nbytes(c) < 0:
                # We may have been overzealous.
                bytes_to_evict += nbytes(c)
            else:
                del self.cache[c[0]]
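The bookkeeping at the end is easiest to follow on toy numbers: entries are popped in increasing value order until enough bytes are slated for eviction, then the reversed pass hands back any overshoot. A self-contained re-run with made-up entries:

from collections import namedtuple

# Each cache value mimics the (object, metadata) pair used above: the metadata
# carries .value (assembly index) and .nbytes.
Meta = namedtuple('Meta', 'value nbytes')
cache = {'a': (None, Meta(value=0, nbytes=40)),
         'b': (None, Meta(value=1, nbytes=30)),
         'c': (None, Meta(value=2, nbytes=30))}

max_cache_size = 80
cache_size = sum(m.nbytes for _, m in cache.values())    # 100 bytes cached
bytes_to_evict = cache_size - 0.9 * max_cache_size        # target: 28 bytes

sorted_cache = sorted(cache.items(), key=lambda x: x[1][1].value)
candidates = []
while bytes_to_evict > 0:
    nxt = sorted_cache.pop(0)
    candidates.append(nxt)
    bytes_to_evict -= nxt[1][1].nbytes

for c in reversed(candidates):
    if bytes_to_evict + c[1][1].nbytes < 0:
        bytes_to_evict += c[1][1].nbytes    # overshoot: keep this entry after all
    else:
        del cache[c[0]]

print(sorted(cache))   # ['b', 'c'] -- the oldest entry 'a' was evicted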
Example #18
    def evict(self):
        """Run the cache eviction algorithm. This works out the permitted
cache size and deletes objects until it is achieved. Cache values are
assumed to have a :attr:`value` attribute and eviction occurs in
increasing :attr:`value` order. Currently :attr:`value` is an index of
the assembly operation, so older operations are evicted first.

The cache will be evicted down to 90% of permitted size.

The permitted size is either the explicit
:data:`parameters["assembly_cache"]["max_bytes"]` or it is the amount of
memory per core scaled by :data:`parameters["assembly_cache"]["max_factor"]`
(by default the scale factor is 0.6).

In MPI parallel, the nbytes of each cache entry is set to the maximum
over all processes, while the available memory is set to the
minimum. This produces a conservative caching policy which is
guaranteed to result in the same evictions on each processor.

        """

        if not parameters["assembly_cache"]["eviction"]:
            return

        max_cache_size = min(
            parameters["assembly_cache"]["max_bytes"] or float("inf"),
            (memory or float("inf")) *
            parameters["assembly_cache"]["max_factor"])

        if max_cache_size == float("inf"):
            if not self.evictwarned:
                warning(
                    "No maximum assembly cache size. Install psutil >= 2.0.0 or risk leaking memory!"
                )
                self.evictwarned = True
            return

        cache_size = self.nbytes
        if cache_size < max_cache_size:
            return

        debug("Cache eviction triggered. %s bytes in cache, %s bytes allowed" %
              (cache_size, max_cache_size))

        # Evict down to 90% full.
        bytes_to_evict = cache_size - 0.9 * max_cache_size

        sorted_cache = sorted(self.cache.items(), key=lambda x: x[1][1].value)

        nbytes = lambda x: x[1][1].nbytes

        candidates = []
        while bytes_to_evict > 0:
            next = sorted_cache.pop(0)
            candidates.append(next)
            bytes_to_evict -= nbytes(next)

        for c in reversed(candidates):
            if bytes_to_evict + nbytes(c) < 0:
                # We may have been overzealous.
                bytes_to_evict += nbytes(c)
            else:
                del self.cache[c[0]]