Example 1
def xreplace_indices(exprs, mapper, key=None, only_rhs=False):
    """
    Replace array indices in expressions.

    Parameters
    ----------
    exprs : expr-like or list of expr-like
        One or more expressions to which the replacement is applied.
    mapper : dict
        The substitution rules.
    key : list of symbols or callable
        An escape hatch to rule out some objects from the replacement.
        If a list, apply the replacement to the symbols in ``key`` only. If a
        callable, apply the replacement to a symbol S if and only if ``key(S)``
        gives True.
    only_rhs : bool, optional
        If True, apply the replacement to Eq right-hand sides only.
    """
    get = lambda i: i.rhs if only_rhs is True else i
    handle = flatten(retrieve_indexed(get(i)) for i in as_tuple(exprs))
    if isinstance(key, Iterable):
        handle = [i for i in handle if i.base.label in key]
    elif callable(key):
        handle = [i for i in handle if key(i)]
    mapper = dict(zip(handle, [i.xreplace(mapper) for i in handle]))
    replaced = [i.xreplace(mapper) for i in as_tuple(exprs)]
    return replaced if isinstance(exprs, Iterable) else replaced[0]
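All of these examples lean on `as_tuple` from `devito.tools` to normalize heterogeneous arguments. As a point of reference, here is a minimal sketch of the behavior the snippets rely on; the corner cases (None, strings, non-iterables) are assumptions drawn from how the call sites use it, not Devito's verbatim implementation:

def as_tuple(item):
    # None maps to the empty tuple; a string is treated as an atom rather
    # than as an iterable of characters; other iterables are materialized;
    # anything else is wrapped in a 1-tuple. (Sketch, not Devito's code.)
    if item is None:
        return ()
    if isinstance(item, str):
        return (item,)
    try:
        return tuple(item)
    except TypeError:
        return (item,)

assert as_tuple(None) == ()
assert as_tuple(3) == (3,)
assert as_tuple([1, 2]) == (1, 2)
assert as_tuple('ab') == ('ab',)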
Example 2
def iet_analyze(iet):
    """
    Analyze an Iteration/Expression tree and decorate it with metadata describing
    relevant computational properties (e.g., if an Iteration is parallelizable or not).
    This function performs actual data dependence analysis.
    """
    # Analyze Iterations
    analysis = mark_iteration_parallel(iet)
    analysis = mark_iteration_vectorizable(analysis)
    analysis = mark_iteration_wrappable(analysis)
    analysis = mark_iteration_affine(analysis)

    # Analyze HaloSpots
    analysis = mark_halospot_useless(analysis)
    analysis = mark_halospot_hoistable(analysis)
    analysis = mark_halospot_overlappable(analysis)

    # Decorate the Iteration/Expression tree with the found properties
    mapper = OrderedDict()
    for k, v in list(analysis.properties.items()):
        args = k.args
        properties = as_tuple(args.pop('properties')) + as_tuple(v)
        mapper[k] = k._rebuild(properties=properties, **args)
    processed = Transformer(mapper, nested=True).visit(iet)

    return processed
Example 3
    def visit_Node(self, o, ret=None, parents=None, in_parent=False):
        if ret is None:
            ret = self.default_retval()
        if parents is None:
            parents = []
        if isinstance(o, self.child_types):
            if self.mode == 'groupby':
                ret.setdefault(as_tuple(parents), []).append(o)
            elif self.mode == 'immediate':
                if in_parent:
                    ret.setdefault(parents[-1], []).append(o)
                else:
                    ret.setdefault(None, []).append(o)
            else:
                for i in parents:
                    ret.setdefault(i, []).append(o)
        if isinstance(o, self.parent_type):
            parents.append(o)
            for i in o.children:
                ret = self._visit(i, ret=ret, parents=parents, in_parent=True)
            parents.remove(o)
        else:
            for i in o.children:
                ret = self._visit(i, ret=ret, parents=parents, in_parent=in_parent)

        return ret
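The three accumulation modes are easiest to see on a toy tree. Below is a self-contained sketch with stand-in `Loop`/`Stmt` classes playing the roles of `parent_type` and `child_types` (illustrative names, not Devito's IET node types):

class Node:
    def __init__(self, *children):
        self.children = children

class Loop(Node):        # stands in for `parent_type`
    pass

class Stmt(Node):        # stands in for `child_types`
    pass

def visit(o, ret, parents, in_parent, mode):
    if isinstance(o, Stmt):
        if mode == 'groupby':
            # One entry per full ancestor chain
            ret.setdefault(tuple(parents), []).append(o)
        elif mode == 'immediate':
            # One entry per closest enclosing parent (or None)
            ret.setdefault(parents[-1] if in_parent else None, []).append(o)
        else:
            # One entry per ancestor, at any depth
            for i in parents:
                ret.setdefault(i, []).append(o)
    if isinstance(o, Loop):
        parents.append(o)
        for i in o.children:
            visit(i, ret, parents, True, mode)
        parents.remove(o)
    else:
        for i in o.children:
            visit(i, ret, parents, in_parent, mode)
    return ret

s = Stmt()
inner = Loop(s)
outer = Loop(inner)
assert set(visit(outer, {}, [], False, None)) == {outer, inner}
assert set(visit(outer, {}, [], False, 'immediate')) == {inner}
assert set(visit(outer, {}, [], False, 'groupby')) == {(outer, inner)}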
Example 4
 def __init__(self, exprs, ispace, dspace, atomics=None, guards=None):
     self._exprs = list(ClusterizedEq(i, ispace=ispace, dspace=dspace)
                        for i in as_tuple(exprs))
     self._ispace = ispace
     self._dspace = dspace
     self._atomics = set(atomics or [])
     self._guards = guards or {}
Example 5
    def __staggered_setup__(self, **kwargs):
        """
        Setup staggering-related metadata. This method assigns:

            * 0 to non-staggered dimensions;
            * 1 to staggered dimensions.
        """
        staggered = kwargs.get('staggered')
        if staggered is None:
            self.is_Staggered = False
            return tuple(0 for _ in self.indices)
        else:
            self.is_Staggered = True
            if staggered is NODE:
                staggered = ()
            elif staggered is CELL:
                staggered = self.indices
            else:
                staggered = as_tuple(staggered)
            mask = []
            for d in self.indices:
                if d in staggered:
                    mask.append(1)
                elif -d in staggered:
                    mask.append(-1)
                else:
                    mask.append(0)
            return tuple(mask)
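The mask construction is plain Python; a sketch with integers standing in for Dimensions, since `-d` is meaningful for both (in Devito, `-d` marks staggering in the negative direction of `d`):

# Illustrative stand-ins, not Devito Dimension objects
indices = (1, 2, 3)
staggered = (1, -3)      # staggered along dim 1, negatively along dim 3

mask = []
for d in indices:
    if d in staggered:
        mask.append(1)
    elif -d in staggered:
        mask.append(-1)
    else:
        mask.append(0)

assert tuple(mask) == (1, 0, -1)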
Example 6
 def _create_implicit_exprs(self):
     if not len(self._bounds) == 2*len(self.dimensions):
         raise ValueError("Left and right bounds must be supplied for each dimension")
     n_domains = self.n_domains
     i_dim = self.implicit_dimension
     dat = []
     # Organise the data contained in 'bounds' into a form such that the
     # associated implicit equations can easily be created.
     for j in range(len(self._bounds)):
         index = floor(j/2)
         d = self.dimensions[index]
         if j % 2 == 0:
             fname = d.min_name
         else:
             fname = d.max_name
         func = Function(name=fname, shape=(n_domains, ), dimensions=(i_dim, ),
                         dtype=np.int32)
         # Check if shorthand notation has been provided:
         if isinstance(self._bounds[j], int):
             bounds = np.full((n_domains,), self._bounds[j], dtype=np.int32)
             func.data[:] = bounds
         else:
             func.data[:] = self._bounds[j]
         dat.append(Eq(d.thickness[j % 2][0], func[i_dim]))
     return as_tuple(dat)
Example 7
def run(problem, **kwargs):
    """
    A single run with a specific set of performance parameters.
    """
    setup = tti_setup if problem == 'tti' else acoustic_setup
    options = {}

    time_order = kwargs.pop('time_order')[0]
    space_order = kwargs.pop('space_order')[0]
    autotune = kwargs.pop('autotune')

    # Should a specific block-shape be used? Useful if one wants to skip
    # the autotuning pass as a good block-shape is already known
    block_shape = as_tuple(kwargs.pop('block_shape'))
    if all(block_shape):
        if autotune:
            warning("Skipping autotuning (using explicit block-shape `%s`)"
                    % str(block_shape))
            autotune = False
        # This is quite hacky, but it does the trick
        for d, bs in zip(['x', 'y', 'z'], block_shape):
            options['%s0_blk_size' % d] = bs

    solver = setup(space_order=space_order, time_order=time_order, **kwargs)
    solver.forward(autotune=autotune, **options)
Example 8
def q_affine(expr, vars):
    """
    Return True if ``expr`` is (separately) affine in the variables ``vars``,
    False otherwise.

    Readapted from: https://stackoverflow.com/questions/36283548\
        /check-if-an-equation-is-linear-for-a-specific-set-of-variables/
    """
    vars = as_tuple(vars)
    # If any `vars` does not appear in `expr`, the only possibility
    # for `expr` to be affine is that it's a constant function
    if any(x not in expr.atoms() for x in vars):
        return q_constant(expr)
    # At this point, `expr` is (separately) affine in the `vars` variables
    # if all non-mixed second order derivatives are identically zero.
    for x in vars:
        # The vast majority of calls here are incredibly simple tests
        # like q_affine(x+1, [x]).  Catch these quickly and
        # explicitly, instead of calling the very slow function `diff`.
        if expr == x:
            continue
        if expr.is_Add and len(expr.args) == 2:
            if expr.args[0] == x and expr.args[1].is_Number:
                continue
            if expr.args[1] == x and expr.args[0].is_Number:
                continue

        try:
            if diff(expr, x) == nan or not Eq(diff(expr, x, x), 0):
                return False
        except TypeError:
            return False
    return True
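The slow path boils down to a SymPy derivative test: an expression is (separately) affine in `x` exactly when its second derivative with respect to `x` vanishes identically. A minimal check:

import sympy

x, y = sympy.symbols('x y')
# Affine in x: the second derivative w.r.t. x is identically zero
assert sympy.diff(3*x + y + 1, x, x) == 0
# Not affine in x: the second derivative survives
assert sympy.diff(x**2 + y, x, x) == 2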
Example 9
    def sum(self, p=None, dims=None):
        """
        Generate a symbolic expression computing the sum of ``p`` points
        along the spatial dimensions ``dims``.

        Parameters
        ----------
        p : int, optional
            The number of summands. Defaults to the halo size.
        dims : tuple of Dimension, optional
            The Dimensions along which the sum is computed. Defaults to
            ``self``'s spatial dimensions.
        """
        points = []
        for d in (as_tuple(dims) or self.space_dimensions):
            if p is None:
                lp = self._size_inhalo[d].left
                rp = self._size_inhalo[d].right
            else:
                lp = p // 2 + p % 2
                rp = p // 2
            indices = [d - i for i in range(lp, 0, -1)]
            indices.extend([d + i for i in range(rp)])
            points.extend([self.subs(d, i) for i in indices])
        return sum(points)
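For an explicit `p`, the summands straddle the centre point, with the extra point (when `p` is odd) going to the left. A sketch of the index arithmetic for `p = 3` along a symbolic dimension `d`:

import sympy

d = sympy.Symbol('d')
p = 3
lp = p // 2 + p % 2      # points strictly to the left: 2
rp = p // 2              # points from the centre rightwards: 1 (d itself)
indices = [d - i for i in range(lp, 0, -1)] + [d + i for i in range(rp)]
assert indices == [d - 2, d - 1, d]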
Example 10
 def section(self, findices):
     """
     Return a view of ``self`` in which the slots corresponding to the
     provided ``findices`` have been zeroed.
     """
     return Vector(*[d if i not in as_tuple(findices) else 0
                     for d, i in zip(self, self.findices)])
Example 11
    def distance(self, other, findex=None, view=None):
        """
        Compute the distance from ``self`` to ``other``.

        Parameters
        ----------
        other : IterationInstance
            The IterationInstance from which the distance is computed.
        findex : Dimension, optional
            If supplied, compute the distance only up to and including ``findex``.
        view : list of Dimension, optional
            If supplied, project the distance along these Dimensions.
        """
        if not isinstance(other, IterationInstance):
            raise TypeError("Cannot compute distance from obj of type %s", type(other))
        if self.findices != other.findices:
            raise TypeError("Cannot compute distance due to mismatching `findices`")
        if findex is not None:
            try:
                limit = self._cached_findices_index[findex] + 1
            except KeyError:
                raise TypeError("Cannot compute distance as `findex` not in `findices`")
        else:
            limit = self.rank
        distance = super(IterationInstance, self).distance(other)[:limit]
        if view is None:
            return distance
        else:
            proj = [d for d, i in zip(distance, self.findices) if i in as_tuple(view)]
            return Vector(*proj)
Example 12
 def affine_if_present(self, findices):
     """
     Return False if any of the provided findices appears in self and
     is not affine, True otherwise.
     """
     findices = as_tuple(findices)
     return (set(findices) & set(self.findices)).issubset(set(self.findices_affine))
Example 13
 def __new__(cls, base, index):
     if isinstance(base, (str, sympy.IndexedBase, sympy.Symbol)):
         return sympy.Indexed(base, index)
     elif not isinstance(base, sympy.Basic):
         raise ValueError("`base` must be of type sympy.Basic")
     obj = sympy.Expr.__new__(cls, base)
     obj._base = base
     obj._index = as_tuple(index)
     return obj
Example 14
    def __init__(self, shape, extent=None, origin=None, dimensions=None,
                 time_dimension=None, dtype=np.float32, subdomains=None,
                 comm=None):
        self._shape = as_tuple(shape)
        self._extent = as_tuple(extent or tuple(1. for _ in self.shape))
        self._dtype = dtype

        if dimensions is None:
            # Create the spatial dimensions and constant spacing symbols
            assert(self.dim <= 3)
            dim_names = self._default_dimensions[:self.dim]
            dim_spacing = tuple(self._const(name='h_%s' % n, value=v, dtype=self.dtype)
                                for n, v in zip(dim_names, self.spacing))
            self._dimensions = tuple(SpaceDimension(name=n, spacing=s)
                                     for n, s in zip(dim_names, dim_spacing))
        else:
            self._dimensions = dimensions

        # Initialize SubDomains
        subdomains = tuple(i for i in (Domain(), Interior(), *as_tuple(subdomains)))
        for i in subdomains:
            i.__subdomain_finalize__(self.dimensions, self.shape)
        self._subdomains = subdomains

        origin = as_tuple(origin or tuple(0. for _ in self.shape))
        self._origin = tuple(self._const(name='o_%s' % d.name, value=v, dtype=self.dtype)
                             for d, v in zip(self.dimensions, origin))

        # Sanity check
        assert (self.dim == len(self.origin) == len(self.extent) == len(self.spacing))

        # Store or create default symbols for time and stepping dimensions
        if time_dimension is None:
            spacing = self._const(name='dt', dtype=self.dtype)
            self._time_dim = TimeDimension(name='time', spacing=spacing)
            self._stepping_dim = self._make_stepping_dim(self.time_dim, name='t')
        elif isinstance(time_dimension, TimeDimension):
            self._time_dim = time_dimension
            self._stepping_dim = self._make_stepping_dim(self.time_dim)
        else:
            raise ValueError("`time_dimension` must be None or of type TimeDimension")

        self._distributor = Distributor(self.shape, self.dimensions, comm)
Example 15
 def wrapper(self, other):
     if not isinstance(other, Vector):
         try:
             other = Vector(*other)
         except TypeError:
             # Not iterable
             other = Vector(*(as_tuple(other)*len(self)))
     if relax is False and len(self) != len(other):
         raise TypeError("Cannot operate with Vectors of different rank")
     return func(self, other)
Example 16
def q_multivar(expr, vars):
    """
    Return True if at least two variables in ``vars`` appear in ``expr``,
    False otherwise.
    """
    # The vast majority of calls here provide incredibly simple single variable
    # functions, so if there are < 2 free symbols we return immediately
    if not len(expr.free_symbols) > 1:
        return False
    return len(set(as_tuple(vars)) & expr.free_symbols) >= 2
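Both the fast path and the actual test come down to `free_symbols`:

import sympy

x, y = sympy.symbols('x y')
assert len((x + 1).free_symbols) == 1               # fast path: univariate
assert len({x, y} & (x*y + 1).free_symbols) >= 2    # genuinely multivariate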
Example 17
def detect_io(exprs, relax=False):
    """
    ``{exprs} -> ({reads}, {writes})``

    Parameters
    ----------
    exprs : expr-like or list of expr-like
        The searched expressions.
    relax : bool, optional
        If False, as by default, collect only Constants and Functions.
        Otherwise, collect any Basic object.
    """
    exprs = as_tuple(exprs)
    if relax is False:
        rule = lambda i: i.is_Input
    else:
        rule = lambda i: i.is_Scalar or i.is_Tensor

    # Don't forget this nasty case, with indirections on the LHS:
    # >>> u[t, a[x]] = f[x]  -> (reads={a, f}, writes={u})

    roots = []
    for i in exprs:
        try:
            roots.append(i.rhs)
            roots.extend(list(i.lhs.indices))
        except AttributeError:
            # E.g., FunctionFromPointer
            roots.append(i)

    reads = []
    terminals = flatten(retrieve_terminals(i, deep=True) for i in roots)
    for i in terminals:
        candidates = i.free_symbols
        try:
            candidates.update({i.function})
        except AttributeError:
            pass
        for j in candidates:
            try:
                if rule(j):
                    reads.append(j)
            except AttributeError:
                pass

    writes = []
    for i in exprs:
        try:
            f = i.lhs.function
        except AttributeError:
            continue
        if rule(f):
            writes.append(f)

    return filter_sorted(reads), filter_sorted(writes)
Example 18
def process(func, state):
    """
    Apply ``func`` to the IETs in ``state._efuncs``, and update ``state`` accordingly.
    """
    # Create a Call graph. `func` will be applied to each node in the Call graph.
    # `func` might change an `efunc` signature; the Call graph will be used to
    # propagate such change through the `efunc` callers
    dag = DAG(nodes=['root'])
    queue = ['root']
    while queue:
        caller = queue.pop(0)
        callees = FindNodes(Call).visit(state._efuncs[caller])
        for callee in filter_ordered([i.name for i in callees]):
            if callee in state._efuncs:  # Exclude foreign Calls, e.g., MPI calls
                try:
                    dag.add_node(callee)
                    queue.append(callee)
                except KeyError:
                    # `callee` already in `dag`
                    pass
                dag.add_edge(callee, caller)
    assert dag.size == len(state._efuncs)

    # Apply `func`
    for i in dag.topological_sort():
        state._efuncs[i], metadata = func(state._efuncs[i])

        # Track any new Dimensions introduced by `func`
        state._dimensions.extend(list(metadata.get('dimensions', [])))

        # Track any new #include required by `func`
        state._includes.extend(list(metadata.get('includes', [])))
        state._includes = filter_ordered(state._includes)

        # Track any new ElementalFunctions
        state._efuncs.update(OrderedDict([(i.name, i)
                                          for i in metadata.get('efuncs', [])]))

        # If there's a change to the `args` and the `iet` is an efunc, then
        # we must update the call sites as well, as the arguments dropped down
        # to the efunc have just increased
        args = as_tuple(metadata.get('args'))
        if args:
            # `extif` avoids redundant updates to the parameters list, due
            # to multiple children wanting to add the same input argument
            extif = lambda v: list(v) + [e for e in args if e not in v]
            stack = [i] + dag.all_downstreams(i)
            for n in stack:
                efunc = state._efuncs[n]
                calls = [c for c in FindNodes(Call).visit(efunc) if c.name in stack]
                mapper = {c: c._rebuild(arguments=extif(c.arguments)) for c in calls}
                efunc = Transformer(mapper).visit(efunc)
                if efunc.is_Callable:
                    efunc = efunc._rebuild(parameters=extif(efunc.parameters))
                state._efuncs[n] = efunc
Example 19
def parallel(item):
    """
    Run a test in parallel. Readapted from:

        ``https://github.com/firedrakeproject/firedrake/blob/master/tests/conftest.py``
    """
    mpi_exec = 'mpiexec'
    mpi_distro = sniff_mpi_distro(mpi_exec)

    marker = item.get_closest_marker("parallel")
    mode = as_tuple(marker.kwargs.get("mode", 2))
    for m in mode:
        # Parse the `mode`
        if isinstance(m, int):
            nprocs = m
            scheme = 'basic'
            restrain = False
        else:
            if len(m) == 2:
                nprocs, scheme = m
                restrain = False
            elif len(m) == 3:
                nprocs, scheme, restrain = m
            else:
                raise ValueError("Can't run test: unexpected mode `%s`" % m)

        if restrain and os.environ.get('MPI_RESTRAIN', False):
            # A computationally expensive test that would take too long to
            # run on the current machine
            continue

        # Only spew tracebacks on rank 0.
        # Run xfailing tests to ensure that errors are reported to calling process
        if item.cls is not None:
            testname = "%s::%s::%s" % (item.fspath, item.cls.__name__, item.name)
        else:
            testname = "%s::%s" % (item.fspath, item.name)
        args = ["-n", "1", "python", "-m", "pytest", "--runxfail", "-s",
                "-q", testname]
        if nprocs > 1:
            args.extend([":", "-n", "%d" % (nprocs - 1), "python", "-m", "pytest",
                         "--runxfail", "--tb=no", "-q", testname])
        # OpenMPI requires an explicit flag for oversubscription. We need it as some
        # of the MPI tests will spawn lots of processes
        if mpi_distro == 'OpenMPI':
            call = [mpi_exec, '--oversubscribe'] + args
        else:
            call = [mpi_exec] + args

        # Tell the MPI ranks that they are running a parallel test
        os.environ['DEVITO_MPI'] = scheme
        try:
            check_call(call)
        finally:
            os.environ['DEVITO_MPI'] = '0'
Example 20
 def __init__(self, entries=None):
     processed = []
     for i in (entries or []):
         if isinstance(i, StencilEntry):
             processed.append((i.dim, i.ofs))
         elif isinstance(i, tuple):
             entry = StencilEntry(*i)  # Implicit type check
             processed.append((entry.dim, set(as_tuple(entry.ofs))))
         else:
             raise TypeError('Cannot construct a Stencil for %s' % str(i))
     super(Stencil, self).__init__(set, processed)
Example 21
def is_foldable(nodes):
    """
    Return True if the iterable ``nodes`` consists of foldable Iterations,
    False otherwise.
    """
    nodes = as_tuple(nodes)
    if len(nodes) <= 1 or any(not i.is_Iteration for i in nodes):
        return False
    main = nodes[0]
    return all(i.dim == main.dim and i.limits == main.limits and i.index == main.index
               and i.properties == main.properties for i in nodes)
Example 22
    def __init__(self, name, body, retval, parameters=None, prefix=('static', 'inline'),
                 dynamic_parameters=None):
        super(ElementalFunction, self).__init__(name, body, retval, parameters, prefix)

        self._mapper = {}
        for i in as_tuple(dynamic_parameters):
            if i.is_Dimension:
                self._mapper[i] = (parameters.index(i.symbolic_min),
                                   parameters.index(i.symbolic_max))
            else:
                self._mapper[i] = (parameters.index(i),)
Example 23
 def _apply_op(cls, intervals, key):
     """
     Create a new Interval resulting from the iterative application
     of the method ``key`` over the Intervals in ``intervals``, i.e.:
     ``intervals[0].key(intervals[1]).key(intervals[2])...``.
     """
     intervals = as_tuple(intervals)
     partial = intervals[0]
     for i in intervals[1:]:
         partial = getattr(partial, key)(i)
     return partial
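This is a left-fold over a named method, i.e. the same thing `functools.reduce` computes. A toy check with builtin sets, whose `intersection` plays the role of `key` (sets stand in for Devito Intervals):

from functools import reduce

def apply_op(items, key):
    # Fold the method named `key` over the items, left to right
    partial = items[0]
    for i in items[1:]:
        partial = getattr(partial, key)(i)
    return partial

sets = [{1, 2, 3}, {2, 3}, {3, 4}]
assert apply_op(sets, 'intersection') == {3}
assert apply_op(sets, 'intersection') == reduce(set.intersection, sets)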
Example 24
 def _normalize_index(self, idx):
     if isinstance(idx, np.ndarray):
         # Advanced indexing mode
         return (idx,)
     else:
         idx = as_tuple(idx)
         if any(i is Ellipsis for i in idx):
             # Explicitly replace the Ellipsis
             items = (slice(None),)*(self.ndim - len(idx) + 1)
             return idx[:idx.index(Ellipsis)] + items + idx[idx.index(Ellipsis)+1:]
         else:
             return idx + (slice(None),)*(self.ndim - len(idx))
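The Ellipsis expansion can be exercised standalone and cross-checked against NumPy's own indexing; `ndim` below replaces `self.ndim`:

import numpy as np

def normalize_index(idx, ndim):
    idx = idx if isinstance(idx, tuple) else (idx,)
    if any(i is Ellipsis for i in idx):
        # Expand the Ellipsis into as many full slices as needed
        items = (slice(None),) * (ndim - len(idx) + 1)
        k = idx.index(Ellipsis)
        return idx[:k] + items + idx[k+1:]
    # Pad missing trailing dimensions with full slices
    return idx + (slice(None),) * (ndim - len(idx))

a = np.arange(16).reshape(2, 2, 2, 2)
idx = (0, Ellipsis, 1)
assert normalize_index(idx, a.ndim) == (0, slice(None), slice(None), 1)
assert (a[idx] == a[normalize_index(idx, a.ndim)]).all()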
Example 25
 def __init__(self, parent_type=None, child_types=None, mode=None):
     super(MapNodes, self).__init__()
     if parent_type is None:
         self.parent_type = Iteration
     elif parent_type == 'any':
         self.parent_type = Node
     else:
         assert issubclass(parent_type, Node)
         self.parent_type = parent_type
     self.child_types = as_tuple(child_types) or (Call, Expression)
     assert mode in (None, 'immediate', 'groupby')
     self.mode = mode
Example 26
 def __new__(cls, params):
     args = []
     for p in as_tuple(params):
         if isinstance(p, str):
             args.append(Symbol(p))
         elif not isinstance(p, Expr):
             raise ValueError("`params` must be an iterable of Expr or str")
         else:
             args.append(p)
     obj = sympy.Expr.__new__(cls, *args)
     obj.params = tuple(args)
     return obj
Example 27
def q_identity(expr, var):
    """
    Return True if ``expr`` is the identity function in ``var``, modulo a constant
    (that is, a function affine in ``var`` in which the value of the coefficient of
    ``var`` is 1), False otherwise.

    Examples
    ========
    3x -> False
    3x + 1 -> False
    x + 2 -> True
    """
    return len(as_tuple(var)) == 1 and q_affine(expr, var) and (expr - var).is_Number
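The `(expr - var).is_Number` test in isolation, matching the docstring's examples:

import sympy

x = sympy.Symbol('x')
assert ((x + 2) - x).is_Number          # x + 2: identity modulo a constant
assert not ((3*x + 1) - x).is_Number    # 3x + 1: coefficient of x is 3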
Example 28
def _new_operator1(shape, blockshape=None, dle=None):
    blockshape = as_tuple(blockshape)
    grid = Grid(shape=shape, dtype=np.int32)
    infield = Function(name='infield', grid=grid)
    infield.data[:] = np.arange(reduce(mul, shape), dtype=np.int32).reshape(shape)
    outfield = Function(name='outfield', grid=grid)

    stencil = Eq(outfield.indexify(), outfield.indexify() + infield.indexify()*3.0)
    op = Operator(stencil, dle=dle)

    blocksizes = get_blocksizes(op, dle, grid, blockshape)
    op(infield=infield, outfield=outfield, **blocksizes)

    return outfield, op
Example 29
    def _simdize(self, iet):
        """
        Add pragmas to the Iteration/Expression tree to enforce SIMD auto-vectorization
        by the backend compiler.
        """
        ignore_deps = as_tuple(self._backend_compiler_pragma('ignore-deps'))

        mapper = {}
        for tree in retrieve_iteration_tree(iet):
            vector_iterations = [i for i in tree if i.is_Vectorizable]
            for i in vector_iterations:
                aligned = [j for j in FindSymbols('symbolics').visit(i)
                           if j.is_DiscreteFunction]
                if aligned:
                    simd = Ompizer.lang['simd-for-aligned']
                    simd = as_tuple(simd(','.join([j.name for j in aligned]),
                                    self.platform.simd_reg_size))
                else:
                    simd = as_tuple(Ompizer.lang['simd-for'])
                mapper[i] = i._rebuild(pragmas=i.pragmas + ignore_deps + simd)

        processed = Transformer(mapper).visit(iet)

        return processed, {}
Example 30
 def __new__(cls, lhs, rhs=0, subdomain=None, coefficients=None, implicit_dims=None,
             **kwargs):
     kwargs['evaluate'] = False
     obj = sympy.Eq.__new__(cls, lhs, rhs, **kwargs)
     obj._subdomain = subdomain
     obj._substitutions = coefficients
     obj._implicit_dims = as_tuple(implicit_dims)
     if obj._uses_symbolic_coefficients:
         # NOTE: As Coefficients.py is expanded we will not want
          # all rules to be expunged during this process.
         rules = default_rules(obj, obj._symbolic_functions)
         try:
             obj = obj.xreplace({**coefficients.rules, **rules})
         except AttributeError:
             if bool(rules):
                 obj = obj.xreplace(rules)
     return obj
Example 31
 def affine(self, findices):
     """Return True if all of the provided findices appear in self and are
     affine, False otherwise."""
     return set(as_tuple(findices)).issubset(set(self.findices_affine))
Example 32
 def irregular(self, findices):
     """Return True if all of the provided findices appear in self and are
     irregular, False otherwise."""
     return set(as_tuple(findices)).issubset(set(self.findices_irregular))
Example 33
 def __str__(self):
     return '%s->%s(%s)' % (self.pointer, self.function, ", ".join(
         str(i) for i in as_tuple(self.params)))
Example 34
 def __indices_setup__(cls, **kwargs):
     return as_tuple(kwargs['dimensions']), as_tuple(kwargs['dimensions'])
Example 35
 def getwrites(self, function):
     return as_tuple(self.writes.get(function))
Example 36
    def _build_dag(self, cgroups, prefix):
        """
        A DAG captures data dependences between ClusterGroups up to the iteration
        space depth dictated by ``prefix``.

        Examples
        --------
        Consider two ClusterGroups `cg0` and `cg1`, and ``prefix=[i]``.

        1) cg0 := b[i, j] = ...
           cg1 := ... = ... b[i, j] ...
           Non-carried flow-dependence, so `cg1` must go after `cg0`.

        2) cg0 := b[i, j] = ...
           cg1 := ... = ... b[i, j-1] ...
           Carried flow-dependence in `j`, so `cg1` must go after `cg0`.

        3) cg0 := b[i, j] = ...
           cg1 := ... = ... b[i, j+1] ...
           Carried anti-dependence in `j`, so `cg1` must go after `cg0`.

        4) cg0 := b[i, j] = ...
           cg1 := ... = ... b[i-1, j+1] ...
           Carried flow-dependence in `i`, so `cg1` can safely go before or after
           `cg0`. Note: the `j+1` in `cg1` has no impact -- the actual dependence
           between `b[i, j]` and `b[i-1, j+1]` is along `i`.
        """
        prefix = {i.dim for i in as_tuple(prefix)}

        dag = DAG(nodes=cgroups)
        for n, cg0 in enumerate(cgroups):
            for cg1 in cgroups[n + 1:]:
                scope = Scope(exprs=cg0.exprs + cg1.exprs)

                # Handle anti-dependences
                deps = scope.d_anti - (cg0.scope.d_anti + cg1.scope.d_anti)
                if any(i.cause & prefix for i in deps):
                    # Anti-dependences break the execution flow
                    # i) ClusterGroups between `cg0` and `cg1` must precede `cg1`
                    for cg2 in cgroups[n:cgroups.index(cg1)]:
                        dag.add_edge(cg2, cg1)
                    # ii) ClusterGroups after `cg1` cannot precede `cg1`
                    for cg2 in cgroups[cgroups.index(cg1) + 1:]:
                        dag.add_edge(cg1, cg2)
                    break
                elif deps:
                    dag.add_edge(cg0, cg1)

                # Flow-dependences along one of the `prefix` Dimensions can
                # be ignored; all others require sequentialization
                deps = scope.d_flow - (cg0.scope.d_flow + cg1.scope.d_flow)
                if any(not (i.cause and i.cause & prefix) for i in deps):
                    dag.add_edge(cg0, cg1)
                    continue

                # Handle increment-after-write dependences
                deps = scope.d_output - (cg0.scope.d_output +
                                         cg1.scope.d_output)
                if any(i.is_iaw for i in deps):
                    dag.add_edge(cg0, cg1)
                    continue

        return dag
Example 37
 def add_ldflags(self, flags):
     self.ldflags = filter_ordered(self.ldflags + list(as_tuple(flags)))
Example 38
 def add_libraries(self, libs):
     self.libraries = filter_ordered(self.libraries + list(as_tuple(libs)))
Example 39
 def add_library_dirs(self, dirs):
     self.library_dirs = filter_ordered(self.library_dirs +
                                        list(as_tuple(dirs)))
Example 40
 def add_include_dirs(self, dirs):
     self.include_dirs = filter_ordered(self.include_dirs +
                                        list(as_tuple(dirs)))
Example 41
 def __new__(cls, clusters, itintervals=None):
     obj = super(ClusterGroup, cls).__new__(cls,
                                            flatten(as_tuple(clusters)))
     obj._itintervals = itintervals
     return obj
Example 42
def xreplace_constrained(exprs,
                         make,
                         rule=None,
                         costmodel=lambda e: True,
                         repeat=False):
    """
    Unlike ``xreplace``, which replaces all objects specified in a mapper,
    this function replaces all objects satisfying two criteria: ::

        * The "matching rule" -- a function returning True if a node within ``expr``
            satisfies a given property, and as such should be replaced;
        * A "cost model" -- a function triggering replacement only if a certain
            cost (e.g., operation count) is exceeded. This function is optional.

    Note that there is not necessarily a relationship between the set of nodes
    for which the matching rule returns True and those nodes passing the cost
    model check. It might happen for example that, given the expression ``a + b``,
    all of ``a``, ``b``, and ``a + b`` satisfy the matching rule, but only
    ``a + b`` satisfies the cost model.

    Parameters
    ----------
    exprs : expr-like or list of expr-like
        One or more expressions to which the replacement is applied.
    make : dict or callable
        Either a mapper M: K -> V, indicating how to replace an expression in K
        with a symbol in V, or a callable with internal state that, when
        called, returns unique symbols.
    rule : callable, optional
        The matching rule (see above). Unnecessary if ``make`` is a dict.
    costmodel : callable, optional
        The cost model (see above).
    repeat : bool, optional
        If True, repeatedly apply ``xreplace`` until no more replacements are
        possible. Defaults to False.
    """
    found = OrderedDict()
    rebuilt = []

    # Define /replace()/ based on the user-provided /make/
    if isinstance(make, dict):
        rule = rule if rule is not None else (lambda i: i in make)
        replace = lambda i: make[i]
    else:
        assert callable(make) and callable(rule)

        def replace(expr):
            temporary = found.get(expr)
            if not temporary:
                temporary = make()
                found[expr] = temporary
            return temporary

    def run(expr):
        if expr.is_Atom or expr.is_Indexed:
            return expr, rule(expr)
        elif expr.is_Pow:
            base, flag = run(expr.base)
            if flag and costmodel(base):
                return expr.func(replace(base), expr.exp,
                                 evaluate=False), False
            else:
                return expr.func(base, expr.exp, evaluate=False), flag
        else:
            children = [run(a) for a in expr.args]
            matching = [a for a, flag in children if flag]
            other = [a for a, _ in children if a not in matching]
            if matching:
                matched = expr.func(*matching, evaluate=False)
                if len(matching) == len(children) and rule(expr):
                    # Go look for longer expressions first
                    return matched, True
                elif rule(matched) and costmodel(matched):
                    # Replace what I can replace, then give up
                    rebuilt = expr.func(*(other + [replace(matched)]),
                                        evaluate=False)
                    return rebuilt, False
                else:
                    # Replace flagged children, then give up
                    replaced = [replace(e) for e in matching if costmodel(e)]
                    unreplaced = [e for e in matching if not costmodel(e)]
                    rebuilt = expr.func(*(other + replaced + unreplaced),
                                        evaluate=False)
                    return rebuilt, False
            return expr.func(*other, evaluate=False), False

    # Process the provided expressions
    for expr in as_tuple(exprs):
        assert expr.is_Equality
        root = expr.rhs

        while True:
            ret, flag = run(root)
            if isinstance(make, dict) and root.is_Atom and flag:
                rebuilt.append(
                    expr.func(expr.lhs, replace(root), evaluate=False))
                break
            elif repeat and ret != root:
                root = ret
            else:
                rebuilt.append(expr.func(expr.lhs, ret, evaluate=False))
                break

    # Post-process the output
    found = [Eq(v, k) for k, v in found.items()]

    return found + rebuilt, found
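The docstring's `a + b` scenario made concrete, with a matching rule and cost model built from SymPy primitives (hypothetical choices, purely to show the two criteria acting independently):

import sympy

a, b = sympy.symbols('a b')
rule = lambda e: e.free_symbols <= {a, b}       # matches a, b and a + b
costmodel = lambda e: sympy.count_ops(e) > 0    # rejects bare symbols

candidates = [a, b, a + b]
assert all(rule(e) for e in candidates)
assert [e for e in candidates if costmodel(e)] == [a + b]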
Example 43
 def __init__(self, pragmas, functions=None, **kwargs):
     super().__init__(header=pragmas)
     self._functions = as_tuple(functions)
Example 44
 def __init__(self, condition, body=None):
     self.condition = condition
     self.body = as_tuple(body)
Example 45
 def __init__(self, header=None, body=None, footer=None):
     self.header = as_tuple(header)
     self.body = as_tuple(body)
     self.footer = as_tuple(footer)
Example 46
 def __init__(self, name, params=None):
     self.name = name
     self.params = as_tuple(params)
Example 47
 def __init__(self, shape, dimensions):
     self._glb_shape = as_tuple(shape)
     self._dimensions = as_tuple(dimensions)
Example 48
 def __init__(self, condition, then_body, else_body=None):
     self.condition = condition
     self.then_body = as_tuple(then_body)
     self.else_body = as_tuple(else_body)
Example 49
 def getreads(self, function):
     return as_tuple(self.reads.get(function))
Example 50
 def __init__(self, halo_scheme, body=None, properties=None):
     super(HaloSpot, self).__init__(body=body)
     self.halo_scheme = halo_scheme
     self.properties = as_tuple(properties)
Example 51
 def callback_shape(ctx, param, value):
     return as_tuple(value)
Example 52
def mpi_index_maps(loc_idx, shape, topology, coords, comm):
    """
    Generate various data structures used to determine what MPI communication
    is required. The function creates the following:

    owners: An array of shape ``shape`` where each index signifies the rank on which
    that data is stored.

    send: An array of shape ``shape`` where each index signifies the rank to which
    data belonging to that index should be sent.

    global_si: An array of shape ``shape`` where each index contains the global index
    to which that index should be sent.

    local_si: An array of shape ``shape`` where each index contains the local index
    (on the destination rank) to which that index should be sent.

    Parameters
    ----------
    loc_idx : tuple of slices
        The coordinates of interest to the current MPI rank.
    shape: np.array of tuples
        Array containing the local shape of data to each rank.
    topology: tuple
        Topology of the decomposed domain.
    coords: tuple of tuples
        The coordinates of each MPI rank in the decomposed domain, ordered
        based on the MPI rank.
    comm : MPI communicator

    Examples
    --------
    An array is given by A = [[  0,  1,  2,  3],
                              [  4,  5,  6,  7],
                              [  8,  9, 10, 11],
                              [ 12, 13, 14, 15]],
    which is then distributed over four ranks such that on rank 0:

    A = [[ 0, 1],
         [ 4, 5]],

    on rank 1:

    A = [[ 2, 3],
         [ 6, 7]],

    on rank 2:

    A = [[  8,  9],
         [ 12, 13]],

    on rank 3:

    A = [[ 10, 11],
         [ 14, 15]].

    Taking the slice A[2:0:-1, 2:0:-1] the expected output (in serial) is

    [[  0,  1,  2,  3],
     [  4, 10,  9,  7],
     [  8,  6,  5, 11],
     [ 12, 13, 14, 15]],

    Hence, in this case the following would be generated:

    owners = [[0, 1],
              [2, 3]],

    send = [[3, 2],
            [1, 0]],

    global_si = [[(2, 2), (2, 1)],
                 [(1, 2), (1, 1)]],

    local_si = [[(0, 0), (0, 1)],
                [(1, 0), (1, 1)]].
    """

    nprocs = comm.Get_size()

    # Gather data structures from all ranks in order to produce the
    # relevant mappings.
    dat_len = np.zeros(topology, dtype=tuple)
    for j in range(nprocs):
        dat_len[coords[j]] = comm.bcast(shape, root=j)
        if any(k == 0 for k in dat_len[coords[j]]):
            dat_len[coords[j]] = as_tuple([0] * len(dat_len[coords[j]]))
    dat_len_cum = distributed_data_size(dat_len, coords, topology)
    # This 'transform' will be required to produce the required maps
    transform = []
    for i in as_tuple(loc_idx):
        if isinstance(i, slice):
            if i.step is not None:
                transform.append(slice(None, None, np.sign(i.step)))
            else:
                transform.append(slice(None, None, None))
        else:
            transform.append(0)
    transform = as_tuple(transform)

    global_size = dat_len_cum[coords[-1]]

    indices = np.zeros(global_size, dtype=tuple)
    global_si = np.zeros(global_size, dtype=tuple)
    it = np.nditer(indices, flags=['refs_ok', 'multi_index'])
    while not it.finished:
        index = it.multi_index
        indices[index] = index
        it.iternext()
    global_si[:] = indices[transform]

    # Create the 'rank' slices
    rank_slice = []
    for j in coords:
        this_rank = []
        for k in dat_len[j]:
            this_rank.append(slice(0, k, 1))
        rank_slice.append(this_rank)
    # Normalize the slices:
    n_rank_slice = []
    for i in range(len(rank_slice)):
        my_coords = coords[i]
        if any([j.stop == j.start for j in rank_slice[i]]):
            n_rank_slice.append(as_tuple([None] * len(rank_slice[i])))
            continue
        if i == 0:
            n_rank_slice.append(as_tuple(rank_slice[i]))
            continue
        left_neighbours = []
        for j in range(len(my_coords)):
            left_coord = list(my_coords)
            left_coord[j] -= 1
            left_neighbours.append(as_tuple(left_coord))
        left_neighbours = as_tuple(left_neighbours)
        n_slice = []
        for j in range(len(my_coords)):
            if left_neighbours[j][j] < 0:
                start = 0
                stop = dat_len_cum[my_coords][j]
            else:
                start = dat_len_cum[left_neighbours[j]][j]
                stop = dat_len_cum[my_coords][j]
            n_slice.append(slice(start, stop, 1))
        n_rank_slice.append(as_tuple(n_slice))
    n_rank_slice = as_tuple(n_rank_slice)

    # Now fill each elements owner:
    owners = np.zeros(global_size, dtype=np.int32)
    send = np.zeros(global_size, dtype=np.int32)
    for i in range(len(n_rank_slice)):
        if any([j is None for j in n_rank_slice[i]]):
            continue
        else:
            owners[n_rank_slice[i]] = i
    send[:] = owners[transform]

    # Construct local_si
    local_si = np.zeros(global_size, dtype=tuple)
    it = np.nditer(local_si, flags=['refs_ok', 'multi_index'])
    while not it.finished:
        index = it.multi_index
        owner = owners[index]
        my_slice = n_rank_slice[owner]
        rnorm_index = []
        for j, k in zip(my_slice, index):
            rnorm_index.append(k - j.start)
        local_si[index] = as_tuple(rnorm_index)
        it.iternext()
    return owners, send, global_si, local_si
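The `indices[transform]` device is easiest to see on a tiny array. Below, the `global_si` map for a 2x2 array under a fully mirrored slice; pure NumPy, no MPI required:

import numpy as np

# Build an array whose entries are their own global indices
indices = np.empty((2, 2), dtype=tuple)
for i in range(2):
    for j in range(2):
        indices[i, j] = (i, j)

# Mirror both axes, as for a slice like A[::-1, ::-1]
transform = (slice(None, None, -1), slice(None, None, -1))
global_si = indices[transform]

# Position (0, 0) is to receive the value currently at global index (1, 1)
assert global_si[0, 0] == (1, 1)
assert global_si[1, 1] == (0, 0)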
Example 53
def detect_flow_directions(exprs):
    """
    Return a mapper from :class:`Dimension`s to iterables of
    :class:`IterationDirection`s representing the theoretically necessary
    directions to evaluate ``exprs`` so that information "naturally flows"
    from one iteration to the next.
    """
    exprs = as_tuple(exprs)

    writes = [Access(i.lhs, 'W') for i in exprs]
    reads = flatten(retrieve_indexed(i.rhs, mode='all') for i in exprs)
    reads = [Access(i, 'R') for i in reads]

    # Determine indexed-wise direction by looking at the vector distance
    mapper = defaultdict(set)
    for w in writes:
        for r in reads:
            if r.name != w.name:
                continue
            dimensions = [d for d in w.aindices if d is not None]
            if not dimensions:
                continue
            for d in dimensions:
                distance = None
                for i in d._defines:
                    try:
                        distance = w.distance(r, i, view=i)
                    except TypeError:
                        pass
                try:
                    if distance > 0:
                        mapper[d].add(Forward)
                        break
                    elif distance < 0:
                        mapper[d].add(Backward)
                        break
                    else:
                        mapper[d].add(Any)
                except TypeError:
                    # Nothing can be deduced
                    mapper[d].add(Any)
                    break
            # Remainder
            for d in dimensions[dimensions.index(d) + 1:]:
                mapper[d].add(Any)

    # Add in any encountered Dimension
    mapper.update({
        d: {Any}
        for d in flatten(i.aindices for i in reads + writes)
        if d is not None and d not in mapper
    })

    # Add in derived-dimensions parents, in case they haven't been detected yet
    mapper.update({
        k.parent: set(v)
        for k, v in mapper.items()
        if k.is_Derived and mapper.get(k.parent, {Any}) == {Any}
    })

    return mapper
Example 54
def flip_idx(idx, decomposition):
    """
    This function serves two purposes:
    1) Convert a global index containing a slice with step < 0 into a 'mirrored'
       index with all slice steps > 0.
    2) Normalize indices with slices containing negative starts/stops.

    Parameters
    ----------
    idx: tuple of slices/ints/tuples
        Representation of the indices that require processing.
    decomposition : tuple of Decomposition
        The data decomposition, for each dimension.

    Examples
    --------
    In the following examples, the domain consists of 12 indices, split over
    four subdomains [0, 3]. We pick 2 as local subdomain.

    >>> from devito.data import Decomposition, flip_idx
    >>> d = Decomposition([[0, 1, 2], [3, 4], [5, 6, 7], [8, 9, 10, 11]], 2)
    >>> d
    Decomposition([0,2], [3,4], <<[5,7]>>, [8,11])

    Example with negative stepped slices:

    >>> idx = (slice(4, None, -1))
    >>> fidx = flip_idx(idx, (d,))
    >>> fidx
    (slice(None, 5, 1),)

    Example with negative start/stops:

    >>> idx2 = (slice(-4, -1, 1))
    >>> fidx2 = flip_idx(idx2, (d,))
    >>> fidx2
    (slice(8, 11, 1),)
    """
    processed = []
    for i, j in zip(as_tuple(idx), decomposition):
        if isinstance(i, slice) and i.step is not None and i.step < 0:
            if i.start is None:
                stop = None
            elif i.start > 0:
                stop = i.start + 1
            else:
                stop = i.start + j.glb_max + 2
            if i.stop is None:
                start = None
            elif i.stop > 0:
                start = i.stop + 1
            else:
                start = i.stop + j.glb_max + 2
            processed.append(slice(start, stop, -i.step))
        elif isinstance(i, slice):
            if i.start is not None and i.start < 0:
                start = i.start + j.glb_max + 1
            else:
                start = i.start
            if i.stop is not None and i.stop < 0:
                stop = i.stop + j.glb_max + 1
            else:
                stop = i.stop
            processed.append(slice(start, stop, i.step))
        else:
            processed.append(i)
    return as_tuple(processed)
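The mirroring can be sanity-checked against NumPy: a negatively-stepped slice reads the same elements as the flipped positively-stepped slice that flip_idx computes (cf. the `slice(None, 5, 1)` in the docstring example):

import numpy as np

a = np.arange(12)
# a[4::-1] (step < 0) touches the same elements as a[:5:1], reversed
assert (a[4::-1] == a[:5:1][::-1]).all()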
Example 55
 def affine_if_present(self, findices):
     """Return False if any of the provided findices appears in self and
     is not affine, True otherwise."""
     findices = as_tuple(findices)
     return (set(findices) & set(self.findices)).issubset(
         set(self.findices_affine))
Example 56
def distributed_data_size(shape, coords, topology):
    """
    Compute the cumulative shape of the distributed data (cshape).

    Parameters
    ----------
    shape: np.array of tuples
        Array containing the local shape of data to each rank.
    coords: tuple of tuples
        The coordinates of each MPI rank in the decomposed domain, ordered
        based on the MPI rank.
    topology: tuple
        Topology of the decomposed domain.

    Examples
    --------
    Given a set of distributed data such that:

    shape = [[ (2, 2), (2, 2)],
             [ (2, 2), (2, 2)]],

    (that is, there are 4 ranks and the data on each rank has shape (2, 2)).
    cshape will be returned as

    cshape = [[ (2, 2), (2, 4)],
              [ (4, 2), (4, 4)]].
    """
    cshape = np.zeros(topology, dtype=tuple)
    for i in range(len(coords)):
        my_coords = coords[i]
        if i == 0:
            cshape[my_coords] = shape[my_coords]
            continue
        left_neighbours = []
        for j in range(len(my_coords)):
            left_coord = list(my_coords)
            left_coord[j] -= 1
            left_neighbours.append(as_tuple(left_coord))
        left_neighbours = as_tuple(left_neighbours)
        n_dat = []  # Normalised data size
        if sum(shape[my_coords]) == 0:
            prev_dat_len = []
            for j in left_neighbours:
                if any(d < 0 for d in j):
                    pass
                else:
                    prev_dat_len.append(cshape[j])
            func = lambda a, b: max([d[b] for d in a])
            max_dat_len = []
            for j in range(len(my_coords)):
                max_dat_len.append(func(prev_dat_len, j))
            cshape[my_coords] = as_tuple(max_dat_len)
        else:
            for j in range(len(my_coords)):
                if left_neighbours[j][j] < 0:
                    c_dat = shape[my_coords][j]
                    n_dat.append(c_dat)
                else:
                    c_dat = shape[my_coords][j]  # Current length
                    p_dat = cshape[left_neighbours[j]][j]  # Previous length
                    n_dat.append(c_dat + p_dat)
            cshape[my_coords] = as_tuple(n_dat)
    return cshape
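For the regular case in the docstring, where every rank on a 2x2 topology holds a `(2, 2)` block, the cumulative shape is just a running sum of extents along each axis; a quick check of the expected output:

# Each rank holds a (2, 2) block; cshape[i][j] accumulates extents
# up to and including rank (i, j) along each axis
local = (2, 2)
cshape = [[(local[0] * (i + 1), local[1] * (j + 1)) for j in range(2)]
          for i in range(2)]
assert cshape == [[(2, 2), (2, 4)], [(4, 2), (4, 4)]]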
Example 57
 def __new__(cls, **kwargs):
     name = kwargs.get('name', cls.name)
     value = cls.default_value()
     obj = Constant.__new__(cls, name=name, dtype=np.int32, value=value)
     obj.aliases = as_tuple(kwargs.get('aliases')) + (name,)
     return obj
Example 58
    def __init__(self, expressions, **kwargs):
        expressions = as_tuple(expressions)

        # Input check
        if any(not isinstance(i, sympy.Eq) for i in expressions):
            raise InvalidOperator("Only SymPy expressions are allowed.")

        self.name = kwargs.get("name", "Kernel")
        subs = kwargs.get("subs", {})
        dse = kwargs.get("dse", configuration['dse'])
        dle = kwargs.get("dle", configuration['dle'])

        # Header files, etc.
        self._headers = list(self._default_headers)
        self._includes = list(self._default_includes)
        self._globals = list(self._default_globals)

        # Required for compilation
        self._compiler = configuration['compiler']
        self._lib = None
        self._cfunction = None

        # References to local or external routines
        self.func_table = OrderedDict()

        # Expression lowering and analysis
        expressions = [LoweredEq(e, subs=subs) for e in expressions]
        self.dtype = retrieve_dtype(expressions)
        self.input = filter_sorted(flatten(e.reads for e in expressions))
        self.output = filter_sorted(flatten(e.writes for e in expressions))
        self.dimensions = filter_sorted(
            flatten(e.dimensions for e in expressions))

        # Group expressions based on their iteration space and data dependences,
        # and apply the Devito Symbolic Engine (DSE) for flop optimization
        clusters = clusterize(expressions)
        clusters = rewrite(clusters, mode=set_dse_mode(dse))

        # Lower Clusters to an Iteration/Expression tree (IET)
        nodes = iet_build(clusters, self.dtype)

        # Introduce C-level profiling infrastructure
        nodes, self.profiler = self._profile_sections(nodes)

        # Translate into backend-specific representation (e.g., GPU, Yask)
        nodes = self._specialize(nodes)

        # Apply the Devito Loop Engine (DLE) for loop optimization
        dle_state = transform(nodes, *set_dle_mode(dle))

        # Update the Operator state based on the DLE
        self.dle_arguments = dle_state.arguments
        self.dle_flags = dle_state.flags
        self.func_table.update(
            OrderedDict([(i.name, MetaCall(i, True))
                         for i in dle_state.elemental_functions]))
        self.dimensions.extend([
            i.argument for i in self.dle_arguments
            if isinstance(i.argument, Dimension)
        ])
        self._includes.extend(list(dle_state.includes))

        # Introduce the required symbol declarations
        nodes = iet_insert_C_decls(dle_state.nodes, self.func_table)

        # Insert data and pointer casts for array parameters and profiling structs
        nodes = self._build_casts(nodes)

        # Derive parameters as symbols not defined in the kernel itself
        parameters = self._build_parameters(nodes)

        # Finish instantiation
        super(Operator, self).__init__(self.name, nodes, 'int', parameters, ())
Example 59
 def __init__(self, body, captures=None, parameters=None):
     self.body = as_tuple(body)
     self.captures = as_tuple(captures)
     self.parameters = as_tuple(parameters)
Example 60
 def __expr_finalize__(self, expr, pragmas):
     """Finalize the Expression initialization."""
     self._expr = expr
     self._pragmas = as_tuple(pragmas)