def xreplace_indices(exprs, mapper, key=None, only_rhs=False):
    """
    Replace array indices in expressions.

    Parameters
    ----------
    exprs : expr-like or list of expr-like
        One or more expressions to which the replacement is applied.
    mapper : dict
        The substitution rules.
    key : list of symbols or callable
        An escape hatch to rule out some objects from the replacement.
        If a list, apply the replacement to the symbols in ``key`` only.
        If a callable, apply the replacement to a symbol S if and only if
        ``key(S)`` gives True.
    only_rhs : bool, optional
        If True, apply the replacement to Eq right-hand sides only.
    """
    get = lambda i: i.rhs if only_rhs is True else i
    handle = flatten(retrieve_indexed(get(i)) for i in as_tuple(exprs))
    if isinstance(key, Iterable):
        handle = [i for i in handle if i.base.label in key]
    elif callable(key):
        handle = [i for i in handle if key(i)]
    mapper = dict(zip(handle, [i.xreplace(mapper) for i in handle]))
    replaced = [i.xreplace(mapper) for i in as_tuple(exprs)]
    return replaced if isinstance(exprs, Iterable) else replaced[0]
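# A usage sketch (illustrative only; assumes plain SymPy inputs and that
# `retrieve_indexed` traverses relationals, as the implementation above does):
#
#     >>> from sympy import Eq, IndexedBase, symbols
#     >>> x, y = symbols('x y')
#     >>> u = IndexedBase('u')
#     >>> xreplace_indices(Eq(u[x], u[x - 1]), {x: y})
#     Eq(u[y], u[y - 1])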
def iet_analyze(iet):
    """
    Analyze an Iteration/Expression tree and decorate it with metadata
    describing relevant computational properties (e.g., if an Iteration
    is parallelizable or not). This function performs actual data
    dependence analysis.
    """
    # Analyze Iterations
    analysis = mark_iteration_parallel(iet)
    analysis = mark_iteration_vectorizable(analysis)
    analysis = mark_iteration_wrappable(analysis)
    analysis = mark_iteration_affine(analysis)

    # Analyze HaloSpots
    analysis = mark_halospot_useless(analysis)
    analysis = mark_halospot_hoistable(analysis)
    analysis = mark_halospot_overlappable(analysis)

    # Decorate the Iteration/Expression tree with the found properties
    mapper = OrderedDict()
    for k, v in list(analysis.properties.items()):
        args = k.args
        properties = as_tuple(args.pop('properties')) + as_tuple(v)
        mapper[k] = k._rebuild(properties=properties, **args)
    processed = Transformer(mapper, nested=True).visit(iet)

    return processed
def visit_Node(self, o, ret=None, parents=None, in_parent=False):
    if ret is None:
        ret = self.default_retval()
    if parents is None:
        parents = []
    if isinstance(o, self.child_types):
        if self.mode == 'groupby':
            ret.setdefault(as_tuple(parents), []).append(o)
        elif self.mode == 'immediate':
            if in_parent:
                ret.setdefault(parents[-1], []).append(o)
            else:
                ret.setdefault(None, []).append(o)
        else:
            for i in parents:
                ret.setdefault(i, []).append(o)
    if isinstance(o, self.parent_type):
        parents.append(o)
        for i in o.children:
            ret = self._visit(i, ret=ret, parents=parents, in_parent=True)
        parents.remove(o)
    else:
        for i in o.children:
            ret = self._visit(i, ret=ret, parents=parents, in_parent=in_parent)
    return ret
def __init__(self, exprs, ispace, dspace, atomics=None, guards=None):
    self._exprs = list(ClusterizedEq(i, ispace=ispace, dspace=dspace)
                       for i in as_tuple(exprs))
    self._ispace = ispace
    self._dspace = dspace
    self._atomics = set(atomics or [])
    self._guards = guards or {}
def __staggered_setup__(self, **kwargs):
    """
    Setup staggering-related metadata. This method assigns:

        * 0 to non-staggered dimensions;
        * 1 to dimensions staggered along the positive direction;
        * -1 to dimensions staggered along the negative direction.
    """
    staggered = kwargs.get('staggered')
    if staggered is None:
        self.is_Staggered = False
        return tuple(0 for _ in self.indices)
    else:
        self.is_Staggered = True
        if staggered is NODE:
            staggered = ()
        elif staggered is CELL:
            staggered = self.indices
        else:
            staggered = as_tuple(staggered)
        mask = []
        for d in self.indices:
            if d in staggered:
                mask.append(1)
            elif -d in staggered:
                mask.append(-1)
            else:
                mask.append(0)
        return tuple(mask)
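# For example (illustrative values): a Function with indices (x, y, z) and
# ``staggered=(x, -z)`` would yield the mask (1, 0, -1), i.e. staggered
# forward in x, unstaggered in y, staggered backward in z.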
def _create_implicit_exprs(self):
    if not len(self._bounds) == 2*len(self.dimensions):
        raise ValueError("Left and right bounds must be supplied for each dimension")
    n_domains = self.n_domains
    i_dim = self.implicit_dimension
    dat = []
    # Organise the data contained in 'bounds' into a form such that the
    # associated implicit equations can easily be created.
    for j in range(len(self._bounds)):
        index = floor(j/2)
        d = self.dimensions[index]
        if j % 2 == 0:
            fname = d.min_name
        else:
            fname = d.max_name
        func = Function(name=fname, shape=(n_domains,), dimensions=(i_dim,),
                        dtype=np.int32)
        # Check if shorthand notation has been provided:
        if isinstance(self._bounds[j], int):
            bounds = np.full((n_domains,), self._bounds[j], dtype=np.int32)
            func.data[:] = bounds
        else:
            func.data[:] = self._bounds[j]
        dat.append(Eq(d.thickness[j % 2][0], func[i_dim]))
    return as_tuple(dat)
def run(problem, **kwargs):
    """
    A single run with a specific set of performance parameters.
    """
    setup = tti_setup if problem == 'tti' else acoustic_setup
    options = {}

    time_order = kwargs.pop('time_order')[0]
    space_order = kwargs.pop('space_order')[0]
    autotune = kwargs.pop('autotune')

    # Should a specific block-shape be used? Useful if one wants to skip
    # the autotuning pass as a good block-shape is already known
    block_shape = as_tuple(kwargs.pop('block_shape'))
    if all(block_shape):
        if autotune:
            warning("Skipping autotuning (using explicit block-shape `%s`)"
                    % str(block_shape))
            autotune = False
        # This is quite hacky, but it does the trick
        for d, bs in zip(['x', 'y', 'z'], block_shape):
            options['%s0_blk_size' % d] = bs

    solver = setup(space_order=space_order, time_order=time_order, **kwargs)
    solver.forward(autotune=autotune, **options)
def q_affine(expr, vars):
    """
    Return True if ``expr`` is (separately) affine in the variables ``vars``,
    False otherwise.

    Readapted from: https://stackoverflow.com/questions/36283548/check-if-an-equation-is-linear-for-a-specific-set-of-variables
    """
    vars = as_tuple(vars)

    # If any `vars` does not appear in `expr`, the only possibility
    # for `expr` to be affine is that it's a constant function
    if any(x not in expr.atoms() for x in vars):
        return q_constant(expr)

    # At this point, `expr` is (separately) affine in the `vars` variables
    # if all non-mixed second order derivatives are identically zero.
    for x in vars:
        # The vast majority of calls here are incredibly simple tests
        # like q_affine(x+1, [x]). Catch these quickly and
        # explicitly, instead of calling the very slow function `diff`.
        if expr == x:
            continue
        if expr.is_Add and len(expr.args) == 2:
            if expr.args[0] == x and expr.args[1].is_Number:
                continue
            if expr.args[1] == x and expr.args[0].is_Number:
                continue
        try:
            if diff(expr, x) == nan or not Eq(diff(expr, x, x), 0):
                return False
        except TypeError:
            return False
    return True
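# Illustrative doctest-style examples (a sketch assuming plain SymPy symbols;
# note the "separately" affine subtlety for the mixed product):
#
#     >>> from sympy import symbols
#     >>> x, y = symbols('x y')
#     >>> q_affine(3*x + 1, x)
#     True
#     >>> q_affine(x*y, [x, y])   # separately affine in x and in y
#     True
#     >>> q_affine(x**2, x)       # non-zero second derivative
#     False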
def sum(self, p=None, dims=None):
    """
    Generate a symbolic expression computing the sum of ``p`` points
    along the spatial dimensions ``dims``.

    Parameters
    ----------
    p : int, optional
        The number of summands. Defaults to the halo size.
    dims : tuple of Dimension, optional
        The Dimensions along which the sum is computed. Defaults to
        ``self``'s spatial dimensions.
    """
    points = []
    for d in (as_tuple(dims) or self.space_dimensions):
        if p is None:
            lp = self._size_inhalo[d].left
            rp = self._size_inhalo[d].right
        else:
            lp = p // 2 + p % 2
            rp = p // 2
        indices = [d - i for i in range(lp, 0, -1)]
        indices.extend([d + i for i in range(rp)])
        points.extend([self.subs(d, i) for i in indices])
    return sum(points)
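# For instance, with ``p=3`` along a dimension x: ``lp = 2`` and ``rp = 1``,
# so the summands sit at x-2, x-1 and x (the right-side offsets start at 0,
# hence the centre point is included among the ``rp`` points).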
def section(self, findices):
    """
    Return a view of ``self`` in which the slots corresponding to the
    provided ``findices`` have been zeroed.
    """
    return Vector(*[d if i not in as_tuple(findices) else 0
                    for d, i in zip(self, self.findices)])
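# E.g. (illustrative values): for an instance holding (t0, x0, y0) with
# ``findices == (t, x, y)``, ``section(x)`` returns Vector(t0, 0, y0).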
def distance(self, other, findex=None, view=None):
    """
    Compute the distance from ``self`` to ``other``.

    Parameters
    ----------
    other : IterationInstance
        The IterationInstance from which the distance is computed.
    findex : Dimension, optional
        If supplied, compute the distance only up to and including ``findex``.
    view : list of Dimension, optional
        If supplied, project the distance along these Dimensions.
    """
    if not isinstance(other, IterationInstance):
        raise TypeError("Cannot compute distance from obj of type %s" % type(other))
    if self.findices != other.findices:
        raise TypeError("Cannot compute distance due to mismatching `findices`")
    if findex is not None:
        try:
            limit = self._cached_findices_index[findex] + 1
        except KeyError:
            raise TypeError("Cannot compute distance as `findex` not in `findices`")
    else:
        limit = self.rank
    distance = super(IterationInstance, self).distance(other)[:limit]
    if view is None:
        return distance
    else:
        proj = [d for d, i in zip(distance, self.findices) if i in as_tuple(view)]
        return Vector(*proj)
def affine_if_present(self, findices):
    """
    Return False if any of the provided findices appears in self and is
    not affine, True otherwise.
    """
    findices = as_tuple(findices)
    return (set(findices) & set(self.findices)).issubset(set(self.findices_affine))
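# E.g., with ``findices == (x, y, z)`` and ``findices_affine == (x, y)``
# (illustrative values): ``affine_if_present(z)`` gives False, whereas
# ``affine_if_present(w)`` gives True, since `w` does not appear in self
# at all and thus cannot violate affinity.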
def __new__(cls, base, index):
    if isinstance(base, (str, sympy.IndexedBase, sympy.Symbol)):
        return sympy.Indexed(base, index)
    elif not isinstance(base, sympy.Basic):
        raise ValueError("`base` must be of type sympy.Basic")
    obj = sympy.Expr.__new__(cls, base)
    obj._base = base
    obj._index = as_tuple(index)
    return obj
def __init__(self, shape, extent=None, origin=None, dimensions=None,
             time_dimension=None, dtype=np.float32, subdomains=None, comm=None):
    self._shape = as_tuple(shape)
    self._extent = as_tuple(extent or tuple(1. for _ in self.shape))
    self._dtype = dtype

    if dimensions is None:
        # Create the spatial dimensions and constant spacing symbols
        assert self.dim <= 3
        dim_names = self._default_dimensions[:self.dim]
        dim_spacing = tuple(self._const(name='h_%s' % n, value=v, dtype=self.dtype)
                            for n, v in zip(dim_names, self.spacing))
        self._dimensions = tuple(SpaceDimension(name=n, spacing=s)
                                 for n, s in zip(dim_names, dim_spacing))
    else:
        self._dimensions = dimensions

    # Initialize SubDomains
    subdomains = tuple(i for i in (Domain(), Interior(), *as_tuple(subdomains)))
    for i in subdomains:
        i.__subdomain_finalize__(self.dimensions, self.shape)
    self._subdomains = subdomains

    origin = as_tuple(origin or tuple(0. for _ in self.shape))
    self._origin = tuple(self._const(name='o_%s' % d.name, value=v, dtype=self.dtype)
                         for d, v in zip(self.dimensions, origin))

    # Sanity check
    assert (self.dim == len(self.origin) == len(self.extent) == len(self.spacing))

    # Store or create default symbols for time and stepping dimensions
    if time_dimension is None:
        spacing = self._const(name='dt', dtype=self.dtype)
        self._time_dim = TimeDimension(name='time', spacing=spacing)
        self._stepping_dim = self._make_stepping_dim(self.time_dim, name='t')
    elif isinstance(time_dimension, TimeDimension):
        self._time_dim = time_dimension
        self._stepping_dim = self._make_stepping_dim(self.time_dim)
    else:
        raise ValueError("`time_dimension` must be None or of type TimeDimension")

    self._distributor = Distributor(self.shape, self.dimensions, comm)
def wrapper(self, other):
    if not isinstance(other, Vector):
        try:
            other = Vector(*other)
        except TypeError:
            # Not iterable
            other = Vector(*(as_tuple(other)*len(self)))
    if relax is False and len(self) != len(other):
        raise TypeError("Cannot operate with Vectors of different rank")
    return func(self, other)
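# For instance, comparing a rank-3 Vector against the scalar 0 broadcasts the
# scalar into Vector(0, 0, 0) before ``func`` is applied, while an iterable of
# matching length is promoted to a Vector directly.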
def q_multivar(expr, vars):
    """
    Return True if at least two variables in ``vars`` appear in ``expr``,
    False otherwise.
    """
    # The vast majority of calls here provide incredibly simple single variable
    # functions, so if there are < 2 free symbols we return immediately
    if not len(expr.free_symbols) > 1:
        return False
    return len(set(as_tuple(vars)) & expr.free_symbols) >= 2
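# Illustrative doctest-style examples (a sketch assuming plain SymPy symbols):
#
#     >>> from sympy import symbols
#     >>> x, y = symbols('x y')
#     >>> q_multivar(x + y, [x, y])
#     True
#     >>> q_multivar(x*y, [x])   # only one of `vars` actually appears
#     False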
def detect_io(exprs, relax=False):
    """
    ``{exprs} -> ({reads}, {writes})``

    Parameters
    ----------
    exprs : expr-like or list of expr-like
        The searched expressions.
    relax : bool, optional
        If False, as by default, collect only Constants and Functions.
        Otherwise, collect any Basic object.
    """
    exprs = as_tuple(exprs)
    if relax is False:
        rule = lambda i: i.is_Input
    else:
        rule = lambda i: i.is_Scalar or i.is_Tensor

    # Don't forget this nasty case, with indirections on the LHS:
    # >>> u[t, a[x]] = f[x] -> (reads={a, f}, writes={u})

    roots = []
    for i in exprs:
        try:
            roots.append(i.rhs)
            roots.extend(list(i.lhs.indices))
        except AttributeError:
            # E.g., FunctionFromPointer
            roots.append(i)

    reads = []
    terminals = flatten(retrieve_terminals(i, deep=True) for i in roots)
    for i in terminals:
        candidates = i.free_symbols
        try:
            candidates.update({i.function})
        except AttributeError:
            pass
        for j in candidates:
            try:
                if rule(j):
                    reads.append(j)
            except AttributeError:
                pass

    writes = []
    for i in exprs:
        try:
            f = i.lhs.function
        except AttributeError:
            continue
        if rule(f):
            writes.append(f)

    return filter_sorted(reads), filter_sorted(writes)
def process(func, state):
    """
    Apply ``func`` to the IETs in ``state._efuncs``, and update ``state``
    accordingly.
    """
    # Create a Call graph. `func` will be applied to each node in the Call graph.
    # `func` might change an `efunc` signature; the Call graph will be used to
    # propagate such change through the `efunc` callers
    dag = DAG(nodes=['root'])
    queue = ['root']
    while queue:
        caller = queue.pop(0)
        callees = FindNodes(Call).visit(state._efuncs[caller])
        for callee in filter_ordered([i.name for i in callees]):
            if callee in state._efuncs:  # Exclude foreign Calls, e.g., MPI calls
                try:
                    dag.add_node(callee)
                    queue.append(callee)
                except KeyError:
                    # `callee` already in `dag`
                    pass
                dag.add_edge(callee, caller)
    assert dag.size == len(state._efuncs)

    # Apply `func`
    for i in dag.topological_sort():
        state._efuncs[i], metadata = func(state._efuncs[i])

        # Track any new Dimensions introduced by `func`
        state._dimensions.extend(list(metadata.get('dimensions', [])))

        # Track any new #include required by `func`
        state._includes.extend(list(metadata.get('includes', [])))
        state._includes = filter_ordered(state._includes)

        # Track any new ElementalFunctions
        state._efuncs.update(OrderedDict([(i.name, i)
                                          for i in metadata.get('efuncs', [])]))

        # If there's a change to the `args` and the `iet` is an efunc, then
        # we must update the call sites as well, as the arguments dropped down
        # to the efunc have just increased
        args = as_tuple(metadata.get('args'))
        if args:
            # `extif` avoids redundant updates to the parameters list, due
            # to multiple children wanting to add the same input argument
            extif = lambda v: list(v) + [e for e in args if e not in v]
            stack = [i] + dag.all_downstreams(i)
            for n in stack:
                efunc = state._efuncs[n]
                calls = [c for c in FindNodes(Call).visit(efunc) if c.name in stack]
                mapper = {c: c._rebuild(arguments=extif(c.arguments)) for c in calls}
                efunc = Transformer(mapper).visit(efunc)
                if efunc.is_Callable:
                    efunc = efunc._rebuild(parameters=extif(efunc.parameters))
                state._efuncs[n] = efunc
def parallel(item):
    """
    Run a test in parallel. Readapted from:

        ``https://github.com/firedrakeproject/firedrake/blob/master/tests/conftest.py``
    """
    mpi_exec = 'mpiexec'
    mpi_distro = sniff_mpi_distro(mpi_exec)

    marker = item.get_closest_marker("parallel")
    mode = as_tuple(marker.kwargs.get("mode", 2))
    for m in mode:
        # Parse the `mode`
        if isinstance(m, int):
            nprocs = m
            scheme = 'basic'
            restrain = False
        else:
            if len(m) == 2:
                nprocs, scheme = m
                restrain = False
            elif len(m) == 3:
                nprocs, scheme, restrain = m
            else:
                raise ValueError("Can't run test: unexpected mode `%s`" % m)

        if restrain and os.environ.get('MPI_RESTRAIN', False):
            # A computationally expensive test that would take too long to
            # run on the current machine
            continue

        # Only spew tracebacks on rank 0.
        # Run xfailing tests to ensure that errors are reported to calling process
        if item.cls is not None:
            testname = "%s::%s::%s" % (item.fspath, item.cls.__name__, item.name)
        else:
            testname = "%s::%s" % (item.fspath, item.name)
        args = ["-n", "1", "python", "-m", "pytest", "--runxfail", "-s",
                "-q", testname]
        if nprocs > 1:
            args.extend([":", "-n", "%d" % (nprocs - 1), "python", "-m", "pytest",
                         "--runxfail", "--tb=no", "-q", testname])
        # OpenMPI requires an explicit flag for oversubscription. We need it as some
        # of the MPI tests will spawn lots of processes
        if mpi_distro == 'OpenMPI':
            call = [mpi_exec, '--oversubscribe'] + args
        else:
            call = [mpi_exec] + args

        # Tell the MPI ranks that they are running a parallel test
        os.environ['DEVITO_MPI'] = scheme
        try:
            check_call(call)
        finally:
            os.environ['DEVITO_MPI'] = '0'
def __init__(self, entries=None):
    processed = []
    for i in (entries or []):
        if isinstance(i, StencilEntry):
            processed.append((i.dim, i.ofs))
        elif isinstance(i, tuple):
            entry = StencilEntry(*i)  # Implicit type check
            processed.append((entry.dim, set(as_tuple(entry.ofs))))
        else:
            raise TypeError('Cannot construct a Stencil for %s' % str(i))
    super(Stencil, self).__init__(set, processed)
def is_foldable(nodes):
    """
    Return True if the iterable ``nodes`` consists of foldable Iterations,
    False otherwise.
    """
    nodes = as_tuple(nodes)
    if len(nodes) <= 1 or any(not i.is_Iteration for i in nodes):
        return False
    main = nodes[0]
    return all(i.dim == main.dim and i.limits == main.limits and
               i.index == main.index and i.properties == main.properties
               for i in nodes)
def __init__(self, name, body, retval, parameters=None,
             prefix=('static', 'inline'), dynamic_parameters=None):
    super(ElementalFunction, self).__init__(name, body, retval, parameters, prefix)

    self._mapper = {}
    for i in as_tuple(dynamic_parameters):
        if i.is_Dimension:
            self._mapper[i] = (parameters.index(i.symbolic_min),
                               parameters.index(i.symbolic_max))
        else:
            self._mapper[i] = (parameters.index(i),)
def _apply_op(cls, intervals, key):
    """
    Create a new Interval resulting from the iterative application of the
    method ``key`` over the Intervals in ``intervals``, i.e.:

        ``intervals[0].key(intervals[1]).key(intervals[2])...``
    """
    intervals = as_tuple(intervals)
    partial = intervals[0]
    for i in intervals[1:]:
        partial = getattr(partial, key)(i)
    return partial
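# This is simply a left fold over the Intervals; a functionally equivalent
# sketch using the standard library (same `intervals` and `key` as above):
#
#     from functools import reduce
#     partial = reduce(lambda i0, i1: getattr(i0, key)(i1), as_tuple(intervals))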
def _normalize_index(self, idx):
    if isinstance(idx, np.ndarray):
        # Advanced indexing mode
        return (idx,)
    else:
        idx = as_tuple(idx)
        if any(i is Ellipsis for i in idx):
            # Explicitly replace the Ellipsis
            items = (slice(None),)*(self.ndim - len(idx) + 1)
            return idx[:idx.index(Ellipsis)] + items + idx[idx.index(Ellipsis)+1:]
        else:
            return idx + (slice(None),)*(self.ndim - len(idx))
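# For example (hypothetical values, assuming ``self.ndim == 4``; ``slice(None)``
# abbreviates ``slice(None, None, None)``):
#
#     (3, Ellipsis, 5)  ->  (3, slice(None), slice(None), 5)
#     (3,)              ->  (3, slice(None), slice(None), slice(None))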
def __init__(self, parent_type=None, child_types=None, mode=None):
    super(MapNodes, self).__init__()
    if parent_type is None:
        self.parent_type = Iteration
    elif parent_type == 'any':
        self.parent_type = Node
    else:
        assert issubclass(parent_type, Node)
        self.parent_type = parent_type
    self.child_types = as_tuple(child_types) or (Call, Expression)
    assert mode in (None, 'immediate', 'groupby')
    self.mode = mode
def __new__(cls, params):
    args = []
    for p in as_tuple(params):
        if isinstance(p, str):
            args.append(Symbol(p))
        elif not isinstance(p, Expr):
            raise ValueError("`params` must be an iterable of Expr or str")
        else:
            args.append(p)
    obj = sympy.Expr.__new__(cls, *args)
    obj.params = tuple(args)
    return obj
def q_identity(expr, var):
    """
    Return True if ``expr`` is the identity function in ``var``, modulo a
    constant (that is, a function affine in ``var`` in which the value of
    the coefficient of ``var`` is 1), False otherwise.

    Examples
    ========
    3x -> False
    3x + 1 -> False
    x + 2 -> True
    """
    return len(as_tuple(var)) == 1 and q_affine(expr, var) and (expr - var).is_Number
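# The docstring examples as runnable doctests (a sketch assuming plain SymPy
# symbols):
#
#     >>> from sympy import symbols
#     >>> x = symbols('x')
#     >>> q_identity(x + 2, x)
#     True
#     >>> q_identity(3*x + 1, x)   # affine, but the coefficient of x is 3
#     False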
def _new_operator1(shape, blockshape=None, dle=None):
    blockshape = as_tuple(blockshape)
    grid = Grid(shape=shape, dtype=np.int32)
    infield = Function(name='infield', grid=grid)
    infield.data[:] = np.arange(reduce(mul, shape), dtype=np.int32).reshape(shape)
    outfield = Function(name='outfield', grid=grid)

    stencil = Eq(outfield.indexify(),
                 outfield.indexify() + infield.indexify()*3.0)
    op = Operator(stencil, dle=dle)

    blocksizes = get_blocksizes(op, dle, grid, blockshape)
    op(infield=infield, outfield=outfield, **blocksizes)

    return outfield, op
def _simdize(self, iet):
    """
    Add pragmas to the Iteration/Expression tree to enforce SIMD
    auto-vectorization by the backend compiler.
    """
    ignore_deps = as_tuple(self._backend_compiler_pragma('ignore-deps'))

    mapper = {}
    for tree in retrieve_iteration_tree(iet):
        vector_iterations = [i for i in tree if i.is_Vectorizable]
        for i in vector_iterations:
            aligned = [j for j in FindSymbols('symbolics').visit(i)
                       if j.is_DiscreteFunction]
            if aligned:
                simd = Ompizer.lang['simd-for-aligned']
                simd = as_tuple(simd(','.join([j.name for j in aligned]),
                                     self.platform.simd_reg_size))
            else:
                simd = as_tuple(Ompizer.lang['simd-for'])
            mapper[i] = i._rebuild(pragmas=i.pragmas + ignore_deps + simd)

    processed = Transformer(mapper).visit(iet)

    return processed, {}
def __new__(cls, lhs, rhs=0, subdomain=None, coefficients=None, implicit_dims=None,
            **kwargs):
    kwargs['evaluate'] = False
    obj = sympy.Eq.__new__(cls, lhs, rhs, **kwargs)
    obj._subdomain = subdomain
    obj._substitutions = coefficients
    obj._implicit_dims = as_tuple(implicit_dims)

    if obj._uses_symbolic_coefficients:
        # NOTE: As Coefficients.py is expanded we will not want
        # all rules to be expunged during this process.
        rules = default_rules(obj, obj._symbolic_functions)
        try:
            obj = obj.xreplace({**coefficients.rules, **rules})
        except AttributeError:
            if bool(rules):
                obj = obj.xreplace(rules)

    return obj
def affine(self, findices):
    """
    Return True if all of the provided findices appear in self and are
    affine, False otherwise.
    """
    return set(as_tuple(findices)).issubset(set(self.findices_affine))
def irregular(self, findices):
    """
    Return True if all of the provided findices appear in self and are
    irregular, False otherwise.
    """
    return set(as_tuple(findices)).issubset(set(self.findices_irregular))
def __str__(self):
    return '%s->%s(%s)' % (self.pointer, self.function,
                           ", ".join(str(i) for i in as_tuple(self.params)))
def __indices_setup__(cls, **kwargs):
    return as_tuple(kwargs['dimensions']), as_tuple(kwargs['dimensions'])
def getwrites(self, function):
    return as_tuple(self.writes.get(function))
def _build_dag(self, cgroups, prefix):
    """
    A DAG captures data dependences between ClusterGroups up to the iteration
    space depth dictated by ``prefix``.

    Examples
    --------
    Consider two ClusterGroups `cg0` and `cg1`, and ``prefix=[i]``.

    1) cg0 := b[i, j] = ...
       cg1 := ... = ... b[i, j] ...
       Non-carried flow-dependence, so `cg1` must go after `cg0`.

    2) cg0 := b[i, j] = ...
       cg1 := ... = ... b[i, j-1] ...
       Carried flow-dependence in `j`, so `cg1` must go after `cg0`.

    3) cg0 := b[i, j] = ...
       cg1 := ... = ... b[i, j+1] ...
       Carried anti-dependence in `j`, so `cg1` must go after `cg0`.

    4) cg0 := b[i, j] = ...
       cg1 := ... = ... b[i-1, j+1] ...
       Carried flow-dependence in `i`, so `cg1` can safely go before or after
       `cg0`. Note: the `j+1` in `cg1` has no impact -- the actual dependence
       between `b[i, j]` and `b[i-1, j+1]` is along `i`.
    """
    prefix = {i.dim for i in as_tuple(prefix)}

    dag = DAG(nodes=cgroups)
    for n, cg0 in enumerate(cgroups):
        for cg1 in cgroups[n + 1:]:
            scope = Scope(exprs=cg0.exprs + cg1.exprs)

            # Handle anti-dependences
            deps = scope.d_anti - (cg0.scope.d_anti + cg1.scope.d_anti)
            if any(i.cause & prefix for i in deps):
                # Anti-dependences break the execution flow
                # i) ClusterGroups between `cg0` and `cg1` must precede `cg1`
                for cg2 in cgroups[n:cgroups.index(cg1)]:
                    dag.add_edge(cg2, cg1)
                # ii) ClusterGroups after `cg1` cannot precede `cg1`
                for cg2 in cgroups[cgroups.index(cg1) + 1:]:
                    dag.add_edge(cg1, cg2)
                break
            elif deps:
                dag.add_edge(cg0, cg1)

            # Flow-dependences along one of the `prefix` Dimensions can
            # be ignored; all others require sequentialization
            deps = scope.d_flow - (cg0.scope.d_flow + cg1.scope.d_flow)
            if any(not (i.cause and i.cause & prefix) for i in deps):
                dag.add_edge(cg0, cg1)
                continue

            # Handle increment-after-write dependences
            deps = scope.d_output - (cg0.scope.d_output + cg1.scope.d_output)
            if any(i.is_iaw for i in deps):
                dag.add_edge(cg0, cg1)
                continue

    return dag
def add_ldflags(self, flags):
    self.ldflags = filter_ordered(self.ldflags + list(as_tuple(flags)))
def add_libraries(self, libs):
    self.libraries = filter_ordered(self.libraries + list(as_tuple(libs)))
def add_library_dirs(self, dirs):
    self.library_dirs = filter_ordered(self.library_dirs + list(as_tuple(dirs)))
def add_include_dirs(self, dirs):
    self.include_dirs = filter_ordered(self.include_dirs + list(as_tuple(dirs)))
def __new__(cls, clusters, itintervals=None):
    obj = super(ClusterGroup, cls).__new__(cls, flatten(as_tuple(clusters)))
    obj._itintervals = itintervals
    return obj
def xreplace_constrained(exprs, make, rule=None, costmodel=lambda e: True,
                         repeat=False):
    """
    Unlike ``xreplace``, which replaces all objects specified in a mapper,
    this function replaces all objects satisfying two criteria:

        * The "matching rule" -- a function returning True if a node within
          ``expr`` satisfies a given property, and as such should be replaced;
        * A "cost model" -- a function triggering replacement only if a
          certain cost (e.g., operation count) is exceeded. This function
          is optional.

    Note that there is not necessarily a relationship between the set of nodes
    for which the matching rule returns True and those nodes passing the cost
    model check. It might happen for example that, given the expression
    ``a + b``, all of ``a``, ``b``, and ``a + b`` satisfy the matching rule,
    but only ``a + b`` satisfies the cost model.

    Parameters
    ----------
    exprs : expr-like or list of expr-like
        One or more expressions to which the replacement is applied.
    make : dict or callable
        Either a mapper M: K -> V, indicating how to replace an expression in K
        with a symbol in V, or a callable with internal state that, when called,
        returns unique symbols.
    rule : callable, optional
        The matching rule (see above). Unnecessary if ``make`` is a dict.
    costmodel : callable, optional
        The cost model (see above).
    repeat : bool, optional
        If True, repeatedly apply ``xreplace`` until no more replacements are
        possible. Defaults to False.
    """
    found = OrderedDict()
    rebuilt = []

    # Define `replace()` based on the user-provided `make`
    if isinstance(make, dict):
        rule = rule if rule is not None else (lambda i: i in make)
        replace = lambda i: make[i]
    else:
        assert callable(make) and callable(rule)

        def replace(expr):
            temporary = found.get(expr)
            if not temporary:
                temporary = make()
                found[expr] = temporary
            return temporary

    def run(expr):
        if expr.is_Atom or expr.is_Indexed:
            return expr, rule(expr)
        elif expr.is_Pow:
            base, flag = run(expr.base)
            if flag and costmodel(base):
                return expr.func(replace(base), expr.exp, evaluate=False), False
            else:
                return expr.func(base, expr.exp, evaluate=False), flag
        else:
            children = [run(a) for a in expr.args]
            matching = [a for a, flag in children if flag]
            other = [a for a, _ in children if a not in matching]
            if matching:
                matched = expr.func(*matching, evaluate=False)
                if len(matching) == len(children) and rule(expr):
                    # Go look for longer expressions first
                    return matched, True
                elif rule(matched) and costmodel(matched):
                    # Replace what I can replace, then give up
                    rebuilt = expr.func(*(other + [replace(matched)]),
                                        evaluate=False)
                    return rebuilt, False
                else:
                    # Replace flagged children, then give up
                    replaced = [replace(e) for e in matching if costmodel(e)]
                    unreplaced = [e for e in matching if not costmodel(e)]
                    rebuilt = expr.func(*(other + replaced + unreplaced),
                                        evaluate=False)
                    return rebuilt, False
            return expr.func(*other, evaluate=False), False

    # Process the provided expressions
    for expr in as_tuple(exprs):
        assert expr.is_Equality
        root = expr.rhs

        while True:
            ret, flag = run(root)
            if isinstance(make, dict) and root.is_Atom and flag:
                rebuilt.append(expr.func(expr.lhs, replace(root), evaluate=False))
                break
            elif repeat and ret != root:
                root = ret
            else:
                rebuilt.append(expr.func(expr.lhs, ret, evaluate=False))
                break

    # Post-process the output
    found = [Eq(v, k) for k, v in found.items()]

    return found + rebuilt, found
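# A usage sketch (hypothetical names; `make` yields fresh temporaries r0, r1,
# ..., while the rule flags sub-expressions involving `x`, as one might do for
# CSE-like extraction):
#
#     from itertools import count
#     from sympy import Eq, Symbol, cos, symbols
#
#     a, x, y = symbols('a x y')
#     counter = count()
#     make = lambda: Symbol('r%d' % next(counter))
#     rule = lambda e: x in e.free_symbols
#     processed, found = xreplace_constrained(Eq(a, y + cos(x)), make, rule)
#     # `found` is roughly [Eq(r0, cos(x))], and the last entry of
#     # `processed` is roughly Eq(a, y + r0)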
def __init__(self, pragmas, functions=None, **kwargs):
    super().__init__(header=pragmas)
    self._functions = as_tuple(functions)
def __init__(self, condition, body=None):
    self.condition = condition
    self.body = as_tuple(body)
def __init__(self, header=None, body=None, footer=None):
    self.header = as_tuple(header)
    self.body = as_tuple(body)
    self.footer = as_tuple(footer)
def __init__(self, name, params=None):
    self.name = name
    self.params = as_tuple(params)
def __init__(self, shape, dimensions):
    self._glb_shape = as_tuple(shape)
    self._dimensions = as_tuple(dimensions)
def __init__(self, condition, then_body, else_body=None):
    self.condition = condition
    self.then_body = as_tuple(then_body)
    self.else_body = as_tuple(else_body)
def getreads(self, function):
    return as_tuple(self.reads.get(function))
def __init__(self, halo_scheme, body=None, properties=None):
    super(HaloSpot, self).__init__(body=body)
    self.halo_scheme = halo_scheme
    self.properties = as_tuple(properties)
def callback_shape(ctx, param, value):
    return as_tuple(value)
def mpi_index_maps(loc_idx, shape, topology, coords, comm):
    """
    Generate various data structures used to determine what MPI communication
    is required. The function creates the following:

    owners : An array of shape ``shape`` where each index signifies the rank
             on which that data is stored.
    send : An array of shape ``shape`` where each index signifies the rank to
           which data belonging to that index should be sent.
    global_si : An array of shape ``shape`` where each index contains the
                global index to which that index should be sent.
    local_si : An array of shape ``shape`` where each index contains the local
               index (on the destination rank) to which that index should be
               sent.

    Parameters
    ----------
    loc_idx : tuple of slices
        The coordinates of interest to the current MPI rank.
    shape : np.array of tuples
        Array containing the local shape of data to each rank.
    topology : tuple
        Topology of the decomposed domain.
    coords : tuple of tuples
        The coordinates of each MPI rank in the decomposed domain, ordered
        based on the MPI rank.
    comm : MPI communicator

    Examples
    --------
    An array is given by

        A = [[ 0,  1,  2,  3],
             [ 4,  5,  6,  7],
             [ 8,  9, 10, 11],
             [12, 13, 14, 15]],

    which is then distributed over four ranks such that

        on rank 0: A = [[0, 1],      on rank 1: A = [[2, 3],
                        [4, 5]],                     [6, 7]],

        on rank 2: A = [[ 8,  9],    on rank 3: A = [[10, 11],
                        [12, 13]],                   [14, 15]].

    Taking the slice A[2:0:-1, 2:0:-1] the expected output (in serial) is

        [[ 0,  1,  2,  3],
         [ 4, 10,  9,  7],
         [ 8,  6,  5, 11],
         [12, 13, 14, 15]].

    Hence, in this case the following would be generated:

        owners = [[0, 1],               send = [[3, 2],
                  [2, 3]],                      [1, 0]],

        global_si = [[(2, 2), (2, 1)],  local_si = [[(0, 0), (0, 1)],
                     [(1, 2), (1, 1)]],             [(1, 0), (1, 1)]].
    """
    nprocs = comm.Get_size()

    # Gather data structures from all ranks in order to produce the
    # relevant mappings.
    dat_len = np.zeros(topology, dtype=tuple)
    for j in range(nprocs):
        dat_len[coords[j]] = comm.bcast(shape, root=j)
        if any(k == 0 for k in dat_len[coords[j]]):
            dat_len[coords[j]] = as_tuple([0]*len(dat_len[coords[j]]))
    dat_len_cum = distributed_data_size(dat_len, coords, topology)

    # This 'transform' will be required to produce the required maps
    transform = []
    for i in as_tuple(loc_idx):
        if isinstance(i, slice):
            if i.step is not None:
                transform.append(slice(None, None, np.sign(i.step)))
            else:
                transform.append(slice(None, None, None))
        else:
            transform.append(0)
    transform = as_tuple(transform)

    global_size = dat_len_cum[coords[-1]]

    indices = np.zeros(global_size, dtype=tuple)
    global_si = np.zeros(global_size, dtype=tuple)
    it = np.nditer(indices, flags=['refs_ok', 'multi_index'])
    while not it.finished:
        index = it.multi_index
        indices[index] = index
        it.iternext()
    global_si[:] = indices[transform]

    # Create the 'rank' slices
    rank_slice = []
    for j in coords:
        this_rank = []
        for k in dat_len[j]:
            this_rank.append(slice(0, k, 1))
        rank_slice.append(this_rank)
    # Normalize the slices:
    n_rank_slice = []
    for i in range(len(rank_slice)):
        my_coords = coords[i]
        if any([j.stop == j.start for j in rank_slice[i]]):
            n_rank_slice.append(as_tuple([None]*len(rank_slice[i])))
            continue
        if i == 0:
            n_rank_slice.append(as_tuple(rank_slice[i]))
            continue
        left_neighbours = []
        for j in range(len(my_coords)):
            left_coord = list(my_coords)
            left_coord[j] -= 1
            left_neighbours.append(as_tuple(left_coord))
        left_neighbours = as_tuple(left_neighbours)
        n_slice = []
        for j in range(len(my_coords)):
            if left_neighbours[j][j] < 0:
                start = 0
                stop = dat_len_cum[my_coords][j]
            else:
                start = dat_len_cum[left_neighbours[j]][j]
                stop = dat_len_cum[my_coords][j]
            n_slice.append(slice(start, stop, 1))
        n_rank_slice.append(as_tuple(n_slice))
    n_rank_slice = as_tuple(n_rank_slice)

    # Now fill each element's owner:
    owners = np.zeros(global_size, dtype=np.int32)
    send = np.zeros(global_size, dtype=np.int32)
    for i in range(len(n_rank_slice)):
        if any([j is None for j in n_rank_slice[i]]):
            continue
        else:
            owners[n_rank_slice[i]] = i
    send[:] = owners[transform]

    # Construct local_si
    local_si = np.zeros(global_size, dtype=tuple)
    it = np.nditer(local_si, flags=['refs_ok', 'multi_index'])
    while not it.finished:
        index = it.multi_index
        owner = owners[index]
        my_slice = n_rank_slice[owner]
        rnorm_index = []
        for j, k in zip(my_slice, index):
            rnorm_index.append(k - j.start)
        local_si[index] = as_tuple(rnorm_index)
        it.iternext()
    return owners, send, global_si, local_si
def detect_flow_directions(exprs):
    """
    Return a mapper from :class:`Dimension`s to iterables of
    :class:`IterationDirection`s representing the theoretically necessary
    directions to evaluate ``exprs`` so that the information "naturally
    flows" from an iteration to another.
    """
    exprs = as_tuple(exprs)

    writes = [Access(i.lhs, 'W') for i in exprs]
    reads = flatten(retrieve_indexed(i.rhs, mode='all') for i in exprs)
    reads = [Access(i, 'R') for i in reads]

    # Determine indexed-wise direction by looking at the vector distance
    mapper = defaultdict(set)
    for w in writes:
        for r in reads:
            if r.name != w.name:
                continue
            dimensions = [d for d in w.aindices if d is not None]
            if not dimensions:
                continue
            for d in dimensions:
                distance = None
                for i in d._defines:
                    try:
                        distance = w.distance(r, i, view=i)
                    except TypeError:
                        pass
                try:
                    if distance > 0:
                        mapper[d].add(Forward)
                        break
                    elif distance < 0:
                        mapper[d].add(Backward)
                        break
                    else:
                        mapper[d].add(Any)
                except TypeError:
                    # Nothing can be deduced
                    mapper[d].add(Any)
                    break
            # Remainder
            for d in dimensions[dimensions.index(d) + 1:]:
                mapper[d].add(Any)

    # Add in any encountered Dimension
    mapper.update({d: {Any} for d in flatten(i.aindices for i in reads + writes)
                   if d is not None and d not in mapper})

    # Add in derived-dimensions parents, in case they haven't been detected yet
    mapper.update({k.parent: set(v) for k, v in mapper.items()
                   if k.is_Derived and mapper.get(k.parent, {Any}) == {Any}})

    return mapper
def flip_idx(idx, decomposition):
    """
    This function serves two purposes:

        1) To convert a global index containing a slice with step < 0 to a
           'mirrored' index with all slice steps > 0.
        2) To normalize indices with slices containing negative start/stops.

    Parameters
    ----------
    idx : tuple of slices/ints/tuples
        Representation of the indices that require processing.
    decomposition : tuple of Decomposition
        The data decomposition, for each dimension.

    Examples
    --------
    In the following examples, the domain consists of 12 indices, split over
    four subdomains [0, 3]. We pick 2 as local subdomain.

    >>> from devito.data import Decomposition, flip_idx
    >>> d = Decomposition([[0, 1, 2], [3, 4], [5, 6, 7], [8, 9, 10, 11]], 2)
    >>> d
    Decomposition([0,2], [3,4], <<[5,7]>>, [8,11])

    Example with negative stepped slices:

    >>> idx = (slice(4, None, -1))
    >>> fidx = flip_idx(idx, (d,))
    >>> fidx
    (slice(None, 5, 1),)

    Example with negative start/stops:

    >>> idx2 = (slice(-4, -1, 1))
    >>> fidx2 = flip_idx(idx2, (d,))
    >>> fidx2
    (slice(8, 11, 1),)
    """
    processed = []
    for i, j in zip(as_tuple(idx), decomposition):
        if isinstance(i, slice) and i.step is not None and i.step < 0:
            if i.start is None:
                stop = None
            elif i.start > 0:
                stop = i.start + 1
            else:
                stop = i.start + j.glb_max + 2
            if i.stop is None:
                start = None
            elif i.stop > 0:
                start = i.stop + 1
            else:
                start = i.stop + j.glb_max + 2
            processed.append(slice(start, stop, -i.step))
        elif isinstance(i, slice):
            if i.start is not None and i.start < 0:
                start = i.start + j.glb_max + 1
            else:
                start = i.start
            if i.stop is not None and i.stop < 0:
                stop = i.stop + j.glb_max + 1
            else:
                stop = i.stop
            processed.append(slice(start, stop, i.step))
        else:
            processed.append(i)
    return as_tuple(processed)
def distributed_data_size(shape, coords, topology):
    """
    Compute the cumulative shape of the distributed data (cshape).

    Parameters
    ----------
    shape : np.array of tuples
        Array containing the local shape of data to each rank.
    coords : tuple of tuples
        The coordinates of each MPI rank in the decomposed domain, ordered
        based on the MPI rank.
    topology : tuple
        Topology of the decomposed domain.

    Examples
    --------
    Given a set of distributed data such that

        shape = [[(2, 2), (2, 2)],
                 [(2, 2), (2, 2)]]

    (that is, there are 4 ranks and the data on each rank has shape (2, 2)),
    cshape will be returned as

        cshape = [[(2, 2), (2, 4)],
                  [(4, 2), (4, 4)]].
    """
    cshape = np.zeros(topology, dtype=tuple)
    for i in range(len(coords)):
        my_coords = coords[i]
        if i == 0:
            cshape[my_coords] = shape[my_coords]
            continue
        left_neighbours = []
        for j in range(len(my_coords)):
            left_coord = list(my_coords)
            left_coord[j] -= 1
            left_neighbours.append(as_tuple(left_coord))
        left_neighbours = as_tuple(left_neighbours)
        n_dat = []  # Normalised data size
        if sum(shape[my_coords]) == 0:
            prev_dat_len = []
            for j in left_neighbours:
                if any(d < 0 for d in j):
                    pass
                else:
                    prev_dat_len.append(cshape[j])
            func = lambda a, b: max([d[b] for d in a])
            max_dat_len = []
            for j in range(len(my_coords)):
                max_dat_len.append(func(prev_dat_len, j))
            cshape[my_coords] = as_tuple(max_dat_len)
        else:
            for j in range(len(my_coords)):
                if left_neighbours[j][j] < 0:
                    c_dat = shape[my_coords][j]
                    n_dat.append(c_dat)
                else:
                    c_dat = shape[my_coords][j]  # Current length
                    p_dat = cshape[left_neighbours[j]][j]  # Previous length
                    n_dat.append(c_dat + p_dat)
            cshape[my_coords] = as_tuple(n_dat)
    return cshape
def __new__(cls, **kwargs):
    name = kwargs.get('name', cls.name)
    value = cls.default_value()
    obj = Constant.__new__(cls, name=name, dtype=np.int32, value=value)
    obj.aliases = as_tuple(kwargs.get('aliases')) + (name,)
    return obj
def __init__(self, expressions, **kwargs):
    expressions = as_tuple(expressions)

    # Input check
    if any(not isinstance(i, sympy.Eq) for i in expressions):
        raise InvalidOperator("Only SymPy expressions are allowed.")

    self.name = kwargs.get("name", "Kernel")
    subs = kwargs.get("subs", {})
    dse = kwargs.get("dse", configuration['dse'])
    dle = kwargs.get("dle", configuration['dle'])

    # Header files, etc.
    self._headers = list(self._default_headers)
    self._includes = list(self._default_includes)
    self._globals = list(self._default_globals)

    # Required for compilation
    self._compiler = configuration['compiler']
    self._lib = None
    self._cfunction = None

    # References to local or external routines
    self.func_table = OrderedDict()

    # Expression lowering and analysis
    expressions = [LoweredEq(e, subs=subs) for e in expressions]
    self.dtype = retrieve_dtype(expressions)
    self.input = filter_sorted(flatten(e.reads for e in expressions))
    self.output = filter_sorted(flatten(e.writes for e in expressions))
    self.dimensions = filter_sorted(flatten(e.dimensions for e in expressions))

    # Group expressions based on their iteration space and data dependences,
    # and apply the Devito Symbolic Engine (DSE) for flop optimization
    clusters = clusterize(expressions)
    clusters = rewrite(clusters, mode=set_dse_mode(dse))

    # Lower Clusters to an Iteration/Expression tree (IET)
    nodes = iet_build(clusters, self.dtype)

    # Introduce C-level profiling infrastructure
    nodes, self.profiler = self._profile_sections(nodes)

    # Translate into backend-specific representation (e.g., GPU, Yask)
    nodes = self._specialize(nodes)

    # Apply the Devito Loop Engine (DLE) for loop optimization
    dle_state = transform(nodes, *set_dle_mode(dle))

    # Update the Operator state based on the DLE
    self.dle_arguments = dle_state.arguments
    self.dle_flags = dle_state.flags
    self.func_table.update(OrderedDict([(i.name, MetaCall(i, True))
                                        for i in dle_state.elemental_functions]))
    self.dimensions.extend([i.argument for i in self.dle_arguments
                            if isinstance(i.argument, Dimension)])
    self._includes.extend(list(dle_state.includes))

    # Introduce the required symbol declarations
    nodes = iet_insert_C_decls(dle_state.nodes, self.func_table)

    # Insert data and pointer casts for array parameters and profiling structs
    nodes = self._build_casts(nodes)

    # Derive parameters as symbols not defined in the kernel itself
    parameters = self._build_parameters(nodes)

    # Finish instantiation
    super(Operator, self).__init__(self.name, nodes, 'int', parameters, ())
def __init__(self, body, captures=None, parameters=None):
    self.body = as_tuple(body)
    self.captures = as_tuple(captures)
    self.parameters = as_tuple(parameters)
def __expr_finalize__(self, expr, pragmas):
    """Finalize the Expression initialization."""
    self._expr = expr
    self._pragmas = as_tuple(pragmas)