def _make_copy(self, f, hse, key='', swap=False):
    """
    Build a Callable copying data between ``f`` and a buffer Array ``buf``.

    The Dimensions of ``f`` that do not appear in ``hse.loc_indices`` are the
    ones iterated over; each of them gets a matching ``buf_<root>`` Dimension
    in the buffer. Every Dimension of ``f`` gets an offset Symbol ``o<root>``,
    which is added to the access into ``f``.

    Parameters
    ----------
    f
        The object to copy from/to; its ``dimensions`` and ``dtype`` are used.
    hse
        Object exposing ``loc_indices``; Dimensions found in there are
        accessed at their offset only.
    key : str, optional
        Suffix appended to the name of the generated Callable.
    swap : bool, optional
        If exactly ``False`` (the default), ``buf`` is written from ``f`` and
        the Callable is called ``gather<key>``; for any other value ``f`` is
        written from ``buf`` and the Callable is called ``scatter<key>``.

    Returns
    -------
    Callable
        A ``static void`` Callable whose parameters are ``buf``, the entries
        of ``buf.shape``, ``f`` and the offset Symbols, in this order.
    """
    # Only the Dimensions of `f` missing from `hse.loc_indices` shape the buffer
    iterated = [d for d in f.dimensions if d not in hse.loc_indices]
    buf_dims = [Dimension(name='buf_%s' % d.root) for d in iterated]
    buf_indices = [d.root for d in iterated]
    buf = Array(name='buf', dimensions=buf_dims, dtype=f.dtype)

    # One offset per Dimension of `f`; Dimensions in `hse.loc_indices`
    # are indexed through the offset alone
    f_offsets = [Symbol(name='o%s' % d.root) for d in f.dimensions]
    f_indices = [ofs + (d.root if d not in hse.loc_indices else 0)
                 for d, ofs in zip(f.dimensions, f_offsets)]

    if swap is False:
        name = 'gather%s' % key
        eq = DummyEq(buf[buf_indices], f[f_indices])
    else:
        name = 'scatter%s' % key
        eq = DummyEq(f[f_indices], buf[buf_indices])

    # Wrap the Expression starting from the last buffer Dimension, which
    # therefore ends up being the innermost Iteration
    iet = Expression(eq)
    for index, dim in zip(reversed(buf_indices), reversed(buf_dims)):
        # An Iteration, by default, generates <=, hence the -1
        iet = Iteration(iet, index, dim.symbolic_size - 1, properties=PARALLEL)
    body = List(body=[ArrayCast(f), ArrayCast(buf), iet])

    parameters = [buf]
    parameters.extend(buf.shape)
    parameters.append(f)
    parameters.extend(f_offsets)
    return Callable(name, body, 'void', parameters, ('static', ))
def _build_casts(self, iet):
    """
    Put array casts and the profiler pointer cast in front of ``iet``.

    An ArrayCast is emitted for each entry of ``self.input`` that has both
    ``is_Tensor`` and ``_mem_external`` set; a PointerCast for an Object built
    from ``self.profiler`` (its ``name``, ``dtype`` and ``new``) follows.

    Parameters
    ----------
    iet : Node
        The Iteration/Expression tree to be preceded by the casts.

    Returns
    -------
    List
        A List whose body is the casts followed by ``iet``.
    """
    casts = []
    for f in self.input:
        if f.is_Tensor and f._mem_external:
            casts.append(ArrayCast(f))

    # The profiler cast always comes after the array casts
    prof = self.profiler
    casts.append(PointerCast(Object(prof.name, prof.dtype, prof.new)))

    return List(body=casts + [iet])
def iet_insert_casts(iet, parameters):
    """
    Return a new IET made of the necessary type casts followed by ``iet``.

    Parameters
    ----------
    iet : Node
        The input Iteration/Expression tree.
    parameters : iterable
        The symbols that might require casting. The casts are emitted in the
        order in which the symbols appear here.

    Returns
    -------
    List
        A List whose body is the ArrayCasts followed by ``iet``.
    """
    # Keep the generated code terse: a non-Array parameter that shows up in no
    # Expression (i.e., it is merely forwarded to another Call) needs no cast.
    # So, collect the tensors actually used by some Expression ...
    need_cast = set()
    for expr in FindNodes(Expression).visit(iet):
        need_cast.update(f for f in expr.functions if f.is_Tensor)
    # ... while Array parameters are cast unconditionally
    need_cast.update(p for p in parameters if p.is_Array)

    casts = [ArrayCast(p) for p in parameters if p in need_cast]
    return List(body=casts + [iet])
def _build_casts(self, iet):
    """
    Put array casts in front of the Iteration/Expression tree ``iet``.

    One ArrayCast is emitted, in ``self.input`` order, for each entry that has
    both ``is_Tensor`` and ``_mem_external`` set.

    Parameters
    ----------
    iet : Node
        The tree to be preceded by the casts.

    Returns
    -------
    List
        A List whose body is the casts followed by ``iet``.
    """
    external_tensors = (f for f in self.input if f.is_Tensor and f._mem_external)
    body = [ArrayCast(f) for f in external_tensors]
    body.append(iet)
    return List(body=body)
def _make_copy(self, f, fixed, swap=False):
    """
    Build a Callable copying data between ``f`` and a buffer Array ``buf``.

    The generated code copies:

        * a region of ``f`` into the contiguous Array ``buf``, OR
        * if ``swap`` is anything but ``False``, ``buf`` into a region of ``f``.

    Parameters
    ----------
    f
        The object to copy from/to; its ``dimensions``, ``dtype`` and ``ndim``
        are used.
    fixed
        Container of the Dimensions of ``f`` that are not iterated over; these
        are accessed through their offset Symbol only.
    swap : bool, optional
        If exactly ``False`` (the default), produce ``gather<ndim>d``;
        otherwise produce ``scatter<ndim>d``.

    Returns
    -------
    tuple
        The ``static void`` Callable and ``state.input`` as produced by the
        DLE ``transform`` call. The Callable takes ``buf``, the entries of
        ``buf.shape``, ``f``, the offset Symbols and ``state.input``.
    """
    # Only the non-fixed Dimensions of `f` shape the buffer
    moving = [d for d in f.dimensions if d not in fixed]
    buf_dims = [Dimension(name='buf_%s' % d.root) for d in moving]
    buf_indices = [d.root for d in moving]
    buf = Array(name='buf', dimensions=buf_dims, dtype=f.dtype)

    # One offset per Dimension of `f`; fixed Dimensions use the offset alone
    f_offsets = [Symbol(name='o%s' % d.root) for d in f.dimensions]
    f_indices = [ofs + (d.root if d not in fixed else 0)
                 for d, ofs in zip(f.dimensions, f_offsets)]

    if swap is False:
        name = 'gather%dd' % f.ndim
        eq = DummyEq(buf[buf_indices], f[f_indices])
    else:
        name = 'scatter%dd' % f.ndim
        eq = DummyEq(f[f_indices], buf[buf_indices])

    # Wrap the Expression starting from the last buffer Dimension, which
    # therefore ends up being the innermost Iteration
    iet = Expression(eq)
    for index, dim in zip(reversed(buf_indices), reversed(buf_dims)):
        # An Iteration, by default, generates <=, hence the -1
        iet = Iteration(iet, index, dim.symbolic_size - 1, properties=PARALLEL)
    iet = List(body=[ArrayCast(f), ArrayCast(buf), iet])

    # Optimize the memory copy with the DLE
    from devito.dle import transform
    state = transform(iet, 'simd', {'openmp': self._threaded})

    parameters = [buf] + list(buf.shape) + [f] + f_offsets + state.input
    efunc = Callable(name, state.nodes, 'void', parameters, ('static', ))
    return efunc, state.input
def copy(f, fixed, swap=False):
    """
    Build a Callable copying data between two Arrays, ``dat`` and ``buf``.

    ``dat`` has one ``dat_<root>`` Dimension per Dimension of ``f``, while
    ``buf`` has one ``buf_<root>`` Dimension per Dimension of ``f`` that is
    not in ``fixed``. The generated code copies:

        * a region of ``dat`` into the contiguous Array ``buf``, OR
        * if ``swap`` is anything but ``False``, ``buf`` into a region of
          ``dat``.

    Parameters
    ----------
    f
        The object the copy is generated for; its ``dimensions``, ``dtype``
        and ``name`` are used.
    fixed
        Container of the Dimensions of ``f`` that are not iterated over; these
        are accessed through their offset Symbol only.
    swap : bool, optional
        If exactly ``False`` (the default), produce ``gather_<f.name>``;
        otherwise produce ``scatter_<f.name>``.

    Returns
    -------
    Callable
        A ``static void`` Callable taking ``buf``, the entries of
        ``buf.shape``, ``dat``, the entries of ``dat.shape`` and the offset
        Symbols, in this order.
    """
    # Only the non-fixed Dimensions of `f` shape the buffer
    moving = [d for d in f.dimensions if d not in fixed]
    buf_dims = [Dimension(name='buf_%s' % d.root) for d in moving]
    buf_indices = [d.root for d in moving]
    buf = Array(name='buf', dimensions=buf_dims, dtype=f.dtype)

    # `dat` mirrors all Dimensions of `f`; each access is shifted by an
    # offset, and fixed Dimensions use the offset alone
    dat_dims = [Dimension(name='dat_%s' % d.root) for d in f.dimensions]
    dat_offsets = [Symbol(name='o%s' % d.root) for d in f.dimensions]
    dat_indices = [ofs + (d.root if d not in fixed else 0)
                   for d, ofs in zip(f.dimensions, dat_offsets)]
    dat = Array(name='dat', dimensions=dat_dims, dtype=f.dtype)

    if swap is False:
        name = 'gather_%s' % f.name
        eq = DummyEq(buf[buf_indices], dat[dat_indices])
    else:
        name = 'scatter_%s' % f.name
        eq = DummyEq(dat[dat_indices], buf[buf_indices])

    # Wrap the Expression starting from the last buffer Dimension, which
    # therefore ends up being the innermost Iteration
    iet = Expression(eq)
    for index, dim in zip(reversed(buf_indices), reversed(buf_dims)):
        # -1 as Iteration generates <=
        iet = Iteration(iet, index, dim.symbolic_size - 1)
    body = List(body=[ArrayCast(dat), ArrayCast(buf), iet])

    parameters = [buf]
    parameters.extend(buf.shape)
    parameters.append(dat)
    parameters.extend(dat.shape)
    parameters.extend(dat_offsets)
    return Callable(name, body, 'void', parameters, ('static', ))
def sendrecv(f, fixed):
    """
    Build a Callable performing a halo exchange along an arbitrary dimension
    and side.

    The body of the generated Callable ``sendrecv_<f.name>`` does, in order:
    ``MPI_Irecv`` into ``bufs``, a call to ``gather_<f.name>`` filling
    ``bufg``, ``MPI_Isend`` of ``bufg``, ``MPI_Wait`` on the send,
    ``MPI_Wait`` on the receive, and finally a call to ``scatter_<f.name>``
    that is skipped when ``fromrank`` equals ``MPI_PROC_NULL``.

    Parameters
    ----------
    f
        The object whose halo is exchanged. It must have ``is_Function`` set
        and a ``grid`` (both checked through assertions).
    fixed
        Container of the Dimensions of ``f`` that do not contribute a
        Dimension to the send/receive buffers.

    Returns
    -------
    Callable
        A ``static void`` Callable taking ``dat``, the entries of
        ``dat.shape``, the entries of ``bufs.shape``, the gather offsets, the
        scatter offsets, ``fromrank``, ``torank`` and the communicator.
    """
    assert f.is_Function
    assert f.grid is not None

    comm = f.grid.distributor._C_comm

    # Both buffers span the Dimensions of `f` that are not in `fixed`
    buf_dims = []
    for d in f.dimensions:
        if d in fixed:
            continue
        buf_dims.append(Dimension(name='buf_%s' % d.root))
    gather_buf = Array(name='bufg', dimensions=buf_dims, dtype=f.dtype, scope='heap')
    scatter_buf = Array(name='bufs', dimensions=buf_dims, dtype=f.dtype, scope='heap')

    dat_dims = [Dimension(name='dat_%s' % d.root) for d in f.dimensions]
    dat = Array(name='dat', dimensions=dat_dims, dtype=f.dtype, scope='external')

    gather_ofs = [Symbol(name='og%s' % d.root) for d in f.dimensions]
    scatter_ofs = [Symbol(name='os%s' % d.root) for d in f.dimensions]

    fromrank = Symbol(name='fromrank')
    torank = Symbol(name='torank')

    # `dat` and its shape are forwarded to both the gather and the scatter
    dat_args = [dat] + list(dat.shape)
    gather = Call('gather_%s' % f.name,
                  [gather_buf] + list(gather_buf.shape) + dat_args + gather_ofs)
    scatter = Call('scatter_%s' % f.name,
                   [scatter_buf] + list(scatter_buf.shape) + dat_args + scatter_ofs)

    # The scatter must be guarded as we must not alter the halo values along
    # the domain boundary, where the sender is actually MPI.PROC_NULL
    not_proc_null = CondNe(fromrank, Macro('MPI_PROC_NULL'))
    scatter = Conditional(not_proc_null, scatter)

    srecv = MPIStatusObject(name='srecv')
    rrecv = MPIRequestObject(name='rrecv')
    rsend = MPIRequestObject(name='rsend')

    # Number of buffer entries to transfer
    count = reduce(mul, scatter_buf.shape, 1)
    recv = Call('MPI_Irecv', [scatter_buf, count,
                              Macro(numpy_to_mpitypes(f.dtype)),
                              fromrank, '13', comm, rrecv])
    send = Call('MPI_Isend', [gather_buf, count,
                              Macro(numpy_to_mpitypes(f.dtype)),
                              torank, '13', comm, rsend])

    waitrecv = Call('MPI_Wait', [rrecv, srecv])
    waitsend = Call('MPI_Wait', [rsend, Macro('MPI_STATUS_IGNORE')])

    exchange = List(body=[recv, gather, send, waitsend, waitrecv, scatter])
    body = List(body=[ArrayCast(dat), iet_insert_C_decls(exchange)])

    parameters = (dat_args + list(scatter_buf.shape) + gather_ofs + scatter_ofs +
                  [fromrank, torank, comm])
    return Callable('sendrecv_%s' % f.name, body, 'void', parameters, ('static',))
def _create_elemental_functions(self, nodes, state):
    """
    Move Iteration sub-trees into Callables, replacing them with Calls.

    Currently, only tagged, elementizable Iteration objects are targeted.
    For each Iteration tree retrieved from ``nodes``, the first tagged
    Iteration found (if elementizable) becomes the root of an elemental
    function named ``f_<tag>``; the loop bounds and the starting points of
    the unbounded indices are turned into scalar parameters, whose values
    are supplied at the call site.

    Parameters
    ----------
    nodes
        The Iteration/Expression tree to be transformed.
    state
        Not used in the body of this method.

    Returns
    -------
    tuple
        The transformed tree and a dict mapping ``'elemental_functions'``
        to the values of the ``name -> Callable`` mapper built along the way.
    """
    noinline = self._compiler_decoration('noinline', c.Comment('noinline?'))

    functions = OrderedDict()
    mapper = {}
    for tree in retrieve_iteration_tree(nodes, mode='superset'):
        # Look for an elementizable sub-tree (if any)
        tagged = filter_iterations(tree, lambda i: i.tag is not None, 'asap')
        if not tagged or not tagged[0].is_Elementizable:
            continue
        root = tagged[0]
        target = tree[tree.index(root):]

        # Map of argument values defined by loop bounds
        defined_args = {}

        # Rebuild each Iteration in `target` with free bounds
        free = []
        for it in target:
            dname = it.dim.name
            bounds = it.bounds_symbolic

            # Iteration bounds
            start = Scalar(name='%s_start' % dname, dtype=np.int32)
            finish = Scalar(name='%s_finish' % dname, dtype=np.int32)
            defined_args[start.name] = bounds[0]
            defined_args[finish.name] = bounds[1]

            # Iteration unbounded indices: each one gets a scalar carrying
            # its starting point into the elemental function
            uindices = []
            for n, uindex in enumerate(it.uindices):
                ub = Scalar(name='%s_ub%d' % (dname, n), dtype=np.int32)
                defined_args[ub.name] = uindex.start
                uindices.append(UnboundedIndex(uindex.index, it.dim + as_symbol(ub)))

            limits = [Scalar(name=start.name, dtype=np.int32),
                      Scalar(name=finish.name, dtype=np.int32),
                      1]
            free.append(it._rebuild(limits=limits, offsets=None, uindices=uindices))

        # Construct elemental function body, and inspect it
        free = NestedTransformer(dict(zip(target, free))).visit(root)

        # Insert array casts for all tensors whose name is not among the
        # symbols reported as defined within the body
        f_symbols = FindSymbols('symbolics').visit(free)
        defines = [s.name for s in FindSymbols('defines').visit(free)]
        casts = [ArrayCast(f) for f in f_symbols
                 if f.is_Tensor and f.name not in defines]
        free = (List(body=casts), free)

        # Elemental function arguments, as (value at call site, parameter)
        args = []
        for p in derive_parameters(free):
            if p.name in defined_args:
                pair = (defined_args[p.name], p)
            elif p.is_Dimension:
                d = Scalar(name=p.name, dtype=p.dtype)
                pair = (d, d)
            else:
                pair = (p, p)
            args.append(pair)
        call, params = zip(*args)

        name = "f_%d" % root.tag

        # Produce the new Call
        mapper[root] = List(header=noinline, body=Call(name, call))

        # Produce the new Callable; an already registered `name` is retained
        efunc = Callable(name, free, 'void', flatten(params), ('static', ))
        functions.setdefault(name, efunc)

    # Transform the main tree
    processed = Transformer(mapper).visit(nodes)

    return processed, {'elemental_functions': functions.values()}
def _build_casts(self, iet):
    """
    Introduce array casts ahead of ``iet``.

    Entries of ``self.input`` lacking either ``is_Tensor`` or
    ``_mem_external`` are skipped; every other entry gets an ArrayCast.

    Parameters
    ----------
    iet : Node
        The tree to be preceded by the casts.

    Returns
    -------
    List
        A List whose body is the casts followed by ``iet``.
    """
    casts = []
    for f in self.input:
        if not (f.is_Tensor and f._mem_external):
            continue
        casts.append(ArrayCast(f))
    return List(body=casts + [iet])
def _create_efuncs(self, nodes, state):
    """
    Turn Iteration sub-trees into Calls+Callables.

    Currently, only tagged, elementizable Iteration objects are targeted.
    For each Iteration tree retrieved from ``nodes``, the first tagged
    Iteration found (if elementizable) becomes the root of an elemental
    function named ``f_<tag>``; the loop bounds and the symbolic minima of
    the unbounded indices are turned into scalar parameters, whose values
    are supplied at the call site.

    Parameters
    ----------
    nodes
        The Iteration/Expression tree to be transformed.
    state
        Not used in the body of this method.

    Returns
    -------
    tuple
        The transformed tree and a dict mapping ``'efuncs'`` to the values
        of the ``name -> Callable`` mapper built along the way.
    """
    noinline = self._compiler_decoration('noinline', c.Comment('noinline?'))

    efuncs = OrderedDict()
    mapper = {}
    for tree in retrieve_iteration_tree(nodes, mode='superset'):
        # Look for an elementizable sub-tree (if any)
        tagged = filter_iterations(tree, lambda i: i.tag is not None, 'asap')
        if not tagged or not tagged[0].is_Elementizable:
            continue
        root = tagged[0]
        target = tree[tree.index(root):]

        # Rebuild each Iteration in `target` with free bounds
        free = []
        defined_args = {}  # Map of argument values defined by loop bounds
        for it in target:
            dname = it.dim.name
            bounds = it.symbolic_bounds

            # Iteration bounds
            _min = Scalar(name='%sf_m' % dname, dtype=np.int32, is_const=True)
            _max = Scalar(name='%sf_M' % dname, dtype=np.int32, is_const=True)
            defined_args[_min.name] = bounds[0]
            defined_args[_max.name] = bounds[1]

            # Iteration unbounded indices: each one gets a scalar carrying
            # its symbolic minimum into the elemental function
            uindices = []
            for n, uindex in enumerate(it.uindices):
                ub = Scalar(name='%s_ub%d' % (dname, n), dtype=np.int32)
                defined_args[ub.name] = uindex.symbolic_min
                uindices.append(IncrDimension(uindex.parent, it.dim + as_symbol(ub),
                                              1, uindex.name))

            free.append(it._rebuild(limits=(_min, _max, 1), offsets=None,
                                    uindices=uindices))

        # Construct elemental function body
        free = Transformer(dict(zip(target, free)), nested=True).visit(root)
        items = FindSymbols().visit(free)

        # Insert array casts
        casts = [ArrayCast(s) for s in items if s.is_Tensor]
        free = List(body=casts + [free])

        # Insert declarations
        external = [s for s in items if s.is_Array]
        free = iet_insert_C_decls(free, external)

        # Create the Callable; an already registered `name` is retained
        name = "f_%d" % root.tag
        params = derive_parameters(free)
        efunc = Callable(name, free, 'void', params, 'static')
        efuncs.setdefault(name, efunc)

        # Create the Call: parameters defined by loop bounds are replaced by
        # their values, all others are passed through as they are
        args = [defined_args.get(p.name, p) for p in params]
        mapper[root] = List(header=noinline, body=Call(name, args))

    # Transform the main tree
    processed = Transformer(mapper).visit(nodes)

    return processed, {'efuncs': efuncs.values()}