def _indices(cls, **kwargs):
    """
    Return the default dimension indices for a sparse data object.

    :param dimensions: Optional, explicit list of dimensions. When given
                       (and non-empty) it is returned untouched.
    :param grid: Optional, object providing ``time_dim``; only accessed
                 when no ``dimensions`` are given and ``nt > 0``.
    :param nt: Optional, number of timesteps (defaults to 0).
    :return: indices used for each axis.
    """
    dimensions = kwargs.get('dimensions', None)
    if dimensions:
        # Explicit dimensions take precedence: do not dereference ``grid``
        # (it may be missing) and do not build an unused default Dimension
        return dimensions

    grid = kwargs.get('grid', None)
    nt = kwargs.get('nt', 0)
    if nt > 0:
        # Time-varying data: the time dimension of the grid leads
        return [grid.time_dim, Dimension('p')]
    return [Dimension('p')]
def sendrecv(f, fixed):
    """
    Construct an IET performing a halo exchange along arbitrary
    dimension and side.

    :param f: The object whose data is exchanged; it must be a Function
              attached to a grid.
    :param fixed: Dimensions of ``f`` that do not contribute to the
                  send/receive buffers.
    :return: A static :class:`Callable` named ``sendrecv_<f.name>``.
    """
    assert f.is_Function
    assert f.grid is not None

    comm = f.grid.distributor._C_comm

    # Send ("gather") and receive ("scatter") buffers share the same
    # dimensions, namely all the non-fixed ones
    buf_dims = []
    for d in f.dimensions:
        if d in fixed:
            continue
        buf_dims.append(Dimension(name='buf_%s' % d.root))
    bufg = Array(name='bufg', dimensions=buf_dims, dtype=f.dtype,
                 scope='heap')
    bufs = Array(name='bufs', dimensions=buf_dims, dtype=f.dtype,
                 scope='heap')

    # Stand-in for the data carried by ``f``
    dat_dims = [Dimension(name='dat_%s' % d.root) for d in f.dimensions]
    dat = Array(name='dat', dimensions=dat_dims, dtype=f.dtype,
                scope='external')

    # Per-dimension offsets for the gather and the scatter
    ofsg = [Symbol(name='og%s' % d.root) for d in f.dimensions]
    ofss = [Symbol(name='os%s' % d.root) for d in f.dimensions]

    fromrank = Symbol(name='fromrank')
    torank = Symbol(name='torank')

    gather_args = [bufg] + list(bufg.shape) + [dat] + list(dat.shape) + ofsg
    gather = Call('gather_%s' % f.name, gather_args)

    scatter_args = [bufs] + list(bufs.shape) + [dat] + list(dat.shape) + ofss
    scatter_call = Call('scatter_%s' % f.name, scatter_args)
    # The scatter must be guarded as we must not alter the halo values along
    # the domain boundary, where the sender is actually MPI.PROC_NULL
    scatter = Conditional(CondNe(fromrank, Macro('MPI_PROC_NULL')),
                          scatter_call)

    srecv = MPIStatusObject(name='srecv')
    rrecv = MPIRequestObject(name='rrecv')
    rsend = MPIRequestObject(name='rsend')

    # Number of elements exchanged: product of the buffer extents
    count = reduce(mul, bufs.shape, 1)

    recv_args = [bufs, count, Macro(numpy_to_mpitypes(f.dtype)),
                 fromrank, '13', comm, rrecv]
    recv = Call('MPI_Irecv', recv_args)
    send_args = [bufg, count, Macro(numpy_to_mpitypes(f.dtype)),
                 torank, '13', comm, rsend]
    send = Call('MPI_Isend', send_args)

    waitrecv = Call('MPI_Wait', [rrecv, srecv])
    waitsend = Call('MPI_Wait', [rsend, Macro('MPI_STATUS_IGNORE')])

    body = List(body=[recv, gather, send, waitsend, waitrecv, scatter])
    body = List(body=[ArrayCast(dat), iet_insert_C_decls(body)])

    signature = ([dat] + list(dat.shape) + list(bufs.shape) + ofsg + ofss +
                 [fromrank, torank, comm])
    return Callable('sendrecv_%s' % f.name, body, 'void', signature,
                    ('static',))
def _indices(cls, **kwargs):
    """
    Return the default dimension indices for a given data shape.

    :param dimensions: Optional, list of :class:`Dimension` objects that
                       defines data layout. Returned as is when provided.
    :param shape: Optional, shape of the spatial data to automatically
                  infer dimension symbols. Required if ``dimensions`` is
                  not provided.
    :return: Dimension indices used for each axis.
    :raises ValueError: If neither ``dimensions`` nor ``shape`` is given.
    """
    dimensions = kwargs.get('dimensions', None)
    if dimensions is not None:
        return dimensions

    # Infer dimensions from default and data shape
    if 'shape' not in kwargs:
        error("Creating symbolic data objects requires either "
              "a 'shape' or 'dimensions' argument")
        raise ValueError("Unknown symbol dimensions or shape")

    defaults = (x, y, z)
    shape = kwargs.get('shape')
    ndim = len(shape)
    if ndim <= 3:
        # Up to three axes: reuse the leading default symbols
        return defaults[:ndim]
    # More than three axes: generate numbered dimensions x1, x2, ...
    return [Dimension("x%d" % i) for i in range(1, ndim + 1)]
def __new__(cls, name, ntime=None, npoint=None, ndim=None, data=None,
            coordinates=None, **kwargs):
    """
    Create the :class:`PointData` object backing a source/receiver.

    :param name: Name of the object; also used to name the default
                 point dimension (``p_<name>``).
    :param ntime: Optional, number of timesteps; if omitted it is taken
                  from ``data.shape[0]``.
    :param npoint: Optional, number of points; if omitted it is taken
                   from ``coordinates.shape[0]``.
    :param ndim: Optional, number of spatial dimensions; if omitted it is
                 taken from ``coordinates.shape[1]``.
    :param data: Optional, initial values copied into the new object.
    :param coordinates: Optional, point coordinates forwarded to
                        :class:`PointData`.
    :param kwargs: Forwarded to :class:`PointData`; ``dimension`` may be
                   used to override the default point dimension.
    :return: The newly created :class:`PointData` object.
    :raises ValueError: If neither ``data`` nor ``ntime`` is provided.
    """
    p_dim = kwargs.get('dimension', Dimension('p_%s' % name))
    ndim = ndim or coordinates.shape[1]
    npoint = npoint or coordinates.shape[0]
    if data is None:
        if ntime is None:
            error('Either data or ntime are required to '
                  'initialise source/receiver objects')
            # Without a number of timesteps the object cannot be sized
            raise ValueError('Either data or ntime must be provided')
    else:
        ntime = ntime or data.shape[0]

    # Create the underlying PointData object
    obj = PointData(name=name, dimensions=[time, p_dim],
                    npoint=npoint, nt=ntime, ndim=ndim,
                    coordinates=coordinates, **kwargs)

    # If provided, copy initial data into the allocated buffer
    if data is not None:
        obj.data[:] = data
    return obj
def __indices_setup__(cls, **kwargs):
    """
    Return the dimension indices of the object: the user-provided
    ``dimensions`` if any, otherwise a single default Dimension named
    ``p_<name>``.
    """
    explicit = kwargs.get('dimensions')
    if explicit is None:
        # Fall back to one point dimension derived from the object name
        return (Dimension(name='p_%s' % kwargs["name"]),)
    return explicit
def _indices(cls, **kwargs):
    """
    Return the default dimension indices for a sparse data object.

    :param dimensions: Optional, explicit list of dimensions. When given
                       (and non-empty) it is returned untouched.
    :param grid: Optional, object providing ``time_dim``; only accessed
                 when no ``dimensions`` are given and ``nt > 0``.
    :param nt: Optional, number of timesteps (defaults to 0).
    :return: indices used for each axis.
    """
    dimensions = kwargs.get('dimensions', None)
    if dimensions:
        # Explicit dimensions take precedence: do not dereference ``grid``
        # (it may be missing) and do not build an unused default Dimension
        return dimensions

    grid = kwargs.get('grid', None)
    nt = kwargs.get('nt', 0)
    dim = Dimension(name='p')
    # Time-varying data gets the time dimension of the grid in front
    return [grid.time_dim, dim] if nt > 0 else [dim]
def copy(f, fixed, swap=False):
    """
    Construct a :class:`Callable` capable of copying: ::

        * an arbitrary convex region of ``f`` into a contiguous
          :class:`Array`, OR
        * if ``swap=True``, a contiguous :class:`Array` into an arbitrary
          convex region of ``f``.

    :param f: The object providing dimensions, dtype and name.
    :param fixed: Dimensions of ``f`` that are not iterated over; they
                  are accessed at their offset only.
    :param swap: ``False`` builds ``gather_<f.name>`` (data to buffer);
                 anything else builds ``scatter_<f.name>`` (buffer to data).
    """
    # The contiguous buffer only spans the non-fixed dimensions
    free = [d for d in f.dimensions if d not in fixed]
    buf_dims = [Dimension(name='buf_%s' % d.root) for d in free]
    buf_indices = [d.root for d in free]
    buf = Array(name='buf', dimensions=buf_dims, dtype=f.dtype)

    # The data array spans all dimensions, each accessed through an offset
    dat_dims = [Dimension(name='dat_%s' % d.root) for d in f.dimensions]
    dat_offsets = [Symbol(name='o%s' % d.root) for d in f.dimensions]
    dat_indices = [ofs + (0 if d in fixed else d.root)
                   for d, ofs in zip(f.dimensions, dat_offsets)]
    dat = Array(name='dat', dimensions=dat_dims, dtype=f.dtype)

    if swap is False:
        name = 'gather_%s' % f.name
        eq = DummyEq(buf[buf_indices], dat[dat_indices])
    else:
        name = 'scatter_%s' % f.name
        eq = DummyEq(dat[dat_indices], buf[buf_indices])

    # Wrap the expression into a loop nest, innermost dimension first
    iet = Expression(eq)
    for index, dim in zip(reversed(buf_indices), reversed(buf_dims)):
        # -1 as Iteration generates <=
        iet = Iteration(iet, index, dim.symbolic_size - 1)
    iet = List(body=[ArrayCast(dat), ArrayCast(buf), iet])

    parameters = [buf]
    parameters += list(buf.shape)
    parameters += [dat]
    parameters += list(dat.shape)
    parameters += dat_offsets
    return Callable(name, iet, 'void', parameters, ('static',))
def dims():
    """
    Return a dictionary mapping the names 'i', 'j', 'k', 'l', 's', 'q' to
    :class:`Dimension` objects of fixed size.
    """
    sizes = (('i', 3), ('j', 5), ('k', 7), ('l', 6), ('s', 4), ('q', 4))
    return {name: Dimension(name=name, size=size) for name, size in sizes}
def __init__(self, *args, **kwargs):
    """
    Initialise a SparseFunction, unless the object was already cached.

    :param nt: Optional, number of timesteps (defaults to 0).
    :param npoint: Number of sparse points.
    :param coordinates: Optional, values copied into the coordinates
                        Function created for the sparse points.
    :raises ValueError: If the object has no grid after the parent
                        initialisation.

    ``kwargs['shape']`` is overwritten with ``(nt, npoint)`` before being
    forwarded to the parent initialiser.
    """
    if self._cached():
        return

    self.nt = kwargs.get('nt', 0)
    self.npoint = kwargs.get('npoint')
    kwargs['shape'] = (self.nt, self.npoint)
    # ``super()`` already binds ``self``; passing it again would inject the
    # instance as a spurious first positional argument
    super(SparseFunction, self).__init__(*args, **kwargs)

    if self.grid is None:
        error('SparseFunction objects require a grid parameter.')
        raise ValueError('No grid provided for SparseFunction.')

    # Allocate and copy coordinate data
    d = Dimension('d')
    self.coordinates = Function(name='%s_coords' % self.name,
                                dimensions=[self.indices[-1], d],
                                shape=(self.npoint, self.grid.dim))
    self._children.append(self.coordinates)
    coordinates = kwargs.get('coordinates', None)
    if coordinates is not None:
        self.coordinates.data[:] = coordinates[:]
def __init__(self, *args, **kwargs):
    """
    Initialise a SparseFunction, unless the object was already cached.

    :param npoint: Number of sparse points; must be a positive ``int``.
    :param grid: The grid the sparse points live on (required).
    :param dtype: Optional, data type; defaults to ``grid.dtype``.
    :param space_order: Optional, defaults to 0.
    :param coordinates: Optional, values copied into the coordinates
                        Function created for the sparse points.
    :raises ValueError: If ``npoint`` is not a positive integer or if no
                        ``grid`` is provided.
    """
    if self._cached():
        return

    super(SparseFunction, self).__init__(*args, **kwargs)

    npoint = kwargs.get('npoint')
    # Reject anything that is not a strictly positive integer, including
    # a missing value (``not`` binds tighter than ``and``, so the check
    # must be spelled as a disjunction)
    if not isinstance(npoint, int) or npoint <= 0:
        raise ValueError('SparseFunction requires parameter `npoint` (> 0)')
    self.npoint = npoint

    # Grid must be provided
    grid = kwargs.get('grid')
    if grid is None:
        raise ValueError('SparseFunction objects require a grid parameter')
    self.grid = grid

    self.dtype = kwargs.get('dtype', self.grid.dtype)
    self.space_order = kwargs.get('space_order', 0)

    # Set up coordinates of sparse points
    coordinates = Function(name='%s_coords' % self.name,
                           dimensions=(self.indices[-1], Dimension(name='d')),
                           shape=(self.npoint, self.grid.dim), space_order=0)
    coordinate_data = kwargs.get('coordinates')
    if coordinate_data is not None:
        coordinates.data[:] = coordinate_data[:]
    self.coordinates = coordinates

    # Halo region
    self._halo = tuple((0, 0) for i in range(self.ndim))

    # Padding region
    self._padding = tuple((0, 0) for i in range(self.ndim))
def _loop_blocking(self, state, **kwargs):
    """
    Apply loop blocking to :class:`Iteration` trees.

    Blocking is applied to parallel iteration trees. Heuristically, innermost
    dimensions are not blocked to maximize the trip count of the SIMD loops.

    Different heuristics may be specified by passing the keywords ``blockshape``
    and ``blockinner`` to the DLE. The former is used to indicate a specific
    block size for each blocked dimension. For example, for the
    :class:`Iteration` tree: ::

        for i
          for j
            for k
              ...

    one may provide ``blockshape = {i: 4, j: 7}``, in which case the
    two outer loops will be blocked, and the resulting 2-dimensional block will
    have size 4x7. The latter may be set to True to also block innermost parallel
    :class:`Iteration` objects.

    NOTE(review): the code below pairs the entries of ``blockshape``
    positionally with the blocked Iterations (via ``zip``) and treats a value
    without a length as the block size of the first blocked Iteration only;
    confirm that the dictionary form shown above is still what callers pass.

    :param state: Object providing the ``nodes`` to be processed.
    :param kwargs: Not used by this method.
    :return: A dictionary with the processed ``'nodes'`` and, if at least one
             Iteration was blocked, the ``'arguments'`` (one
             :class:`BlockingArg` per blocked Iteration) and ``'flags'``.
    """
    exclude_innermost = not self.params.get('blockinner', False)
    ignore_heuristic = self.params.get('blockalways', False)

    # Maps each blocked Iteration to the Dimension iterating over its blocks;
    # shared across all nodes
    blocked = OrderedDict()
    processed = []
    for node in state.nodes:
        # Make sure loop blocking will span as many Iterations as possible
        fold = fold_blockable_tree(node, exclude_innermost)

        # Maps the root of each blockable nest to its blocked replacement
        mapper = {}
        for tree in retrieve_iteration_tree(fold):
            # Is the Iteration tree blockable ?
            iterations = [i for i in tree if i.is_Parallel]
            if exclude_innermost:
                iterations = [i for i in iterations if not i.is_Vectorizable]
            if len(iterations) <= 1:
                continue
            root = iterations[0]
            if not IsPerfectIteration().visit(root):
                # Illegal/unsupported
                continue
            if not tree[0].is_Sequential and not ignore_heuristic:
                # Heuristic: avoid polluting the generated code with blocked
                # nests (thus increasing JIT compilation time and affecting
                # readability) if the blockable tree isn't embedded in a
                # sequential loop (e.g., a timestepping loop)
                continue

            # Decorate intra-block iterations with an IterationProperty
            TAG = tagger(len(mapper))

            # Build all necessary Iteration objects, individually. These will
            # subsequently be composed to implement loop blocking.
            inter_blocks = []
            intra_blocks = []
            remainders = []
            for i in iterations:
                # Build Iteration over blocks
                dim = blocked.setdefault(i, Dimension("%s_block" % i.dim.name))
                block_size = dim.symbolic_size
                iter_size = i.dim.size or i.dim.symbolic_size
                start = i.limits[0] - i.offsets[0]
                finish = iter_size - i.offsets[1]
                innersize = iter_size - (-i.offsets[0] + i.offsets[1])
                # Trim the upper bound by the part of the inner region that
                # does not fill a whole block
                finish = finish - (innersize % block_size)
                inter_block = Iteration([], dim, [start, finish, block_size],
                                        properties=PARALLEL)
                inter_blocks.append(inter_block)

                # Build Iteration within a block
                start = inter_block.dim
                finish = start + block_size
                intra_block = i._rebuild([], limits=[start, finish, 1], offsets=None,
                                         properties=i.properties + (TAG, ELEMENTAL))
                intra_blocks.append(intra_block)

                # Build unitary-increment Iteration over the 'leftover' region.
                # This will be used for remainder loops, executed when any
                # dimension size is not a multiple of the block size.
                start = inter_block.limits[1]
                finish = iter_size - i.offsets[1]
                remainder = i._rebuild([], limits=[start, finish, 1], offsets=None)
                remainders.append(remainder)

            # Build blocked Iteration nest
            blocked_tree = compose_nodes(inter_blocks + intra_blocks +
                                         [iterations[-1].nodes])

            # Build remainder Iterations: one nest for each non-empty subset
            # of the blocked dimensions
            remainder_trees = []
            for n in range(len(iterations)):
                for c in combinations([i.dim for i in iterations], n + 1):
                    # First all inter-block Iterations
                    nodes = [b._rebuild(properties=b.properties + (REMAINDER,))
                             for b, r in zip(inter_blocks, remainders)
                             if r.dim not in c]
                    # Then intra-block or remainder, for each dim (in order)
                    properties = (REMAINDER, TAG, ELEMENTAL)
                    for b, r in zip(intra_blocks, remainders):
                        handle = r if b.dim in c else b
                        nodes.append(handle._rebuild(properties=properties))
                    nodes.extend([iterations[-1].nodes])
                    remainder_trees.append(compose_nodes(nodes))

            # Will replace with blocked loop tree
            mapper[root] = List(body=[blocked_tree] + remainder_trees)

        rebuilt = Transformer(mapper).visit(fold)

        # Finish unrolling any previously folded Iterations
        processed.append(unfold_blocked_tree(rebuilt))

    # All blocked dimensions
    if not blocked:
        return {'nodes': processed}

    # Determine the block shape
    blockshape = self.params.get('blockshape')
    if not blockshape:
        # Use trivial heuristic for a suitable blockshape
        def heuristic(dim_size):
            ths = 8  # FIXME: This really needs to be improved
            return ths if dim_size > ths else 1
        # Note: the callable itself (not its result) is stored for each
        # blocked Iteration
        blockshape = {k: heuristic for k in blocked.keys()}
    else:
        try:
            # ``len`` raises TypeError if ``blockshape`` has no length
            # (e.g., a plain integer); see the handler below
            nitems, nrequired = len(blockshape), len(blocked)
            blockshape = {k: v for k, v in zip(blocked, blockshape)}
            if nitems > nrequired:
                dle_warning("Provided 'blockshape' has more entries than "
                            "blocked loops; dropping entries ...")
            if nitems < nrequired:
                dle_warning("Provided 'blockshape' has fewer entries than "
                            "blocked loops; dropping dimensions ...")
        except TypeError:
            # Single value: use it for the first blocked Iteration only
            blockshape = {list(blocked)[0]: blockshape}
        # Blocked Iterations left without a block size are mapped to None
        blockshape.update({k: None for k in blocked.keys() if k not in blockshape})

    # Track any additional arguments required to execute /state.nodes/
    arguments = [BlockingArg(v, k, blockshape[k]) for k, v in blocked.items()]

    return {'nodes': processed, 'arguments': arguments, 'flags': 'blocking'}
def _loop_blocking(self, nodes, state):
    """
    Apply loop blocking to PARALLEL :class:`Iteration` trees.

    Only nests with at least two blockable parallel Iterations are
    considered; innermost vectorizable Iterations are excluded unless the
    ``blockinner`` parameter is set, and nests whose tree root is not
    sequential are skipped unless ``blockalways`` is set.

    :param nodes: The IET to be processed.
    :param state: Not used by this method.
    :return: A 2-tuple ``(processed, metadata)``. ``metadata`` is an empty
             dictionary if nothing was blocked; otherwise it carries the
             ``'arguments'`` (one :class:`BlockingArg` per blocked Iteration)
             and ``'flags'``.
    """
    exclude_innermost = not self.params.get('blockinner', False)
    ignore_heuristic = self.params.get('blockalways', False)

    # Make sure loop blocking will span as many Iterations as possible
    fold = fold_blockable_tree(nodes, exclude_innermost)

    # Maps the root of each blockable nest to its blocked replacement
    mapper = {}
    # Maps each blocked Iteration to the Dimension iterating over its blocks
    blocked = OrderedDict()
    for tree in retrieve_iteration_tree(fold):
        # Is the Iteration tree blockable ?
        iterations = [i for i in tree if i.is_Parallel]
        if exclude_innermost:
            iterations = [i for i in iterations if not i.is_Vectorizable]
        if len(iterations) <= 1:
            continue
        root = iterations[0]
        if not IsPerfectIteration().visit(root):
            # Illegal/unsupported
            continue
        if not tree.root.is_Sequential and not ignore_heuristic:
            # Heuristic: avoid polluting the generated code with blocked
            # nests (thus increasing JIT compilation time and affecting
            # readability) if the blockable tree isn't embedded in a
            # sequential loop (e.g., a timestepping loop)
            continue

        # Decorate intra-block iterations with an IterationProperty
        TAG = tagger(len(mapper))

        # Build all necessary Iteration objects, individually. These will
        # subsequently be composed to implement loop blocking.
        inter_blocks = []
        intra_blocks = []
        remainders = []
        for i in iterations:
            # The number of nests blocked so far is part of the name
            name = "%s%d_block" % (i.dim.name, len(mapper))

            # Build Iteration over blocks
            dim = blocked.setdefault(i, Dimension(name=name))
            bsize = dim.symbolic_size
            bstart = i.limits[0]
            binnersize = i.symbolic_extent + (i.offsets[1] - i.offsets[0])
            # Trim the upper bound by the part of the inner region that
            # does not fill a whole block
            bfinish = i.dim.symbolic_end - (binnersize % bsize)
            inter_block = Iteration([], dim, [bstart, bfinish, bsize],
                                    offsets=i.offsets, properties=PARALLEL)
            inter_blocks.append(inter_block)

            # Build Iteration within a block
            limits = (dim, dim + bsize - 1, 1)
            intra_block = i._rebuild([], limits=limits, offsets=(0, 0),
                                     properties=i.properties + (TAG, ELEMENTAL))
            intra_blocks.append(intra_block)

            # Build unitary-increment Iteration over the 'leftover' region.
            # This will be used for remainder loops, executed when any
            # dimension size is not a multiple of the block size.
            remainder = i._rebuild([], limits=[bfinish + 1, i.dim.symbolic_end, 1],
                                   offsets=(i.offsets[1], i.offsets[1]))
            remainders.append(remainder)

        # Build blocked Iteration nest
        blocked_tree = compose_nodes(inter_blocks + intra_blocks +
                                     [iterations[-1].nodes])

        # Build remainder Iterations: one nest for each non-empty subset of
        # the blocked dimensions
        remainder_trees = []
        for n in range(len(iterations)):
            for c in combinations([i.dim for i in iterations], n + 1):
                # First all inter-block Iterations
                nodes = [b._rebuild(properties=b.properties + (REMAINDER,))
                         for b, r in zip(inter_blocks, remainders)
                         if r.dim not in c]
                # Then intra-block or remainder, for each dim (in order)
                properties = (REMAINDER, TAG, ELEMENTAL)
                for b, r in zip(intra_blocks, remainders):
                    handle = r if b.dim in c else b
                    nodes.append(handle._rebuild(properties=properties))
                nodes.extend([iterations[-1].nodes])
                remainder_trees.append(compose_nodes(nodes))

        # Will replace with blocked loop tree
        mapper[root] = List(body=[blocked_tree] + remainder_trees)

    rebuilt = Transformer(mapper).visit(fold)

    # Finish unrolling any previously folded Iterations
    processed = unfold_blocked_tree(rebuilt)

    # All blocked dimensions
    if not blocked:
        return processed, {}

    # Determine the block shape
    blockshape = self.params.get('blockshape')
    if not blockshape:
        # Use trivial heuristic for a suitable blockshape
        def heuristic(dim_size):
            ths = 8  # FIXME: This really needs to be improved
            return ths if dim_size > ths else 1
        # Note: the callable itself (not its result) is stored for each
        # blocked Iteration
        blockshape = {k: heuristic for k in blocked.keys()}
    else:
        try:
            # ``len`` raises TypeError if ``blockshape`` has no length
            # (e.g., a plain integer); see the handler below
            nitems, nrequired = len(blockshape), len(blocked)
            blockshape = {k: v for k, v in zip(blocked, blockshape)}
            if nitems > nrequired:
                dle_warning("Provided 'blockshape' has more entries than "
                            "blocked loops; dropping entries ...")
            if nitems < nrequired:
                dle_warning("Provided 'blockshape' has fewer entries than "
                            "blocked loops; dropping dimensions ...")
        except TypeError:
            # Single value: use it for the first blocked Iteration only
            blockshape = {list(blocked)[0]: blockshape}
        # Blocked Iterations left without a block size are mapped to None
        blockshape.update({k: None for k in blocked.keys() if k not in blockshape})

    # Track any additional arguments required to execute /state.nodes/
    arguments = [BlockingArg(v, k, blockshape[k]) for k, v in blocked.items()]

    return processed, {'arguments': arguments, 'flags': 'blocking'}
def _loop_blocking(self, state, **kwargs):
    """
    Apply loop blocking to :class:`Iteration` trees.

    By default, the blocked :class:`Iteration` objects and the block size are
    determined heuristically. The heuristic consists of searching the deepest
    Iteration/Expression tree and blocking all dimensions except:

        * The innermost (eg, to retain SIMD vectorization);
        * Those dimensions inducing loop-carried dependencies.

    The caller may take over the heuristic through ``kwargs['blocking']``,
    a dictionary indicating the block size of each blocked dimension. For
    example, for the :class:`Iteration` tree below: ::

        for i
          for j
            for k
              ...

    one may pass in ``kwargs['blocking'] = {i: 4, j: 7}``, in which case the
    two outer loops would be blocked, and the resulting 2-dimensional block
    would be of size 4x7.

    NOTE(review): the body never reads ``kwargs``; the block shape is taken
    from ``self.params['blockshape']`` and its entries are paired positionally
    with the blocked Iterations. The paragraph above looks stale; confirm.

    :param state: Object providing the ``nodes`` to be processed.
    :param kwargs: Not used by this method.
    :return: A dictionary with the processed ``'nodes'`` and, if at least one
             Iteration was blocked, the ``'arguments'`` (one
             :class:`BlockingArg` per blocked Iteration) and ``'flags'``.
    """
    # For each blocked Iteration: the unit-stride loops over the blocked
    # ('main') and non-blocked ('leftover') parts of its iteration space
    Region = namedtuple('Region', 'main leftover')

    # Maps each blocked Iteration to the Dimension iterating over its blocks;
    # shared across all nodes
    blocked = OrderedDict()
    processed = []
    for node in state.nodes:
        # Maps the root of each blockable nest to its blocked replacement
        mapper = {}
        for tree in retrieve_iteration_tree(node):
            # Is the Iteration tree blockable ?
            iterations = [i for i in tree if i.is_Parallel]
            # The mere presence of the 'blockinner' key (whatever its value)
            # keeps the vectorizable Iterations in
            if 'blockinner' not in self.params:
                iterations = [i for i in iterations if not i.is_Vectorizable]
            if not iterations:
                continue
            root = iterations[0]
            if not IsPerfectIteration().visit(root):
                continue

            # Construct the blocked loop nest, as well as all necessary
            # remainder loops
            regions = OrderedDict()
            blocked_iterations = []
            for i in iterations:
                # Build Iteration over blocks
                dim = blocked.setdefault(i, Dimension("%s_block" % i.dim.name))
                block_size = dim.symbolic_size
                iter_size = i.dim.size or i.dim.symbolic_size
                start = i.limits[0] - i.offsets[0]
                finish = iter_size - i.offsets[1]
                finish = finish - ((finish - i.offsets[1]) % block_size)
                inter_block = Iteration([], dim, [start, finish, block_size],
                                        properties=as_tuple('parallel'))

                # Build Iteration within a block
                start = inter_block.dim
                finish = start + block_size
                properties = 'vector-dim' if i.is_Vectorizable else None
                intra_block = Iteration([], i.dim, [start, finish, 1], i.index,
                                        properties=as_tuple(properties))

                blocked_iterations.append((inter_block, intra_block))

                # Build unitary-increment Iteration over the 'main' region
                # (the one blocked); necessary to generate code iterating over
                # non-blocked ("remainder") iterations.
                start = inter_block.limits[0]
                finish = inter_block.limits[1]
                main = Iteration([], i.dim, [start, finish, 1], i.index,
                                 properties=i.properties)

                # Build unitary-increment Iteration over the 'leftover' region:
                # again as above, this may be necessary when the dimension size
                # is not a multiple of the block size.
                start = inter_block.limits[1]
                finish = iter_size - i.offsets[1]
                leftover = Iteration([], i.dim, [start, finish, 1], i.index,
                                     properties=i.properties)

                regions[i] = Region(main, leftover)

            # Order the nest as: all inter-block loops, then all intra-block
            # loops, then the body of the innermost blocked Iteration
            blocked_tree = list(flatten(zip(*blocked_iterations)))
            blocked_tree = compose_nodes(blocked_tree + [iterations[-1].nodes])

            # Build remainder loops: one nest for each non-empty subset of
            # the blocked Iterations
            remainder_tree = []
            for n in range(len(iterations)):
                for i in combinations(iterations, n + 1):
                    nodes = [v.leftover if k in i else v.main
                             for k, v in regions.items()]
                    nodes += [iterations[-1].nodes]
                    remainder_tree.append(compose_nodes(nodes))

            # Will replace with blocked loop tree
            mapper[root] = List(body=[blocked_tree] + remainder_tree)

        rebuilt = Transformer(mapper).visit(node)

        processed.append(rebuilt)

    # All blocked dimensions
    if not blocked:
        return {'nodes': processed}

    # Determine the block shape
    blockshape = self.params.get('blockshape')
    if not blockshape:
        # Use trivial heuristic for a suitable blockshape
        def heuristic(dim_size):
            ths = 8  # FIXME: This really needs to be improved
            return ths if dim_size > ths else 1
        # Note: the callable itself (not its result) is stored for each
        # blocked Iteration
        blockshape = {k: heuristic for k in blocked.keys()}
    else:
        try:
            # ``len`` raises TypeError if ``blockshape`` has no length
            # (e.g., a plain integer); see the handler below
            nitems, nrequired = len(blockshape), len(blocked)
            blockshape = {k: v for k, v in zip(blocked, blockshape)}
            if nitems > nrequired:
                dle_warning("Provided 'blockshape' has more entries than "
                            "blocked loops; dropping entries ...")
            if nitems < nrequired:
                dle_warning("Provided 'blockshape' has fewer entries than "
                            "blocked loops; dropping dimensions ...")
        except TypeError:
            # Single value: use it for the first blocked Iteration only
            blockshape = {list(blocked)[0]: blockshape}
        # Blocked Iterations left without a block size are mapped to None
        blockshape.update({k: None for k in blocked.keys() if k not in blockshape})

    # Track any additional arguments required to execute /state.nodes/
    arguments = [BlockingArg(v, k, blockshape[k]) for k, v in blocked.items()]

    return {'nodes': processed, 'arguments': arguments, 'flags': 'blocking'}