def _loop_blocking(self, nodes, state): """Apply loop blocking to PARALLEL Iteration trees.""" exclude_innermost = not self.params.get('blockinner', False) ignore_heuristic = self.params.get('blockalways', False) # Make sure loop blocking will span as many Iterations as possible fold = fold_blockable_tree(nodes, exclude_innermost) mapper = {} blocked = OrderedDict() for tree in retrieve_iteration_tree(fold): # Is the Iteration tree blockable ? iterations = [i for i in tree if i.is_Parallel] if exclude_innermost: iterations = [i for i in iterations if not i.is_Vectorizable] if len(iterations) <= 1: continue root = iterations[0] if not IsPerfectIteration().visit(root): # Illegal/unsupported continue if not tree.root.is_Sequential and not ignore_heuristic: # Heuristic: avoid polluting the generated code with blocked # nests (thus increasing JIT compilation time and affecting # readability) if the blockable tree isn't embedded in a # sequential loop (e.g., a timestepping loop) continue # Decorate intra-block iterations with an IterationProperty TAG = tagger(len(mapper)) # Build all necessary Iteration objects, individually. These will # subsequently be composed to implement loop blocking. inter_blocks = [] intra_blocks = [] remainders = [] for i in iterations: # Build Iteration over blocks name = "%s%d_block" % (i.dim.name, len(mapper)) dim = blocked.setdefault(i, BlockDimension(i.dim, name=name)) binnersize = i.symbolic_size + (i.offsets[1] - i.offsets[0]) bmax = i.dim.symbolic_max - (binnersize % dim.step) inter_block = Iteration([], dim, bmax, offsets=i.offsets, properties=PARALLEL) inter_blocks.append(inter_block) # Build Iteration within a block limits = (dim, dim + dim.step - 1, 1) intra_block = i._rebuild([], limits=limits, offsets=(0, 0), properties=i.properties + (TAG, ELEMENTAL)) intra_blocks.append(intra_block) # Build unitary-increment Iteration over the 'leftover' region. # This will be used for remainder loops, executed when any # dimension size is not a multiple of the block size. remainder = i._rebuild([], limits=[bmax + 1, i.dim.symbolic_max, 1], offsets=(i.offsets[1], i.offsets[1])) remainders.append(remainder) # Build blocked Iteration nest blocked_tree = compose_nodes(inter_blocks + intra_blocks + [iterations[-1].nodes]) # Build remainder Iterations remainder_trees = [] for n in range(len(iterations)): for c in combinations([i.dim for i in iterations], n + 1): # First all inter-block Interations nodes = [b._rebuild(properties=b.properties + (REMAINDER,)) for b, r in zip(inter_blocks, remainders) if r.dim not in c] # Then intra-block or remainder, for each dim (in order) properties = (REMAINDER, TAG, ELEMENTAL) for b, r in zip(intra_blocks, remainders): handle = r if b.dim in c else b nodes.append(handle._rebuild(properties=properties)) nodes.extend([iterations[-1].nodes]) remainder_trees.append(compose_nodes(nodes)) # Will replace with blocked loop tree mapper[root] = List(body=[blocked_tree] + remainder_trees) rebuilt = Transformer(mapper).visit(fold) # Finish unrolling any previously folded Iterations processed = unfold_blocked_tree(rebuilt) return processed, {'dimensions': list(blocked.values())}
def _loop_blocking(self, state, **kwargs): """ Apply loop blocking to :class:`Iteration` trees. Blocking is applied to parallel iteration trees. Heuristically, innermost dimensions are not blocked to maximize the trip count of the SIMD loops. Different heuristics may be specified by passing the keywords ``blockshape`` and ``blockinner`` to the DLE. The former, a dictionary, is used to indicate a specific block size for each blocked dimension. For example, for the :class:`Iteration` tree: :: for i for j for k ... one may provide ``blockshape = {i: 4, j: 7}``, in which case the two outer loops will blocked, and the resulting 2-dimensional block will have size 4x7. The latter may be set to True to also block innermost parallel :class:`Iteration` objects. """ exclude_innermost = not self.params.get('blockinner', False) ignore_heuristic = self.params.get('blockalways', False) blocked = OrderedDict() processed = [] for node in state.nodes: # Make sure loop blocking will span as many Iterations as possible fold = fold_blockable_tree(node, exclude_innermost) mapper = {} for tree in retrieve_iteration_tree(fold): # Is the Iteration tree blockable ? iterations = [i for i in tree if i.is_Parallel] if exclude_innermost: iterations = [ i for i in iterations if not i.is_Vectorizable ] if len(iterations) <= 1: continue root = iterations[0] if not IsPerfectIteration().visit(root): # Illegal/unsupported continue if not tree[0].is_Sequential and not ignore_heuristic: # Heuristic: avoid polluting the generated code with blocked # nests (thus increasing JIT compilation time and affecting # readability) if the blockable tree isn't embedded in a # sequential loop (e.g., a timestepping loop) continue # Decorate intra-block iterations with an IterationProperty TAG = tagger(len(mapper)) # Build all necessary Iteration objects, individually. These will # subsequently be composed to implement loop blocking. inter_blocks = [] intra_blocks = [] remainders = [] for i in iterations: # Build Iteration over blocks dim = blocked.setdefault( i, Dimension("%s_block" % i.dim.name)) block_size = dim.symbolic_size iter_size = i.dim.size or i.dim.symbolic_size start = i.limits[0] - i.offsets[0] finish = iter_size - i.offsets[1] innersize = iter_size - (-i.offsets[0] + i.offsets[1]) finish = finish - (innersize % block_size) inter_block = Iteration([], dim, [start, finish, block_size], properties=PARALLEL) inter_blocks.append(inter_block) # Build Iteration within a block start = inter_block.dim finish = start + block_size intra_block = i._rebuild([], limits=[start, finish, 1], offsets=None, properties=i.properties + (TAG, ELEMENTAL)) intra_blocks.append(intra_block) # Build unitary-increment Iteration over the 'leftover' region. # This will be used for remainder loops, executed when any # dimension size is not a multiple of the block size. start = inter_block.limits[1] finish = iter_size - i.offsets[1] remainder = i._rebuild([], limits=[start, finish, 1], offsets=None) remainders.append(remainder) # Build blocked Iteration nest blocked_tree = compose_nodes(inter_blocks + intra_blocks + [iterations[-1].nodes]) # Build remainder Iterations remainder_trees = [] for n in range(len(iterations)): for c in combinations([i.dim for i in iterations], n + 1): # First all inter-block Interations nodes = [ b._rebuild(properties=b.properties + (REMAINDER, )) for b, r in zip(inter_blocks, remainders) if r.dim not in c ] # Then intra-block or remainder, for each dim (in order) properties = (REMAINDER, TAG, ELEMENTAL) for b, r in zip(intra_blocks, remainders): handle = r if b.dim in c else b nodes.append( handle._rebuild(properties=properties)) nodes.extend([iterations[-1].nodes]) remainder_trees.append(compose_nodes(nodes)) # Will replace with blocked loop tree mapper[root] = List(body=[blocked_tree] + remainder_trees) rebuilt = Transformer(mapper).visit(fold) # Finish unrolling any previously folded Iterations processed.append(unfold_blocked_tree(rebuilt)) # All blocked dimensions if not blocked: return {'nodes': processed} # Determine the block shape blockshape = self.params.get('blockshape') if not blockshape: # Use trivial heuristic for a suitable blockshape def heuristic(dim_size): ths = 8 # FIXME: This really needs to be improved return ths if dim_size > ths else 1 blockshape = {k: heuristic for k in blocked.keys()} else: try: nitems, nrequired = len(blockshape), len(blocked) blockshape = {k: v for k, v in zip(blocked, blockshape)} if nitems > nrequired: dle_warning("Provided 'blockshape' has more entries than " "blocked loops; dropping entries ...") if nitems < nrequired: dle_warning("Provided 'blockshape' has fewer entries than " "blocked loops; dropping dimensions ...") except TypeError: blockshape = {list(blocked)[0]: blockshape} blockshape.update( {k: None for k in blocked.keys() if k not in blockshape}) # Track any additional arguments required to execute /state.nodes/ arguments = [ BlockingArg(v, k, blockshape[k]) for k, v in blocked.items() ] return { 'nodes': processed, 'arguments': arguments, 'flags': 'blocking' }
def _loop_blocking(self, nodes, state): """ Apply loop blocking to PARALLEL :class:`Iteration` trees. """ exclude_innermost = not self.params.get('blockinner', False) ignore_heuristic = self.params.get('blockalways', False) # Make sure loop blocking will span as many Iterations as possible fold = fold_blockable_tree(nodes, exclude_innermost) mapper = {} blocked = OrderedDict() for tree in retrieve_iteration_tree(fold): # Is the Iteration tree blockable ? iterations = [i for i in tree if i.is_Parallel] if exclude_innermost: iterations = [i for i in iterations if not i.is_Vectorizable] if len(iterations) <= 1: continue root = iterations[0] if not IsPerfectIteration().visit(root): # Illegal/unsupported continue if not tree.root.is_Sequential and not ignore_heuristic: # Heuristic: avoid polluting the generated code with blocked # nests (thus increasing JIT compilation time and affecting # readability) if the blockable tree isn't embedded in a # sequential loop (e.g., a timestepping loop) continue # Decorate intra-block iterations with an IterationProperty TAG = tagger(len(mapper)) # Build all necessary Iteration objects, individually. These will # subsequently be composed to implement loop blocking. inter_blocks = [] intra_blocks = [] remainders = [] for i in iterations: name = "%s%d_block" % (i.dim.name, len(mapper)) # Build Iteration over blocks dim = blocked.setdefault(i, Dimension(name=name)) bsize = dim.symbolic_size bstart = i.limits[0] binnersize = i.symbolic_extent + (i.offsets[1] - i.offsets[0]) bfinish = i.dim.symbolic_end - (binnersize % bsize) inter_block = Iteration([], dim, [bstart, bfinish, bsize], offsets=i.offsets, properties=PARALLEL) inter_blocks.append(inter_block) # Build Iteration within a block limits = (dim, dim + bsize - 1, 1) intra_block = i._rebuild([], limits=limits, offsets=(0, 0), properties=i.properties + (TAG, ELEMENTAL)) intra_blocks.append(intra_block) # Build unitary-increment Iteration over the 'leftover' region. # This will be used for remainder loops, executed when any # dimension size is not a multiple of the block size. remainder = i._rebuild( [], limits=[bfinish + 1, i.dim.symbolic_end, 1], offsets=(i.offsets[1], i.offsets[1])) remainders.append(remainder) # Build blocked Iteration nest blocked_tree = compose_nodes(inter_blocks + intra_blocks + [iterations[-1].nodes]) # Build remainder Iterations remainder_trees = [] for n in range(len(iterations)): for c in combinations([i.dim for i in iterations], n + 1): # First all inter-block Interations nodes = [ b._rebuild(properties=b.properties + (REMAINDER, )) for b, r in zip(inter_blocks, remainders) if r.dim not in c ] # Then intra-block or remainder, for each dim (in order) properties = (REMAINDER, TAG, ELEMENTAL) for b, r in zip(intra_blocks, remainders): handle = r if b.dim in c else b nodes.append(handle._rebuild(properties=properties)) nodes.extend([iterations[-1].nodes]) remainder_trees.append(compose_nodes(nodes)) # Will replace with blocked loop tree mapper[root] = List(body=[blocked_tree] + remainder_trees) rebuilt = Transformer(mapper).visit(fold) # Finish unrolling any previously folded Iterations processed = unfold_blocked_tree(rebuilt) # All blocked dimensions if not blocked: return processed, {} # Determine the block shape blockshape = self.params.get('blockshape') if not blockshape: # Use trivial heuristic for a suitable blockshape def heuristic(dim_size): ths = 8 # FIXME: This really needs to be improved return ths if dim_size > ths else 1 blockshape = {k: heuristic for k in blocked.keys()} else: try: nitems, nrequired = len(blockshape), len(blocked) blockshape = {k: v for k, v in zip(blocked, blockshape)} if nitems > nrequired: dle_warning("Provided 'blockshape' has more entries than " "blocked loops; dropping entries ...") if nitems < nrequired: dle_warning("Provided 'blockshape' has fewer entries than " "blocked loops; dropping dimensions ...") except TypeError: blockshape = {list(blocked)[0]: blockshape} blockshape.update( {k: None for k in blocked.keys() if k not in blockshape}) # Track any additional arguments required to execute /state.nodes/ arguments = [ BlockingArg(v, k, blockshape[k]) for k, v in blocked.items() ] return processed, {'arguments': arguments, 'flags': 'blocking'}