def emit_unpack_instruction(self, *, loop_indices=None):
    """Yield the instruction that stores the pack through ``self._rvalue``.

    Nothing is yielded when ``self.pack`` returns ``None`` or when the
    access is ``READ``.  For ``INC``, ``MIN`` and ``MAX`` the stored value
    is ``Sum``/``Min``/``Max`` of the current target and the packed entry;
    for every other access the packed entry is stored as is.  When
    ``_rvalue`` returns a mask, the instruction is wrapped in ``When``.
    """
    pack = self.pack(loop_indices)
    if pack is None or self.access is READ:
        # This is a generator, so returning here simply ends the iteration.
        return ()

    def keep_packed(_target, packed):
        # Plain store: the packed entry replaces the target value.
        return packed

    combine = {INC: Sum, MIN: Min, MAX: Max}.get(self.access, keep_packed)
    multiindex = tuple(Index(extent) for extent in pack.shape)
    target, mask = self._rvalue(multiindex, loop_indices=loop_indices)
    store = Accumulate(UnpackInst(), target,
                       combine(target, Indexed(pack, multiindex)))
    yield store if mask is None else When(mask, store)
def indexed(self, multiindex, layer=None):
    """Index the map values with the ``(n, i, f)`` triple ``multiindex``.

    Returns ``(expression, (f, i))``.  Without a ``layer`` or without an
    offset the expression is ``self.values`` indexed by ``(n, i)``.
    Otherwise a materialised expression is looked up in (or added to)
    ``self.prefetch`` and indexed by ``(f, i)``.
    """
    n, i, f = multiindex
    if layer is None or self.offset is None:
        assert f.extent == 1 or f.extent is None
        return Indexed(self.values, (n, i)), (f, i)

    # For extruded mesh, prefetch the indirections for each map, so that
    # they don't need to be recomputed.  Different f values need to be
    # treated separately, hence the cache is keyed on the extent of f.
    key = 1 if f.extent is None else f.extent
    if key not in self.prefetch:
        bottom_layer, _ = self.layer_bounds
        (offset_extent,) = self.offset.shape
        j = Index(offset_extent)
        base = Indexed(self.values, (n, j))
        k = Index(f.extent or 1)
        # (layer - bottom_layer + k) * offset[j]
        relative_layer = Sum(
            Sum(layer, Product(Literal(numpy.int32(-1)), bottom_layer)), k)
        shift = Product(relative_layer, Indexed(self.offset, (j,)))
        self.prefetch[key] = Materialise(
            PackInst(), Sum(base, shift), MultiIndex(k, j))

    return Indexed(self.prefetch[key], (f, i)), (f, i)
def emit_unpack_instruction(self, *, loop_indices=None):
    """Yield the unpack instructions for every sub-pack in ``self.packs``.

    All ``PreUnpackInst`` accumulations, which copy a block of the
    combined pack into the corresponding sub-pack, are yielded first;
    the instructions emitted by the sub-packs themselves follow.
    """
    def int_literal(value):
        return Literal(IntType.type(value), casting=False)

    def strides(shape):
        # Stride of each axis when ``shape`` is flattened row-major.
        return [numpy.prod(shape[axis + 1:], dtype=numpy.int32)
                for axis, _ in enumerate(shape)]

    def flatten(indices, extents):
        terms = [Product(index, int_literal(extent))
                 for index, extent in zip(indices, extents)]
        return reduce(Sum, terms, int_literal(0))

    mixed_pack = self.pack(loop_indices=loop_indices)
    mixed_to_local = []
    local_to_global = []
    row_offset = 0
    for block_row in self.packs:
        col_offset = 0
        for block in block_row:
            rshape, cshape = block.shapes
            block_pack = block.pack(loop_indices=loop_indices,
                                    only_declare=True)
            rindices = tuple(Index(extent) for extent in rshape)
            cindices = tuple(Index(extent) for extent in cshape)
            lvalue = Indexed(block_pack, MultiIndex(*rindices, *cindices))
            rextents = strides(rshape)
            cextents = strides(cshape)
            flat_row = flatten(rindices, rextents)
            flat_col = flatten(cindices, cextents)
            flat_index = MultiIndex(
                Sum(flat_row, int_literal(row_offset)),
                Sum(flat_col, int_literal(col_offset)))
            rvalue = Indexed(mixed_pack, flat_index)
            # Copy from local mixed element tensor into non-mixed
            mixed_to_local.append(Accumulate(PreUnpackInst(), lvalue, rvalue))
            # And into global matrix.
            local_to_global.extend(
                block.emit_unpack_instruction(loop_indices=loop_indices))
            col_offset += numpy.prod(cshape, dtype=numpy.int32)
        # The row offset advances by the row shape of the last block seen.
        row_offset += numpy.prod(rshape, dtype=numpy.int32)
    yield from mixed_to_local
    yield from local_to_global
def emit_unpack_instruction(self, *, loop_indices=None):
    """Yield one ``FunctionCall`` that passes the pack to the insertion routine.

    The routine name comes from ``self.insertion_names`` (selected by
    whether any map requests unrolling) and is registered through
    ``register_petsc_function`` before the call node is built.  The final
    argument is the symbol ``INSERT_VALUES`` for ``WRITE`` access and
    ``ADD_VALUES`` for ``INC`` access; any other access raises ``KeyError``.
    """
    from pyop2.codegen.rep2loopy import register_petsc_function

    (((rdim, cdim),),) = self.dims
    # Rebound below with the indexed map expressions.
    rmap, cmap = self.maps
    n, layer = self.pick_loop_indices(*loop_indices)
    unroll = any(m.unroll for m in self.maps)

    if unroll:
        indexed_maps = [
            map_.indexed_vector(n, (dim,), layer=layer)
            for map_, dim in zip(self.maps, (rdim, cdim))
        ]
    else:
        indexed_maps = []
        for map_ in self.maps:
            i = Index()
            f = Index(2 if self.interior_horizontal else 1)
            indexed_maps.append(map_.indexed((n, i, f), layer=layer))
    (rmap, cmap), (rindices, cindices) = zip(*indexed_maps)

    pack = self.pack(loop_indices=loop_indices)
    name = self.insertion_names[unroll]
    # The shape of MatPack is
    # (row, cols) if it has vector BC
    # (block_rows, row_cmpt, block_cols, col_cmpt) otherwise
    if unroll:
        free_indices = rindices + cindices
    else:
        free_indices = rindices + (Index(),) + cindices + (Index(),)
    pack = Indexed(pack, free_indices)

    insert_modes = {WRITE: "INSERT_VALUES", INC: "ADD_VALUES"}
    access = Symbol(insert_modes[self.access])
    rextent = Extent(MultiIndex(*rindices))
    cextent = Extent(MultiIndex(*cindices))

    register_petsc_function(name)
    argument_accesses = (self.access, READ, READ, READ, READ, READ, READ)
    yield FunctionCall(name, UnpackInst(), argument_accesses, free_indices,
                       self.outer, rextent, rmap, cextent, cmap, pack, access)
def layer_extents(self):
    """Return the materialised ``(start, end)`` pair for the iteration region.

    Both values are built from entries of ``self._layers_array`` at
    ``self._layer_index``: column 0 and column 1, optionally shifted by
    a small integer constant depending on ``self.iteration_region``.

    Raises ``ValueError`` for an unrecognised iteration region.
    """
    def layers_entry(column):
        return Indexed(self._layers_array,
                       (self._layer_index, FixedIndex(column)))

    def shifted(expr, delta):
        return Sum(expr, Literal(IntType.type(delta)))

    region = self.iteration_region
    if region == ON_BOTTOM:
        start = layers_entry(0)
        end = shifted(layers_entry(0), 1)
    elif region == ON_TOP:
        start = shifted(layers_entry(1), -2)
        end = shifted(layers_entry(1), -1)
    elif region == ON_INTERIOR_FACETS:
        start = layers_entry(0)
        end = shifted(layers_entry(1), -2)
    elif region == ALL:
        start = layers_entry(0)
        end = shifted(layers_entry(1), -1)
    else:
        raise ValueError("Unknown iteration region")

    return (Materialise(PackInst(), start, MultiIndex()),
            Materialise(PackInst(), end, MultiIndex()))
def kernel_arg(self, loop_indices=None):
    """Return the indexed expression handed to the kernel.

    With a map, the pack is indexed by fresh indices over its whole
    shape.  Without a map, ``self.outer`` is indexed directly: the first
    index is the iteration index picked from ``loop_indices`` and the
    remaining ones are either fresh indices over the trailing shape or
    fixed indices taken from ``self.view_index``.

    Raises ``ValueError`` when there is no map and no ``loop_indices``.
    """
    if self.map_ is not None:
        pack = self.pack(loop_indices)
        extents = pack.shape
        return Indexed(pack, (Index(extent) for extent in extents))

    if loop_indices is None:
        raise ValueError("Need iteration index")
    n, layer = self.pick_loop_indices(*loop_indices)
    shape = self.outer.shape
    if self.view_index is None:
        trailing = tuple(Index(extent) for extent in shape[1:])
    else:
        trailing = tuple(FixedIndex(component) for component in self.view_index)
    return Indexed(self.outer, (n,) + trailing)
def loop_index(self):
    """Return the iteration index, looked up through the subset if there is one.

    When ``self.subset`` is falsy the raw ``self._loop_index`` is returned.
    Otherwise the result is a materialised read of
    ``self._subset_indices`` at that index.
    """
    index = self._loop_index
    if not self.subset:
        return index
    return Materialise(PackInst(),
                       Indexed(self._subset_indices, MultiIndex(index)),
                       MultiIndex())
def emit_unpack_instruction(self, *, loop_indices=None):
    """Yield one unpack instruction per sub-pack in ``self.packs``.

    Nothing is yielded for ``READ`` access.  Each sub-pack occupies a
    contiguous slice of the flat combined pack; the slice starts at a
    running offset and is addressed by the row-major flattening of the
    sub-pack's indices.  For ``INC``/``MIN``/``MAX`` the stored value is
    ``Sum``/``Min``/``Max`` of the target and the packed entry.  When the
    sub-pack's ``_rvalue`` returns a mask the instruction is wrapped in
    ``When``.
    """
    pack = self.pack(loop_indices)
    if self.access is READ:
        return ()

    def int_literal(value):
        return Literal(IntType.type(value), casting=False)

    combine = {INC: Sum, MIN: Min, MAX: Max}.get(self.access)
    leading = (2,) if self.interior_horizontal else (1,)
    offset = 0
    for sub in self.packs:
        shape = leading + sub.map_.shape[1:] + sub.outer.shape[1:]
        mi = MultiIndex(*(Index(extent) for extent in shape))
        target, mask = sub._rvalue(mi, loop_indices)
        strides = [numpy.prod(shape[axis + 1:], dtype=numpy.int32)
                   for axis, _ in enumerate(shape)]
        terms = [Product(index, int_literal(stride))
                 for index, stride in zip(mi, strides)]
        flat = reduce(Sum, terms, int_literal(0))
        value = Indexed(pack, MultiIndex(Sum(flat, int_literal(offset))))
        offset += numpy.prod(shape, dtype=numpy.int32)
        if combine is not None:
            value = combine(target, value)
        store = Accumulate(UnpackInst(), target, value)
        yield store if mask is None else When(mask, store)
def pack(self, loop_indices=None):
    """Return the pack for this argument, creating and caching it on first use.

    A previously created pack (``self._pack``) is returned as is.  For
    ``READ`` access no pack is made and ``self.outer`` is returned.
    Otherwise the pack is a ``Materialise`` node initialised either with
    zero (``INC``, ``WRITE``, plus ``MIN``/``MAX`` when
    ``self.init_with_zero`` is set) or with the current contents of
    ``self.outer`` (``RW``, plus ``MIN``/``MAX`` when it is not set).

    Raises ``ValueError`` for any other access.
    """
    try:
        return self._pack
    except AttributeError:
        pass

    shape = self.outer.shape
    if self.access is READ:
        # No packing required
        return self.outer

    # We don't need to pack for memory layout, however packing
    # globals that are written is required such that subsequent
    # vectorisation loop transformations privatise these reduction
    # variables. The extra memory movement cost is minimal.
    loop_indices = self.pick_loop_indices(*loop_indices)

    zero_initialised = {INC, WRITE}
    copy_initialised = {READ, RW, MIN, MAX}
    if self.init_with_zero:
        zero_initialised |= {MIN, MAX}
        copy_initialised -= {MIN, MAX}

    if self.access in zero_initialised:
        value = Zero((), self.outer.dtype)
        multiindex = MultiIndex(*(Index(extent) for extent in shape))
    elif self.access in copy_initialised:
        multiindex = MultiIndex(*(Index(extent) for extent in shape))
        value = Indexed(self.outer, multiindex)
    else:
        raise ValueError("Don't know how to initialise pack for '%s' access" % self.access)

    self._pack = Materialise(PackInst(loop_indices), value, multiindex)
    return self._pack
def emit_pack_instruction(self, *, loop_indices=None):
    """Yield an instruction that zeroes ``self.outer`` for ``WRITE`` access.

    For every other access nothing is yielded.
    """
    shape = self.outer.shape
    if self.access is not WRITE:
        # This is a generator, so returning here simply ends the iteration.
        return ()
    zero = Zero((), self.outer.dtype)
    multiindex = MultiIndex(*(Index(extent) for extent in shape))
    target = Indexed(self.outer, multiindex)
    yield Accumulate(PackInst(), target, zero)
def bottom_layer(self):
    """Return the expression used as the bottom layer.

    For the ``ON_TOP`` region this is a materialised read of column 0 of
    ``self._layers_array`` at ``self._layer_index``; for every other
    region it is the first element of ``self.layer_extents``.
    """
    if self.iteration_region == ON_TOP:
        return Materialise(
            PackInst(),
            Indexed(self._layers_array, (self._layer_index, FixedIndex(0))),
            MultiIndex())
    first, _last = self.layer_extents
    return first
def emit_unpack_instruction(self, *, loop_indices=None):
    """Yield the instruction that writes the pack back into ``self.outer``.

    Nothing is yielded when ``self.pack`` returns ``None`` or when the
    access is ``READ``.  For ``INC``, ``MIN`` and ``MAX`` the stored value
    is ``Sum``/``Min``/``Max`` of the current ``self.outer`` entry and the
    packed entry; for every other access the packed entry is stored as
    is.  The instruction label carries the indices selected by
    ``self.pick_loop_indices``.

    ``loop_indices`` is only unpacked once it is known that an
    instruction will be emitted, so the early exits also work when
    ``loop_indices`` is ``None``.
    """
    pack = self.pack(loop_indices)
    if pack is None or self.access is READ:
        # This is a generator, so returning here simply ends the iteration.
        return ()

    def keep_packed(_target, packed):
        # Plain store: the packed entry replaces the target value.
        return packed

    # Deferred until after the early exits: unpacking ``None`` would raise
    # TypeError even though there is nothing to emit.
    loop_indices = self.pick_loop_indices(*loop_indices)
    combine = {INC: Sum, MIN: Min, MAX: Max}.get(self.access, keep_packed)
    multiindex = tuple(Index(extent) for extent in pack.shape)
    target = Indexed(self.outer, multiindex)
    yield Accumulate(UnpackInst(loop_indices), target,
                     combine(target, Indexed(pack, multiindex)))
def top_layer(self):
    """Return the expression used as the top layer.

    For the ``ON_BOTTOM`` region this is a materialised value: column 1
    of ``self._layers_array`` at ``self._layer_index``, minus one.  For
    every other region it is the second element of ``self.layer_extents``.
    """
    if self.iteration_region == ON_BOTTOM:
        return Materialise(
            PackInst(),
            Sum(Indexed(self._layers_array,
                        (self._layer_index, FixedIndex(1))),
                Literal(IntType.type(-1))),
            MultiIndex())
    _first, last = self.layer_extents
    return last
def indexed_vector(self, n, shape, layer=None):
    """Return a materialised, component-expanded version of the indexed map.

    The map entry obtained from ``self.indexed`` is scaled by the extent
    of the last index and offset by that index, then materialised over
    ``(f, i, j)``.  Returns ``(expression, multiindex)`` where the
    expression indexes the materialised pack with fresh indices over its
    shape.
    """
    trailing = self.shape[1:] + shape
    leading = (2,) if self.interior_horizontal else (1,)
    f, i, j = (Index(extent) for extent in leading + trailing)
    base, (f, i) = self.indexed((n, i, f), layer=layer)
    # base * extent(j) + j
    scaled = Product(base, Literal(numpy.int32(j.extent)))
    pack = Materialise(PackInst(), Sum(scaled, j), MultiIndex(f, i, j))
    multiindex = tuple(Index(extent) for extent in pack.shape)
    return Indexed(pack, multiindex), multiindex
def _rvalue(self, multiindex, loop_indices=None):
    """Return the indexed Dat and the masking condition for reads/writes.

    The result is a pair ``(expression, mask)``.  If ``mask`` is None no
    mask is applied; otherwise the caller wraps the pack/unpack in
    ``When(mask, expr)``.  This is used for the case where maps might
    have negative entries.
    """
    f, i, *trailing = multiindex
    n, layer = self.pick_loop_indices(*loop_indices)
    if self.view_index is not None:
        fixed = tuple(FixedIndex(component) for component in self.view_index)
        trailing = tuple(trailing) + fixed
    map_, (f, i) = self.map_.indexed((n, i, f), layer=layer)
    target = Indexed(self.outer, MultiIndex(map_, *trailing))
    return target, self._mask(map_)
def indexed(self, multiindex, layer=None):
    """Index the map values with the ``(n, i, f)`` triple ``multiindex``.

    Returns ``(expression, (f, i))``.  Without a ``layer`` or without an
    offset the expression is ``self.values`` indexed by ``(n, i)``.
    Otherwise two materialised expressions are cached in
    ``self.prefetch``: the base map row under the key ``None`` and the
    layer-shifted map under the extent of ``f``.
    """
    n, i, f = multiindex
    if layer is None or self.offset is None:
        assert f.extent == 1 or f.extent is None
        return Indexed(self.values, (n, i)), (f, i)

    # For extruded mesh, prefetch the indirections for each map, so that
    # they don't need to be recomputed.
    # First prefetch the base map (not dependent on layers)
    base_key = None
    if base_key not in self.prefetch:
        j = Index()
        row = Indexed(self.values, (n, j))
        self.prefetch[base_key] = Materialise(PackInst(), row, MultiIndex(j))
    base = self.prefetch[base_key]

    # Now prefetch the extruded part of the map (inside the layer loop).
    # This is necessary so loopy DTRT for MatSetValues
    # Different f values need to be treated separately.
    key = 1 if f.extent is None else f.extent
    if key not in self.prefetch:
        bottom_layer, _ = self.layer_bounds
        k = Index(key)
        relative_layer = Sum(
            Sum(layer, Product(Literal(numpy.int32(-1)), bottom_layer)), k)
        j = Index()
        if self.offset.shape == ():
            # Inline map offsets where all entries are identical.
            shift = Product(relative_layer, self.offset)
        else:
            shift = Product(relative_layer, Indexed(self.offset, (j,)))
        base = Indexed(base, (j,))
        self.prefetch[key] = Materialise(
            PackInst(), Sum(base, shift), MultiIndex(k, j))

    return Indexed(self.prefetch[key], (f, i)), (f, i)
def replace_materialise_materialise(node, self):
    """Replace ``node`` by a ``Variable`` and record its initialisers.

    The mapped children of ``node`` are consumed as alternating
    ``(rvalue, indices)`` pairs.  Each pair becomes an ``Accumulate``
    into the new variable, labelled with ``node.label``; an rvalue that
    is a ``When`` keeps its condition wrapped around the accumulation.
    The tuple of accumulations is appended to ``self.initialisers`` and
    the variable is returned.
    """
    variable = Variable(node.name, node.shape, node.dtype)
    mapped = [self(child) for child in node.children]
    label = node.label

    def initialiser(rvalue, indices):
        lvalue = Indexed(variable, indices)
        if not isinstance(rvalue, When):
            return Accumulate(label, lvalue, rvalue)
        condition, guarded = rvalue.children
        return When(condition, Accumulate(label, lvalue, guarded))

    pairs = zip(mapped[0::2], mapped[1::2])
    self.initialisers.append(
        tuple(initialiser(rvalue, indices) for rvalue, indices in pairs))
    return variable
def kernel_arg(self, loop_indices=None):
    """Return the pack indexed by fresh indices over its whole shape."""
    packed = self.pack(loop_indices)
    extents = packed.shape
    return Indexed(packed, (Index(extent) for extent in extents))
def kernel_arg(self, loop_indices=None):
    """Return ``self.outer`` indexed by fresh indices over its whole shape.

    ``loop_indices`` is accepted but not used.
    """
    target = self.outer
    return Indexed(target, (Index(extent) for extent in target.shape))
def kernel_arg(self, loop_indices=None):
    """Return the pack indexed by a tuple of fresh indices over its shape."""
    packed = self.pack(loop_indices=loop_indices)
    multiindex = tuple(map(Index, packed.shape))
    return Indexed(packed, multiindex)