def indexed(self, multiindex, layer=None):
    """Index the map values and report the free indices of the result.

    :arg multiindex: triple ``(n, i, f)`` of indices.
    :arg layer: optional layer expression.  When both ``layer`` and
        ``self.offset`` are set, the offset-adjusted values are
        materialised once per distinct ``f.extent`` and cached in
        ``self.prefetch``.
    :returns: pair ``(expression, (f, i))``.
    """
    n, i, f = multiindex
    if layer is None or self.offset is None:
        # No layer/offset information: ``f`` has to be trivial here.
        assert f.extent == 1 or f.extent is None
        return Indexed(self.values, (n, i)), (f, i)

    # For extruded mesh, prefetch the indirections for each map, so that
    # they don't need to be recomputed.
    # Different f values need to be treated separately.
    extent = f.extent
    key = 1 if extent is None else extent
    if key not in self.prefetch:
        bottom_layer, _ = self.layer_bounds
        (offset_extent,) = self.offset.shape
        j = Index(offset_extent)
        base = Indexed(self.values, (n, j))
        k = Index(extent or 1)
        # (layer - bottom_layer) + k
        relative_layer = Sum(layer,
                             Product(Literal(numpy.int32(-1)), bottom_layer))
        scaled = Product(Sum(relative_layer, k),
                         Indexed(self.offset, (j, )))
        self.prefetch[key] = Materialise(PackInst(),
                                         Sum(base, scaled),
                                         MultiIndex(k, j))
    return Indexed(self.prefetch[key], (f, i)), (f, i)
def pack(self, loop_indices=None):
    """Return the (cached) pack expression for this argument.

    For ``READ`` access ``self.outer`` is returned directly and nothing
    is cached.  Otherwise a ``Materialise`` is created, stored on
    ``self._pack`` and returned.

    :arg loop_indices: iterable forwarded to ``self.pick_loop_indices``
        (needed for every access other than ``READ``).
    :raises ValueError: if the access descriptor is not handled.
    """
    if hasattr(self, "_pack"):
        return self._pack

    shape = self.outer.shape
    if self.access is READ:
        # No packing required
        return self.outer

    # We don't need to pack for memory layout, however packing
    # globals that are written is required such that subsequent
    # vectorisation loop transformations privatise these reduction
    # variables. The extra memory movement cost is minimal.
    loop_indices = self.pick_loop_indices(*loop_indices)
    also_zero = {MIN, MAX} if self.init_with_zero else set()
    zero_initialised = {INC, WRITE} | also_zero
    copy_initialised = {READ, RW, MIN, MAX} - also_zero

    if self.access in zero_initialised:
        initial = Zero((), self.outer.dtype)
        multiindex = MultiIndex(*(Index(extent) for extent in shape))
    elif self.access in copy_initialised:
        multiindex = MultiIndex(*(Index(extent) for extent in shape))
        initial = Indexed(self.outer, multiindex)
    else:
        raise ValueError("Don't know how to initialise pack for '%s' access" % self.access)

    self._pack = Materialise(PackInst(loop_indices), initial, multiindex)
    return self._pack
def emit_unpack_instruction(self, *, loop_indices=None):
    """Yield the instruction writing the pack back to the outer data.

    Nothing is yielded when there is no pack or the access is ``READ``.
    For ``INC``/``MIN``/``MAX`` the packed value is combined with the
    current value using ``Sum``/``Min``/``Max``; for every other access
    the packed value is assigned directly.  If ``self._rvalue`` reports
    a mask, the instruction is wrapped in ``When``.
    """
    pack = self.pack(loop_indices)
    if pack is None or self.access is READ:
        return ()

    combine = {INC: Sum, MIN: Min, MAX: Max}.get(self.access)
    multiindex = tuple(Index(extent) for extent in pack.shape)
    rvalue, mask = self._rvalue(multiindex, loop_indices=loop_indices)

    label = UnpackInst()
    packed = Indexed(pack, multiindex)
    new_value = packed if combine is None else combine(rvalue, packed)
    acc = Accumulate(label, rvalue, new_value)

    yield acc if mask is None else When(mask, acc)
def pack(self, loop_indices=None):
    """Return the (cached) pack for this argument, or None without a map.

    The pack shape is a leading extent (2 if ``self.interior_horizontal``
    else 1), followed by ``self.map_.shape[1:]`` and, when no view index
    is set, ``self.outer.shape[1:]``.

    :arg loop_indices: forwarded to ``self._rvalue`` when the pack is
        initialised from the outer data.
    :raises ValueError: if the access descriptor is not handled.
    """
    if self.map_ is None:
        return None
    if hasattr(self, "_pack"):
        return self._pack

    leading = (2, ) if self.interior_horizontal else (1, )
    shape = leading + self.map_.shape[1:]
    if self.view_index is None:
        shape = shape + self.outer.shape[1:]

    if self.access in {INC, WRITE}:
        # Written from scratch: start from zero.
        initial = Zero((), self.outer.dtype)
        multiindex = MultiIndex(*(Index(extent) for extent in shape))
    elif self.access in {READ, RW, MIN, MAX}:
        # Initialise from the outer data (masked if required).
        multiindex = MultiIndex(*(Index(extent) for extent in shape))
        initial, mask = self._rvalue(multiindex, loop_indices=loop_indices)
        if mask is not None:
            initial = When(mask, initial)
    else:
        raise ValueError(
            "Don't know how to initialise pack for '%s' access" % self.access)

    self._pack = Materialise(PackInst(), initial, multiindex)
    return self._pack
def emit_unpack_instruction(self, *, loop_indices=None):
    """Yield the unpack instructions for every block of ``self.packs``.

    For each sub-pack, an ``Accumulate`` copying its slice of the flat
    two-dimensional mixed pack into the sub-pack's own (declared) pack
    is built, and the sub-pack's own unpack instructions are collected.
    All copy instructions are yielded first, followed by all sub-pack
    unpack instructions.
    """
    def int_literal(value):
        return Literal(IntType.type(value), casting=False)

    def trailing_extents(shape):
        # Product of the extents following each position.
        return [numpy.prod(shape[start:], dtype=numpy.int32)
                for start in range(1, len(shape) + 1)]

    def flatten(indices, extents):
        terms = [Product(idx, int_literal(ext))
                 for idx, ext in zip(indices, extents)]
        return reduce(Sum, terms, int_literal(0))

    pack = self.pack(loop_indices=loop_indices)
    mixed_to_local = []
    local_to_global = []
    roffset = 0
    for row in self.packs:
        coffset = 0
        for p in row:
            rshape, cshape = p.shapes
            sub_pack = p.pack(loop_indices=loop_indices, only_declare=True)
            rindices = tuple(Index(extent) for extent in rshape)
            cindices = tuple(Index(extent) for extent in cshape)
            lvalue = Indexed(sub_pack, MultiIndex(*rindices, *cindices))

            rextents = trailing_extents(rshape)
            cextents = trailing_extents(cshape)
            flat_row = flatten(rindices, rextents)
            flat_col = flatten(cindices, cextents)
            flat_index = MultiIndex(Sum(flat_row, int_literal(roffset)),
                                    Sum(flat_col, int_literal(coffset)))
            rvalue = Indexed(pack, flat_index)

            # Copy from local mixed element tensor into non-mixed
            mixed_to_local.append(Accumulate(PreUnpackInst(), lvalue, rvalue))
            # And into global matrix.
            local_to_global.extend(p.emit_unpack_instruction(loop_indices=loop_indices))
            coffset += numpy.prod(cshape, dtype=numpy.int32)
        roffset += numpy.prod(rshape, dtype=numpy.int32)

    yield from mixed_to_local
    yield from local_to_global
def indexed_vector(self, n, shape, layer=None):
    """Materialise ``map_value * extent + component`` and index it.

    :arg n: index passed on to ``self.indexed``.
    :arg shape: extra trailing shape appended to ``self.shape[1:]``; the
        resulting full shape must have exactly three entries.
    :arg layer: optional layer expression forwarded to ``self.indexed``.
    :returns: pair ``(Indexed(pack, multiindex), multiindex)``.
    """
    full_shape = self.shape[1:] + shape
    leading = (2, ) if self.interior_horizontal else (1, )
    full_shape = leading + full_shape

    f, i, j = map(Index, full_shape)
    base, (f, i) = self.indexed((n, i, f), layer=layer)
    # Scale the map entry by the extent of the last index and add it.
    scaled = Product(base, Literal(numpy.int32(j.extent)))
    pack = Materialise(PackInst(), Sum(scaled, j), MultiIndex(f, i, j))

    multiindex = tuple(Index(extent) for extent in pack.shape)
    return Indexed(pack, multiindex), multiindex
def emit_unpack_instruction(self, *, loop_indices=None):
    """Yield one ``FunctionCall`` inserting the pack into ``self.outer``.

    The callee is ``self.insertion_names[unroll]``, where ``unroll`` is
    true if any of the maps requests unrolling.  The insertion mode
    symbol is ``INSERT_VALUES`` for ``WRITE`` and ``ADD_VALUES`` for
    ``INC``; any other access raises ``KeyError`` from the lookup.
    """
    from pyop2.codegen.rep2loopy import register_petsc_function

    ((rdim, cdim), ), = self.dims
    rmap, cmap = self.maps
    n, layer = self.pick_loop_indices(*loop_indices)
    unroll = any(m.unroll for m in self.maps)

    if unroll:
        maps = [map_.indexed_vector(n, (dim, ), layer=layer)
                for map_, dim in zip(self.maps, (rdim, cdim))]
    else:
        maps = []
        for map_ in self.maps:
            i = Index()
            f = Index(2 if self.interior_horizontal else 1)
            maps.append(map_.indexed((n, i, f), layer=layer))
    (rmap, cmap), (rindices, cindices) = zip(*maps)

    pack = self.pack(loop_indices=loop_indices)
    name = self.insertion_names[unroll]

    # The shape of MatPack is
    # (row, cols) if it has vector BC
    # (block_rows, row_cmpt, block_cols, col_cmpt) otherwise
    if unroll:
        free_indices = rindices + cindices
    else:
        free_indices = rindices + (Index(), ) + cindices + (Index(), )
    pack = Indexed(pack, free_indices)

    insert_modes = {WRITE: "INSERT_VALUES", INC: "ADD_VALUES"}
    access = Symbol(insert_modes[self.access])

    rextent = Extent(MultiIndex(*rindices))
    cextent = Extent(MultiIndex(*cindices))

    register_petsc_function(name)

    argument_accesses = (self.access, READ, READ, READ, READ, READ, READ)
    yield FunctionCall(name,
                       UnpackInst(),
                       argument_accesses,
                       free_indices,
                       self.outer,
                       rextent,
                       rmap,
                       cextent,
                       cmap,
                       pack,
                       access)
def pack(self, loop_indices=None):
    """Return the (cached) flat pack covering every pack in ``self.packs``.

    The pack has a single index whose extent is the summed size of all
    sub-packs (doubled when ``self.interior_horizontal`` is set).  For
    ``INC``/``WRITE`` it is zero-initialised; for ``READ``/``RW``/
    ``MIN``/``MAX`` each sub-pack's value is additionally written at its
    flattened offset.

    :raises ValueError: if the access descriptor is not handled.
    """
    if hasattr(self, "_pack"):
        return self._pack

    sizes = tuple(numpy.prod(p.map_.shape[1:] + p.outer.shape[1:])
                  for p in self.packs)
    flat_shape = numpy.sum(sizes)
    if self.interior_horizontal:
        _shape = (2, )
        flat_shape *= 2
    else:
        _shape = (1, )

    if self.access in {INC, WRITE}:
        val = Zero((), self.dtype)
        multiindex = MultiIndex(Index(flat_shape))
        self._pack = Materialise(PackInst(), val, multiindex)
        return self._pack

    if self.access not in {READ, RW, MIN, MAX}:
        raise ValueError(
            "Don't know how to initialise pack for '%s' access" % self.access)

    multiindex = MultiIndex(Index(flat_shape))
    val = Zero((), self.dtype)
    # Alternating (value, destination index) pairs for every sub-pack.
    expressions = []
    offset = 0
    for p in self.packs:
        shape = _shape + p.map_.shape[1:] + p.outer.shape[1:]
        mi = MultiIndex(*(Index(extent) for extent in shape))
        expr, mask = p._rvalue(mi, loop_indices)
        extents = [numpy.prod(shape[start:], dtype=numpy.int32)
                   for start in range(1, len(shape) + 1)]
        terms = [Product(idx, Literal(IntType.type(ext), casting=False))
                 for idx, ext in zip(mi, extents)]
        flat = reduce(Sum, terms, Literal(IntType.type(0), casting=False))
        destination = MultiIndex(
            Sum(flat, Literal(IntType.type(offset), casting=False)),
        )
        offset += numpy.prod(shape, dtype=numpy.int32)
        if mask is not None:
            expr = When(mask, expr)
        expressions.extend((expr, destination))

    self._pack = Materialise(PackInst(), val, multiindex, *expressions)
    return self._pack
def kernel_arg(self, loop_indices=None):
    """Return the expression handed to the kernel for this argument.

    With a map, this is the pack indexed by fresh indices.  Without a
    map, ``self.outer`` is indexed directly by the first picked loop
    index followed by either fresh indices or the fixed view indices.

    :raises ValueError: if there is no map and ``loop_indices`` is None.
    """
    if self.map_ is not None:
        pack = self.pack(loop_indices)
        return Indexed(pack, (Index(extent) for extent in pack.shape))

    if loop_indices is None:
        raise ValueError("Need iteration index")
    n, layer = self.pick_loop_indices(*loop_indices)
    shape = self.outer.shape
    if self.view_index is None:
        trailing = tuple(Index(extent) for extent in shape[1:])
    else:
        trailing = tuple(FixedIndex(i) for i in self.view_index)
    return Indexed(self.outer, (n, ) + trailing)
def emit_unpack_instruction(self, *, loop_indices=None):
    """Yield one write-back instruction per pack in ``self.packs``.

    Nothing is yielded for ``READ`` access.  Otherwise each sub-pack's
    slice of the flat pack is written to that sub-pack's rvalue,
    combined via ``Sum``/``Min``/``Max`` for ``INC``/``MIN``/``MAX``
    and assigned directly for any other access.  Masked rvalues are
    wrapped in ``When``.
    """
    pack = self.pack(loop_indices)
    if self.access is READ:
        return ()

    _shape = (2,) if self.interior_horizontal else (1,)
    combine = {INC: Sum, MIN: Min, MAX: Max}.get(self.access)

    offset = 0
    for p in self.packs:
        shape = _shape + p.map_.shape[1:] + p.outer.shape[1:]
        mi = MultiIndex(*(Index(extent) for extent in shape))
        rvalue, mask = p._rvalue(mi, loop_indices)
        extents = [numpy.prod(shape[start:], dtype=numpy.int32)
                   for start in range(1, len(shape) + 1)]
        terms = [Product(idx, Literal(IntType.type(ext), casting=False))
                 for idx, ext in zip(mi, extents)]
        flat = reduce(Sum, terms, Literal(IntType.type(0), casting=False))
        source = MultiIndex(Sum(flat, Literal(IntType.type(offset), casting=False)),)
        rhs = Indexed(pack, source)
        offset += numpy.prod(shape, dtype=numpy.int32)

        if combine is not None:
            rhs = combine(rvalue, rhs)
        acc = Accumulate(UnpackInst(), rvalue, rhs)
        yield acc if mask is None else When(mask, acc)
def emit_pack_instruction(self, *, loop_indices=None):
    """Yield an instruction zeroing ``self.outer`` for ``WRITE`` access.

    For any other access nothing is yielded.  ``loop_indices`` is
    accepted but not used.
    """
    shape = self.outer.shape
    if self.access is not WRITE:
        return ()

    zero = Zero((), self.outer.dtype)
    multiindex = MultiIndex(*(Index(extent) for extent in shape))
    target = Indexed(self.outer, multiindex)
    yield Accumulate(PackInst(), target, zero)
def emit_unpack_instruction(self, *, loop_indices=None):
    """Yield the instruction writing the pack back into ``self.outer``.

    Nothing is yielded when there is no pack or the access is ``READ``.
    ``INC``/``MIN``/``MAX`` combine the packed value with the current
    one using ``Sum``/``Min``/``Max``; any other access assigns the
    packed value directly.

    :arg loop_indices: iterable forwarded to ``self.pack`` and
        ``self.pick_loop_indices``.
    """
    pack = self.pack(loop_indices)
    loop_indices = self.pick_loop_indices(*loop_indices)
    if pack is None or self.access is READ:
        return ()

    combine = {INC: Sum, MIN: Min, MAX: Max}.get(self.access)
    multiindex = tuple(Index(extent) for extent in pack.shape)
    rvalue = Indexed(self.outer, multiindex)

    label = UnpackInst(loop_indices)
    packed = Indexed(pack, multiindex)
    new_value = packed if combine is None else combine(rvalue, packed)
    yield Accumulate(label, rvalue, new_value)
def indexed(self, multiindex, layer=None):
    """Index the map values and report the free indices of the result.

    :arg multiindex: triple ``(n, i, f)`` of indices.
    :arg layer: optional layer expression.  When both ``layer`` and
        ``self.offset`` are set, two materialised temporaries are cached
        in ``self.prefetch``: the base map row (under key ``None``) and
        the offset-adjusted values (under ``f.extent``, or 1 if unset).
    :returns: pair ``(expression, (f, i))``.
    """
    n, i, f = multiindex
    if layer is None or self.offset is None:
        # No layer/offset information: ``f`` has to be trivial here.
        assert f.extent == 1 or f.extent is None
        return Indexed(self.values, (n, i)), (f, i)

    # For extruded mesh, prefetch the indirections for each map, so that
    # they don't need to be recomputed.
    # First prefetch the base map (not dependent on layers)
    base_key = None
    if base_key not in self.prefetch:
        j = Index()
        row = Indexed(self.values, (n, j))
        self.prefetch[base_key] = Materialise(PackInst(), row, MultiIndex(j))
    base = self.prefetch[base_key]

    # Now prefetch the extruded part of the map (inside the layer loop).
    # This is necessary so loopy DTRT for MatSetValues
    # Different f values need to be treated separately.
    extent = f.extent
    key = 1 if extent is None else extent
    if key not in self.prefetch:
        bottom_layer, _ = self.layer_bounds
        k = Index(1 if extent is None else extent)
        # (layer - bottom_layer) + k
        relative_layer = Sum(layer,
                             Product(Literal(numpy.int32(-1)), bottom_layer))
        offset = Sum(relative_layer, k)
        j = Index()
        # Inline map offsets where all entries are identical.
        if self.offset.shape == ():
            offset = Product(offset, self.offset)
        else:
            offset = Product(offset, Indexed(self.offset, (j,)))
        base = Indexed(base, (j, ))
        self.prefetch[key] = Materialise(PackInst(),
                                         Sum(base, offset),
                                         MultiIndex(k, j))

    return Indexed(self.prefetch[key], (f, i)), (f, i)
def pack(self, loop_indices=None, only_declare=False):
    """Return the (cached) pack for this matrix argument.

    :arg loop_indices: accepted for interface compatibility; not used.
    :arg only_declare: if true, the pack is a bare ``Variable`` named
        ``matpack<N>`` with no initialiser; otherwise it is a
        zero-initialised ``Materialise``.
    :returns: the pack, which is also stored on ``self._pack``.
    :raises ValueError: if the access is neither ``WRITE`` nor ``INC``.
    """
    if hasattr(self, "_pack"):
        return self._pack

    shape = tuple(itertools.chain(*self.shapes))

    # Validate first so an unsupported access is rejected regardless of
    # ``only_declare`` and no pack is cached for it.
    if self.access not in {WRITE, INC}:
        raise ValueError("Unexpected access type")

    if only_declare:
        # Previously this declaration was unconditionally replaced by the
        # zero-initialised Materialise below, so ``only_declare`` had no
        # effect.  The two cases are now mutually exclusive.
        pack = Variable(f"matpack{next(self.count)}", shape, self.dtype)
    else:
        val = Zero((), self.dtype)
        multiindex = MultiIndex(*(Index(e) for e in shape))
        pack = Materialise(PackInst(), val, multiindex)

    self._pack = pack
    return self._pack
def pack(self, loop_indices=None):
    """Return the (cached) zero-initialised pack for this matrix argument.

    The pack shape is the row shape followed by the column shape, each
    being a leading extent (2 if ``self.interior_horizontal`` else 1),
    the map's ``shape[1:]`` and the corresponding entry of ``self.dims``.

    :arg loop_indices: accepted for interface compatibility; not used.
    :raises ValueError: if the access is neither ``WRITE`` nor ``INC``.
    """
    if hasattr(self, "_pack"):
        return self._pack

    ((rdim, cdim), ), = self.dims
    rmap, cmap = self.maps
    leading = (2, ) if self.interior_horizontal else (1, )
    rshape = leading + rmap.shape[1:] + (rdim, )
    cshape = leading + cmap.shape[1:] + (cdim, )

    if self.access not in {WRITE, INC}:
        raise ValueError("Unexpected access type")

    val = Zero((), self.dtype)
    multiindex = MultiIndex(*(Index(extent) for extent in (rshape + cshape)))
    self._pack = Materialise(PackInst(), val, multiindex)
    return self._pack
def pack(self, loop_indices=None):
    """Return the (cached) two-dimensional zero-initialised mixed pack.

    The row extent is the summed size of the row shapes of the packs in
    the first column of ``self.packs``; the column extent is the summed
    size of the column shapes of the packs in its first row.

    :arg loop_indices: accepted for interface compatibility; not used.
    :raises ValueError: if the access is neither ``WRITE`` nor ``INC``.
    """
    if hasattr(self, "_pack"):
        return self._pack

    # Need to compute row and col shape based on individual pack shapes
    rshape = sum(numpy.prod(rows, dtype=int)
                 for rows, _ in (p.shapes for p in self.packs[:, 0]))
    cshape = sum(numpy.prod(cols, dtype=int)
                 for _, cols in (p.shapes for p in self.packs[0, :]))
    shape = (rshape, cshape)

    if self.access not in {WRITE, INC}:
        raise ValueError("Unexpected access type")

    val = Zero((), self.dtype)
    multiindex = MultiIndex(*(Index(extent) for extent in shape))
    self._pack = Materialise(PackInst(), val, multiindex)
    return self._pack
def kernel_arg(self, loop_indices=None):
    """Return the pack indexed by one fresh ``Index`` per pack extent."""
    packed = self.pack(loop_indices=loop_indices)
    fresh_indices = tuple(Index(extent) for extent in packed.shape)
    return Indexed(packed, fresh_indices)
def kernel_arg(self, loop_indices=None):
    """Return the pack indexed by one fresh ``Index`` per pack extent."""
    packed = self.pack(loop_indices)
    # The indices are handed over lazily, as a generator.
    return Indexed(packed, (Index(extent) for extent in packed.shape))
def _rename_node_index(node, self):
    """Return a new ``Index`` with the extent of ``node``.

    The name is looked up in ``self.replace`` with ``node`` as key and
    falls back to ``node.name`` when there is no entry.
    """
    return Index(extent=node.extent,
                 name=self.replace.get(node, node.name))
def kernel_arg(self, loop_indices=None):
    """Return ``self.outer`` indexed by one fresh ``Index`` per extent.

    ``loop_indices`` is accepted but not used.
    """
    outer = self.outer
    # The indices are handed over lazily, as a generator.
    return Indexed(outer, (Index(extent) for extent in outer.shape))