def pack(self, loop_indices=None):
    """Return the pack for this argument, building and caching it on first use.

    For ``READ`` access ``self.outer`` is returned directly and nothing is
    cached.  For every other handled access a ``Materialise`` node indexed
    over ``self.outer.shape`` is built, stored on ``self._pack`` and
    returned; later calls return the stored node.

    :arg loop_indices: iterable of loop indices, unpacked into
        ``self.pick_loop_indices``.  Only consulted when a pack actually has
        to be built, in which case it must not be ``None``.
    :returns: ``self.outer`` for ``READ`` access, otherwise the cached
        ``Materialise`` node.
    :raises ValueError: if ``self.access`` is not one of the handled
        access types.
    """
    try:
        return self._pack
    except AttributeError:
        pass

    outer = self.outer
    shape = outer.shape
    if self.access is READ:
        # Read-only data is used in place: no packing required.
        return outer

    # We don't need to pack for memory layout, however packing
    # globals that are written is required such that subsequent
    # vectorisation loop transformations privatise these reduction
    # variables. The extra memory movement cost is minimal.
    loop_indices = self.pick_loop_indices(*loop_indices)

    # With ``init_with_zero`` set, MIN and MAX start from zero instead of
    # from the current contents of ``outer``.
    also_zero = {MIN, MAX} if self.init_with_zero else set()

    if self.access in {INC, WRITE} | also_zero:
        init = Zero((), outer.dtype)
        indices = MultiIndex(*(Index(extent) for extent in shape))
    elif self.access in {READ, RW, MIN, MAX} - also_zero:
        indices = MultiIndex(*(Index(extent) for extent in shape))
        init = Indexed(outer, indices)
    else:
        raise ValueError("Don't know how to initialise pack for '%s' access" % self.access)

    self._pack = Materialise(PackInst(loop_indices), init, indices)
    return self._pack
def layer_extents(self):
    """Build the ``(start, end)`` layer bounds for ``self.iteration_region``.

    Both bounds are expressions over row ``self._layer_index`` of
    ``self._layers_array``.  Writing ``lo`` and ``hi`` for the entries of
    that row at fixed indices 0 and 1:

    ==================  ========  ========
    iteration region    start     end
    ==================  ========  ========
    ON_BOTTOM           lo        lo + 1
    ON_TOP              hi - 2    hi - 1
    ON_INTERIOR_FACETS  lo        hi - 2
    ALL                 lo        hi - 1
    ==================  ========  ========

    :returns: 2-tuple of ``Materialise`` nodes (each built with an empty
        ``MultiIndex``) wrapping the start and end expressions.
    :raises ValueError: if the iteration region is none of the above.
    """
    def entry(column):
        # Entry ``column`` of this entity's row in the layers array.
        return Indexed(self._layers_array, (self._layer_index, FixedIndex(column)))

    def shifted(column, delta):
        # The same entry, offset by the integer literal ``delta``.
        return Sum(entry(column), Literal(IntType.type(delta)))

    if self.iteration_region == ON_BOTTOM:
        start, end = entry(0), shifted(0, 1)
    elif self.iteration_region == ON_TOP:
        start, end = shifted(1, -2), shifted(1, -1)
    elif self.iteration_region == ON_INTERIOR_FACETS:
        start, end = entry(0), shifted(1, -2)
    elif self.iteration_region == ALL:
        start, end = entry(0), shifted(1, -1)
    else:
        raise ValueError("Unknown iteration region")

    return (Materialise(PackInst(), start, MultiIndex()),
            Materialise(PackInst(), end, MultiIndex()))
def pack(self, loop_indices=None):
    """Return the pack for a map-indexed argument, or ``None`` without a map.

    The pack shape is a leading extent (2 if ``self.interior_horizontal``
    else 1), followed by ``self.map_.shape[1:]`` and, when
    ``self.view_index`` is ``None``, ``self.outer.shape[1:]``.

    * ``INC`` / ``WRITE``: the pack is initialised with zero.
    * ``READ`` / ``RW`` / ``MIN`` / ``MAX``: the pack is initialised from
      ``self._rvalue``; if that returns a mask the value is wrapped in
      ``When(mask, value)``.

    The result is cached on ``self._pack``.  The ``map_ is None`` check
    takes precedence over the cache.

    :arg loop_indices: forwarded to ``self._rvalue`` as a keyword argument.
    :returns: ``None`` when ``self.map_`` is ``None``, otherwise the
        ``Materialise`` node.
    :raises ValueError: if ``self.access`` is not a handled access type.
    """
    if self.map_ is None:
        return None

    try:
        return self._pack
    except AttributeError:
        pass

    leading = (2, ) if self.interior_horizontal else (1, )
    shape = leading + self.map_.shape[1:]
    if self.view_index is None:
        shape = shape + self.outer.shape[1:]

    if self.access in {INC, WRITE}:
        init = Zero((), self.outer.dtype)
        indices = MultiIndex(*(Index(extent) for extent in shape))
    elif self.access in {READ, RW, MIN, MAX}:
        indices = MultiIndex(*(Index(extent) for extent in shape))
        init, mask = self._rvalue(indices, loop_indices=loop_indices)
        if mask is not None:
            init = When(mask, init)
    else:
        raise ValueError(
            "Don't know how to initialise pack for '%s' access" % self.access)

    self._pack = Materialise(PackInst(), init, indices)
    return self._pack
def pack(self, loop_indices=None):
    """Return the flat, one-dimensional pack spanning all of ``self.packs``.

    The extent of the pack is the sum over sub-packs of
    ``prod(map_.shape[1:] + outer.shape[1:])``, doubled when
    ``self.interior_horizontal`` is set.

    * ``INC`` / ``WRITE``: the pack is initialised with zero.
    * ``READ`` / ``RW`` / ``MIN`` / ``MAX``: the pack is built from a zero
      value plus one ``(expression, index)`` pair per sub-pack.  The
      expression is the sub-pack's ``_rvalue`` (wrapped in ``When`` if a
      mask is returned); the index is the row-major flattening of the
      sub-pack's multi-index, shifted by the total size of the sub-packs
      that precede it.

    The result is cached on ``self._pack``.

    :arg loop_indices: forwarded positionally to each sub-pack's
        ``_rvalue``.
    :raises ValueError: if ``self.access`` is not a handled access type.
    """
    try:
        return self._pack
    except AttributeError:
        pass

    sub_sizes = tuple(
        numpy.prod(sub.map_.shape[1:] + sub.outer.shape[1:])
        for sub in self.packs)
    flat_shape = numpy.sum(sub_sizes)
    if self.interior_horizontal:
        leading = (2, )
        flat_shape = flat_shape * 2
    else:
        leading = (1, )

    if self.access in {INC, WRITE}:
        val = Zero((), self.dtype)
        multiindex = MultiIndex(Index(flat_shape))
        self._pack = Materialise(PackInst(), val, multiindex)
    elif self.access in {READ, RW, MIN, MAX}:
        multiindex = MultiIndex(Index(flat_shape))
        val = Zero((), self.dtype)
        expressions = []
        offset = 0  # running start position of the current sub-pack
        for sub in self.packs:
            shape = leading + sub.map_.shape[1:] + sub.outer.shape[1:]
            mi = MultiIndex(*(Index(extent) for extent in shape))
            expr, mask = sub._rvalue(mi, loop_indices)
            # Row-major strides: product of all trailing extents.
            strides = [
                numpy.prod(shape[pos:], dtype=numpy.int32)
                for pos in range(1, len(shape) + 1)
            ]
            flat = reduce(
                Sum,
                [Product(idx, Literal(IntType.type(stride), casting=False))
                 for idx, stride in zip(mi, strides)],
                Literal(IntType.type(0), casting=False))
            target = MultiIndex(
                Sum(flat, Literal(IntType.type(offset), casting=False)),
            )
            offset += numpy.prod(shape, dtype=numpy.int32)
            if mask is not None:
                expr = When(mask, expr)
            expressions.extend((expr, target))
        self._pack = Materialise(PackInst(), val, multiindex, *expressions)
    else:
        raise ValueError(
            "Don't know how to initialise pack for '%s' access" % self.access)

    return self._pack
def indexed(self, multiindex, layer=None):
    """Index this map, adding a per-layer offset when one applies.

    :arg multiindex: 3-tuple ``(n, i, f)``.
    :arg layer: layer expression, or ``None``.
    :returns: 2-tuple ``(expression, (f, i))``.

    Without a ``layer`` or without ``self.offset`` the expression is simply
    ``self.values[n, i]``; in that case ``f.extent`` must be 1 or ``None``
    (enforced with an ``assert``).

    Otherwise a ``Materialise`` node holding
    ``values[n, j] + (layer - bottom_layer + k) * offset[j]`` is built once
    per distinct ``f.extent`` (``None`` is keyed as 1), stored in
    ``self.prefetch`` and indexed with ``(f, i)``.
    """
    n, i, f = multiindex

    if layer is None or self.offset is None:
        assert f.extent == 1 or f.extent is None
        return Indexed(self.values, (n, i)), (f, i)

    # For extruded mesh, prefetch the indirections for each map, so that they don't
    # need to be recomputed. Different f values need to be treated separately.
    key = 1 if f.extent is None else f.extent
    if key not in self.prefetch:
        bottom_layer, _ = self.layer_bounds
        offset_extent, = self.offset.shape
        j = Index(offset_extent)
        base = Indexed(self.values, (n, j))
        k = Index(f.extent or 1)
        relative_layer = Sum(
            Sum(layer, Product(Literal(numpy.int32(-1)), bottom_layer)),
            k)
        shift = Product(relative_layer, Indexed(self.offset, (j, )))
        self.prefetch[key] = Materialise(PackInst(), Sum(base, shift),
                                         MultiIndex(k, j))

    return Indexed(self.prefetch[key], (f, i)), (f, i)
def loop_index(self):
    """Return the index used to address entities in this loop.

    When ``self.subset`` is falsy this is ``self._loop_index`` unchanged.
    Otherwise it is a ``Materialise`` node wrapping
    ``self._subset_indices`` indexed by ``self._loop_index``, i.e. the loop
    index is looked up through the subset indirection.
    """
    index = self._loop_index
    if not self.subset:
        return index
    return Materialise(
        PackInst(),
        Indexed(self._subset_indices, MultiIndex(index)),
        MultiIndex(),
    )
def emit_pack_instruction(self, *, loop_indices=None):
    """Generate the instructions needed before the kernel call.

    This is a generator.  For ``WRITE`` access it yields exactly one
    ``Accumulate`` instruction whose target is ``self.outer`` indexed over
    its full shape and whose value is a ``Zero`` of ``self.outer.dtype``.
    For any other access it yields nothing.

    :arg loop_indices: accepted for interface compatibility; not used here.
    """
    shape = self.outer.shape
    if self.access is not WRITE:
        return ()

    zero = Zero((), self.outer.dtype)
    indices = MultiIndex(*(Index(extent) for extent in shape))
    yield Accumulate(PackInst(), Indexed(self.outer, indices), zero)
def bottom_layer(self):
    """Return the expression for the bottom layer.

    For every iteration region except ``ON_TOP`` this is the start bound
    from ``self.layer_extents``.  For ``ON_TOP`` it is instead a
    ``Materialise`` of entry 0 of this entity's row in
    ``self._layers_array``.
    """
    if self.iteration_region == ON_TOP:
        return Materialise(
            PackInst(),
            Indexed(self._layers_array, (self._layer_index, FixedIndex(0))),
            MultiIndex(),
        )

    start, _ = self.layer_extents
    return start
def top_layer(self):
    """Return the expression for the top layer.

    For every iteration region except ``ON_BOTTOM`` this is the end bound
    from ``self.layer_extents``.  For ``ON_BOTTOM`` it is instead a
    ``Materialise`` of entry 1 of this entity's row in
    ``self._layers_array``, minus one.
    """
    if self.iteration_region == ON_BOTTOM:
        return Materialise(
            PackInst(),
            Sum(Indexed(self._layers_array, (self._layer_index, FixedIndex(1))),
                Literal(IntType.type(-1))),
            MultiIndex(),
        )

    _, end = self.layer_extents
    return end
def emit_instructions(self):
    """Generate every instruction of the wrapper, in execution order.

    Yields, in order:

    1. the pack instructions of each entry in ``self.packed_args``,
    2. one ``DummyInstruction`` referring to all non-``None`` loop indices,
    3. the result of ``self.kernel_call()``,
    4. the unpack instructions of each entry in ``self.packed_args``.

    ``emit_pack_instruction`` / ``emit_unpack_instruction`` are called on
    every argument up front, before the first instruction of the
    corresponding group is yielded.
    """
    pack_streams = [
        arg.emit_pack_instruction(loop_indices=self.loop_indices)
        for arg in self.packed_args
    ]
    for stream in pack_streams:
        yield from stream

    # Sometimes, actual instructions do not refer to all the loop
    # indices (e.g. all of them are globals). To ensure that loopy
    # knows about these indices, we emit a dummy instruction (that
    # doesn't generate any code) that does depend on them.
    yield DummyInstruction(PackInst(), *(ix for ix in self.loop_indices if ix is not None))

    yield self.kernel_call()

    unpack_streams = [
        arg.emit_unpack_instruction(loop_indices=self.loop_indices)
        for arg in self.packed_args
    ]
    for stream in unpack_streams:
        yield from stream
def indexed_vector(self, n, shape, layer=None):
    """Index this map with an extra trailing component index.

    The working shape is a leading extent (2 if
    ``self.interior_horizontal`` else 1), then ``self.shape[1:]``, then
    ``shape``; it must have exactly three entries, giving indices
    ``(f, i, j)``.  The map is indexed through ``self.indexed`` and each
    entry is expanded to ``entry * j.extent + j`` in a ``Materialise`` node.

    :arg n: first component of the multi-index handed to ``self.indexed``.
    :arg shape: tuple appended to ``self.shape[1:]``.
    :arg layer: forwarded to ``self.indexed``.
    :returns: 2-tuple ``(Indexed(pack, multiindex), multiindex)`` where
        ``multiindex`` is a tuple of fresh indices over the pack's shape.
    """
    full_shape = self.shape[1:] + shape
    if self.interior_horizontal:
        full_shape = (2, ) + full_shape
    else:
        full_shape = (1, ) + full_shape

    f, i, j = (Index(extent) for extent in full_shape)
    base, (f, i) = self.indexed((n, i, f), layer=layer)

    expanded = Sum(Product(base, Literal(numpy.int32(j.extent))), j)
    pack = Materialise(PackInst(), expanded, MultiIndex(f, i, j))

    multiindex = tuple(Index(extent) for extent in pack.shape)
    return Indexed(pack, multiindex), multiindex
def indexed(self, multiindex, layer=None):
    """Index this map, adding a per-layer offset when one applies.

    :arg multiindex: 3-tuple ``(n, i, f)``.
    :arg layer: layer expression, or ``None``.
    :returns: 2-tuple ``(expression, (f, i))``.

    Without a ``layer`` or without ``self.offset`` the expression is simply
    ``self.values[n, i]``; in that case ``f.extent`` must be 1 or ``None``
    (enforced with an ``assert``).

    Otherwise two ``Materialise`` nodes are kept in ``self.prefetch``:

    * under key ``None``, the base map row ``values[n, j]``;
    * under key ``f.extent`` (``None`` is keyed as 1), the base row plus
      ``(layer - bottom_layer + k) * offset``, where ``offset`` is used
      directly if its shape is ``()`` and indexed by ``j`` otherwise.

    The second node, indexed with ``(f, i)``, is the returned expression.
    """
    n, i, f = multiindex

    if layer is None or self.offset is None:
        assert f.extent == 1 or f.extent is None
        return Indexed(self.values, (n, i)), (f, i)

    # For extruded mesh, prefetch the indirections for each map, so that they don't
    # need to be recomputed.
    # First prefetch the base map (not dependent on layers)
    base_key = None
    if base_key not in self.prefetch:
        j = Index()
        row = Indexed(self.values, (n, j))
        self.prefetch[base_key] = Materialise(PackInst(), row, MultiIndex(j))
    base = self.prefetch[base_key]

    # Now prefetch the extruded part of the map (inside the layer loop).
    # This is necessary so loopy DTRT for MatSetValues
    # Different f values need to be treated separately.
    key = 1 if f.extent is None else f.extent
    if key not in self.prefetch:
        bottom_layer, _ = self.layer_bounds
        k = Index(key)
        relative_layer = Sum(
            Sum(layer, Product(Literal(numpy.int32(-1)), bottom_layer)),
            k)
        j = Index()
        # Inline map offsets where all entries are identical.
        if self.offset.shape == ():
            shift = Product(relative_layer, self.offset)
        else:
            shift = Product(relative_layer, Indexed(self.offset, (j, )))
        base = Indexed(base, (j, ))
        self.prefetch[key] = Materialise(PackInst(), Sum(base, shift),
                                         MultiIndex(k, j))

    return Indexed(self.prefetch[key], (f, i)), (f, i)
def pack(self, loop_indices=None, only_declare=False):
    """Return the pack for this matrix argument, creating it on first use.

    The pack shape is the concatenation of all tuples in ``self.shapes``.
    The result is cached on ``self._pack``; once cached, later calls return
    it regardless of the arguments they pass.

    :arg loop_indices: accepted for interface compatibility; not used here.
    :arg only_declare: if true, the pack is a bare ``Variable`` named
        ``matpack<N>`` (``N`` drawn from ``self.count``) with no
        initialisation.  If false, the pack is a ``Materialise`` node
        initialised with zero.
    :returns: the cached pack.
    :raises ValueError: if ``self.access`` is neither ``WRITE`` nor ``INC``.
        Nothing is cached and no counter value is consumed in that case.

    Previously the ``only_declare`` branch was followed by an independent
    ``if``/``else`` that always replaced the declared ``Variable`` with a
    zero-initialised ``Materialise`` (or raised after caching the
    ``Variable``), so the flag had no effect.  The two cases are now
    mutually exclusive and access is validated before anything is stored.
    """
    if hasattr(self, "_pack"):
        return self._pack

    shape = tuple(itertools.chain.from_iterable(self.shapes))

    # Validate first so a failed call leaves no stale ``_pack`` behind.
    if self.access not in {WRITE, INC}:
        raise ValueError("Unexpected access type")

    if only_declare:
        # Declaration only: the caller is responsible for filling it.
        self._pack = Variable(f"matpack{next(self.count)}", shape, self.dtype)
    else:
        val = Zero((), self.dtype)
        multiindex = MultiIndex(*(Index(e) for e in shape))
        self._pack = Materialise(PackInst(), val, multiindex)
    return self._pack
def pack(self, loop_indices=None):
    """Return the zero-initialised pack for this matrix argument.

    ``self.dims`` must unpack to a single ``(rdim, cdim)`` pair and
    ``self.maps`` to a ``(rmap, cmap)`` pair.  The row and column shapes are
    each a leading extent (2 if ``self.interior_horizontal`` else 1),
    followed by the map's ``shape[1:]`` and the matching dimension; the
    pack is indexed over the row shape followed by the column shape.

    The result is cached on ``self._pack``.

    :arg loop_indices: accepted for interface compatibility; not used here.
    :raises ValueError: if ``self.access`` is neither ``WRITE`` nor ``INC``.
    """
    try:
        return self._pack
    except AttributeError:
        pass

    ((rdim, cdim), ), = self.dims
    rmap, cmap = self.maps
    leading = (2, ) if self.interior_horizontal else (1, )
    rshape = leading + rmap.shape[1:] + (rdim, )
    cshape = leading + cmap.shape[1:] + (cdim, )

    if self.access not in {WRITE, INC}:
        raise ValueError("Unexpected access type")

    zero = Zero((), self.dtype)
    indices = MultiIndex(*(Index(extent) for extent in (rshape + cshape)))
    self._pack = Materialise(PackInst(), zero, indices)
    return self._pack
def pack(self, loop_indices=None):
    """Return the zero-initialised two-dimensional pack over all sub-packs.

    The row extent is the sum, over the first column of ``self.packs``, of
    the product of each sub-pack's row shape; the column extent is the sum,
    over the first row of ``self.packs``, of the product of each sub-pack's
    column shape.  Each sub-pack's ``shapes`` must unpack to a
    ``(row shape, column shape)`` pair.

    The result is cached on ``self._pack``.

    :arg loop_indices: accepted for interface compatibility; not used here.
    :raises ValueError: if ``self.access`` is neither ``WRITE`` nor ``INC``.
    """
    try:
        return self._pack
    except AttributeError:
        pass

    # Need to compute row and col shape based on individual pack shapes
    n_rows = 0
    for sub in self.packs[:, 0]:
        row_shape, _ = sub.shapes
        n_rows += numpy.prod(row_shape, dtype=int)

    n_cols = 0
    for sub in self.packs[0, :]:
        _, col_shape = sub.shapes
        n_cols += numpy.prod(col_shape, dtype=int)

    if self.access not in {WRITE, INC}:
        raise ValueError("Unexpected access type")

    zero = Zero((), self.dtype)
    indices = MultiIndex(*(Index(extent) for extent in (n_rows, n_cols)))
    self._pack = Materialise(PackInst(), zero, indices)
    return self._pack