def _range(annotation, args):
    """Handling TVM loop types"""
    n = len(args)
    if n == 1:
        low, ext = const(0, dtype="int32"), args[0]
    else:
        _internal_assert(n == 2, "A loop intrinsic should only have 1 or 2 arguments!")
        low, ext = args[0], args[1]
        if not tvm.tir.analysis.expr_deep_equal(low, const(0, dtype="int32")):
            ext = ext - low
    for_type = LOOP_INTRIN[annotation]
    iter_var = None
    return iter_var, low, ext, for_type
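# A hedged usage sketch (not from the source): how the loop intrinsics that
# _range handles appear inside a te.hybrid script. The function name, shapes,
# and the 2.0 multiplier are illustrative assumptions.
from tvm.te.hybrid import script

@script
def scale(a):
    b = output_tensor(a.shape, a.dtype)
    # "unroll" is one of the LOOP_INTRIN annotation keys; called with a single
    # argument, _range treats it as the extent with an implicit low of 0.
    for i in unroll(a.shape[0]):
        b[i] = a[i] * 2.0
    return b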
def allocate(self, dtype, shape, name="buf", scope=None):
    """Create an allocate statement.

    Parameters
    ----------
    dtype : str
        The content data type.

    shape : tuple of Expr
        The shape of array to be allocated.

    name : str, optional
        The name of the buffer.

    scope : str, optional
        The scope of the buffer.

    Returns
    -------
    buffer : BufferVar
        The buffer var representing the buffer.
    """
    buffer_var = _expr.Var(name, dtype="handle")
    if not isinstance(shape, (list, tuple, _container.Array)):
        shape = [shape]
    if scope:
        self.scope_attr(buffer_var, "storage_scope", scope)
    self.emit(lambda x: _stmt.Allocate(buffer_var, dtype, shape, const(1, dtype="uint1"), x))
    return BufferVar(self, buffer_var, dtype)
def __getitem__(self, index):
    t = DataType(self._content_type)
    index = self._linear_index(index)
    if t.lanes > 1:
        base = index * t.lanes
        index = _expr.Ramp(base, const(1, base.dtype), t.lanes)
    return _expr.Load(self._content_type, self._buffer_var, index)
def __getitem__(self, index):
    t = DataType(self._content_type)
    index = self._linear_index(index)
    if t.lanes > 1:
        base = index * t.lanes
        stride = 1 if (not hasattr(base, "dtype")) else const(1, base.dtype)
        index = _expr.Ramp(base, stride, t.lanes)
    return _expr.Load(self._content_type, self._buffer_var, index)
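# A hedged sketch (assumed API paths): indexing a BufferVar whose content type
# is a vector ("float32x4") takes the Ramp branch above; the hasattr guard lets
# a plain Python int index (which has no .dtype) still produce a valid stride.
import tvm

ib = tvm.tir.ir_builder.create()
vec = ib.allocate("float32x4", (4,), name="vec", scope="local")
load = vec[2]  # lanes = 4, base = 2 * 4 = 8 -> a load through Ramp(8, 1, 4)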
def bind(func_id, args):
    """Handling TVM thread binding"""
    _internal_assert(func_id == "bind", "This function cannot be directly invoked!")
    _internal_assert(len(args) == 2, "A loop bind should only have 2 arguments!")
    _internal_assert(isinstance(args[0], str), "A loop bind's first argument should be a string!")
    low, ext = const(0, "int32"), args[1]
    iter_var = tvm.te.thread_axis((low, ext), args[0])
    for_type = None
    return iter_var, low, ext, for_type
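# A hedged sketch of bind() in a te.hybrid script: the first argument names
# the thread axis tag, the second gives its extent. The function name and
# shapes are illustrative assumptions.
from tvm.te.hybrid import script

@script
def copy(a):
    b = output_tensor(a.shape, a.dtype)
    for tx in bind("threadIdx.x", a.shape[0]):
        b[tx] = a[tx]
    return b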
def __setitem__(self, index, value):
    value = convert(value)
    if value.dtype != self._content_type:
        raise ValueError(
            "data type does not match content type %s vs %s" % (value.dtype, self._content_type)
        )
    t = DataType(self._content_type)
    if t.lanes > 1:
        base = index * t.lanes
        index = _expr.Ramp(base, const(1, base.dtype), t.lanes)
    self._builder.emit(_stmt.Store(self._buffer_var, value, index))
def _pack_buffer(buf):
    """Build intrinsics that pack the buffer."""
    shape = Call("handle", "tir.tvm_stack_make_shape", buf.shape)
    strides = Call("handle", "tir.tvm_stack_make_shape", buf.strides) if buf.strides else 0
    pack_args = [
        buf.data,
        shape,
        strides,
        len(buf.shape),
        const(0, dtype=buf.dtype),
        buf.elem_offset,
    ]
    return Call("handle", Op.get("tir.tvm_stack_make_array"), pack_args)
def __setitem__(self, index, value):
    value = convert(value)
    if value.dtype != self._content_type:
        raise ValueError(
            "data type does not match content type %s vs %s" % (value.dtype, self._content_type)
        )
    index = self._linear_index(index)
    t = DataType(self._content_type)
    if t.lanes > 1:
        base = index * t.lanes
        stride = 1 if (not hasattr(base, "dtype")) else const(1, base.dtype)
        index = _expr.Ramp(base, stride, t.lanes)
    self._builder.emit(_stmt.Store(self._buffer_var, value, index))
def _pack_buffer(buf):
    """Build intrinsics that pack the buffer."""
    assert buf.shape
    shape = Call("handle", "tvm_stack_make_shape", buf.shape, Call.Intrinsic)
    strides = (
        Call("handle", "tvm_stack_make_shape", buf.strides, Call.Intrinsic) if buf.strides else 0
    )
    pack_args = [
        buf.data,
        shape,
        strides,
        len(buf.shape),
        const(0, dtype=buf.dtype),
        buf.elem_offset,
    ]
    return Call("handle", "tvm_stack_make_array", pack_args, Call.Intrinsic)
def allocate(self, dtype, shape, name="buf", axis_separators=None, scope=""):
    """Create an allocate statement.

    Parameters
    ----------
    dtype : str
        The content data type.

    shape : tuple of Expr
        The shape of array to be allocated.

    name : str, optional
        The name of the buffer.

    axis_separators : list of int, optional
        If passed, a list of separators between groups of axes,
        each of which is flattened to an output axis.  For flat
        memory spaces, should either be None, or an empty list.

    scope : str, optional
        The scope of the buffer.

    Returns
    -------
    buffer : BufferVar
        The buffer var representing the buffer.
    """
    if not isinstance(shape, (list, tuple, _container.Array)):
        shape = [shape]

    buffer = _buffer.decl_buffer(shape, dtype, name, scope=scope, axis_separators=axis_separators)
    buffer_var = buffer.data

    self.emit(lambda x: _stmt.Allocate(buffer_var, dtype, shape, const(1, dtype="uint1"), x))
    return BufferVar(self, buffer, dtype)
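# A hedged end-to-end sketch: building a statement with the allocate() above
# through the IRBuilder API and retrieving it via get(); the buffer name and
# extents are assumptions.
import tvm

ib = tvm.tir.ir_builder.create()
buf = ib.allocate("float32", (16,), name="buf", scope="local")
with ib.for_range(0, 16, name="i") as i:
    buf[i] = 0.0  # stores go through BufferVar.__setitem__
stmt = ib.get()  # the emitted Allocate wraps the loop body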
def __init__(
    self,
    iter_values: List[PrimExpr],
    predicate: Union[PrimExpr, bool],
    block: Block,
    span: Optional[Span] = None,
):
    if isinstance(predicate, bool):
        predicate = const(predicate, "bool")
    self.__init_handle_by_constructor__(
        _ffi_api.BlockRealize,
        iter_values,
        predicate,
        block,
        span,
    )
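# A hedged sketch: the bool coercion above lets callers pass a plain Python
# True/False predicate; the Block arguments here are minimal assumptions.
import tvm

block = tvm.tir.Block([], [], [], "root", tvm.tir.Evaluate(0))
realize = tvm.tir.BlockRealize([], True, block)  # True -> const(True, "bool")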
def __neg__(self):
    neg_one = const(-1, self.dtype)
    return self.__mul__(neg_one)
    Returns
    -------
    value : PrimExpr
        The result value.

    Example
    -------
    .. code-block:: python

        m = te.var("m")
        n = te.var("n")
        A = te.placeholder((m, n), name="A")
        k = te.reduce_axis((0, n), name="k")

        # there are two ways to use this {0} reducer:
        # mode 1, accept (expr, axis, where) to produce a Reduce Expr
        # tvm.{0} represents tvm.te.{0} or tvm.tir.{0}.
        B = te.compute((m,), lambda i: tvm.{0}(A[i, k], axis=k), name="B")

        # mode 2, simply use it with multiple Exprs:
        {0}_res = tvm.{0}(m, n)
    """
    reducer.__doc__ = doc_str.format(name)
    return reducer


# pylint: disable=unnecessary-lambda
sum = comm_reducer(lambda x, y: x + y, lambda t: const(0, dtype=t), name="sum")
min = comm_reducer(lambda x, y: _ffi_api._OpMin(x, y), max_value, name="min")
max = comm_reducer(lambda x, y: _ffi_api._OpMax(x, y), min_value, name="max")
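# A hedged example: defining a custom product reducer with te.comm_reducer,
# mirroring how sum/min/max are built above; "prod" is an assumed name.
import tvm
from tvm import te

prod = te.comm_reducer(lambda x, y: x * y, lambda t: tvm.tir.const(1, dtype=t), name="prod")
n = te.var("n")
A = te.placeholder((n,), name="A")
k = te.reduce_axis((0, n), name="k")
B = te.compute((1,), lambda _: prod(A[k], axis=k), name="B")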
def __neg__(self):
    neg_one = const(-1, self.dtype)  # type: ignore
    return self.__mul__(neg_one)
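# A hedged illustration: negation on a TIR expression is implemented as
# multiplication by a typed -1 constant, so -x and x * (-1) yield the same node.
import tvm
from tvm import te

x = te.var("x", dtype="int32")
y = -x  # ExprOp.__neg__ -> x * const(-1, "int32")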