def allocate(self, dtype, shape, name="buf", scope=""): """Create a allocate statement. Parameters ---------- dtype : str The content data type. shape : tuple of Expr The shape of array to be allocated. name : str, optional The name of the buffer. scope : str, optional The scope of the buffer. Returns ------- buffer : BufferVar The buffer var representing the buffer. """ buffer_var = _expr.Var(name, PointerType(PrimType(dtype), scope)) if not isinstance(shape, (list, tuple, _container.Array)): shape = [shape] if scope: self.scope_attr(buffer_var, "storage_scope", scope) self.emit(lambda x: _stmt.Allocate(buffer_var, dtype, shape, const(1, dtype="uint1"), x)) return BufferVar(self, buffer_var, shape, dtype)
def pointer(self, content_type, name="ptr", scope=""): """Create pointer variable with content type. Parameters ---------- content_type : str The content data type. name : str, optional The name of the pointer. scope : str, optional The scope of the pointer. Returns ------- ptr : BufferVar The buffer var representing the buffer. """ buffer_var = _expr.Var(name, PointerType(PrimType(content_type), scope)) return BufferVar(self, buffer_var, None, content_type)
def decl_buffer(
    shape,
    dtype=None,
    name="buffer",
    data=None,
    strides=None,
    elem_offset=None,
    scope="",
    data_alignment=-1,
    offset_factor=0,
    buffer_type="",
    axis_separators=None,
    span=None,
):
    """Declare a new symbolic buffer.

    Normally buffers are created automatically during lowering and build.
    This is only needed if users want to specify their own buffer layout.

    See the note below for a detailed discussion on the usage of buffers.

    Parameters
    ----------
    shape : tuple of Expr
        The shape of the buffer.

    dtype : str, optional
        The data type of the buffer.

    name : str, optional
        The name of the buffer.

    data : Var, optional
        The data pointer in the buffer.

    strides : array of Expr
        The strides of the buffer.

    elem_offset : Expr, optional
        The beginning offset of the array to data,
        in terms of number of elements of dtype.

    scope : str, optional
        The storage scope of the buffer, if not global.
        An empty string means global memory.

    data_alignment : int, optional
        The alignment of the data pointer in bytes.
        If -1 is passed, the alignment will be set to TVM's internal default.

    offset_factor : int, optional
        The factor of the elem_offset field. When set, elem_offset is
        required to be a multiple of offset_factor. If 0 is passed, the
        alignment will be set to 1. If a non-zero value is passed, a Var
        will be created for elem_offset if elem_offset is None.

    buffer_type : str, optional, {"", "auto_broadcast"}
        An auto_broadcast buffer allows one to implement a broadcast
        computation without considering whether a dimension's size equals
        one. TVM maps buffer[i][j][k] -> buffer[i][0][k] if dimension j's
        shape equals 1.

    axis_separators : list of int, optional
        If passed, a list of separators between groups of axes,
        each of which is flattened to an output axis.
        For flat memory spaces, this should either be None or an empty list.

    span : Optional[Span]
        The location of the decl_buffer creation in the source.

    Returns
    -------
    buffer : tvm.tir.Buffer
        The created buffer.

    Example
    -------
    Here's an example of how an auto_broadcast buffer can be used to define
    a symbolic broadcast operation,

    .. code-block:: python

        m0, m1, m2 = te.var("m0"), te.var("m1"), te.var("m2")
        n0, n1, n2 = te.var("n0"), te.var("n1"), te.var("n2")
        o0, o1, o2 = te.var("o0"), te.var("o1"), te.var("o2")
        A = te.placeholder((m0, m1, m2), name='A')
        B = te.placeholder((n0, n1, n2), name='B')
        C = te.compute((o0, o1, o2), lambda i, j, k: A[i, j, k] + B[i, j, k], name='C')
        Ab = tvm.tir.decl_buffer(A.shape, A.dtype, name="Ab", buffer_type="auto_broadcast")
        Bb = tvm.tir.decl_buffer(B.shape, B.dtype, name="Bb", buffer_type="auto_broadcast")
        s = te.create_schedule(C.op)
        fadd = tvm.build(s, [A, B, C], target='llvm', name='bcast_add', binds={A: Ab, B: Bb})
        dev = tvm.cpu(0)
        a = tvm.nd.array(np.random.uniform(size=(2, 4, 3)).astype(A.dtype), dev)
        b = tvm.nd.array(np.random.uniform(size=(2, 1, 3)).astype(B.dtype), dev)
        c = tvm.nd.array(np.zeros((2, 4, 3), dtype=C.dtype), dev)
        fadd(a, b, c)
        tvm.testing.assert_allclose(c.numpy(), a.numpy() + b.numpy())

    Note
    ----
    The buffer data structure reflects the DLTensor structure in dlpack.
    While the DLTensor data structure is very general, it is usually helpful
    to create a function that only handles a specific case of the data
    structure, so that the compiled function can benefit from it.

    If strides and elem_offset are passed as None when constructing the
    function, then the function will be specialized for DLTensors that are
    compact and aligned. If fully generic symbolic arrays are passed as
    strides, then the resulting function becomes fully generic.
    """
    # pylint: disable=import-outside-toplevel
    from .expr import Var

    shape = (shape,) if isinstance(shape, (PrimExpr, Integral)) else shape
    dtype = "float32" if dtype is None else dtype
    strides = () if strides is None else strides

    if axis_separators is None:
        axis_separators = []

    if offset_factor != 0 and elem_offset is None:
        shape_dtype = shape[0].dtype if shape and hasattr(shape[0], "dtype") else "int32"
        elem_offset = Var("%s_elem_offset" % name, shape_dtype)
    if data is None:
        # Bool is represented as uint1 in the IR, but stored as int8
        storage_type = PrimType(dtype)
        storage_type = PrimType("int8") if storage_type.dtype == "bool" else storage_type
        data = Var(name, PointerType(storage_type, scope), span)
    return _ffi_api.Buffer(  # type: ignore
        data,
        dtype,
        shape,
        strides,
        elem_offset,
        name,
        data_alignment,
        offset_factor,
        buffer_type,
        axis_separators,
        span,
    )