コード例 #1
0
def compacted_complex_func(a: ty.handle, c: ty.handle, n: ty.int32) -> None:
    # TIR script with nested blocks that declare no block iteration variables
    # (tir.block([])) and two scratch buffers allocated inside those blocks.
    # Presumably the expected result of a buffer-compaction pass, judging by
    # the name -- confirm against the test that uses it.
    #
    # Parameters:
    #   a: handle matched to the 8x8 float32 buffer A (only read below).
    #   c: handle matched to the 8x8 float32 buffer C (only written below).
    #   n: int32 argument; it is not referenced anywhere in the body.
    A = tir.match_buffer(a, (8, 8), "float32")
    C = tir.match_buffer(c, (8, 8), "float32")
    for i in range(0, 8):
        with tir.block([]):
            # NOTE(review): A and C are declared (8, 8), so index 8 is one past
            # the last valid index (7) if these denote single elements --
            # confirm the intended read/write regions.
            tir.reads(A[0, 8])
            tir.writes(C[0, 8])
            # B is a (1, 8) scratch buffer allocated inside the per-i block.
            B = tir.alloc_buffer((1, 8), "float32")
            for j in range(0, 4):
                with tir.block([]) as []:
                    # D is a (6, 1) scratch buffer allocated inside this inner block.
                    D = tir.alloc_buffer((6, 1), "float32")
                    tir.reads(A[i, j])
                    tir.writes(B[0, j])
                    # k - 2 spans 2..5 here, so rows 2..5 of D are written.
                    for k in range(4, 8):
                        D[k - 2, 0] = 1.0
                    # k - 2 spans 0..1 here, so rows 0..1 of D are read (not the
                    # rows written above). The result is stored through B.data
                    # at flat index j rather than through B[0, j].
                    for k in range(2, 4):
                        tir.store(B.data, j, A[i, j] + D[k - 2, 0])
            # Copy columns 3 and 4 of B into row i of C. Only flat indices 0..3
            # of B.data are stored by the loop above.
            for j in range(3, 5):
                with tir.block([]) as []:
                    tir.reads(B[0, j])
                    tir.writes(C[i, j])
                    C[i, j] = B[0, j]
            # Copy columns 6 and 7 of B into row i of C.
            for j in range(6, 8):
                with tir.block([]) as []:
                    tir.reads(B[0, j])
                    tir.writes(C[i, j])
                    C[i, j] = B[0, j]
コード例 #2
0
def buffer_opaque_access(b: ty.handle, c: ty.handle) -> None:
    # TIR script that mixes pointer-style accesses (tir.store, tir.load and
    # tir.tvm_fill_fragment on raw data handles) with an ordinary indexed copy.
    #
    # Parameters:
    #   b: handle matched to the 16x16 float32 buffer B.
    #   c: handle matched to the 16x16 float32 buffer C.
    B = tir.match_buffer(b, [16, 16], "float32")
    C = tir.match_buffer(c, [16, 16], "float32")

    # First block: declares no reads and all of B as its write region.
    with tir.block([]):
        tir.reads([])
        tir.writes(B[0:16, 0:16])
        # A is a flat 256-element float32 allocation requested with "global"
        # scope; it is passed directly as the pointer to tir.store / tir.load.
        A = tir.allocate([256], "float32", "global")
        # Store 1 at flat index i * 16 + j for every (i, j) of the 16x16 grid.
        for i, j in tir.grid(16, 16):
            tir.store(A, i * 16 + j, 1)
        for i in range(0, 16):
            # Load each element of row i from A; the loaded value is wrapped in
            # tir.evaluate and not assigned to anything.
            for j in range(0, 16):
                tir.evaluate(tir.load("float32", A, i * 16 + j))
            # Call tvm_fill_fragment on B.data; the arguments are constants and
            # do not depend on i or j.
            for j in range(0, 16):
                tir.evaluate(
                    tir.tvm_fill_fragment(B.data,
                                          16,
                                          16,
                                          16,
                                          0,
                                          tir.float32(0),
                                          dtype="handle"))

    # Second stage: element-wise copy of B into C, with block variables vi, vj
    # bound explicitly to the loop variables i, j.
    for i, j in tir.grid(16, 16):
        with tir.block([16, 16]) as [vi, vj]:
            tir.bind(vi, i)
            tir.bind(vj, j)
            C[vi, vj] = B[vi, vj]
コード例 #3
0
def opaque_access_store(a: ty.handle, c: ty.handle) -> None:
    # TIR script with a producer block "B" and a consumer block "C" in which the
    # consumer accesses buffers through raw data handles (tir.store / tir.load)
    # in addition to ordinary indexing.
    #
    # Parameters:
    #   a: handle matched to the 128x128 buffer A (read by block "B").
    #   c: handle matched to the 128x128 buffer C (written by block "C").
    A = tir.match_buffer(a, (128, 128))
    # B is an intermediate 128x128 buffer allocated at function scope.
    B = tir.alloc_buffer((128, 128))
    C = tir.match_buffer(c, (128, 128))
    # Block "B": B[vi, vj] = 2 * A[vi, vj].
    with tir.block([128, 128], "B") as [vi, vj]:
        B[vi, vj] = A[vi, vj] * 2.0
    # Block "C": regions are declared explicitly as the whole of B (read) and
    # the whole of C (written).
    with tir.block([128, 128], "C") as [vi, vj]:
        tir.reads(B[0:128, 0:128])
        tir.writes(C[0:128, 0:128])
        # Pointer-style store into C at the flat position of element (vi, vj).
        tir.store(C.data, vi * 128 + vj, B[vi, vj] + 1.0)
        # Pointer-style load from B. The row stride is 128 because B is
        # 128 x 128, so vi * 128 + vj addresses element (vi, vj), consistent
        # with the flat index used by the store above.
        C[vi, vj] = tir.load("float32", B.data, vi * 128 + vj) + 1.0
コード例 #4
0
def unschedulable_func(a: ty.handle, c: ty.handle) -> None:
    # TIR script computing C[i, j] = (A[i, j] + 1.0) * 2.0 row by row, staging
    # the intermediate value in a scratch buffer B that is written through its
    # raw data handle. The name suggests it is meant to be rejected by
    # scheduling -- confirm against the test that uses it.
    #
    # Parameters:
    #   a: handle matched to the 16x16 float32 buffer A (read).
    #   c: handle matched to the 16x16 float32 buffer C (written).
    A = tir.match_buffer(a, (16, 16), "float32")
    C = tir.match_buffer(c, (16, 16), "float32")
    for i in range(0, 16):
        # One block per row i, declared without block iteration variables; it
        # reads row i of A and writes row i of C.
        with tir.block([]):
            tir.reads(A[i, 0:16])
            tir.writes(C[i, 0:16])
            # B is a full (16, 16) scratch buffer allocated inside the per-i
            # block, although only row i is touched below.
            B = tir.alloc_buffer((16, 16), "float32")
            # Pointer-style store at flat index i * 16 + j.
            for j in range(0, 16):
                tir.store(B.data, i * 16 + j, A[i, j] + 1.0)
            # Ordinary indexed read of the same row of B.
            for j in range(0, 16):
                C[i, j] = B[i, j] * 2.0
コード例 #5
0
def opaque_access(a: ty.handle, b: ty.handle) -> None:
    # TIR script with two blocks that touch their buffers only through the raw
    # data handles (A.data / B.data), never through A[...] / B[...] indexing.
    #
    # Parameters:
    #   a: handle matched to the 16x16 float32 buffer A.
    #   b: handle matched to the 16x16 float32 buffer B.
    A = tir.match_buffer(a, [16, 16], "float32")
    B = tir.match_buffer(b, [16, 16], "float32")
    # Block "A": block variables vi, vj are declared with extents [16, 16].
    # The regions are spelled out explicitly: no reads, all of A written.
    with tir.block([16, 16], "A") as [vi, vj]:
        tir.reads([])
        tir.writes([A[0:16, 0:16]])
        # Pointer-style store of 1 at flat index vi * 16 + vj.
        tir.store(A.data, vi * 16 + vj, 1)
    # Block "B": same shape and region declarations, applied to B.
    with tir.block([16, 16], "B") as [vi, vj]:
        tir.reads([])
        tir.writes([B[0:16, 0:16]])
        # tvm_fill_fragment receives B.data plus positional arguments; the
        # sixth argument is the expression vi * 16 + vj.
        tir.evaluate(
            tir.tvm_fill_fragment(B.data,
                                  16,
                                  16,
                                  16,
                                  0,
                                  vi * 16 + vj,
                                  dtype="handle"))
コード例 #6
0
def opaque_access_split(a: ty.handle, b: ty.handle) -> None:
    # TIR script in which the column loop of each block is expressed as two
    # nested loops j0 and j1 of extent 4, with vj bound to j0 * 4 + j1.
    # Presumably the expected result of splitting a 16-iteration loop by a
    # factor of 4 -- confirm against the test that uses it.
    #
    # Parameters:
    #   a: handle matched to the 16x16 buffer A.
    #   b: handle matched to the 16x16 buffer B.
    A = tir.match_buffer(a, (16, 16))
    B = tir.match_buffer(b, (16, 16))
    for i, j0, j1 in tir.grid(16, 4, 4):
        with tir.block([16, 16], "A") as [vi, vj]:
            # Explicit bindings: vi follows i, vj is rebuilt from (j0, j1).
            tir.bind(vi, i)
            tir.bind(vj, ((j0 * 4) + j1))
            # Regions are declared explicitly: no reads, all of A written.
            tir.reads([])
            tir.writes([A[0:16, 0:16]])
            # Pointer-style store of 1 at flat index vi * 16 + vj. The fourth
            # argument is presumably the store predicate -- confirm against
            # the tir.store signature.
            tir.store(A.data, ((vi * 16) + vj), 1, 1)
    for i, j0, j1 in tir.grid(16, 4, 4):
        with tir.block([16, 16], "B") as [vi, vj]:
            tir.bind(vi, i)
            tir.bind(vj, ((j0 * 4) + j1))
            tir.reads([])
            tir.writes([B[0:16, 0:16]])
            # tvm_fill_fragment receives B.data plus positional arguments; the
            # sixth argument is the expression vi * 16 + vj.
            tir.evaluate(
                tir.tvm_fill_fragment(B.data,
                                      16,
                                      16,
                                      16,
                                      0, ((vi * 16) + vj),
                                      dtype="handle"))
コード例 #7
0
def opaque_access_fused(a: ty.handle, b: ty.handle) -> None:
    # TIR script in which each block is driven by a single serial loop of 256
    # iterations, with vi and vj recovered from the loop variable by floordiv
    # and floormod by 16. Presumably the expected result of fusing a 16x16
    # loop nest -- confirm against the test that uses it.
    #
    # Parameters:
    #   a: handle matched to the 16x16 buffer A.
    #   b: handle matched to the 16x16 buffer B.
    A = tir.match_buffer(a, [16, 16])
    B = tir.match_buffer(b, [16, 16])
    for i_j_fused in tir.serial(0, 256):
        with tir.block([16, 16], "A") as [vi, vj]:
            # Explicit bindings: vi = i_j_fused // 16, vj = i_j_fused % 16.
            tir.bind(vi, tir.floordiv(i_j_fused, 16))
            tir.bind(vj, tir.floormod(i_j_fused, 16))
            # Regions are declared explicitly: no reads, all of A written.
            tir.reads([])
            tir.writes([A[0:16, 0:16]])
            # Pointer-style store of 1 at flat index vi * 16 + vj. The fourth
            # argument is presumably the store predicate -- confirm against
            # the tir.store signature.
            tir.store(A.data, ((vi * 16) + vj), 1, 1)
    for i_j_fused in tir.serial(0, 256):
        with tir.block([16, 16], "B") as [vi, vj]:
            tir.bind(vi, tir.floordiv(i_j_fused, 16))
            tir.bind(vj, tir.floormod(i_j_fused, 16))
            tir.reads([])
            tir.writes([B[0:16, 0:16]])
            # tvm_fill_fragment receives B.data plus positional arguments; the
            # sixth argument is the expression vi * 16 + vj.
            tir.evaluate(
                tir.tvm_fill_fragment(B.data,
                                      16,
                                      16,
                                      16,
                                      0, ((vi * 16) + vj),
                                      dtype="handle"))