Exemple #1
0
# TIR script: binds b and c to 16x16 float32 buffers B and C, runs one block
# that works on a local 256-element allocation A and on B's data handle, and
# then copies B into C element by element.
def buffer_opaque_access(b: ty.handle, c: ty.handle) -> None:
    B = tir.match_buffer(b, [16, 16], "float32")
    C = tir.match_buffer(c, [16, 16], "float32")

    # Block without iteration variables; its regions are declared by hand:
    # nothing read, the whole of B written.
    with tir.block([]):
        tir.reads([])
        tir.writes(B[0:16, 0:16])
        # A is addressed with flat indices (i * 16 + j) through
        # tir.store / tir.load rather than with buffer subscripts.
        A = tir.allocate([256], "float32", "global")
        for i, j in tir.grid(16, 16):
            tir.store(A, i * 16 + j, 1)
        for i in range(0, 16):
            for j in range(0, 16):
                tir.evaluate(tir.load("float32", A, i * 16 + j))
            for j in range(0, 16):
                # B is referenced here only through B.data, as an argument of
                # the intrinsic call; j is not used in the call.
                tir.evaluate(
                    tir.tvm_fill_fragment(B.data,
                                          16,
                                          16,
                                          16,
                                          0,
                                          tir.float32(0),
                                          dtype="handle"))

    # Element-wise copy of B into C, one block instance per (vi, vj).
    for i, j in tir.grid(16, 16):
        with tir.block([16, 16]) as [vi, vj]:
            tir.bind(vi, i)
            tir.bind(vj, j)
            C[vi, vj] = B[vi, vj]
# TIR script over two 64x64x64 buffers that uses direct indices into A and B:
# A reaches tir.intrin_test as a data handle plus explicit integer arguments,
# and B is assigned element by element.
# NOTE(review): presumably the expected form of recursive_match after its
# match_buffer regions are lowered -- confirm against the test that uses it.
def transformed_recursive_match(a: ty.handle, b: ty.handle) -> None:
    A = tir.match_buffer(a, (64, 64, 64))
    B = tir.match_buffer(b, (64, 64, 64))
    for i, j, k in tir.grid(64, 4, 4):
        with tir.block([]):
            tir.reads([])
            # Outer block: one 16x16 tile of plane i in both A and B.
            tir.writes([
                A[i, j * 16:j * 16 + 16, k * 16:k * 16 + 16],
                B[i, j * 16:j * 16 + 16, k * 16:k * 16 + 16],
            ])
            for jj, kk in tir.grid(4, 4):
                with tir.block([]):
                    tir.reads([])
                    # Inner block: one 4x4 sub-tile inside that 16x16 tile.
                    tir.writes([
                        A[i, j * 16 + jj * 4:j * 16 + jj * 4 + 4,
                          k * 16 + kk * 4:k * 16 + kk * 4 + 4, ],
                        B[i, j * 16 + jj * 4:j * 16 + jj * 4 + 4,
                          k * 16 + kk * 4:k * 16 + kk * 4 + 4, ],
                    ])
                    # The second argument equals the row-major flat index of
                    # A[i, j*16 + jj*4, k*16 + kk*4] for a (64, 64, 64) shape
                    # (4096 = 64*64, 1024 = 16*64, 256 = 4*64).
                    # The remaining integers sit where recursive_match passes
                    # strides[0], strides[1], shape[0], shape[1].
                    tir.evaluate(
                        tir.intrin_test(
                            A.data,
                            i * 4096 + j * 1024 + jj * 256 + k * 16 + kk * 4,
                            64,
                            1,
                            4,
                            4,
                            dtype="handle",
                        ))
                    # Fill the matching 4x4 sub-tile of B with 1.
                    for jjj, kkk in tir.grid(4, 4):
                        B[i, j * 16 + jj * 4 + jjj, k * 16 + kk * 4 + kkk] = 1
# TIR script with two loop nests that hand tiles of A (32x64x128) and
# B (64x64x64) to tir.intrin_test as a data handle plus explicit integer
# arguments; no match_buffer sub-regions are used inside the blocks.
def transformed_opaque_access(a: ty.handle, b: ty.handle) -> None:
    A = tir.match_buffer(a, (32, 64, 128))
    B = tir.match_buffer(b, (64, 64, 64))
    for i, j, k in tir.grid(2, 64, 8):
        with tir.block([]):
            tir.reads([])
            tir.writes(A[i * 16:i * 16 + 16, j, k * 16:k * 16 + 16])
            # The second argument equals the row-major flat index of
            # A[i*16, j, k*16] for shape (32, 64, 128):
            # 131072 = 16 * 64 * 128, 128 = row length.
            # 8192 (= 64 * 128) and 128 match the row-major strides of the
            # first two axes; 16 and 1 match the tile extents on those axes.
            tir.evaluate(
                tir.intrin_test(
                    A.data,
                    i * 131072 + j * 128 + k * 16,
                    8192,
                    128,
                    16,
                    1,
                    dtype="handle",
                ))
    for i, j, k in tir.grid(64, 2, 8):
        with tir.block([]):
            tir.reads([])
            tir.writes(B[i, j * 32:j * 32 + 32, k * 8:k * 8 + 8])
            # The second argument equals the row-major flat index of
            # B[i, j*32, k*8] for shape (64, 64, 64):
            # 4096 = 64 * 64, 2048 = 32 * 64.
            # 64 and 1 match the strides of the last two axes; 32 and 8 match
            # the extents of the declared write region on those axes.
            tir.evaluate(
                tir.intrin_test(
                    B.data,
                    i * 4096 + j * 2048 + k * 8,
                    64,
                    1,
                    32,
                    8,
                    dtype="handle",
                ))
# TIR script with symbolic extents n and m: every loop iteration matches a
# sub-region of A and of B into sub_A / sub_B, fills sub_A with 1 and passes
# sub_B's data, elem_offset, strides and shape to tir.intrin_test.
def symbolic_match(a: ty.handle, b: ty.handle, n: ty.int32,
                   m: ty.int32) -> None:
    A = tir.match_buffer(a, (n * m, m))
    B = tir.match_buffer(b, (n * 2, m * 4))
    for i in range(0, n):
        with tir.block([]):
            tir.reads([])
            # NOTE(review): the A region declared here ends at i * m + n,
            # while sub_A below matches rows up to i * m + m -- confirm the
            # difference is intentional for this fixture.
            tir.writes([A[i * m:i * m + n, 0:m], B[i * n:i * n + 2, 0:m * 4]])
            # Fresh int32 variables used as the (symbolic) strides of sub_B.
            Bs_0 = tir.var("int32")
            Bs_1 = tir.var("int32")
            sub_A = tir.match_buffer(A[i * m:i * m + m, 0:m], (m, m),
                                     offset_factor=1)
            sub_B = tir.match_buffer(B[i * n:i * n + 2, 0:m * 4], (2, m * 4),
                                     strides=[Bs_0, Bs_1],
                                     offset_factor=1)
            # Fill the matched m x m view of A with 1.
            for ii, jj in tir.grid(m, m):
                sub_A[ii, jj] = 1
            # The same call is issued four times; j does not appear in it.
            for j in range(0, 4):
                tir.evaluate(
                    tir.intrin_test(
                        sub_B.data,
                        sub_B.elem_offset,
                        sub_B.strides[0],
                        sub_B.strides[1],
                        sub_B.shape[0],
                        sub_B.shape[1],
                        dtype="handle",
                    ))
Exemple #5
0
# TIR script over 1024-element buffers A and B, handled in 8 chunks of 128:
# per chunk, an extern call is issued on A_cache and A, and then A_cache is
# copied into B one element at a time.
def transformed_opaque_access(a: ty.handle, b: ty.handle) -> None:
    A = tir.match_buffer(a, [1024])
    B = tir.match_buffer(b, [1024])
    for i in tir.serial(0, 8):
        # No explicit tir.bind is given for vi in this outer block.
        with tir.block([8]) as [vi]:
            tir.reads(A[vi * 128:vi * 128 + 128])
            tir.writes(B[vi * 128:vi * 128 + 128])
            # Intermediate buffer allocated inside the outer block.
            A_cache = tir.alloc_buffer([1024])
            with tir.block([8]) as [v]:
                tir.bind(v, vi)
                # Declared regions: a 128-element slice of A is read and the
                # same slice of A_cache is written.
                tir.reads([A[v * 128:v * 128 + 128]])
                tir.writes([A_cache[v * 128:v * 128 + 128]])
                # Both buffers are referenced only through their .data
                # handles, each followed by v * 128 and 128.
                tir.evaluate(
                    tir.call_extern("test",
                                    A_cache.data,
                                    v * 128,
                                    128,
                                    A.data,
                                    v * 128,
                                    128,
                                    dtype="float32"))
            for j in tir.serial(0, 128):
                with tir.block([1024]) as [v]:
                    # Flat position of element j inside chunk vi.
                    tir.bind(v, ((vi * 128) + j))
                    tir.reads([A_cache[v]])
                    tir.writes([B[v]])
                    B[v] = A_cache[v]
# TIR script that matches a single element A[i, j] as a rank-0 buffer and
# then reads it through the raw data handle with tir.load.
# NOTE(review): the name suggests this is a negative fixture that a pass is
# expected to reject -- confirm against the test that uses it.
def fail_match_load(a: ty.handle) -> None:
    A = tir.match_buffer(a, (8, 8))
    for i, j in tir.grid(8, 8):
        with tir.block([]):
            tir.reads(A[i, j])
            tir.writes([])
            # Rank-0 (shape ()) view of one element of A.
            sub_A = tir.match_buffer(A[i, j], ())
            # Access goes through sub_A.data with flat index 0 instead of
            # a subscript on sub_A.
            tir.evaluate(tir.load("float32", sub_A.data, 0))
# TIR script with two levels of tir.match_buffer: a 16x16 tile of A and B is
# matched in the outer block, and a 4x4 sub-tile of those matched buffers is
# matched again in the inner block.
def recursive_match(a: ty.handle, b: ty.handle) -> None:
    A = tir.match_buffer(a, (64, 64, 64))
    B = tir.match_buffer(b, (64, 64, 64))
    for i, j, k in tir.grid(64, 4, 4):
        with tir.block([]):
            tir.reads([])
            tir.writes([
                A[i, j * 16:j * 16 + 16, k * 16:k * 16 + 16],
                B[i, j * 16:j * 16 + 16, k * 16:k * 16 + 16],
            ])
            # Fresh int32 variables used as the strides of sub_A.
            As_0 = tir.var("int32")
            As_1 = tir.var("int32")
            # First level: 16x16 views of plane i. sub_A carries the
            # symbolic strides; sub_B is matched without a strides argument.
            sub_A = tir.match_buffer(
                A[i, j * 16:j * 16 + 16, k * 16:k * 16 + 16],
                (16, 16),
                strides=[As_0, As_1],
                offset_factor=1,
            )
            sub_B = tir.match_buffer(
                B[i, j * 16:j * 16 + 16, k * 16:k * 16 + 16],
                (16, 16),
                offset_factor=1,
            )
            for jj, kk in tir.grid(4, 4):
                with tir.block([]):
                    tir.reads([])
                    # Regions are expressed in terms of the matched buffers
                    # sub_A / sub_B, not the original A / B.
                    tir.writes([
                        sub_A[jj * 4:jj * 4 + 4, kk * 4:kk * 4 + 4],
                        sub_B[jj * 4:jj * 4 + 4, kk * 4:kk * 4 + 4],
                    ])
                    Ass_0 = tir.var("int32")
                    Ass_1 = tir.var("int32")
                    # Second level: 4x4 views taken from the 16x16 views.
                    sub_sub_A = tir.match_buffer(
                        sub_A[jj * 4:jj * 4 + 4, kk * 4:kk * 4 + 4],
                        (4, 4),
                        strides=[Ass_0, Ass_1],
                        offset_factor=1,
                    )
                    sub_sub_B = tir.match_buffer(
                        sub_B[jj * 4:jj * 4 + 4, kk * 4:kk * 4 + 4],
                        (4, 4),
                        offset_factor=1,
                    )
                    # sub_sub_A is used only through its buffer attributes.
                    tir.evaluate(
                        tir.intrin_test(
                            sub_sub_A.data,
                            sub_sub_A.elem_offset,
                            sub_sub_A.strides[0],
                            sub_sub_A.strides[1],
                            sub_sub_A.shape[0],
                            sub_sub_A.shape[1],
                            dtype="handle",
                        ))
                    # sub_sub_B is written with ordinary subscripts.
                    for jjj, kkk in tir.grid(4, 4):
                        sub_sub_B[jjj, kkk] = 1
# TIR script without parameters: allocates two 1024-element buffers and, for
# each of 8 chunks of 128 elements, issues an extern call on their data
# handles.
def opaque_access_func() -> None:
    A = tir.alloc_buffer([1024])
    B = tir.alloc_buffer([1024])
    for i in tir.serial(0, 8):
        with tir.block([8]) as [v]:
            tir.bind(v, i)
            # Regions are declared by hand: the block body touches A and B
            # only through A.data / B.data inside the extern call.
            tir.reads([A[v * 128 : v * 128 + 128]])
            tir.writes([B[v * 128 : v * 128 + 128]])
            tir.evaluate(
                tir.call_extern("test", B.data, v * 128, 128, A.data, v * 128, 128, dtype="float32")
            )
# TIR script over 128x128 float32 buffers A and B: an 8x8 block matches two
# sub-regions of B, and a nested 16x16 block writes 1.0 to A through a
# rank-0 match_buffer view.
def match_buffer_func(a: ty.handle, b: ty.handle) -> None:
    A = tir.match_buffer(a, (128, 128), "float32")
    B = tir.match_buffer(b, (128, 128), "float32")
    with tir.block([8, 8], "block") as [vi, vj]:
        # The declared read region spans both matched regions B0 and B1.
        tir.reads(B[vi * 16 + 2 : vi * 16 + 12, vj * 16 + 2 : vj * 16 + 16])
        tir.writes(A[vi * 16 : vi * 16 + 16, vj * 16 : vj * 16 + 16])
        B0 = tir.match_buffer(B[vi * 16 + 2 : vi * 16 + 6, vj * 16 + 2 : vj * 16 + 6], (4, 4))
        B1 = tir.match_buffer(B[vi * 16 + 8 : vi * 16 + 12, vj * 16 + 8 : vj * 16 + 16], (4, 8))
        # The inner block declares no tir.reads / tir.writes of its own.
        with tir.block([16, 16], "AAA") as [i, j]:
            # Rank-0 view of A[i, j], written with an empty-tuple subscript.
            AA = tir.match_buffer(A[i, j], ())
            AA[()] = 1.0
        # B0 and B1 are used only through their data handles.
        tir.evaluate(B0.data)
        tir.evaluate(B1.data)
Exemple #10
0
 # TIR script that declares three handle variables and passes them directly
 # as arguments to tir.tvm_call_cpacked for the function name
 # "tvm_test_cpacked".
 def tir_packed_call() -> None:
     A = tir.var("handle")
     B = tir.var("handle")
     C = tir.var("handle")
     # body
     # Single evaluated call; the result dtype is given as "int32".
     tir.evaluate(
         tir.tvm_call_cpacked(
             "tvm_test_cpacked",
             A,
             B,
             C,
             dtype="int32",
         ))
# TIR script without parameters: allocates four 128x128 float32 buffers and
# runs one outer block with hand-written access regions that contains a
# plain loop nest, a nested 2x2 block and an evaluation of D.data.
def func() -> None:
    A = tir.alloc_buffer((128, 128), "float32")
    B = tir.alloc_buffer((128, 128), "float32")
    C = tir.alloc_buffer((128, 128), "float32")
    D = tir.alloc_buffer((128, 128), "float32")
    with tir.block([]):
        # Read/write regions must be added manually to avoid triggering the
        # block access region detector.
        tir.reads([B[0, 0], C[0:16, 0:16], A[4:12, 4:12]])
        tir.writes([A[0:12, 0:12]])
        # Writes A[0:8, 0:8]; every element gets B[0, 0] + C[0, 0].
        for i, j in tir.grid(8, 8):
            A[i, j] = B[0, 0] + C[0, 0]
        with tir.block([2, 2]) as [vi, vj]:
            # Each instance updates the 4x4 tile of A that starts at
            # (vi * 4 + 4, vj * 4 + 4) and reads C[12:16, 12:16].
            tir.reads([A[vi * 4 + 4 : vi * 4 + 8, vj * 4 + 4 : vj * 4 + 8], C[12:16, 12:16]])
            tir.writes([A[vi * 4 + 4 : vi * 4 + 8, vj * 4 + 4 : vj * 4 + 8]])
            for i, j in tir.grid(4, 4):
                A[vi * 4 + 4 + i, vj * 4 + 4 + j] += C[i + 12, j + 12]
        # D appears only here, through its data handle; it is not listed in
        # the declared regions above.
        tir.evaluate(D.data)
# TIR script with two loop nests that match a tile of A (32x64x128) and a
# tile of B (64x64x64) with tir.match_buffer, and pass the matched buffers'
# data, elem_offset, strides and shape to tir.intrin_test.
def opaque_access(a: ty.handle, b: ty.handle) -> None:
    A = tir.match_buffer(a, (32, 64, 128))
    B = tir.match_buffer(b, (64, 64, 64))
    for i, j, k in tir.grid(2, 64, 8):
        with tir.block([]):
            tir.reads([])
            tir.writes(A[i * 16:i * 16 + 16, j, k * 16:k * 16 + 16])
            # 3-D view with explicit constant strides; 8192 = 64 * 128 and
            # 128 are the row-major strides of A's first two axes.
            sub_A = tir.match_buffer(
                A[i * 16:i * 16 + 16, j, k * 16:k * 16 + 16],
                (16, 1, 16),
                strides=[8192, 128, 1],
                offset_factor=1,
            )
            # Only the first two strides / extents of the 3-D view are passed.
            tir.evaluate(
                tir.intrin_test(
                    sub_A.data,
                    sub_A.elem_offset,
                    sub_A.strides[0],
                    sub_A.strides[1],
                    sub_A.shape[0],
                    sub_A.shape[1],
                    dtype="handle",
                ))
    for i, j, k in tir.grid(64, 2, 8):
        with tir.block([]):
            # Fresh int32 variables used as the strides of sub_B; here they
            # are declared before the reads/writes annotations.
            Bs_0 = tir.var("int32")
            Bs_1 = tir.var("int32")
            tir.reads([])
            tir.writes(B[i, j * 32:j * 32 + 32, k * 8:k * 8 + 8])
            # 2-D (32, 8) view of a region whose first axis is the single
            # index i.
            sub_B = tir.match_buffer(
                B[i, j * 32:j * 32 + 32, k * 8:k * 8 + 8],
                (32, 8),
                strides=[Bs_0, Bs_1],
                offset_factor=1,
            )
            tir.evaluate(
                tir.intrin_test(
                    sub_B.data,
                    sub_B.elem_offset,
                    sub_B.strides[0],
                    sub_B.strides[1],
                    sub_B.shape[0],
                    sub_B.shape[1],
                    dtype="handle",
                ))
Exemple #13
0
# TIR script with two 16x16 blocks that touch their buffers only through the
# raw data handle: block "A" stores into A.data, block "B" passes B.data to
# tir.tvm_fill_fragment.
def opaque_access(a: ty.handle, b: ty.handle) -> None:
    A = tir.match_buffer(a, [16, 16], "float32")
    B = tir.match_buffer(b, [16, 16], "float32")
    with tir.block([16, 16], "A") as [vi, vj]:
        tir.reads([])
        # Every block instance declares the whole of A as written.
        tir.writes([A[0:16, 0:16]])
        # Store of 1 at flat index vi * 16 + vj.
        tir.store(A.data, vi * 16 + vj, 1)
    with tir.block([16, 16], "B") as [vi, vj]:
        tir.reads([])
        # Every block instance declares the whole of B as written.
        tir.writes([B[0:16, 0:16]])
        # vi * 16 + vj is passed as the last positional argument.
        tir.evaluate(
            tir.tvm_fill_fragment(B.data,
                                  16,
                                  16,
                                  16,
                                  0,
                                  vi * 16 + vj,
                                  dtype="handle"))
# TIR script over 8x8 buffers A and B that writes A[i, j] directly and passes
# B.data plus a flat offset to tir.intrin_test, without any match_buffer
# inside the block.
def transformed_rank0_buffer(a: ty.handle, b: ty.handle) -> None:
    A = tir.match_buffer(a, (8, 8))
    B = tir.match_buffer(b, (8, 8))
    for i, j in tir.grid(8, 8):
        with tir.block([]):
            tir.reads([])
            tir.writes([A[i, j], B[i, j]])
            A[i, j] = 1
            # i * 8 + j equals the row-major flat index of B[i, j] for an
            # (8, 8) shape; the four remaining integer arguments are all 0.
            tir.evaluate(
                tir.intrin_test(
                    B.data,
                    i * 8 + j,
                    0,
                    0,
                    0,
                    0,
                    dtype="handle",
                ))
Exemple #15
0
# TIR script with a single block "C" that evaluates one tir.tvm_call_packed
# to "tvm.contrib.cblas.matmul", passing A, B and C (all 128x128) as arrays
# built with tir.tvm_stack_make_array.
def tir_extern(a: ty.handle, b: ty.handle, c: ty.handle) -> None:
    A = tir.match_buffer(a, (128, 128))
    B = tir.match_buffer(b, (128, 128))
    C = tir.match_buffer(c, (128, 128))
    # body
    with tir.block([], "C"):
        # Declared regions: all of A and B are read, all of C is written.
        tir.reads([A[0:128, 0:128], B[0:128, 0:128]])
        tir.writes([C[0:128, 0:128]])
        # The three array arguments are built identically apart from the
        # data handle; each uses a (128, 128) tvm_stack_make_shape.
        tir.evaluate(
            tir.tvm_call_packed(
                "tvm.contrib.cblas.matmul",
                tir.tvm_stack_make_array(
                    A.data,
                    tir.tvm_stack_make_shape(128, 128, dtype="handle"),
                    0,
                    2,
                    0.0,
                    0,
                    dtype="handle",
                ),
                tir.tvm_stack_make_array(
                    B.data,
                    tir.tvm_stack_make_shape(128, 128, dtype="handle"),
                    0,
                    2,
                    0.0,
                    0,
                    dtype="handle",
                ),
                tir.tvm_stack_make_array(
                    C.data,
                    tir.tvm_stack_make_shape(128, 128, dtype="handle"),
                    0,
                    2,
                    0.0,
                    0,
                    dtype="handle",
                ),
                0,
                0,
                dtype="int32",
            )
        )
# TIR script over 8x8 buffers A and B that matches single elements as rank-0
# buffers: sub_A is written with an empty-tuple subscript, sub_B is passed to
# tir.intrin_test through its data handle and elem_offset.
def rank0_buffer(a: ty.handle, b: ty.handle) -> None:
    A = tir.match_buffer(a, (8, 8))
    B = tir.match_buffer(b, (8, 8))
    for i, j in tir.grid(8, 8):
        with tir.block([]):
            tir.reads([])
            tir.writes([A[i, j], B[i, j]])
            # Rank-0 (shape ()) views of one element each.
            sub_A = tir.match_buffer(A[i, j], (), offset_factor=1)
            sub_B = tir.match_buffer(B[i, j], (), offset_factor=1)
            sub_A[()] = 1
            # The stride/shape argument positions are filled with literal 0.
            tir.evaluate(
                tir.intrin_test(
                    sub_B.data,
                    sub_B.elem_offset,
                    0,
                    0,
                    0,
                    0,
                    dtype="handle",
                ))
# TIR script without parameters: a "root" block allocates 128x128 float32
# buffers A and B; a nested 8x8 block matches sub-regions of both, and an
# innermost 16x16 block writes 1.0 through a rank-0 view of the matched
# A tile.
def match_buffer_func() -> None:
    with tir.block([], "root"):
        A = tir.alloc_buffer((128, 128), "float32")
        B = tir.alloc_buffer((128, 128), "float32")
        # The root block declares empty read and write regions.
        tir.reads([])
        tir.writes([])
        # Read/write regions must be added manually to avoid triggering the
        # block access region detector.
        with tir.block([8, 8], "block") as [vi, vj]:
            # The declared read region spans both matched regions B0 and B1.
            tir.reads(B[vi * 16 + 2 : vi * 16 + 12, vj * 16 + 2 : vj * 16 + 16])
            tir.writes(A[vi * 16 : vi * 16 + 16, vj * 16 : vj * 16 + 16])
            AA = tir.match_buffer(A[vi * 16 : vi * 16 + 16, vj * 16 : vj * 16 + 16], (16, 16))
            B0 = tir.match_buffer(B[vi * 16 + 2 : vi * 16 + 6, vj * 16 + 2 : vj * 16 + 6], (4, 4))
            B1 = tir.match_buffer(B[vi * 16 + 8 : vi * 16 + 12, vj * 16 + 8 : vj * 16 + 16], (4, 8))
            with tir.block([16, 16], "AAA") as [i, j]:
                tir.reads([])
                # The region is expressed on the matched buffer AA, not on A.
                tir.writes(AA[i, j])
                # Second-level match: rank-0 view of one element of AA.
                AAA = tir.match_buffer(AA[i, j], ())
                AAA[()] = 1.0
            # B0 and B1 are used only through their data handles.
            tir.evaluate(B0.data)
            tir.evaluate(B1.data)
# TIR script with symbolic extents n and m that indexes A directly and passes
# B.data with explicit symbolic offset/stride/shape expressions to
# tir.intrin_test; no match_buffer is used inside the block.
# NOTE(review): presumably the expected form of symbolic_match after its
# match_buffer regions are lowered -- confirm against the test that uses it.
def transformed_symbolic_match(a: ty.handle, b: ty.handle, n: ty.int32,
                               m: ty.int32) -> None:
    A = tir.match_buffer(a, (n * m, m))
    B = tir.match_buffer(b, (n * 2, m * 4))
    for i in range(0, n):
        with tir.block([]):
            tir.reads([])
            # NOTE(review): the declared A region ends at i * m + n, while the
            # loop below writes rows i * m .. i * m + m - 1 -- confirm the
            # difference is intentional for this fixture.
            tir.writes([A[i * m:i * m + n, 0:m], B[i * n:i * n + 2, 0:m * 4]])
            for ii, jj in tir.grid(m, m):
                A[i * m + ii, jj] = 1
            # The same call is issued four times; j does not appear in it.
            for j in range(0, 4):
                # i * n * (m * 4) equals the row-major flat index of
                # B[i * n, 0] for a row length of m * 4; (m * 4, 1) match the
                # row-major strides and (2, m * 4) the declared region extents.
                tir.evaluate(
                    tir.intrin_test(
                        B.data,
                        i * n * (m * 4),
                        m * 4,
                        1,
                        2,
                        m * 4,
                        dtype="handle",
                    ))
Exemple #19
0
# TIR script over 16x16 buffers A and B in which the second block axis is
# driven by two loops (j0, j1) of extent 4 each, bound as j0 * 4 + j1; the
# buffers are accessed only through their data handles.
def opaque_access_split(a: ty.handle, b: ty.handle) -> None:
    A = tir.match_buffer(a, (16, 16))
    B = tir.match_buffer(b, (16, 16))
    for i, j0, j1 in tir.grid(16, 4, 4):
        with tir.block([16, 16], "A") as [vi, vj]:
            tir.bind(vi, i)
            # vj is reconstructed from the two loop variables.
            tir.bind(vj, ((j0 * 4) + j1))
            tir.reads([])
            tir.writes([A[0:16, 0:16]])
            # Store of 1 at flat index vi * 16 + vj; the trailing 1 is
            # presumably the store predicate -- TODO confirm.
            tir.store(A.data, ((vi * 16) + vj), 1, 1)
    for i, j0, j1 in tir.grid(16, 4, 4):
        with tir.block([16, 16], "B") as [vi, vj]:
            tir.bind(vi, i)
            tir.bind(vj, ((j0 * 4) + j1))
            tir.reads([])
            tir.writes([B[0:16, 0:16]])
            # vi * 16 + vj is passed as the last positional argument.
            tir.evaluate(
                tir.tvm_fill_fragment(B.data,
                                      16,
                                      16,
                                      16,
                                      0, ((vi * 16) + vj),
                                      dtype="handle"))
Exemple #20
0
# TIR script over 16x16 buffers A and B in which both block axes are driven
# by a single loop of extent 256, split back into (vi, vj) with floordiv and
# floormod by 16; the buffers are accessed only through their data handles.
def opaque_access_fused(a: ty.handle, b: ty.handle) -> None:
    A = tir.match_buffer(a, [16, 16])
    B = tir.match_buffer(b, [16, 16])
    for i_j_fused in tir.serial(0, 256):
        with tir.block([16, 16], "A") as [vi, vj]:
            # Recover the two block axes from the single loop variable.
            tir.bind(vi, tir.floordiv(i_j_fused, 16))
            tir.bind(vj, tir.floormod(i_j_fused, 16))
            tir.reads([])
            tir.writes([A[0:16, 0:16]])
            # Store of 1 at flat index vi * 16 + vj; the trailing 1 is
            # presumably the store predicate -- TODO confirm.
            tir.store(A.data, ((vi * 16) + vj), 1, 1)
    for i_j_fused in tir.serial(0, 256):
        with tir.block([16, 16], "B") as [vi, vj]:
            tir.bind(vi, tir.floordiv(i_j_fused, 16))
            tir.bind(vj, tir.floormod(i_j_fused, 16))
            tir.reads([])
            tir.writes([B[0:16, 0:16]])
            # vi * 16 + vj is passed as the last positional argument.
            tir.evaluate(
                tir.tvm_fill_fragment(B.data,
                                      16,
                                      16,
                                      16,
                                      0, ((vi * 16) + vj),
                                      dtype="handle"))
Exemple #21
0
    # TIR script that binds three handle variables to
    # tir.tvm_stack_alloca("array", 1) results in nested tir.let scopes, calls
    # tir.tvm_struct_set on each with A, B and C, and then passes the three
    # bound variables to tir.tvm_call_cpacked for "tvm_test_cpacked".
    def tir_packed_call() -> None:
        A = tir.var("handle")
        B = tir.var("handle")
        C = tir.var("handle")

        # body
        tvm_value_2 = tir.var("handle")
        tvm_value_1 = tir.var("handle")
        tvm_value_0 = tir.var("handle")
        # The let scopes are nested in the order 2, 1, 0 (outermost first).
        with tir.let(tvm_value_2,
                     tir.tvm_stack_alloca("array", 1, dtype="handle")):
            with tir.let(tvm_value_1,
                         tir.tvm_stack_alloca("array", 1, dtype="handle")):
                with tir.let(tvm_value_0,
                             tir.tvm_stack_alloca("array", 1, dtype="handle")):
                    # One struct-set per value: 0 <- A, 1 <- B, 2 <- C, each
                    # with the same (0, 1) integer arguments.
                    tir.evaluate(
                        tir.tvm_struct_set(tvm_value_0,
                                           0,
                                           1,
                                           A,
                                           dtype="handle"))
                    tir.evaluate(
                        tir.tvm_struct_set(tvm_value_1,
                                           0,
                                           1,
                                           B,
                                           dtype="handle"))
                    tir.evaluate(
                        tir.tvm_struct_set(tvm_value_2,
                                           0,
                                           1,
                                           C,
                                           dtype="handle"))
                    # The call receives the let-bound values, not A, B and C.
                    tir.evaluate(
                        tir.tvm_call_cpacked(
                            "tvm_test_cpacked",
                            tvm_value_0,
                            tvm_value_1,
                            tvm_value_2,
                            dtype="int32",
                        ))
Exemple #22
0
# TIR script whose body is a single `return` statement; the inline marker
# below flags that line as the error this snippet is meant to demonstrate.
# NOTE(review): presumably a negative fixture for the script parser -- confirm.
def return_not_allowed(a: ty.handle) -> None:
    return tir.evaluate(0)  # error
Exemple #23
0
    # TIR script that uses tir.evaluate(0.0) as the context expression of a
    # `with` statement whose body is only `pass`.
    # NOTE(review): the name suggests this is a negative fixture that the
    # script parser is expected to reject -- confirm against the test.
    def invalid_block_function(a: ty.handle) -> None:
        A = tir.match_buffer(a, (16, 16), "float32")

        # A is matched above but not referenced in the rest of the body.
        with tir.evaluate(0.0):
            pass
 def main(placeholder: ty.handle, placeholder_1: ty.handle,
          placeholder_2: ty.handle, ethosu_write: ty.handle,
          placeholder_3: ty.handle, placeholder_4: ty.handle,
          placeholder_5: ty.handle, placeholder_6: ty.handle,
          placeholder_7: ty.handle, placeholder_8: ty.handle,
          placeholder_9: ty.handle, placeholder_10: ty.handle) -> None:
     # function attr dict
     tir.func_attr({
         "from_legacy_te_schedule": True,
         "global_symbol": "main",
         "tir.noalias": True
     })
     buffer = tir.match_buffer(placeholder_7, [80],
                               dtype="uint8",
                               elem_offset=0,
                               align=128,
                               offset_factor=1)
     buffer_1 = tir.match_buffer(placeholder_5, [80],
                                 dtype="uint8",
                                 elem_offset=0,
                                 align=128,
                                 offset_factor=1)
     buffer_2 = tir.match_buffer(placeholder_3, [80],
                                 dtype="uint8",
                                 elem_offset=0,
                                 align=128,
                                 offset_factor=1)
     buffer_3 = tir.match_buffer(placeholder_4, [32],
                                 dtype="uint8",
                                 elem_offset=0,
                                 align=128,
                                 offset_factor=1)
     buffer_4 = tir.match_buffer(placeholder_9, [80],
                                 dtype="uint8",
                                 elem_offset=0,
                                 align=128,
                                 offset_factor=1)
     buffer_5 = tir.match_buffer(placeholder_6, [32],
                                 dtype="uint8",
                                 elem_offset=0,
                                 align=128,
                                 offset_factor=1)
     placeholder_11 = tir.match_buffer(placeholder, [1, 16, 16, 32],
                                       dtype="int8",
                                       elem_offset=0,
                                       align=128,
                                       offset_factor=1)
     buffer_6 = tir.match_buffer(placeholder_1, [592],
                                 dtype="uint8",
                                 elem_offset=0,
                                 align=128,
                                 offset_factor=1)
     ethosu_write_1 = tir.match_buffer(ethosu_write, [1, 16, 16, 8],
                                       dtype="int8",
                                       elem_offset=0,
                                       align=128,
                                       offset_factor=1)
     buffer_7 = tir.match_buffer(placeholder_2, [160],
                                 dtype="uint8",
                                 elem_offset=0,
                                 align=128,
                                 offset_factor=1)
     buffer_8 = tir.match_buffer(placeholder_8, [32],
                                 dtype="uint8",
                                 elem_offset=0,
                                 align=128,
                                 offset_factor=1)
     buffer_9 = tir.match_buffer(placeholder_10, [32],
                                 dtype="uint8",
                                 elem_offset=0,
                                 align=128,
                                 offset_factor=1)
     # body
     ethosu_write_2 = tir.allocate([4096], "int8", "global")
     placeholder_global = tir.allocate([80], "uint8", "global")
     placeholder_d_global = tir.allocate([32], "uint8", "global")
     tir.evaluate(
         tir.call_extern("ethosu_conv2d",
                         "int8",
                         16,
                         16,
                         32,
                         16,
                         0,
                         16,
                         tir.load("int8", placeholder_11.data, 0),
                         0,
                         0,
                         0,
                         tir.float32(0.5),
                         10,
                         "NHWC",
                         512,
                         32,
                         1,
                         "int8",
                         16,
                         16,
                         16,
                         16,
                         0,
                         16,
                         tir.load("int8", ethosu_write_2, 0),
                         0,
                         0,
                         0,
                         tir.float32(0.25),
                         14,
                         "NHWC",
                         256,
                         16,
                         1,
                         1,
                         1,
                         1,
                         1,
                         1,
                         1,
                         tir.load("uint8", buffer_6.data, 0),
                         592,
                         12,
                         tir.load("uint8", buffer_7.data, 0),
                         160,
                         0,
                         0,
                         0,
                         0,
                         "NONE",
                         0,
                         0,
                         "NONE",
                         dtype="handle"))
     tir.evaluate(
         tir.call_extern("ethosu_copy",
                         tir.load("uint8", buffer_2.data, 0),
                         80,
                         tir.load("uint8", placeholder_global, 0),
                         dtype="handle"))
     tir.evaluate(
         tir.call_extern("ethosu_copy",
                         tir.load("uint8", buffer_3.data, 0),
                         32,
                         tir.load("uint8", placeholder_d_global, 0),
                         dtype="handle"))
     tir.evaluate(
         tir.call_extern("ethosu_conv2d",
                         "int8",
                         16,
                         16,
                         16,
                         16,
                         0,
                         16,
                         tir.load("int8", ethosu_write_2, 0),
                         0,
                         0,
                         0,
                         tir.float32(0.5),
                         10,
                         "NHWC",
                         256,
                         16,
                         1,
                         "int8",
                         16,
                         16,
                         2,
                         16,
                         0,
                         16,
                         tir.load("int8", ethosu_write_1.data, 0),
                         0,
                         0,
                         0,
                         tir.float32(0.25),
                         14,
                         "NHWC",
                         128,
                         8,
                         1,
                         1,
                         1,
                         1,
                         1,
                         1,
                         1,
                         tir.load("uint8", placeholder_global, 0),
                         80,
                         12,
                         tir.load("uint8", placeholder_d_global, 0),
                         32,
                         0,
                         0,
                         0,
                         0,
                         "NONE",
                         0,
                         0,
                         "NONE",
                         dtype="handle"))
     tir.evaluate(
         tir.call_extern("ethosu_copy",
                         tir.load("uint8", buffer_1.data, 0),
                         80,
                         tir.load("uint8", placeholder_global, 0),
                         dtype="handle"))
     tir.evaluate(
         tir.call_extern("ethosu_copy",
                         tir.load("uint8", buffer_5.data, 0),
                         32,
                         tir.load("uint8", placeholder_d_global, 0),
                         dtype="handle"))
     tir.evaluate(
         tir.call_extern("ethosu_conv2d",
                         "int8",
                         16,
                         16,
                         16,
                         16,
                         0,
                         16,
                         tir.load("int8", ethosu_write_2, 0),
                         0,
                         0,
                         0,
                         tir.float32(0.5),
                         10,
                         "NHWC",
                         256,
                         16,
                         1,
                         "int8",
                         16,
                         16,
                         2,
                         16,
                         0,
                         16,
                         tir.load("int8", ethosu_write_1.data, 2),
                         0,
                         0,
                         0,
                         tir.float32(0.25),
                         14,
                         "NHWC",
                         128,
                         8,
                         1,
                         1,
                         1,
                         1,
                         1,
                         1,
                         1,
                         tir.load("uint8", placeholder_global, 0),
                         80,
                         12,
                         tir.load("uint8", placeholder_d_global, 0),
                         32,
                         0,
                         0,
                         0,
                         0,
                         "NONE",
                         0,
                         0,
                         "NONE",
                         dtype="handle"))
     tir.evaluate(
         tir.call_extern("ethosu_copy",
                         tir.load("uint8", buffer.data, 0),
                         80,
                         tir.load("uint8", placeholder_global, 0),
                         dtype="handle"))
     tir.evaluate(
         tir.call_extern("ethosu_copy",
                         tir.load("uint8", buffer_8.data, 0),
                         32,
                         tir.load("uint8", placeholder_d_global, 0),
                         dtype="handle"))
     tir.evaluate(
         tir.call_extern("ethosu_conv2d",
                         "int8",
                         16,
                         16,
                         16,
                         16,
                         0,
                         16,
                         tir.load("int8", ethosu_write_2, 0),
                         0,
                         0,
                         0,
                         tir.float32(0.5),
                         10,
                         "NHWC",
                         256,
                         16,
                         1,
                         "int8",
                         16,
                         16,
                         2,
                         16,
                         0,
                         16,
                         tir.load("int8", ethosu_write_1.data, 4),
                         0,
                         0,
                         0,
                         tir.float32(0.25),
                         14,
                         "NHWC",
                         128,
                         8,
                         1,
                         1,
                         1,
                         1,
                         1,
                         1,
                         1,
                         tir.load("uint8", placeholder_global, 0),
                         80,
                         12,
                         tir.load("uint8", placeholder_d_global, 0),
                         32,
                         0,
                         0,
                         0,
                         0,
                         "NONE",
                         0,
                         0,
                         "NONE",
                         dtype="handle"))
     tir.evaluate(
         tir.call_extern("ethosu_copy",
                         tir.load("uint8", buffer_4.data, 0),
                         80,
                         tir.load("uint8", placeholder_global, 0),
                         dtype="handle"))
     tir.evaluate(
         tir.call_extern("ethosu_copy",
                         tir.load("uint8", buffer_9.data, 0),
                         32,
                         tir.load("uint8", placeholder_d_global, 0),
                         dtype="handle"))
     tir.evaluate(
         tir.call_extern("ethosu_conv2d",
                         "int8",
                         16,
                         16,
                         16,
                         16,
                         0,
                         16,
                         tir.load("int8", ethosu_write_2, 0),
                         0,
                         0,
                         0,
                         tir.float32(0.5),
                         10,
                         "NHWC",
                         256,
                         16,
                         1,
                         "int8",
                         16,
                         16,
                         2,
                         16,
                         0,
                         16,
                         tir.load("int8", ethosu_write_1.data, 6),
                         0,
                         0,
                         0,
                         tir.float32(0.25),
                         14,
                         "NHWC",
                         128,
                         8,
                         1,
                         1,
                         1,
                         1,
                         1,
                         1,
                         1,
                         tir.load("uint8", placeholder_global, 0),
                         80,
                         12,
                         tir.load("uint8", placeholder_d_global, 0),
                         32,
                         0,
                         0,
                         0,
                         0,
                         "NONE",
                         0,
                         0,
                         "NONE",
                         dtype="handle"))
Exemple #25
0
 # TVMScript fixture whose whole body is one `ethosu_conv2d` extern call.
 #
 # Parameters (all opaque `ty.handle`s, bound to typed buffers below):
 #   placeholder   -> placeholder_3,   uint8, shape [1, 8, 8, 3]
 #   placeholder_1 -> placeholder_4,   uint8, shape [48]
 #   placeholder_2 -> placeholder_5,   int32, shape [16]
 #   ethosu_conv2d -> ethosu_conv2d_1, uint8, shape [1, 8, 8, 16]
 #
 # Returns nothing.
 # NOTE(review): the roles of the buffers (presumably input, weights,
 # bias/scale and output) are inferred from names and shapes only - confirm
 # against the definition of the "ethosu_conv2d" extern.
 def main(
     placeholder: ty.handle,
     placeholder_1: ty.handle,
     placeholder_2: ty.handle,
     ethosu_conv2d: ty.handle,
 ) -> None:
     # function attr dict: sets the `global_symbol` and `tir.noalias` attributes
     tir.func_attr({"global_symbol": "main", "tir.noalias": True})
     # All four buffers share elem_offset=0, align=128 and offset_factor=1;
     # only shape and dtype differ.
     placeholder_3 = tir.match_buffer(placeholder, [1, 8, 8, 3],
                                      dtype="uint8",
                                      elem_offset=0,
                                      align=128,
                                      offset_factor=1)
     placeholder_4 = tir.match_buffer(placeholder_1, [48],
                                      dtype="uint8",
                                      elem_offset=0,
                                      align=128,
                                      offset_factor=1)
     placeholder_5 = tir.match_buffer(placeholder_2, [16],
                                      dtype="int32",
                                      elem_offset=0,
                                      align=128,
                                      offset_factor=1)
     ethosu_conv2d_1 = tir.match_buffer(ethosu_conv2d, [1, 8, 8, 16],
                                        dtype="uint8",
                                        elem_offset=0,
                                        align=128,
                                        offset_factor=1)
     # body: a single extern call. Every argument except `dtype` is positional,
     # so its meaning is fixed by the extern's signature, which is not shown
     # here; do not reorder or drop entries.
     tir.evaluate(
         tir.call_extern(
             "ethosu_conv2d",
             "uint8",
             8,
             8,
             3,
             8,
             0,
             8,
             tir.load("uint8", placeholder_3.data, 0),  # buffer bound to `placeholder`
             0,
             0,
             0,
             tir.float32(0.5),
             10,
             "NHWC",
             24,
             3,
             1,
             "uint8",
             8,
             8,
             16,
             8,
             0,
             8,
             tir.load("uint8", ethosu_conv2d_1.data, 0),  # buffer bound to `ethosu_conv2d`
             0,
             0,
             0,
             tir.float32(0.25),
             14,
             "NHWC",
             128,
             16,
             1,
             1,
             1,
             1,
             1,
             1,
             1,
             tir.load("uint8", placeholder_4.data, 0),  # buffer bound to `placeholder_1`
             0,
             12,
             # NOTE(review): placeholder_5 is declared int32 above but is
             # loaded as "uint8" here - confirm this is intended.
             tir.load("uint8", placeholder_5.data, 0),
             0,
             0,
             0,
             0,
             0,
             "CLIP",
             0,
             0,
             "NONE",
             # NOTE(review): the ethosu_conv2d calls in the preceding snippet
             # pass dtype="handle" - confirm "uint8" is intended here.
             dtype="uint8",
         ))
Exemple #26
0
    # Fixture whose outer `for` iterates over tir.evaluate(0.0), whereas the
    # inner loop iterates over tir.serial(0, 16). `a` is bound to a 16x16
    # float32 buffer that the innermost statement writes 0.0 into.
    # NOTE(review): judging by the name, the outer loop is presumably expected
    # to be rejected by the script parser - confirm against the consuming test.
    # Comments are kept above the `def` so line offsets inside the function
    # stay unchanged.
    def invalid_for_function(a: ty.handle) -> None:
        A = tir.match_buffer(a, (16, 16), "float32")

        for i in tir.evaluate(0.0):
            for j in tir.serial(0, 16):
                A[i, j] = 0.0
Exemple #27
0
 # Fixture taking no arguments that issues tir.Assert(...) as a plain
 # statement and then calls tir.evaluate(0.0).
 # NOTE(review): the name suggests this use of tir.Assert is expected to be
 # rejected as invalid "concise scoping" - confirm against the consuming test.
 # Comments are kept above the `def` so line offsets inside the function stay
 # unchanged.
 def invalid_concise_scoping() -> None:
     tir.Assert(1.0 > 0.0, "aaaa")
     tir.evaluate(0.0)
Exemple #28
0
# Fixture whose only parameter `a` carries no type annotation, unlike the
# `ty.handle`-annotated parameters of the neighbouring fixtures. The `def`
# line is the one marked `# error`.
# NOTE(review): presumably the parser is expected to report the missing
# annotation on that line - confirm against the consuming test.
def missing_type_annotation(a) -> None:  # error
    tir.evaluate(0.0)
# Fixture with three blocks that reach buffer memory through raw `.data`
# pointers instead of ordinary buffer indexing. Each block states the regions
# it touches explicitly with tir.reads / tir.writes.
#
# a, b, c, d: handles bound to the 128x128 float16 buffers A, B, C and D.
# Returns nothing.
def opaque_access(a: ty.handle, b: ty.handle, c: ty.handle, d: ty.handle) -> None:
    A = tir.match_buffer(a, (128, 128), dtype="float16")
    B = tir.match_buffer(b, (128, 128), dtype="float16")
    C = tir.match_buffer(c, (128, 128), dtype="float16")
    D = tir.match_buffer(d, (128, 128), dtype="float16")

    # "load_store": per-element copy from A to D through the data pointers.
    # vi * 128 + vj is the flat index used for element (vi, vj), matching the
    # declared regions A[vi, vj] and D[vi, vj].
    with tir.block([128, 128], "load_store") as [vi, vj]:
        tir.reads(A[vi, vj])
        tir.writes(D[vi, vj])
        D.data[vi * 128 + vj] = tir.load("float16", A.data, vi * 128 + vj)
    # "opaque": one iteration per 16x16 tile on an 8x8 grid. B.data is passed
    # directly and A.data is wrapped in tvm_access_ptr; the declared regions
    # say the A tile is read and the B tile is written.
    with tir.block([8, 8], "opaque") as [vi, vj]:
        tir.reads(A[vi * 16 : vi * 16 + 16, vj * 16 : vj * 16 + 16])
        tir.writes(B[vi * 16 : vi * 16 + 16, vj * 16 : vj * 16 + 16])
        tir.evaluate(
            tir.tvm_load_matrix_sync(
                B.data,
                16,
                16,
                16,
                vi * 8 + vj,
                tir.tvm_access_ptr(
                    tir.type_annotation(dtype="float16"),
                    A.data,
                    vi * 2048 + vj * 16,  # == (vi * 16) * 128 + vj * 16, flat index of A[vi * 16, vj * 16]
                    128,
                    1,
                    dtype="handle",
                ),
                128,
                "row_major",
                dtype="handle",
            )
        )
    # "match_buffer": same call shape as "opaque", but writing C. The tiles are
    # first matched onto sub-buffers A0 / C0 (strides [128, 1]), so the pointer
    # offset and stride come from A0.elem_offset and A0.strides[0] instead of
    # hand-written constants.
    with tir.block([8, 8], "match_buffer") as [vi, vj]:
        tir.reads(A[vi * 16 : vi * 16 + 16, vj * 16 : vj * 16 + 16])
        tir.writes(C[vi * 16 : vi * 16 + 16, vj * 16 : vj * 16 + 16])
        A0 = tir.match_buffer(
            A[
                vi * 16 : vi * 16 + 16,
                vj * 16 : vj * 16 + 16,
            ],
            (16, 16),
            "float16",
            strides=[128, 1],
            offset_factor=1,
        )
        C0 = tir.match_buffer(
            C[
                vi * 16 : vi * 16 + 16,
                vj * 16 : vj * 16 + 16,
            ],
            (16, 16),
            "float16",
            strides=[128, 1],
            offset_factor=1,
        )
        tir.evaluate(
            tir.tvm_load_matrix_sync(
                C0.data,
                16,
                16,
                16,
                vi * 8 + vj,
                tir.tvm_access_ptr(
                    tir.type_annotation(dtype="float16"),
                    A0.data,
                    A0.elem_offset,
                    A0.strides[0],
                    1,
                    dtype="handle",
                ),
                128,
                "row_major",
                dtype="handle",
            )
        )
Exemple #30
0
# Fixture that binds `a` to a 16x16 float32 buffer and then passes the buffer
# object itself to tir.evaluate on the line marked `# error`.
# NOTE(review): presumably the parser is expected to reject that call - confirm
# against the consuming test. Comments are kept above the `def` so line
# offsets inside the function stay unchanged.
def intrin_except_unassign(a: ty.handle) -> None:
    A = tir.match_buffer(a, (16, 16), "float32")
    tir.evaluate(A)  # error