Example #1
0
def transformed_opaque_access(a: T.handle, b: T.handle) -> None:
    # Counterpart of `opaque_access` with the same loop nests and write
    # regions, but the intrinsic arguments are spelled out directly on A and B
    # instead of going through matched sub-buffers.
    # NOTE(review): presumably the expected result of lowering the
    # match_buffer statements in `opaque_access` -- confirm against the test
    # that compares the two.
    A = T.match_buffer(a, (32, 64, 128))
    B = T.match_buffer(b, (64, 64, 64))
    for i, j, k in T.grid(2, 64, 8):
        with T.block():
            T.reads([])
            T.writes(A[i * 16:i * 16 + 16, j, k * 16:k * 16 + 16])
            T.evaluate(
                T.intrin_test(
                    A.data,
                    # Flat offset of A[i * 16, j, k * 16] for strides
                    # (8192, 128, 1); 131072 == 16 * 8192.
                    i * 131072 + j * 128 + k * 16,
                    # Positionally these match strides[0], strides[1],
                    # shape[0], shape[1] of the (16, 1, 16) sub-buffer used
                    # in `opaque_access`.
                    8192,
                    128,
                    16,
                    1,
                    dtype="handle",
                ))
    for i, j, k in T.grid(64, 2, 8):
        with T.block():
            T.reads([])
            T.writes(B[i, j * 32:j * 32 + 32, k * 8:k * 8 + 8])
            T.evaluate(
                T.intrin_test(
                    B.data,
                    # Flat offset of B[i, j * 32, k * 8], assuming compact
                    # row-major strides (4096, 64, 1); 2048 == 32 * 64.
                    i * 4096 + j * 2048 + k * 8,
                    # Strides and extents of the 32 x 8 region, in the same
                    # argument order as the call in `opaque_access`.
                    64,
                    1,
                    32,
                    8,
                    dtype="handle",
                ))
Example #2
0
def symbolic_match(a: T.handle, b: T.handle, n: T.int32, m: T.int32) -> None:
    # Matches sub-regions of two buffers whose shapes depend on the symbolic
    # sizes n and m, then fills the A region element-wise and passes the B
    # region's data/offset/strides/shape to an opaque intrinsic.
    A = T.match_buffer(a, (n * m, m))
    B = T.match_buffer(b, (n * 2, m * 4))
    for i in range(0, n):
        with T.block():
            T.reads([])
            # NOTE(review): the declared A write region spans `n` rows
            # (i * m : i * m + n) while sub_A below matches `m` rows, and the
            # B region starts at row i * n although B has n * 2 rows. This may
            # be intentional for the fixture -- confirm before changing, since
            # `transformed_symbolic_match` repeats the same expressions.
            T.writes([A[i * m:i * m + n, 0:m], B[i * n:i * n + 2, 0:m * 4]])
            # Fresh int32 variables used as the symbolic strides of sub_B.
            Bs_0 = T.var("int32")
            Bs_1 = T.var("int32")
            sub_A = T.match_buffer(A[i * m:i * m + m, 0:m], (m, m),
                                   offset_factor=1)
            sub_B = T.match_buffer(B[i * n:i * n + 2, 0:m * 4], (2, m * 4),
                                   strides=[Bs_0, Bs_1],
                                   offset_factor=1)
            # Element-wise store through the matched (m, m) view of A.
            for ii, jj in T.grid(m, m):
                sub_A[ii, jj] = 1
            # The loop variable j is not referenced by the call; the same
            # intrinsic call is issued on each of the 4 iterations.
            for j in range(0, 4):
                T.evaluate(
                    T.intrin_test(
                        sub_B.data,
                        sub_B.elem_offset,
                        sub_B.strides[0],
                        sub_B.strides[1],
                        sub_B.shape[0],
                        sub_B.shape[1],
                        dtype="handle",
                    ))
Example #3
0
def transformed_recursive_match(a: T.handle, b: T.handle) -> None:
    # Counterpart of `recursive_match` in which both levels of matched
    # sub-buffers are replaced by direct indexing into A and B.
    # NOTE(review): presumably the expected output of match_buffer lowering
    # for `recursive_match` -- confirm against the comparing test.
    A = T.match_buffer(a, (64, 64, 64))
    B = T.match_buffer(b, (64, 64, 64))
    for i, j, k in T.grid(64, 4, 4):
        with T.block():
            T.reads([])
            # Outer block: one 16 x 16 tile of plane i in each buffer.
            T.writes([
                A[i, j * 16:j * 16 + 16, k * 16:k * 16 + 16],
                B[i, j * 16:j * 16 + 16, k * 16:k * 16 + 16],
            ])
            for jj, kk in T.grid(4, 4):
                with T.block():
                    T.reads([])
                    # Inner block: one 4 x 4 sub-tile of the outer tile.
                    T.writes([
                        A[i, j * 16 + jj * 4:j * 16 + jj * 4 + 4,
                          k * 16 + kk * 4:k * 16 + kk * 4 + 4, ],
                        B[i, j * 16 + jj * 4:j * 16 + jj * 4 + 4,
                          k * 16 + kk * 4:k * 16 + kk * 4 + 4, ],
                    ])
                    T.evaluate(
                        T.intrin_test(
                            A.data,
                            # Flat offset of
                            # A[i, j * 16 + jj * 4, k * 16 + kk * 4], assuming
                            # compact row-major strides (4096, 64, 1):
                            # 1024 == 16 * 64 and 256 == 4 * 64.
                            i * 4096 + j * 1024 + jj * 256 + k * 16 + kk * 4,
                            # Strides and extents of the 4 x 4 sub-tile, in
                            # the argument order used by `recursive_match`.
                            64,
                            1,
                            4,
                            4,
                            dtype="handle",
                        ))
                    # Element-wise store into the same 4 x 4 sub-tile of B.
                    for jjj, kkk in T.grid(4, 4):
                        B[i, j * 16 + jj * 4 + jjj, k * 16 + kk * 4 + kkk] = 1
def recursive_match(a: T.handle, b: T.handle) -> None:
    # Two nested levels of match_buffer: the outer block matches a 16 x 16
    # tile of A and B, and the inner block matches a 4 x 4 sub-tile of those
    # already-matched sub-buffers.
    A = T.match_buffer(a, (64, 64, 64))
    B = T.match_buffer(b, (64, 64, 64))
    for i, j, k in T.grid(64, 4, 4):
        with T.block([]):
            T.reads([])
            T.writes(
                [
                    A[i, j * 16 : j * 16 + 16, k * 16 : k * 16 + 16],
                    B[i, j * 16 : j * 16 + 16, k * 16 : k * 16 + 16],
                ]
            )
            # Fresh int32 variables used as the symbolic strides of sub_A.
            As_0 = T.var("int32")
            As_1 = T.var("int32")
            # The 3-D region (extent 1 in the first dim) is matched as a 2-D
            # (16, 16) buffer with symbolic strides.
            sub_A = T.match_buffer(
                A[i, j * 16 : j * 16 + 16, k * 16 : k * 16 + 16],
                (16, 16),
                strides=[As_0, As_1],
                offset_factor=1,
            )
            # sub_B is matched without a strides argument.
            sub_B = T.match_buffer(
                B[i, j * 16 : j * 16 + 16, k * 16 : k * 16 + 16],
                (16, 16),
                offset_factor=1,
            )
            for jj, kk in T.grid(4, 4):
                with T.block([]):
                    T.reads([])
                    # Inner write regions are expressed on the outer
                    # sub-buffers, not on A and B directly.
                    T.writes(
                        [
                            sub_A[jj * 4 : jj * 4 + 4, kk * 4 : kk * 4 + 4],
                            sub_B[jj * 4 : jj * 4 + 4, kk * 4 : kk * 4 + 4],
                        ]
                    )
                    # Fresh int32 variables used as the symbolic strides of
                    # sub_sub_A.
                    Ass_0 = T.var("int32")
                    Ass_1 = T.var("int32")
                    sub_sub_A = T.match_buffer(
                        sub_A[jj * 4 : jj * 4 + 4, kk * 4 : kk * 4 + 4],
                        (4, 4),
                        strides=[Ass_0, Ass_1],
                        offset_factor=1,
                    )
                    sub_sub_B = T.match_buffer(
                        sub_B[jj * 4 : jj * 4 + 4, kk * 4 : kk * 4 + 4],
                        (4, 4),
                        offset_factor=1,
                    )
                    # Opaque access: hand the innermost A view's data pointer,
                    # element offset, strides and shape to the intrinsic.
                    T.evaluate(
                        T.intrin_test(
                            sub_sub_A.data,
                            sub_sub_A.elem_offset,
                            sub_sub_A.strides[0],
                            sub_sub_A.strides[1],
                            sub_sub_A.shape[0],
                            sub_sub_A.shape[1],
                            dtype="handle",
                        )
                    )
                    # Element-wise store through the innermost B view.
                    for jjj, kkk in T.grid(4, 4):
                        sub_sub_B[jjj, kkk] = 1
def high_dim_opaque_access_with_source_strides(a: T.handle) -> None:
    # Matches a 2-D (16, 16) view out of a 3-D source buffer that is itself
    # declared with explicit strides [2576, 80, 1], then passes the view's
    # data/offset/strides/shape to an opaque intrinsic.
    A = T.match_buffer(a, (16, 32, 64), strides=[2576, 80, 1])
    for i, j, k in T.grid(16, 2, 4):
        with T.block([]):
            # Fresh int32 variables used as the symbolic strides of sub_A.
            As_0 = T.var("int32")
            As_1 = T.var("int32")
            T.reads([])
            T.writes(A[i, j * 16 : j * 16 + 16, k * 16 : k * 16 + 16])
            # The matched region is identical to the declared write region.
            sub_A = T.match_buffer(
                A[i, j * 16 : j * 16 + 16, k * 16 : k * 16 + 16],
                (16, 16),
                strides=[As_0, As_1],
                offset_factor=1,
            )
            T.evaluate(
                T.intrin_test(
                    sub_A.data,
                    sub_A.elem_offset,
                    sub_A.strides[0],
                    sub_A.strides[1],
                    sub_A.shape[0],
                    sub_A.shape[1],
                    dtype="handle",
                )
            )
def opaque_access(a: T.handle, b: T.handle) -> None:
    # Two independent loop nests, each matching a sub-region of a 3-D buffer
    # and passing the matched buffer's data/offset/strides/shape to an opaque
    # intrinsic. The first uses literal strides, the second symbolic ones.
    A = T.match_buffer(a, (32, 64, 128))
    B = T.match_buffer(b, (64, 64, 64))
    for i, j, k in T.grid(2, 64, 8):
        with T.block([]):
            T.reads([])
            T.writes(A[i * 16 : i * 16 + 16, j, k * 16 : k * 16 + 16])
            # 3-D view (the middle dim has extent 1) with strides given as
            # literals rather than variables.
            sub_A = T.match_buffer(
                A[i * 16 : i * 16 + 16, j, k * 16 : k * 16 + 16],
                (16, 1, 16),
                strides=[8192, 128, 1],
                offset_factor=1,
            )
            # Only the first two stride/shape entries of the 3-D view are
            # passed to the intrinsic.
            T.evaluate(
                T.intrin_test(
                    sub_A.data,
                    sub_A.elem_offset,
                    sub_A.strides[0],
                    sub_A.strides[1],
                    sub_A.shape[0],
                    sub_A.shape[1],
                    dtype="handle",
                )
            )
    for i, j, k in T.grid(64, 2, 8):
        with T.block([]):
            # Fresh int32 variables used as the symbolic strides of sub_B.
            Bs_0 = T.var("int32")
            Bs_1 = T.var("int32")
            T.reads([])
            T.writes(B[i, j * 32 : j * 32 + 32, k * 8 : k * 8 + 8])
            # 2-D (32, 8) view of a region whose first dim has extent 1.
            sub_B = T.match_buffer(
                B[i, j * 32 : j * 32 + 32, k * 8 : k * 8 + 8],
                (32, 8),
                strides=[Bs_0, Bs_1],
                offset_factor=1,
            )
            T.evaluate(
                T.intrin_test(
                    sub_B.data,
                    sub_B.elem_offset,
                    sub_B.strides[0],
                    sub_B.strides[1],
                    sub_B.shape[0],
                    sub_B.shape[1],
                    dtype="handle",
                )
            )
Example #7
0
def transformed_high_dim_opaque_access(a: T.handle) -> None:
    # Passes A's data pointer to an opaque intrinsic together with an explicit
    # flat offset and literal stride/extent values for a 16 x 16 tile of
    # plane i.
    # NOTE(review): the "transformed_" prefix suggests this is the expected
    # post-lowering form of a match_buffer variant not shown here -- confirm.
    A = T.match_buffer(a, (16, 32, 64))
    for i, j, k in T.grid(16, 2, 4):
        with T.block():
            T.reads([])
            T.writes(A[i, j * 16:j * 16 + 16, k * 16:k * 16 + 16])
            T.evaluate(
                T.intrin_test(
                    A.data,
                    # Flat offset of A[i, j * 16, k * 16], assuming compact
                    # row-major strides (2048, 64, 1); 1024 == 16 * 64.
                    i * 2048 + j * 1024 + k * 16,
                    # Two strides (64, 1) followed by the two tile extents
                    # (16, 16).
                    64,
                    1,
                    16,
                    16,
                    dtype="handle",
                ))
Example #8
0
def transformed_high_dim_opaque_access_with_source_strides(
        a: T.handle) -> None:
    # Counterpart of `high_dim_opaque_access_with_source_strides` with the
    # matched sub-buffer replaced by explicit offset/stride/extent arguments
    # derived from A's declared strides [2576, 80, 1].
    # NOTE(review): presumably the expected output of match_buffer lowering
    # for that function -- confirm against the comparing test.
    A = T.match_buffer(a, (16, 32, 64), strides=[2576, 80, 1])
    for i, j, k in T.grid(16, 2, 4):
        with T.block():
            T.reads([])
            T.writes(A[i, j * 16:j * 16 + 16, k * 16:k * 16 + 16])
            T.evaluate(
                T.intrin_test(
                    A.data,
                    # Flat offset of A[i, j * 16, k * 16] under the declared
                    # strides; 1280 == 16 * 80.
                    i * 2576 + j * 1280 + k * 16,
                    # Strides of the last two source dims (80, 1), then the
                    # tile extents (16, 16).
                    80,
                    1,
                    16,
                    16,
                    dtype="handle",
                ))
Example #9
0
def transformed_rank0_buffer(a: T.handle, b: T.handle) -> None:
    # Counterpart of `rank0_buffer` with the rank-0 sub-buffers replaced by
    # direct accesses: A[i, j] is stored to directly, and B's data pointer is
    # passed with an explicit flat offset.
    # NOTE(review): presumably the expected output of match_buffer lowering
    # for `rank0_buffer` -- confirm against the comparing test.
    A = T.match_buffer(a, (8, 8))
    B = T.match_buffer(b, (8, 8))
    for i, j in T.grid(8, 8):
        with T.block():
            T.reads([])
            T.writes([A[i, j], B[i, j]])
            A[i, j] = 1
            T.evaluate(
                T.intrin_test(
                    B.data,
                    # Flat offset of B[i, j], assuming compact row-major
                    # layout of the (8, 8) buffer.
                    i * 8 + j,
                    # The same four literal zeros are passed in `rank0_buffer`.
                    0,
                    0,
                    0,
                    0,
                    dtype="handle",
                ))
Example #10
0
def rank0_buffer(a: T.handle, b: T.handle) -> None:
    # Matches single elements of A and B as rank-0 buffers (shape ()), stores
    # through one and passes the other's data pointer and element offset to
    # an opaque intrinsic.
    A = T.match_buffer(a, (8, 8))
    B = T.match_buffer(b, (8, 8))
    for i, j in T.grid(8, 8):
        with T.block():
            T.reads([])
            T.writes([A[i, j], B[i, j]])
            sub_A = T.match_buffer(A[i, j], (), offset_factor=1)
            sub_B = T.match_buffer(B[i, j], (), offset_factor=1)
            # A rank-0 buffer is indexed with the empty tuple.
            sub_A[()] = 1
            T.evaluate(
                T.intrin_test(
                    sub_B.data,
                    sub_B.elem_offset,
                    # A rank-0 buffer has no stride/shape entries to pass, so
                    # literal zeros fill the remaining argument slots.
                    0,
                    0,
                    0,
                    0,
                    dtype="handle",
                ))
Example #11
0
def transformed_symbolic_match(a: T.handle, b: T.handle, n: T.int32,
                               m: T.int32) -> None:
    # Counterpart of `symbolic_match` with the matched sub-buffers replaced
    # by direct indexing into A and explicit offset/stride/shape expressions
    # for B, all in terms of the symbolic sizes n and m.
    # NOTE(review): presumably the expected output of match_buffer lowering
    # for `symbolic_match` -- confirm against the comparing test.
    A = T.match_buffer(a, (n * m, m))
    B = T.match_buffer(b, (n * 2, m * 4))
    for i in range(0, n):
        with T.block():
            T.reads([])
            # NOTE(review): same region expressions as in `symbolic_match`
            # (A spans `n` rows here; B starts at row i * n although B has
            # n * 2 rows) -- keep the two functions in sync if either changes.
            T.writes([A[i * m:i * m + n, 0:m], B[i * n:i * n + 2, 0:m * 4]])
            # Element-wise store into rows i * m .. i * m + m - 1 of A.
            for ii, jj in T.grid(m, m):
                A[i * m + ii, jj] = 1
            # The loop variable j is not referenced by the call.
            for j in range(0, 4):
                T.evaluate(
                    T.intrin_test(
                        B.data,
                        # Flat offset of B[i * n, 0], assuming a compact row
                        # stride of m * 4.
                        i * n * (m * 4),
                        # Strides (m * 4, 1), then extents (2, m * 4) of the
                        # B region.
                        m * 4,
                        1,
                        2,
                        m * 4,
                        dtype="handle",
                    ))