def transformed_opaque_access(a: ty.handle, b: ty.handle) -> None:
    # Counterpart of `opaque_access` with the sub-buffer `tir.match_buffer`
    # declarations removed: the loops, blocks and read/write annotations are
    # the same, but every `sub_A.*` / `sub_B.*` argument of `tir.intrin_test`
    # is replaced by an expression on the parent buffers A and B.
    # NOTE(review): presumably the expected output of lowering
    # `opaque_access`; the pass and the comparison are not visible here.
    A = tir.match_buffer(a, (32, 64, 128))
    B = tir.match_buffer(b, (64, 64, 64))
    for i, j, k in tir.grid(2, 64, 8):
        with tir.block([]):
            tir.reads([])
            tir.writes(A[i * 16:i * 16 + 16, j, k * 16:k * 16 + 16])
            # Argument order follows the call in `opaque_access`:
            # data, elem_offset, strides[0], strides[1], shape[0], shape[1].
            # The offset is the flat index of A[i * 16, j, k * 16] using the
            # strides (8192, 128, 1), i.e. i * 16 * 8192 + j * 128 + k * 16.
            # 8192 and 128 are the first two explicit strides of `sub_A`,
            # 16 and 1 the first two entries of its (16, 1, 16) shape.
            tir.evaluate(
                tir.intrin_test(
                    A.data,
                    i * 131072 + j * 128 + k * 16,
                    8192,
                    128,
                    16,
                    1,
                    dtype="handle",
                ))
    for i, j, k in tir.grid(64, 2, 8):
        with tir.block([]):
            tir.reads([])
            tir.writes(B[i, j * 32:j * 32 + 32, k * 8:k * 8 + 8])
            # Offset of B[i, j * 32, k * 8] in the (64, 64, 64) buffer:
            # i * 4096 + j * 32 * 64 + k * 8. The literals 64 and 1 stand
            # where `opaque_access` passes the symbolic strides Bs_0 / Bs_1;
            # 32 and 8 are the (32, 8) shape of `sub_B`.
            tir.evaluate(
                tir.intrin_test(
                    B.data,
                    i * 4096 + j * 2048 + k * 8,
                    64,
                    1,
                    32,
                    8,
                    dtype="handle",
                ))
def symbolic_match(a: ty.handle, b: ty.handle, n: ty.int32,
                   m: ty.int32) -> None:
    # Fixture with symbolic extents n and m: inside each block a region of A
    # and a region of B are matched to sub-buffers; `sub_A` is written
    # element-wise and the fields of `sub_B` are passed to the opaque
    # `tir.intrin_test` call.
    # See `transformed_symbolic_match` for the version without sub-buffers.
    A = tir.match_buffer(a, (n * m, m))
    B = tir.match_buffer(b, (n * 2, m * 4))
    for i in range(0, n):
        with tir.block([]):
            tir.reads([])
            # NOTE(review): the declared write region on A ends at
            # `i * m + n`, whereas the region matched to `sub_A` below ends
            # at `i * m + m`. The transformed counterpart carries the same
            # annotation, so the two must be changed together if at all.
            tir.writes([A[i * m:i * m + n, 0:m], B[i * n:i * n + 2, 0:m * 4]])
            # Symbolic stride variables used for `sub_B`.
            Bs_0 = tir.var("int32")
            Bs_1 = tir.var("int32")
            # (m, m) view of A; no explicit strides are given.
            sub_A = tir.match_buffer(A[i * m:i * m + m, 0:m], (m, m),
                                     offset_factor=1)
            # (2, m * 4) view of B with strides bound to Bs_0 / Bs_1.
            sub_B = tir.match_buffer(B[i * n:i * n + 2, 0:m * 4], (2, m * 4),
                                     strides=[Bs_0, Bs_1],
                                     offset_factor=1)
            for ii, jj in tir.grid(m, m):
                sub_A[ii, jj] = 1
            # The loop variable `j` is not referenced in the body; the same
            # call is simply repeated four times.
            for j in range(0, 4):
                tir.evaluate(
                    tir.intrin_test(
                        sub_B.data,
                        sub_B.elem_offset,
                        sub_B.strides[0],
                        sub_B.strides[1],
                        sub_B.shape[0],
                        sub_B.shape[1],
                        dtype="handle",
                    ))
def transformed_recursive_match(a: ty.handle, b: ty.handle) -> None:
    # Counterpart of `recursive_match` without any sub-buffer declarations:
    # both nesting levels of `tir.match_buffer` are gone and all accesses are
    # written directly in terms of A and B.
    # NOTE(review): presumably the expected output of lowering
    # `recursive_match`; the pass and the comparison are not visible here.
    A = tir.match_buffer(a, (64, 64, 64))
    B = tir.match_buffer(b, (64, 64, 64))
    for i, j, k in tir.grid(64, 4, 4):
        with tir.block([]):
            tir.reads([])
            # Outer block: a 16 x 16 tile of plane i in both buffers.
            tir.writes([
                A[i, j * 16:j * 16 + 16, k * 16:k * 16 + 16],
                B[i, j * 16:j * 16 + 16, k * 16:k * 16 + 16],
            ])
            for jj, kk in tir.grid(4, 4):
                with tir.block([]):
                    tir.reads([])
                    # Inner block: the 4 x 4 sub-tile (jj, kk) of the outer
                    # tile, expressed in coordinates of the parent buffers.
                    tir.writes([
                        A[i, j * 16 + jj * 4:j * 16 + jj * 4 + 4,
                          k * 16 + kk * 4:k * 16 + kk * 4 + 4, ],
                        B[i, j * 16 + jj * 4:j * 16 + jj * 4 + 4,
                          k * 16 + kk * 4:k * 16 + kk * 4 + 4, ],
                    ])
                    # Stands in for the `sub_sub_A.*` arguments of
                    # `recursive_match`. The offset is the flat index of
                    # A[i, j * 16 + jj * 4, k * 16 + kk * 4] with strides
                    # (4096, 64, 1); 64 and 1 take the place of the symbolic
                    # strides, 4 and 4 are the (4, 4) sub-tile shape.
                    tir.evaluate(
                        tir.intrin_test(
                            A.data,
                            i * 4096 + j * 1024 + jj * 256 + k * 16 + kk * 4,
                            64,
                            1,
                            4,
                            4,
                            dtype="handle",
                        ))
                    # Direct store replacing `sub_sub_B[jjj, kkk] = 1`.
                    for jjj, kkk in tir.grid(4, 4):
                        B[i, j * 16 + jj * 4 + jjj, k * 16 + kk * 4 + kkk] = 1
def opaque_access(a: ty.handle, b: ty.handle) -> None:
    # Fixture in which sub-buffers are only used through their fields
    # (data, elem_offset, strides, shape), which are passed to the opaque
    # `tir.intrin_test` call; no element of a sub-buffer is loaded or stored.
    # The first loop nest uses constant strides, the second symbolic ones.
    # See `transformed_opaque_access` for the version without sub-buffers.
    A = tir.match_buffer(a, (32, 64, 128))
    B = tir.match_buffer(b, (64, 64, 64))
    for i, j, k in tir.grid(2, 64, 8):
        with tir.block([]):
            tir.reads([])
            tir.writes(A[i * 16:i * 16 + 16, j, k * 16:k * 16 + 16])
            # The matched region keeps all three dimensions, the middle one
            # with extent 1. The explicit strides (8192, 128, 1) equal
            # (64 * 128, 128, 1) for the (32, 64, 128) parent buffer.
            sub_A = tir.match_buffer(
                A[i * 16:i * 16 + 16, j, k * 16:k * 16 + 16],
                (16, 1, 16),
                strides=[8192, 128, 1],
                offset_factor=1,
            )
            # Only the first two strides / shape entries are passed on.
            tir.evaluate(
                tir.intrin_test(
                    sub_A.data,
                    sub_A.elem_offset,
                    sub_A.strides[0],
                    sub_A.strides[1],
                    sub_A.shape[0],
                    sub_A.shape[1],
                    dtype="handle",
                ))
    for i, j, k in tir.grid(64, 2, 8):
        with tir.block([]):
            # Symbolic stride variables; unlike the other fixtures in this
            # file they are declared before the reads/writes annotations.
            Bs_0 = tir.var("int32")
            Bs_1 = tir.var("int32")
            tir.reads([])
            tir.writes(B[i, j * 32:j * 32 + 32, k * 8:k * 8 + 8])
            # The leading index `i` is a single point, and the matched
            # sub-buffer is declared with the two-dimensional shape (32, 8).
            sub_B = tir.match_buffer(
                B[i, j * 32:j * 32 + 32, k * 8:k * 8 + 8],
                (32, 8),
                strides=[Bs_0, Bs_1],
                offset_factor=1,
            )
            tir.evaluate(
                tir.intrin_test(
                    sub_B.data,
                    sub_B.elem_offset,
                    sub_B.strides[0],
                    sub_B.strides[1],
                    sub_B.shape[0],
                    sub_B.shape[1],
                    dtype="handle",
                ))
def recursive_match(a: ty.handle, b: ty.handle) -> None:
    # Fixture with two nesting levels of `tir.match_buffer`: a 16 x 16 tile
    # is matched from A and B in the outer block, and a 4 x 4 sub-tile is
    # matched from those sub-buffers in the inner block.
    # The A chain uses symbolic strides and is consumed only through its
    # fields; the B chain has no explicit strides and is written
    # element-wise.
    # See `transformed_recursive_match` for the version without sub-buffers.
    A = tir.match_buffer(a, (64, 64, 64))
    B = tir.match_buffer(b, (64, 64, 64))
    for i, j, k in tir.grid(64, 4, 4):
        with tir.block([]):
            tir.reads([])
            tir.writes([
                A[i, j * 16:j * 16 + 16, k * 16:k * 16 + 16],
                B[i, j * 16:j * 16 + 16, k * 16:k * 16 + 16],
            ])
            # Symbolic strides for the first-level view of A.
            As_0 = tir.var("int32")
            As_1 = tir.var("int32")
            sub_A = tir.match_buffer(
                A[i, j * 16:j * 16 + 16, k * 16:k * 16 + 16],
                (16, 16),
                strides=[As_0, As_1],
                offset_factor=1,
            )
            sub_B = tir.match_buffer(
                B[i, j * 16:j * 16 + 16, k * 16:k * 16 + 16],
                (16, 16),
                offset_factor=1,
            )
            for jj, kk in tir.grid(4, 4):
                with tir.block([]):
                    tir.reads([])
                    # The inner write regions are declared on the
                    # first-level sub-buffers, not on A and B.
                    tir.writes([
                        sub_A[jj * 4:jj * 4 + 4, kk * 4:kk * 4 + 4],
                        sub_B[jj * 4:jj * 4 + 4, kk * 4:kk * 4 + 4],
                    ])
                    # Symbolic strides for the second-level view of A.
                    Ass_0 = tir.var("int32")
                    Ass_1 = tir.var("int32")
                    # Second-level views are matched from the first-level
                    # sub-buffers.
                    sub_sub_A = tir.match_buffer(
                        sub_A[jj * 4:jj * 4 + 4, kk * 4:kk * 4 + 4],
                        (4, 4),
                        strides=[Ass_0, Ass_1],
                        offset_factor=1,
                    )
                    sub_sub_B = tir.match_buffer(
                        sub_B[jj * 4:jj * 4 + 4, kk * 4:kk * 4 + 4],
                        (4, 4),
                        offset_factor=1,
                    )
                    tir.evaluate(
                        tir.intrin_test(
                            sub_sub_A.data,
                            sub_sub_A.elem_offset,
                            sub_sub_A.strides[0],
                            sub_sub_A.strides[1],
                            sub_sub_A.shape[0],
                            sub_sub_A.shape[1],
                            dtype="handle",
                        ))
                    # Fill the 4 x 4 sub-tile of B through the nested view.
                    for jjj, kkk in tir.grid(4, 4):
                        sub_sub_B[jjj, kkk] = 1
def transformed_rank0_buffer(a: ty.handle, b: ty.handle) -> None:
    # Counterpart of `rank0_buffer` without the zero-dimensional
    # sub-buffers: the store and the offset are written on A and B directly.
    # NOTE(review): presumably the expected output of lowering
    # `rank0_buffer`; the pass and the comparison are not visible here.
    A = tir.match_buffer(a, (8, 8))
    B = tir.match_buffer(b, (8, 8))
    for i, j in tir.grid(8, 8):
        with tir.block([]):
            tir.reads([])
            tir.writes([A[i, j], B[i, j]])
            # Replaces `sub_A[()] = 1`.
            A[i, j] = 1
            # `i * 8 + j` stands where `rank0_buffer` passes
            # `sub_B.elem_offset`: the flat index of B[i, j] in the (8, 8)
            # buffer. The four zeros are literal zeros in `rank0_buffer` too.
            tir.evaluate(
                tir.intrin_test(
                    B.data,
                    i * 8 + j,
                    0,
                    0,
                    0,
                    0,
                    dtype="handle",
                ))
def rank0_buffer(a: ty.handle, b: ty.handle) -> None:
    # Fixture matching single elements of A and B to sub-buffers declared
    # with the empty shape `()`. `sub_A` is stored to; `sub_B` is used only
    # through its `data` and `elem_offset` fields.
    # See `transformed_rank0_buffer` for the version without sub-buffers.
    A = tir.match_buffer(a, (8, 8))
    B = tir.match_buffer(b, (8, 8))
    for i, j in tir.grid(8, 8):
        with tir.block([]):
            tir.reads([])
            tir.writes([A[i, j], B[i, j]])
            sub_A = tir.match_buffer(A[i, j], (), offset_factor=1)
            sub_B = tir.match_buffer(B[i, j], (), offset_factor=1)
            # A sub-buffer with shape `()` is indexed with the empty tuple.
            sub_A[()] = 1
            # The stride and shape argument slots are filled with literal
            # zeros here, not with fields of the sub-buffer.
            tir.evaluate(
                tir.intrin_test(
                    sub_B.data,
                    sub_B.elem_offset,
                    0,
                    0,
                    0,
                    0,
                    dtype="handle",
                ))
def transformed_symbolic_match(a: ty.handle, b: ty.handle, n: ty.int32,
                               m: ty.int32) -> None:
    # Counterpart of `symbolic_match` without sub-buffer declarations: the
    # element-wise store goes to A directly, and the `sub_B.*` arguments are
    # replaced by expressions in n and m.
    # NOTE(review): presumably the expected output of lowering
    # `symbolic_match`; the pass and the comparison are not visible here.
    A = tir.match_buffer(a, (n * m, m))
    B = tir.match_buffer(b, (n * 2, m * 4))
    for i in range(0, n):
        with tir.block([]):
            tir.reads([])
            # NOTE(review): the declared write region on A ends at
            # `i * m + n`, while the store loop below covers rows
            # i * m .. i * m + m - 1. `symbolic_match` carries the same
            # annotation, so the two must be changed together if at all.
            tir.writes([A[i * m:i * m + n, 0:m], B[i * n:i * n + 2, 0:m * 4]])
            # Replaces `sub_A[ii, jj] = 1`, with the row offset `i * m` of
            # the matched region folded into the index.
            for ii, jj in tir.grid(m, m):
                A[i * m + ii, jj] = 1
            # The loop variable `j` is not referenced in the body; the same
            # call is simply repeated four times.
            for j in range(0, 4):
                # Offset of B[i * n, 0] with row stride m * 4. `m * 4` and
                # `1` stand where `symbolic_match` passes the symbolic
                # strides Bs_0 / Bs_1; `2` and `m * 4` are the (2, m * 4)
                # shape of `sub_B`.
                tir.evaluate(
                    tir.intrin_test(
                        B.data,
                        i * n * (m * 4),
                        m * 4,
                        1,
                        2,
                        m * 4,
                        dtype="handle",
                    ))