# TIR script fixture (uses the `tir` / `ty` names provided elsewhere in the file).
# Handles `a` and `c` are matched as 128x128 buffers A and C; an intermediate
# 128x128 buffer B is allocated inside the "root" block. Under the outer i0 loop:
#   block "B":  B[vi, vj] = A[vi, vj] * 2   (vi bound to i0, vj bound to ax1)
#   block "C":  C[vi, vj] = B[vi, vj] + 1   (vi_1 bound to i0, vj_1 bound to i1)
# Block "B" carries a "buffer_dim_align" attribute whose value is [0]; the
# function name labels this annotation as invalid.
# NOTE(review): presumably consumed by a test that expects this annotation to be
# rejected -- confirm against the caller. Do not "repair" the attribute value.
def element_wise_invalid_annotation(a: ty.handle, c: ty.handle) -> None:
    C = tir.match_buffer(c, [128, 128], elem_offset=0, align=128, offset_factor=1)
    A = tir.match_buffer(a, [128, 128], elem_offset=0, align=128, offset_factor=1)
    # body
    with tir.block([], "root"):
        tir.reads([])
        tir.writes([])
        B = tir.alloc_buffer([128, 128], elem_offset=0, align=128, offset_factor=1)
        for i0 in tir.serial(0, 128):
            for ax1 in tir.serial(0, 128):
                with tir.block([128, 128], "B") as [vi, vj]:
                    tir.block_attr({"buffer_dim_align": [0]})  # the annotation the function name calls invalid
                    tir.bind(vi, i0)
                    tir.bind(vj, ax1)
                    tir.reads([A[vi, vj]])
                    tir.writes([B[vi, vj]])
                    B[vi, vj] = (A[vi, vj] * tir.float32(2))
            for i1 in tir.serial(0, 128):  # sibling of the ax1 loop; still inside i0 (it binds vi_1 to i0)
                with tir.block([128, 128], "C") as [vi_1, vj_1]:
                    tir.bind(vi_1, i0)
                    tir.bind(vj_1, i1)
                    tir.reads([B[vi_1, vj_1]])
                    tir.writes([C[vi_1, vj_1]])
                    C[vi_1, vj_1] = (B[vi_1, vj_1] + tir.float32(1))
# TIR script fixture (uses the `tir` / `ty` names provided elsewhere in the file).
# Handles `a` and `c` are matched as (16, 16) "float32" buffers A and C.
# For each row i, an outer block declares reads A[i, 0:16] / writes C[i, 0:16]
# and allocates a single-row buffer B of shape (1, 16) with strides (31, 1):
#   first j loop:   B[0, j] = A[i, j] + 1.0
#   second j loop:  C[i, j] = B[0, j] * 2.0
# The first inner block carries the attribute
# {"buffer_dim_align": [[0, 0, 16, 15]]}.
# NOTE(review): the (31, 1) strides on B presumably correspond to that
# buffer_dim_align entry -- confirm against the pass/test that uses this fixture.
# NOTE(review): alloc_buffer is called with the keyword `dtypes=`, while both
# match_buffer calls pass "float32" positionally; confirm whether `dtype=` was
# intended and how the script parser treats the keyword as written.
def compacted_storage_align_func(a: ty.handle, c: ty.handle) -> None:
    A = tir.match_buffer(a, (16, 16), "float32")
    C = tir.match_buffer(c, (16, 16), "float32")
    for i in range(0, 16):
        with tir.block([]):
            tir.reads(A[i, 0:16])
            tir.writes(C[i, 0:16])
            B = tir.alloc_buffer((1, 16), strides=(31, 1), dtypes="float32")
            for j in range(0, 16):
                with tir.block() as []:
                    tir.reads(A[i, j])
                    tir.writes(B[0, j])
                    tir.block_attr({"buffer_dim_align": [[0, 0, 16, 15]]})
                    B[0, j] = A[i, j] + 1.0
            for j in range(0, 16):  # second pass over the same row, reading B back
                with tir.block() as []:
                    tir.reads(B[0, j])
                    tir.writes(C[i, j])
                    C[i, j] = B[0, j] * 2.0
# TIR script fixture with no parameters: one 16x16 block (axes vi, vj) whose
# body calls tir.block_attr({}) twice. The second call is the line the author
# marked as the error site.
# NOTE(review): the body's line positions relative to `def` are kept unchanged
# in case a caller refers to the marked line by number -- confirm before adding
# lines inside the function.
def duplicate_annotations() -> None:
    with tir.block([16, 16]) as [vi, vj]:
        tir.block_attr({})
        tir.block_attr({})  # error (second block_attr in the same block)