def data_pack(sch: Schedule):
    """Apply a fixed unroll/tile/reorder recipe to the ``data_pack`` block.

    The block is expected to expose exactly six loops.  The first two and
    the last two are unrolled; the third is split in two using the recorded
    tile decision ``[3, 3]`` and the fourth using ``[64, 2]``.  Finally the
    loops are reordered as: both outer tiles, both inner tiles, then the
    four unrolled loops.

    Args:
        sch: Schedule that is mutated in place.  Nothing is returned.
    """
    # NOTE(review): another ``data_pack`` is defined further down in this
    # file; confirm the two live in separate scopes so neither shadows the
    # other.

    def _tile_in_two(loop, decision):
        # Sample a 2-way perfect tiling (pinned by ``decision``) and split.
        outer_factor, inner_factor = sch.sample_perfect_tile(
            n=2,
            loop=loop,
            max_innermost_factor=64,
            decision=decision,
        )
        return sch.split(loop=loop, factors=[outer_factor, inner_factor])

    block = sch.get_block(name="data_pack")
    head_a, head_b, mid_a, mid_b, tail_a, tail_b = sch.get_loops(block=block)

    for loop in (head_a, head_b):
        sch.unroll(loop=loop)

    mid_a_outer, mid_a_inner = _tile_in_two(mid_a, [3, 3])
    mid_b_outer, mid_b_inner = _tile_in_two(mid_b, [64, 2])

    for loop in (tail_a, tail_b):
        sch.unroll(loop=loop)

    sch.reorder(
        mid_a_outer,
        mid_b_outer,
        mid_a_inner,
        mid_b_inner,
        head_a,
        head_b,
        tail_a,
        tail_b,
    )
def inverse(sch: Schedule):
    """Apply a fixed unroll/tile/reorder recipe to the ``inverse`` block.

    The block is expected to expose exactly six loops.  The first two and
    the last two are unrolled; the third is split in two using the recorded
    tile decision ``[3, 3]`` and the fourth using ``[2, 64]``.  Finally the
    loops are reordered as: both outer tiles, both inner tiles, then the
    four unrolled loops.

    Args:
        sch: Schedule that is mutated in place.  Nothing is returned.
    """
    # NOTE(review): other functions named ``inverse`` appear later in this
    # file; confirm they live in separate scopes so none shadows another.

    def _tile_in_two(loop, decision):
        # Sample a 2-way perfect tiling (pinned by ``decision``) and split.
        outer_factor, inner_factor = sch.sample_perfect_tile(
            n=2,
            loop=loop,
            max_innermost_factor=64,
            decision=decision,
        )
        return sch.split(loop=loop, factors=[outer_factor, inner_factor])

    block = sch.get_block(name="inverse")
    head_a, head_b, mid_a, mid_b, tail_a, tail_b = sch.get_loops(block=block)

    for loop in (head_a, head_b):
        sch.unroll(loop=loop)

    mid_a_outer, mid_a_inner = _tile_in_two(mid_a, [3, 3])
    mid_b_outer, mid_b_inner = _tile_in_two(mid_b, [2, 64])

    for loop in (tail_a, tail_b):
        sch.unroll(loop=loop)

    sch.reorder(
        mid_a_outer,
        mid_b_outer,
        mid_a_inner,
        mid_b_inner,
        head_a,
        head_b,
        tail_a,
        tail_b,
    )
def data_pack(sch: Schedule):
    """Apply a fixed unroll/tile/reorder recipe to the ``data_pack`` block.

    The block is expected to expose exactly six loops.  The first two and
    the last two are unrolled; the third is split in two using the recorded
    tile decision ``[9, 1]`` and the fourth using ``[32, 4]``.  Finally the
    loops are reordered as: both outer tiles, both inner tiles, then the
    four unrolled loops.

    Args:
        sch: Schedule that is mutated in place.  Nothing is returned.
    """
    # NOTE(review): an earlier ``data_pack`` exists in this file with
    # different tile decisions; confirm the two live in separate scopes.

    def _tile_in_two(loop, decision):
        # Sample a 2-way perfect tiling (pinned by ``decision``) and split.
        outer_factor, inner_factor = sch.sample_perfect_tile(
            n=2,
            loop=loop,
            max_innermost_factor=64,
            decision=decision,
        )
        return sch.split(loop=loop, factors=[outer_factor, inner_factor])

    block = sch.get_block(name="data_pack")
    head_a, head_b, mid_a, mid_b, tail_a, tail_b = sch.get_loops(block=block)

    for loop in (head_a, head_b):
        sch.unroll(loop=loop)

    mid_a_outer, mid_a_inner = _tile_in_two(mid_a, [9, 1])
    mid_b_outer, mid_b_inner = _tile_in_two(mid_b, [32, 4])

    for loop in (tail_a, tail_b):
        sch.unroll(loop=loop)

    sch.reorder(
        mid_a_outer,
        mid_b_outer,
        mid_a_inner,
        mid_b_inner,
        head_a,
        head_b,
        tail_a,
        tail_b,
    )
def inverse(sch: Schedule):
    """Apply a fixed unroll/tile/reorder recipe to the ``inverse`` block.

    The block is expected to expose exactly six loops.  The first two and
    the last two are unrolled; the third is split in two using the recorded
    tile decision ``[1, 9]`` and the fourth using ``[2, 64]``.  Finally the
    loops are reordered as: both outer tiles, both inner tiles, then the
    four unrolled loops.

    Args:
        sch: Schedule that is mutated in place.  Nothing is returned.
    """
    # NOTE(review): other functions named ``inverse`` appear elsewhere in
    # this file; confirm they live in separate scopes so none shadows
    # another.

    def _tile_in_two(loop, decision):
        # Sample a 2-way perfect tiling (pinned by ``decision``) and split.
        outer_factor, inner_factor = sch.sample_perfect_tile(
            n=2,
            loop=loop,
            max_innermost_factor=64,
            decision=decision,
        )
        return sch.split(loop=loop, factors=[outer_factor, inner_factor])

    block = sch.get_block(name="inverse")
    head_a, head_b, mid_a, mid_b, tail_a, tail_b = sch.get_loops(block=block)

    for loop in (head_a, head_b):
        sch.unroll(loop=loop)

    mid_a_outer, mid_a_inner = _tile_in_two(mid_a, [1, 9])
    mid_b_outer, mid_b_inner = _tile_in_two(mid_b, [2, 64])

    for loop in (tail_a, tail_b):
        sch.unroll(loop=loop)

    sch.reorder(
        mid_a_outer,
        mid_b_outer,
        mid_a_inner,
        mid_b_inner,
        head_a,
        head_b,
        tail_a,
        tail_b,
    )
def inverse(sch: Schedule):
    """Tile the ``inverse`` block, then fuse its tiles and bind them to threads.

    Steps, in order:

    1. Fetch the six loops of the block named ``"inverse"``; unroll the
       first two and the last two.
    2. Split the third loop with the recorded tile decision ``[3, 3]`` and
       the fourth with ``[2, 64]``.
    3. Reorder to: both outer tiles, both inner tiles, then the four
       unrolled loops.
    4. Fuse the four tile loops into one, split that fused loop by a
       categorically sampled factor, and bind the resulting outer loop to
       ``blockIdx.x`` and the inner loop to ``threadIdx.x``.

    Args:
        sch: Schedule that is mutated in place.  Nothing is returned.
    """
    # NOTE(review): other functions named ``inverse`` appear earlier in this
    # file; confirm they live in separate scopes so none shadows another.

    def _tile_in_two(loop, decision):
        # Sample a 2-way perfect tiling (pinned by ``decision``) and split.
        outer_factor, inner_factor = sch.sample_perfect_tile(
            n=2,
            loop=loop,
            max_innermost_factor=64,
            decision=decision,
        )
        return sch.split(loop=loop, factors=[outer_factor, inner_factor])

    block = sch.get_block(name="inverse")
    head_a, head_b, mid_a, mid_b, tail_a, tail_b = sch.get_loops(block=block)

    for loop in (head_a, head_b):
        sch.unroll(loop=loop)

    mid_a_outer, mid_a_inner = _tile_in_two(mid_a, [3, 3])
    mid_b_outer, mid_b_inner = _tile_in_two(mid_b, [2, 64])

    for loop in (tail_a, tail_b):
        sch.unroll(loop=loop)

    tile_loops = (mid_a_outer, mid_b_outer, mid_a_inner, mid_b_inner)
    sch.reorder(*tile_loops, head_a, head_b, tail_a, tail_b)

    fused = sch.fuse(*tile_loops)

    thread_candidates = [32, 64, 128, 256, 512, 1024]
    # Uniform weights over the six candidates.
    # NOTE(review): ``decision=2`` presumably picks the candidate at index 2
    # (128) - confirm against the sample_categorical documentation.
    threads_per_block = sch.sample_categorical(
        candidates=thread_candidates,
        probs=[0.16666666666666666] * 6,
        decision=2,
    )

    # ``None`` leaves the outer extent to be derived from the sampled inner
    # factor.
    block_loop, thread_loop = sch.split(
        loop=fused, factors=[None, threads_per_block]
    )
    sch.bind(loop=block_loop, thread_axis="blockIdx.x")
    sch.bind(loop=thread_loop, thread_axis="threadIdx.x")