def test_bound():
    """Lowering should shrink B's realized region to exactly what C reads.

    C consumes B[i + 1, j * 2] over a 3x3 domain, so B's inferred axis
    extents end at 3 (rows) and 5 (cols reach index 4).
    """
    src = te.placeholder((7, 7), name="A")
    mid = te.compute((7, 7), lambda i, j: src[i, j], name="B")
    out = te.compute((3, 3), lambda i, j: mid[i + 1, j * 2], name="C")
    sched = te.create_schedule(out.op)
    KgeN.lower(sched, [src, out])
    assert sched[mid].op.axis[0].range.end.val == 3
    assert sched[mid].op.axis[1].range.end.val == 5
def test_fuse():
    """Fusing both 8-long axes of B must yield one axis of extent 8 * 8 = 64."""
    src = te.placeholder((8, 8), name="A")
    dst = te.compute((8, 8), lambda i, j: src[i, j], name="B")
    sched = te.create_schedule(dst.op)
    ax_row, ax_col = sched[dst].op.axis
    merged = sched[dst].fuse(ax_row, ax_col)
    KgeN.lower(sched, [src, dst])
    assert merged.range.end.val == 64
def test_split1():
    """Splitting a 9-long axis by factor 2: outer extent ceil(9/2) = 5, inner 2."""
    src = te.placeholder((9, 9), name="A")
    dst = te.compute((9, 9), lambda i, j: src[i, j], name="B")
    sched = te.create_schedule(dst.op)
    first_axis, _ = sched[dst].op.axis
    outer, inner = sched[dst].split(first_axis, 2)
    KgeN.lower(sched, [src, dst])
    assert outer.range.end.val == 5
    assert inner.range.end.val == 2
def test_if_then_else():
    """An if_then_else guard should tighten B's inferred bounds to the taken branch.

    Only the region i < 2, j < 1 of B is ever read, so B's axis extents
    end at 2 and 1 respectively.
    """
    src = te.placeholder((8, 8), name="A")
    mid = te.compute((8, 8), lambda i, j: src[i, j], name="B")
    out = te.compute(
        (3, 3),
        lambda i, j: te.if_then_else(te.all(i < 2, j < 1), mid[i, j] + 1, 0),
        name="C",
    )
    sched = te.create_schedule(out.op)
    KgeN.lower(sched, [src, out])
    ax_row, ax_col = sched[mid].op.axis
    assert ax_row.range.end.val == 2
    assert ax_col.range.end.val == 1
import KgeN
from KgeN import te

# Example: split C's outer axis and inline B's computation inside C's loop nest.
size = 256
A = te.placeholder((size, size), name="A")
B = te.compute((size, size), lambda i, j: 2 + A[i, j], name="B")
# NOTE(review): C reads B at row i + j, which runs past B's declared extent —
# presumably a deliberate bound-inference exercise; confirm against KgeN semantics.
C = te.compute((size, size), lambda i, j: B[i + j, j] * 2, name="C")

# Schedule.
s = te.create_schedule(C.op)
outer, inner = s[C].split(s[C].op.axis[0], 32)
s[B].split(s[B].op.axis[0], 32)  # split B too; the returned halves are not used
# Alternatives kept for experimentation:
# s[C].reorder(inner, outer)
# fused = s[C].fuse(outer, inner)
s[B].compute_at(s[C], s[C].op.axis[1])

# Lower to TIR and print the generated source.
func = KgeN.lower(s, [A, C])
print(KgeN.build(func))
import KgeN
from KgeN import te

# Tiled square matrix multiply with A/B tiles staged through shared memory.
M = 128
N = 128
K = 128

A = te.placeholder((M, K), name="A")
B = te.placeholder((K, N), name="B")
k = te.reduce_axis(K, name="k")
C = te.compute((M, N), lambda i, j: te.reduce_sum(A[i, k] * B[k, j], axis=k), name="C")

s = te.create_schedule(C.op)
AA = s.cache_read(A, "shared", [C])
BB = s.cache_read(B, "shared", [C])

# 16x16x16 tiling of the output and reduction axes.
ax_m, ax_n = s[C].op.axis
ax_k, = C.reduce_axis
m_out, m_in = s[C].split(ax_m, 16)
n_out, n_in = s[C].split(ax_n, 16)
k_out, k_in = s[C].split(ax_k, 16)
s[C].reorder(m_out, n_out, k_out, m_in, n_in, k_in)
# Each shared tile is loaded once per k_out iteration.
s[AA].compute_at(s[C], k_out)
s[BB].compute_at(s[C], k_out)

func = KgeN.lower(s, [A, B, C])
print(KgeN.build(func))
import KgeN
from KgeN import te

# 1. plain (non-vthread) variant, kept for reference:
# M = 128
# A = te.placeholder((M, ), name= "A")
# B = te.compute((M, ), lambda i: A[i], name="B")
# C = te.compute((M, ), lambda i: B[i], name="C")
# s = te.create_schedule(C.op)
# x, = s[C].op.axis
# xo, xi = s[C].split(x, factor=4)
# s[C].reorder(xi, xo)
# s[B].compute_at(s[C], xi)
# tir = str(KgeN.lower(s, [A, C]))
# print(tir)

# 2. virtual-thread variant.
M = 1024
A = te.placeholder((M, ), name="A")
B = te.compute((M, ), lambda i: A[i], name="B")
C = te.compute((M, ), lambda i: B[i], name="C")

s = te.create_schedule(C.op)
axis0, = s[C].op.axis
x_outer, x_inner = s[C].split(axis0, factor=64)
x_in_out, x_in_in = s[C].split(x_inner, factor=2)
# Bind the outermost axis to a virtual thread.
s[C].bind(x_outer, te.thread_axis("vthread", name="vx"))
# s[C].bind(x_in_out, te.thread_axis("vthread", name="vy"))
s[B].compute_at(s[C], x_in_out)
tir = str(KgeN.lower(s, [A, C]))
print(tir)
import KgeN
from KgeN import te

# Convolution workload setup, modeled after the TVM GPU conv tutorial:
# https://tvm.apache.org/docs/tutorials/optimize/opt_conv_cuda.html
# NOTE(review): this snippet appears truncated — the actual convolution
# compute after the reduction axes is not present here.
batch = 256
in_channel = 256
out_channel = 512
in_size = 14
kernel = 3
pad = 1
stride = 1

# NOTE(review): indexing below suggests layout (height, width, channel, batch)
# for A and (ky, kx, in_channel, out_channel) for W — confirm against the
# downstream compute definition.
A = te.placeholder((in_size, in_size, in_channel, batch), name="A")
W = te.placeholder((kernel, kernel, in_channel, out_channel), name="W")
out_size = (in_size - kernel + 2 * pad) // stride + 1

# Pad input: zero-pad A by `pad` on both spatial edges via a guarded compute.
Apad = te.compute(
    (in_size + 2 * pad, in_size + 2 * pad, in_channel, batch),
    lambda yy, xx, cc, nn: te.if_then_else(
        te.all(yy >= pad, yy - pad < in_size, xx >= pad, xx - pad < in_size),
        A[yy - pad, xx - pad, cc, nn],
        0,
    ),
    name="Apad",
)

# Create reduction variables over input channels and the kernel window.
rc = te.reduce_axis(in_channel, name="rc")
ry = te.reduce_axis(kernel, name="ry")
rx = te.reduce_axis(kernel, name="rx")
# Compute the convolution
import KgeN
from KgeN import te

# Example: consumer C is larger than producer B's declared extent.
m = 256
n = 128
A = te.placeholder((n, n), name="A")
B = te.compute((n, n), lambda i, j: 2 + A[i, j], name="B")
# NOTE(review): C (256x256) reads B beyond its declared 128x128 shape —
# presumably a bound-inference stress case; confirm against KgeN semantics.
C = te.compute((m, m), lambda i, j: B[i, j] * 2, name="C")

# Schedule.
s = te.create_schedule(C.op)
s[C].split(s[C].op.axis[0], 32)  # returned outer/inner halves are not used
s[B].split(s[B].op.axis[0], 32)  # likewise unused
s[B].compute_at(s[C], s[C].op.axis[1])

# Lower to TIR and print the generated source.
func = KgeN.lower(s, [A, C])
print(KgeN.build(func))