def test_dynamic_shape_2d(self): dN = te.VarHandle(torch.int32) dM = te.VarHandle(torch.int32) A = te.BufHandle([dN, dM], torch.float64) B = te.BufHandle([dN, dM], torch.float64) def compute(i, j): return A.load([i, j]) - B.load([i, j]) C = te.Compute("C", [dN, dM], compute) loopnest = te.LoopNest([C]) loopnest.prepare_for_codegen() cg = te.construct_codegen("ir_eval", loopnest.simplify(), [A, B, C, dN, dM]) def test_with_shape(n, m): tA = torch.randn(n, m, dtype=torch.double) tB = torch.randn(n, m, dtype=torch.double) tC = torch.empty(n, m, dtype=torch.double) cg.call([tA, tB, tC, n, m]) torch.testing.assert_close(tA - tB, tC) test_with_shape(2, 4) test_with_shape(5, 3)
def test_dynamic_shape(self): dN = te.VarHandle(torch.int32) A = te.BufHandle(torch.float64) B = te.BufHandle(torch.float64) def compute(i): return A.load(i) - B.load(i) C = te.Compute('C', [dN], compute) loopnest = te.LoopNest([C]) loopnest.prepare_for_codegen() cg = te.construct_codegen('ir_eval', loopnest.simplify(), [A, B, C, dN]) def test_with_shape(n): tA = torch.randn(n, dtype=torch.double) tB = torch.randn(n, dtype=torch.double) tC = torch.empty(n, dtype=torch.double) cg.call([tA, tB, tC, n]) torch.testing.assert_close(tA - tB, tC) test_with_shape(8) test_with_shape(31)
def test_dynamic_shape(self):
    """Check a 1-D subtraction kernel with a runtime length (Placeholder API).

    Everything runs inside ``kernel_arena_scope()``. The length is a symbolic
    ``Int`` variable named ``"n"``; the same ``ir_eval`` kernel is called
    with two different concrete sizes.
    """
    with kernel_arena_scope():
        length = te.VarHandle("n", te.Dtype.Int)
        lhs = te.Placeholder("A", te.Dtype.Double, [length])
        rhs = te.Placeholder("B", te.Dtype.Double, [length])

        # C[i] = A[i] - B[i]
        out = te.Compute(
            "C",
            [te.DimArg(length, "i")],
            lambda idx: lhs.load([idx]) - rhs.load([idx]),
        )

        nest = te.LoopNest([out])
        nest.prepare_for_codegen()
        simplified = te.simplify(nest.root_stmt())

        # The symbolic length is the last kernel argument, after the buffers.
        kernel = te.construct_codegen(
            "ir_eval", simplified, [lhs, rhs, out, length]
        )

        for n in (8, 31):
            t_lhs = torch.randn(n, dtype=torch.double)
            t_rhs = torch.randn(n, dtype=torch.double)
            t_out = torch.empty(n, dtype=torch.double)
            kernel.call([t_lhs, t_rhs, t_out, n])
            torch.testing.assert_allclose(t_lhs - t_rhs, t_out)
def test_alloc_in_loop(self):
    """Compile and run a 4-deep loop nest that copies ``a`` to ``b`` via ``tmp``.

    Only ``b`` is declared as an output of the loop nest and only ``a`` and
    ``b`` are passed to the kernel; ``tmp`` is neither.
    NOTE(review): presumably this makes ``tmp`` an intermediate allocated
    inside the loops, as the test name suggests -- confirm against LoopNest.
    The test passes if the ``llvm`` kernel builds and runs without raising.
    """
    a = te.BufHandle("a", [1], torch.float32)
    tmp = te.BufHandle("tmp", [1], torch.float32)
    b = te.BufHandle("b", [1], torch.float32)

    # tmp[0] = a[0]; b[0] = tmp[0]
    copy_in = tmp.store([0], a.load([0]))
    copy_out = b.store([0], tmp.load([0]))
    stmt = te.Block([copy_in, copy_out])

    # Wrap the block in four nested loops, each running over [0, 100).
    depth = 4
    while depth > 0:
        loop_var = te.VarHandle("i", torch.int32)
        stmt = te.For.make(loop_var, 0, 100, stmt)
        depth -= 1

    nest = te.LoopNest(stmt, [b])
    nest.prepare_for_codegen()
    kernel = te.construct_codegen("llvm", nest.simplify(), [a, b])

    # The kernel is invoked with raw data pointers rather than tensors.
    ta = torch.ones(1)
    tb = torch.ones(1)
    kernel.call([ta.data_ptr(), tb.data_ptr()])
def test_alloc_in_loop(self):
    """Compile and run a 4-deep loop nest copying ``a`` to ``b`` via ``tmp``.

    Placeholder-API variant. Only ``b.data()`` is declared as an output of
    the loop nest and only ``a`` and ``b`` are passed to the kernel.
    NOTE(review): presumably this makes ``tmp`` an intermediate allocated
    inside the loops, as the test name suggests -- confirm against LoopNest.
    The test passes if the ``llvm`` kernel builds and runs without raising.
    """

    def single_float(name):
        # Each placeholder gets its own freshly built extent expression.
        return te.Placeholder(name, te.Dtype.Float, [te.ExprHandle.int(1)])

    a = single_float("a")
    tmp = single_float("tmp")
    b = single_float("b")

    zero = te.ExprHandle.int(0)
    hundred = te.ExprHandle.int(100)

    # tmp[0] = a[0]; b[0] = tmp[0]
    copy_in = tmp.store([zero], a.load([zero]))
    copy_out = b.store([zero], tmp.load([zero]))
    stmt = te.Block([copy_in, copy_out])

    # Wrap the block in four nested loops, each running over [0, 100).
    depth = 4
    while depth > 0:
        loop_var = te.VarHandle("i", te.Dtype.Int)
        stmt = te.For.make(loop_var, zero, hundred, stmt)
        depth -= 1

    nest = te.LoopNest(stmt, [b.data()])
    nest.prepare_for_codegen()
    kernel = te.construct_codegen("llvm", nest.simplify(), [a, b])

    # The kernel is invoked with raw data pointers rather than tensors.
    ta = torch.ones(1)
    tb = torch.ones(1)
    kernel.call([ta.data_ptr(), tb.data_ptr()])