def test_forced_iname_deps_and_reduction():
    """Regression test for https://github.com/inducer/loopy/issues/24.

    The kernel is written in a deliberately un-idiomatic way so that it
    reproduces the conditions under which that bug was found: the C
    instruction assigns to plain ``phi`` rather than ``phi[i]``.  Had the
    assignee been ``phi[i]``, the iname propagation heuristic would not
    assume that dependent instructions need to run inside of ``i``, and
    the ``forced_iname_*`` arguments on the reduction assignment would
    not be needed.

    After preprocessing, the instruction named ``insn_0_j_update`` must
    not be nested inside the ``i`` loop.
    """
    from pymbolic.primitives import Subscript, Variable

    # Opaque C code that runs inside 'i' and is declared to write 'phi'.
    phi_insn = lp.CInstruction(
        "i",
        "doSomethingToGetPhi();",
        assignees="phi")

    # a = sum(j, phi[j]), with its iname dependencies pinned to the empty
    # set and marked final.
    phi_at_j = Subscript(Variable("phi"), Variable("j"))
    sum_insn = lp.Assignment(
        "a",
        lp.Reduction("sum", "j", phi_at_j),
        forced_iname_deps=frozenset(),
        forced_iname_deps_is_final=True)

    kernel_data = [
        lp.GlobalArg("a", dtype=np.float32, shape=()),
        lp.ValueArg("n", dtype=np.int32),
        lp.TemporaryVariable("phi", dtype=np.float32, shape=("n", )),
    ]

    knl = lp.make_kernel(
        "{[i,j] : 0<=i,j<n}",
        [phi_insn, sum_insn],
        kernel_data,
        target=lp.CTarget(),
    )
    knl = lp.preprocess_kernel(knl)

    update_inames = knl.insn_inames("insn_0_j_update")
    assert 'i' not in update_inames

    print(knl.stringify(with_dependencies=True))
def test_c_instruction(ctx_factory):
    """Build a kernel that mixes a C instruction with a regular assignment.

    The C instruction runs inside ``i`` and ``j`` and is declared to write
    the temporary ``x``; a second instruction stores ``x`` into ``a[i,j]``.
    The ``i`` iname is then split by 128, and both the kernel and the
    highlighted generated code are printed.  There are no assertions: the
    test passes when none of these steps raise.

    :arg ctx_factory: callable (test fixture) that returns the context
        handed to :class:`lp.CompiledKernel`.
    """
    # For verbose diagnostics while debugging, enable:
    # logging.basicConfig(level=logging.DEBUG)

    ctx = ctx_factory()

    instructions = [
        lp.CInstruction(
            "i,j",
            """ x = sin((float) i*j); """,
            assignees="x"),
        "a[i,j] = x",
    ]
    kernel_data = [
        lp.GlobalArg("a", shape=lp.auto, dtype=np.float32),
        lp.TemporaryVariable("x", np.float32),
        "...",
    ]

    knl = lp.make_kernel(
        "{[i,j]: 0<=i,j<n }",
        instructions,
        kernel_data,
        assumptions="n>=1")

    knl = lp.split_iname(knl, "i", 128, outer_tag="g.0", inner_tag="l.0")

    print(knl)

    compiled = lp.CompiledKernel(ctx, knl)
    print(compiled.get_highlighted_code())