def test_c_execution_with_global_temporaries():
    """Run a kernel that copies an initialized, read-only global temporary.

    Two things are asserted: the generated host code does not contain the
    text ``int b[n]``, and executing the kernel fills ``a`` with the values
    of the temporary's initializer.
    """
    from loopy.target.c import ExecutableCTarget
    AS = lp.AddressSpace  # noqa

    n = 10

    # kernel data: one output array plus the constant temporary it reads from
    output_arg = lp.GlobalArg("a", shape=(n, ), dtype=np.int32)
    constant_temp = lp.TemporaryVariable(
        "b",
        shape=(n, ),
        initializer=np.arange(n, dtype=np.int32),
        dtype=np.int32,
        read_only=True,
        address_space=AS.GLOBAL)

    knl = lp.make_kernel(
        "{[i]: 0 <= i < n}",
        """ a[i] = b[i] """,
        [output_arg, constant_temp],
        target=ExecutableCTarget())
    knl = lp.fix_parameters(knl, n=n)

    host_code = lp.generate_code_v2(knl).host_code()
    declaration = "int b[%d]" % n
    assert declaration not in host_code

    # index 1 of the call result is compared against the expected values
    result = knl(a=np.zeros(10, dtype=np.int32))[1]
    assert np.allclose(result, np.arange(10))
def test_c_execution_with_global_temporaries():
    """Run a kernel that copies an initialized, read-only global temporary.

    Two things are asserted: the generated host code does not contain the
    text ``int b[n]``, and executing the kernel fills ``a`` with the values
    of the temporary's initializer.
    """
    from loopy.target.c import ExecutableCTarget
    from loopy.kernel.data import temp_var_scope as scopes

    n = 10

    # kernel data: one output array plus the constant temporary it reads from
    output_arg = lp.GlobalArg('a', shape=(n, ), dtype=np.int32)
    constant_temp = lp.TemporaryVariable(
        'b',
        shape=(n, ),
        initializer=np.arange(n, dtype=np.int32),
        dtype=np.int32,
        read_only=True,
        scope=scopes.GLOBAL)

    knl = lp.make_kernel(
        '{[i]: 0 <= i < n}',
        """ a[i] = b[i] """,
        [output_arg, constant_temp],
        target=ExecutableCTarget())
    knl = lp.fix_parameters(knl, n=n)

    host_code = lp.generate_code_v2(knl).host_code()
    declaration = 'int b[%d]' % n
    assert declaration not in host_code

    # index 1 of the call result is compared against the expected values
    result = knl(a=np.zeros(10, dtype=np.int32))[1]
    assert np.allclose(result, np.arange(10))
def __get_kernel(order="C"):
    """Build the kernel ``out[i, j] = 2*a[i, j]`` for ``0 <= i, j < n``.

    *order* is forwarded as the ``order`` of both ``n`` x ``n`` float32
    array arguments. The kernel is created with an ``ExecutableCTarget``.
    """
    # "out" first, then "a" -- both share dtype, shape and order
    array_args = [
        lp.GlobalArg(name, np.float32, shape=("n", "n"), order=order)
        for name in ("out", "a")
    ]

    return lp.make_kernel(
        "{ [i,j]: 0<=i,j<n }",
        "out[i, j] = 2*a[i, j]",
        array_args + ["..."],
        target=ExecutableCTarget())
def __get_knl():
    """Build the ten-element copy kernel ``a[i] = b[i]`` named ``cache_test``."""
    kernel_args = [
        lp.GlobalArg('a', shape=(10,), dtype=np.int32),
        # NOTE: (10) is the plain integer 10, not a one-element tuple
        lp.ConstantArg('b', shape=(10)),
    ]

    return lp.make_kernel(
        '{[i]: 0 <= i < 10}',
        """ a[i] = b[i] """,
        kernel_args,
        target=ExecutableCTarget(),
        name='cache_test')
def __get_knl():
    """Build the ten-element copy kernel ``a[i] = b[i]`` named ``cache_test``."""
    kernel_args = [
        lp.GlobalArg("a", shape=(10,), dtype=np.int32),
        # NOTE: (10) is the plain integer 10, not a one-element tuple
        lp.ConstantArg("b", shape=(10)),
    ]

    return lp.make_kernel(
        "{[i]: 0 <= i < 10}",
        """ a[i] = b[i] """,
        kernel_args,
        target=ExecutableCTarget(),
        name="cache_test")
def test_c_target():
    """Execute ``out[i] = 2*a[i]`` through the executable C target.

    The kernel is called on ``arange(16)`` (float32) and the array at
    index 1 of the call result is compared with twice that input.
    """
    from loopy.target.c import ExecutableCTarget

    # both arrays leave their shape to loopy (lp.auto)
    kernel_args = [
        lp.GlobalArg("out", np.float32, shape=lp.auto),
        lp.GlobalArg("a", np.float32, shape=lp.auto),
        "..."
    ]
    knl = lp.make_kernel(
        "{ [i]: 0<=i<n }",
        "out[i] = 2*a[i]",
        kernel_args,
        target=ExecutableCTarget())

    computed = knl(a=np.arange(16, dtype=np.float32))[1]
    expected = 2 * np.arange(16, dtype=np.float32)
    assert np.allclose(computed, expected)
def test_function_decl_extractor():
    """Execute a kernel taking a global array, a constant array and a scalar.

    Ensures that pointers, constants, etc. can be told apart at execution
    time: ``a[i] = b[i] + v`` is run with ``b = arange(10)`` and ``v = -1``
    and the result is compared with ``arange(10) - 1``.
    """
    from loopy.target.c import ExecutableCTarget

    kernel_args = [
        lp.GlobalArg('a', shape=(10,), dtype=np.int32),
        # NOTE: (10) is the plain integer 10, not a one-element tuple
        lp.ConstantArg('b', shape=(10)),
        lp.ValueArg('v', dtype=np.int32),
    ]
    knl = lp.make_kernel(
        '{[i]: 0 <= i < 10}',
        """ a[i] = b[i] + v """,
        kernel_args,
        target=ExecutableCTarget())

    computed = knl(b=np.arange(10), v=-1)[1]
    expected = np.arange(10) - 1
    assert np.allclose(computed, expected)
def __get_kernel(order="C"):
    """Build a 3-D kernel ``out[i, j, k] = 2 * a[i, j, k]`` with random sizes.

    Each of the inames ``i``, ``j``, ``k`` gets its own domain whose upper
    bound is drawn from ``np.random.randint(1, 11)``. *order* is forwarded
    as the ``order`` of both float32 array arguments. The kernel is created
    with an ``ExecutableCTarget``.
    """
    inames = ["i", "j", "k"]
    extents = tuple(np.random.randint(1, 11, size=len(inames)))

    # one domain string per iname
    domain_template = "{{ [{iname}]: 0 <= {iname} < {size} }}"
    domains = [
        domain_template.format(iname=iname, size=extent)
        for iname, extent in zip(inames, extents)
    ]

    subscript = ", ".join(inames)
    statement = "out[{indexed}] = 2 * a[{indexed}]".format(indexed=subscript)

    kernel_args = [
        lp.GlobalArg("out", np.float32, shape=extents, order=order),
        lp.GlobalArg("a", np.float32, shape=extents, order=order),
        "..."
    ]
    return lp.make_kernel(
        domains,
        statement,
        kernel_args,
        target=ExecutableCTarget())