def test_ispc_streaming_stores():
    """Smoke-test code generation for a stream-triad kernel on the ISPC target.

    Builds ``a[i] = b[i] + scalar * c[i]``, splits the ``i`` iname twice,
    applies the ``"!streaming_store"`` instruction tag, then preprocesses,
    schedules and generates code.  There are no assertions: the test
    passes as long as the whole pipeline runs without raising.
    """
    stream_dtype = np.float32
    index_dtype = np.int32

    # Every name listed here receives ``stream_dtype`` below.
    typed_arg_names = ["a", "b", "c", "scalar"]

    knl = lp.make_kernel(
        "{[i]: 0<=i<n}",
        "a[i] = b[i] + scalar * c[i]",
        target=lp.ISPCTarget(),
        index_dtype=index_dtype,
        name="stream_triad",
    )
    knl = lp.assume(knl, "n>0")

    # Outer split: chunks of 2**18 tagged "g.0"; inner split: width 8
    # tagged "l.0".
    knl = lp.split_iname(knl, "i", 2**18, outer_tag="g.0", slabs=(0, 1))
    knl = lp.split_iname(knl, "i_inner", 8, inner_tag="l.0")

    knl = lp.tag_instructions(knl, "!streaming_store")

    knl = lp.add_and_infer_dtypes(
        knl, dict.fromkeys(typed_arg_names, stream_dtype))
    knl = lp.set_argument_order(knl, [*typed_arg_names, "n"])

    knl = lp.preprocess_kernel(knl)
    knl = lp.get_one_scheduled_kernel(knl)

    # The generated code is discarded; only successful generation matters.
    lp.generate_code_v2(knl).all_code()
def transform(knl, vars, stream_dtype):
    """Apply the standard split/dtype/argument-order transformations to *knl*.

    :arg knl: the kernel to transform; it must have an iname ``i`` and a
        parameter ``n`` (both are referenced by name below).
    :arg vars: a comma-separated string of argument names.  Whitespace
        around each name is stripped.
    :arg stream_dtype: the dtype assigned to every name in *vars*.
    :returns: the transformed kernel.
    """
    arg_names = [name.strip() for name in vars.split(",")]

    knl = lp.assume(knl, "n>0")

    # Split "i" into chunks of 2**18 (outer tagged "g.0"), then split the
    # resulting inner iname by 8 (inner tagged "l.0").
    knl = lp.split_iname(knl, "i", 2**18, outer_tag="g.0", slabs=(0, 1))
    knl = lp.split_iname(knl, "i_inner", 8, inner_tag="l.0")

    knl = lp.add_and_infer_dtypes(knl, dict.fromkeys(arg_names, stream_dtype))

    # "n" always goes last, after the names given by the caller.
    return lp.set_argument_order(knl, [*arg_names, "n"])
def transform(knl, vars, stream_dtype):
    """Return *knl* after splitting ``i``, typing its arguments and ordering them.

    *vars* is a comma-separated string of argument names; each name is
    stripped of surrounding whitespace and given the dtype *stream_dtype*.
    The final argument order is those names, in the order given, followed
    by ``"n"``.
    """
    names = [piece.strip() for piece in vars.split(",")]
    dtype_by_name = {name: stream_dtype for name in names}
    ordered_args = names + ["n"]

    assumed = lp.assume(knl, "n>0")

    # Two-level split of "i": 2**18 on the outside, 8 on the inside.
    outer_split = lp.split_iname(
        assumed, "i", 2**18,
        outer_tag="g.0", slabs=(0, 1))
    inner_split = lp.split_iname(
        outer_split, "i_inner", 8,
        inner_tag="l.0")

    typed = lp.add_and_infer_dtypes(inner_split, dtype_by_name)
    return lp.set_argument_order(typed, ordered_args)
def test_set_arg_order():
    """Check that ``lp.set_argument_order`` accepts a comma-separated string.

    The test makes no assertions; it passes if building the kernel and
    reordering its arguments does not raise.
    """
    domain = "{ [i,j]: 0<=i,j<n }"
    instruction = "out[i,j] = a[i]*b[j]"

    knl = lp.make_kernel(domain, instruction)

    # The order is given as one string rather than a list of names.
    knl = lp.set_argument_order(knl, "out,a,n,b")
def test_set_arg_order():
    """Smoke-test reordering kernel arguments via a comma-separated string.

    Builds an outer-product kernel and calls ``lp.set_argument_order`` on
    it.  Nothing is asserted: success means no exception was raised.
    """
    outer_product = lp.make_kernel(
        "{ [i,j]: 0<=i,j<n }",
        "out[i,j] = a[i]*b[j]",
    )

    # The result is intentionally unused; only the call itself is exercised.
    knl = lp.set_argument_order(
        outer_product,
        "out,a,n,b",
    )