def get_target(lang, device=None, compiler=None):
    """
    Return the loopy target object matching the requested language.

    Parameters
    ----------
    lang : str
        One of the languages handled here, {'c', 'cuda', 'opencl', 'ispc'}.
        The value is first validated with :func:`utils.check_lang`
    device : :class:`pyopencl.Device`
        If supplied, and lang is 'opencl', passed to the
        :class:`loopy.PyOpenCLTarget`
    compiler: str
        If supplied, and lang is 'c', the C-compiler passed to the
        :class:`loopy.ExecutableCTarget`

    Returns
    -------
    The loopy target instance for `lang`

    Raises
    ------
    ValueError
        If `lang` passes :func:`utils.check_lang` but has no corresponding
        target below (previously this case silently returned ``None``)
    """

    utils.check_lang(lang)

    # each branch builds its target lazily, so only the requested backend
    # is ever instantiated
    if lang == 'opencl':
        return lp.PyOpenCLTarget(device=device)
    if lang == 'c':
        return lp.ExecutableCTarget(compiler=compiler)
    if lang == 'cuda':
        return lp.CudaTarget()
    if lang == 'ispc':
        return lp.ISPCTarget()

    # fail loudly rather than handing ``None`` back to the caller as a target
    raise ValueError('No loopy target available for language: {}'.format(lang))
def test_ispc_streaming_stores():
    """
    Smoke test: build a "stream_triad" kernel for the ISPC target, apply the
    ``"!streaming_store"`` instruction tag and generate code for it.

    There are no assertions; the test passes as long as none of the loopy
    calls (including the final code generation) raises.
    """
    stream_dtype = np.float32
    index_dtype = np.int32
    arg_names = ["a", "b", "c", "scalar"]

    triad = lp.make_kernel(
        "{[i]: 0<=i<n}",
        "a[i] = b[i] + scalar * c[i]",
        target=lp.ISPCTarget(),
        index_dtype=index_dtype,
        name="stream_triad")

    triad = lp.assume(triad, "n>0")

    # two-level split of the iteration space: "i" first, then "i_inner"
    triad = lp.split_iname(
        triad, "i", 2**18, outer_tag="g.0", slabs=(0, 1))
    triad = lp.split_iname(triad, "i_inner", 8, inner_tag="l.0")

    triad = lp.tag_instructions(triad, "!streaming_store")

    # every argument (arrays and the scalar) shares the same dtype
    triad = lp.add_and_infer_dtypes(
        triad, dict.fromkeys(arg_names, stream_dtype))
    triad = lp.set_argument_order(triad, arg_names + ["n"])

    triad = lp.preprocess_kernel(triad)
    triad = lp.get_one_scheduled_kernel(triad)

    # result intentionally discarded; only successful generation matters
    lp.generate_code_v2(triad).all_code()
def make_knl(name, insn, vars):
    """
    Build an ISPC-targeted loopy kernel named ``stream_<name>_tasks`` and
    run it through :func:`transform`.

    Parameters
    ----------
    name : str
        Inserted between ``"stream_"`` and ``"_tasks"`` to form the kernel name
    insn
        The instruction(s) handed to :func:`lp.make_kernel`, iterating over
        the domain ``{[i]: 0<=i<n}``
    vars
        Forwarded unchanged to :func:`transform` together with `STREAM_DTYPE`

    Returns
    -------
    The kernel returned by :func:`transform`
    """
    untransformed = lp.make_kernel(
        "{[i]: 0<=i<n}",
        insn,
        target=lp.ISPCTarget(),
        index_dtype=INDEX_DTYPE,
        name="stream_" + name + "_tasks",
    )
    return transform(untransformed, vars, STREAM_DTYPE)