예제 #1
0
def get_target(lang, device=None, compiler=None):
    """

    Parameters
    ----------
    lang : str
        One of the supported languages, {'c', 'cuda', 'opencl'}
    device : :class:`pyopencl.Device`
        If supplied, and lang is 'opencl', passed to the
        :class:`loopy.PyOpenCLTarget`
    compiler: str
        If supplied, the C-compiler to use

    Returns
    -------
    The correct loopy target type
    """

    utils.check_lang(lang)

    # set target
    if lang == 'opencl':
        return lp.PyOpenCLTarget(device=device)
    elif lang == 'c':
        return lp.ExecutableCTarget(compiler=compiler)
    elif lang == 'cuda':
        return lp.CudaTarget()
    elif lang == 'ispc':
        return lp.ISPCTarget()
예제 #2
0
def test_ispc_streaming_stores():
    stream_dtype = np.float32
    index_dtype = np.int32

    knl = lp.make_kernel(
            "{[i]: 0<=i<n}",
            "a[i] = b[i] + scalar * c[i]",
            target=lp.ISPCTarget(), index_dtype=index_dtype,
            name="stream_triad")

    vars = ["a", "b", "c", "scalar"]
    knl = lp.assume(knl, "n>0")
    knl = lp.split_iname(
        knl, "i", 2**18, outer_tag="g.0", slabs=(0, 1))
    knl = lp.split_iname(knl, "i_inner", 8, inner_tag="l.0")
    knl = lp.tag_instructions(knl, "!streaming_store")

    knl = lp.add_and_infer_dtypes(knl, {
        var: stream_dtype
        for var in vars
        })

    knl = lp.set_argument_order(knl, vars + ["n"])

    knl = lp.preprocess_kernel(knl)
    knl = lp.get_one_scheduled_kernel(knl)
    lp.generate_code_v2(knl).all_code()
    def make_knl(name, insn, vars):
        knl = lp.make_kernel(
                "{[i]: 0<=i<n}",
                insn,
                target=lp.ISPCTarget(), index_dtype=INDEX_DTYPE,
                name="stream_"+name+"_tasks")

        knl = transform(knl, vars, STREAM_DTYPE)
        return knl