예제 #1
0
def get_target(lang, device=None, compiler=None):
    """

    Parameters
    ----------
    lang : str
        One of the supported languages, {'c', 'cuda', 'opencl'}
    device : :class:`pyopencl.Device`
        If supplied, and lang is 'opencl', passed to the
        :class:`loopy.PyOpenCLTarget`
    compiler: str
        If supplied, the C-compiler to use

    Returns
    -------
    The correct loopy target type
    """

    utils.check_lang(lang)

    # set target
    if lang == 'opencl':
        return lp.PyOpenCLTarget(device=device)
    elif lang == 'c':
        return lp.ExecutableCTarget(compiler=compiler)
    elif lang == 'cuda':
        return lp.CudaTarget()
    elif lang == 'ispc':
        return lp.ISPCTarget()
예제 #2
0
def test_cuda_short_vector():
    knl = lp.make_kernel("{ [i]: 0<=i<n }",
                         "out[i] = 2*a[i]",
                         target=lp.CudaTarget())

    knl = lp.set_options(knl, write_code=True)
    knl = lp.split_iname(knl, "i", 4, slabs=(0, 1), inner_tag="vec")
    knl = lp.split_array_axis(knl, "a,out", axis_nr=0, count=4)
    knl = lp.tag_array_axes(knl, "a,out", "C,vec")

    knl = lp.set_options(knl, write_wrapper=True)
    knl = lp.add_and_infer_dtypes(knl, {"a": np.float32})

    print(lp.generate_code_v2(knl).device_code())
예제 #3
0
import numpy as np
import loopy as lp

target = lp.CudaTarget()

kernel = lp.make_kernel("{ [i_node,j_node]: 0<=i_node,j_node<n_node}",
                        """
	<float32> coupling_value = params(1)
	<float32> speed_value = params(0)

	<float32> dt=0.1
	<float32> M_PI_F = 2.0
	<float32> rec_n = 1.0f / n_node
	<float32> rec_speed_dt = 1.0f / speed_value / dt
	<float32> omega = 10.0 * 2.0 * M_PI_F / 1e3
	<float32> sig = sqrt(dt) * sqrt(2.0 * 1e-5)
	<float32> rand = 1.0

	for i_node
		tavg[i_node]=0.0f  							{id = clear}
	end 
	
	for i_node
		<float32> theta_i = state[i_node] 					{id = coupling1, dep=*}
		<float32> sum = 0.0							{id = coupling2}
		for j_node
			<float32> wij = weights[j_node]					{id = coupling3, dep=coupling1:coupling2}
			if wij != 0.0
                		<int> dij = lengths[j_node] * rec_speed_dt		{id = coupling4, dep=coupling3}
                		<float32> theta_j = state[j_node]
                		sum = sum + wij * sin(theta_j - theta_i)