def setslice_kernel(arr, value):
    """Copy one element of ``value`` into ``arr`` for the current work-item.

    The work-item's global ids on axes 0, 1 and 2 are packed into a
    ``cl_uint4`` (fourth component fixed at 0).  For each array that vector
    is multiplied by the array's ``strides`` (presumably component-wise) and
    the x/y/z components are summed onto the array's ``offset`` to obtain a
    flat element position.

    :param arr: destination array exposing ``strides`` and ``offset``.
    :param value: source array exposing ``strides`` and ``offset``.
    """
    gid = cl.cl_uint4(
        clrt.get_global_id(0),
        clrt.get_global_id(1),
        clrt.get_global_id(2),
        0,
    )

    # Flat position inside the destination.
    dst_steps = gid * arr.strides
    dst = arr.offset + dst_steps.x + dst_steps.y + dst_steps.z

    # Flat position inside the source.
    src_steps = gid * value.strides
    src = value.offset + src_steps.x + src_steps.y + src_steps.z

    arr[dst] = value[src]
def lp2dstep(u, dx2, dy2, dnr_inv, stidx):
    """Update every second element of one interior line of the 2-D grid ``u``.

    The work-item's global id on axis 0, plus one, selects the second index
    of ``u``.  The first index then walks from ``1 + ((id + stidx) % 2)`` up
    to (but excluding) ``u.shape[1] - 1`` in steps of two, so ``stidx``
    chooses which parity of cells is rewritten in this pass.

    Each visited cell is replaced by a weighted sum of its four direct
    neighbours: the pair along the first index is weighted by ``dy2``, the
    pair along the second index by ``dx2``, and the total is scaled by
    ``dnr_inv``.

    NOTE(review): the loop bound is taken from ``u.shape[1]`` although the
    loop variable is used as the *first* index of ``u``; the two agree only
    for square grids -- confirm against the callers.
    """
    col = clrt.get_global_id(0) + 1
    ny = u.shape[1]

    # Parity of (col + stidx) decides whether the sweep starts at 1 or 2.
    first_row = 1 + ((col + stidx) % 2)

    for row in range(first_row, ny - 1, 2):
        axis0_term = (u[row - 1, col] + u[row + 1, col]) * dy2
        axis1_term = (u[row, col - 1] + u[row, col + 1]) * dx2
        u[row, col] = (axis0_term + axis1_term) * dnr_inv
def generate_sin(a):
    """Write one (x, y) sample of a sine curve into ``a`` per work-item.

    With ``r = global_id / global_size`` (so ``0 <= r < 1``):

    * ``a[id].x`` is ``2 * r - 1``, i.e. a value in ``[-1, 1)``;
    * ``a[id].y`` is ``native_sin(r * 16 * 3.1415)``, roughly eight full
      sine periods across the whole launch.

    Every intermediate is explicitly cast with ``c_float``.
    """
    idx = clrt.get_global_id(0)
    count = clrt.get_global_size(0)

    # Normalised position of this work-item within the launch.
    frac = c_float(idx) / c_float(count)

    # Phase in radians: 16 * (approximately pi) over the full range.
    phase = frac * c_float(16.0) * c_float(3.1415)

    a[idx].x = c_float(frac * 2.0) - c_float(1.0)
    a[idx].y = clrt.native_sin(phase)
def ufunc_kernel(function, a, b, out):
    """Apply a binary ``function`` to one element pair and store the result.

    The work-item's global ids on axes 0, 1 and 2 are packed into a
    ``cl_uint4`` (fourth component fixed at 0).  For each of ``a``, ``b`` and
    ``out`` that vector is multiplied by the array's ``strides`` (presumably
    component-wise) and the x/y/z components are added to the array's
    ``offset`` to obtain a flat element position.

    :param function: callable taking the two loaded elements.
    :param a: first input array (needs ``strides`` and ``offset``).
    :param b: second input array (needs ``strides`` and ``offset``).
    :param out: output array (needs ``strides`` and ``offset``).
    """
    gid = cl.cl_uint4(
        clrt.get_global_id(0),
        clrt.get_global_id(1),
        clrt.get_global_id(2),
        0,
    )

    lhs_steps = gid * a.strides
    lhs_pos = a.offset + lhs_steps.x + lhs_steps.y + lhs_steps.z

    rhs_steps = gid * b.strides
    rhs_pos = b.offset + rhs_steps.x + rhs_steps.y + rhs_steps.z

    res_steps = gid * out.strides
    res_pos = out.offset + res_steps.x + res_steps.y + res_steps.z

    lhs = a[lhs_pos]
    rhs = b[rhs_pos]
    out[res_pos] = function(lhs, rhs)
def generate_sin(a):
    """Write one (x, y) sample of a sine curve into ``a`` per work-item.

    With ``r = global_id / global_size`` (so ``0 <= r < 1``):

    * ``a[id].x`` is ``2 * r - 1``, i.e. a value in ``[-1, 1)``;
    * ``a[id].y`` is ``native_sin(r * 16 * 3.1415)``; the phase spans about
      ``16 * pi`` radians, which is roughly eight full sine periods.
    """
    idx = clrt.get_global_id(0)
    count = clrt.get_global_size(0)

    # Normalised position of this work-item within the launch.
    frac = c_float(idx) / c_float(count)

    # Phase in radians for this sample.
    phase = frac * c_float(16.0 * 3.1415)

    # Horizontal coordinate rescaled from [0, 1) to [-1, 1).
    a[idx].x = frac * 2.0 - 1.0

    # Vertical coordinate is the sine of the phase.
    a[idx].y = clrt.native_sin(phase)
def generate_sin(a):
    """Write one (x, y) sample of a sine curve into ``a`` per work-item.

    With ``r = global_id / global_size`` (so ``0 <= r < 1``), both operands
    cast through ``cl.cl_float``:

    * ``a[id].x`` is ``2 * r - 1``, i.e. a value in ``[-1, 1)``;
    * ``a[id].y`` is ``native_sin(r * 16 * 3.1415)``; the phase spans about
      ``16 * pi`` radians, which is roughly eight full sine periods.
    """
    idx = clrt.get_global_id(0)
    count = clrt.get_global_size(0)

    # Normalised position of this work-item within the launch.
    frac = cl.cl_float(idx) / cl.cl_float(count)

    # Phase in radians for this sample.
    phase = frac * cl.cl_float(16.0 * 3.1415)

    # Horizontal coordinate rescaled from [0, 1) to [-1, 1).
    a[idx].x = frac * 2.0 - 1.0

    # Vertical coordinate is the sine of the phase.
    a[idx].y = clrt.native_sin(phase)
def generate_sin(a):
    """Write one (x, y) sample of a sine curve into ``a`` per work-item.

    With ``r = global_id / global_size`` (so ``0 <= r < 1``), both operands
    cast through ``c_float``:

    * ``a[id].x`` is ``2 * r - 1``, i.e. a value in ``[-1, 1)``;
    * ``a[id].y`` is ``native_sin(r * 16 * 3.1415)``; the phase spans about
      ``16 * pi`` radians, which is roughly eight full sine periods.
    """
    idx = clrt.get_global_id(0)
    count = clrt.get_global_size(0)

    # Normalised position of this work-item within the launch.
    frac = c_float(idx) / c_float(count)

    # Phase in radians for this sample.
    phase = frac * c_float(16.0 * 3.1415)

    # Horizontal coordinate rescaled from [0, 1) to [-1, 1).
    a[idx].x = frac * 2.0 - 1.0

    # Vertical coordinate is the sine of the phase.
    a[idx].y = clrt.native_sin(phase)
def compute_gravitation(a):
    """Accumulate the softened pairwise force on one body into ``a``.

    ``a`` is indexed as ``a[field, body]``.  This kernel reads row 0 (mass)
    and rows 3/4 (position x/y) and rewrites rows 1/2 (force x/y) for the
    body selected by the work-item's global id on axis 0.  The number of
    bodies is taken from the global size on axis 0.

    For every other body ``i`` the contribution is::

        norm = G * m_self * m_i / (d**2 + 200**2) ** 1.5
        force -= norm * diff / d

    where ``diff`` is the position of this body minus the position of body
    ``i`` and ``d`` is its Euclidean length.

    Bodies that sit at exactly the same position as this one are skipped:
    ``diff`` is zero there, so ``diff / d`` would be 0/0 and the resulting
    NaN would poison the whole accumulated force.
    """
    # Row layout of ``a`` used by this kernel.
    m_ind = 0
    fx_ind = 1
    fy_ind = 2
    px_ind = 3
    py_ind = 4

    gid = clrt.get_global_id(0)
    n = clrt.get_global_size(0)
    G = 100000.0

    # Start from a clean accumulator for this body.
    a[fx_ind, gid] = 0.0
    a[fy_ind, gid] = 0.0

    for i in range(n):
        if i != gid:
            diff_x = a[px_ind, gid] - a[px_ind, i]
            diff_y = a[py_ind, gid] - a[py_ind, i]
            distance = clrt.math.sqrt(diff_x * diff_x + diff_y * diff_y)

            # Guard the division by ``distance`` below (coincident bodies).
            if distance > 0.0:
                forceNorm = (
                    G * a[m_ind, gid] * a[m_ind, i]
                    / pow((distance * distance) + 200.0 ** 2, 3 / 2.0)
                )
                a[fx_ind, gid] -= forceNorm * diff_x / distance
                a[fy_ind, gid] -= forceNorm * diff_y / distance
def test_kernel(a):
    """Tag each element of the 2-D array ``a`` with its own coordinates.

    The element at ``[i, j]`` (global ids on axes 0 and 1) receives
    ``i * 100 + j``.
    """
    i = clrt.get_global_id(0)
    j = clrt.get_global_id(1)
    tag = i * 100 + j
    a[i, j] = tag
def unary_ufunc_kernel(function, a, out):
    """Apply a unary ``function`` to one element of ``a``.

    The element at the work-item's global id (axis 0) is read from ``a``,
    passed to ``function``, and the result is stored at the same position
    in ``out``.
    """
    idx = clrt.get_global_id(0)
    out[idx] = function(a[idx])
def foo(a):
    """Store ``x + y * 100`` at ``a[x, y]``.

    ``x`` and ``y`` are the work-item's global ids on axes 0 and 1.
    """
    gx = clrt.get_global_id(0)
    gy = clrt.get_global_id(1)
    scaled_y = gy * 100
    a[gx, gy] = gx + scaled_y
def test_kernel(a):
    """Write each work-item's global id (axis 0) into its own slot of ``a``."""
    gid = clrt.get_global_id(0)
    a[gid] = gid
def setslice(a, value):
    """Store ``value`` at this work-item's position in ``a``.

    The position is the global id on axis 0.  After the store the kernel
    calls ``clrt.barrier`` with the ``CLK_GLOBAL_MEM_FENCE`` flag.
    """
    idx = clrt.get_global_id(0)
    a[idx] = value
    clrt.barrier(clrt.CLK_GLOBAL_MEM_FENCE)
def conv(a, b, ret):
    """Write ``b.size`` into this work-item's slot of ``ret``.

    The slot is the global id on axis 0.  ``a`` is accepted but not read by
    this kernel.
    """
    idx = clrt.get_global_id(0)
    b_size = b.size
    ret[idx] = b_size
def _linspace(a, start, stop):
    """Fill ``a`` with evenly spaced values that begin at ``start``.

    Element ``i`` (the work-item's global id on axis 0) receives
    ``start + i * (stop - start) / gsize`` where ``gsize`` is the global
    size on axis 0.

    :param a: output array, one element per work-item.
    :param start: value written to element 0.
    :param stop: upper end of the interval.

    NOTE(review): because the step is ``(stop - start) / gsize`` the last
    element is one step short of ``stop``; ``numpy.linspace`` includes the
    endpoint by default -- confirm which convention callers expect.
    """
    i = clrt.get_global_id(0)
    gsize = clrt.get_global_size(0)
    # Offset by ``start`` so the sequence begins at ``start`` instead of 0.
    a[i] = start + i * (stop - start) / gsize
def _arange(a, start, step):
    """Fill ``a`` with an arithmetic progression.

    Element ``i`` (the work-item's global id on axis 0) receives
    ``start + step * i``.
    """
    idx = clrt.get_global_id(0)
    advance = step * idx
    a[idx] = start + advance