def test_pointer_arithmetics():
    """Compare pointer arithmetic on the OpenCL kernel and its emulation.

    Problem: the abstract syntax tree does not distinguish whether an
    identifier is a pointer or a variable. E.g. incrementing the pointer to
    an array ``a`` (``a = a + 1``) would, in Python, increment all values of
    the underlying array. (The original note breaks off at this point.)
    """
    data = np.array([0, 0]).astype(Types.char)
    knl = Kernel(
        'knl_pointer_arithmetics',
        {'data': data},
        (" char a[5]={0}; a[0] = a[0] + 1; a[1] = 1;"
         " char* b = a + 1; b -= 1; b += 1; data[0] = b[0];"
         " char* c = a; c += 1; a[1] = 3; data[1] = c[0]; "),
        global_size=data.shape)
    emulation.use_existing_file_for_emulation(False)
    knl_py = knl.compile(emulate=True)
    knl_cl = knl.compile()

    knl_cl()
    res_cl = knl_cl.data.get()

    knl_py()
    # Read the result back from the *emulated* kernel. Reading ``knl_cl``
    # here would compare the OpenCL result with itself and always pass.
    res_py = knl_py.data.get()

    # NOTE(review): only the first element is compared, as in the original
    # test, although the kernel also writes data[1] -- confirm whether the
    # second element should be checked as well.
    assert np.all(res_cl[0] == res_py[0])
def test_number_overflow():
    """Compare saturating and plain ``char`` addition on device and emulation.

    The kernel writes ``add_sat(inp1, inp2)`` to ``out1`` and the plain sum
    ``inp1 + inp2`` (stored in a ``char``) to ``out2``. The inputs
    ``[127, 10, -128]`` are added to themselves, so the first and last
    elements leave the range of a signed 8 bit integer.
    """
    inp1 = np.array([127, 10, -128]).astype(Types.char)
    inp2 = np.array([127, 10, -128]).astype(Types.char)
    knl = Kernel(
        'knl',
        {
            'inp1': inp1,
            'inp2': inp2,
            'out1': np.zeros_like(inp1, dtype=Types.char),
            'out2': np.zeros_like(inp1, dtype=Types.char),
        },
        (" char a = 0;"
         " a = add_sat(inp1[get_global_id(0)], inp2[get_global_id(0)]);"
         " char b = 0;"
         " b = inp1[get_global_id(0)] + inp2[get_global_id(0)];"
         " out1[get_global_id(0)] = a;"
         " out2[get_global_id(0)] = b; "),
        global_size=inp1.shape)
    knl_cl = knl.compile()
    knl_py = knl.compile(emulate=True)

    knl_cl()
    res_cl = knl_cl.out1.get(), knl_cl.out2.get()

    knl_py()
    # Read the outputs of the *emulated* kernel. Reading ``knl_cl`` again
    # would compare the OpenCL result with itself and always pass.
    res_py = knl_py.out1.get(), knl_py.out2.get()

    assert np.all(res_cl[0] == res_py[0]) and np.all(res_cl[1] == res_py[1])
def test_debug_kernel_with_barriers():
    """Run a kernel that uses a local-memory barrier on device and emulated.

    Both runs use a global size of (2, 4) with a local size of (1, 2). The
    OpenCL run writes into ``mem_buffer``; the emulated run writes into a
    separate buffer. Both buffers must end up with identical contents.
    """
    buff = np.zeros(shape=(2, 4)).astype(Types.int)
    mem_buffer = to_device(buff)
    # The kernel source has to stay multi-line: the ``//`` comment extends to
    # the end of its line, so on a single line it would also comment out the
    # final write to ``mem_glob``.
    knl = Kernel(
        'knl',
        {'mem_glob': Global(mem_buffer)},
        """
        __local int mem[2];
        mem[0]=0;
        mem[1]=0;
        mem[get_local_id(1)] = get_local_id(1);
        barrier(CLK_LOCAL_MEM_FENCE);
        mem[get_local_id(1)] = mem[1];
        //barrier(CLK_GLOBAL_MEM_FENCE);
        mem_glob[get_global_id(0)*get_global_size(1)+get_global_id(1)] = mem[get_local_id(1)];
        """,
        global_size=(2, 4),
        local_size=(1, 2))

    compiled_cl = knl.compile(
        emulate=False,
        file=Path(__file__).parent.joinpath('py_cl_kernels/knl'))
    compiled_cl()

    mem_buffer_py = zeros_like(mem_buffer)
    compiled_py = knl.compile(
        emulate=True,
        file=Path(__file__).parent.joinpath('py_cl_kernels/knl'))
    compiled_py(mem_glob=mem_buffer_py)

    assert np.all(mem_buffer.get() == mem_buffer_py.get())
def test_debug_c_code_with_unary_increment_operation_inside_of_array():
    """Check ``++``/``--`` inside array subscripts on device and in emulation.

    The kernel uses post- and pre-increment/decrement operators inside index
    expressions and a small ``for`` loop. The buffer written by the OpenCL
    run must equal the buffer written by the emulated run.
    """
    kernel_body = (
        " int number = -1; number++;"
        " buff[number++] = 1; buff[number] = 2;"
        " number = 0; buff[2+ number--] = 3;"
        " buff[3+ ++number] = 4; buff[5 + --number] = 5;"
        " int count = 0; for(int i=1; i<3; i++){ count = count + i; }"
        " buff[5] = count; ")

    # Device run.
    buff_cl = zeros((6, 1), Types.short)
    knl = Kernel(
        'knl',
        {'buff': Global(buff_cl)},
        kernel_body,
        global_size=(1, ))
    run_on_device = knl.compile(emulate=False)
    run_on_device(buff=buff_cl)

    # Emulated run on a fresh buffer.
    buff_py = zeros((6, 1), Types.short)
    emulation_file = Path(__file__).parent / 'py_cl_kernels/knl'
    run_emulated = knl.compile(emulate=True, file=emulation_file)
    run_emulated(buff=buff_py)

    emulated_result = buff_py.get()
    device_result = buff_cl.get()
    assert np.all(emulated_result == device_result)
def test_two_input_integer_functions(name, dtype):
    """Check a two-argument kernel function on device and in emulation.

    Args:
        name: Name of the function called inside the kernel as
            ``name(a[i], num)``.
        dtype: Element type of the test array; also used to build the scalar
            second operand ``num`` (value 0).
    """
    shape = (10, )
    a_cl = to_device(np.ones(shape, dtype))
    a_emulation = to_device(np.ones(shape, dtype))

    kernel_args = {
        'a': Global(a_cl),
        'num': Scalar(dtype(0)),
    }
    kernel_body = f'a[get_global_id(0)]={name}(a[get_global_id(0)], num);'
    knl = Kernel(f'knl_{name}',
                 kernel_args,
                 kernel_body,
                 global_size=a_cl.shape)

    run_on_device = knl.compile()
    run_on_device(a=a_cl)
    run_emulated = knl.compile(emulate=True)
    run_emulated(a=a_emulation)

    assert np.all(a_cl.get() == a_emulation.get())
def test_access_complex_variable():
    """Check assignment to the ``.real`` member of a complex element.

    The kernel copies the real part of ``inp`` into the real part of ``out``.
    The OpenCL run writes to the default ``out`` buffer, the emulated run to
    a separate buffer; both must hold the same values afterwards.
    """
    kernel_file = Path(__file__).parent / 'py_cl_kernels/knl'

    host_values = np.array([0.5]).astype(Types.cfloat)
    buff_in = to_device(host_values)
    buff_out = zeros_like(buff_in)
    knl = Kernel(
        'knl',
        {'inp': Global(buff_in), 'out': Global(buff_out)},
        " out[get_global_id(0)].real = inp[get_global_id(0)].real; ",
        global_size=(1, ))

    run_on_device = knl.compile(emulate=False, file=kernel_file)
    run_on_device()

    buff_out_py = zeros_like(buff_in)
    run_emulated = knl.compile(emulate=True, file=kernel_file)
    # Emulation note from the original author:
    # out[0] = complex64(inp[0].real+out[0].imag*1j) instead of
    # out[0].real=inp[0].real
    run_emulated(out=buff_out_py)

    device_result = buff_out.get()
    emulated_result = buff_out_py.get()
    assert np.all(device_result == emulated_result)
def run(emulate=False):
    """Run the local-memory kernel and return the resulting array.

    The kernel copies five elements of ``ary`` per work group into the local
    array argument, sums them after a barrier and stores the sum at every
    global index.

    Args:
        emulate: Forwarded to ``Kernel.compile``; selects the emulated kernel
            when true.

    Returns:
        The host copy of ``ary`` after the kernel has run.
    """
    kernel_body = (
        " int offset = get_group_id(0)*get_local_size(0);"
        " for(int i=0; i<5; i++) local_mem[i] = ary[offset + i];"
        " barrier(CLK_LOCAL_MEM_FENCE);"
        " data_t sum = (data_t)(0);"
        " for(int i=0; i<5; i++) sum+=local_mem[i];"
        " ary[get_global_id(0)] = sum; ")

    ary = to_device(np.ones(10).astype(data_t))
    # Original note: 5 is to test that the local array argument is changed.
    declared_local_mem = LocalArray(dtype=data_t, shape=5)
    knl = Kernel(
        'knl_local_arg',
        {'ary': Global(ary), 'local_mem': declared_local_mem},
        kernel_body,
        type_defs={'data_t': data_t},
        global_size=ary.shape,
        local_size=(5, ))

    # A second LocalArray instance replaces the declared one at call time.
    call_local_mem = LocalArray(dtype=data_t, shape=5)
    compiled = knl.compile(emulate=emulate)
    compiled(local_mem=call_local_mem)
    return ary.get()
def test_vector_types():
    """Check ``char2`` vector arithmetic on device and in emulation.

    The kernel stores two ``char2`` values and their element-wise sum,
    product, difference and quotient into ``data``. The array read back
    after the OpenCL run must equal the one read back after the emulated run.
    """
    # todo use https://numpy.org/doc/stable/reference/generated/numpy.ndarray.ctypes.html
    kernel_body = (
        " char2 a = (char2)(4,2); char2 b = (char2)(1,2);"
        " data[0] = a; data[1] = b;"
        " data[2] = a + b; data[3] = a * b;"
        " data[4] = a - b; data[5] = a / b; ")

    data = np.zeros((10, )).astype(Types.char2)
    knl = Kernel('knl_vector_types', {'data': data},
                 kernel_body,
                 global_size=data.shape)
    run_on_device = knl.compile()
    run_emulated = knl.compile(emulate=True)

    run_on_device()
    get_current_queue().finish()
    res_cl = run_on_device.data.get()

    run_emulated()
    res_py = run_emulated.data.get()

    assert np.all(res_cl == res_py)
def test_conversion_knl_fnc_args_with_no_pointer_format():
    """Pass array arguments without a ``Global(...)`` wrapper to a function.

    ``a`` (a numpy array) and ``b`` (created with ``zeros``) are given
    directly as arguments of both the helper function ``copy_fnc`` and the
    kernel. After the run ``b`` must contain the values of ``a``.
    """
    a_np = np.array([0.1, 0.2], dtype=Types.float)
    b_cl = zeros(shape=(2, ), dtype=Types.float)

    # Helper that copies one element; argument order follows the dict order.
    copy_args = {
        'a': a_np,
        'b': b_cl,
        'idx': Scalar(Types.int),
    }
    fnc = Function('copy_fnc', copy_args, " b[idx] = a[idx]; ")

    knl = Kernel(
        'some_knl',
        {'a': a_np, 'b': b_cl},
        " copy_fnc(a, b, get_global_id(0)); ",
        functions=[fnc],
        global_size=b_cl.shape)
    knl.compile()
    knl()

    copied = b_cl.get()
    assert np.all(a_np == copied)