def test_pointer_arithmetics():
    """Compare C pointer arithmetic between the OpenCL kernel and its Python emulation.

    The kernel offsets ``char*`` pointers into a private array and writes the
    values they point at into ``data``. The same kernel is compiled once for
    the device and once in emulation mode, and the first element of both
    results is compared.
    """
    # Problem: the abstract syntax tree does not distinguish whether an identifier is a pointer or a variable.
    # E.g. incrementing the pointer to an array a (a=a+1) would, in Python, increment all values in
    # the underlying array, whereas in C it only moves the pointer.
    data = np.array([0, 0]).astype(Types.char)
    knl = Kernel('knl_pointer_arithmetics', {'data': data},
                 """
        char a[5]={0};
        a[0] = a[0] + 1;
        a[1] = 1;
        char* b = a + 1;
        b -= 1; b += 1;
        data[0] = b[0];
        
        char* c = a;
        c += 1;
        a[1] = 3;
        data[1] = c[0];
    """,
                 global_size=data.shape)
    emulation.use_existing_file_for_emulation(False)
    knl_py = knl.compile(emulate=True)
    knl_cl = knl.compile()
    knl_cl()
    res_cl = knl_cl.data.get()
    knl_py()
    # Read the result of the emulated kernel (previously read from knl_cl,
    # which compared the OpenCL result with itself).
    res_py = knl_py.data.get()
    # NOTE(review): only data[0] is compared; data[1] (the aliasing case via
    # pointer c) is not checked - confirm whether this is intentional.
    assert np.all(res_cl[0] == res_py[0])
def test_number_overflow():
    """Compare char addition at the type limits between OpenCL and the Python emulation.

    For each work item the kernel stores ``add_sat(inp1, inp2)`` in ``out1``
    and the plain ``inp1 + inp2`` in ``out2``. The inputs include the char
    limits 127 and -128, so both sums leave the representable range.
    """
    inp1 = np.array([127, 10, -128]).astype(Types.char)
    inp2 = np.array([127, 10, -128]).astype(Types.char)
    knl = Kernel('knl', {
        'inp1': inp1,
        'inp2': inp2,
        'out1': np.zeros_like(inp1, dtype=Types.char),
        'out2': np.zeros_like(inp1, dtype=Types.char)
    },
                 """
        char a = 0;
        a = add_sat(inp1[get_global_id(0)], inp2[get_global_id(0)]);
        char b = 0;
        b = inp1[get_global_id(0)] + inp2[get_global_id(0)];
        out1[get_global_id(0)] = a; 
        out2[get_global_id(0)] = b; 
    """,
                 global_size=inp1.shape)
    knl_cl = knl.compile()
    knl_py = knl.compile(emulate=True)
    knl_cl()
    res_cl = knl_cl.out1.get(), knl_cl.out2.get()
    knl_py()
    # Read the outputs of the emulated kernel (previously read from knl_cl,
    # which made the assertion below trivially true).
    res_py = knl_py.out1.get(), knl_py.out2.get()
    assert np.all(res_cl[0] == res_py[0]) and np.all(res_cl[1] == res_py[1])
def test_debug_kernel_with_barriers():
    """Run a kernel containing a local-memory barrier on the device and in emulation.

    The kernel writes through a ``__local`` array with a
    ``barrier(CLK_LOCAL_MEM_FENCE)`` in between, then copies the value to the
    global buffer. Both runs must yield identical global buffers.
    """
    kernel_file = Path(__file__).parent.joinpath('py_cl_kernels/knl')
    device_result = to_device(np.zeros(shape=(2, 4)).astype(Types.int))
    kernel_body = """
        __local int mem[2];
        mem[0]=0;
        mem[1]=0;
        mem[get_local_id(1)] = get_local_id(1);
        barrier(CLK_LOCAL_MEM_FENCE);
        mem[get_local_id(1)] = mem[1];
        //barrier(CLK_GLOBAL_MEM_FENCE);
        mem_glob[get_global_id(0)*get_global_size(1)+get_global_id(1)] = mem[get_local_id(1)];
    """
    knl = Kernel('knl', {'mem_glob': Global(device_result)},
                 kernel_body,
                 global_size=(2, 4),
                 local_size=(1, 2))

    # Device run writes into the buffer the kernel was constructed with.
    run_on_device = knl.compile(emulate=False, file=kernel_file)
    run_on_device()

    # Emulated run gets its own, freshly zeroed output buffer.
    emulated_result = zeros_like(device_result)
    run_emulated = knl.compile(emulate=True, file=kernel_file)
    run_emulated(mem_glob=emulated_result)

    assert np.all(device_result.get() == emulated_result.get())
def test_debug_c_code_with_unary_increment_operation_inside_of_array():
    """Check pre-/post-increment and decrement used inside array subscripts.

    The kernel uses ``number++``, ``number--``, ``++number`` and ``--number``
    as (part of) index expressions and finishes with a small ``for`` loop.
    The buffer written on the device must equal the one written in emulation.
    """
    kernel_body = """
        int number = -1;
        number++;
        buff[number++] = 1;
        buff[number] = 2;
        number = 0;
        buff[2+ number--] = 3;
        buff[3+ ++number] = 4;
        buff[5 + --number] = 5;
        int count = 0;
        for(int i=1; i<3; i++){
            count = count + i;
        }        
        buff[5] = count;
    """
    device_buff = zeros((6, 1), Types.short)
    knl = Kernel('knl', {'buff': Global(device_buff)},
                 kernel_body,
                 global_size=(1, ))

    run_on_device = knl.compile(emulate=False)
    run_on_device(buff=device_buff)

    emulated_buff = zeros((6, 1), Types.short)
    emulation_file = Path(__file__).parent.joinpath('py_cl_kernels/knl')
    run_emulated = knl.compile(emulate=True, file=emulation_file)
    run_emulated(buff=emulated_buff)

    assert np.all(emulated_buff.get() == device_buff.get())
def test_two_input_integer_functions(name, dtype):
    """Apply the two-argument function ``name`` on the device and in emulation.

    Args:
        name: Name of the function called in the kernel as ``name(a[i], num)``.
        dtype: Element type of the array; also called as ``dtype(0)`` to build
            the scalar argument ``num``.
    """
    device_ary = to_device(np.ones((10, ), dtype))
    emulated_ary = to_device(np.ones((10, ), dtype))
    kernel_args = {
        'a': Global(device_ary),
        'num': Scalar(dtype(0))
    }
    kernel_body = f'a[get_global_id(0)]={name}(a[get_global_id(0)], num);'
    knl = Kernel(f'knl_{name}', kernel_args, kernel_body,
                 global_size=device_ary.shape)

    run_on_device = knl.compile()
    run_on_device(a=device_ary)
    run_emulated = knl.compile(emulate=True)
    run_emulated(a=emulated_ary)

    assert np.all(device_ary.get() == emulated_ary.get())
def test_access_complex_variable():
    """Assign to the ``.real`` member of a complex element on device and in emulation.

    The kernel copies only the real part of ``inp`` into ``out``. The output
    buffer of the device run must equal the output buffer of the emulated run.
    """
    kernel_file = Path(__file__).parent.joinpath('py_cl_kernels/knl')
    inp_device = to_device(np.array([0.5]).astype(Types.cfloat))
    out_device = zeros_like(inp_device)
    kernel_body = """
        out[get_global_id(0)].real = inp[get_global_id(0)].real; 
    """
    knl = Kernel('knl', {
        'inp': Global(inp_device),
        'out': Global(out_device)
    },
                 kernel_body,
                 global_size=(1, ))

    run_on_device = knl.compile(emulate=False, file=kernel_file)
    run_on_device()

    out_emulated = zeros_like(inp_device)
    run_emulated = knl.compile(emulate=True, file=kernel_file)
    # Author's note on the emulated form of the assignment:
    # out[0] = complex64(inp[0].real+out[0].imag*1j) instead of out[0].real=inp[0].real
    run_emulated(out=out_emulated)

    assert np.all(out_device.get() == out_emulated.get())
 def run(emulate=False):
     """Build and execute the local-memory-argument kernel and return ``ary`` as a host array.

     ``data_t`` is taken from the enclosing scope (not visible in this block).

     Args:
         emulate: Forwarded to ``knl.compile``.
     """
     device_ary = to_device(np.ones(10).astype(data_t))
     # 5 is to test that local array argument is changed
     default_local_mem = LocalArray(dtype=data_t, shape=5)
     kernel_args = {
         'ary': Global(device_ary),
         'local_mem': default_local_mem
     }
     kernel_body = """
            int offset = get_group_id(0)*get_local_size(0);
            for(int i=0; i<5; i++) local_mem[i] = ary[offset + i];
            barrier(CLK_LOCAL_MEM_FENCE);
            data_t sum = (data_t)(0);
            for(int i=0; i<5; i++) sum+=local_mem[i];
            ary[get_global_id(0)] = sum;
                  """
     knl = Kernel('knl_local_arg', kernel_args, kernel_body,
                  type_defs={'data_t': data_t},
                  global_size=device_ary.shape,
                  local_size=(5, ))
     # A new LocalArray is handed over at call time instead of the one given
     # to the Kernel constructor.
     call_local_mem = LocalArray(dtype=data_t, shape=5)
     compiled = knl.compile(emulate=emulate)
     compiled(local_mem=call_local_mem)
     return device_ary.get()
def test_vector_types():
    """Compare ``char2`` vector construction and arithmetic between device and emulation.

    The kernel stores two ``char2`` values and their sum, product, difference
    and quotient in the first six elements of ``data``.
    """
    # todo use https://numpy.org/doc/stable/reference/generated/numpy.ndarray.ctypes.html
    kernel_body = """
        char2 a = (char2)(4,2);
        char2 b = (char2)(1,2);
        data[0] = a;
        data[1] = b;
        data[2] = a + b;
        data[3] = a * b;
        data[4] = a - b;
        data[5] = a / b;
    """
    data = np.zeros((10, )).astype(Types.char2)
    knl = Kernel('knl_vector_types', {'data': data},
                 kernel_body,
                 global_size=data.shape)
    run_on_device = knl.compile()
    run_emulated = knl.compile(emulate=True)

    run_on_device()
    get_current_queue().finish()
    device_result = run_on_device.data.get()

    run_emulated()
    emulated_result = run_emulated.data.get()

    assert np.all(device_result == emulated_result)
# Example #9
# 0
def test_conversion_knl_fnc_args_with_no_pointer_format():
    """Pass arrays through a kernel to a helper function without wrapping the arguments.

    ``a`` is a plain numpy array and ``b`` a zero-initialised array; both are
    handed to ``Function`` and ``Kernel`` directly. The helper copies
    ``a[idx]`` to ``b[idx]``, so afterwards ``b`` must equal ``a``.
    """
    source_np = np.array([0.1, 0.2], dtype=Types.float)
    target_cl = zeros(shape=(2, ), dtype=Types.float)

    copy_args = {
        'a': source_np,
        'b': target_cl,
        'idx': Scalar(Types.int)
    }
    copy_body = """
                   b[idx] = a[idx];
                   """
    fnc = Function('copy_fnc', copy_args, copy_body)

    kernel_args = {
        'a': source_np,
        'b': target_cl
    }
    kernel_body = """
                 copy_fnc(a, b, get_global_id(0));
                 """
    knl = Kernel('some_knl', kernel_args, kernel_body,
                 functions=[fnc],
                 global_size=target_cl.shape)
    knl.compile()
    knl()
    assert np.all(source_np == target_cl.get())