Esempio n. 1
0
def test_ternary(context, q, float_data, float_data_gpu):
    """Check that a ternary expression in a kernel selects the right operand.

    The ``testTernary`` kernel stores ``data[2]`` in ``data[0]`` when
    ``data[1] > 0`` and ``data[3]`` otherwise.  Both branches are exercised by
    writing first a positive and then a negative value into ``data[1]`` with
    the ``setValue`` kernel, running ``testTernary``, and comparing ``data[0]``
    with the host copy taken before any kernel ran.

    Args:
        context: Passed through to ``compile_code``.
        q: Queue used for kernel launches and buffer copies.
        float_data: Host float array; overwritten by each read-back.
        float_data_gpu: Device buffer paired with ``float_data``.
    """
    kernelSource = """
__global__ void setValue(float *data, int idx, float value) {
    if(threadIdx.x == 0) {
        data[idx] = value;
    }
}
__global__ void testTernary(float *data) {
    data[0] = data[1] > 0 ? data[2] : data[3];
}
"""
    setValueKernelName = test_common.mangle('setValue',
                                            ['float *', 'int', 'float'])
    setValueProg = compile_code(cl,
                                context,
                                kernelSource,
                                setValueKernelName,
                                num_clmems=1)

    testTernaryName = test_common.mangle('testTernary', ['float *'])
    testTernaryProg = compile_code(cl,
                                   context,
                                   kernelSource,
                                   testTernaryName,
                                   num_clmems=1)

    # Reference values: float_data itself is overwritten by every read-back.
    float_data_orig = np.copy(float_data)

    def set_float_value(gpu_buffer, idx, value):
        # Store ``value`` at element ``idx`` of ``gpu_buffer`` on the device.
        # The buffer argument is honoured here; previously it was ignored in
        # favour of the enclosing float_data_gpu.
        setValueProg.__getattr__(setValueKernelName)(q, (32, ), (32, ),
                                                     gpu_buffer,
                                                     offset_type(0),
                                                     np.int32(idx),
                                                     np.float32(value),
                                                     cl.LocalMemory(4))

    def run_ternary_and_read_back():
        # Launch testTernary, then copy the device buffer back to float_data.
        testTernaryProg.__getattr__(testTernaryName)(q, (32, ),
                                                     (32, ), float_data_gpu,
                                                     offset_type(0),
                                                     cl.LocalMemory(4))
        q.finish()
        cl.enqueue_copy(q, float_data, float_data_gpu)
        q.finish()
        print('float_data[:8]', float_data[:8])

    # Upload the host data so the device buffer matches float_data_orig.
    cl.enqueue_copy(q, float_data_gpu, float_data)
    print('float_data[:8]', float_data[:8])

    # Each case: (value written to data[1], index data[0] must be copied from).
    branch_cases = ((10, 2), (-2, 3))
    for condition_value, expected_idx in branch_cases:
        set_float_value(float_data_gpu, 1, condition_value)
        run_ternary_and_read_back()
        assert float_data[0] == float_data_orig[expected_idx]
Esempio n. 2
0
def test_use_template1(context, q, int_data, int_data_gpu, float_data, float_data_gpu):
    """Run a kernel that calls a templated device function for float and int.

    The kernel stores ``addNumbers(data[1], data[2])`` in element 0 of both
    the float and the int buffer; the results are compared with the same sums
    computed on the host from copies taken before the launch.
    """
    # Host snapshots used as the reference for the assertions below.
    float_before = np.copy(float_data)
    int_before = np.copy(int_data)

    template_source = """
template< typename T >
__device__ T addNumbers(T one, T two) {
    return one + two;
}

__global__ void use_template1(float *data, int *intdata) {
    if(threadIdx.x == 0 && blockIdx.x == 0) {
        data[0] = addNumbers(data[1], data[2]);
        intdata[0] = addNumbers(intdata[1], intdata[2]);
    }
}
"""
    mangled_name = test_common.mangle('use_template1', ['float *', 'int *'])
    program = compile_code(cl, context, template_source, mangled_name)

    kernel = program.__getattr__(mangled_name)
    kernel(
        q, (32,), (32,),
        float_data_gpu, offset_type(0),
        int_data_gpu, offset_type(0),
        cl.LocalMemory(4))

    # Read both buffers back before checking either result.
    cl.enqueue_copy(q, float_data, float_data_gpu)
    cl.enqueue_copy(q, int_data, int_data_gpu)
    q.finish()

    expected_float = float_before[1] + float_before[2]
    assert float_data[0] == expected_float
    expected_int = int_before[1] + int_before[2]
    assert int_data[0] == expected_int
Esempio n. 3
0
def test_copy_float(cuSourcecode, context, q, float_data, float_data_gpu):
    """Run the ``copy_float`` kernel and check elements 0 and 1 end up equal.

    The kernel source comes from the ``cuSourcecode`` argument and is not
    visible here; the assertion only requires ``float_data[0]`` to equal
    ``float_data[1]`` after the read-back.
    """
    mangled_name = test_common.mangle('copy_float', ['float *'])
    program = compile_code(cl, context, cuSourcecode, mangled_name)

    kernel = program.__getattr__(mangled_name)
    kernel(q, (32,), (32,), float_data_gpu, offset_type(0), cl.LocalMemory(4))

    cl.enqueue_copy(q, float_data, float_data_gpu)
    q.finish()

    first, second = float_data[0], float_data[1]
    assert first == second
Esempio n. 4
0
def test_test_if_else(context, q, float_data, float_data_gpu):
    """Check both arms of an if/else that branches on the thread index.

    Threads with ``threadIdx.x < N`` double their element, the others add 5.
    The first ten elements are compared with values derived from a host copy
    taken before the launch.
    """
    sourcecode = """
__global__ void testIfElse(float *data, int N) {
    int tid = threadIdx.x;
    if(tid < N) {
        data[tid] *= 2;
    } else {
        data[tid] += 5;
    }
}
"""
    mangled_name = test_common.mangle('testIfElse', ['float *', 'int'])
    program = compile_code(cl, context, sourcecode, mangled_name, num_clmems=1)
    before = np.copy(float_data)

    N = 2
    kernel = program.__getattr__(mangled_name)
    kernel(q, (32, ), (32, ), float_data_gpu, offset_type(0), np.int32(N),
           cl.LocalMemory(4))
    cl.enqueue_copy(q, float_data, float_data_gpu)
    q.finish()

    # Dump the OpenCL file found at this path for inspection.
    with open('/tmp/testprog-device.cl', 'r') as cl_file:
        cl_code = cl_file.read()
    print('cl_code', cl_code)

    checked = 10
    # "if" arm: elements below N were doubled.
    for idx in range(N):
        assert float_data[idx] == before[idx] * 2
    # "else" arm: the remaining checked elements had 5 added.
    for idx in range(N, checked):
        assert abs(float_data[idx] - before[idx] - 5) <= 1e-4
Esempio n. 5
0
def test_test_for(context, q, float_data, float_data_gpu):
    """Check a counted ``for`` loop by summing the first N elements.

    Thread 0 adds up ``data[0..N-1]`` and stores the total in ``data[0]``;
    the result is compared, within 1e-4, with the same sum accumulated on the
    host from a copy taken before the launch.
    """
    sourcecode = """
__global__ void testFor(float *data, int N) {
    if(threadIdx.x == 0) {
        float sum = 0.0f;
        for(int i = 0; i < N; i++) {
            sum += data[i];
        }
        data[0] = sum;
    }
}
"""
    mangled_name = test_common.mangle('testFor', ['float *', 'int'])
    program = compile_code(cl, context, sourcecode, mangled_name, num_clmems=1)
    before = np.copy(float_data)

    N = 4
    kernel = program.__getattr__(mangled_name)
    kernel(q, (32, ), (32, ), float_data_gpu, offset_type(0), np.int32(N),
           cl.LocalMemory(4))
    cl.enqueue_copy(q, float_data, float_data_gpu)
    q.finish()

    # Dump the OpenCL file found at this path for inspection.
    with open('/tmp/testprog-device.cl', 'r') as cl_file:
        cl_code = cl_file.read()
    print('cl_code', cl_code)

    # Accumulate in the same element order as the kernel.
    expected_total = 0
    for idx in range(N):
        expected_total = expected_total + before[idx]
    difference = abs(float_data[0] - expected_total)
    assert difference <= 1e-4
Esempio n. 6
0
def test_test_do_while(context, q, float_data, float_data_gpu):
    """Check a do/while loop that accumulates elements until the sum is >= 15.

    The kernel adds ``data[0]``, ``data[1]``, ... to a running sum, leaves the
    loop once ``sum < 15`` is false, and stores the sum in ``data[0]``.  The
    test verifies the loop's exit condition and, when a host-side replay of
    the loop terminates inside the buffer, the exact value.

    Args:
        context: Passed through to ``compile_code``.
        q: Queue used for the kernel launch and the read-back.
        float_data: Host float array; overwritten by the read-back.
        float_data_gpu: Device buffer paired with ``float_data``.
    """
    # NOTE(review): the kernel is named testIfElse although it exercises
    # do/while; the name is kept unchanged because it is part of the compiled
    # source and of the mangled kernel name.
    sourcecode = """
__global__ void testIfElse(float *data, int N) {
    int tid = threadIdx.x;
    int i = 0;
    float sum = 0;
    do {
        sum += data[i];
        i++;
    } while(sum < 15);
    data[0] = sum;
}
"""
    kernelName = test_common.mangle('testIfElse', ['float *', 'int'])
    prog = compile_code(cl, context, sourcecode, kernelName, num_clmems=1)
    float_data_orig = np.copy(float_data)

    # N is required by the kernel signature but not read by the kernel body.
    N = 2
    prog.__getattr__(kernelName)(q, (32, ), (32, ), float_data_gpu,
                                 offset_type(0), np.int32(N),
                                 cl.LocalMemory(4))
    cl.enqueue_copy(q, float_data, float_data_gpu)
    q.finish()
    with open('/tmp/testprog-device.cl', 'r') as f:
        cl_code = f.read()
    print('cl_code', cl_code)
    print(float_data[0])

    # The loop is only left once ``sum < 15`` is false, so the stored value
    # must be at least 15 (a NaN result fails this comparison as well).
    assert float_data[0] >= 15

    # Replay the loop on the host in float32, in the same element order.
    # Assumes float_data_gpu initially held the same values as float_data --
    # TODO confirm against the fixtures.
    expected = np.float32(0)
    for value in float_data_orig:
        expected = expected + value
        if not expected < 15:
            break
    else:
        # The replay never reached 15 inside the host array, so there is no
        # well-defined value to compare with.
        expected = None

    if expected is not None:
        tolerance = 1e-4 * max(1.0, abs(float(expected)))
        assert abs(float_data[0] - expected) <= tolerance
Esempio n. 7
0
def test_use_tid2(cuSourcecode, context, q, int_data, int_data_gpu):
    """Run the ``use_tid2`` kernel and spot-check three elements.

    The kernel source comes from the ``cuSourcecode`` argument and is not
    visible here; the assertions require elements 0, 10 and 31 to have grown
    by their own index relative to a host copy taken before the launch.
    """
    before = np.copy(int_data)

    mangled_name = test_common.mangle('use_tid2', ['int *'])
    program = compile_code(cl, context, cuSourcecode, mangled_name)
    kernel = program.__getattr__(mangled_name)
    kernel(q, (32,), (32,), int_data_gpu, offset_type(0), cl.LocalMemory(4))

    cl.enqueue_copy(q, int_data, int_data_gpu)
    q.finish()

    # First, a middle and the last index in the launched range of 32.
    for idx in (0, 10, 31):
        assert int_data[idx] == before[idx] + idx
Esempio n. 8
0
def test_float4(cuSourcecode, context, ctx, q, float_data, float_data_gpu):
    """Run the ``testFloat4`` kernel and check one product it stores.

    The kernel source comes from the ``cuSourcecode`` argument and is not
    visible here; the assertion requires element 1 to equal the product of
    the original elements 6 and 7.
    """
    before = np.copy(float_data)

    mangled_name = test_common.mangle('testFloat4', ['float4 *'])
    program = compile_code(cl, context, cuSourcecode, mangled_name)
    kernel = program.__getattr__(mangled_name)
    kernel(q, (32,), (32,), float_data_gpu, offset_type(0), cl.LocalMemory(4))

    cl.enqueue_copy(q, float_data, float_data_gpu)
    q.finish()

    print('float_data_orig[:8]', before[:8])
    print('float_data[:8]', float_data[:8])

    # presumably elements 4..7 are the second float4 of the buffer -- confirm
    base = 4
    expected_product = before[base + 2] * before[base + 3]
    assert float_data[1] == expected_product
Esempio n. 9
0
def test_float4_test2(cuSourcecode, context, ctx, q, float_data,
                      float_data_gpu):
    """Run the ``testFloat4_test2`` kernel and check a four-element copy.

    The kernel source comes from the ``cuSourcecode`` argument and is not
    visible here; the assertions require elements 0..3 to equal the original
    elements 4..7.
    """
    before = np.copy(float_data)

    mangled_name = test_common.mangle('testFloat4_test2', ['float4 *'])
    program = compile_code(
        cl, context, cuSourcecode, mangled_name, num_clmems=1)
    kernel = program.__getattr__(mangled_name)
    kernel(q, (32, ), (32, ), float_data_gpu, offset_type(0),
           cl.LocalMemory(4))

    cl.enqueue_copy(q, float_data, float_data_gpu)
    q.finish()

    print('float_data_orig[:8]', before[:8])
    print('float_data[:8]', float_data[:8])

    shift = 4
    for dst in range(4):
        src = dst + shift
        assert float_data[dst] == before[src]
Esempio n. 10
0
def test_simpleloop(context, q, float_data, float_data_gpu):
    """Compile and run a kernel whose loop adds a value to the first N elements.

    This test makes no assertion on the data: it prints the OpenCL file read
    from disk and the first element after the read-back.

    NOTE(review): the kernel has no threadIdx guard, so every one of the 32
    launched work-items updates the same elements; presumably that is why the
    result is only printed -- confirm.
    """
    sourcecode = """
__global__ void longKernel(float *data, int N, float value) {
    for(int i = 0; i < N; i++) {
        data[i] += value;
    }
}
"""
    mangled_name = test_common.mangle(
        'longKernel', ['float *', 'int', 'float'])
    program = compile_code(cl, context, sourcecode, mangled_name, num_clmems=1)
    # Host snapshot taken before the launch (not consulted afterwards).
    before = np.copy(float_data)

    N = 2
    increment = np.float32(123)
    kernel = program.__getattr__(mangled_name)
    kernel(q, (32, ), (32, ), float_data_gpu, offset_type(0), np.int32(N),
           increment, cl.LocalMemory(4))
    cl.enqueue_copy(q, float_data, float_data_gpu)
    q.finish()

    # Dump the OpenCL file found at this path for inspection.
    with open('/tmp/testprog-device.cl', 'r') as cl_file:
        cl_code = cl_file.read()
    print('cl_code', cl_code)
    print(float_data[0])
Esempio n. 11
0
def test_test_inlines(context, q, float_data, float_data_gpu):
    """Check that a kernel can call a ``__device__`` helper function.

    ``testInline`` calls ``somefunc``, which stores 3.4 in ``data[0]``.  After
    the read-back the test asserts that ``float_data[0]`` holds that value
    (within 1e-4, since 3.4 is not exactly representable as a float).

    Args:
        context: Passed through to ``compile_code``.
        q: Queue used for the kernel launch and the read-back.
        float_data: Host float array; overwritten by the read-back.
        float_data_gpu: Device buffer paired with ``float_data``.
    """
    sourcecode = """
__device__ void somefunc(float *data) {
    data[0] = 3.4;
}

__global__ void testInline(float *data, int N) {
    somefunc(data);
}
"""
    kernelName = test_common.mangle('testInline', ['float *', 'int'])
    prog = compile_code(cl, context, sourcecode, kernelName, num_clmems=1)

    # N is required by the kernel signature but not read by the kernel body.
    N = 4
    prog.__getattr__(kernelName)(q, (32, ), (32, ), float_data_gpu,
                                 offset_type(0), np.int32(N),
                                 cl.LocalMemory(4))
    cl.enqueue_copy(q, float_data, float_data_gpu)
    q.finish()
    with open('/tmp/testprog-device.cl', 'r') as f:
        cl_code = f.read()
    print('cl_code', cl_code)

    # Every work-item stores the same constant, so the result is deterministic.
    assert abs(float_data[0] - 3.4) <= 1e-4