Example #1
import numpy as np
import pytest

import pyopencl as cl
import pyopencl.array as cl_array


def test_get_async(ctx_factory):
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    device = queue.device
    if device.platform.vendor == "The pocl project" \
            and device.type & cl.device_type.GPU:
        pytest.xfail("the async get test fails on POCL + Nvidia,"
                     "at least the K40, as of pocl 1.6, 2021-01-20")

    a = np.random.rand(10**6).astype(np.dtype("float32"))
    a_gpu = cl_array.to_device(queue, a)
    b = a + a**5 + 1
    b_gpu = a_gpu + a_gpu**5 + 1

    # deprecated, but still tested
    b1 = b_gpu.get(async_=True)  # testing that this waits for events
    b_gpu.finish()
    assert np.abs(b1 - b).mean() < 1e-5

    b1, evt = b_gpu.get_async()  # testing that this waits for events
    evt.wait()
    assert np.abs(b1 - b).mean() < 1e-5

    wait_event = cl.UserEvent(context)
    b_gpu.add_event(wait_event)
    b, evt = b_gpu.get_async()  # testing that this doesn't hang
    wait_event.set_status(cl.command_execution_status.COMPLETE)
    evt.wait()
    assert np.abs(b1 - b).mean() < 1e-5
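
The last block of this test is the core pattern for gating an asynchronous
device-to-host copy on an application-controlled event. A minimal,
self-contained sketch of that pattern (the context setup and names such as
gate and host_copy are illustrative, not part of the test suite):

import numpy as np
import pyopencl as cl
import pyopencl.array as cl_array

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

a_gpu = cl_array.to_device(queue, np.arange(16, dtype=np.float32))

gate = cl.UserEvent(ctx)
a_gpu.add_event(gate)                  # the transfer must wait on this event
host_copy, copy_evt = a_gpu.get_async()

gate.set_status(cl.command_execution_status.COMPLETE)  # release the transfer
copy_evt.wait()                        # host_copy is now valid
print(host_copy)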
Example #2
def test_event_set_callback(ctx_factory):
    import sys
    if sys.platform.startswith("win"):
        pytest.xfail("Event.set_callback not present on Windows")

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    _xfail_if_pocl_gpu(queue.device, "event callbacks")

    if ctx._get_cl_version() < (1, 1):
        pytest.skip("OpenCL 1.1 or newer required for set_callback")

    a_np = np.random.rand(50000).astype(np.float32)
    b_np = np.random.rand(50000).astype(np.float32)

    got_called = []

    def cb(status):
        got_called.append(status)

    mf = cl.mem_flags
    a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
    b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)

    prg = cl.Program(
        ctx, """
    __kernel void sum(__global const float *a_g, __global const float *b_g,
        __global float *res_g) {
      int gid = get_global_id(0);
      res_g[gid] = a_g[gid] + b_g[gid];
    }
    """).build()

    res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes)

    uevt = cl.UserEvent(ctx)

    evt = prg.sum(queue, a_np.shape, None, a_g, b_g, res_g, wait_for=[uevt])

    evt.set_callback(cl.command_execution_status.COMPLETE, cb)

    uevt.set_status(cl.command_execution_status.COMPLETE)

    queue.finish()

    counter = 0

    # yuck
    while not got_called:
        from time import sleep
        sleep(0.01)

        # wait up to five seconds (?!)
        counter += 1
        if counter >= 500:
            break

    assert got_called
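
Distilled from the test above, a minimal sketch of the callback mechanism: an
event is enqueued behind a UserEvent, a Python callback is registered on it
with set_callback, and the callback fires once the UserEvent is released. The
use of enqueue_marker instead of a kernel launch is an assumption made to keep
the sketch short (the marker wait list needs OpenCL 1.2):

import time
import pyopencl as cl

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

fired = []

def on_complete(status):
    # invoked from a driver thread once the marker reaches COMPLETE
    fired.append(status)

gate = cl.UserEvent(ctx)
marker = cl.enqueue_marker(queue, wait_for=[gate])
marker.set_callback(cl.command_execution_status.COMPLETE, on_complete)

gate.set_status(cl.command_execution_status.COMPLETE)
queue.finish()

# callbacks are delivered asynchronously, so poll briefly rather than
# asserting immediately
for _ in range(500):
    if fired:
        break
    time.sleep(0.01)
print(fired)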
Example #3
def test_user_event(ctx_factory):
    ctx = ctx_factory()
    if (ctx._get_cl_version() < (1, 1)
            and cl.get_cl_header_version() < (1, 1)):
        from pytest import skip
        skip("UserEvent is only available in OpenCL 1.1")

    if ctx.devices[0].platform.name == "Portable Computing Language":
        # https://github.com/pocl/pocl/issues/201
        pytest.xfail("POCL's user events don't work right")

    status = {}

    def event_waiter1(e, key):
        e.wait()
        status[key] = True

    def event_waiter2(e, key):
        cl.wait_for_events([e])
        status[key] = True

    from threading import Thread
    from time import sleep
    evt = cl.UserEvent(ctx)
    Thread(target=event_waiter1, args=(evt, 1)).start()
    sleep(.05)
    if status.get(1, False):
        raise RuntimeError('UserEvent triggered before set_status')
    evt.set_status(cl.command_execution_status.COMPLETE)
    sleep(.05)
    if not status.get(1, False):
        raise RuntimeError('UserEvent.wait timeout')
    assert evt.command_execution_status == cl.command_execution_status.COMPLETE

    evt = cl.UserEvent(ctx)
    Thread(target=event_waiter2, args=(evt, 2)).start()
    sleep(.05)
    if status.get(2, False):
        raise RuntimeError('UserEvent triggered before set_status')
    evt.set_status(cl.command_execution_status.COMPLETE)
    sleep(.05)
    if not status.get(2, False):
        raise RuntimeError('cl.wait_for_events timeout on UserEvent')
    assert evt.command_execution_status == cl.command_execution_status.COMPLETE
Example #4
def test_user_event(ctx_factory):
    ctx = ctx_factory()
    if (ctx._get_cl_version() < (1, 1)
            and cl.get_cl_header_version() < (1, 1)):
        from pytest import skip
        skip("UserEvent is only available in OpenCL 1.1")

    status = {}

    def event_waiter1(e, key):
        e.wait()
        status[key] = True

    def event_waiter2(e, key):
        cl.wait_for_events([e])
        status[key] = True

    from threading import Thread
    from time import sleep
    evt = cl.UserEvent(ctx)
    Thread(target=event_waiter1, args=(evt, 1)).start()
    sleep(.05)
    if status.get(1, False):
        raise RuntimeError('UserEvent triggered before set_status')
    evt.set_status(cl.command_execution_status.COMPLETE)
    sleep(.05)
    if not status.get(1, False):
        raise RuntimeError('UserEvent.wait timeout')
    assert evt.command_execution_status == cl.command_execution_status.COMPLETE

    evt = cl.UserEvent(ctx)
    Thread(target=event_waiter2, args=(evt, 2)).start()
    sleep(.05)
    if status.get(2, False):
        raise RuntimeError('UserEvent triggered before set_status')
    evt.set_status(cl.command_execution_status.COMPLETE)
    sleep(.05)
    if not status.get(2, False):
        raise RuntimeError('cl.wait_for_events timeout on UserEvent')
    assert evt.command_execution_status == cl.command_execution_status.COMPLETE
Example #5
def __call__(self,
             global_size: KernelGridType = None,
             local_size: KernelGridType = None,
             **kwargs: Union[TypesClArray, object]) -> cl.Event:
    # e.g. if two kernels of a program shall run concurrently, this can be
    # enabled by passing another queue here
    queue = kwargs.pop('queue', get_current_queue())
    global_size, local_size, args = self._prepare_arguments(
        queue=queue,
        knl=self.kernel_model,
        global_size=global_size,
        local_size=local_size,
        **kwargs)
    self.function(global_size, local_size, *args)
    # create a user event with the context retrieved from the first
    # argument of type Array
    event = cl.UserEvent([
        _ for _ in args if isinstance(_, TypesClArray.__args__)
    ][0].context)
    event.set_status(cl.command_execution_status.COMPLETE)
    return event
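
The trick the wrapper above relies on is that a UserEvent created from the
context of one of the argument arrays and immediately marked COMPLETE behaves
like any other finished cl.Event, so callers can wait on it or pass it in a
wait_for list without special-casing. A small, hypothetical illustration (the
array and the follow-up copy are assumptions, not part of the class above):

import numpy as np
import pyopencl as cl
import pyopencl.array as cl_array

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
a_gpu = cl_array.to_device(queue, np.zeros(8, dtype=np.float32))

# stand-in for the event returned by the wrapper's __call__
done = cl.UserEvent(a_gpu.context)
done.set_status(cl.command_execution_status.COMPLETE)

# downstream code can treat it like any other event
done.wait()
out = np.empty(8, dtype=np.float32)
copy_evt = cl.enqueue_copy(queue, out, a_gpu.data, wait_for=[done])
copy_evt.wait()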
Example #6
def test_get_async(ctx_factory):
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    a = np.random.rand(10**6).astype(np.dtype('float32'))
    a_gpu = cl_array.to_device(queue, a)
    b = a + a**5 + 1
    b_gpu = a_gpu + a_gpu**5 + 1

    # deprecated, but still tested
    b1 = b_gpu.get(async_=True)  # testing that this waits for events
    b_gpu.finish()
    assert np.abs(b1 - b).mean() < 1e-5

    b1, evt = b_gpu.get_async()  # testing that this waits for events
    evt.wait()
    assert np.abs(b1 - b).mean() < 1e-5

    wait_event = cl.UserEvent(context)
    b_gpu.add_event(wait_event)
    b, evt = b_gpu.get_async()  # testing that this doesn't hang
    wait_event.set_status(cl.command_execution_status.COMPLETE)
    evt.wait()
    assert np.abs(b1 - b).mean() < 1e-5
Example #7
except:
    print('Build log:')
    print(prog.get_build_info(dev, cl.program_build_info.LOG))
    raise

# Data
v = np.arange(4, dtype=np.float32)
print('Input: ' + str(v))

# Create output buffer
v_buff = cl.Buffer(context,
                   flags=cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR,
                   hostbuf=v)

# Create user event
user_event = cl.UserEvent(context)


def read_complete(status, data):
    print('Output: ' + str(data))


# Enqueue kernel that waits for user event before executing
global_size = (1, )
local_size = None

# __call__(queue, global_size, local_size, *args, global_offset=None, wait_for=None, g_times_l=False)
kernel_event = prog.user_event(queue,
                               global_size,
                               local_size,
                               v_buff,