예제 #1
0
def to_device(ary,
              queue=None,
              allocator=None,
              async_=None,
              array_queue=cl_array._same_as_transfer,
              **kwargs):
    """Transfer ``ary`` with ``cl_array.to_device`` and wrap it as ``Array``.

    Args:
        ary: Object handed to ``cl_array.to_device`` as the data to transfer.
        queue: Queue used for the transfer. When None, the queue returned by
            ``get_current_queue()`` is used.
        allocator: Forwarded unchanged to ``cl_array.to_device``.
        async_: Forwarded unchanged to ``cl_array.to_device``.
        array_queue: Forwarded unchanged to ``cl_array.to_device``.
        **kwargs: Extra keyword arguments forwarded to
            ``cl_array.to_device``.

    Returns:
        The result of ``Array.from_array`` applied to the transferred array.
    """
    if queue is None:
        queue = get_current_queue()

    transferred = cl_array.to_device(queue, ary, allocator, async_,
                                     array_queue, **kwargs)
    res = Array.from_array(transferred)

    # Register the newest event of the array (if any) with the queue.
    pending = res.events
    if len(pending) > 0:
        queue.add_event(pending[-1], 'to_device')
    return res
예제 #2
0
def empty(shape,
          dtype,
          cq: CommandQueue = None,
          order="C",
          allocator=None,
          data=None,
          offset=0,
          strides=None,
          events=None,
          _flags=None):
    """Construct a ``cl_array.Array`` and wrap it with ``Array.from_array``.

    The parameters are handed to the ``cl_array.Array`` constructor.
    ``data``, ``offset``, ``strides``, ``events`` and ``_flags`` were
    previously accepted but silently discarded; they are now forwarded, so a
    caller supplying e.g. an existing buffer via ``data`` actually gets an
    array on that buffer. With the default values the constructor call is
    equivalent to the former five-argument call.

    Args:
        shape: Shape passed to ``cl_array.Array``.
        dtype: Element type passed to ``cl_array.Array``.
        cq: Queue passed to ``cl_array.Array``. When None, the queue returned
            by ``get_current_queue()`` is used.
        order: Passed to ``cl_array.Array``.
        allocator: Passed to ``cl_array.Array``.
        data: Passed to ``cl_array.Array``.
        offset: Passed to ``cl_array.Array``.
        strides: Passed to ``cl_array.Array``.
        events: Passed to ``cl_array.Array``.
        _flags: Passed to ``cl_array.Array``.

    Returns:
        The wrapped ``Array`` after ``add_latest_event('empty')`` was called
        on it.
    """
    cq = get_current_queue() if cq is None else cq
    res = Array.from_array(
        cl_array.Array(cq,
                       shape,
                       dtype,
                       order=order,
                       allocator=allocator,
                       data=data,
                       offset=offset,
                       strides=strides,
                       events=events,
                       _flags=_flags))
    res.add_latest_event('empty')
    return res
예제 #3
0
def compile_cl_program(program_model: Program,
                       context: Context = None,
                       emulate: bool = False,
                       file: str = '$default_path') -> ProgramContainer:
    """Compile all kernels of ``program_model`` and bundle them in a container.

    Depending on ``emulate`` the program is compiled with
    ``compile_cl_program_emulation`` or ``compile_cl_program_device``. Each
    resulting function is wrapped in ``CallableKernelEmulation`` or
    ``CallableKernelDevice`` and attached to its kernel model as
    ``callable_kernel``. The elapsed time is reported to the context via
    ``add_time_compilation``.

    Args:
        program_model: Program whose ``kernels`` are compiled.
        context: Context to compile for. When None, the context of the queue
            returned by ``get_current_queue()`` is used.
        emulate: Select the emulation backend instead of the device backend.
        file: Target file. A ``Path`` is converted to ``str``; the sentinel
            ``'$default_path'`` is replaced by the program's default kernel
            directory joined with the name of its first kernel.

    Returns:
        A ``ProgramContainer`` holding the program model, the resolved file,
        the context and the callable kernels keyed by kernel name.

    Raises:
        ValueError: If ``file`` is None while ``emulate`` is set.
    """
    started_ns = time.perf_counter_ns()

    # Resolve the file name.
    if isinstance(file, Path):
        file = str(file)
    if file is None and emulate:
        # TODO: can python debugging run without file?
        raise ValueError(
            'You intended to create no file by setting file=None. '
            'However, a file must be created for debugging.'
        )
    if file == '$default_path':
        default_dir = program_model.get_default_dir_pycl_kernels()
        first_kernel_name = program_model.kernels[0].name
        file = str(default_dir.joinpath(first_kernel_name))

    if context is None:
        context = get_current_queue().context

    # Kernel models keyed by name, used to pair each compiled function with
    # the model it was generated from.
    models_by_name = {}
    for kernel_model in program_model.kernels:
        models_by_name[kernel_model.name] = kernel_model

    callable_kernels = {}
    if emulate:
        emulated_functions = compile_cl_program_emulation(
            program_model, context, file)
        for name, function in emulated_functions.items():
            callable_kernels[name] = CallableKernelEmulation(
                kernel_model=models_by_name[name], function=function)
    else:
        device_functions = compile_cl_program_device(
            program_model, context, file)
        for name, compiled in device_functions.items():
            callable_kernels[name] = CallableKernelDevice(
                kernel_model=models_by_name[name], compiled=compiled)

    # Make the callable kernel available on each kernel model instance.
    for kernel_model in program_model.kernels:
        kernel_model.callable_kernel = callable_kernels[kernel_model.name]

    elapsed_ns = time.perf_counter_ns() - started_ns
    context.add_time_compilation(elapsed_ns)
    return ProgramContainer(program_model=program_model,
                            file=file,
                            init=context,
                            callable_kernels=callable_kernels)
예제 #4
0
 def __call__(self,
              global_size: KernelGridType = None,
              local_size: KernelGridType = None,
              **kwargs: Union[TypesClArray, object]) -> cl.Event:
     """Call the emulation function and return a completed user event.

     Args:
         global_size: Passed to ``self._prepare_arguments``; the value it
             returns is handed to the emulation function.
         local_size: Passed to ``self._prepare_arguments``; the value it
             returns is handed to the emulation function.
         **kwargs: Kernel arguments by name. An optional ``queue`` entry is
             removed and used instead of ``get_current_queue()``.

     Returns:
         A ``cl.UserEvent`` whose status is already set to ``COMPLETE``. It
         is created on the context of the first cl array argument, or on
         ``queue.context`` when no argument is a cl array (previously that
         case failed with ``IndexError`` after the kernel had already run).
     """
     # e.g. if two kernels of a program shall run concurrently, this can be enabled by passing another queue here
     queue = kwargs.pop('queue', get_current_queue())
     global_size, local_size, args = self._prepare_arguments(
         queue=queue,
         knl=self.kernel_model,
         global_size=global_size,
         local_size=local_size,
         **kwargs)
     self.function(global_size, local_size, *args)
     # The user event needs a context: prefer the one of the first cl array
     # argument and fall back to the queue's context if there is none.
     first_array = next(
         (arg for arg in args if isinstance(arg, TypesClArray.__args__)),
         None)
     context = queue.context if first_array is None else first_array.context
     event = cl.UserEvent(context)
     event.set_status(cl.command_execution_status.COMPLETE)
     return event
예제 #5
0
 def __call__(self,
              global_size: KernelGridType = None,
              local_size: KernelGridType = None,
              **kwargs) -> cl.Event:
     """Launch the compiled kernel on a queue and return its event.

     Args:
         global_size: Passed to ``self._prepare_arguments``; the value it
             returns is used for the launch.
         local_size: Passed to ``self._prepare_arguments``; the value it
             returns is checked against the device limits and used for the
             launch.
         **kwargs: Kernel arguments by name. An optional ``queue`` entry is
             removed and used instead of ``get_current_queue()``.

     Returns:
         The event returned by the compiled kernel; it is also registered
         with the queue under the kernel model's name.
     """
     # A different queue may be passed, e.g. to let two kernels of one
     # program run concurrently.
     queue = kwargs.pop('queue', get_current_queue())
     # The kernel must have been compiled for the queue's context.
     assert self.compiled.context.int_ptr == queue.context.int_ptr

     prepared = self._prepare_arguments(queue=queue,
                                        knl=self.kernel_model,
                                        global_size=global_size,
                                        local_size=local_size,
                                        **kwargs)
     global_size, local_size, args = prepared
     self.check_local_size_not_exceeding_device_limits(queue.device,
                                                       local_size)

     # The compiled kernel takes the underlying buffer of each cl array;
     # the arrays themselves are only needed in emulation.
     device_args = []
     for arg in args:
         if isinstance(arg, TypesClArray.__args__):
             device_args.append(arg.data)
         else:
             device_args.append(arg)

     event = self.compiled(queue, global_size, local_size, *device_args)
     queue.add_event(event, self.kernel_model.name)
     return event
예제 #6
0
def zeros(shape, dtype, queue: CommandQueue = None, order="C", allocator=None):
    """Wrap the result of ``cl_array.zeros`` as an ``Array``.

    Args:
        shape: Shape passed to ``cl_array.zeros``.
        dtype: Element type passed to ``cl_array.zeros``.
        queue: Queue passed to ``cl_array.zeros``. When None, the queue
            returned by ``get_current_queue()`` is used.
        order: Passed to ``cl_array.zeros``.
        allocator: Passed to ``cl_array.zeros``.

    Returns:
        The wrapped ``Array`` after ``add_latest_event('zeros')`` was called
        on it.
    """
    if queue is None:
        queue = get_current_queue()
    zeroed = cl_array.zeros(queue, shape, dtype, order, allocator)
    res = Array.from_array(zeroed)
    res.add_latest_event('zeros')
    return res