def asarray(ctx, other, queue=None, copy=True):
    """Return ``other`` as a ``CLArray``.

    :param ctx: context handed to ``cl.from_host`` and to
        ``CLArray.__array_init__``.
    :param other: a ``cl.DeviceMemoryView`` (used as-is) or any object
        accepted by ``cl.from_host`` (uploaded first).
    :param queue: forwarded unchanged to ``CLArray.__array_init__``.
    :param copy: forwarded to ``cl.from_host``; not consulted when ``other``
        is already a ``cl.DeviceMemoryView``.
    :returns: the result of ``CLArray._view_as_this`` after
        ``__array_init__(ctx, queue)`` has been called on it.
    """
    already_on_device = isinstance(other, cl.DeviceMemoryView)
    device_mem = other if already_on_device else cl.from_host(ctx, other, copy=copy)

    result = CLArray._view_as_this(device_mem)
    result.__array_init__(ctx, queue)
    return result
def __call__(self, x, out=None, queue=None):
    """Apply the unary device function element-wise to ``x``.

    :param x: input operand.  Anything that is not a
        ``cl.DeviceMemoryView`` is uploaded with ``cl.from_host``.
    :param out: optional pre-allocated output buffer.  When omitted, a new
        buffer with ``x``'s shape and format is allocated with ``cl.empty``.
    :param queue: command queue to run on.  Defaults to ``x.queue``; a
        host-side ``x`` without a ``queue`` attribute therefore needs an
        explicit ``queue`` (``AttributeError`` is raised otherwise).
    :returns: ``out`` viewed as a ``CLArray``.
    """
    if queue is None:
        queue = x.queue

    # This method receives no context argument, so the queue's context is
    # used both for allocations and for initialising the result.
    context = queue.context

    if not isinstance(x, cl.DeviceMemoryView):
        x = cl.from_host(context, x)

    if out is None:
        out = cl.empty(context, x.shape, x.format)

    unary_ufunc_kernel(queue, self.device_func, x, out)

    array = CLArray._view_as_this(out)
    # ``__array_init__`` is called as (context, queue) at the other call
    # sites in this module; the previous single-argument call passed the
    # queue in the context slot.
    # NOTE(review): confirm against the CLArray.__array_init__ signature.
    array.__array_init__(context, queue)
    return array
def __call__(self, context, x, y, out=None, queue=None):
    """Apply the binary device function element-wise to ``x`` and ``y``.

    The operands are broadcast to a common shape before the kernel runs.

    :param context: array context; this method uses its ``asarray`` and
        ``empty`` methods and, as a last resort, its ``queue`` attribute.
    :param x: first operand; converted with ``context.asarray`` unless it
        is already a ``cl.DeviceMemoryView``.
    :param y: second operand; converted the same way as ``x``.
    :param out: optional pre-allocated output.  When omitted, a buffer of
        the broadcast shape is allocated using ``x.format`` (``y``'s format
        is not consulted).
    :param queue: command queue to run on.  Defaults to ``x.queue``, then
        ``y.queue``, then ``context.queue``.
    :returns: ``out`` viewed as a ``CLArray``.
    """
    if queue is None:
        # Prefer a queue already attached to an operand, x first.
        if hasattr(x, 'queue'):
            queue = x.queue
        elif hasattr(y, 'queue'):
            queue = y.queue
        else:
            queue = context.queue

    if not isinstance(x, cl.DeviceMemoryView):
        x = context.asarray(x)
    if not isinstance(y, cl.DeviceMemoryView):
        y = context.asarray(y)

    # Make ``queue`` wait for whatever is pending on an operand's own
    # queue.  An operand may be a device view that carries no queue (the
    # selection logic above already allows for that), in which case there
    # is nothing to wait on.
    for operand in (y, x):
        operand_queue = getattr(operand, 'queue', None)
        if operand_queue is not None and operand_queue != queue:
            queue.enqueue_wait_for_events(operand_queue.marker())

    new_shape = broadcast_shape(x.shape, y.shape)
    a = cl.broadcast(x, new_shape)
    b = cl.broadcast(y, new_shape)

    if out is None:
        out = context.empty(shape=new_shape, ctype=x.format, queue=queue)

    # ``func_name`` only exists on Python 2 functions; ``__name__`` is the
    # spelling that also works on Python 3.
    device_func = self.device_func
    meta_name = getattr(device_func, 'func_name', None)
    if meta_name is None:
        meta_name = device_func.__name__

    kernel = ufunc_kernel.compile(
        context,
        function=device_func,
        a=cl.global_memory(a.format, flat=True),
        b=cl.global_memory(b.format, flat=True),
        out=cl.global_memory(out.format, flat=True),
        cly_meta=meta_name,
    )
    kernel(queue, a, a.array_info, b, b.array_info, out, out.array_info)

    array = CLArray._view_as_this(out)
    array.__array_init__(context, queue)
    return array
def __call__(self, context, x, y, out=None, queue=None):
    """Apply the binary device function element-wise to ``x`` and ``y``.

    The operands are broadcast to a common shape before the kernel runs.

    :param context: array context; this method uses its ``asarray`` and
        ``empty`` methods and, as a last resort, its ``queue`` attribute.
    :param x: first operand; converted with ``context.asarray`` unless it
        is already a ``cl.DeviceMemoryView``.
    :param y: second operand; converted the same way as ``x``.
    :param out: optional pre-allocated output.  When omitted, a buffer of
        the broadcast shape is allocated using ``x.format`` (``y``'s format
        is not consulted).
    :param queue: command queue to run on.  Defaults to ``x.queue``, then
        ``y.queue``, then ``context.queue``.
    :returns: ``out`` viewed as a ``CLArray``.
    """
    if queue is None:
        # Prefer a queue already attached to an operand, x first.
        if hasattr(x, 'queue'):
            queue = x.queue
        elif hasattr(y, 'queue'):
            queue = y.queue
        else:
            queue = context.queue

    if not isinstance(x, cl.DeviceMemoryView):
        x = context.asarray(x)
    if not isinstance(y, cl.DeviceMemoryView):
        y = context.asarray(y)

    # Make ``queue`` wait for whatever is pending on an operand's own
    # queue.  An operand may be a device view that carries no queue (the
    # selection logic above already allows for that), in which case there
    # is nothing to wait on.
    for operand in (y, x):
        operand_queue = getattr(operand, 'queue', None)
        if operand_queue is not None and operand_queue != queue:
            queue.enqueue_wait_for_events(operand_queue.marker())

    new_shape = broadcast_shape(x.shape, y.shape)
    a = cl.broadcast(x, new_shape)
    b = cl.broadcast(y, new_shape)

    if out is None:
        out = context.empty(shape=new_shape, ctype=x.format, queue=queue)

    # ``func_name`` only exists on Python 2 functions; ``__name__`` is the
    # spelling that also works on Python 3.
    device_func = self.device_func
    meta_name = getattr(device_func, 'func_name', None)
    if meta_name is None:
        meta_name = device_func.__name__

    kernel = ufunc_kernel.compile(
        context,
        function=device_func,
        a=cl.global_memory(a.format, flat=True),
        b=cl.global_memory(b.format, flat=True),
        out=cl.global_memory(out.format, flat=True),
        cly_meta=meta_name,
    )
    kernel(queue, a, a.array_info, b, b.array_info, out, out.array_info)

    array = CLArray._view_as_this(out)
    array.__array_init__(context, queue)
    return array
def reduce(self, context, x, out=None, initial=0.0, queue=None):
    """Reduce ``x`` with the binary device function.

    :param context: array context passed to ``CLArray.__array_init__`` for
        the result; its ``queue`` attribute is the fallback command queue.
    :param x: input operand.  Anything that is not a
        ``cl.DeviceMemoryView`` is uploaded with ``cl.from_host``.
    :param out: optional pre-allocated output.  When omitted, a
        one-element buffer with ``x``'s format is allocated.
    :param initial: accepted for interface compatibility; this method does
        not currently read it.
    :param queue: command queue to run on.  Defaults to ``x.queue`` when
        ``x`` carries one, otherwise to ``context.queue``.
    :returns: ``out`` viewed as a ``CLArray``.
    """
    if queue is None:
        # A host-side ``x`` has no queue of its own; fall back to the
        # context's queue so the upload path below stays reachable.
        queue = getattr(x, 'queue', None)
        if queue is None:
            queue = context.queue

    if not isinstance(x, cl.DeviceMemoryView):
        x = cl.from_host(queue.context, x)

    size = x.size
    # 1-D local-memory argument of ``x.ctype`` with ``size`` entries.
    shared = cl.local_memory(x.ctype, ndim=1, shape=[size])

    # Largest power of two not exceeding ``size // 2``, clamped to the
    # range [1, 512].
    half = size // 2
    group_size = 512
    for limit in (2, 4, 8, 16, 32, 64, 128, 256, 512):
        if half < limit:
            group_size = limit // 2
            break

    if out is None:
        out = cl.empty(queue.context, [1], x.format)

    # ``func_name`` only exists on Python 2 functions; ``__name__`` is the
    # spelling that also works on Python 3.
    device_func = self.device_func
    meta_name = getattr(device_func, 'func_name', None)
    if meta_name is None:
        meta_name = device_func.__name__

    kernel = reduce_kernel.compile(
        queue.context,
        function=device_func,
        output=cl.global_memory(out.ctype, flat=True),
        array=cl.global_memory(x.ctype, flat=True),
        shared=shared,
        group_size=cl.cl_uint,
        cly_meta=meta_name,
    )

    # Never ask for more work-items than the compiled kernel reports for
    # this device.
    max_wgsize = kernel.work_group_size(queue.device)
    group_size = min(max_wgsize, group_size)

    kernel(queue, out, out.array_info, x, x.array_info,
           shared, shared.local_info, group_size)

    array = CLArray._view_as_this(out)
    array.__array_init__(context, queue)
    return array