def duplicate(ctx, data):
    """Build a Blockwise copy kernel for ``data`` and return a launcher.

    The generated kernel body copies ``IN_BLOCK_SIZE`` elements from the
    start of ``a`` into ``b`` beginning at offset ``OUT_BLOCK_SIZE * __id``.
    Both block sizes are set to the total number of elements in ``data``
    (the product of ``data.shape``).

    Args:
        ctx: Context object forwarded unchanged to ``Blockwise``.
        data: Array-like object exposing ``shape``, ``dtype`` and ``data``.

    Returns:
        A callable ``_kernel(queue, length, b=None)``.  When ``b`` is
        omitted, an output array of shape ``(length,) + data.shape[1:]`` is
        allocated with ``cl.array.empty``.  The mapper is then invoked with
        ``data.data`` and ``b.data``, and ``b`` is returned.
    """
    # TODO: is this the same as a transpose?  pyopencl.array.transpose
    # cannot be used at the moment -- reason unknown.
    kernel_src = """ for (int j = 0; j < IN_BLOCK_SIZE; ++j) { b[OUT_BLOCK_SIZE*__id+j] = a[j]; } """

    # Input and output blocks both span every element of ``data``.
    block_elems = reduce(mul, data.shape)
    kernel_args = [
        ('a', 'global const', data.dtype, '*a'),
        ('b', 'global', data.dtype, '*b'),
    ]
    blockwise = Blockwise(
        ctx,
        map_expr=kernel_src,
        arguments=kernel_args,
        in_blocksize=block_elems,
        out_blocksize=block_elems,
    )
    blockwise.build()

    def _kernel(queue, length, b=None):
        if b is None:
            # NOTE(review): each block writes prod(data.shape) elements, but
            # this allocation holds length * prod(data.shape[1:]) elements.
            # If ``length`` is the number of blocks, the two only agree when
            # data.shape[0] == 1 -- confirm against Blockwise's semantics.
            out_shape = (length,) + tuple(data.shape[1:])
            b = cl.array.empty(queue, out_shape, data.dtype)
        blockwise(queue, length, data.data, b.data)
        return b

    return _kernel
def rearange_to_block(ctx, strided_data):
    """Build a Blockwise rearranging kernel for ``strided_data``.

    The generated kernel body performs a single assignment per work item:
    ``b[OUT_BLOCK_SIZE*__item_id.y + __id] = a[__in_offset + __item_id.y]``.
    The mapper is configured with ``in_blocksize = strided_data.shape[1]``,
    ``out_blocksize = strided_data.shape[0]``, ``block_shape`` equal to the
    full shape, and ``threads = (1, strided_data.shape[1])``.

    Args:
        ctx: Context object forwarded unchanged to ``Blockwise``.
        strided_data: Array-like object exposing ``shape`` (at least two
            dimensions are indexed), ``dtype`` and ``data``.

    Returns:
        A callable ``_kernel(queue, length, b)`` that invokes the mapper
        with ``strided_data.data`` and ``b`` and returns the mapper's result.
    """
    # TODO: is this the same as a transpose?  pyopencl.array.transpose
    # cannot be used at the moment -- reason unknown.
    kernel_src = """ b[OUT_BLOCK_SIZE*__item_id.y+__id] = a[__in_offset+__item_id.y]; """

    dim0 = strided_data.shape[0]
    dim1 = strided_data.shape[1]
    kernel_args = [
        ('a', 'global const', strided_data.dtype, '*a'),
        ('b', 'global', strided_data.dtype, '*b'),
    ]
    blockwise = Blockwise(
        ctx,
        map_expr=kernel_src,
        arguments=kernel_args,
        in_blocksize=dim1,
        out_blocksize=dim0,
        block_shape=strided_data.shape,
        threads=(1, dim1),
    )
    blockwise.build()

    def _kernel(queue, length, b):
        # NOTE(review): ``b`` is forwarded as-is, whereas the input side
        # passes ``strided_data.data`` -- presumably callers hand in the raw
        # buffer already; confirm.
        return blockwise(queue, length, strided_data.data, b)

    return _kernel