def take(a, indices, out=None, stream=None):
    """Run the "take" kernel for *a* and *indices*, writing into *out*.

    Presumably this gathers ``a[indices[i]]`` into ``out[i]``; the exact
    semantics live in the kernel built by ``elementwise.get_take_kernel``
    and should be confirmed there.

    :arg a: source array; it is bound to the first texture reference
        returned alongside the kernel.
    :arg indices: index array; must be one-dimensional.
    :arg out: destination array. When ``None``, a new ``GPUArray`` with
        the shape of *indices* and the dtype and allocator of *a* is
        created.
    :arg stream: passed through to ``prepared_async_call``.
    :returns: *out* (either the one supplied or the newly created one).
    """
    if out is None:
        out = GPUArray(indices.shape, a.dtype, a.allocator)

    # Only flat index arrays are supported.
    index_rank = len(indices.shape)
    assert index_rank == 1

    kernel, texrefs = elementwise.get_take_kernel(a.dtype, indices.dtype)

    # The kernel reads the source data through its first texture reference.
    source_texref = texrefs[0]
    a.bind_to_texref_ext(source_texref, allow_double_hack=True)

    # Launch configuration is taken from the destination array.
    kernel.prepared_async_call(
        out._grid,
        out._block,
        stream,
        indices.gpudata,
        out.gpudata,
        indices.size,
    )

    return out
def take(a, indices, out=None, stream=None):
    """Run the "take" kernel for *a* and *indices*, writing into *out*.

    Presumably this gathers ``a[indices[i]]`` into ``out[i]``; the exact
    semantics live in the kernel built by ``elementwise.get_take_kernel``
    and should be confirmed there.

    :arg a: source array; it is bound to the first texture reference
        returned alongside the kernel, with both ``allow_double_hack``
        and ``allow_complex_hack`` enabled.
    :arg indices: index array; must be one-dimensional.
    :arg out: destination array. When ``None``, a new ``GPUArray`` with
        the shape of *indices* and the dtype and allocator of *a* is
        created.
    :arg stream: passed through to ``prepared_async_call``.
    :returns: *out* (either the one supplied or the newly created one).
    """
    if out is None:
        out = GPUArray(indices.shape, a.dtype, a.allocator)

    # Only flat index arrays are supported.
    index_rank = len(indices.shape)
    assert index_rank == 1

    kernel, texrefs = elementwise.get_take_kernel(a.dtype, indices.dtype)

    # The kernel reads the source data through its first texture reference.
    source_texref = texrefs[0]
    a.bind_to_texref_ext(
        source_texref,
        allow_double_hack=True,
        allow_complex_hack=True,
    )

    # Launch configuration is taken from the destination array.
    kernel.prepared_async_call(
        out._grid,
        out._block,
        stream,
        indices.gpudata,
        out.gpudata,
        indices.size,
    )

    return out
def make_func_for_chunk_size(chunk_size):
    """Build the take kernel for a given number of arrays per launch.

    ``a_dtype`` and ``indices`` are not parameters; they are resolved from
    the enclosing scope.

    :arg chunk_size: forwarded to ``elementwise.get_take_kernel`` as
        ``vec_count``.
    :returns: whatever ``get_take_kernel`` returns, unchanged (presumably
        a kernel plus its texture references -- confirm against
        ``elementwise``).
    """
    kernel_info = elementwise.get_take_kernel(
        a_dtype,
        indices.dtype,
        vec_count=chunk_size,
    )
    return kernel_info
def make_func_for_chunk_size(chunk_size):
    """Build the take kernel for a given chunk size and set its block shape.

    ``a_dtype`` and ``indices`` are not parameters; they are resolved from
    the enclosing scope.

    :arg chunk_size: forwarded to ``elementwise.get_take_kernel`` as
        ``vec_count``.
    :returns: a ``(kernel, texrefs)`` tuple, i.e. the two values unpacked
        from ``get_take_kernel``, after the kernel's block shape has been
        set.
    """
    kernel, texrefs = elementwise.get_take_kernel(
        a_dtype,
        indices.dtype,
        vec_count=chunk_size,
    )

    # Use the index array's block dimensions as the kernel's block shape.
    kernel.set_block_shape(*indices._block)

    return kernel, texrefs