Beispiel #1
0
def take(a, indices, out=None, stream=None):
    """Run the "take" kernel for ``a`` and ``indices`` and return ``out``.

    The kernel is obtained from ``elementwise.get_take_kernel`` for the dtypes
    of ``a`` and ``indices``.  ``a`` is bound to the first texture reference
    returned alongside the kernel, and the kernel is launched asynchronously
    with ``indices.gpudata``, ``out.gpudata`` and ``indices.size`` as its
    arguments.  The kernel source is not visible here; presumably it computes
    ``out[i] = a[indices[i]]`` in the manner of ``numpy.take``.

    Args:
        a: Source array.  Must provide ``dtype``, ``allocator`` and
            ``bind_to_texref_ext``.
        indices: One-dimensional index array providing ``shape``, ``dtype``,
            ``size`` and ``gpudata``.
        out: Optional destination array.  When omitted, a new ``GPUArray``
            with the shape of ``indices`` and the dtype of ``a`` is created
            using ``a.allocator``.
        stream: Forwarded unchanged to ``prepared_async_call``.

    Returns:
        The destination array (``out`` itself when one was supplied).

    Raises:
        ValueError: If ``indices`` is not one-dimensional, or if a supplied
            ``out`` has fewer elements than ``indices``.
        TypeError: If a supplied ``out`` does not have the dtype of ``a``.
    """
    # Validate before touching the allocator so bad input never costs a
    # device allocation, and use real exceptions so the check survives -O.
    if len(indices.shape) != 1:
        raise ValueError(
            "indices must be one-dimensional, got shape %r" % (indices.shape,))

    if out is None:
        out = GPUArray(indices.shape, a.dtype, a.allocator)
    else:
        # The kernel is selected for a.dtype and is told to process
        # indices.size elements, so a mismatched destination would be
        # written with the wrong element type or past its end.
        if out.dtype != a.dtype:
            raise TypeError(
                "out has dtype %s but a has dtype %s" % (out.dtype, a.dtype))
        if out.size < indices.size:
            raise ValueError(
                "out holds %d elements but indices has %d"
                % (out.size, indices.size))

    func, tex_src = elementwise.get_take_kernel(a.dtype, indices.dtype)
    a.bind_to_texref_ext(tex_src[0], allow_double_hack=True)

    # Launch geometry comes from the destination array.
    func.prepared_async_call(
        out._grid, out._block, stream,
        indices.gpudata, out.gpudata, indices.size)

    return out
Beispiel #2
0
def take(a, indices, out=None, stream=None):
    """Run the "take" kernel for ``a`` and ``indices`` and return ``out``.

    The kernel is obtained from ``elementwise.get_take_kernel`` for the dtypes
    of ``a`` and ``indices``.  ``a`` is bound to the first texture reference
    returned alongside the kernel (with both the double and complex hacks
    allowed), and the kernel is launched asynchronously with
    ``indices.gpudata``, ``out.gpudata`` and ``indices.size`` as its
    arguments.  The kernel source is not visible here; presumably it computes
    ``out[i] = a[indices[i]]`` in the manner of ``numpy.take``.

    Args:
        a: Source array.  Must provide ``dtype``, ``allocator`` and
            ``bind_to_texref_ext``.
        indices: One-dimensional index array providing ``shape``, ``dtype``,
            ``size`` and ``gpudata``.
        out: Optional destination array.  When omitted, a new ``GPUArray``
            with the shape of ``indices`` and the dtype of ``a`` is created
            using ``a.allocator``.
        stream: Forwarded unchanged to ``prepared_async_call``.

    Returns:
        The destination array (``out`` itself when one was supplied).

    Raises:
        ValueError: If ``indices`` is not one-dimensional, or if a supplied
            ``out`` has fewer elements than ``indices``.
        TypeError: If a supplied ``out`` does not have the dtype of ``a``.
    """
    # Validate before touching the allocator so bad input never costs a
    # device allocation, and use real exceptions so the check survives -O.
    if len(indices.shape) != 1:
        raise ValueError(
            "indices must be one-dimensional, got shape %r" % (indices.shape,))

    if out is None:
        out = GPUArray(indices.shape, a.dtype, a.allocator)
    else:
        # The kernel is selected for a.dtype and is told to process
        # indices.size elements, so a mismatched destination would be
        # written with the wrong element type or past its end.
        if out.dtype != a.dtype:
            raise TypeError(
                "out has dtype %s but a has dtype %s" % (out.dtype, a.dtype))
        if out.size < indices.size:
            raise ValueError(
                "out holds %d elements but indices has %d"
                % (out.size, indices.size))

    func, tex_src = elementwise.get_take_kernel(a.dtype, indices.dtype)
    a.bind_to_texref_ext(
        tex_src[0], allow_double_hack=True, allow_complex_hack=True)

    # Launch geometry comes from the destination array.
    func.prepared_async_call(
        out._grid, out._block, stream,
        indices.gpudata, out.gpudata, indices.size)

    return out
Beispiel #3
0
 def make_func_for_chunk_size(chunk_size):
     """Fetch the take kernel built for ``chunk_size`` vectors.

     ``a_dtype`` and ``indices`` are not defined in this block (presumably
     they come from the enclosing scope).  The result of
     ``elementwise.get_take_kernel`` is returned unchanged.
     """
     take_kernel_info = elementwise.get_take_kernel(
             a_dtype, indices.dtype, vec_count=chunk_size)
     return take_kernel_info
Beispiel #4
0
 def make_func_for_chunk_size(chunk_size):
     """Return ``elementwise.get_take_kernel``'s result for ``chunk_size``.

     The kernel is requested for ``a_dtype`` and ``indices.dtype`` (names not
     defined in this block; presumably from the enclosing scope), with
     ``chunk_size`` passed as ``vec_count``.
     """
     get_kernel = elementwise.get_take_kernel
     return get_kernel(a_dtype, indices.dtype, vec_count=chunk_size)
Beispiel #5
0
 def make_func_for_chunk_size(chunk_size):
     """Build the take kernel for ``chunk_size`` vectors and set its block.

     The kernel is requested from ``elementwise.get_take_kernel`` for
     ``a_dtype`` and ``indices.dtype`` (names not defined in this block;
     presumably from the enclosing scope) with ``vec_count=chunk_size``.
     Its block shape is set from ``indices._block`` before it is handed back.

     Returns:
         A ``(kernel, texrefs)`` tuple holding the two values produced by
         ``get_take_kernel``.
     """
     kernel, texrefs = elementwise.get_take_kernel(
             a_dtype, indices.dtype, vec_count=chunk_size)
     # Fix the launch block shape up front, using the index array's block.
     kernel.set_block_shape(*indices._block)
     return kernel, texrefs