Example #1
0
def cumsum(x_gpu):
    """
    Running (inclusive) sum of a GPU array.

    Applies an inclusive ``a+b`` scan kernel to the given array and
    returns whatever that kernel returns.

    Parameters
    ----------
    x_gpu : pycuda.gpuarray.GPUArray
        Array whose elements are accumulated.

    Returns
    -------
    c_gpu : pycuda.gpuarray.GPUArray
        Array holding the cumulative sum of `x_gpu`.

    Notes
    -----
    Arrays with more than one dimension are implicitly flattened
    row-wise by this function.

    One scan kernel is built per dtype and memoized in ``cumsum.cache``.
    That mapping is not created here; it is expected to be attached to
    the function elsewhere in the module before the first call.

    NOTE(review): the kernel is invoked with the input array only. If
    PyCUDA's scan kernels write into their input when no output array
    is supplied, `x_gpu` is modified in place and the returned array is
    the same object -- confirm against the PyCUDA scan documentation.

    Examples
    --------
    >>> import numpy as np
    >>> import pycuda.autoinit
    >>> import pycuda.gpuarray as gpuarray
    >>> import misc
    >>> x_gpu = gpuarray.to_gpu(np.random.rand(5).astype(np.float32))
    >>> c_gpu = misc.cumsum(x_gpu)
    >>> np.allclose(c_gpu.get(), np.cumsum(x_gpu.get()))
    True

    """

    kernels = cumsum.cache
    dtype = x_gpu.dtype

    # Build the scan kernel lazily, once per dtype; the complex header is
    # included so that 'a+b' also compiles for complex element types.
    if dtype not in kernels:
        kernels[dtype] = scan.InclusiveScanKernel(
            dtype, 'a+b', preamble='#include <pycuda-complex.hpp>')

    return kernels[dtype](x_gpu)
Example #2
0
                     a_gpu,
                     np.int32(size),
                     block=(BLOCK_S, 1, 1),
                     grid=((size - 1) // BLOCK_S + 1, 1, 1))
    # Record the wall-clock time elapsed since the preceding `start` for the
    # call that ends just above.
    time_inefficient.append(time.time() - start)

    # Time the `scan_efficient` call with the same launch configuration:
    # BLOCK_S threads per block and ceil(size / BLOCK_S) blocks.
    # presumably b_gpu2 is the output buffer and a_gpu2 the input -- verify
    # against the kernel signature.
    # NOTE(review): if this is an asynchronous GPU kernel launch, time.time()
    # only captures launch overhead unless the device is synchronized before
    # the timer is read -- confirm.
    start = time.time()
    scan_efficient(b_gpu2,
                   a_gpu2,
                   np.int32(size),
                   block=(BLOCK_S, 1, 1),
                   grid=((size - 1) // BLOCK_S + 1, 1, 1))
    time_efficient.append(time.time() - start)

    # Reference measurement using the library-provided inclusive scan.
    # The upload of `a` and the kernel construction happen before the timer
    # starts, so only the scan call itself is timed.
    knl_gpu = gpuarray.to_gpu(a)
    # The third positional argument "0" is presumably the neutral element of
    # the scan -- TODO confirm against the InclusiveScanKernel signature.
    scan_knl = scan.InclusiveScanKernel(np.float32, "a + b", "0")
    start = time.time()
    scan_knl(knl_gpu)
    time_scan_knl.append(time.time() - start)
    # The string literal below is a no-op expression statement: it holds
    # disabled Python 2 debug prints of the input, the GPU result and the
    # CPU reference.
    '''
  print a
  print "\n"
  print b_gpu.get()
  print "\n"
  print scan_cpu(a, size)
  '''

MAKE_PLOT = True
if MAKE_PLOT:
    import matplotlib as mpl
    mpl.use('agg')