Example #1
def blob_to_CudaNdArray(b, diff=False):
    """Wrap a blob's GPU data and diff buffers as CudaNdarrays that alias
    the same device memory (no copy is made)."""
    from theano.sandbox import cuda
    data_ptr = int(b.gpu_data_ptr)
    diff_ptr = int(b.gpu_diff_ptr)
    # Build C-contiguous strides (in elements) from the blob shape.
    strides = tuple()
    if len(b.shape) > 0:
        strides = [1]
        for i in b.shape[::-1][:-1]:
            strides.append(strides[-1] * i)
        strides = tuple(strides[::-1])
    return cuda.from_gpu_pointer(data_ptr, b.shape, strides, b), \
        cuda.from_gpu_pointer(diff_ptr, b.shape, strides, b)
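The stride computation in this example builds C-contiguous strides, measured in elements rather than bytes, from the blob shape. A standalone sketch of the same loop, using a purely illustrative shape:

shape = (2, 3, 4)            # illustrative shape, not from the original code
strides = [1]
for i in shape[::-1][:-1]:   # walk the axes from last to second
    strides.append(strides[-1] * i)
strides = tuple(strides[::-1])
print(strides)               # (12, 4, 1): row-major element strides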
Example #2
    def garray_to_cudandarray(x):
        """Take a gnumpy.garray and make a CudaNdarray that points to its memory."""
        if not isinstance(x, gnumpy.garray):
            raise ValueError(
                "We can transfer only gnumpy.garray to CudaNdarray")
        # elif x.dtype != "float32":
        #     raise ValueError("CudaNdarray support only float32")
        # We don't need this, because cudamat is always float32.
        else:
            # Build C-contiguous strides (in elements) from the shape.
            strides = [1]
            for i in x.shape[::-1][:-1]:
                strides.append(strides[-1] * i)
            strides = strides[::-1]
            # Size-1 axes get stride 0 so their single value is reused (broadcast).
            for i in range(len(strides)):
                if x.shape[i] == 1:
                    strides[i] = 0
            strides = tuple(strides)

            import ctypes
            ptr_long = int(
                ctypes.cast(x._base.mat.data_device, ctypes.c_void_p).value)

            # Wrap the device pointer without copying; x._base keeps the memory alive.
            z = cuda.from_gpu_pointer(ptr_long, x.shape, strides, x._base)
            return z
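The only difference from the plain stride computation in Example #1 is the second loop, which sets the stride of every size-1 axis to 0 so the single value is reused along that axis. A small sketch with a made-up shape:

shape = (5, 1, 3)            # made-up shape with a size-1 axis
strides = [1]
for i in shape[::-1][:-1]:
    strides.append(strides[-1] * i)
strides = strides[::-1]      # [3, 3, 1] for a C-contiguous (5, 1, 3) array
for i in range(len(strides)):
    if shape[i] == 1:
        strides[i] = 0       # stride 0: every index along this axis maps to the same element
print(tuple(strides))        # (3, 0, 1)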
def test_pycuda_memory_to_theano():
    # Test that we can use a pycuda GPUArray's memory space in a CudaNdarray.
    y = pycuda.gpuarray.zeros((3, 4, 5), 'float32')
    print(sys.getrefcount(y))
    # This increases the ref count with newer pycuda. Does pycuda also
    # cache ndarrays?
    # print(y.get())
    initial_refcount = sys.getrefcount(y)
    print("gpuarray ref count before creating a CudaNdarray", end=' ')
    print(sys.getrefcount(y))
    assert sys.getrefcount(y) == initial_refcount
    rand = np.random.randn(*y.shape).astype(np.float32)
    cuda_rand = cuda_ndarray.CudaNdarray(rand)

    strides = [1]
    for i in y.shape[::-1][:-1]:
        strides.append(strides[-1] * i)
    strides = tuple(strides[::-1])
    print('strides', strides)
    assert cuda_rand._strides == strides, (cuda_rand._strides, strides)

    # in pycuda trunk, y.ptr also works, which is a little cleaner
    y_ptr = int(y.gpudata)
    z = cuda_ndarray.from_gpu_pointer(y_ptr, y.shape, strides, y)
    print("gpuarray ref count after creating a CudaNdarray", sys.getrefcount(y))
    assert sys.getrefcount(y) == initial_refcount + 1
    assert (np.asarray(z) == 0).all()
    assert z.base is y

    # Test that we can take a view from this cuda view on pycuda memory
    zz = z.view()
    assert sys.getrefcount(y) == initial_refcount + 2
    assert zz.base is y
    del zz
    assert sys.getrefcount(y) == initial_refcount + 1

    cuda_ones = cuda_ndarray.CudaNdarray(np.asarray([[[1]]],
                                                    dtype='float32'))
    z += cuda_ones
    assert (np.asarray(z) == np.ones(y.shape)).all()
    assert (np.asarray(z) == 1).all()

    assert cuda_rand.shape == z.shape
    assert cuda_rand._strides == z._strides, (cuda_rand._strides, z._strides)
    assert (np.asarray(cuda_rand) == rand).all()
    z += cuda_rand
    assert (np.asarray(z) == (rand + 1)).all()

    # Check that the ref count to the gpuarray is right.
    del z
    print("gpuarray ref count after deleting the CudaNdarray", end=' ')
    print(sys.getrefcount(y))
    assert sys.getrefcount(y) == initial_refcount
def to_cudandarray(x):
    """Take a pycuda.gpuarray.GPUArray and make a CudaNdarray that points to its memory.

    :note: CudaNdarray supports only float32, so only float32 GPUArrays are accepted.
    """
    if not isinstance(x, pycuda.gpuarray.GPUArray):
        raise ValueError("We can transfer only pycuda.gpuarray.GPUArray to CudaNdarray")
    elif x.dtype != "float32":
        raise ValueError("CudaNdarray support only float32")
    else:
        strides = [1]
        for i in x.shape[::-1][:-1]:
            strides.append(strides[-1] * i)
        strides = tuple(strides[::-1])
        # in pycuda trunk, x.ptr also works, which is a little cleaner
        ptr = int(x.gpudata)
        z = cuda.from_gpu_pointer(ptr, x.shape, strides, x)
        return z
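A minimal usage sketch for to_cudandarray, assuming pycuda, a working CUDA context, and the old theano.sandbox.cuda backend (imported as cuda) are all available; the shape (3, 4) is only an illustration:

import numpy as np
import pycuda.autoinit  # noqa: F401, creates a CUDA context
import pycuda.gpuarray

x = pycuda.gpuarray.to_gpu(np.arange(12, dtype='float32').reshape(3, 4))
z = to_cudandarray(x)                      # shares x's device memory, no copy
assert z.shape == (3, 4)
assert (np.asarray(z) == x.get()).all()    # both views see the same data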
def to_complex_cudandarray(x):
    """
    Adapted version of theano.misc.pycuda_utils.to_cudandarray that takes a complex64 array
    and turns it into a float32 CudaNdarray with an extra trailing dimension of length 2
    for the real/imaginary parts.
    """
    if not isinstance(x, pycuda.gpuarray.GPUArray):
        raise ValueError("We can transfer only pycuda.gpuarray.GPUArray to CudaNdarray")
    elif x.dtype != "complex64":
        raise ValueError("Only conversion from complex64 arrays is supported")
    else:
        # TODO: figure out what is going on here and adapt it for the complex64-float32 case.
        strides = [1, 2]
        for i in x.shape[::-1][:-1]:
            strides.append(strides[-1]*i)
        strides = tuple(strides[::-1])
        shape = tuple(list(x.shape) + [2])
        ptr = int(x.gpudata)  # in pycuda trunk, x.ptr also works, which is a little cleaner
        z = cuda.from_gpu_pointer(ptr, shape, strides, x)

        return z
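The reinterpretation this function performs on the GPU can be checked on the CPU with a plain numpy view; the shape (3, 4) below is only an illustration:

import numpy as np

c = (np.arange(12) + 1j * np.arange(12)).astype('complex64').reshape(3, 4)
f = c.view('float32').reshape(3, 4, 2)     # trailing axis of length 2: real, imag
assert (f[..., 0] == c.real).all()
assert (f[..., 1] == c.imag).all()
# For this shape the element strides built above come out as (8, 2, 1):
# start with [1, 2], append 2 * 4 = 8, then reverse.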
Example #8
    def cudamat_to_cudandarray(x):
        """Take a cudamat.CUDAMatrix and make a CudaNdarray that points to its memory."""
        if not isinstance(x, cudamat.CUDAMatrix):
            raise ValueError(
                "We can transfer only cudamat.CUDAMatrix to CudaNdarray")
        # elif x.dtype != "float32":
        #     raise ValueError("CudaNdarray support only float32")
        # We don't need this, because cudamat is always float32.
        else:
            strides = [1]
            for i in x.shape[::-1][:-1]:
                strides.append(strides[-1] * i)
            strides = tuple(strides[::-1])

            import ctypes
            ptr_long = int(
                ctypes.cast(x.mat.data_device, ctypes.c_void_p).value)

            # Wrap the device pointer without copying; x keeps the memory alive.
            z = cuda.from_gpu_pointer(ptr_long, x.shape, strides, x)
            return z
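A minimal usage sketch for cudamat_to_cudandarray, assuming cudamat and the old theano.sandbox.cuda backend are installed and a GPU is initialised; the shape (3, 4) is only an illustration:

import numpy as np
import cudamat

cudamat.cublas_init()
m = cudamat.CUDAMatrix(np.ones((3, 4), dtype='float32'))
z = cudamat_to_cudandarray(m)   # CudaNdarray aliasing m's device memory
assert np.asarray(z).shape == (3, 4)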